In [1]:
import numpy as np
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
from sklearn.metrics import confusion_matrix
import pandas as pd
import torchmetrics
import seaborn as sb
import matplotlib.pyplot as plt

# --- Hyperparameters -------------------------------------------------------
num_epochs = 30       # number of passes over the training set
batch_size = 64       # samples drawn per training batch
image_size = 150      # every image is resized to image_size x image_size
num_of_classes = 24   # width of the classifier's output layer

# --- Compute device --------------------------------------------------------
# Prefer CUDA when PyTorch reports it as available; otherwise run on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# --- Input pipeline --------------------------------------------------------
# Each image is resized to a square and then converted to a tensor.
transformation = transforms.Compose(
    [
        transforms.Resize(size=(image_size, image_size)),
        transforms.ToTensor(),
    ]
)

# ImageFolder builds the dataset from the directory tree under "train/images".
train_dataset = torchvision.datasets.ImageFolder(
    root="train/images",
    transform=transformation,
)

# Shuffled mini-batches for training.
train_data_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

In [2]:
# Define the CNN model

class CNN(nn.Module):
    """Small convolutional classifier for square RGB images.

    Architecture: three 3x3 convolutions (3 -> 16 -> 32 -> 64 channels, each
    followed by ReLU), one 2x2 max-pool, then two fully connected layers
    (flattened features -> 512 -> num_classes) with dropout in between.

    Args:
        im_size: Height/width (in pixels) of the square input images.
        num_classes: Number of output logits.

    The forward pass returns raw logits of shape (batch, num_classes); no
    softmax is applied.
    """

    def __init__(self, im_size, num_classes):
        super(CNN, self).__init__()
        self.im_size = im_size
        self.num_classes = num_classes

        # define the layers of the CNN
        # kernel_size=3 / stride=1 / padding=1 keeps the spatial size unchanged.
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)

        self.pool = nn.MaxPool2d(2, 2)

        # The 2x2/stride-2 pool floors each spatial dimension to im_size // 2.
        # Computing the flattened width from the pooled size (rather than
        # 64 * im_size**2 // 4) keeps fc1 consistent for odd image sizes too;
        # for even sizes both expressions give the same value.
        pooled_size = im_size // 2
        self.flat_features = 64 * pooled_size * pooled_size

        self.fc1 = nn.Linear(self.flat_features, 512)
        self.fc2 = nn.Linear(512, num_classes)

        self.dropout = nn.Dropout(0.25)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Tensor of shape (batch, 3, im_size, im_size).

        Returns:
            Tensor of shape (batch, num_classes) holding unnormalised logits.
        """
        # apply the layers in the CNN
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = torch.relu(self.conv3(x))

        x = self.pool(x)

        # Flatten everything except the batch dimension. Unlike view(-1, n),
        # this can never fold a wrong-sized input into the batch axis; a size
        # mismatch surfaces as a shape error in fc1 instead.
        x = torch.flatten(x, 1)

        x = self.fc1(x)
        x = torch.relu(x)
        x = self.dropout(x)

        x = self.fc2(x)

        return x


# Build the network, then place it on the selected device
model = CNN(im_size=image_size, num_classes=num_of_classes)
model = model.to(device)

# Adam (library default settings) updates the parameters; cross-entropy is the training loss
optimizer = optim.Adam(params=model.parameters())
criterion = nn.CrossEntropyLoss()

In [None]:
# Take the sample and batch counts from the dataset/loader themselves so the
# statistics below stay correct if the training set or batch size changes.
total = len(train_dataset)
batch_no = len(train_data_loader)
loss_array = []       # mean batch loss, one entry per epoch
accuracy_array = []   # training accuracy in percent, one entry per epoch

# Train the model
for epoch in range(num_epochs):
    # Make sure dropout is active, even if an evaluation cell that switched
    # the model to eval mode was run before this one.
    model.train()

    correct = 0
    running_loss = 0.0
    for inputs, labels in train_data_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate over the whole epoch; the sum is only reset at the start
        # of the next epoch, never in the middle of one.
        running_loss += loss.item()

        # number of correct predictions in an epoch
        predicted = outputs.argmax(dim=1)
        correct += (predicted == labels).sum().item()

    # Report and record the same quantity: the mean of the per-batch losses.
    epoch_loss = running_loss / batch_no
    print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, epoch_loss))
    loss_array.append(epoch_loss)

    accuracy = 100 * correct / total
    print('Accuracy for Epoch[{}/{}]: {} %\n'.format(epoch+1, num_epochs, accuracy))
    accuracy_array.append(accuracy)


# Persist only the learned parameters (state_dict), not the whole module object
PATH = './trained_model.pth'
torch.save(model.state_dict(), PATH)

# Report the loss and performance of the model
print('Finished Training')

In [None]:
# Training accuracy recorded once per epoch. The title is built from
# image_size so it always matches the resolution actually used for training.
plot = sb.lineplot(data=accuracy_array)
plot.set(
    xlabel='Epoch',
    ylabel='Accuracy',
    title="Accuracy vs Epoch for Training with {0}x{0} Images".format(image_size),
);

In [None]:
# Training loss recorded once per epoch. The title is built from image_size
# so it always matches the resolution actually used for training.
plot = sb.lineplot(data=loss_array)
plot.set(
    xlabel='Epoch',
    ylabel='Loss',
    title="Loss vs Epoch for Training with {0}x{0} Images".format(image_size),
);

In [None]:
# Evaluate the trained model on the held-out test images
test_dataset = torchvision.datasets.ImageFolder(root="test/images", transform=transformation)
test_data_loader = DataLoader(test_dataset, batch_size=len(test_dataset), shuffle=False)

# Switch off dropout so predictions are deterministic during evaluation
model.eval()

correct = 0
# The sample count comes from the dataset once; it must not also be
# incremented inside the loop, or every sample is counted twice.
total = len(test_dataset)
test_true = []   # ground-truth labels of every batch (on CPU)
test_pred = []   # predicted labels of every batch (on CPU)
with torch.no_grad():
    for images, labels in test_data_loader:
        # The model lives on `device`; its inputs have to be there as well
        images = images.to(device)
        labels = labels.to(device)
        # calculate outputs by running images through the network
        outputs = model(images)
        # the class with the highest energy is what we choose as prediction
        predicted = outputs.argmax(dim=1)
        correct += (predicted == labels).sum().item()
        test_true.append(labels.cpu())
        test_pred.append(predicted.cpu())

accuracy = 100 * correct / total
print('\nAccuracy for {} test images dataset when image size is ({}x{}): {} %'.format(total, image_size, image_size, accuracy))

# Build the confusion matrix from all batches, as NumPy arrays on the CPU
sb.heatmap(
    confusion_matrix(torch.cat(test_true).numpy(), torch.cat(test_pred).numpy()),
    annot=True, cbar=False, cmap="YlGnBu", fmt="d",
);


In [None]:
# Evaluate the trained model on the validation images
val_dataset = torchvision.datasets.ImageFolder(root="valid/images", transform=transformation)
val_data_loader = DataLoader(val_dataset, batch_size=132, shuffle=False)

# Switch off dropout so predictions are deterministic during evaluation
model.eval()

correct = 0
# Use the real dataset size so the accuracy stays correct if the validation
# set does not contain exactly one loader batch of images.
total = len(val_dataset)
val_true = []   # ground-truth labels of every batch (on CPU)
val_pred = []   # predicted labels of every batch (on CPU)
with torch.no_grad():
    for val_images, val_labels in val_data_loader:
        # The model lives on `device`; its inputs have to be there as well
        val_images = val_images.to(device)
        val_labels = val_labels.to(device)
        # calculate outputs by running images through the network
        val_outputs = model(val_images)
        # the class with the highest energy is what we choose as prediction
        val_predicted = val_outputs.argmax(dim=1)
        correct += (val_predicted == val_labels).sum().item()
        val_true.append(val_labels.cpu())
        val_pred.append(val_predicted.cpu())

accuracy = 100 * correct / total
print('\nAccuracy on the {} validation images dataset when image size is ({}x{}): {} %\n'.format(total, image_size, image_size, accuracy))

# Build the confusion matrix from all batches, as NumPy arrays on the CPU
sb.heatmap(
    confusion_matrix(torch.cat(val_true).numpy(), torch.cat(val_pred).numpy()),
    annot=True, cbar=False, cmap="YlGnBu", fmt="d",
);