# Deeper Networks for Image Classification

## VGG Model for Image Classification

- Code by: Kaviraj Gosaye
- Student ID: 220575371

### 0. Imports

In [None]:
# import libraries
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import time
from torchinfo import summary
from sklearn.metrics import confusion_matrix
import seaborn as sns

### 1. Data Loading and Preprocessing

In [None]:
# preprocessing applied to every PIL image, in order:
#   1. replicate the grayscale image to 3 channels
#   2. resize to 224 x 224
#   3. convert to a tensor
#   4. normalize each channel with mean 0.5 and std 0.5
transform = transforms.Compose(
    [
        transforms.Grayscale(num_output_channels=3),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# MNIST train / test splits, stored under ./datasets (download=True is passed)
train_set = torchvision.datasets.MNIST(
    root='./datasets',
    train=True,
    download=True,
    transform=transform,
)
test_set = torchvision.datasets.MNIST(
    root='./datasets',
    train=False,
    download=True,
    transform=transform,
)

# mini-batch loaders: only the training loader shuffles
train_loader = torch.utils.data.DataLoader(
    dataset=train_set,
    batch_size=64,
    shuffle=True,
    num_workers=8,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_set,
    batch_size=64,
    shuffle=False,
    num_workers=8,
)

In [None]:
# extract one mini-batch (images and their labels) from the training loader
dataiter = iter(train_loader)
# the built-in next() is used here rather than a dataiter.next() method call
images, labels = next(dataiter)

# plot the image
def imshow(img):
    """Draw a normalized image tensor on the current matplotlib axes.

    Args:
        img: image tensor in torch layout (C x H x W) whose values were
            normalized with mean 0.5 and std 0.5.
    """
    # undo the normalization: x * 0.5 + 0.5
    restored = img / 2 + 0.5
    # torch keeps channels first (C x H x W); matplotlib wants them last
    # (H x W x C), so move axis 0 to the end
    channels_last = np.transpose(restored.numpy(), (1, 2, 0))
    plt.imshow(channels_last)

# show images: combine the sampled batch into one grid image and display it
imshow(torchvision.utils.make_grid(images))

### 2. Model Building

In [None]:
# VGG16 convolutional configuration, one row per stage:
# an integer is the output channel count of a conv layer,
# "M" marks a max-pooling layer
VGG16_layers = [
    64, 64, "M",
    128, 128, "M",
    256, 256, 256, "M",
    512, 512, 512, "M",
    512, 512, 512, "M",
]

In [None]:
# creating a class of the VGG16 model
class VGG16(nn.Module):
    """VGG16-style CNN with batch normalization after every convolution.

    The convolutional stack is built from the module-level ``VGG16_layers``
    configuration. Its output is adaptively average-pooled to 7 x 7,
    flattened, and passed through a three-layer fully connected classifier.

    Args:
        in_channels: number of channels of the input images.
        num_classes: number of logits produced by the final linear layer.
    """

    def __init__(self, in_channels=3, num_classes=1000):
        super().__init__()
        # input layer
        self.in_channels = in_channels
        # hidden layers
        self.hidden_layers = self.conv_layers(VGG16_layers)
        # fixes the spatial size at 7 x 7 so the classifier input is always
        # 512 * 7 * 7 features
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        # output layer
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        x = self.hidden_layers(x)
        x = self.avgpool(x)
        # keep the batch dimension, flatten everything else
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

    # function to create the hidden convolutional layers
    def conv_layers(self, layer_types):
        """Build the convolutional stack described by ``layer_types``.

        Args:
            layer_types: sequence whose entries are either an ``int`` (output
                channels of a 3x3 conv + BatchNorm + ReLU group) or the string
                ``"M"`` (a 2x2 max-pooling layer with stride 2).

        Returns:
            An ``nn.Sequential`` holding the layers in configuration order.

        Raises:
            ValueError: if an entry is neither an ``int`` nor ``"M"``. Such
                entries used to be skipped silently, which produced a network
                that did not match the configuration.
        """
        layers = []
        in_channels = self.in_channels

        for layer in layer_types:
            if isinstance(layer, int):
                out_channels = layer

                layers += [
                    nn.Conv2d(
                        in_channels=in_channels,
                        out_channels=out_channels,
                        kernel_size=(3, 3),
                        stride=(1, 1),
                        padding=(1, 1),
                    ),
                    nn.BatchNorm2d(out_channels),
                    nn.ReLU(),
                ]
                # the next conv consumes what this one produced
                in_channels = out_channels
            elif layer == "M":
                layers += [nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))]
            else:
                raise ValueError(
                    f"Unsupported layer specification: {layer!r} "
                    "(expected an int or 'M')"
                )

        return nn.Sequential(*layers)

In [None]:
# setting the device to cuda if available
device = "cuda" if torch.cuda.is_available() else "cpu"

# creating instance of model and setting it to the device
# MNIST has 10 digit classes, so the output layer is sized to 10 logits
# instead of the constructor's default of 1000
vgg16 = VGG16(num_classes=10).to(device)

In [None]:
# visualize the model: per-layer table of input/output sizes, parameter
# counts and kernel sizes for an input of size (3, 3, 224, 224)
info = summary(
    vgg16,
    input_size=(3, 3, 224, 224),
    col_names=('input_size', 'output_size', 'num_params', 'kernel_size'),
)
print(info)

### 3. Model Training

In [None]:
# loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(vgg16.parameters(), lr=0.001, momentum=0.9)

# training the model
start = time.time()

num_epochs = 10
losses = []      # mean training loss of each epoch
train_accs = []  # training accuracy (in %) of each epoch

for epoch in range(num_epochs):
    # make sure Dropout and BatchNorm run in training mode, even if the model
    # was switched to eval mode earlier in the session
    vgg16.train()

    running_loss = 0.0   # loss summed over the current 100-batch window
    epoch_loss = 0.0     # per-sample loss summed over the whole epoch
    epoch_correct = 0    # correctly classified samples in this epoch
    epoch_total = 0      # samples seen in this epoch

    for i, data in enumerate(train_loader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)
        batch_size = labels.size(0)

        optimizer.zero_grad()

        outputs = vgg16(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()

        # cumulative loss
        running_loss += batch_loss
        # criterion returns the batch mean; weight it by the batch size so a
        # smaller final batch does not distort the epoch average
        epoch_loss += batch_loss * batch_size

        # accumulate accuracy over every batch rather than only the last one
        epoch_correct += (outputs.argmax(dim=1) == labels).sum().item()
        epoch_total += batch_size

        # printing the average loss every 100 mini-batches
        if i % 100 == 99:
            print(f"[{epoch + 1}, {i + 1}] loss: {running_loss / 100}")
            running_loss = 0.0

    # epoch-level metrics computed over all samples seen in this epoch
    train_acc = 100. * epoch_correct / epoch_total
    train_accs.append(train_acc)
    losses.append(epoch_loss / epoch_total)

print(f"Finished Training after {time.time() - start} seconds")

### 4. Model Evaluation

In [None]:
# Switch Dropout/BatchNorm to inference behaviour for the evaluation and
# remember the previous mode so it can be restored afterwards
was_training = vgg16.training
vgg16.eval()

# Disable gradient calculation
with torch.no_grad():
    correct = 0
    total = 0
    predicted_labels = []
    true_labels = []
    test_loss = 0.0

    # Using test set
    for data in test_loader:
        images, labels = data[0].to(device), data[1].to(device)
        batch_size = labels.size(0)

        # Forward pass
        outputs = vgg16(images)

        # Calculate the test loss; criterion returns the batch mean, so it is
        # weighted by the batch size to get a true per-sample average below
        loss = criterion(outputs, labels)
        test_loss += loss.item() * batch_size

        # Get the predicted labels (index of the largest logit)
        predicted = outputs.argmax(dim=1)

        # Update the total and correct predictions
        total += batch_size
        correct += (predicted == labels).sum().item()

        # Append the predicted and true labels
        predicted_labels.extend(predicted.cpu().tolist())
        true_labels.extend(labels.cpu().tolist())

    # Calculate the accuracy and the per-sample average loss
    accuracy = 100 * correct / total
    test_loss /= total

# Put the model back into the mode it was in before the evaluation
vgg16.train(was_training)

# Print the accuracy and test loss
print(f"Accuracy on the test data: {accuracy}%")
print(f"Test Loss: {test_loss}")

# Build the confusion matrix from the collected true / predicted labels
cm = confusion_matrix(true_labels, predicted_labels)

# Draw it as an annotated heatmap with integer cell counts
plt.figure(figsize=(10, 8))
heatmap_axes = plt.gca()
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=heatmap_axes)
heatmap_axes.set_xlabel("Predicted Labels")
heatmap_axes.set_ylabel("True Labels")
heatmap_axes.set_title("Confusion Matrix")
plt.show()

In [None]:
# Training-loss curve: one recorded value per epoch
loss_axes = plt.gca()
loss_axes.plot(losses)
loss_axes.set_xlabel('Epoch')
loss_axes.set_ylabel('Loss')
loss_axes.set_title('Training Loss')
plt.show()

In [None]:
# Training-accuracy curve: one recorded value per epoch
acc_axes = plt.gca()
acc_axes.plot(train_accs)
acc_axes.set_xlabel('Epoch')
acc_axes.set_ylabel('Accuracy')
acc_axes.set_title('Training Accuracy')
plt.show()