# Deeper Networks for Image Classification

## VGG Model for Image Classification

- Code by: Kaviraj Gosaye
- Student ID: 220575371

### 0. Imports

In [None]:
# import libraries
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import time

### 1. Data Loading and Preprocessing

In [None]:
# convert each PIL image to a tensor, then normalize with mean 0.5 and std 0.5
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

# MNIST train split first, then test split; both stored under ./datasets
# with downloading enabled
train_set, test_set = (
    torchvision.datasets.MNIST(
        root='./datasets',
        train=is_train,
        download=True,
        transform=transform,
    )
    for is_train in (True, False)
)

# batches of 64 loaded by 8 worker processes; only the training data is shuffled
train_loader = torch.utils.data.DataLoader(
    train_set,
    batch_size=64,
    shuffle=True,
    num_workers=8,
)
test_loader = torch.utils.data.DataLoader(
    test_set,
    batch_size=64,
    shuffle=False,
    num_workers=8,
)

### 2. Model Building

In [None]:
# VGG16 layer configuration, one row per stage:
# an int is the number of output channels of a convolution, "M" is a max-pooling layer
VGG16_layers = [
    64, 64, "M",
    128, 128, "M",
    256, 256, 256, "M",
    512, 512, 512, "M",
    512, 512, 512, "M",
]

In [None]:
# creating a class of the VGG16 model
class VGG16(nn.Module):
    """VGG16-style convolutional network with batch normalization.

    The convolutional stack is built from the module-level ``VGG16_layers``
    configuration. Its output is adaptively average-pooled to 7x7, flattened
    and passed through a three-layer fully connected classifier.

    Args:
        in_channels: number of channels of the input images.
        num_classes: size of the final linear layer (one output per class).
    """

    def __init__(self, in_channels: int = 3, num_classes: int = 1000):
        super().__init__()
        # input layer
        self.in_channels = in_channels
        # hidden layers
        self.hidden_layers = self.conv_layers(VGG16_layers)
        # pool to a fixed 7x7 map so the classifier input size is constant
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        # output layer
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Run the network on a batch and return the classifier output."""
        x = self.hidden_layers(x)
        x = self.avgpool(x)
        # keep the batch dimension, flatten everything else
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

    # function to create the hidden convolutional layers
    def conv_layers(self, layer_types):
        """Build the convolutional stack described by ``layer_types``.

        Args:
            layer_types: iterable of layer specifications. An int adds a 3x3
                convolution (stride 1, padding 1) with that many output
                channels, followed by batch normalization and ReLU. The
                string ``"M"`` adds a 2x2 max-pooling layer with stride 2.

        Returns:
            An ``nn.Sequential`` holding the layers in the given order.

        Raises:
            ValueError: if an entry is neither an int nor ``"M"``.
        """
        layers = []
        in_channels = self.in_channels

        for layer in layer_types:
            # bool is a subclass of int; it is not a valid channel count
            if isinstance(layer, int) and not isinstance(layer, bool):
                layers += [
                    nn.Conv2d(
                        in_channels=in_channels,
                        out_channels=layer,
                        kernel_size=(3, 3),
                        stride=(1, 1),
                        padding=(1, 1),
                    ),
                    nn.BatchNorm2d(layer),
                    nn.ReLU(),
                ]
                # the next convolution consumes this layer's output channels
                in_channels = layer
            elif layer == "M":
                layers.append(nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)))
            else:
                # fail loudly instead of silently building a different network
                raise ValueError(
                    f"unsupported layer specification {layer!r}: "
                    'expected an int (conv output channels) or "M" (max pooling)'
                )

        return nn.Sequential(*layers)

In [None]:
# setting the device to cuda if available
device = "cuda" if torch.cuda.is_available() else "cpu"

# creating instance of model and setting it to the device
vgg16 = VGG16().to(device)

# testing the model with a random input (batch size 3, 3 channels, 224x224 image)
example = torch.randn(3, 3, 224, 224).to(device)

# run the forward pass once and reuse its shape for both the check and the
# printout; no_grad avoids building an autograd graph that is never used
with torch.no_grad():
    output_shape = vgg16(example).shape

assert output_shape == torch.Size([3, 1000])
print(output_shape)

In [None]:
# print a per-layer summary of the model for a batch of 3 inputs with 3 channels of 224x224
from torchinfo import summary

info = summary(
    vgg16,
    input_size=(3, 3, 224, 224),
    col_names=('input_size', 'output_size', 'num_params', 'kernel_size'),
)
print(info)