# Implement VGG-Net

1. Make sure you have the following libraries installed:

> `pip install torch torchvision numpy tqdm ipywidgets matplotlib`

2. All Conv2D are **3x3** with padding **1**, and are followed by a BatchNorm2d (optional) and a **ReLU** activation

3. All MaxPooling are **2x2** with stride **2**

**Block 1**
> + Conv2D 3->64 
> + Conv2D 64->64 
> + MaxPool

**Block 2**
> + Conv2D 64->128
> + Conv2D 128->128
> + MaxPool

**Block 3**
> + Conv2D 128->256
> + Conv2D 256->256
> + Conv2D 256->256
> + MaxPool

**Block 4**
> + Conv2D 256->512
> + Conv2D 512->512
> + Conv2D 512->512
> + MaxPool

**Block 5**
> + Conv2D 512->512
> + Conv2D 512->512
> + Conv2D 512->512
> + MaxPool

**Classifier**
> + Linear INPUT_SIZE -> 4096 + ReLU
> + Linear 4096 -> 1000 + ReLU
> + Linear 1000 -> OUTPUT_SIZE

In [None]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms


class VGG16(nn.Module):
    """VGG-16 image classifier: 13 conv layers, 5 max-pools, 3 linear layers.

    Every convolution is 3x3 with padding 1 and is followed by an optional
    ``BatchNorm2d`` and a ReLU; every max-pool is 2x2 with stride 2, so the
    spatial resolution is divided by 32 overall.

    An adaptive average pool sits between the convolutional part and the
    classifier so that the same network accepts different input resolutions:
    a 224x224 input already yields a 7x7 map (the pool is then a no-op for the
    default ``pool_size``), while a 32x32 input yields a 1x1 map that is
    expanded to ``pool_size`` x ``pool_size``.

    Args:
        in_channels: Number of channels of the input images.
        num_classes: Number of output logits.
        batch_norm: Insert ``BatchNorm2d`` after every convolution.
        pool_size: Side of the square feature map fed to the classifier; the
            first linear layer takes ``512 * pool_size ** 2`` inputs.
    """

    # Output channels of each convolution, in order; "M" marks a max-pool.
    _LAYOUT = (
        64, 64, "M",
        128, 128, "M",
        256, 256, 256, "M",
        512, 512, 512, "M",
        512, 512, 512, "M",
    )

    def __init__(self, in_channels: int = 3, num_classes: int = 1000,
                 batch_norm: bool = True, pool_size: int = 7):
        super().__init__()

        layers = []
        channels = in_channels
        for item in self._LAYOUT:
            if item == "M":
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
                continue
            # The conv bias is redundant when BatchNorm follows: BatchNorm
            # re-centres the activations and has its own learnable shift.
            layers.append(nn.Conv2d(channels, item, kernel_size=3, padding=1,
                                    bias=not batch_norm))
            if batch_norm:
                layers.append(nn.BatchNorm2d(item))
            layers.append(nn.ReLU(inplace=True))
            channels = item

        # Convolutional feature extractor (Blocks 1-5).
        self.conv_features = nn.Sequential(*layers)

        # Fixes the classifier input size regardless of the image resolution.
        self.avgpool = nn.AdaptiveAvgPool2d(pool_size)

        # Fully-connected classification head.
        self.classifier = nn.Sequential(
            nn.Linear(channels * pool_size * pool_size, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, 1000),
            nn.ReLU(inplace=True),
            nn.Linear(1000, num_classes),
        )

    def forward(self, x):
        """Return the class logits, shape ``(batch, num_classes)``, for a batch of images."""
        x = self.conv_features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)  # keep the batch dimension, flatten the rest
        return self.classifier(x)


# Forward Pass Debug
If it can process random data, then you're mostly alright :D

In [None]:
import numpy as np

# Instantiate the network with its defaults (3 input channels, 1000 classes).
net = VGG16()
# numel() gives the element count of each parameter tensor directly.
num_params = sum(p.numel() for p in net.parameters())
print(f"Number of parameters : {num_params}")
print('-' * 50)

# test on Imagenet-like shaped data (224x224)

X = torch.rand((8, 3, 224, 224))
# Only the output shape matters here, so skip building the autograd graph.
with torch.no_grad():
    print('output shape for imgnet', net(X).shape)


In [None]:
# Same shape check on CIFAR-sized images (32x32): the network built above
# must accept this resolution as well.

X = torch.rand(8, 3, 32, 32)
cifar_out = net(X)
print('output shape for cifar', cifar_out.shape)


# OPTIONAL: Let's train on CIFAR-10

Let's load the dataset.

In [None]:
import torchvision
import torchvision.transforms as transforms

# Per-channel mean / std used to normalize the images
# (presumably computed on the CIFAR-10 training set - TODO confirm).
mean = (0.4913997551666284, 0.48215855929893703, 0.4465309133731618)
std = (0.24703225141799082, 0.24348516474564, 0.26158783926049628)

# Transforms applied to every image, see
# https://pytorch.org/vision/stable/transforms.html
#   1. ToTensor  -> float tensor, channels first, values in [0, 1]
#   2. Normalize -> (x - mean) / std per channel; the plotting cell undoes this
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
# No shuffling for evaluation: the order does not affect the accuracy.
testloader = torch.utils.data.DataLoader(testset, batch_size=64,
                                         shuffle=False, num_workers=2)
# Class names, in the order of the integer labels.
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')


Let's plot some sample images

In [None]:
import matplotlib.pyplot as plt

import numpy as np  # imported here too so this cell runs on its own

n_show = 4  # number of sample images to display

# get some random training images
images, labels = next(iter(trainloader))
images, labels = images[:n_show], labels[:n_show]

# show images
img = torchvision.utils.make_grid(images, padding=0)
# Convert to NumPy first so the de-normalization below is plain NumPy
# broadcasting rather than mixed Tensor / ndarray arithmetic.
npimg = img.numpy()
npimg = npimg * np.array(std)[:, None, None] + np.array(mean)[:, None, None]  # unnormalize
npimg = npimg.clip(0, 1)
fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(np.transpose(npimg, (1, 2, 0)))  # channels-first -> channels-last
# Write each class name centred above its image in the grid.
tile_width = npimg.shape[-1] // n_show
for i, label in enumerate(labels):
    ax.text((i + 0.5) * tile_width, -2, classes[int(label)], ha='center', fontsize=16)
plt.show()


In [None]:
# Training VGG16 on CPU is impractically slow, so require a GPU up front.
# An explicit raise (unlike assert) is not stripped when Python runs with -O.
if not torch.cuda.is_available():
    raise RuntimeError("Notebook is not configured properly!")
print('Congrats, you\'re running this code on a', torch.cuda.get_device_name(), 'gpu')
device = 'cuda:0'

# RGB input, one output logit per CIFAR-10 class; the model lives on the GPU.
net = VGG16(in_channels=3, num_classes=len(classes)).to(device)

# Nothing works w/o Batch Norm or Proper Initialization


def initialize_weights(model: nn.Module) -> None:
    """Re-initialize, in place, every Conv2d / BatchNorm2d / Linear layer of *model*.

    * ``Conv2d``: Kaiming-normal weights (``fan_out``, ReLU gain), zero bias.
    * ``BatchNorm2d``: weight 1 and bias 0.
    * ``Linear``: weights drawn from N(0, 0.01), zero bias.

    Layers built without a bias (``bias=False``) or without affine parameters
    (``BatchNorm2d(affine=False)``) have ``None`` in place of those tensors;
    they are skipped instead of raising.
    """
    for m in model.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.BatchNorm2d):
            # weight and bias are both None when affine=False
            if m.weight is not None:
                nn.init.constant_(m.weight, 1)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.Linear):
            nn.init.normal_(m.weight, 0, 0.01)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)


initialize_weights(net)

# Objects needed for training.
# Cross-entropy on raw logits is the usual loss for single-label classification.
crit = nn.CrossEntropyLoss()
# Plain SGD with momentum and a small weight decay; the hyper-parameters are
# reasonable starting values, not tuned ones.
opt = torch.optim.SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
epochs = 10


Training loop

In [None]:
from tqdm.notebook import tqdm


# Send every batch to whatever device the model's parameters live on.
run_device = next(net.parameters()).device

for e in range(epochs):
    # ---- training ----
    net.train()  # BatchNorm uses batch statistics and updates its running averages
    # The context manager closes the bar even if an iteration raises.
    with tqdm(total=len(trainloader), desc=f'Epoch {e}') as pbar:
        for x, y in trainloader:
            x, y = x.to(run_device), y.to(run_device)

            # forward pass, backward pass and parameter update
            opt.zero_grad()  # gradients accumulate by default, so clear them first
            loss = crit(net(x), y)
            loss.backward()
            opt.step()

            # logging functions
            pbar.update(1)
            pbar.set_postfix({'Loss': loss.item()})

    # ---- evaluation ----
    net.eval()  # BatchNorm switches to its running statistics
    correct = 0
    with torch.no_grad():
        for x, y in testloader:
            x, y = x.to(run_device), y.to(run_device)
            # The predicted class is the index of the largest logit.
            correct += (net(x).argmax(dim=1) == y).sum().item()
    print(f"Accuracy for epoch {e}: {correct / len(testset):.2%}")
