# Implementation of VGG 16 architecture

Every convolutional layer uses a 3x3 kernel with a padding of 1 and a stride of 1.

The input is a 224x224 RGB image.

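With this kernel size, padding, and stride, each convolution keeps the spatial resolution unchanged; only the 2x2 max-pooling layers halve it (224 → 112 → 56 → 28 → 14 → 7).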

I followed Aladdin Persson's [tutorial](https://www.youtube.com/watch?v=ACmuBbuXn20).

In [2]:
import torch
import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions


In [10]:
# Layer-by-layer description of the VGG-16 feature extractor:
#   int -> a convolutional layer with that many output channels
#   "M" -> a max-pooling layer
VGG16_architecture = [
    64, 64, "M",         # stage 1
    128, 128, "M",       # stage 2
    256, 256, 256, "M",  # stage 3
    512, 512, 512, "M",  # stage 4
    512, 512, 512, "M",  # stage 5
]
# The output of the last stage is flattened and then passed through three
# linear layers producing 4096, 4096 and 1000 features respectively.

In [None]:
class VGG_net(nn.Module):
    def __init__(self, in_channels=3, num_classes=1000):
        super(VGG_net, self).__init__()
        self.in_channels = in_channels
        self.conv_layers = self.create_conv_layers(VGG16_architecture)

        self.fcs = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fcs(x)
        return x

    def create_conv_layers(self, architecture):
        layers = []
        in_channels = self.in_channels

        for x in architecture:
            if type(x) == int:
                out_channels = x

                layers += [
                    nn.Conv2d(
                        in_channels=in_channels,
                        out_channels=out_channels,
                        kernel_size=(3, 3),
                        stride=(1, 1),
                        padding=(1, 1),
                    ),
                    nn.BatchNorm2d(x),  # Not included in the original paper
                    nn.ReLU(),
                ]
                in_channels = x
            elif x == "M":
                layers += [nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))]

        return nn.Sequential(*layers)


In [12]:

# Smoke test: push one random batch through the network and verify that the
# output has one row of 1000 logits per sample.
# Use Apple's Metal backend (MPS) when it is available, otherwise the CPU.
device = "mps" if torch.backends.mps.is_available() else "cpu"
model = VGG_net(in_channels=3, num_classes=1000).to(device)
BATCH_SIZE = 3
# Build the batch from BATCH_SIZE so the check below stays valid if it changes.
x = torch.randn(BATCH_SIZE, 3, 224, 224).to(device)
# Only the output shape matters here, so skip autograd bookkeeping and run the
# (expensive) forward pass a single time, reusing the result.
with torch.no_grad():
    output = model(x)
assert output.shape == torch.Size([BATCH_SIZE, 1000])
print(output.shape)

torch.Size([3, 1000])
