In [1]:
"""
A from-scratch implementation of the VGG architecture.
"""

# Imports
import torch
import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions

In [3]:
# Layer layouts for the supported VGG variants, one pooling stage per line.
# An int entry is the number of output channels of a 3x3 convolution;
# the string "M" marks a 2x2 max-pooling step.
VGG_types = {
    "VGG11": [
        64, "M",
        128, "M",
        256, 256, "M",
        512, 512, "M",
        512, 512, "M",
    ],
    "VGG13": [
        64, 64, "M",
        128, 128, "M",
        256, 256, "M",
        512, 512, "M",
        512, 512, "M",
    ],
    "VGG16": [
        64, 64, "M",
        128, 128, "M",
        256, 256, 256, "M",
        512, 512, 512, "M",
        512, 512, 512, "M",
    ],
    "VGG19": [
        64, 64, "M",
        128, 128, "M",
        256, 256, 256, 256, "M",
        512, 512, 512, 512, "M",
        512, 512, 512, 512, "M",
    ],
}


In [4]:
class VGG_net(nn.Module):
    """VGG-style image classifier with batch normalization after each convolution.

    The feature extractor is a stack of 3x3 convolution / BatchNorm / ReLU
    groups separated by 2x2 max-pooling. Its output is average-pooled to a
    fixed 7x7 grid, flattened, and passed through a three-layer fully
    connected classifier with dropout.

    Args:
        in_channels: Number of channels of the input images.
        num_classes: Number of outputs of the final linear layer.
        architecture: Either a key of the module-level ``VGG_types`` table
            (default ``"VGG16"``, which reproduces the previous hard-coded
            behavior) or an explicit layout: an iterable mixing ints
            (convolution output channels) and ``"M"`` (max-pooling).

    Raises:
        ValueError: If ``architecture`` is an unknown name or the layout
            contains an entry that is neither an int nor ``"M"``.
    """

    def __init__(self, in_channels=3, num_classes=1000, architecture="VGG16"):
        super().__init__()
        self.in_channels = in_channels

        # Resolve a named variant to its layout; explicit layouts pass through.
        if isinstance(architecture, str):
            if architecture not in VGG_types:
                raise ValueError(
                    f"Unknown architecture {architecture!r}; "
                    f"expected one of {sorted(VGG_types)} or an explicit layout list"
                )
            architecture = VGG_types[architecture]
        architecture = list(architecture)

        self.conv_layers = self.create_conv_layers(architecture)

        # Width of the last convolution decides the classifier input size
        # (512 for every entry of VGG_types, matching the original constant).
        last_channels = next(
            (entry for entry in reversed(architecture) if isinstance(entry, int)),
            in_channels,
        )

        # Pooling to a fixed 7x7 grid is a no-op for 224x224 inputs (already
        # 7x7 after five max-pools) and lets other input sizes reach the
        # fixed-size linear layers.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))

        self.fcs = nn.Sequential(
            nn.Linear(last_channels * 7 * 7, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Run a batch through the conv stack and classifier.

        Args:
            x: Input batch; the first dimension is kept as the batch
                dimension and all remaining dimensions are flattened
                before the linear layers.

        Returns:
            The output of the final linear layer, one row per batch element.
        """
        x = self.conv_layers(x)
        x = self.avgpool(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fcs(x)
        return x

    def create_conv_layers(self, architecture):
        """Build the convolutional feature extractor from a layout list.

        Args:
            architecture: Iterable of ints (output channels of a 3x3,
                stride-1, padding-1 convolution followed by BatchNorm and
                ReLU) and ``"M"`` (2x2 max-pooling with stride 2).

        Returns:
            An ``nn.Sequential`` holding the layers in layout order. The
            first convolution takes ``self.in_channels`` input channels.

        Raises:
            ValueError: If an entry is neither an int nor ``"M"``.
        """
        layers = []
        in_channels = self.in_channels

        for x in architecture:
            if isinstance(x, int):
                out_channels = x

                layers += [
                    nn.Conv2d(
                        in_channels=in_channels,
                        out_channels=out_channels,
                        kernel_size=(3, 3),
                        stride=(1, 1),
                        padding=(1, 1),
                    ),
                    nn.BatchNorm2d(out_channels),
                    nn.ReLU(),
                ]
                # The next convolution consumes what this one produced.
                in_channels = out_channels
            elif x == "M":
                layers += [nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))]
            else:
                # Previously such entries were skipped silently, hiding typos.
                raise ValueError(
                    f"Unsupported architecture entry {x!r}; expected an int or 'M'"
                )

        return nn.Sequential(*layers)

In [5]:
# Smoke test: push one random batch through the network and check the output shape.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = VGG_net(in_channels=3, num_classes=1000).to(device)
BATCH_SIZE = 3
# Size the input from BATCH_SIZE so the batch and the assertion cannot drift apart.
x = torch.randn(BATCH_SIZE, 3, 224, 224).to(device)
# Run the forward pass once, without building an autograd graph, and reuse the result.
with torch.no_grad():
    output_shape = model(x).shape
assert output_shape == torch.Size([BATCH_SIZE, 1000])
print(output_shape)

torch.Size([3, 1000])


In [6]:
# Display which device string was selected for the model and the input batch.
device

'cuda'

In [7]:
# Display the random input batch `x` that was fed to the model.
# NOTE(review): echoing a full tensor yields a long repr; `x.shape` may be enough here.
x

tensor([[[[ 3.0723e-01, -1.4043e-01, -8.7562e-01,  ...,  2.1716e-01,
            2.2254e+00, -9.7100e-01],
          [ 2.8314e-01,  2.3916e-02,  3.4754e-01,  ...,  1.8823e-01,
            7.4606e-02, -9.8500e-01],
          [-4.4776e-01,  1.5414e+00,  1.1374e+00,  ..., -7.2016e-01,
           -1.2177e+00,  1.5801e+00],
          ...,
          [ 1.2693e+00,  4.4301e-01,  3.2667e-01,  ..., -1.2716e+00,
            5.3746e-01,  1.4650e-01],
          [-2.1451e+00,  7.0084e-01,  5.7755e-01,  ..., -1.1123e-01,
           -1.6867e+00, -7.2227e-01],
          [ 2.6348e-01, -4.5303e-01, -2.4310e+00,  ..., -2.8624e-01,
           -1.4389e-01, -1.5979e+00]],

         [[-4.0931e-01,  1.6537e-01, -2.1161e+00,  ..., -1.5484e+00,
           -4.1801e-01,  5.2278e-01],
          [-5.7944e-01,  1.0100e+00, -4.9766e-01,  ...,  2.3712e-01,
            5.2865e-01, -1.6419e+00],
          [-1.3505e-01, -1.6298e+00, -8.2050e-01,  ..., -4.8377e-01,
           -1.9728e-01,  8.9214e-01],
          ...,
     