In [1]:
import torch
import torch.nn as nn

In [2]:
# Layer configurations of the supported VGG variants, in network order.
# An integer is the number of output filters of one convolution layer;
# "M" marks a max-pooling layer. Each row below is one conv stage.
VGG_types_dict = {
    "VGG11": [
        64, "M",
        128, "M",
        256, 256, "M",
        512, 512, "M",
        512, 512, "M",
    ],
    "VGG13": [
        64, 64, "M",
        128, 128, "M",
        256, 256, "M",
        512, 512, "M",
        512, 512, "M",
    ],
    "VGG16": [
        64, 64, "M",
        128, 128, "M",
        256, 256, 256, "M",
        512, 512, 512, "M",
        512, 512, 512, "M",
    ],
    "VGG19": [
        64, 64, "M",
        128, 128, "M",
        256, 256, 256, 256, "M",
        512, 512, 512, 512, "M",
        512, 512, 512, 512, "M",
    ],
}

In [5]:
VGGType = "VGG16"  # default VGG variant; must be a key of VGG_types_dict


class VGGnet(nn.Module):
    """VGG-style image classifier with batch normalization after each convolution.

    The feature extractor is built from one entry of ``VGG_types_dict``:
    every integer becomes a 3x3 convolution -> BatchNorm -> ReLU group and
    every ``"M"`` becomes a 2x2 max-pooling layer. The flattened feature map
    is then passed through three fully connected layers.

    Args:
        in_channels: Number of channels of the input images (3 for RGB).
        num_classes: Number of categories the images are classified into.
        vgg_type: Key of ``VGG_types_dict`` selecting the layer configuration.
            ``None`` (the default) uses the module-level ``VGGType``.

    Raises:
        ValueError: If ``vgg_type`` is not a key of ``VGG_types_dict``, or if
            the selected configuration contains an unknown entry.

    Note:
        The first fully connected layer expects a 512 x 7 x 7 feature map.
        With the five 2x2 poolings of every configuration in
        ``VGG_types_dict`` this corresponds to 224 x 224 input images.
    """

    def __init__(self, in_channels=3, num_classes=500, vgg_type=None):
        super().__init__()

        # Resolve the variant at call time so that changing VGGType in the
        # notebook actually affects newly created models.
        if vgg_type is None:
            vgg_type = VGGType
        if vgg_type not in VGG_types_dict:
            raise ValueError(
                f"Unknown VGG type {vgg_type!r}; expected one of {sorted(VGG_types_dict)}"
            )

        self.in_channels = in_channels
        self.conv_layers = self.create_conv_layers(VGG_types_dict[vgg_type])

        # Classifier head: nn.Sequential chains the layers in order.
        self.fcs = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),     # flattened 512 x 7 x 7 feature map
            nn.ReLU(),                        # non-linearity
            nn.Dropout(p=0.5),                # regularization against overfitting
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, num_classes),     # one output score per class
        )

    def forward(self, x):
        """Run a batch through the conv stack, flatten it, and classify it.

        Args:
            x: Input batch of shape ``(N, in_channels, H, W)``.

        Returns:
            Tensor of shape ``(N, num_classes)`` with the raw class scores.
        """
        x = self.conv_layers(x)
        # Keep the batch dimension and collapse every other dimension into one.
        x = x.flatten(start_dim=1)
        return self.fcs(x)

    def create_conv_layers(self, architecture):
        """Build the convolutional feature extractor from a layer configuration.

        Args:
            architecture: Iterable whose items are either an ``int`` (number
                of output filters of a conv group) or the string ``"M"``
                (max-pooling layer).

        Returns:
            ``nn.Sequential`` containing the layers in configuration order.

        Raises:
            ValueError: If an item is neither an ``int`` nor ``"M"``.
        """
        layers = []
        in_channels = self.in_channels        # channels entering the next conv

        for spec in architecture:
            if isinstance(spec, int):
                out_channels = spec
                layers += [
                    nn.Conv2d(
                        in_channels=in_channels,
                        out_channels=out_channels,
                        kernel_size=(3, 3),
                        stride=(1, 1),
                        padding=(1, 1),       # 3x3 kernel with padding 1 keeps H and W
                    ),
                    nn.BatchNorm2d(out_channels),  # normalizes the conv output
                    nn.ReLU(),
                ]
                in_channels = out_channels
            elif spec == "M":
                layers.append(nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)))
            else:
                # Fail loudly instead of silently dropping a layer on a typo.
                raise ValueError(
                    f"Unsupported architecture entry {spec!r}; expected an int or 'M'"
                )

        return nn.Sequential(*layers)


if __name__ == "__main__":
    # Smoke test: push one random mini-batch through the network and print
    # the output shape.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # .to(device) moves the model parameters to the GPU or CPU.
    model = VGGnet(in_channels=3, num_classes=500).to(device)
    # N = 3 (mini-batch size, i.e. samples processed together) RGB images of
    # 224 x 224 pixels, created directly on the target device.
    x = torch.randn(3, 3, 224, 224, device=device)
    # Only the shape is inspected, so no autograd graph is needed.
    with torch.no_grad():
        print(model(x).shape)

torch.Size([3, 500])


More output channels → more feature maps per layer, so the layer can capture more detailed features.

In [6]:
# Print the model's repr to inspect the layers it contains.
print(model)

VGGnet(
  (conv_layers): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU()
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU()
    (13): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256, kernel_size=(3, 3), 