Coded by Lujia Zhong @lujiazho<br>
Reference: https://github.com/msyim/VGG16, https://github.com/WZMIAOMIAO/deep-learning-for-image-processing

In [1]:
import time
import torch
import torch.nn as nn

def ConvBlock(in_channel, out_channel, num_conv):
    """Build one VGG stage: a stack of conv+ReLU pairs closed by a max-pool.

    Every convolution is 3x3 with padding 1, so only the final 2x2/stride-2
    max-pool changes the spatial size (it halves height and width).

    Args:
        in_channel: channels of the tensor entering the stage.
        out_channel: channels produced by every convolution in the stage.
        num_conv: how many conv+ReLU pairs to stack. The first pair is always
            created, so values below 1 still yield a single pair.

    Returns:
        An ``nn.Sequential`` whose children are the conv+ReLU pairs (each one
        itself an ``nn.Sequential``) followed by the ``nn.MaxPool2d``.
    """
    def conv_relu(channels_in):
        # One 3x3 convolution immediately followed by its activation.
        return nn.Sequential(
            nn.Conv2d(channels_in, out_channel, kernel_size=3, padding=1),
            nn.ReLU(),
        )

    # Only the first pair changes the channel count; the rest keep it.
    stages = [conv_relu(in_channel)]
    stages.extend(conv_relu(out_channel) for _ in range(num_conv - 1))
    stages.append(nn.MaxPool2d(kernel_size=2, stride=2))

    return nn.Sequential(*stages)

class VGG16(nn.Module):
    """VGG-style image classifier: five conv stages, then three linear layers.

    Despite the name, the depth of each stage comes from ``config.num_conv``,
    so the same class builds whichever variant that list describes.
    ``forward`` accepts only 3-channel 224x224 images; that fixed size is what
    makes the flattened feature vector 7*7*512 long.

    Args:
        config: object exposing ``num_conv`` (indexable, entries 0..4 are the
            number of convolutions per stage) and ``num_classes`` (width of
            the output layer).
    """

    def __init__(self, config):
        super().__init__()

        depths = config.num_conv

        # Feature extractor: channels go 3 -> 64 -> 128 -> 256 -> 512 -> 512.
        self.block1 = ConvBlock(3, 64, depths[0])
        self.block2 = ConvBlock(64, 128, depths[1])
        self.block3 = ConvBlock(128, 256, depths[2])
        self.block4 = ConvBlock(256, 512, depths[3])
        self.block5 = ConvBlock(512, 512, depths[4])

        # Classifier head on the flattened 512 x 7 x 7 feature map.
        flat_features = 7*7*512
        hidden = 4096
        self.fc1 = nn.Sequential(
            nn.Linear(flat_features, hidden),
            nn.Dropout(0.5),
            nn.ReLU(),
        )
        self.fc2 = nn.Sequential(
            nn.Linear(hidden, hidden),
            nn.Dropout(0.5),
            nn.ReLU(),
        )
        self.fc3 = nn.Linear(hidden, config.num_classes)

    def forward(self, x):
        """Return raw class scores for a batch of 3x224x224 images.

        Raises:
            AssertionError: if dim 1 is not 3 or the last two dims are not
                224 x 224.
        """
        # Shape notes below use a batch of 4: x is torch.Size([4, 3, 224, 224]).
        assert x.shape[1] == 3, "Unsupported input channel."
        assert tuple(x.shape[-2:]) == (224, 224), "Unsupported image size."

        # Each stage halves H and W:
        # 224 -> 112 -> 56 -> 28 -> 14 -> 7, ending at torch.Size([4, 512, 7, 7]).
        stages = (self.block1, self.block2, self.block3, self.block4, self.block5)
        for stage in stages:
            x = stage(x)

        # Flatten everything but the batch dim: torch.Size([4, 25088]).
        x = x.view(x.size(0), -1)

        x = self.fc1(x)  # torch.Size([4, 4096])
        x = self.fc2(x)  # torch.Size([4, 4096])
        logits = self.fc3(x)  # torch.Size([4, num_classes])

        # No softmax here: nn.CrossEntropyLoss applies log-softmax itself,
        # so the unnormalized scores are returned.
        return logits

class VGGConfig:
    """Hyper-parameters describing one VGG variant.

    Args:
        VGGType: variant name, one of '11', '13', '16' or '19'.
        num_classes: number of output classes; defaults to 1000.

    Attributes:
        num_classes: the value passed in (1000 unless overridden).
        num_conv: list of five ints, the number of convolutions in each of
            the five conv blocks for the chosen variant.

    Raises:
        AssertionError: if ``VGGType`` is not one of the known variants.
    """

    # Convolutions per block for every supported variant.
    _NUM_CONV = {
        '11': (1, 1, 2, 2, 2),
        '13': (2, 2, 2, 2, 2),
        '16': (2, 2, 3, 3, 3),
        '19': (2, 2, 4, 4, 4),
    }

    def __init__(self, VGGType='11', num_classes=1000):
        self.num_classes = num_classes

        # Raised explicitly instead of via `assert` so the check still runs
        # under `python -O`; AssertionError is kept for existing callers.
        # Membership is tested against a tuple so unhashable values are
        # rejected the same way rather than raising TypeError.
        if VGGType not in tuple(self._NUM_CONV):
            raise AssertionError("Unknown VGG Type.")

        # Copy into a fresh list so instances never share mutable state.
        self.num_conv = list(self._NUM_CONV[VGGType])

# Select the '16' variant ([2, 2, 3, 3, 3] convolutions per block) and build
# the network from it. Both names are module-level and are reused by the
# training cell below.
config = VGGConfig('16')
model = VGG16(config)

In [2]:
# Smoke-test / timing run: optimize the model for a few steps on random
# images and random labels, printing the loss and the mean time per step.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

batch = 4
iterarions = 5
log_every = 1  # print the loss every `log_every` iterations
begin = time.time()

# Training
for iterarion in range(iterarions):
    # Fresh random batch each step: inputs in [0, 1), labels in [0, num_classes).
    x = torch.rand((batch, 3, 224, 224))
    y = torch.randint(0, config.num_classes, (batch,))

    optimizer.zero_grad()
    pred = model(x)

    loss = criterion(pred, y)

    if iterarion % log_every == 0:
        # .item() extracts the Python float from the 0-dim loss tensor.
        print('Iterarion:', '%2d,' % (iterarion + 1), 'loss =', '{:.4f}'.format(loss.item()))

    loss.backward()
    optimizer.step()

# Mean wall-clock time per iteration (includes data generation and logging).
print(f"{(time.time() - begin)/iterarions:.4f}s / iterarion")

Iterarion:  1, loss = 6.9173
Iterarion:  2, loss = 7.0432
Iterarion:  3, loss = 6.9376
Iterarion:  4, loss = 6.9018
Iterarion:  5, loss = 6.9245
4.3176s / iterarion
