In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary

In [2]:
"""
Hard-coded VGG network
"""

class VGG11(nn.Module):
    """Hard-coded VGG-style network with eight 3x3 conv layers and three FC layers.

    The convolutional part is five stages, each ending in a 2x2/stride-2 max
    pool that halves the spatial size. ``fc1`` expects ``512 * 7 * 7`` input
    features, i.e. a 7x7 map after the five pools (for example 224x224 inputs).

    Args:
        num_classes: Number of output units of the final linear layer.
            Defaults to 1000.
    """

    def __init__(self, num_classes=1000):
        super().__init__()

        # Stage 1
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 2
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 3
        self.conv3 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 4
        self.conv5 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.conv6 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 5
        self.conv7 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.conv8 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.pool5 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Classifier head
        self.fc1 = nn.Linear(512 * 7 * 7, 4096)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(4096, 4096)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(p=0.5)
        self.output = nn.Linear(4096, num_classes)

    def forward(self, x):
        """Run the network on a batch of images and return the raw class scores.

        Args:
            x: Tensor of shape ``(batch, 3, H, W)`` where H and W reduce to 7
                after five halvings.

        Returns:
            Tensor of shape ``(batch, num_classes)``.
        """
        # A ReLU follows every convolution; without it consecutive convs
        # collapse into a single linear map. The functional form is used so
        # that no new sub-modules (and no new state_dict keys) are introduced.
        x = self.pool1(F.relu(self.conv1(x)))

        x = self.pool2(F.relu(self.conv2(x)))

        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.pool3(x)

        x = F.relu(self.conv5(x))
        x = F.relu(self.conv6(x))
        x = self.pool4(x)

        x = F.relu(self.conv7(x))
        x = F.relu(self.conv8(x))
        x = self.pool5(x)

        # Keep the batch dimension fixed while flattening, so a wrong input
        # size produces a shape error in fc1 instead of silently changing the
        # batch size.
        x = torch.flatten(x, 1)

        x = self.dropout1(self.relu1(self.fc1(x)))
        x = self.dropout2(self.relu2(self.fc2(x)))
        return self.output(x)
        

In [4]:
# VGG layer configurations, conv helpers, and the configurable VGG model

# Layer specifications keyed by configuration name; each row below is one
# pooling stage.
#   int               -> 3x3 convolution with that many output channels
#   ("conv1x1", n)    -> 1x1 convolution with n output channels
#   "M"               -> 2x2 max pooling
#   "LRN" / "BN"      -> local response norm / batch norm
config = {
    "A": [
        64, "M",
        128, "M",
        256, 256, "M",
        512, 512, "M",
        512, 512, "M",
    ],
    "A-LRN": [
        64, "LRN", "M",
        128, "M",
        256, 256, "M",
        512, 512, "M",
        512, 512, "M",
    ],
    # Not in the paper; custom variant.
    "A-BN": [
        64, "BN", "M",
        128, "M",
        256, 256, "M",
        512, 512, "M",
        512, 512, "M",
    ],
    "B": [
        64, 64, "M",
        128, 128, "M",
        256, 256, "M",
        512, 512, "M",
        512, 512, "M",
    ],
    "C": [
        64, 64, "M",
        128, 128, "M",
        256, 256, ("conv1x1", 256), "M",
        512, 512, ("conv1x1", 512), "M",
        512, 512, ("conv1x1", 512), "M",
    ],
    "D": [
        64, 64, "M",
        128, 128, "M",
        256, 256, 256, "M",
        512, 512, 512, "M",
        512, 512, 512, "M",
    ],
    "E": [
        64, 64, "M",
        128, 128, "M",
        256, 256, 256, 256, "M",
        512, 512, 512, 512, "M",
        512, 512, 512, 512, "M",
    ],
}

def conv3x3(in_channels, out_channels):
    """Build a 3x3 convolution followed by a ReLU.

    The convolution uses stride 1 and padding 1, so the spatial size of the
    input is preserved.

    Args:
        in_channels: Number of input channels of the convolution.
        out_channels: Number of output channels of the convolution.

    Returns:
        A two-element list ``[nn.Conv2d, nn.ReLU]``.
    """
    convolution = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
    activation = nn.ReLU()
    return [convolution, activation]


def conv1x1(in_channels, out_channels):
    """Build a 1x1 convolution followed by a ReLU.

    The convolution uses stride 1 and no padding, so it only mixes channels
    and leaves the spatial size unchanged.

    Args:
        in_channels: Number of input channels of the convolution.
        out_channels: Number of output channels of the convolution.

    Returns:
        A two-element list ``[nn.Conv2d, nn.ReLU]``.
    """
    convolution = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
    activation = nn.ReLU()
    return [convolution, activation]


class VGGNet(nn.Module):
    """VGG-style network assembled from a layer specification.

    The feature extractor is built by ``_make_layers`` from a list such as the
    entries of the module-level ``config`` dict. The classifier is three linear
    layers with ReLU and dropout, and expects exactly 512 input features. That
    matches a ``512 x 1 x 1`` feature map, e.g. 32x32 inputs passed through five
    2x2 poolings; 224x224 inputs would need ``512 * 7 * 7`` input features.

    Args:
        num_classes: Number of output units of the final linear layer.
        vgg_type: Layer specification list, or a string key into ``config``
            (for example ``'A'``). ``None`` (the default) selects
            ``config['A']``.
    """

    def __init__(self, num_classes, vgg_type=None):
        super().__init__()

        # Resolve the default lazily rather than in the signature, so the
        # shared list in ``config`` is not captured as a mutable default.
        if vgg_type is None:
            vgg_type = config['A']
        elif isinstance(vgg_type, str):
            vgg_type = config[vgg_type]

        self.features = self._make_layers(vgg_type)
        self.classifier = nn.Sequential(
            nn.Linear(512, 512),  # 512 = flattened 512x1x1 feature map (32x32 input)
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(512, num_classes),
        )

        # Initialise every convolution with small Gaussian weights and zero bias.
        # NOTE(review): std=0.001 is kept from the original code; confirm it is
        # the intended scale for deep ReLU stacks.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.normal_(m.weight, mean=0.0, std=0.001)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

    def forward(self, x):
        """Apply the feature extractor, flatten per sample, then classify.

        Args:
            x: Input batch; the flattened feature map must have 512 elements
                per sample.

        Returns:
            Tensor of shape ``(batch, num_classes)`` with raw class scores.
        """
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)

        return x

    def _make_layers(self, architecture):
        """Translate a layer specification into an ``nn.Sequential``.

        Recognised entries:
            * ``int``: 3x3 conv + ReLU with that many output channels.
            * ``tuple``: 1x1 conv + ReLU; the second element is the number of
              output channels.
            * ``'M'``: 2x2 max pooling with stride 2.
            * ``'LRN'``: local response normalisation.
            * ``'BN'``: batch normalisation over the current channel count.

        Args:
            architecture: Iterable of layer entries as described above.

        Returns:
            ``nn.Sequential`` containing the layers in order.

        Raises:
            ValueError: If an entry is not one of the recognised kinds.
        """
        layers = []
        in_channels = 3  # channels of the tensor entering the next layer

        for x in architecture:
            if isinstance(x, int):
                layers += conv3x3(in_channels, x)
                in_channels = x

            elif isinstance(x, tuple):
                layers += conv1x1(in_channels, x[1])
                in_channels = x[1]

            elif x == 'M':
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))

            elif x == 'LRN':
                layers.append(nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2))

            elif x == 'BN':
                # ``in_channels`` already holds the output width of the
                # preceding conv, which is what batch norm normalises.
                layers.append(nn.BatchNorm2d(in_channels))

            else:
                raise ValueError(f"Unknown layer spec in architecture: {x!r}")

        return nn.Sequential(*layers)


In [7]:
# Use the GPU when one is available, otherwise fall back to the CPU so this
# cell also runs on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = VGGNet(num_classes=10, vgg_type=config['A']).to(device)
# verbose=0 makes summary() return its report without printing it, so it is
# printed explicitly here.
print(summary(model, input_data=(3, 32, 32), verbose=0))

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 512, 1, 1]           --
|    └─Conv2d: 2-1                       [-1, 64, 32, 32]          1,792
|    └─ReLU: 2-2                         [-1, 64, 32, 32]          --
|    └─MaxPool2d: 2-3                    [-1, 64, 16, 16]          --
|    └─Conv2d: 2-4                       [-1, 128, 16, 16]         73,856
|    └─ReLU: 2-5                         [-1, 128, 16, 16]         --
|    └─MaxPool2d: 2-6                    [-1, 128, 8, 8]           --
|    └─Conv2d: 2-7                       [-1, 256, 8, 8]           295,168
|    └─ReLU: 2-8                         [-1, 256, 8, 8]           --
|    └─Conv2d: 2-9                       [-1, 256, 8, 8]           590,080
|    └─ReLU: 2-10                        [-1, 256, 8, 8]           --
|    └─MaxPool2d: 2-11                   [-1, 256, 4, 4]           --
|    └─Conv2d: 2-12                      [-1, 512, 4, 4]           1,180,160

In [None]:
------------------------------------------------------------------------------------------
Layer (type:depth-idx)                   Output Shape              Param #
==========================================================================================
├─Sequential: 1-1                        [-1, 512, 1, 1]           --
|    └─Conv2d: 2-1                       [-1, 64, 32, 32]          1,792
|    └─ReLU: 2-2                         [-1, 64, 32, 32]          --
|    └─MaxPool2d: 2-3                    [-1, 64, 16, 16]          --
|    └─Conv2d: 2-4                       [-1, 128, 16, 16]         73,856
|    └─ReLU: 2-5                         [-1, 128, 16, 16]         --
|    └─MaxPool2d: 2-6                    [-1, 128, 8, 8]           --
|    └─Conv2d: 2-7                       [-1, 256, 8, 8]           295,168
|    └─ReLU: 2-8                         [-1, 256, 8, 8]           --
|    └─Conv2d: 2-9                       [-1, 256, 8, 8]           590,080
|    └─ReLU: 2-10                        [-1, 256, 8, 8]           --
|    └─MaxPool2d: 2-11                   [-1, 256, 4, 4]           --
|    └─Conv2d: 2-12                      [-1, 512, 4, 4]           1,180,160
|    └─ReLU: 2-13                        [-1, 512, 4, 4]           --
|    └─Conv2d: 2-14                      [-1, 512, 4, 4]           2,359,808
|    └─ReLU: 2-15                        [-1, 512, 4, 4]           --
|    └─MaxPool2d: 2-16                   [-1, 512, 2, 2]           --
|    └─Conv2d: 2-17                      [-1, 512, 2, 2]           2,359,808
|    └─ReLU: 2-18                        [-1, 512, 2, 2]           --
|    └─Conv2d: 2-19                      [-1, 512, 2, 2]           2,359,808
|    └─ReLU: 2-20                        [-1, 512, 2, 2]           --
|    └─MaxPool2d: 2-21                   [-1, 512, 1, 1]           --
├─Sequential: 1-2                        [-1, 10]                  --
|    └─Linear: 2-22                      [-1, 512]                 262,656
|    └─ReLU: 2-23                        [-1, 512]                 --
|    └─Dropout: 2-24                     [-1, 512]                 --
|    └─Linear: 2-25                      [-1, 512]                 262,656
|    └─ReLU: 2-26                        [-1, 512]                 --
|    └─Dropout: 2-27                     [-1, 512]                 --
|    └─Linear: 2-28                      [-1, 10]                  5,130
==========================================================================================
Total params: 9,750,922
Trainable params: 9,750,922
Non-trainable params: 0
Total mult-adds (M): 163.04
------------------------------------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 1.16
Params size (MB): 37.20
Estimated Total Size (MB): 38.37
------------------------------------------------------------------------------------------