# Introduction


**What?** AlexNet, Inception, DenseNet



# Import python modules

In [4]:
import torch
import torch.nn as nn
import torchvision

# AlexNet - ImageNet Classification with Deep Convolutional Neural Networks - 2012

In [None]:
"""
AlexNet is made up of 5 conv layers, starting with an 11x11 kernel. It was the first architecture that employed
max-pooling layers, ReLU activation functions, and dropout (applied ahead of the first two of its 3 enormous
linear layers). The network was used for image classification with 1000 possible classes, which for that time was madness.
"""

In [2]:
class AlexNet(nn.Module):
    """AlexNet convolutional image classifier.

    The network is a stack of five convolutions (interleaved with ReLU and
    max-pooling), an adaptive average pool that always yields a 6x6 feature
    map, and a three-layer fully connected classifier.

    Args:
        num_classes: Size of the final linear layer, i.e. the number of
            logits produced per image.
        dropout: Drop probability used by the two dropout layers of the
            classifier. Defaults to 0.5, the value ``nn.Dropout()`` used
            before this was made configurable.
    """

    def __init__(self, num_classes: int = 1000, dropout: float = 0.5) -> None:
        super().__init__()

        # Layer order (and therefore the Sequential indices / state_dict
        # keys) is kept exactly as before.
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )

        # Forces a 6x6 spatial size so the classifier's input width
        # (256 * 6 * 6) does not depend on the input resolution.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return raw class scores for a batch of 3-channel images.

        Args:
            x: Batch of images with 3 channels in dimension 1.

        Returns:
            Tensor with one row per image and ``num_classes`` columns; no
            softmax is applied.
        """
        x = self.features(x)
        x = self.avgpool(x)
        # Keep the batch dimension, flatten channels and spatial dims.
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

# InceptionNet/GoogLeNet (2014)

In [None]:
"""
The InceptionNet/GoogLeNet architecture consists of 9 inception modules stacked together, with max-pooling 
layers between (to halve the spatial dimensions). It consists of 22 layers (27 with the pooling layers). 
It uses global average pooling after the last inception module.
"""

In [3]:
class InceptionModule(nn.Module):
    """Simplified Inception block with four parallel branches.

    Every branch maps the input to ``out_channels`` feature maps while
    preserving the spatial size (stride 1 with matching padding), and the
    four results are concatenated along the channel dimension:

    * branch1: 1x1 conv
    * branch2: 1x1 conv -> 3x3 conv
    * branch3: 1x1 conv -> 5x5 conv
    * branch4: 3x3 max-pool -> 1x1 conv

    A ReLU follows every convolution, including the 1x1 reductions in
    branch2 and branch3. Without it, the reduction and the following conv
    are two linear maps applied back to back, with no non-linearity between
    them.

    NOTE: adding those activations shifts the ``nn.Sequential`` indices in
    branch2/branch3 (the second conv moves from index 1 to index 2), so
    state dicts saved with the previous layout will not load as-is.

    Args:
        in_channels: Number of channels of the input tensor.
        out_channels: Number of channels produced by *each* branch; the
            module output therefore has ``4 * out_channels`` channels.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()

        self.branch1 = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0),
            nn.ReLU(),
        )

        self.branch2 = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
        )

        self.branch3 = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
        )

        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0),
            nn.ReLU(),
        )

    # The parameter keeps its original name ``input`` (even though it
    # shadows the builtin) so keyword calls such as ``module(input=x)``
    # continue to work.
    def forward(self, input):
        """Run all four branches on ``input`` and concatenate on dim 1."""
        branches = (self.branch1, self.branch2, self.branch3, self.branch4)
        return torch.cat([branch(input) for branch in branches], dim=1)

# Quick shape check on one random RGB image: the four branches each emit
# 32 channels and are concatenated on dim 1, so 4 * 32 = 128 channels are
# expected at the unchanged 128x128 resolution.
model = InceptionModule(in_channels=3, out_channels=32)
inp = torch.rand(1, 3, 128, 128)
out_shape = model(inp).shape
print(out_shape)

torch.Size([1, 128, 128, 128])


# DenseNet: Densely Connected Convolutional Networks (2017)

In [None]:
"""
Skip connections are a pretty cool idea. Why don’t we just skip-connect everything? DenseNet is an example of
pushing this idea to the extreme. Of course, the main difference from ResNets is that we concatenate
the feature maps instead of adding them. Thus, the core idea behind it is feature reuse, which leads to very compact
models. As a result, it requires fewer parameters than other CNNs, as there are no repeated feature maps.
"""

In [5]:
# Hyper-parameters of a small DenseNet, keyed by the constructor's
# keyword names.
densenet_kwargs = dict(
    growth_rate=16,  # filters added by every dense layer (`k` in the paper)
    block_config=(6, 12, 24, 16),  # number of dense layers in each pooling block
    num_init_features=16,  # filters learned by the first convolution (k0)
    bn_size=4,  # bottleneck multiplier: the 1x1 convs emit bn_size * growth_rate channels
    drop_rate=0,  # dropout rate after each dense conv layer
    num_classes=30,  # number of classification classes
)
model = torchvision.models.DenseNet(**densenet_kwargs)

print(model)  # see snapshot below

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 16, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(64, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1):