In [2]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt # for making figures
%matplotlib inline

### DenseNet

In [None]:
class BNReluConvBlock(torch.nn.Module):
    """Pre-activation convolution unit used by DenseNet: BatchNorm2d -> ReLU -> Conv2d.

    Args:
        in_channels: number of channels of the incoming feature map.
        out_channels: number of channels produced by the convolution.
        kernel_size: size of the convolution kernel (default 1).
        stride: stride of the convolution (default 1).
        padding: zero-padding applied on each side of the input (default 0).
    """

    def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding=0):
        super().__init__()
        self.bn = torch.nn.BatchNorm2d(in_channels)
        # In-place activation overwrites the BatchNorm output instead of allocating a new tensor.
        self.relu = torch.nn.ReLU(inplace=True)
        # The convolution has no bias term; the author's rationale is that
        # BatchNorm makes a separate bias unnecessary.
        conv_options = dict(kernel_size=kernel_size, stride=stride, padding=padding, bias=False)
        self.conv = torch.nn.Conv2d(in_channels, out_channels, **conv_options)

    def forward(self, x):
        """Normalize, activate, then convolve ``x``."""
        normalized = self.bn(x)
        activated = self.relu(normalized)
        return self.conv(activated)

class DenseLayer(torch.nn.Module):
    """One DenseNet layer: BN -> ReLU -> Conv1x1 (bottleneck) -> BN -> ReLU -> Conv3x3.

    Args:
        in_channels: number of channels of the incoming feature map.
        growth_rate: number of new channels this layer produces.
        bn_size: bottleneck multiplier; the 1x1 convolution outputs
            ``growth_rate * bn_size`` channels. Default is 4.
    """

    def __init__(self, in_channels, growth_rate, bn_size=4):
        super().__init__()
        bottleneck_channels = growth_rate * bn_size
        # 1x1 bottleneck feeding the 3x3 convolution.
        self.bn_relu_conv1 = BNReluConvBlock(in_channels, bottleneck_channels, kernel_size=1)
        # 3x3 convolution; padding=1 keeps the spatial size unchanged.
        self.bn_relu_conv2 = BNReluConvBlock(bottleneck_channels, growth_rate, kernel_size=3, padding=1)

    def forward(self, x):
        """Return only the newly computed features, shaped (B, growth_rate, H, W)."""
        return self.bn_relu_conv2(self.bn_relu_conv1(x))

class DenseBlock(torch.nn.Module):
    """A stack of ``num_layers`` DenseLayers with dense (concatenative) connectivity.

    Layer ``i`` receives the block input concatenated with the outputs of all
    earlier layers, so its input width is ``in_channels + i * growth_rate``.

    Args:
        in_channels: number of channels entering the block.
        growth_rate: number of channels each layer adds.
        num_layers: number of layers in the block (e.g., 6, 12, 24, or 16).
        bn_size: bottleneck size multiplier. Default is 4.
    """

    def __init__(self, in_channels, growth_rate, num_layers, bn_size=4):
        super().__init__()
        self.layers = torch.nn.ModuleList(
            [
                DenseLayer(in_channels + index * growth_rate, growth_rate, bn_size)
                for index in range(num_layers)
            ]
        )

    def forward(self, x):
        """Return the input and every layer's output concatenated along the channel axis."""
        collected = [x]
        for layer in self.layers:
            # Each layer sees everything produced so far, joined on dim 1 (channels).
            stacked = torch.cat(collected, 1)
            collected.append(layer(stacked))
        return torch.cat(collected, 1)

class TransitionLayer(torch.nn.Module):
    """Transition between dense blocks: BN -> ReLU -> Conv1x1, then 2x2 average pooling.

    The 1x1 convolution compresses the channel count to ``int(in_channels * theta)``
    and the stride-2 pooling halves the spatial size.

    Args:
        in_channels: number of channels of the incoming feature map.
        theta: compression factor applied to the channel count. Default is 0.5.
    """

    def __init__(self, in_channels, theta=0.5):
        super().__init__()
        compressed_channels = int(in_channels * theta)
        self.bn_relu_conv = BNReluConvBlock(in_channels, compressed_channels, kernel_size=1)
        # 2x2 window with stride 2: spatial size is reduced by a factor of 2.
        self.avg_pool = torch.nn.AvgPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        """Compress channels, then downsample spatially."""
        compressed = self.bn_relu_conv(x)
        return self.avg_pool(compressed)

class DenseNet(torch.nn.Module):
    """DenseNet classifier: conv/max-pool stem, dense blocks joined by transition layers,
    global average pooling and a linear head.

    Args:
        growth_rate: channels added by each dense layer (k in the paper, e.g. 32 for DenseNet-121).
        block_config: number of layers in each dense block (e.g. (6, 12, 24, 16) for DenseNet-121).
        num_classes: size of the output layer. Default is 1000 (ImageNet).
        bn_size: bottleneck multiplier for the 1x1 convolution preceding each 3x3 convolution. Default is 4.
        theta: compression factor used by the transition layers. Default is 0.5 (channels are halved).
    """

    def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16), num_classes=1000, bn_size=4, theta=0.5):
        super().__init__()

        stem_channels = 64
        # Stem. For a 224x224 RGB input: conv 224x224x3 -> 112x112x64 ((224 - 7 + 2*3) / 2 + 1 = 112),
        # then max-pool 112x112x64 -> 56x56x64 ((112 - 3 + 2) / 2 + 1 = 56).
        # NOTE(review): the reference DenseNet (paper / torchvision) applies BatchNorm after the stem
        # convolution and again after the last dense block; this model has neither - confirm intentional.
        self.conv1 = torch.nn.Conv2d(3, stem_channels, kernel_size=7, stride=2, padding=3)
        self.maxpool = torch.nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Shape walkthrough with the defaults (growth_rate=32, theta=0.5, 224x224 input):
        #   dense_block1: 56x56x64   -> 56x56x(64 + 6*32)   = 56x56x256
        #   trans_layer1: 56x56x256  -> 28x28x128
        #   dense_block2: 28x28x128  -> 28x28x(128 + 12*32) = 28x28x512
        #   trans_layer2: 28x28x512  -> 14x14x256
        #   dense_block3: 14x14x256  -> 14x14x(256 + 24*32) = 14x14x1024
        #   trans_layer3: 14x14x1024 -> 7x7x512
        #   dense_block4: 7x7x512    -> 7x7x(512 + 16*32)   = 7x7x1024
        self.net = torch.nn.Sequential()
        width = stem_channels
        final_index = len(block_config) - 1
        for index, depth in enumerate(block_config):
            stage = index + 1
            self.net.add_module(f'dense_block{stage}', DenseBlock(width, growth_rate, depth, bn_size))
            width = width + depth * growth_rate
            if index == final_index:
                # The last dense block is not followed by a transition layer.
                continue
            self.net.add_module(f'trans_layer{stage}', TransitionLayer(width, theta))
            width = int(width * theta)  # must mirror the channel compression inside TransitionLayer

        self.avgpool = torch.nn.AdaptiveAvgPool2d((1, 1))  # e.g. 7x7x1024 -> 1x1x1024
        self.fc = torch.nn.Linear(width, num_classes)  # e.g. 1024 -> num_classes

    def forward(self, x):
        """Return class logits of shape (batch, num_classes)."""
        stem = self.maxpool(F.relu(self.conv1(x)))
        features = self.net(stem)
        pooled = self.avgpool(features)
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(flat)

# Smoke-test the standard ImageNet DenseNet variants: build each model, push one
# random 224x224 RGB image through it, and report the logits shape and parameter count.
# The first label's spelling ('Densnet-121') is kept as-is so the printed output is unchanged.
densenet_variants = (
    ('Densnet-121', (6, 12, 24, 16)),
    ('DenseNet-169', (6, 12, 32, 32)),
    ('DenseNet-201', (6, 12, 48, 32)),
    ('DenseNet-264', (6, 12, 64, 48)),
)

for label, block_config in densenet_variants:
    model = DenseNet(growth_rate=32, block_config=block_config, num_classes=1000, bn_size=4, theta=0.5)
    # Only the output shape is inspected, so skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        logits = model(torch.randn(1, 3, 224, 224))
    print(label, logits.shape)
    print(f'Num Parameters: {sum(p.numel() for p in model.parameters())}')


Densnet-121 torch.Size([1, 1000])
Num Parameters: 7976744
DenseNet-169 torch.Size([1, 1000])
Num Parameters: 14146088
DenseNet-201 torch.Size([1, 1000])
Num Parameters: 20010024
DenseNet-264 torch.Size([1, 1000])
Num Parameters: 33332264


#### ResNet


In [29]:

class ConvBNReluBlock(torch.nn.Module):
    """Post-activation convolution unit used by ResNet: Conv2d -> BatchNorm2d -> ReLU.

    Args:
        in_channels: number of channels of the incoming feature map.
        out_channels: number of channels produced by the convolution.
        kernel_size: kernel size of the convolution (default 1).
        stride: stride of the convolution (default 1).
        padding: padding of the convolution (default 0).
    """

    def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding=0):
        super().__init__()
        # The BatchNorm that follows has its own learnable shift, so the convolution carries no bias.
        conv_options = dict(kernel_size=kernel_size, stride=stride, padding=padding, bias=False)
        self.conv = torch.nn.Conv2d(in_channels, out_channels, **conv_options)
        self.bn = torch.nn.BatchNorm2d(out_channels)
        # In-place activation overwrites the BatchNorm output instead of allocating a new tensor.
        self.relu = torch.nn.ReLU(inplace=True)

    def forward(self, x):
        """Convolve, normalize, then activate ``x``."""
        convolved = self.conv(x)
        normalized = self.bn(convolved)
        return self.relu(normalized)

class BottleneckLayer(torch.nn.Module):
    """ResNet bottleneck layer: Conv1x1 -> Conv3x3 -> Conv1x1 plus a shortcut connection.

    The first two convolutions are Conv -> BN -> ReLU. The third convolution and the
    optional projection shortcut are Conv -> BN only; a single ReLU is applied after the
    two branches are summed, as in the reference ResNet design. Applying ReLU to both
    branches before the addition (as this layer previously did) forces every summand to
    be non-negative, so the residual branch could never subtract from the shortcut.

    Args:
        in_channels: number of input channels.
        out_channels: number of channels inside the bottleneck (before expansion).
        expansion: factor by which the last 1x1 convolution widens the output;
            the layer emits ``out_channels * expansion`` channels.
        stride: stride of the 3x3 convolution (and of the projection shortcut, if any).
    """

    def __init__(self, in_channels, out_channels, expansion=4, stride=1):
        super().__init__()
        expanded_channels = out_channels * expansion
        # 1x1 convolution: reduce to the bottleneck width.
        self.conv_bn_relu1 = ConvBNReluBlock(in_channels, out_channels, kernel_size=1)
        # 3x3 convolution: carries the stride of the layer.
        self.conv_bn_relu2 = ConvBNReluBlock(out_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        # 1x1 convolution: expand to the output width. Only its conv and bn are used in
        # forward(); the attribute name and module type are kept so state_dict keys and
        # parameter counts stay identical to earlier versions of this class.
        self.conv_bn_relu3 = ConvBNReluBlock(out_channels, expanded_channels, kernel_size=1)

        self.downsample = None

        # A projection shortcut is needed whenever the main branch changes the
        # spatial size or the channel count. As above, only its conv and bn are used.
        if stride != 1 or in_channels != expanded_channels:
            self.downsample = ConvBNReluBlock(in_channels, expanded_channels, kernel_size=1, stride=stride)

    @staticmethod
    def _conv_bn(block, x):
        """Apply only the convolution and BatchNorm of a ConvBNReluBlock, skipping its ReLU."""
        return block.bn(block.conv(x))

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        out = self.conv_bn_relu1(x)
        out = self.conv_bn_relu2(out)
        out = self._conv_bn(self.conv_bn_relu3, out)

        if self.downsample is None:
            identity = x
        else:
            identity = self._conv_bn(self.downsample, x)

        return F.relu(out + identity)

class ResNetBlock(torch.nn.Module):
    """One ResNet stage: ``num_layers`` BottleneckLayers applied in sequence.

    Only the first layer receives ``in_channels`` and ``stride``; every later layer
    takes ``bottleneck_channels * expansion`` channels and uses the default stride.

    Args:
        in_channels: number of channels entering the stage.
        bottleneck_channels: channel width inside each bottleneck.
        num_layers: number of BottleneckLayers in the stage.
        expansion: channel expansion factor of each bottleneck.
        stride: stride of the first BottleneckLayer.
    """

    def __init__(self, in_channels, bottleneck_channels, num_layers, expansion=4, stride=1):
        super().__init__()
        entry_layer = BottleneckLayer(in_channels, bottleneck_channels, expansion=expansion, stride=stride)
        stage_width = bottleneck_channels * expansion
        follow_up_layers = [
            BottleneckLayer(stage_width, bottleneck_channels, expansion=expansion)
            for _ in range(num_layers - 1)
        ]
        self.layers = torch.nn.ModuleList([entry_layer, *follow_up_layers])

    def forward(self, x):
        """Feed ``x`` through every bottleneck layer in order."""
        out = x
        for bottleneck in self.layers:
            out = bottleneck(out)
        return out
    
class ResNet(torch.nn.Module):
    """Bottleneck ResNet classifier (e.g. ResNet-50, -101, -152, -200).

    The network is a Conv7x7/BN/ReLU + max-pool stem, one ResNetBlock per entry of
    ``layers``, global average pooling and a linear head.

    Args:
        layers: number of bottleneck layers in each stage, e.g. (3, 4, 6, 3) for ResNet-50.
            Any sequence of ints is accepted (lists still work). Stage ``i`` uses
            ``64 * 2**i`` bottleneck channels; the first stage has stride 1, later ones stride 2.
        num_classes: size of the output layer (e.g. 1000 for ImageNet).
    """

    # The default is an immutable tuple rather than a list, so it cannot be mutated
    # across instances; this also matches DenseNet's ``block_config`` default.
    def __init__(self, layers=(3, 4, 6, 3), num_classes=1000):
        super().__init__()

        # Stem. For a 224x224 RGB input: 224x224x3 -> 112x112x64 -> 56x56x64.
        self.conv_bn_relu = ConvBNReluBlock(3, 64, kernel_size=7, stride=2, padding=3)
        self.maxpool = torch.nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        num_channels = 64
        expansion = 4

        # Shape walkthrough for four stages and a 224x224 input:
        #   resnet_block1 (64 ch,  stride 1): 56x56x64   -> 56x56x256
        #   resnet_block2 (128 ch, stride 2): 56x56x256  -> 28x28x512
        #   resnet_block3 (256 ch, stride 2): 28x28x512  -> 14x14x1024
        #   resnet_block4 (512 ch, stride 2): 14x14x1024 -> 7x7x2048
        self.net = torch.nn.Sequential()
        for i, num_layers in enumerate(layers):
            stride = 1 if i == 0 else 2  # only the first stage keeps the spatial size
            bottleneck_channels = 64 * (2 ** i)  # 64, 128, 256, 512, ...
            resnet_block = ResNetBlock(num_channels, bottleneck_channels, num_layers,
                                       expansion=expansion, stride=stride)
            self.net.add_module(f'resnet_block{i + 1}', resnet_block)
            # The next stage consumes this stage's expanded output width.
            num_channels = bottleneck_channels * expansion

        self.avgpool = torch.nn.AdaptiveAvgPool2d((1, 1))
        self.fc = torch.nn.Linear(num_channels, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes)."""
        x = self.maxpool(self.conv_bn_relu(x))
        x = self.net(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)  # (B, C, 1, 1) -> (B, C)
        return self.fc(x)


# Smoke-test the bottleneck ResNet variants: build each model, push one random
# 224x224 RGB image through it, and report the logits shape and parameter count.
resnet_variants = (
    ('ResNet-50', [3, 4, 6, 3]),
    ('ResNet-101', [3, 4, 23, 3]),
    ('ResNet-152', [3, 8, 36, 3]),
    ('ResNet-200', [3, 24, 36, 3]),
)

for label, stage_depths in resnet_variants:
    model = ResNet(layers=stage_depths, num_classes=1000)
    # print(model)  # uncomment to inspect the module tree
    # Only the output shape is inspected, so skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        logits = model(torch.randn(1, 3, 224, 224))
    print(label, logits.shape)
    print(f'Num Parameters: {sum(p.numel() for p in model.parameters())}')


ResNet-50 torch.Size([1, 1000])
Num Parameters: 25557032
ResNet-101 torch.Size([1, 1000])
Num Parameters: 44549160
ResNet-152 torch.Size([1, 1000])
Num Parameters: 60192808
ResNet-200 torch.Size([1, 1000])
Num Parameters: 64673832
