In [3]:
%autosave 300
%autoreload 2
%reload_ext autoreload
%config Completer.use_jedi = False

Autosaving every 300 seconds


In [2]:
import os

os.chdir(
    "/mnt/batch/tasks/shared/LS_root/mounts/clusters/insights-model-run/code/Users/soutrik.chowdhury/advanced_vision_modules_pytorch"
)
print(os.getcwd())

/mnt/batch/tasks/shared/LS_root/mounts/clusters/insights-model-run/code/Users/soutrik.chowdhury/advanced_vision_modules_pytorch


Objective:
* Application of Resnet-34 paper model as described in article: https://python.plainenglish.io/paper-walkthrough-residual-network-resnet-62af58d1c521
* We will test the model on random sample which represtents the CIFAR-10 dataset

https://debuggercafe.com/wp-content/uploads/2021/04/diff_dimensions_residual_block.jpg

https://debuggercafe.com/wp-content/uploads/2021/04/diff_dimensions_residual_block.jpg

In [4]:
import torch
from torch import nn
import torch.nn.functional as F
from torchinfo import summary

Sample setup

In [19]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

torch.Size([16, 3, 32, 32])


In [20]:
random_tensor_batch = torch.randn(16,3,32,32, dtype=torch.float32, device='cuda')
print(random_tensor_batch.shape)

torch.Size([16, 3, 32, 32])


Residual block:
* ResNet-34, the residual block comprises of two convolution layers, both using the kernel size of 3×3, stride of 1, and padding of 1 along with Batchnorm and Relu\
* Provision for the skip connection

In [57]:
class Residual_Block(nn.Module):
    """This block contains the structure of a residual block without the downsampling of input but with skip connection for the Input layer"""

    def __init__(self, num_channels):
        super(Residual_Block, self).__init__()
        # 1st convolution layer for feature extraction
        self.conv_residual1 = nn.Conv2d(
            in_channels=num_channels,
            out_channels=num_channels,
            kernel_size=(3, 3),
            stride=(1, 1),
            padding=(1, 1),
            bias=False,
        )

        self.bn_residual1 = nn.BatchNorm2d(num_channels)
        # 2nd convolution layer for feature extraction
        self.conv_residual2 = nn.Conv2d(
            in_channels=num_channels,
            out_channels=num_channels,
            kernel_size=(3, 3),
            stride=(1, 1),
            padding=(1, 1),
            bias=False,
        )

        self.bn_residual2 = nn.BatchNorm2d(num_channels)

        self.relu = nn.ReLU()

    def forward(self, x):
        """Residual block forward pass whose flow is as follows:
        Passing through the first cnn block having 3*3 kernel size, stride of 1 and padding of 1
        Batchnorm and relu
        Passing through the second cnn block having 3*3 kernel size, stride of 1 and padding of 1
        Batchnorm
        Adding the skip connection
        Relu application
        """
        identity = x
        out = self.conv_residual1(x)
        out = self.bn_residual1(out)
        out = self.relu(out)
        out = self.conv_residual2(out)
        out = self.bn_residual2(out)
        out += identity
        out = self.relu(out)
        return out

In [58]:
# testing
residual_block_test = Residual_Block(num_channels=3).to(device)
summary(residual_block_test, input_size=(1, 3, 32, 32))

Layer (type:depth-idx)                   Output Shape              Param #
Residual_Block                           [1, 3, 32, 32]            --
├─Conv2d: 1-1                            [1, 3, 32, 32]            81
├─BatchNorm2d: 1-2                       [1, 3, 32, 32]            6
├─ReLU: 1-3                              [1, 3, 32, 32]            --
├─Conv2d: 1-4                            [1, 3, 32, 32]            81
├─BatchNorm2d: 1-5                       [1, 3, 32, 32]            6
├─ReLU: 1-6                              [1, 3, 32, 32]            --
Total params: 174
Trainable params: 174
Non-trainable params: 0
Total mult-adds (M): 0.17
Input size (MB): 0.01
Forward/backward pass size (MB): 0.10
Params size (MB): 0.00
Estimated Total Size (MB): 0.11

In [38]:
output = residual_block_test(random_tensor_batch)
output.size()

torch.Size([16, 3, 32, 32])

Transition Block:
* https://miro.medium.com/v2/resize:fit:1100/format:webp/1*-Itl6-5b-8ODVHbm61n4vg.png
* this section is going to focus on the residual blocks that use dashed curve to denote its skip connection
* The convolution layer fundamentally remian the same

In [41]:
class Residual_Block_Transition(nn.Module):
    def __init__(self, in_channels, out_channels):
        """This block contains the structure of a residual block with downsampling of input and skip connection for the Input layer"""
        super(Residual_Block_Transition, self).__init__()

        # This is the kind of maxpooling layer for the Input interms of sie reduction as strid is 2,2
        self.conv_identity_adapt = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=(1, 1),
            stride=(2, 2),
            padding=(0, 0),
            bias=False,
        )

        self.bn_identity_adapt = nn.BatchNorm2d(out_channels)

        # Application of a 3*3 kernel size, stride of 2 and padding of 1 which again reduces the size of the input along with feature extraction
        self.conv_transition1 = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=(3, 3),
            stride=(2, 2),
            padding=(1, 1),
            bias=False,
        )

        self.bn_transition1 = nn.BatchNorm2d(out_channels)

        # this is a normal kerel size, stride of 1 and padding of 1 which is just meant for feature extraction
        self.conv_transition2 = nn.Conv2d(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=(3, 3),
            stride=(1, 1),
            padding=(1, 1),
            bias=False,
        )

        self.bn_transition2 = nn.BatchNorm2d(out_channels)

        self.relu = nn.ReLU()

    def forward(self, x):
        """Residual block forward pass whose flow is as follows:
        Create a skip connect with reduced dims for future addition
        First cnn block having 3*3 kernel size, stride of 2 and padding of 1 which makes it input reducer plus feature extractor
        Batchnorm and relu
        Second cnn block having 3*3 kernel size, stride of 1 and padding of 1 which makes it feature extractor
        Batchnorm
        Adding the skip connection
        Relu application
        """
        identity = self.bn_identity_adapt(self.conv_identity_adapt(x))
        out = self.relu(self.bn_transition1(self.conv_transition1(x)))
        out = self.relu(self.bn_transition2(self.conv_transition2(out)))
        out += identity
        out = self.relu(out)
        return out

In [40]:
# testing
residual_block_transition = Residual_Block_Transition(
    in_channels=3, out_channels=16
).to(device)
summary(residual_block_transition, input_size=(1, 3, 32, 32))

Layer (type:depth-idx)                   Output Shape              Param #
Residual_Block_Transition                [1, 16, 16, 16]           --
├─Conv2d: 1-1                            [1, 16, 16, 16]           48
├─BatchNorm2d: 1-2                       [1, 16, 16, 16]           32
├─Conv2d: 1-3                            [1, 16, 16, 16]           432
├─BatchNorm2d: 1-4                       [1, 16, 16, 16]           32
├─ReLU: 1-5                              [1, 16, 16, 16]           --
├─Conv2d: 1-6                            [1, 16, 16, 16]           2,304
├─BatchNorm2d: 1-7                       [1, 16, 16, 16]           32
├─ReLU: 1-8                              [1, 16, 16, 16]           --
├─ReLU: 1-9                              [1, 16, 16, 16]           --
Total params: 2,880
Trainable params: 2,880
Non-trainable params: 0
Total mult-adds (M): 0.71
Input size (MB): 0.01
Forward/backward pass size (MB): 0.20
Params size (MB): 0.01
Estimated Total Size (MB): 0.22

Furthermore, we also need to ensure that the skip connection to have the exact same size as the output. This is simply because element-wise summation is not possible to be done if the dimension of two tensors is different. In order to do so, we initialize a layer called self.conv_identity_adapt in which it is actually similar to self.conv_transition1 yet with the kernel size of 1×1

What we have done is to
* Residual_Block is specialized to process a tensor in which the input and output dimension is exactly the same.
* Residual_Block_Trans, on the other hand, is going to make the output tensor to be spatially twice as small as the input (using stride=2, as shown at line #(1) and #(2)), while the number of channels will be twice larger.

Final Resnet Image

https://miro.medium.com/v2/resize:fit:4604/format:webp/1*03YmxWgvuQgZluZHZjZuVg.png

First Layer Non_resnet:
* The inital part has a structure of Application of Convolution of (7,7) with stride =2  and padding of 3 which basically halfs the input size
* Then we application of batchnorn and relu and maxpool with (3,3) stride of (2,2) and padding of (1,1)
* The size of those kernels themselves are 7×7. In this first conv layer, we set the stride and the padding to (2,2) and (3,3), respectively, in order to make the spatial output dimension to be 112×112. Technically speaking, we actually don’t need to specify the image size to the network as it is going to work adaptively. This means that if you feed this part of network with 100×100 image, then the output size is going to be 50×50. We can also see in the Codeblock 9 above that max-pooling layer is implemented with the stride of (2,2), which causes the image size to be reduced by twice again. Thus, up until this point our image is going to be four times smaller than its original size.


In [54]:
class ResNet(nn.Module):
    def __init__(self, num_repeats, in_channels, num_classes, first_out_channels):
        """
        Args:
            num_repeats (int): Number of repetitions of the ResNet block
            in_channels (int): Number of input channels
            num_classes (int): Number of classes
            first_out_channels (int): Number of output channels for the first convolution layer
        """
        super(ResNet, self).__init__()

        # first layer non_resnet
        self.conv1 = nn.Conv2d(
            in_channels=in_channels,
            out_channels=first_out_channels,
            kernel_size=(7, 7),
            stride=(2, 2),
            padding=(3, 3),
            bias=False,
        )

        self.bn = nn.BatchNorm2d(first_out_channels)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))

        # first resnet layer with residual blocks conv2x
        self.residual_block_conv2_x = nn.ModuleList()
        for _ in range(num_repeats[0]):
            self.residual_block_conv2_x.append(
                Residual_Block(num_channels=first_out_channels)
            )

        # Using Residual_Block_Trans to connect conv2_x and conv3_x; in_channels set to 64 and out_channels to 128 (line #(1) Codeblock 11)
        # then creating and stacking all blocks for conv3_x (#(2)).

        # conv2x which increases from 64-> 128
        self.residual_block_trans3x = Residual_Block_Transition(
            in_channels=64, out_channels=128
        )  # this is the dotted transition block

        self.residual_blocks_conv3_x = nn.ModuleList()
        for _ in range(
            num_repeats[1] - 1
        ):  # -1 as we have n-1 residual blocks and one transition block
            self.residual_blocks_conv3_x.append(Residual_Block(num_channels=128))

        # 3 We use the same technique to connect conv3_x and conv4_x

        # conv3x which increases from 128-> 256
        self.residual_block_trans4 = Residual_Block_Transition(
            in_channels=128, out_channels=256
        )

        self.residual_blocks_conv4_x = nn.ModuleList()
        for _ in range(num_repeats[2] - 1):
            self.residual_blocks_conv4_x.append(Residual_Block(num_channels=256))

        # conv4x which increases from 256-> 512
        self.residual_block_trans5 = Residual_Block_Transition(
            in_channels=256, out_channels=512
        )

        self.residual_blocks_conv5_x = nn.ModuleList()
        for _ in range(num_repeats[3] - 1):
            self.residual_blocks_conv5_x.append(Residual_Block(num_channels=512))

        # Final classification layer
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.classifier = nn.Linear(512, num_classes)

    def forward(self, x):
        """forward function for classification layer"""
        # presentation layer
        x = self.conv1(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.maxpool(x)

        # first residual conv block
        for layer in self.residual_block_conv2_x:
            x = layer(x)

        # first transition block to increase the channel size and reduce the image size
        x = self.residual_block_trans3x(x)

        # second residual block
        for layer in self.residual_blocks_conv3_x:
            x = layer(x)

        # second transition block to increase the channel size and reduce the image size
        x = self.residual_block_trans4(x)

        # third residual block
        for layer in self.residual_blocks_conv4_x:
            x = layer(x)

        # third transition block to increase the channel size and reduce the image size
        x = self.residual_block_trans5(x)

        # fourth residual block
        for layer in self.residual_blocks_conv5_x:
            x = layer(x)

        # final classification layer
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

In [55]:
resnet_base = ResNet(
    num_repeats=[3, 4, 6, 3], in_channels=3, num_classes=10, first_out_channels=64
).to(device)

summary(resnet_base, input_size=(1, 3, 32, 32))

Layer (type:depth-idx)                   Output Shape              Param #
ResNet                                   [1, 10]                   --
├─Conv2d: 1-1                            [1, 64, 16, 16]           9,408
├─BatchNorm2d: 1-2                       [1, 64, 16, 16]           128
├─ReLU: 1-3                              [1, 64, 16, 16]           --
├─MaxPool2d: 1-4                         [1, 64, 8, 8]             --
├─ModuleList: 1-5                        --                        --
│    └─Residual_Block: 2-1               [1, 64, 8, 8]             --
│    │    └─Conv2d: 3-1                  [1, 64, 8, 8]             36,864
│    │    └─BatchNorm2d: 3-2             [1, 64, 8, 8]             128
│    │    └─ReLU: 3-3                    [1, 64, 8, 8]             --
│    │    └─Conv2d: 3-4                  [1, 64, 8, 8]             36,864
│    │    └─BatchNorm2d: 3-5             [1, 64, 8, 8]             128
│    │    └─ReLU: 3-6                    [1, 64, 8, 8]             --
│

In [56]:
output = resnet_base(random_tensor_batch)
output.size()

torch.Size([16, 10])

 This notebook ends here as we are not training this model given the size of the model. We will try training a smaller model resnet-18 on cifar 10 dataset which we discuss in next notebook