In [2]:
%autosave 300
%autoreload 2
%reload_ext autoreload
%config Completer.use_jedi = False

Autosaving every 300 seconds


In [3]:
import os

os.chdir(
    "/mnt/batch/tasks/shared/LS_root/mounts/clusters/insights-model-run/code/Users/soutrik.chowdhury/advanced_vision_modules_pytorch"
)
print(os.getcwd())

/mnt/batch/tasks/shared/LS_root/mounts/clusters/insights-model-run/code/Users/soutrik.chowdhury/advanced_vision_modules_pytorch


This notebooks deals in a different way of solving the RESENT problem in one block rather than 2 that we have discussed in the previous notebook:
* We will attempt solving resnet 18 from the point of view of CIFAR-10 but same applies for Resnet-34
* https://debuggercafe.com/wp-content/uploads/2022/08/resnet-models-from-table.png
* https://debuggercafe.com/implementing-resnet18-in-pytorch-from-scratch/

In [4]:
import torch
from torch import nn
import torch.nn.functional as F
from torchinfo import summary
from torch import Tensor
from typing import Type

Basic blocks 

https://debuggercafe.com/wp-content/uploads/2022/08/resnet18-basic-blocks-1.png

https://debuggercafe.com/wp-content/uploads/2022/08/resnet-residual-block-for-resnet18-from-scratch-using-pytorch.png

In [10]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
random_tensor_batch = torch.rand((16, 3, 32, 32), device=device, dtype=torch.float32)
print(random_tensor_batch.shape)
print(random_tensor_batch.dtype)
print(random_tensor_batch.device)
print(random_tensor_batch.dim())

torch.Size([16, 3, 32, 32])
torch.float32
cuda:0
4


In [25]:
class BasicResentBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride, expansion, downsample=None):
        super(BasicResentBlock, self).__init__()
        self.expansion = expansion
        self.downsample = downsample

        self.convblock1 = nn.Sequential(
            nn.Conv2d(
                in_channels,
                out_channels,
                kernel_size=3,
                stride=stride,
                padding=1,
                bias=False,
            ),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        )

        self.convblock2 = nn.Sequential(
            nn.Conv2d(
                out_channels,
                out_channels * self.expansion,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=False,
            ),
            nn.BatchNorm2d(out_channels * self.expansion),
        )

        self.relu = nn.ReLU()

    def forward(self, x):
        """Forward pass which follows the flow as :
        First convolution block -> Second convolution block -> Downsample -> Add identity -> ReLU
        """
        identity = x

        out = self.convblock1(x)
        out = self.convblock2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        # print(out.shape)
        # print(identity.shape)
        out += identity
        out = self.relu(out)
        return out

In [26]:
resnet_block = BasicResentBlock(in_channels=3, out_channels=3, stride=1, expansion=1).to(device)
summary(resnet_block, input_size=(1, 3, 32, 32))

Layer (type:depth-idx)                   Output Shape              Param #
BasicResentBlock                         [1, 3, 32, 32]            --
├─Sequential: 1-1                        [1, 3, 32, 32]            --
│    └─Conv2d: 2-1                       [1, 3, 32, 32]            81
│    └─BatchNorm2d: 2-2                  [1, 3, 32, 32]            6
│    └─ReLU: 2-3                         [1, 3, 32, 32]            --
├─Sequential: 1-2                        [1, 3, 32, 32]            --
│    └─Conv2d: 2-4                       [1, 3, 32, 32]            81
│    └─BatchNorm2d: 2-5                  [1, 3, 32, 32]            6
├─ReLU: 1-3                              [1, 3, 32, 32]            --
Total params: 174
Trainable params: 174
Non-trainable params: 0
Total mult-adds (M): 0.17
Input size (MB): 0.01
Forward/backward pass size (MB): 0.10
Params size (MB): 0.00
Estimated Total Size (MB): 0.11

In [27]:
block_op = resnet_block(random_tensor_batch)
print(block_op.shape)

torch.Size([16, 3, 32, 32])


In [31]:
class ResNet(nn.Module):
    def __init__(self, in_channels, resnet_ver, num_classes):
        super(ResNet, self).__init__()
        if resnet_ver == "resnet18":
            layers = [2, 2, 2, 2]
            self.expansion = 1
        elif resnet_ver == "resnet34":
            layers = [3, 4, 6, 3]
            self.expansion = 1

        # First lets declare the presenet block conv->bn->relu->maxpool with paper wise config
        self.presentation = nn.Sequential(
            nn.Conv2d(
                in_channels=in_channels,
                out_channels=64,
                kernel_size=(7, 7),
                stride=(2, 2),
                padding=(3, 3),
                bias=False,
            ),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)),
        )

        # Now lets declare the resnet blocks
        self.resnet_l1 = self._make_resent_layer(64, 64, 1, layers[0])
        self.resnet_l2 = self._make_resent_layer(64, 128, 2, layers[1])
        self.resnet_l3 = self._make_resent_layer(128, 256, 2, layers[2])
        self.resnet_l4 = self._make_resent_layer(256, 512, 2, layers[3])

        self.classifier = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(512 * self.expansion, num_classes),
        )

    def _make_resent_layer(self, in_channels, out_channels, stride, blocks):
        if stride == 1:
            downsample = None
        else:
            downsample = nn.Sequential(
                nn.Conv2d(
                    in_channels=in_channels,
                    out_channels=out_channels * self.expansion,
                    kernel_size=1,
                    stride=stride,
                    bias=False,
                ),
                nn.BatchNorm2d(out_channels * self.expansion),
            )

        layers = []
        # adding the layer with dotted connection ie the downsampling block
        layers.append(
            BasicResentBlock(
                in_channels=in_channels,
                out_channels=out_channels,
                stride=stride,
                expansion=self.expansion,
                downsample=downsample,
            )
        )

        # adding the remaining blocks
        for _ in range(1, blocks):
            layers.append(
                BasicResentBlock(
                    out_channels, out_channels, 1, expansion=self.expansion
                )
            )

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.presentation(x)
        x = self.resnet_l1(x)
        x = self.resnet_l2(x)
        x = self.resnet_l3(x)
        x = self.resnet_l4(x)
        print("Dimensions of the last convolutional feature map: ", x.shape)
        x = self.classifier(x)
        return x

In [32]:
model = ResNet(in_channels=3, resnet_ver="resnet18", num_classes=10).to(device)
summary(model, input_size=(1, 3, 32, 32))

Dimensions of the last convolutional feature map:  torch.Size([1, 512, 1, 1])


Layer (type:depth-idx)                   Output Shape              Param #
ResNet                                   [1, 10]                   --
├─Sequential: 1-1                        [1, 64, 8, 8]             --
│    └─Conv2d: 2-1                       [1, 64, 16, 16]           9,408
│    └─BatchNorm2d: 2-2                  [1, 64, 16, 16]           128
│    └─ReLU: 2-3                         [1, 64, 16, 16]           --
│    └─MaxPool2d: 2-4                    [1, 64, 8, 8]             --
├─Sequential: 1-2                        [1, 64, 8, 8]             --
│    └─BasicResentBlock: 2-5             [1, 64, 8, 8]             --
│    │    └─Sequential: 3-1              [1, 64, 8, 8]             36,992
│    │    └─Sequential: 3-2              [1, 64, 8, 8]             36,992
│    │    └─ReLU: 3-3                    [1, 64, 8, 8]             --
│    └─BasicResentBlock: 2-6             [1, 64, 8, 8]             --
│    │    └─Sequential: 3-4              [1, 64, 8, 8]             36,992

In [35]:
# Total parameters and trainable parameters.
total_params = sum(p.numel() for p in model.parameters())
print(f"{total_params:,} total parameters.")
total_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"{total_trainable_params:,} training parameters.")
output = model(random_tensor_batch)
print(output.shape)

11,181,642 total parameters.
11,181,642 training parameters.
Dimensions of the last convolutional feature map:  torch.Size([16, 512, 1, 1])
torch.Size([16, 10])


According the original Article:: https://debuggercafe.com/implementing-resnet18-in-pytorch-from-scratch/

In [33]:
class BasicBlock(nn.Module):
    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        stride: int = 1,
        expansion: int = 1,
        downsample: nn.Module = None,
    ) -> None:
        super(BasicBlock, self).__init__()
        # Multiplicative factor for the subsequent conv2d layer's output channels.
        # It is 1 for ResNet18 and ResNet34.
        self.expansion = expansion
        self.downsample = downsample
        self.conv1 = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            bias=False,
        )
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(
            out_channels,
            out_channels * self.expansion,
            kernel_size=3,
            padding=1,
            bias=False,
        )
        self.bn2 = nn.BatchNorm2d(out_channels * self.expansion)

    def forward(self, x: Tensor) -> Tensor:
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)
        return out

In [34]:
class ResNetMain(nn.Module):
    def __init__(
        self, 
        img_channels: int,
        num_layers: int,
        block: Type[BasicBlock],
        num_classes: int  = 1000
    ) -> None:
        super(ResNetMain, self).__init__()
        if num_layers == 18:
            # The following `layers` list defines the number of `BasicBlock` 
            # to use to build the network and how many basic blocks to stack
            # together.
            layers = [2, 2, 2, 2]
            self.expansion = 1
        
        self.in_channels = 64
        # All ResNets (18 to 152) contain a Conv2d => BN => ReLU for the first
        # three layers. Here, kernel size is 7.
        self.conv1 = nn.Conv2d(
            in_channels=img_channels,
            out_channels=self.in_channels,
            kernel_size=7, 
            stride=2,
            padding=3,
            bias=False
        )
        self.bn1 = nn.BatchNorm2d(self.in_channels)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512*self.expansion, num_classes)

    def _make_layer(
        self, 
        block: Type[BasicBlock],
        out_channels: int,
        blocks: int,
        stride: int = 1
    ) -> nn.Sequential:
        downsample = None
        if stride != 1:
            """
            This should pass from `layer2` to `layer4` or 
            when building ResNets50 and above. Section 3.3 of the paper
            Deep Residual Learning for Image Recognition
            (https://arxiv.org/pdf/1512.03385v1.pdf).
            """
            downsample = nn.Sequential(
                nn.Conv2d(
                    self.in_channels, 
                    out_channels*self.expansion,
                    kernel_size=1,
                    stride=stride,
                    bias=False 
                ),
                nn.BatchNorm2d(out_channels * self.expansion),
            )
        layers = []
        layers.append(
            block(
                self.in_channels, out_channels, stride, self.expansion, downsample
            )
        )
        self.in_channels = out_channels * self.expansion

        for i in range(1, blocks):
            layers.append(block(
                self.in_channels,
                out_channels,
                expansion=self.expansion
            ))
        return nn.Sequential(*layers)

    def forward(self, x: Tensor) -> Tensor:
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        # The spatial dimension of the final layer's feature 
        # map should be (7, 7) for all ResNets.
        print('Dimensions of the last convolutional feature map: ', x.shape)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

In [39]:
model = ResNetMain(img_channels=3, num_layers=18, block=BasicBlock, num_classes=10).to(device)
summary(model)

# Total parameters and trainable parameters.
total_params = sum(p.numel() for p in model.parameters())
print(f"{total_params:,} total parameters.")
total_trainable_params = sum(
    p.numel() for p in model.parameters() if p.requires_grad)
print(f"{total_trainable_params:,} training parameters.")
output = model(random_tensor_batch)
print(output.shape)

11,181,642 total parameters.
11,181,642 training parameters.
Dimensions of the last convolutional feature map:  torch.Size([16, 512, 1, 1])
torch.Size([16, 10])


This verifies that the ResNet architecture is correct. The next step is to train the model on the CIFAR-10 dataset.