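- x is the input, F(x) is the output of the block's stacked layers (the main path), and y is the output of the block.
- Identity block: the shortcut is an identity mapping, i.e. the input x is added to F(x) unchanged, so the input and the main-path output must have the same shape.
-  Traditional network: y = H(x); the layers learn the true output H(x) directly.
-   Residual network: y = x + F(x), so F(x) = y - x; the layers only learn the residual F(x).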
-    Hence, the name: Residual Block.

- Mitigates the vanishing-gradient problem.
- During backpropagation, the gradient can flow through two paths:
1. Through the residual layers (normal conv layers).
2. Directly through the skip connection (identity mapping).
Mathematically, if:
y = F(x) + x
then the derivative with respect to the input is:
∂y/∂x = ∂F(x)/∂x + 1

 The "+1" comes from the skip connection: even if ∂F(x)/∂x becomes very small (vanishing), the gradient reaching x does not vanish with it, because the skip connection passes the upstream gradient back unchanged.

# Identity Block

In [5]:
import torch
import torch.nn as nn

class IdentityBlock(nn.Module):
    """Bottleneck residual block whose shortcut is the unmodified input.

    Main path: 1x1 conv -> BN -> ReLU -> f x f conv -> BN -> ReLU ->
    1x1 conv -> BN. The block input is then added to the main-path result
    and a final ReLU is applied. Every convolution uses stride 1.

    Args:
        in_channels: Number of channels of the input tensor.
        filters: Three output-channel counts, one per convolution of the
            main path, in order.
        f: Kernel size of the middle convolution (padded with ``f // 2``).

    Note:
        The input is added to the main-path output without any projection,
        so the two must have compatible shapes; this is not validated here.
    """

    def __init__(self, in_channels, filters, f):
        super().__init__()

        reduce_ch, mid_ch, expand_ch = filters

        # 1x1 convolution into the bottleneck width.
        self.conv1 = nn.Conv2d(in_channels, reduce_ch, kernel_size=1)
        self.bn1 = nn.BatchNorm2d(reduce_ch)

        # f x f convolution; padding of f // 2 with stride 1 keeps the
        # spatial size unchanged for odd f ("same" padding).
        self.conv2 = nn.Conv2d(reduce_ch, mid_ch, kernel_size=f, padding=f // 2)
        self.bn2 = nn.BatchNorm2d(mid_ch)

        # 1x1 convolution out of the bottleneck.
        self.conv3 = nn.Conv2d(mid_ch, expand_ch, kernel_size=1)
        self.bn3 = nn.BatchNorm2d(expand_ch)

        # One shared in-place ReLU is reused after every stage.
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return ``relu(main_path(x) + x)``."""
        identity = x  # kept aside for the skip connection

        y = self.relu(self.bn1(self.conv1(x)))
        y = self.relu(self.bn2(self.conv2(y)))
        y = self.bn3(self.conv3(y))  # no activation before the addition

        y += identity
        return self.relu(y)


# Convolutional Block

In [6]:
#used in ResNet when input and output dimensions differ

import torch
import torch.nn as nn

class ConvolutionalBlock(nn.Module):
    """Bottleneck residual block with a projected (1x1 conv + BN) shortcut.

    Main path: 1x1 conv (strided) -> BN -> ReLU -> f x f conv -> BN -> ReLU
    -> 1x1 conv -> BN. The shortcut applies its own strided 1x1 convolution
    and batch norm to the input so that it matches the main path's channel
    count and spatial size before the two are added and passed through ReLU.

    Args:
        in_channels: Number of channels of the input tensor.
        filters: Three output-channel counts, one per convolution of the
            main path, in order; the last one is also the shortcut's width.
        f: Kernel size of the middle convolution (padded with ``f // 2``).
        stride: Stride of the first main-path convolution and of the
            shortcut convolution.
    """

    def __init__(self, in_channels, filters, f, stride=2):
        super().__init__()

        reduce_ch, mid_ch, expand_ch = filters

        # Main path: the spatial downsampling happens in the first 1x1 conv.
        self.conv1 = nn.Conv2d(in_channels, reduce_ch, kernel_size=1, stride=stride)
        self.bn1 = nn.BatchNorm2d(reduce_ch)

        # padding of f // 2 with stride 1 keeps the spatial size for odd f.
        self.conv2 = nn.Conv2d(reduce_ch, mid_ch, kernel_size=f, padding=f // 2)
        self.bn2 = nn.BatchNorm2d(mid_ch)

        self.conv3 = nn.Conv2d(mid_ch, expand_ch, kernel_size=1)
        self.bn3 = nn.BatchNorm2d(expand_ch)

        # Shortcut path: same stride as conv1 so both branches line up.
        self.shortcut_conv = nn.Conv2d(in_channels, expand_ch, kernel_size=1, stride=stride)
        self.shortcut_bn = nn.BatchNorm2d(expand_ch)

        # One shared in-place ReLU is reused after every stage.
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut_path(x))``."""
        projected = self.shortcut_bn(self.shortcut_conv(x))

        y = self.relu(self.bn1(self.conv1(x)))
        y = self.relu(self.bn2(self.conv2(y)))
        y = self.bn3(self.conv3(y))  # no activation before the addition

        y += projected
        return self.relu(y)


# ResNet 50

In [9]:
import torch.nn.functional as F

class ResNet50(nn.Module):
    """ResNet-50 style image classifier built from bottleneck blocks.

    Layout: a 7x7 strided stem with max pooling, four stages made of one
    ``ConvolutionalBlock`` followed by 2 / 3 / 5 / 2 ``IdentityBlock`` s,
    global average pooling, and a fully connected classifier.

    Args:
        num_classes: Size of the classifier output.
        in_channels: Number of channels of the input images (3 by default,
            which reproduces the previously hard-coded stem).

    Note:
        ``forward`` returns softmax probabilities. Losses that apply
        (log-)softmax themselves, such as ``nn.CrossEntropyLoss``, expect
        unnormalised scores; use ``forward_logits`` for those.
    """

    def __init__(self, num_classes=3, in_channels=3):
        super().__init__()

        # Stage 1: explicit zero padding of 3 followed by an unpadded 7x7 conv.
        self.pad = nn.ZeroPad2d(3)
        self.conv1 = nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=0)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Stage 2 (stride 1: the max pool above already downsampled).
        self.layer2a = ConvolutionalBlock(64, [64, 64, 256], f=3, stride=1)
        self.layer2b = IdentityBlock(256, [64, 64, 256], f=3)
        self.layer2c = IdentityBlock(256, [64, 64, 256], f=3)

        # Stage 3
        self.layer3a = ConvolutionalBlock(256, [128, 128, 512], f=3, stride=2)
        self.layer3b = IdentityBlock(512, [128, 128, 512], f=3)
        self.layer3c = IdentityBlock(512, [128, 128, 512], f=3)
        self.layer3d = IdentityBlock(512, [128, 128, 512], f=3)

        # Stage 4
        self.layer4a = ConvolutionalBlock(512, [256, 256, 1024], f=3, stride=2)
        self.layer4b = IdentityBlock(1024, [256, 256, 1024], f=3)
        self.layer4c = IdentityBlock(1024, [256, 256, 1024], f=3)
        self.layer4d = IdentityBlock(1024, [256, 256, 1024], f=3)
        self.layer4e = IdentityBlock(1024, [256, 256, 1024], f=3)
        self.layer4f = IdentityBlock(1024, [256, 256, 1024], f=3)

        # Stage 5
        self.layer5a = ConvolutionalBlock(1024, [512, 512, 2048], f=3, stride=2)
        self.layer5b = IdentityBlock(2048, [512, 512, 2048], f=3)
        self.layer5c = IdentityBlock(2048, [512, 512, 2048], f=3)

        # Adaptive pooling to 1x1 makes the classifier independent of the
        # spatial size reaching it.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(2048, num_classes)

    def forward_logits(self, x):
        """Return the unnormalised class scores, shape ``(batch, num_classes)``."""
        # Stage 1
        x = self.pad(x)
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        # Stage 2
        for block in (self.layer2a, self.layer2b, self.layer2c):
            x = block(x)

        # Stage 3
        for block in (self.layer3a, self.layer3b, self.layer3c, self.layer3d):
            x = block(x)

        # Stage 4
        for block in (
            self.layer4a,
            self.layer4b,
            self.layer4c,
            self.layer4d,
            self.layer4e,
            self.layer4f,
        ):
            x = block(x)

        # Stage 5
        for block in (self.layer5a, self.layer5b, self.layer5c):
            x = block(x)

        # Pool + FC
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        return self.fc(x)

    def forward(self, x):
        """Return class probabilities (softmax over dim 1 of the logits)."""
        return F.softmax(self.forward_logits(x), dim=1)


In [11]:
# Smoke test: build the network with 1000 output classes and push a single
# random input through it to confirm the output shape.
model = ResNet50(num_classes=1000)

# One random 3-channel 64x64 image (batch size 1).
x = torch.randn((1, 3, 64, 64))

# Forward pass; the model is called directly rather than via .forward().
out = model(x)
print("Output shape:", out.size())


Output shape: torch.Size([1, 1000])
