# ResNet-50 in PyTorch

## 1. Packages

In [81]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchsummary import summary


# Environment check: show the installed PyTorch version and the status of
# the MPS backend (whether it is usable and whether it was compiled in).
print("Torch version: ", torch.__version__)
mps_backend = torch.backends.mps
print("MPS available: ", mps_backend.is_available())
print("MPS built: ", mps_backend.is_built())

Torch version:  2.6.0
MPS available:  True
MPS built:  True


## 2. Building ResNet-50

### 2.1. Identity Block

In [31]:
class IdentityBlock(nn.Module):
    """Three-layer bottleneck residual block with an un-projected shortcut.

    Main path: 1x1 conv -> BN -> ReLU -> (kernel_size) conv -> BN -> ReLU
    -> 1x1 conv -> BN. The block input is added to the main-path output and
    a final ReLU is applied. Every convolution uses stride 1 and the middle
    one uses 'same' padding, so the spatial size is left unchanged.

    Args:
        in_channels: number of channels of the input tensor. Expected to be
            equal to ``filters[2]`` so the shortcut and the main-path output
            have matching channel counts.
        filters: three output-channel counts, one per convolution.
        kernel_size: kernel size of the middle convolution.
    """

    def __init__(self, in_channels, filters, kernel_size):
        super().__init__()
        reduce_ch, mid_ch, expand_ch = filters

        # 1x1 convolution entering the bottleneck
        self.conv1 = nn.Conv2d(
            in_channels, reduce_ch, kernel_size=1, stride=1, padding=0
        )
        self.bn1 = nn.BatchNorm2d(reduce_ch)

        # spatial convolution; 'same' padding keeps height and width
        self.conv2 = nn.Conv2d(
            reduce_ch, mid_ch, kernel_size=kernel_size, stride=1, padding='same'
        )
        self.bn2 = nn.BatchNorm2d(mid_ch)

        # 1x1 convolution leaving the bottleneck
        self.conv3 = nn.Conv2d(
            mid_ch, expand_ch, kernel_size=1, stride=1, padding=0
        )
        self.bn3 = nn.BatchNorm2d(expand_ch)

    def forward(self, X):
        """Return ReLU(main_path(X) + X)."""
        relu = nn.functional.relu
        identity = X

        out = relu(self.bn1(self.conv1(X)))
        out = relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # residual addition happens before the last activation
        return relu(out + identity)

In [32]:
# Smoke test for IdentityBlock: feed a random batch through one block and
# print the resulting shape.
X = torch.randn(1, 256, 32, 32)
filters = [64, 64, 256]
kernel_size = 3
# Pass the kernel_size variable defined above instead of repeating the literal.
block = IdentityBlock(X.shape[1], filters=filters, kernel_size=kernel_size)
# Call the module itself rather than .forward() so nn.Module.__call__
# (and any registered hooks) is used.
output = block(X)
print(output.shape)

torch.Size([1, 256, 32, 32])


### 2.2. Convolutional Block

In [29]:
class ConvolutionalBlock(nn.Module):
    """Three-layer bottleneck residual block with a projected shortcut.

    Main path: 1x1 conv (with ``stride``) -> BN -> ReLU -> (kernel_size) conv
    -> BN -> ReLU -> 1x1 conv -> BN. The shortcut path applies its own 1x1
    conv (same ``stride``) followed by BN so that it matches the main path
    in channel count and spatial size before the two are added and passed
    through a final ReLU.

    Args:
        in_channels: number of channels of the input tensor.
        filters: three output-channel counts, one per main-path convolution;
            ``filters[2]`` is also the shortcut's output-channel count.
        kernel_size: kernel size of the middle convolution.
        stride: stride of the first main-path convolution and of the
            shortcut convolution.
    """

    def __init__(self, in_channels, filters, kernel_size, stride):
        super().__init__()
        F1, F2, F3 = filters
        # Only layers are declared here. The shortcut tensor is captured in
        # forward(); the constructor must not reference any input tensor.

        # first layer
        self.conv1 = nn.Conv2d(in_channels, F1, kernel_size=1, stride=stride, padding=0)
        self.bn1 = nn.BatchNorm2d(F1)

        # second layer
        self.conv2 = nn.Conv2d(F1, F2, kernel_size=kernel_size, stride=1, padding='same')
        self.bn2 = nn.BatchNorm2d(F2)

        # third layer
        self.conv3 = nn.Conv2d(F2, F3, kernel_size=1, stride=1, padding=0)
        self.bn3 = nn.BatchNorm2d(F3)

        # shortcut path: projects the input to F3 channels with the same stride
        self.shortcut_conv = nn.Conv2d(in_channels, F3, kernel_size=1, stride=stride, padding=0)
        self.shortcut_bn = nn.BatchNorm2d(F3)

    def forward(self, X):
        """Return ReLU(main_path(X) + shortcut_path(X))."""
        X_shortcut = X

        X = self.conv1(X)
        X = self.bn1(X)
        X = nn.functional.relu(X)

        X = self.conv2(X)
        X = self.bn2(X)
        X = nn.functional.relu(X)

        X = self.conv3(X)
        X = self.bn3(X)

        X_shortcut = self.shortcut_conv(X_shortcut)
        X_shortcut = self.shortcut_bn(X_shortcut)
        # residual addition happens before the last activation
        X = nn.functional.relu(X + X_shortcut)

        return X

In [30]:
# Smoke test for ConvolutionalBlock: feed a random batch through one block
# built with stride 2 and print the resulting shape.
X = torch.randn(1, 64, 32, 32)
filters = [64, 64, 256]
stride = 2
kernel_size = 3
block = ConvolutionalBlock(in_channels=X.shape[1], filters=filters, kernel_size=kernel_size, stride=stride)
# Call the module itself rather than .forward() so nn.Module.__call__
# (and any registered hooks) is used.
output = block(X)
print(output.shape)

torch.Size([1, 256, 16, 16])


### 2.3. ResNet-50

In [55]:
class ResNet50(nn.Module):
    """ResNet-50 style classifier built from ConvolutionalBlock and IdentityBlock.

    Layout:
        * stem: zero-pad(3) -> 7x7 conv, stride 2 -> BN -> ReLU -> 3x3 max-pool, stride 2
        * stage 2: 1 convolutional block (stride 1) + 2 identity blocks, 256 channels out
        * stage 3: 1 convolutional block (stride 2) + 3 identity blocks, 512 channels out
        * stage 4: 1 convolutional block (stride 2) + 5 identity blocks, 1024 channels out
        * stage 5: 1 convolutional block (stride 2) + 2 identity blocks, 2048 channels out
        * head: 2x2 average pool -> flatten -> linear(2048, num_classes)

    The linear layer takes exactly 2048 features, so the feature map must be
    1x1 after the average pool; other input resolutions will not fit the head.

    Args:
        in_channels: number of channels of the input images.
        num_classes: size of the output logit vector.
    """

    def __init__(self, in_channels=3, num_classes=10):
        super().__init__()

        # zero padding applied ahead of the unpadded 7x7 convolution
        self.zero_pad = nn.ZeroPad2d(3)

        # stage 1 (stem)
        self.conv1 = nn.Conv2d(in_channels, 64, kernel_size=7, stride=2)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.maxpooling = nn.MaxPool2d(kernel_size=3, stride=2)

        # stages 2-5: (input channels, bottleneck filters, stride, identity blocks)
        self.stage2 = self._make_stage(64, [64, 64, 256], stride=1, identity_blocks=2)
        self.stage3 = self._make_stage(256, [128, 128, 512], stride=2, identity_blocks=3)
        self.stage4 = self._make_stage(512, [256, 256, 1024], stride=2, identity_blocks=5)
        self.stage5 = self._make_stage(1024, [512, 512, 2048], stride=2, identity_blocks=2)

        # classification head
        self.avgpooling = nn.AvgPool2d(kernel_size=2, stride=2)
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(2048, num_classes)

    @staticmethod
    def _make_stage(in_channels, filters, stride, identity_blocks):
        """Build one ConvolutionalBlock followed by `identity_blocks` IdentityBlocks."""
        stage_channels = filters[-1]
        layers = [ConvolutionalBlock(in_channels, filters, kernel_size=3, stride=stride)]
        for _ in range(identity_blocks):
            layers.append(IdentityBlock(stage_channels, filters, kernel_size=3))
        return nn.Sequential(*layers)

    def forward(self, X):
        """Run stem, the four residual stages and the head; return the logits."""
        pipeline = (
            # stem
            self.zero_pad, self.conv1, self.bn1, self.relu, self.maxpooling,
            # residual stages
            self.stage2, self.stage3, self.stage4, self.stage5,
            # head
            self.avgpooling, self.flatten, self.fc,
        )
        for layer in pipeline:
            X = layer(X)
        return X

In [82]:
# Test model: build a 6-class ResNet50 and print its per-layer summary for a
# 3x64x64 input.
model = ResNet50(num_classes=6)
summary(model, (3, 64, 64))

# Loss and optimizer used for training the model built above.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.00015)
# NOTE(review): the seeds are set after `model` has been constructed, so they
# do not influence this model's initial weights; they only affect random
# operations executed after this point. Confirm that is the intent.
torch.manual_seed(1)
torch.cuda.manual_seed_all(2)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
         ZeroPad2d-1            [-1, 3, 70, 70]               0
            Conv2d-2           [-1, 64, 32, 32]           9,472
       BatchNorm2d-3           [-1, 64, 32, 32]             128
              ReLU-4           [-1, 64, 32, 32]               0
         MaxPool2d-5           [-1, 64, 15, 15]               0
            Conv2d-6           [-1, 64, 15, 15]           4,160
       BatchNorm2d-7           [-1, 64, 15, 15]             128
            Conv2d-8           [-1, 64, 15, 15]          36,928
       BatchNorm2d-9           [-1, 64, 15, 15]             128
           Conv2d-10          [-1, 256, 15, 15]          16,640
      BatchNorm2d-11          [-1, 256, 15, 15]             512
           Conv2d-12          [-1, 256, 15, 15]          16,640
      BatchNorm2d-13          [-1, 256, 15, 15]             512
ConvolutionalBlock-14          [-1, 256