# Implementation of the ResNet backbone for the FPN Network.

In [1]:
from importlib.util import find_spec
if find_spec("model") is None:
    import sys
    sys.path.append('..')

In [2]:
from typing import Optional
import torchvision.models as models
import torch
import torch.nn as nn
import torch.nn.functional as F
from base import BaseModel

## Bag of Tricks Variant

paper: [Bag of Tricks for Image Classification with Convolutional Neural Networks](https://arxiv.org/abs/1812.01187)

In [3]:
class Resnet(BaseModel):
    def __init__(self, num_classes: Optional[int] = 1000, num_features: Optional[int] = 2048) -> None:
        super().__init__()
        self.num_features = num_features
        self.num_classes = num_classes
        
        # Stage 1:
        self.conv1_1 = nn.Conv2d(3, 32, kernel_size=3, stride=2)
        self.conv1_2 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
        self.conv1_3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn1_1 = nn.BatchNorm2d(32)
        self.bn1_2 = nn.BatchNorm2d(32)
        self.bn1_3 = nn.BatchNorm2d(64)
        self.pool1 = nn.MaxPool2d(stride=2, kernel_size=2)
        
        self.global_avg_pooling = nn.AvgPool2d(kernel_size=7)
        self.fc = nn.Linear(self.num_features, self.num_classes)

In [4]:
def conv1x1(in_channels: int, out_channels):
    return nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)

def conv3x3(in_channels: int, out_channels: int, stride: Optional[int]=1):
    return nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)

def shortcut(in_channels: int, out_channels: int):
     return  nn.Sequential (
         nn.AvgPool2d(stride=2, kernel_size=2),
         conv1x1(in_channels, out_channels)
     )

In [5]:
class BottleNeckBlock(nn.Module):
    def __init__(self, in_channels: int, out_channels: int, bn_channels: int, stride: Optional[int]=1):
        """
        Args:
            bn_channels (int): number of output channels for the 3x3.
        """
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        
        self.block = nn.Sequential(
            conv1x1(in_channels, bn_channels),
            nn.BatchNorm2d(bn_channels),
            nn.ReLU(inplace=True),
            conv3x3(bn_channels, bn_channels, stride),
            nn.BatchNorm2d(bn_channels),
            nn.ReLU(inplace=True),
            conv1x1(bn_channels, out_channels),
            nn.BatchNorm2d(out_channels),
        )
        
        if self.downsample():
            self.shortcut = shortcut(in_channels, out_channels)
        else:
            self.shortcut = None
            
        
        
    def downsample(self):
        return self.in_channels != self.out_channels
        
        
    def forward(self, x):
        out = self.block(x)
        
        if self.shortcut is None:
            self.shortcut = x
            
        out += self.shortcut
        out = F.relu(out)
        return out

In [6]:
class FpnResnet50(Resnet):
    def __init__(self, num_classes: Optional[int]=1000):
        super().__init__(num_classes)
        
        # Stage 2:
        self.res_block2_1 = BottleNeckBlock(64, 256, 64)
        self.res_block2_2 = BottleNeckBlock(256, 256, 64)
        self.res_block2_3 = BottleNeckBlock(256, 256, 64)
        
        # Stage 3:
        self.res_block3_1 = BottleNeckBlock(256, 512, 128, stride=2)
        self.res_block3_2 = BottleNeckBlock(512, 512, 128)
        self.res_block3_3 = BottleNeckBlock(512, 512, 128)
        self.res_block3_4 = BottleNeckBlock(512, 512, 128)
        
        # Stage 4:
        self.res_block4_1 = BottleNeckBlock(512, 1024, 256, stride=2)
        self.res_block4_2 = BottleNeckBlock(1024, 1024, 256)
        self.res_block4_3 = BottleNeckBlock(1024, 1024, 256)
        self.res_block4_4 = BottleNeckBlock(1024, 1024, 256)
        self.res_block4_5 = BottleNeckBlock(1024, 1024, 256)
        self.res_block4_6 = BottleNeckBlock(1024, 1024, 256)
        
        # Stage 5:
        self.res_block5_1 = BottleNeckBlock(1024, 2048, 512, stride=2)
        self.res_block5_2 = BottleNeckBlock(2048, 2048, 512)
        self.res_block5_3 = BottleNeckBlock(2048, 2048, 512)
        
    def forward(self, x):
        # Stage 1 forward.
        out = F.relu(self.bn1_1(self.conv1_1(x))) 
        out = F.relu(self.bn1_2(self.conv1_2(out))) 
        out = F.relu(self.bn1_3(self.conv1_3(out))) 
        out = self.pool1(out)
        
        # Stage 2 forwards.
        out = self.res_block2_1(out)
        out = self.res_block2_2(out)
        out = self.res_block2_3(out)
        C2 = out
        
        # Stage 3 forwards
        out = self.res_block3_1(out)
        out = self.res_block3_2(out)
        out = self.res_block3_3(out)
        out = self.res_block3_4(out)
        C3 = out
        
        # Stage 4 forward.
        out = self.res_block4_1(out)
        out = self.res_block4_2(out)
        out = self.res_block4_3(out)
        out = self.res_block4_4(out)
        out = self.res_block4_5(out)
        out = self.res_block4_6(out)
        C4 = out
        
        # Stage 5 forward.
        out = self.res_block5_1(out)
        out = self.res_block5_2(out)
        out = self.res_block5_3(out)
        C5 = out
        
        return C2, C3, C4, C5  # output format for FPN

In [7]:
model = FpnResnet50()
model

FpnResnet50(
  (conv1_1): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2))
  (conv1_2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv1_3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1_1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn1_2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn1_3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (global_avg_pooling): AvgPool2d(kernel_size=7, stride=7, padding=0)
  (fc): Linear(in_features=2048, out_features=1000, bias=True)
  (res_block2_1): BottleNeckBlock(
    (block): Sequential(
      (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(64, 64, kernel_

In [2]:
from model.backbone.resnet import ResNet50
import torch

In [3]:
model = ResNet50()

In [4]:
data = torch.randn((64, 3, 224, 224))

In [5]:
C2, C3, C4, C5 = model(data)

In [6]:
assert C5.shape == (64, 512, 7, 7)

In [7]:
assert C4.shape == (64, 256, 14, 14)

In [8]:
assert C3.shape == (64, 128, 28, 28)

In [9]:
assert C2.shape == (64, 64, 56, 56)

In [10]:
data = torch.randn((64, 3, 512, 512))

In [11]:
C2, C3, C4, C5 = model(data)

In [12]:
assert C5.shape == (64, 512, 16, 16)

In [13]:
assert C4.shape == (64, 256, 32, 32)

In [14]:
assert C3.shape == (64, 128, 64, 64)

In [15]:
assert C2.shape == (64, 64, 128, 128)

In [16]:
data = torch.randn((64, 3, 224, 224))

In [17]:
model = ResNet50(num_classes=1000)
out = model(data)

  return F.log_softmax(self.fc(out))


In [18]:
assert out.shape == (64, 1000)