# **FEATURE PYRAMID NETWORK**

Importing the required libraries

In [2]:
from torchvision import models
import torch.nn as nn
import torch.nn.functional as F
import torch


Class FPN builds a pyramid of feature maps (P3, P4, P5, P6, P7) from the backbone feature maps C3, C4 and C5.

In [3]:
class FPN(nn.Module):
    """Feature pyramid head that turns C3, C4, C5 into P3, P4, P5, P6, P7.

    Each backbone map is projected to ``feature_size`` channels with a 1x1
    lateral convolution. The coarser level is upsampled and added to the
    next finer lateral map, and every merged map is smoothed with a 3x3
    convolution. P6 and P7 are extra, coarser levels computed with
    stride-2 convolutions starting from C5.

    Args:
        C3_size: Number of channels of the C3 input.
        C4_size: Number of channels of the C4 input.
        C5_size: Number of channels of the C5 input.
        feature_size: Number of channels of every output pyramid level.
    """

    def __init__(self, C3_size=128, C4_size=256, C5_size=512, feature_size=256):
        super().__init__()

        # P5: lateral 1x1 conv on C5, followed by a 3x3 smoothing conv.
        self.P5_1 = nn.Conv2d(C5_size, feature_size, kernel_size=1)
        # Kept so existing attribute access and the printed module structure
        # stay the same; forward() resizes to the exact lateral size instead.
        self.P5_upsampled = nn.Upsample(scale_factor=2, mode='nearest')
        self.P5_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)

        # P4: lateral 1x1 conv on C4, merged with the upsampled P5.
        self.P4_1 = nn.Conv2d(C4_size, feature_size, kernel_size=1)
        # Kept for compatibility, see P5_upsampled.
        self.P4_upsampled = nn.Upsample(scale_factor=2, mode='nearest')
        self.P4_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)

        # P3: lateral 1x1 conv on C3, merged with the upsampled P4.
        self.P3_1 = nn.Conv2d(C3_size, feature_size, kernel_size=1)
        self.P3_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)

        # "P6 is obtained via a 3x3 stride-2 conv on C5".
        # The input width must follow C5_size (it used to be hard-coded to
        # 512, which broke every non-default C5_size).
        self.P6 = nn.Conv2d(C5_size, feature_size, kernel_size=3, stride=2, padding=1)

        # "P7 is computed by applying ReLU followed by a 3x3 stride-2 conv on P6"
        self.P7_1 = nn.ReLU()
        self.P7_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=2, padding=1)

    @staticmethod
    def _upsample_like(source, reference):
        """Nearest-neighbour resize of ``source`` to the H x W of ``reference``."""
        # Resizing to the target size (instead of a fixed factor of 2) keeps
        # the element-wise addition valid when the finer map is not exactly
        # twice as large, e.g. for inputs whose size is not divisible by 32.
        return F.interpolate(source, size=reference.shape[-2:], mode='nearest')

    def forward(self, C3, C4, C5):
        """Compute the pyramid levels.

        Args:
            C3: Finest backbone map, with ``C3_size`` channels.
            C4: Intermediate backbone map, with ``C4_size`` channels.
            C5: Coarsest backbone map, with ``C5_size`` channels.

        Returns:
            The list ``[P3, P4, P5, P6, P7]``; every entry has
            ``feature_size`` channels.
        """
        # Top of the pyramid: the un-smoothed lateral map feeds the top-down
        # path, the smoothed one is the P5 output.
        P5_lateral = self.P5_1(C5)
        P5_x = self.P5_2(P5_lateral)

        # Merge upsampled P5 into the C4 lateral map.
        P4_lateral = self.P4_1(C4)
        P4_merged = self._upsample_like(P5_lateral, P4_lateral) + P4_lateral
        P4_x = self.P4_2(P4_merged)

        # Merge upsampled (un-smoothed) P4 into the C3 lateral map.
        P3_lateral = self.P3_1(C3)
        P3_merged = P3_lateral + self._upsample_like(P4_merged, P3_lateral)
        P3_x = self.P3_2(P3_merged)

        # Extra coarse levels derived from C5.
        P6_x = self.P6(C5)
        P7_x = self.P7_2(self.P7_1(P6_x))

        return [P3_x, P4_x, P5_x, P6_x, P7_x]

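Class ResNet wraps a pretrained ResNet-34 as the backbone. It consists of a stem of 4 modules (`conv_block`: convolution, batch norm, ReLU and max pooling) followed by 4 residual layers, each built from BasicBlocks. The stem down-samples the input and the residual layers improve the model performance. The forward pass returns the outputs of the last three residual layers, which are used as C3, C4 and C5.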

In [9]:
class ResNet(nn.Module):
    """Pretrained torchvision ResNet-34 used as a multi-scale feature extractor.

    The first four child modules of the torchvision model form the stem
    (``conv_block``); the next four are kept as separate stages
    (``res_layers``) so that their intermediate outputs can be collected.
    Any remaining child modules of the torchvision model are not used.
    """

    def __init__(self):
        super().__init__()
        children = list(models.resnet34(pretrained=True).children())
        stem_modules, stage_modules = children[:4], children[4:8]
        self.conv_block = nn.Sequential(*stem_modules)
        self.res_layers = nn.ModuleList(stage_modules)

    def forward(self, x):
        """Run the backbone and return the outputs of the last three stages.

        Args:
            x: Input passed to the stem.

        Returns:
            A list with the outputs of ``res_layers[1]``, ``res_layers[2]``
            and ``res_layers[3]``, in that order; the first stage's output
            is computed but not returned.
        """
        out = self.conv_block(x)
        stage_outputs = []
        for stage in self.res_layers:
            out = stage(out)
            stage_outputs.append(out)
        # Skip the first stage's output.
        return stage_outputs[1:]


In [11]:
# Instantiate the backbone; its constructor loads a pretrained ResNet-34.
resnet = ResNet()

In [17]:
# Echo the module so the notebook prints its layer structure.
resnet

ResNet(
  (conv_block): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (res_layers): ModuleList(
    (0): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affi

In [6]:
# Build the FPN with its default sizes (C3=128, C4=256, C5=512 channels in, 256 channels out).
fpn = FPN()


Input shape: a batch of 2 images, 3 channels, 256 height, 256 width

In [7]:
# Random dummy input of shape (2, 3, 256, 256), drawn from a standard normal.
x = torch.randn(2, 3, 256, 256)


In [10]:
# Run the backbone; it returns the outputs of its last three residual layers.
c3,c4,c5 = resnet(x)


tensor([[[[1.0743, 0.6214, 0.8941,  ..., 0.7211, 0.4373, 0.6657],
          [1.0743, 0.7889, 0.7313,  ..., 0.7211, 0.9878, 0.9878],
          [1.0010, 0.7889, 0.8432,  ..., 0.9725, 0.9878, 0.9878],
          ...,
          [0.5704, 0.9695, 0.7770,  ..., 0.8019, 0.9754, 0.9754],
          [0.6930, 0.9695, 1.1037,  ..., 0.9050, 0.9050, 0.6373],
          [0.6563, 0.9083, 1.1037,  ..., 0.9050, 0.9643, 0.7323]],

         [[0.4413, 0.2925, 0.4300,  ..., 0.4512, 0.3490, 0.2707],
          [0.6643, 0.6643, 0.3898,  ..., 0.6287, 0.6246, 0.5311],
          [0.6643, 0.6643, 0.4486,  ..., 0.5682, 0.6246, 0.8432],
          ...,
          [0.5025, 0.5784, 0.5784,  ..., 0.5519, 0.4680, 0.2235],
          [0.3164, 0.5784, 0.5784,  ..., 0.7225, 0.7225, 0.1824],
          [0.9370, 0.9370, 0.6040,  ..., 0.4846, 0.3545, 0.5783]],

         [[0.4841, 0.7092, 0.6063,  ..., 0.6074, 0.6077, 0.4752],
          [0.7080, 0.9971, 0.9971,  ..., 0.6300, 0.8929, 0.4752],
          [0.7152, 0.8112, 0.3825,  ..., 0

In [12]:
# Feed the backbone features through the FPN; the result is the list [P3, P4, P5, P6, P7].
features = fpn(c3,c4,c5)

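The spatial size of the feature map doubles at each step down the pyramid, because the features are upsampled by a factor of 2 starting from the top.
If P5, P4, P3 are the feature maps, then P5 < P4 < P3 based on their spatial size; P6 and P7 are smaller still, since each is produced by a stride-2 convolution.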

In [16]:
# Print the size of every pyramid level, one per line.
for f in features:
    print(f.size())

torch.Size([2, 256, 32, 32])
torch.Size([2, 256, 16, 16])
torch.Size([2, 256, 8, 8])
torch.Size([2, 256, 4, 4])
torch.Size([2, 256, 2, 2])
