In [1]:
import torch
import torch.nn as nn

In [None]:
class ConvBlock(nn.Module):
    """2-D convolution followed by optional batch normalization and ReLU.

    Args:
        in_channels: Number of channels in the input tensor.
        out_channels: Number of channels produced by the convolution.
        kernel_size: Kernel size forwarded to ``nn.Conv2d``.
        stride: Stride forwarded to ``nn.Conv2d`` (default 1).
        padding: Zero-padding forwarded to ``nn.Conv2d`` (default 0).
        use_bn: When true, ``nn.BatchNorm2d(out_channels)`` is applied after
            the convolution; otherwise ``nn.Identity`` takes its place.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, use_bn=True):
        super().__init__()

        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
        )

        # An Identity stand-in keeps forward() free of branching when
        # normalization is switched off.
        if use_bn:
            self.bn = nn.BatchNorm2d(out_channels)
        else:
            self.bn = nn.Identity()

        # In-place ReLU overwrites the tensor produced by the previous layer.
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return ``relu(bn(conv(x)))``; no pooling is applied here."""
        return self.relu(self.bn(self.conv(x)))

In [None]:
class Backbone(nn.Module):
    """Convolutional feature extractor assembled from ``ConvBlock`` stages.

    The network has six stages (``block1`` .. ``block6``); each of the first
    four is followed by a 2x2, stride-2 max-pool (``pool1`` .. ``pool4``).
    Channel widths go 3 -> 64 -> 192 -> 512 -> 1024 -> 1024 -> 1024.

    Spatial size is halved by the first 7x7 convolution, by each of the four
    pools, and by the last convolution of ``block5``, so a 448x448 input
    produces a 7x7 feature map with 1024 channels.
    """

    def __init__(self):
        super().__init__()

        # Block 1: initial 7x7 stride-2 conv, then maxpool
        self.block1 = nn.Sequential(
            ConvBlock(3, 64, kernel_size=7, stride=2, padding=3, use_bn=True),
        )
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Block 2: convolution + maxpool
        self.block2 = nn.Sequential(
            ConvBlock(64, 192, kernel_size=3, stride=1, padding=1, use_bn=True),
        )
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Block 3: mixed 1x1 / 3x3 convolutions + maxpool
        self.block3 = nn.Sequential(
            ConvBlock(192, 128, kernel_size=1, stride=1, padding=0, use_bn=True),
            ConvBlock(128, 256, kernel_size=3, stride=1, padding=1, use_bn=True),
            ConvBlock(256, 256, kernel_size=1, stride=1, padding=0, use_bn=True),
            ConvBlock(256, 512, kernel_size=3, stride=1, padding=1, use_bn=True),
        )
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Block 4: 4 repetitions of (1x1, 3x3) + final 1x1 and 3x3 convs.
        # Every repetition must be a freshly constructed pair: multiplying a
        # list of modules (``[a, b] * 4``) repeats the *same* two objects, so
        # all four repetitions would share one set of weights.
        block4_layers = []
        for _ in range(4):
            block4_layers.append(
                ConvBlock(512, 256, kernel_size=1, stride=1, padding=0, use_bn=True)
            )
            block4_layers.append(
                ConvBlock(256, 512, kernel_size=3, stride=1, padding=1, use_bn=True)
            )
        block4_layers.append(
            ConvBlock(512, 512, kernel_size=1, stride=1, padding=0, use_bn=True)
        )
        block4_layers.append(
            ConvBlock(512, 1024, kernel_size=3, stride=1, padding=1, use_bn=True)
        )
        self.block4 = nn.Sequential(*block4_layers)
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Block 5: additional convolutions; the last one has stride 2 and
        # performs the final spatial downsampling (no pool follows it).
        self.block5 = nn.Sequential(
            ConvBlock(1024, 512, kernel_size=1, stride=1, padding=0, use_bn=True),
            ConvBlock(512, 1024, kernel_size=3, stride=1, padding=1, use_bn=True),
            ConvBlock(1024, 1024, kernel_size=3, stride=1, padding=1, use_bn=True),
            ConvBlock(1024, 1024, kernel_size=3, stride=2, padding=1, use_bn=True),
        )

        # Block 6: final convolution layers (shape-preserving)
        self.block6 = nn.Sequential(
            ConvBlock(1024, 1024, kernel_size=3, stride=1, padding=1, use_bn=True),
            ConvBlock(1024, 1024, kernel_size=3, stride=1, padding=1, use_bn=True),
        )

    def forward(self, x):
        """Run ``x`` through all six stages and return the final feature map.

        Args:
            x: Input tensor with 3 channels in dimension 1 (the first
                convolution is built with ``in_channels=3``).

        Returns:
            The output of ``block6``, which has 1024 channels.
        """
        x = self.block1(x)
        x = self.pool1(x)

        x = self.block2(x)
        x = self.pool2(x)

        x = self.block3(x)
        x = self.pool3(x)

        x = self.block4(x)
        x = self.pool4(x)

        # Blocks 5 and 6 are not followed by pooling layers.
        x = self.block5(x)

        x = self.block6(x)

        return x


In [None]:
class PredictionHead(nn.Module):
    """Fully connected layers for the prediction head.

    Holds two linear layers (``fc1``: in_features -> hidden_features,
    ``fc2``: hidden_features -> out_features) and an in-place ReLU.

    Args:
        in_features: Size of the flattened input vector. The default,
            ``1024 * 7 * 7``, is the original hard-coded value.
        hidden_features: Width of the hidden layer (default 4096).
        out_features: Size of the output vector (default 1470).
            NOTE(review): 1470 presumably equals 7 * 7 * 30 (a YOLOv1-style
            grid encoding) -- confirm against the loss/decoding code.
    """

    def __init__(self, in_features: int = 1024 * 7 * 7, hidden_features: int = 4096,
                 out_features: int = 1470):
        super().__init__()

        # Layers must be attributes of ``self`` so nn.Module registers them
        # (parameters(), state_dict(), .to(device)). Assigning them to the
        # ``nn`` package instead would leave this module empty and mutate
        # torch.nn globally.
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, out_features)

        self.relu = nn.ReLU(inplace=True)