In [None]:
# import
import math, os, random, cv2, numpy, torch
import torch.nn as nn
from ultralytics import YOLO

# 1. BACKBONE
## 1.1. Conv

In [None]:
class Conv(nn.Module):
    """Convolution followed by batch normalisation and an optional SiLU.

    Args:
        in_channels: number of channels of the input feature map.
        out_channels: number of channels produced by the convolution.
        kernel_size: convolution kernel size (default 3).
        stride: convolution stride (default 1).
        padding: padding passed to ``nn.Conv2d`` (default 1).
        groups: ``groups`` argument forwarded to ``nn.Conv2d`` (default 1).
        activation: if true the block ends with ``nn.SiLU``; otherwise it
            ends with ``nn.Identity`` (i.e. no non-linearity).
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1, groups=1, activation=True):
        super().__init__()
        # The convolution carries no bias term; batch norm follows directly.
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            bias=False,
        )
        self.bn = nn.BatchNorm2d(num_features=out_channels, eps=0.001, momentum=0.03)
        if activation:
            self.activation = nn.SiLU()
        else:
            self.activation = nn.Identity()

    def forward(self, x):
        """Apply conv -> batch norm -> activation to ``x`` and return the result."""
        convolved = self.conv(x)
        normalised = self.bn(convolved)
        return self.activation(normalised)

## 1.2. C2f

In [None]:
# (a) Bottleneck: stack of 2 Conv layers with an optional shortcut connection
class Bottleneck(nn.Module):
    """Two stacked 3x3 ``Conv`` blocks with an optional residual connection.

    Args:
        in_channels: number of channels of the input feature map.
        out_channels: number of channels produced by both convolutions.
        shortcut: request a residual connection (the input is added to the
            output of the second convolution). The connection is only used
            when ``in_channels == out_channels``, because the element-wise
            sum requires both tensors to have the same channel count.
    """

    def __init__(self, in_channels, out_channels, shortcut=True):
        super().__init__()
        self.conv1 = Conv(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.conv2 = Conv(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
        # Both convs keep the spatial size (k=3, s=1, p=1), so the only thing
        # that can break the residual sum is a change in channel count.
        self.shortcut = bool(shortcut) and in_channels == out_channels

    def forward(self, x):
        """Run both convolutions and, if enabled, add the input back."""
        out = self.conv2(self.conv1(x))
        if self.shortcut:
            out = out + x
        return out
    
# (b) C2f: Conv + num_bottlenecks Bottleneck blocks + Conv
class C2f(nn.Module):
    """1x1 Conv, channel split, chained Bottlenecks, concat, 1x1 Conv.

    The output of the first convolution is split in two along the channel
    dimension. The first part is passed through ``num_bottlenecks``
    Bottleneck blocks in sequence; every intermediate result, together with
    both split parts, is concatenated and fused by the second convolution.

    Args:
        in_channels: number of channels of the input feature map.
        out_channels: number of channels of the output feature map.
        num_bottlenecks: how many Bottleneck blocks are chained.
        shortcut: forwarded to every Bottleneck.
    """

    def __init__(self, in_channels, out_channels, num_bottlenecks, shortcut=True):
        super().__init__()

        self.mid_channels = out_channels // 2
        self.num_bottlenecks = num_bottlenecks

        self.conv1 = Conv(in_channels, out_channels, kernel_size=1, stride=1, padding=0)

        self.bottlenecks = nn.ModuleList(
            [
                Bottleneck(self.mid_channels, self.mid_channels, shortcut=shortcut)
                for _ in range(num_bottlenecks)
            ]
        )

        # Width of the concatenated tensor built in forward(): one
        # mid_channels-wide map per bottleneck plus the two split parts, which
        # together hold all out_channels channels. Computing it this way
        # (instead of (num_bottlenecks + 2) * out_channels // 2) stays correct
        # when out_channels is odd; for even widths the value is identical.
        concat_channels = num_bottlenecks * self.mid_channels + out_channels
        self.conv2 = Conv(concat_channels, out_channels, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        """Return the fused feature map with ``out_channels`` channels."""
        x = self.conv1(x)

        # split x along the channel dimension (dim 1)
        x1 = x[:, :self.mid_channels, :, :]
        x2 = x[:, self.mid_channels:, :, :]

        # x1 is fed through the bottlenecks; each new result is placed in
        # front, so the final order is [b_n, ..., b_1, x1, x2]
        outputs = [x1, x2]
        for bottleneck in self.bottlenecks:
            x1 = bottleneck(x1)
            outputs.insert(0, x1)

        outputs = torch.cat(outputs, dim=1)
        return self.conv2(outputs)

## 1.3. SPFF (Spatial Pyramid Pooling - Fast, called SPPF in YOLOv8)

In [None]:
class SPFF(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=5):
        super().__init__()
        
        hidden_channels = in_channels // 2
        
        self.conv1 = Conv(in_channels, hidden_channels, kernel_size=1, stride=1, padding=0)
        
        # concatenate outputs of maxpool and feed to conv2
        self.conv2=Conv(4*hidden_channels,out_channels,kernel_size=1,stride=1,padding=0)