In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

In [2]:
# Stage table consumed by Net.create_features, one row per stage.
# Row layout: [expand_ratio, channels, repeats, stride, kernel_size]
#   - channels and repeats are the unscaled values; create_features multiplies
#     them by the width / depth factors of the selected version.
#   - stride is applied to the first block of a stage only; later repeats use 1.
base = [
    # expand, channels, repeats, stride, kernel
    [1,        16,       1,       1,      3],
    [6,        24,       2,       2,      3],
    [6,        40,       2,       2,      5],
    [6,        80,       3,       2,      3],
    [6,       112,       3,       1,      5],
    [6,       192,       4,       2,      5],
    [6,       320,       1,       1,      3],
]

In [None]:
# Per-version settings, keyed by version name.
# Each value is the tuple (phi, resolution, drop_rate):
#   - phi is the exponent Net.calculate_factors uses to derive the scaling
#     factors: depth = alpha ** phi and width = beta ** phi.
#   - resolution is unpacked by calculate_factors but not used there.
#   - drop_rate is the dropout probability of the classifier head.
phi_values = {
    "b0": (0,   224, 0.2),
    "b1": (0.5, 240, 0.2),
    "b2": (1,   260, 0.3),
    "b3": (2,   300, 0.3),
    "b4": (3,   380, 0.4),
    "b5": (4,   456, 0.4),
    "b6": (5,   528, 0.5),
    "b7": (6,   600, 0.5),
}

In [3]:
class CNN(nn.Module):
    """Convolution followed by batch normalisation and a SiLU activation.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by the convolution.
        kernel_size: Convolution kernel size.
        stride: Convolution stride.
        padding: Padding applied by the convolution.
        groups: Number of convolution groups (default 1). Passing
            ``groups=in_channels`` gives one group per input channel.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, groups=1):
        super().__init__()
        # The convolution has no bias term; it is followed directly by BatchNorm2d.
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            bias=False,
        )
        self.bn = nn.BatchNorm2d(num_features=out_channels)
        self.silu = nn.SiLU()

    def forward(self, x):
        """Apply conv -> batch norm -> SiLU to ``x`` and return the result."""
        convolved = self.conv(x)
        normalised = self.bn(convolved)
        return self.silu(normalised)

class Squeeze(nn.Module):
    """Channel gating: rescales the input by per-channel weights in (0, 1).

    The gate is computed by pooling the input to 1x1 spatially, projecting
    to ``reduced_dim`` channels with a 1x1 convolution, applying SiLU,
    projecting back to ``in_channels`` and applying a sigmoid.

    Args:
        in_channels: Number of channels of the input (and of the output).
        reduced_dim: Number of channels in the bottleneck between the two
            1x1 convolutions.
    """

    def __init__(self, in_channels, reduced_dim):
        super().__init__()
        gate_layers = [
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(in_channels, reduced_dim, kernel_size=1),
            nn.SiLU(),
            nn.Conv2d(reduced_dim, in_channels, kernel_size=1),
            nn.Sigmoid(),
        ]
        self.squeeze = nn.Sequential(*gate_layers)

    def forward(self, x):
        """Return ``x`` multiplied by its per-channel gate."""
        channel_gate = self.squeeze(x)
        return x * channel_gate

In [4]:
class ResBlock(nn.Module):
    """Inverted-residual block: optional expansion, depthwise conv, channel
    gating (``Squeeze``) and a 1x1 projection, with an optional skip connection.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        kernel_size: Kernel size of the depthwise convolution.
        stride: Stride of the depthwise convolution.
        padding: Padding of the depthwise convolution.
        expand_ratio: Multiplier giving the hidden width
            (``hidden_dim = in_channels * expand_ratio``). When the hidden width
            equals ``in_channels`` the expansion convolution is skipped.
        reduction: Divisor used to size the ``Squeeze`` bottleneck from
            ``in_channels`` (default 4).
        survival_prob: Probability of keeping the block's branch for a sample
            during training (stochastic depth). Must be in ``(0, 1]``.

    Raises:
        ValueError: If ``survival_prob`` is not in ``(0, 1]``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, expand_ratio, reduction=4, survival_prob=0.8):
        super(ResBlock, self).__init__()
        # survival_prob is used as a divisor in stochastic_depth: 0 would produce
        # NaNs during training and values above 1 would silently rescale the branch.
        if not 0.0 < survival_prob <= 1.0:
            raise ValueError(
                f"survival_prob must be in (0, 1], got {survival_prob!r}"
            )
        self.survival_prob = survival_prob
        # The skip connection is only shape-compatible when the block keeps both
        # the channel count and the spatial size.
        self.use_residual = in_channels == out_channels and stride == 1
        hidden_dim = in_channels * expand_ratio
        self.expand = in_channels != hidden_dim
        # Bottleneck width is derived from in_channels (not hidden_dim); floor it
        # at 1 so that in_channels < reduction does not give a zero-width layer.
        reduced_dim = max(1, int(in_channels / reduction))

        if self.expand:
            # NOTE(review): the expansion is a 3x3 convolution here; MBConv blocks
            # are usually described with a 1x1 pointwise expansion — confirm this
            # is intentional. Left unchanged to keep parameter shapes stable.
            self.expand_conv = CNN(in_channels, hidden_dim, kernel_size=3, stride=1, padding=1)
        self.conv = nn.Sequential(
            # Depthwise convolution: one group per hidden channel.
            CNN(hidden_dim, hidden_dim, kernel_size, stride, padding, groups=hidden_dim),
            Squeeze(hidden_dim, reduced_dim),
            # Linear 1x1 projection to out_channels (no activation afterwards).
            nn.Conv2d(hidden_dim, out_channels, 1, bias=False),
            nn.BatchNorm2d(out_channels)
        )

    def stochastic_depth(self, x):
        """Randomly zero whole samples of ``x`` during training.

        Outside training mode ``x`` is returned unchanged. In training mode one
        keep/drop decision is drawn per sample (first dimension of ``x``) with
        keep probability ``survival_prob``; ``x`` is divided by
        ``survival_prob`` and multiplied by the resulting mask.
        """
        if not self.training:
            return x
        # Shape (N, 1, 1, 1) so the same decision applies to a whole sample.
        binary_tensor = torch.rand(x.shape[0], 1, 1, 1, device=x.device) < self.survival_prob
        return torch.div(x, self.survival_prob) * binary_tensor

    def forward(self, inputs):
        """Run the block on ``inputs``; adds the skip connection when
        ``use_residual`` is set, applying stochastic depth to the branch first."""
        x = self.expand_conv(inputs) if self.expand else inputs
        x = self.conv(x)
        if self.use_residual:
            x = self.stochastic_depth(x)
            x += inputs
        return x

In [None]:
class Net(nn.Module):
    def __init__(self, ver, num_classes):
        """Build the network for one version.

        Args:
            ver: Key into ``phi_values`` selecting the version (e.g. ``"b0"``).
            num_classes: Number of outputs of the final linear layer.
        """
        super().__init__()
        scaling = self.calculate_factors(ver)
        width_factor, depth_factor, dropout_rate = scaling
        # Width of the features fed to the classifier, scaled with the version.
        last_channel = int(1280 * width_factor)

        self.pool = nn.AdaptiveAvgPool2d(1)
        self.features = self.create_features(width_factor, depth_factor, last_channel)

        head_layers = [
            nn.Dropout(p=dropout_rate),
            nn.Linear(in_features=last_channel, out_features=num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)
    
    def calculate_factors(self, ver, alpha=1.2, beta=1.1):
        """Look up ``ver`` in ``phi_values`` and derive its scaling factors.

        Args:
            ver: Key into ``phi_values``.
            alpha: Base of the depth factor (``alpha ** phi``).
            beta: Base of the width factor (``beta ** phi``).

        Returns:
            Tuple ``(width_factor, depth_factor, drop_rate)``.

        Raises:
            KeyError: If ``ver`` is not a key of ``phi_values``.
        """
        # The stored resolution is not needed to compute the factors.
        phi, _resolution, drop_rate = phi_values[ver]
        return beta ** phi, alpha ** phi, drop_rate

    def create_features(self, width_factor, depth_factor, last_channel):
        channels = int(32 * width_factor)
        features = [CNN(3, channels, 3, stride=2, padding=1)]
        in_channels = channels

        for expand_ratio, channels, repeats, stride, kernel_size in base:
            out_channels = 4 * math.ceil(int(channels * width_factor) / 4)
            layers_repeats = int(repeats * depth_factor)
            for layer in range(layers_repeats):
                features.append(
                    ResBlock(in_channels, out_channels, kernel_size, stride=stride if layer == 0 else 1, padding=kernel_size//2, expand_ratio=expand_ratio)
                )
                in_channels = out_channels