In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
class SELayer(nn.Module):
    """Channel-gating layer: pool each channel to a scalar, pass it through
    a two-layer bias-free bottleneck, and rescale the input by the result.

    Args:
        channels: Number of channels (dimension 1) of the input tensor.
        reduction: Divisor applied to ``channels`` to get the bottleneck
            width. The width is clamped to at least 1 so that
            ``channels < reduction`` does not produce an empty bottleneck.
    """

    def __init__(self, channels, reduction=4):
        super().__init__()
        # Without the clamp, channels < reduction gives a 0-wide bottleneck:
        # fc2 then emits all zeros and the gate is a constant sigmoid(0) = 0.5.
        squeezed = max(1, channels // reduction)
        self.fc1 = nn.Linear(channels, squeezed, bias=False)
        self.fc2 = nn.Linear(squeezed, channels, bias=False)

    def forward(self, x):
        """Return ``x`` scaled per channel by a gate in (0, 1).

        Args:
            x: 4-D tensor laid out as (batch, channels, height, width).

        Returns:
            Tensor with the same shape as ``x``.
        """
        batch_size, channels = x.shape[:2]
        # Squeeze: global average over the spatial dims -> (batch, channels).
        y = F.adaptive_avg_pool2d(x, (1, 1)).flatten(1)
        # Excite: bottleneck MLP, then sigmoid to obtain per-channel gates.
        y = F.relu(self.fc1(y))
        y = torch.sigmoid(self.fc2(y)).view(batch_size, channels, 1, 1)
        # The (batch, channels, 1, 1) gate broadcasts over height and width.
        return x * y

In [None]:
class ConvBlock(nn.Module):
    """Expand -> depthwise -> (optional SE) -> project convolution block.

    The input is widened by a 1x1 convolution, filtered by a depthwise
    convolution, optionally gated by an ``SELayer``, and projected to
    ``out_channels`` by another 1x1 convolution. Each convolution is
    followed by batch normalisation.

    A skip connection adds the input to the output only when the block
    preserves the tensor shape (stride 1 and ``in_channels == out_channels``).

    Args:
        in_channels: Channels of the input tensor.
        out_channels: Channels of the output tensor.
        kernel_size: Depthwise kernel size; padding is ``kernel_size // 2``,
            which keeps the spatial size at stride 1 for odd kernels.
        stride: Stride of the depthwise convolution.
        expand_ratio: Integer multiplier giving the hidden width
            ``in_channels * expand_ratio``.
        use_se: If true, apply an ``SELayer`` on the hidden features before
            the projection.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, expand_ratio, use_se=False):
        super().__init__()
        self.expand_ratio = expand_ratio
        self.use_se = use_se

        hidden_channels = in_channels * expand_ratio

        # Pointwise expansion to the hidden width.
        self.expand = nn.Conv2d(in_channels, hidden_channels, kernel_size=1)
        self.bn1 = nn.BatchNorm2d(hidden_channels)
        self.relu = nn.SiLU()  # Swish activation

        # Depthwise: groups == channels, so each channel has its own filter.
        self.depthwise = nn.Conv2d(
            hidden_channels,
            hidden_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=kernel_size // 2,
            groups=hidden_channels,
        )
        self.bn2 = nn.BatchNorm2d(hidden_channels)

        # Pointwise projection to the output width.
        self.project = nn.Conv2d(hidden_channels, out_channels, kernel_size=1)
        self.bn3 = nn.BatchNorm2d(out_channels)

        if self.use_se:
            self.se = SELayer(hidden_channels)

        # The identity can only be added when the block keeps the input shape.
        # Conv2d normalises ``stride`` to a tuple, so compare against that to
        # accept 1, (1, 1) and [1, 1] alike.
        self.use_residual = (
            self.depthwise.stride == (1, 1) and in_channels == out_channels
        )

    def forward(self, x):
        """Run the block on ``x``.

        Args:
            x: 4-D tensor laid out as (batch, in_channels, height, width).

        Returns:
            Tensor with ``out_channels`` channels; the spatial size is set by
            the depthwise stride. The input is added to the result when the
            block preserves shape.
        """
        identity = x

        x = self.expand(x)
        x = self.bn1(x)
        x = self.relu(x)

        x = self.depthwise(x)
        x = self.bn2(x)
        # NOTE(review): no activation follows bn2 here; reference MBConv
        # designs apply one after the depthwise stage. Confirm the omission
        # is intentional.

        if self.use_se:
            x = self.se(x)

        x = self.project(x)
        x = self.bn3(x)

        # Skip the addition when shapes differ; it would otherwise raise or
        # silently broadcast.
        if self.use_residual:
            x = x + identity
        return x