In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
class InvertedResidual(nn.Module):
    """Inverted-residual bottleneck: expand (1x1) -> depthwise (3x3) -> project (1x1).

    The layers are stored, in order, in ``self.block``:

    * an optional 1x1 expansion conv + BatchNorm + ReLU6 (omitted when
      ``expand_ratio == 1``),
    * a 3x3 depthwise conv (``groups`` equals the expanded width) + BatchNorm
      + ReLU6, carrying the requested ``stride``,
    * a 1x1 projection conv + BatchNorm with no activation afterwards.

    Args:
        in_channels: Channel count of the input tensor.
        out_channels: Channel count produced by the projection conv.
        stride: Stride of the depthwise convolution.
        expand_ratio: Multiplier applied to ``in_channels`` to obtain the
            expanded width (rounded to the nearest integer).
    """

    def __init__(self, in_channels, out_channels, stride, expand_ratio):
        super().__init__()

        # Width of the expanded representation the depthwise conv operates on.
        width = int(round(in_channels * expand_ratio))

        stages = []

        # Expansion stage; with a ratio of 1 the width already equals
        # in_channels, so the extra 1x1 conv is left out.
        if expand_ratio != 1:
            stages += [
                nn.Conv2d(in_channels, width, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(width),
                nn.ReLU6(inplace=True),
            ]

        stages += [
            # Depthwise stage: one 3x3 filter per channel (groups == width).
            nn.Conv2d(width, width, kernel_size=3, stride=stride, padding=1, groups=width, bias=False),
            nn.BatchNorm2d(width),
            nn.ReLU6(inplace=True),
            # Projection stage: linear bottleneck, deliberately no activation.
            nn.Conv2d(width, out_channels, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(out_channels),
        ]

        self.block = nn.Sequential(*stages)

        # The identity shortcut is only valid when the block changes neither
        # the spatial size (stride 1) nor the channel count.
        self.use_res_connect = stride == 1 and in_channels == out_channels

    def forward(self, x):
        """Run the block, adding the input back when the shortcut is enabled."""
        out = self.block(x)
        return x + out if self.use_res_connect else out

In [None]:
class MobileNetV2(nn.Module):
    """MobileNetV2-style image classifier built from ``InvertedResidual`` blocks.

    Pipeline: a 3x3 stride-2 stem convolution (3 -> 32 channels), a stack of
    thirteen inverted-residual blocks (32 -> 320 channels), a 1x1 head
    convolution (320 -> 1280 channels), global average pooling and a linear
    classifier.

    The stem and head both use the Conv -> BatchNorm -> ReLU6 ordering, the
    same ordering ``InvertedResidual`` uses for its own convolutions.

    Args:
        num_classes: Number of logits produced by the final linear layer.
    """

    def __init__(self, num_classes=1000):
        super(MobileNetV2, self).__init__()

        # Stem: 3x3 convolution that halves the spatial resolution.
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)

        # Inverted-residual stack. Blocks whose stride is 1 and whose input and
        # output widths match carry an identity shortcut.
        # NOTE(review): the published MobileNetV2 table lists stage repeats of
        # 1, 2, 3, 4, 3, 3, 1 with the 96-channel stage at stride 1; this stack
        # uses 1, 2, 2, 3, 2, 2, 1 and stride 2 going 64 -> 96. Left unchanged
        # here to keep parameter names and shapes stable; confirm whether the
        # lighter variant is intentional.
        self.features = nn.Sequential(
            InvertedResidual(32, 16, stride=1, expand_ratio=1),
            InvertedResidual(16, 24, stride=2, expand_ratio=6),
            InvertedResidual(24, 24, stride=1, expand_ratio=6),
            InvertedResidual(24, 32, stride=2, expand_ratio=6),
            InvertedResidual(32, 32, stride=1, expand_ratio=6),
            InvertedResidual(32, 64, stride=2, expand_ratio=6),
            InvertedResidual(64, 64, stride=1, expand_ratio=6),
            InvertedResidual(64, 64, stride=1, expand_ratio=6),
            InvertedResidual(64, 96, stride=2, expand_ratio=6),
            InvertedResidual(96, 96, stride=1, expand_ratio=6),
            InvertedResidual(96, 160, stride=2, expand_ratio=6),
            InvertedResidual(160, 160, stride=1, expand_ratio=6),
            InvertedResidual(160, 320, stride=1, expand_ratio=6)
        )

        # Head: 1x1 convolution widening the features before pooling.
        self.conv2 = nn.Conv2d(320, 1280, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(1280)

        # Global average pooling down to a 1x1 map per channel.
        self.avgpool = nn.AdaptiveAvgPool2d(1)

        # Linear classifier over the pooled 1280-dimensional feature vector.
        self.fc = nn.Linear(1280, num_classes)

    def forward(self, x):
        """Compute class logits for a batch of 3-channel images.

        Args:
            x: Input batch in the layout ``nn.Conv2d`` accepts, with 3 channels.

        Returns:
            Tensor holding ``num_classes`` logits per sample.
        """
        # Normalise first, then activate: applying ReLU6 before BatchNorm would
        # clip un-normalised conv outputs and leave the head features without a
        # non-linearity ahead of pooling.
        x = F.relu6(self.bn1(self.conv1(x)))  # Stem: Conv -> BN -> ReLU6
        x = self.features(x)  # Inverted-residual stack
        x = F.relu6(self.bn2(self.conv2(x)))  # Head: Conv -> BN -> ReLU6
        x = self.avgpool(x)  # Global average pooling
        x = torch.flatten(x, 1)  # Keep the batch dimension, flatten the rest
        x = self.fc(x)  # Class logits
        return x
