## MobileNet v1의 기본 블록
- 하나의 Depthwise convolution과 Pointwise convolution이 쌍을 이룸
- Conv2d(3x3) -> Conv2d(1x1) -> BatchNorm -> ReLU ..
- classifier head : fc layer
- hyperparameter로 width multiplier, resolution multiplier 구현

## Import

In [12]:
import torch
import torch.nn as nn
import torch.optim as optim

import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

In [22]:
# Select the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device  # cell echo; this run was on a laptop, so it displayed 'cpu'

'cpu'

## MobileNet_v1

In [13]:
class Depthwise_separable_conv(nn.Module):
    """Depthwise-separable convolution block.

    Applies, in order: a 3x3 depthwise convolution (one filter per input
    channel, via ``groups=in_channels``), a 1x1 pointwise convolution that
    mixes channels, batch normalization, and ReLU.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by the pointwise conv.
        stride: Stride of the depthwise convolution; values above 1
            downsample the spatial dimensions.
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super(Depthwise_separable_conv, self).__init__()

        # groups=in_channels makes each 3x3 filter see exactly one channel.
        self.depthwise = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=stride, padding=1, groups=in_channels, bias=False)
        # The 1x1 convolution recombines the per-channel outputs.
        self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, bias=False)
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Run the block on ``x`` and return the activated feature map."""
        out = self.depthwise(x)
        # Feed the depthwise result (not the raw input) into the pointwise
        # conv; otherwise the spatial filtering and the stride are skipped.
        out = self.pointwise(out)
        out = self.bn(out)
        out = self.relu(out)

        return out

In [14]:
# Helper used when applying the width multiplier
def divisible(v, divisor=8, min_value=None):
    """Round ``v`` to a multiple of ``divisor``, bounded below by ``min_value``.

    Args:
        v: Value to round (for example a scaled channel count).
        divisor: The result is a multiple of this number unless the lower
            bound applies.
        min_value: Lower bound for the result; falls back to ``divisor``
            when left as ``None``.

    Returns:
        The larger of the lower bound and the rounded value.
    """
    lower_bound = divisor if min_value is None else min_value

    # Add half a divisor, truncate, then floor to a multiple of divisor.
    multiples = int(v + divisor / 2) // divisor
    rounded = multiples * divisor

    return rounded if rounded > lower_bound else lower_bound

In [18]:
class MobileNet_v1(nn.Module):
    """MobileNet v1 image classifier built from depthwise-separable blocks.

    The network is a strided 3x3 stem convolution, thirteen
    ``Depthwise_separable_conv`` blocks, global average pooling, and a
    fully connected classification layer.

    Args:
        num_classes: Number of output logits.
        width_mult: Width multiplier. Every base channel count is scaled
            by this factor and rounded with ``divisible(..., divisor=8)``.
        resolution_mult: Resolution multiplier. It is only used to compute
            ``self.input_size``; the caller is responsible for feeding
            images of that size.
        input_size: Base input resolution before ``resolution_mult``.
    """

    def __init__(self, num_classes=1000, width_mult=1.0, resolution_mult=1.0, input_size=224):
        super(MobileNet_v1, self).__init__()

        # Expected spatial size of the input after the resolution multiplier.
        self.input_size = int(input_size * resolution_mult)

        def adjust_channels(channels):
            # Scale a base channel count by width_mult, rounded to a multiple of 8.
            return divisible(channels * width_mult, divisor=8)

        stem_channels = adjust_channels(32)
        self.init_conv = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=stem_channels, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(stem_channels),
            nn.ReLU(),
        )

        # Feature Extraction: (base output channels, stride) for each block.
        layer_plan = (
            (64, 1),
            (128, 2),
            (128, 1),
            (256, 2),
            (256, 1),
            (512, 2),
            (512, 1),
            (512, 1),
            (512, 1),
            (512, 1),
            (512, 1),
            (1024, 2),
            (1024, 1),
        )
        blocks = []
        in_channels = stem_channels
        for base_out, stride in layer_plan:
            out_channels = adjust_channels(base_out)
            blocks.append(Depthwise_separable_conv(in_channels, out_channels, stride=stride))
            in_channels = out_channels
        self.feature = nn.Sequential(*blocks)

        # Classification Head: in_channels now holds the width of the last block.
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc_layer = nn.Linear(in_channels, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for ``x``."""
        out = self.init_conv(x)
        out = self.feature(out)
        out = self.avgpool(out)
        # Collapse the pooled 1x1 spatial dimensions before the linear layer.
        out = torch.flatten(out, 1)
        out = self.fc_layer(out)

        return out

In [20]:
# Smoke test: build a reduced model and push one random image through it.
model = MobileNet_v1(width_mult=0.75, resolution_mult=0.5, num_classes=1000)
# Batch of one RGB image at half of the 224-pixel base resolution.
input_image = torch.randn((1, 3) + (int(224 * 0.5),) * 2)  # (1, 3, 112, 112)
output = model(input_image)
print(output.size())  # (1, 1000)

torch.Size([1, 1000])
