# MobileNet V1 and V2

    '''
    参数设置参考Justin 598WI2022课件


    ----------------------------
    MobileNet V2 只实现了一个版本
    
    5轮训练: 15m 1.5s;
    100%|██████████| 782/782 [03:00<00:00,  4.34it/s]
    Epoch [2/5], Loss: 0.8395
    Epoch [3/5], Loss: 0.6184
    Epoch [4/5], Loss: 0.4732
    Epoch [5/5], Loss: 0.3651
    Test Accuracy: 79.71% （效果还可以，训练集上收敛挺快）


    官方实现 mobilenet_v2 t值默认
    5轮训练: 27m 23.9s;
    100%|██████████| 782/782 [05:27<00:00,  2.39it/s]
    Epoch [2/5], Loss: 1.0033
    Epoch [3/5], Loss: 0.7206
    Epoch [4/5], Loss: 0.5603
    Epoch [5/5], Loss: 0.4549
    Test Accuracy: 78.62%
    '''

## MobileNet V1
<img src="./image/mobilenet/mobilenet1.png" alt="Model Image" width="800">

## MobileNet V2
### InvertedResidual
<img src="./image/mobilenet/mobilenet2.png" alt="Model Image" width="800">

### ReLU6
<img src="./image/mobilenet/mobilenet3.png" alt="Model Image" width="800">
<img src="./image/mobilenet/mobilenet4.png" alt="Model Image" width="800">


In [1]:
# Environment setup.
# IPython magic: move the notebook's working directory two levels up, so the
# relative paths used below ('./python', './data/...') resolve from there.
%cd ../../
import sys
# Add ./python (relative to the new working directory) to the import search
# path — presumably where the local `sgd_cv` package lives; verify the layout.
sys.path.append('./python')

d:\sgd-代码库\torch2.0-paly\sgd_deep_learning\sgd_cv


In [2]:
from tqdm import tqdm
import torch
from torch import nn
from torch import optim
from torch.utils.data import DataLoader

import torchvision
from torchvision import transforms

from sgd_cv.model import MobileNetV2

import torchvision.models as models

## 测试模型

In [3]:
# Smoke-test the model structure: build the network, print its layers and
# push one random batch through it to confirm the output shape.
model = MobileNetV2(num_classes=10)
print(model)

# One random input of shape 1 x 3 x 224 x 224, matching the 224x224 centre
# crop that the data transform in the training section produces.
input_tensor = torch.randn(1, 3, 224, 224)

# This is only a shape check, so run it as inference:
#   * eval() keeps the BatchNorm layers (see the printed structure) from
#     updating their running statistics with random noise;
#   * no_grad() avoids building an autograd graph that is never used.
model.eval()
with torch.no_grad():
    output = model(input_tensor)

print(f"输入张量大小: {input_tensor.shape}")
print(f"输出张量大小: {output.shape}")  # expected: [1, 10]


MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu6): ReLU6(inplace=True)
    )
    (1): InvertedResidualShort(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu6): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (downsample): Sequential(
        (0): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1))
        (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=Tru

## 训练模型

In [4]:
# Dataset loading.
# Preprocessing applied to every image, in order: Resize(256), CenterCrop(224),
# conversion to a tensor, then per-channel normalisation with the mean/std
# listed below.
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# CIFAR10: the train and test splits share the same root directory and the
# same transform. The training split is created first, then the test split.
train_dataset, test_dataset = [
    torchvision.datasets.CIFAR10(root='./data/CIFAR10/', train=is_train,
                                 transform=transform, download=True)
    for is_train in (True, False)
]

# Batches of 64 for both splits; only the training loader shuffles.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

# Model, loss function and optimizer.
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

# To compare against the torchvision implementation, swap in:
#     model = models.mobilenet_v2().to(device)
model = MobileNetV2(num_classes=10)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=3e-4)

# Autograd anomaly detection is explicitly switched OFF here (argument False).
torch.autograd.set_detect_anomaly(False)

Files already downloaded and verified
Files already downloaded and verified
cuda


<torch.autograd.anomaly_mode.set_detect_anomaly at 0x18aba365a30>

In [5]:
# Training: make num_epochs full passes over train_loader and, after each
# pass, print the loss averaged over the number of batches.
num_epochs = 5
for epoch in range(num_epochs):
    model.train()  # put the model in training mode at the start of each epoch
    running_loss = 0.0  # sum of the per-batch loss values for this epoch

    for images, labels in tqdm(train_loader):
        # Move the batch onto the same device as the model.
        images = images.to(device)
        labels = labels.to(device)

        # Reset gradients, then forward pass, backward pass and parameter update.
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    avg_loss = running_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

100%|██████████| 782/782 [02:56<00:00,  4.43it/s]


Epoch [1/5], Loss: 1.2774


100%|██████████| 782/782 [03:04<00:00,  4.25it/s]


Epoch [2/5], Loss: 0.8395


100%|██████████| 782/782 [03:00<00:00,  4.34it/s]


Epoch [3/5], Loss: 0.6184


100%|██████████| 782/782 [03:04<00:00,  4.25it/s]


Epoch [4/5], Loss: 0.4732


100%|██████████| 782/782 [02:56<00:00,  4.44it/s]

Epoch [5/5], Loss: 0.3651





In [6]:
# Evaluation: count how many test samples receive the correct top-1 label
# and print the result as a percentage.
model.eval()  # switch the model to evaluation mode
correct = 0   # number of correctly classified samples
total = 0     # number of samples seen

with torch.no_grad():  # no gradients are needed for evaluation
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Predicted class = index of the largest score along dimension 1.
        predicted = model(images).argmax(dim=1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"Test Accuracy: {accuracy:.2f}%")

Test Accuracy: 79.71%
