In [1]:
import os
from pathlib import Path

import cv2
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader
from tqdm import tqdm   

from src.utils import seed_everything
from src.model import MyNet
from src.dataset import CustomDataset
from src.transform import define_transforms


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
TRAIN_IMAGE_DIR: str = Path("data/train")
VALID_IMAGE_DIR: str = Path("data/valid")
LABEL_DICT: dict = {"helmet": 0, "head": 1}

IMAGE_SIZE: int = 224
NUM_CLASSES: int = len(LABEL_DICT)
MODEL_NAME: str = "mobilenetv3_large_100.ra_in1k"
PRETRAINED: bool = True

BATCH_SIZE: int = 32
NUM_WORKERS: int = 4
NUM_EPOCHS: int = 10

LR: float = 1e-3
MOMENTUM: float = 0.99
WEIGHT_DECAY: float = 1e-4

SEED: int = 42



In [3]:
# seed値の固定
seed_everything(SEED)


In [4]:
# Load the model
model = MyNet(model_name=MODEL_NAME, num_classes=NUM_CLASSES, pretrained=PRETRAINED)
print(model)


MyNet(
  (model): MobileNetV3(
    (conv_stem): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn1): BatchNormAct2d(
      16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
      (drop): Identity()
      (act): Hardswish()
    )
    (blocks): Sequential(
      (0): Sequential(
        (0): DepthwiseSeparableConv(
          (conv_dw): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
          (bn1): BatchNormAct2d(
            16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): ReLU(inplace=True)
          )
          (se): Identity()
          (conv_pw): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn2): BatchNormAct2d(
            16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): Identity()
          )
          (drop_path): Identity()
 

In [5]:
num_train_head_image = len(list(TRAIN_IMAGE_DIR.glob('head/*.jpg')))
num_train_helmet_image = len(list(TRAIN_IMAGE_DIR.glob('helmet/*.jpg')))
num_valid_head_image = len(list(VALID_IMAGE_DIR.glob('head/*.jpg')))
num_valid_helmet_image = len(list(VALID_IMAGE_DIR.glob('helmet/*.jpg')))

print(f"num_train_head_image: {num_train_head_image}")
print(f"num_train_helmet_image: {num_train_helmet_image}")
print(f"num_valid_head_image: {num_valid_head_image}")
print(f"num_valid_helmet_image: {num_valid_helmet_image}")


num_train_head_image: 87842
num_train_helmet_image: 7342
num_valid_head_image: 23491
num_valid_helmet_image: 1700


In [6]:
# 
train_head_path_list = list(TRAIN_IMAGE_DIR.glob('head/*.jpg'))
train_helmet_path_list = list(TRAIN_IMAGE_DIR.glob('helmet/*.jpg'))
valid_head_path_list = list(VALID_IMAGE_DIR.glob('head/*.jpg'))
valid_helmet_path_list = list(VALID_IMAGE_DIR.glob('helmet/*.jpg'))

train_head_path_list = train_head_path_list[:min(num_train_head_image, num_train_helmet_image)]
train_helmet_path_list = train_helmet_path_list[:min(num_train_head_image, num_train_helmet_image)]
valid_head_path_list = valid_head_path_list[:min(num_valid_head_image, num_valid_helmet_image)]
valid_helmet_path_list = valid_helmet_path_list[:min(num_valid_head_image, num_valid_helmet_image)]
# 動作テスト用に100枚ずつに制限する
# train_head_path_list = train_head_path_list[:100]
# train_helmet_path_list = train_helmet_path_list[:100]
# valid_head_path_list = valid_head_path_list[:100]
# valid_helmet_path_list = valid_helmet_path_list[:100]

print(f"train_head_path_list: {len(train_head_path_list)}")
print(f"train_helmet_path_list: {len(train_helmet_path_list)}")
print(f"valid_head_path_list: {len(valid_head_path_list)}")
print(f"valid_helmet_path_list: {len(valid_helmet_path_list)}")


train_head_path_list: 7342
train_helmet_path_list: 7342
valid_head_path_list: 1700
valid_helmet_path_list: 1700


In [7]:
# 画像データと正解ラベルの取得
# train_image_path_list = list(TRAIN_IMAGE_DIR.glob("*/*.jpg"))
# valid_image_path_list = list(VALID_IMAGE_DIR.glob("*/*.jpg"))
train_image_path_list = train_head_path_list + train_helmet_path_list
valid_image_path_list = valid_head_path_list + valid_helmet_path_list
train_label_list = [LABEL_DICT[path.parent.name] for path in train_image_path_list]
valid_label_list = [LABEL_DICT[path.parent.name] for path in valid_image_path_list]

# Datasetの作成
train_dataset = CustomDataset(
    image_path_list=train_image_path_list,
    label_list=train_label_list,
    transform=define_transforms(IMAGE_SIZE)['train']
)
valid_dataset = CustomDataset(
    image_path_list=valid_image_path_list,
    label_list=valid_label_list,
    transform=define_transforms(IMAGE_SIZE)['valid']
)

# DataLoaderの作成
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
    pin_memory=True,
    drop_last=True
)
valid_loader = DataLoader(
    valid_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=True,
    drop_last=False
)



In [8]:
# デバイスの設定
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# macで実行する場合は以下コメントアウト解除
device = torch.device("mps"if torch.backends.mps.is_available() else "cpu")
print(f"device: {device}")
model = model.to(device)


device: mps


In [9]:
# 損失関数の設定
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)

# 最適化手法の設定
# optimizer = torch.optim.SGD(
#     model.parameters(),
#     lr=LR,
#     momentum=MOMENTUM,
#     weight_decay=WEIGHT_DECAY
# )
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=LR,
)

# スケジューラーの設定
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=NUM_EPOCHS,
    eta_min=0
)



In [10]:
# 学習

best_loss = float("inf")

for epoch in range(NUM_EPOCHS):
    # 学習
    model.train()
    for images, labels in tqdm(train_loader):
        # データをデバイスに転送
        images = images.to(device)
        labels = labels.to(device)
        
        # 勾配の初期化
        optimizer.zero_grad()
        
        # 順伝播
        output = model(images)
        
        # 損失の計算
        train_loss = criterion(output, labels)
        
        # 逆伝播
        train_loss.backward()
        
        # パラメータの更新
        optimizer.step()

    # 検証
    model.eval()
    gt_label_list = []
    pred_label_list = []
    valid_loss_list = []
    
    with torch.no_grad():
        for images, labels in valid_loader:
            # データをデバイスに転送
            images = images.to(device)
            labels = labels.to(device)
            
            # 順伝播
            output = model(images)
            
            # 損失の計算
            valid_loss = criterion(output, labels)
            valid_loss_list.append(valid_loss.item())
            
            # 予測ラベルの取得
            pred_labels = output.argmax(dim=1)
            gt_label_list.extend(labels.cpu().numpy())
            pred_label_list.extend(pred_labels.cpu().numpy())
            
    valid_loss_mean = np.mean(valid_loss_list)
    print(f"epoch: {epoch+1}, valid_loss: {valid_loss_mean}")
    
    # lossが改善した場合にモデルを保存
    if valid_loss_mean < best_loss:
        best_loss = valid_loss_mean
        torch.save(model.state_dict(), f"{MODEL_NAME}.pth")
        print("!!!update best loss & save best model!!!")
    
    gt_label_array = np.array(gt_label_list)
    pred_label_array = np.array(pred_label_list)
    accuracy = (gt_label_array == pred_label_array).mean()
    print(f"epoch: {epoch+1}, accuracy: {accuracy}")
    


  0%|          | 0/458 [00:00<?, ?it/s]

100%|██████████| 458/458 [03:59<00:00,  1.92it/s]


epoch: 1, valid_loss: 0.12995676170614268
!!!update best loss & save best model!!!
epoch: 1, accuracy: 0.9602941176470589


  0%|          | 0/458 [00:00<?, ?it/s]

In [None]:
print(sum(gt_label_array))
print(sum(pred_label_array))


1700
1771


In [None]:
import onnx
from onnxsim import simplify

model.cpu()

# モデルを評価モードに設定
model.eval()

# ダミー入力を作成
dummy_input = torch.randn(1, 3, IMAGE_SIZE, IMAGE_SIZE)

# ONNX形式に変換
torch.onnx.export(
    model,
    dummy_input,
    "onnx/mobilenetv3_under_sampling.onnx",
    input_names=["input"],
    output_names=["output"],
    opset_version=11,
    dynamic_axes={"input": {0: "batch_size"}},
)

# 型の推定
model = onnx.load("onnx/mobilenetv3_under_sampling.onnx")
model = onnx.shape_inference.infer_shapes(model)

# モデル構造の最適化
model_simp, check = simplify(model)

onnx.save(model_simp, "onnx/mobilenetv3_under_sampling.onnx")

print("Model converted to ONNX format.")


Model converted to ONNX format.
