In [1]:
import torch
import torch.nn as nn
from torchsummary import summary
import torch.nn.functional as F
from torch import optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torchvision import datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

import copy
import time
import numpy as np
import os

from jjjMobileNet import MobileNetv1
from conf import config


In [2]:
img_size = config['data']['img_size']
num_classes = config['data']['num_classes']
save_folder = config['data']['save_weights_dir']

batch_size = config['param']['batch_size']
width_param = config['param']['width_ratio']
learning_rate = config['param']['lr']
num_epochs = config['param']['num_epochs']

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
x = torch.randn((3, 3, img_size, img_size)).to(device)
print(device)

model = MobileNetv1(width_param=width_param, num_classes=num_classes).to(device)
output = model(x)

print('output size:', output.size())

cuda
output size: torch.Size([3, 10])


In [4]:
summary(model, (3, img_size, img_size), device=device.type)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 28, 111, 111]             784
       BatchNorm2d-2         [-1, 28, 111, 111]              56
              ReLU-3         [-1, 28, 111, 111]               0
            Conv2d-4         [-1, 28, 111, 111]             252
       BatchNorm2d-5         [-1, 28, 111, 111]              56
             ReLU6-6         [-1, 28, 111, 111]               0
            Conv2d-7         [-1, 57, 111, 111]           1,596
       BatchNorm2d-8         [-1, 57, 111, 111]             114
             ReLU6-9         [-1, 57, 111, 111]               0
DepthwiseSeparable-10         [-1, 57, 111, 111]               0
           Conv2d-11           [-1, 57, 56, 56]             513
      BatchNorm2d-12           [-1, 57, 56, 56]             114
            ReLU6-13           [-1, 57, 56, 56]               0
           Conv2d-14          [-1, 115

In [5]:
path2data = config['data']['dataset']

if not os.path.exists(path2data):
    os.mkdir(path2data)

if not os.path.exists(save_folder):
    os.mkdir(save_folder)

# load dataset
train_ds = datasets.STL10(path2data, split='train', download=True, transform=transforms.ToTensor())
val_ds = datasets.STL10(path2data, split='test', download=True, transform=transforms.ToTensor())

print(len(train_ds))
print(len(val_ds))

Files already downloaded and verified
Files already downloaded and verified
5000
8000


In [6]:
transformation = transforms.Compose([
                    transforms.ToTensor(),
                    transforms.Resize(img_size)
])

# apply transformation to dataset
train_ds.transform = transformation
val_ds.transform = transformation

In [7]:
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_dl = DataLoader(val_ds, batch_size=batch_size, shuffle=True)

In [8]:
loss_func = nn.CrossEntropyLoss(reduction='mean')
opt = optim.Adam(model.parameters(), lr=learning_rate)
lr_scheduler = ReduceLROnPlateau(opt)

In [9]:
loss_history = {'train': [], 'val': []}
metric_history = {'train': [], 'val': []}

def calc_batch_metric(output, target):
    pred = output.argmax(1, keepdim=True)
    metric = pred.eq(target.view_as(pred)).sum().item()
    return metric

def calc_batch_loss(loss_func, output, target, opt=None):
    batch_loss = loss_func(output, target)
    batch_metric = calc_batch_metric(output, target)

    if opt is not None:
        opt.zero_grad()
        batch_loss.backward()
        opt.step()
    
    return batch_loss.item(), batch_metric

def loss_epoch(model, loss_func, data_loader, opt=None):
    epoch_loss, epoch_metric = 0, 0
    data_len = len(data_loader.dataset)
    
    for xd, yd in data_loader:
        xd = xd.to(device)
        yd = yd.to(device)
        
        output = model(xd)
        
        batch_loss, batch_metric = calc_batch_loss(loss_func=loss_func, output=output, target=yd, opt=opt)
        
        epoch_loss += batch_loss
        
        if batch_metric is not None:
            epoch_metric += batch_metric
        
    return_loss = epoch_loss / data_len
    return_metric = epoch_metric / data_len
    
    return np.round(return_loss, 5), np.round(return_metric, 5)

In [10]:
best_loss = float('inf')

for epoch in range(1, num_epochs+1):
    start_time = time.time()
    cur_lr = opt.param_groups[0]['lr']
    print("Current Learning Rate: {}".format(cur_lr))
    
    model.train()
    train_loss, train_metric = loss_epoch(model, loss_func, train_dl, opt)
    loss_history['train'].append(train_loss)
    metric_history['train'].append(train_metric)

    model.eval()
    with torch.no_grad():
        val_loss, val_metric = loss_epoch(model, loss_func, val_dl)
    loss_history['val'].append(val_loss)
    metric_history['val'].append(val_metric)

    lr_scheduler.step(val_loss)

    print("Epoch: {} Time: {}s Train Loss: {}, Validation Loss: {}, Train Accuracy: {}%, Validation Accuracy: {}%".format(epoch, np.round(time.time() - start_time),
                              train_loss, val_loss, train_metric*100, val_metric*100))                              

    if val_loss < best_loss:
        best_loss = val_loss
        print("save best weights")
        save_dir = os.path.join(save_folder, 'weights_epoch{}.pt'.format(epoch))
        torch.save(model.state_dict(), save_dir)

Current Learning Rate: 0.01
Epoch: 0 Time: 79.0s Train Loss: 0.06089, Validation Loss: 0.0561, Train Accuracy: 24.92%, Validation Accuracy: 27.025%
save best weights
Current Learning Rate: 0.01
Epoch: 1 Time: 75.0s Train Loss: 0.05362, Validation Loss: 0.0539, Train Accuracy: 30.819999999999997%, Validation Accuracy: 29.425%
save best weights
Current Learning Rate: 0.01
Epoch: 2 Time: 75.0s Train Loss: 0.05244, Validation Loss: 0.05015, Train Accuracy: 32.300000000000004%, Validation Accuracy: 36.038%
save best weights
Current Learning Rate: 0.01
Epoch: 3 Time: 74.0s Train Loss: 0.05045, Validation Loss: 0.04865, Train Accuracy: 34.86%, Validation Accuracy: 37.875%
save best weights
Current Learning Rate: 0.01
Epoch: 4 Time: 75.0s Train Loss: 0.04835, Validation Loss: 0.06084, Train Accuracy: 38.3%, Validation Accuracy: 35.562%
Current Learning Rate: 0.01
Epoch: 5 Time: 76.0s Train Loss: 0.0463, Validation Loss: 0.05016, Train Accuracy: 41.64%, Validation Accuracy: 40.175%
Current Lear

## 구현 & 학습결과
- 확실히 이전에 구현했던 모델보다는 모델 사이즈의 경량화가 잘 되어있음.
- 과적합이 너무 심함. 아래 항목은 과적합 이유 추측
    1. 이미지 전처리 과정의 부재(주요 원인 중 하나)
    2. 생각보다 심한 정보의 소실(하지만 이정도로 심하면 타 모델에서 depthwise 기능을 썼을리 없음. 보류)
    3. Dropout 등 과적합 방지 기능의 부재(주요 원인 중 하나)
    4. 데이터셋 고유 문제 or train / val dataset 간의 불균형
        - 추가 실험이 필요하겠지만 원본 or train/val 데이터셋의 문제는 아닐듯함.