In [2]:
# 필요한 모듈 불러오기

# model
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
from torch import optim

# dataset and transformation
from torchvision import datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import os

# display images
from torchvision import utils
import matplotlib.pyplot as plt
%matplotlib inline

# utils
import numpy as np
from torchsummary import summary
import time
import copy

In [3]:
# 본 모델의 학습과 성능평가를 위해 STL10 Dataset을 사용할 예정이다

# STL Dataset을 다운로드 받을 Directory를 지정한다
path2data = './data'

# if not exists the path, make the directory
if not os.path.exists(path2data):
    os.mkdir(path2data)

# load dataset
train_ds = datasets.STL10(path2data, split='train', download=True, transform=transforms.ToTensor())
val_ds = datasets.STL10(path2data, split='test', download=True, transform=transforms.ToTensor())

print(len(train_ds))
print(len(val_ds))

Downloading http://ai.stanford.edu/~acoates/stl10/stl10_binary.tar.gz to ./data/stl10_binary.tar.gz


  0%|          | 0/2640397119 [00:00<?, ?it/s]

Extracting ./data/stl10_binary.tar.gz to ./data
Files already downloaded and verified
5000
8000


In [4]:
# DataLoader에 사용될 Pre-Processor를 정의한다
transformation = transforms.Compose([
                    transforms.ToTensor(),
                    transforms.Resize(299)
])

train_ds.transform = transformation
val_ds.transform = transformation

# Train dataset, Validation dataset의 Dataloader 생성
train_dl = DataLoader(train_ds, batch_size=32, shuffle=True)
val_dl = DataLoader(val_ds, batch_size=32, shuffle=True)

In [5]:
# Xception 모델 구축에 필요한 여러 모듈을 구현해보자
# Xception model은 Entry flow - Middle flow - Exit flow 크게 세 부분으로 구성된다
# 여러 곳에 사용되는 블록인 Separable Convolution Layer을 먼저 구현해보자

# 3x3 conv와 1x1 conv로 이루어져 있으며
# 해당 Separable Conv layer를 통과하면 형상은 변화 없고 채널만 in_channel에서 out_channel로 변화한다
class SeparableConv(nn.Module):
  def __init__(self, in_channels, out_channels):
    super().__init__()

    self.separable = nn.Sequential(
        nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1, bias=False),
        nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=False)
    )
  
  def forward(self, x):
    x = self.separable(x)
    return x

In [6]:
# Entry flow를 정의하자
class EntryFlow(nn.Module):
  def __init__(self):
    super().__init__()

    self.conv1 = nn.Sequential(
        nn.Conv2d(3,32,kernel_size=3, padding=1, bias=False),
        nn.BatchNorm2d(32),
        nn.ReLU(),
        nn.Conv2d(32,64,kernel_size=3, stride=1, padding=0, bias=False),
        nn.BatchNorm2d(64),
        nn.ReLU()        
    )

    self.conv2_residual = nn.Sequential(
        SeparableConv(64,128),
        nn.BatchNorm2d(128),
        nn.ReLU(),
        SeparableConv(128,128),
        nn.BatchNorm2d(128),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1)                    
    )

    self.conv2_shortcut = nn.Sequential(
        nn.Conv2d(64,128, kernel_size=1, stride=2, padding=0),
        nn.BatchNorm2d(128)
    )

    self.conv3_residual = nn.Sequential(
        nn.ReLU(),
        SeparableConv(128,256),
        nn.BatchNorm2d(256),
        nn.ReLU(),
        SeparableConv(256,256),
        nn.BatchNorm2d(256),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    )

    self.conv3_shortcut = nn.Sequential(
        nn.Conv2d(128,256,kernel_size=1, stride=2, padding=0),
        nn.BatchNorm2d(256)
    )

    self.conv4_residual = nn.Sequential(
        nn.ReLU(),
        SeparableConv(256,728),
        nn.BatchNorm2d(728),
        nn.ReLU(),
        SeparableConv(728,728),
        nn.BatchNorm2d(728),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1)      
    )

    self.conv4_shortcut = nn.Sequential(
        nn.Conv2d(256,728,kernel_size=1, stride=2, padding=0),
        nn.BatchNorm2d(728)
    )

  def forward(self, x):
    x = self.conv1(x)
    x = self.conv2_residual(x) + self.conv2_shortcut(x)
    x = self.conv3_residual(x) + self.conv3_shortcut(x)
    x = self.conv4_residual(x) + self.conv4_shortcut(x)
    return x

In [7]:
# MiddleFlow를 정의한다
class MiddleFlow(nn.Module):
    def __init__(self):
        super().__init__()

        self.conv_residual = nn.Sequential(
            nn.ReLU(),
            SeparableConv(728, 728),
            nn.BatchNorm2d(728),
            nn.ReLU(),
            SeparableConv(728, 728),
            nn.BatchNorm2d(728),
            nn.ReLU(),
            SeparableConv(728, 728),
            nn.BatchNorm2d(728)
        )

        self.conv_shortcut = nn.Sequential()

    def forward(self, x):
        return self.conv_shortcut(x) + self.conv_residual(x)

In [8]:
# ExitFlow를 정의한다
class ExitFlow(nn.Module):
    def __init__(self, num_classes=10):
        super().__init__()

        self.conv1_residual = nn.Sequential(
            nn.ReLU(),
            SeparableConv(728, 1024),
            nn.BatchNorm2d(1024),
            nn.ReLU(),
            SeparableConv(1024, 1024),
            nn.BatchNorm2d(1024),
            nn.MaxPool2d(3, stride=2, padding=1)
        )

        self.conv1_shortcut = nn.Sequential(
            nn.Conv2d(728, 1024, 1, stride=2, padding=0),
            nn.BatchNorm2d(1024)
        )

        self.conv2 = nn.Sequential(
            SeparableConv(1024, 1536),
            nn.BatchNorm2d(1536),
            nn.ReLU(),
            SeparableConv(1536, 2048),
            nn.BatchNorm2d(2048),
            nn.ReLU()
        )

        self.avg_pool = nn.AdaptiveAvgPool2d((1,1))
    
    def forward(self, x):
        x = self.conv1_residual(x) + self.conv1_shortcut(x)
        x = self.conv2(x)
        x = self.avg_pool(x)
        return x

In [9]:
# Xception에 사용되는 모든 module을 정의하였다
# 본격적으로 Xception 모듈을 정의해보자
# Middle flow같은 경우는 _make_middle_flow라는 함수를 만들어 총 8개의 middle을 생성하였다

class Xception(nn.Module):
  def __init__(self, num_classes=10, init_weights=True):
    super().__init__()
    self.init_weights = init_weights

    self.entry = EntryFlow()
    self.middle = self._make_middle_flow()
    self.exit = ExitFlow()

    self.linear = nn.Linear(2048, num_classes)

    if self.init_weights:
      self._initialize_weights()

  def forward(self, x):
    x = self.entry(x)
    x = self.middle(x)
    x = self.exit(x)
    x = x.view(x.size(0), -1)
    x = self.linear(x)
    return x

  def _make_middle_flow(self):
    middle = nn.Sequential()
    for i in range(8):
      middle.add_module("middle_block_{}".format(i), MiddleFlow())
    return middle

  def _initialize_weights(self):
      for m in self.modules():
          if isinstance(m, nn.Conv2d):
              nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
              if m.bias is not None:
                  nn.init.constant_(m.bias, 0)
          elif isinstance(m, nn.BatchNorm2d):
              nn.init.constant_(m.weight, 1)
              nn.init.constant_(m.bias, 0)
          elif isinstance(m, nn.Linear):
              nn.init.normal_(m.weight, 0, 0.01)
              nn.init.constant_(m.bias, 0)

In [10]:
# 모델을 생성하고 Test input을 넣어 output을 확인해보자
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
x = torch.randn(3,3,299,299).to(device)
model = Xception().to(device)
output=model(x)
print("output size : ", output.size())

output size :  torch.Size([3, 10])


In [11]:
# 특정 입력값에 대한 각 계층에서의 출력 확인
summary(model, (3, 299, 299), device=device.type)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 299, 299]             864
       BatchNorm2d-2         [-1, 32, 299, 299]              64
              ReLU-3         [-1, 32, 299, 299]               0
            Conv2d-4         [-1, 64, 297, 297]          18,432
       BatchNorm2d-5         [-1, 64, 297, 297]             128
              ReLU-6         [-1, 64, 297, 297]               0
            Conv2d-7         [-1, 64, 297, 297]          36,864
            Conv2d-8        [-1, 128, 297, 297]           8,192
     SeparableConv-9        [-1, 128, 297, 297]               0
      BatchNorm2d-10        [-1, 128, 297, 297]             256
             ReLU-11        [-1, 128, 297, 297]               0
           Conv2d-12        [-1, 128, 297, 297]         147,456
           Conv2d-13        [-1, 128, 297, 297]          16,384
    SeparableConv-14        [-1, 128, 2

In [12]:
# 본격적으로 학습을 위한 함수를 제작하자
# 하나의 batch_set의 데이터들의 손실함수가 모두 합산되어 반환되도록 정의
loss_func = nn.CrossEntropyLoss(reduction='sum')
opt = optim.Adam(model.parameters(), lr=0.01)

from torch.optim.lr_scheduler import ReduceLROnPlateau
# 30회마다 Learning rate를 10분의 1로 줄여주는 Scheduler 제작
lr_scheduler = ReduceLROnPlateau(opt, mode='min', factor=0.1, patience=8)

# optimizer의 현재 Learning rate를 반환하는 함수 제작
def get_lr(opt):
  return opt.param_groups[0]['lr']

# model의 예측과 정답 label을 비교하여 맞춘 개수를 반환한다
def metric_batch(output, target):
  pred = output.argmax(dim=1, keepdim=True)
  corrects = pred.eq(target.view_as(pred)).sum().item()
  return corrects

# batct학습 시 손실함수 이용하여 backpropagation을 하고 나서
# 해당 batch의 총 loss값과 맞은 정답의 개수를 반환하는 함수

def loss_batch(loss_func, outputs, target, opt=None):
  # 만약 Auxiliary Classifier가 적용된 모델이 반환한 값이라면
  # Main Classifier의 반환값, Aux. classifier 1,2의 반환값이 return된다
  if len(outputs) == 3:
    output, aux1, aux2 = outputs
  
    output_loss = loss_func(output, target)
    aux1_loss = loss_func(aux1, target)
    aux2_loss = loss_func(aux2, target)

    # Aux. Classifier의 loss는 0.3을 곱하여 전체 loss에 더한다
    loss = output_loss + 0.3*(aux1_loss + aux2_loss)

    # 해당 batch_dataset에서 model이 맞춘 정답의 개수
    metric_b = metric_batch(output, target)

  else:
    loss = loss_func(outputs, target)
    metric_b = metric_batch(outputs, target)

  if opt is not None:
    opt.zero_grad()
    loss.backward()
    opt.step()

  return loss.item(), metric_b

# 해당 dataloader를 이용해 model을 1 epoch 훈련시키고
# 1epoch동안의 평균 손실함수값과 정확도를 반환하는 함수
def loss_epoch(model, loss_func, dataset_dl, sanity_check=False, opt=None):
  # epoch 동안의 평균 손실함수값
  # epoch 동안의 평균 Precision 저장을 위한 변수 생성
  running_loss = 0.0
  running_metric = 0.0
  len_data = len(dataset_dl.dataset)

  for xb, yb in dataset_dl:
    xb, yb = xb.to(device), yb.to(device)
    output = model(xb)

    loss_b, metric_b = loss_batch(loss_func, output, yb, opt)

    running_loss += loss_b

    if metric_b is not None:
      running_metric += metric_b

    if sanity_check is True:
      break

  loss = running_loss  / len_data
  metric = running_metric / len_data

  return loss, metric

# configuration parameter를 params라는 인자로 전달하면
# 해당 config에 맞게 Train을 해주는 함수를 정의하였다
def train_val(model, params):
  num_epochs=params["num_epochs"]
  loss_func=params["loss_func"]
  opt=params["optimizer"]
  train_dl=params["train_dl"]
  val_dl=params["val_dl"]
  sanity_check=params["sanity_check"]
  lr_scheduler=params["lr_scheduler"]
  path2weights=params["path2weights"]

  # epoch별 평균 loss와 정확도를 저장
  loss_history = {'train':[], 'val':[]}
  metric_history = {'train':[], 'val':[]}

  # 가장 작은 손실함수값을 반환하는 모델의 가중치를 저장한다
  best_model_wts = copy.deepcopy(model.state_dict())
  best_loss = float('inf')

  start_time = time.time()
  for epoch in range(num_epochs):
    current_lr = get_lr(opt)
    print('Epoch {}/{}, current lr={}'.format(epoch, num_epochs - 1, current_lr)) 

    # 학습 모드
    model.train()
    # train_dataset 1 Epoch 훈련
    train_loss, train_metric = loss_epoch(model, loss_func, train_dl, sanity_check, opt)
    
    loss_history['train'].append(train_loss)
    metric_history['train'].append(train_metric)

    # 모델의 성능 평가모드
    model.eval()
    with torch.no_grad():
      val_loss, val_metric = loss_epoch(model, loss_func, val_dl, sanity_check)

    if val_loss < best_loss:
      best_loss = val_loss
      best_model_wts = copy.deepcopy(model.state_dict())

      torch.save(model.state_dict(), path2weights)
      print("Copied best model weights!")

    loss_history['val'].append(val_loss)
    metric_history['val'].append(val_metric)

    lr_scheduler.step(val_loss)
    if current_lr != get_lr(opt):
      print('Loading best model weights!')
      model.load_state_dict(best_model_wts)

    print('train loss: %.6f, val loss: %.6f, accuracy: %.2f, time: %.4f min' %(train_loss, val_loss, 100*val_metric, (time.time()-start_time)/60))
    print('-'*10)

  # 학습을 모두 마치기 전 가장 손실함수가 적게 반환된 가중치로 모델을 초기화한다
  model.load_state_dict(best_model_wts)

  return model, loss_history, metric_history

In [13]:
# 모델에 삽입할 config parameter를 정의한다
params_train = {
    'num_epochs':10,
    'optimizer':opt,
    'loss_func':loss_func,
    'train_dl':train_dl,
    'val_dl':val_dl,
    'sanity_check':False,
    'lr_scheduler':lr_scheduler,
    'path2weights':'./models/weights.pt',
}

# check the directory to save weights.pt
def createFolder(directory):
    try:
        if not os.path.exists(directory):
            os.makedirs(directory)
    except OSerror:
        print('Error')
createFolder('./models')

In [28]:
# 10 epoch 학습 시작
# Colab Out of Memory Error인하여 실험 진행 불가능
model, loss_hist, metric_hist = train_val(model, params_train)

Epoch 0/9, current lr=0.01


RuntimeError: ignored