In [1]:
# 필요한 모듈을 import하기
# 모델과 관련된 모듈 import하기
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
from torch import optim
from torch.optim.lr_scheduler import StepLR

# DataSet 및 DataLoader관련 모듈 import
from torchvision import datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import os

# 이미지 출력을 위한 모듈 import
from torchvision import utils
import matplotlib.pyplot as plt
%matplotlib inline

# 기타 모듈 import하기
import numpy as np
import time
import copy

In [2]:
# 모델의 학습과 평가에 STL Dataset을 이용할 것이다
# STL Dataset를 저장할 Directory를 명시하도록 하자
path2data = "./data"

if not os.path.exists(path2data):
  os.mkdir(path2data)

# STL10 Dataset중 train_dataset loading
# STL10 Dataset중 validation_dataset loading
train_ds = datasets.STL10(path2data, split='train', download=True, transform=transforms.ToTensor())
val_ds = datasets.STL10(path2data, split='test', download=True, transform=transforms.ToTensor())

Downloading http://ai.stanford.edu/~acoates/stl10/stl10_binary.tar.gz to ./data/stl10_binary.tar.gz


  0%|          | 0/2640397119 [00:00<?, ?it/s]

Extracting ./data/stl10_binary.tar.gz to ./data
Files already downloaded and verified


In [3]:
# Dataset을 정규화하기 위하여 각 채널별 평균과 표준편차를 구한다
# 우리가 직접 제작한 모델은 STL10 훈련 데이터에 익숙해진 모델이다
# 모델이 익숙해하는 훈련 데이터와 같은 RGB로 모든 dataset를 정규화하여야 한다

train_meanRGB = [np.mean(x.numpy(),axis=(1,2)) for x, _ in train_ds]
train_stdRGB = [np.std(x.numpy(), axis=(1,2)) for x, _ in train_ds]

train_meanR = np.mean([m[0] for m in train_meanRGB])
train_meanG = np.mean([m[1] for m in train_meanRGB])
train_meanB = np.mean([m[2] for m in train_meanRGB])
train_stdR = np.std([s[0] for s in train_stdRGB])
train_stdG = np.std([s[1] for s in train_stdRGB])
train_stdB = np.std([s[2] for s in train_stdRGB])

val_meanRGB = [np.mean(x.numpy(), axis=(1,2)) for x, _ in val_ds]
val_stdRGB = [np.std(x.numpy(), axis=(1,2)) for x,_ in val_ds]

val_meanR = np.mean([m[0] for m in val_meanRGB])
val_meanG = np.mean([m[1] for m in val_meanRGB])
val_meanB = np.mean([m[2] for m in val_meanRGB])
std_meanR = np.std([s[0] for s in val_stdRGB])
std_meanG = np.std([s[1] for s in val_stdRGB])
std_meanB = np.std([s[2] for s in val_stdRGB])

print(train_meanR, train_meanG, train_meanB)
print(val_meanR, val_meanG, val_meanB)

0.4467106 0.43980986 0.40664646
0.44723064 0.4396425 0.40495726


In [4]:
# Dataset을 불러올 때 사용할 Preprocessor와 DataLoader를 정의하자
train_transformation = transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Resize(224),
                        transforms.Normalize([train_meanR, train_meanG, train_meanB],[train_stdR, train_stdG, train_stdB]),
                        transforms.RandomHorizontalFlip()
])

val_transformation = transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Resize(224),
                        transforms.Normalize([train_meanR, train_meanG, train_meanB],[train_stdR, train_stdG, train_stdB])                                      
])
train_ds.transform = train_transformation
val_ds.transform = val_transformation

train_dl = DataLoader(train_ds, batch_size=32, shuffle=True)
val_dl = DataLoader(val_ds, batch_size=32, shuffle=True)

In [5]:
# GoogLeNet 모델을 정의해보자
# 가장 먼저 Channel Reduction Inception Module을 정의할 것인데,
# 해당 Inception Module을 구성하는 각 branch에 삽입되는 합성곱층에 Batch normalization과 ReLU를 일괄적으로 적용하기 위하여
# 다음과 같은 클래스를 정의하였다.

# Convolution Layer의 여러 Parameter는 kwargs라는 이름의 파라미터로 받게 된다.
class conv_block(nn.Module):
  def __init__(self, in_channels, out_channels,**kwargs):
    super(conv_block, self).__init__()

    self.conv_layer = nn.Sequential(
        nn.Conv2d(in_channels, out_channels, **kwargs),
        nn.BatchNorm2d(out_channels),
        nn.ReLU()
    )

  def forward(self, x):
    return self.conv_layer(x)


In [6]:
# Reduced Dimension inception module을 정의한다
# 각 parameter 설명
# out_1x1 : 첫 번째 branch의 최종 출력 채널
# red_3x3, out_3x3 : 두 번째 branch의 중간, 최종 출력 채널
# red_5x5, out_5x5 : 세 번째 branch의 중간, 최종 출력 채널
# out_1x1pool : 네 번째 branch의 최종 출력 채널

class Inception_block(nn.Module):
  def __init__(self, in_channels, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, out_1x1pool):
    super(Inception_block, self).__init__()

    # GoogLeNet의 inception Module은 총 4개의 branch의 출력을 채널 방향으로 Concatanate한다
    # 첫 번째 branch 정의하기
    self.branch1 = conv_block(in_channels, out_1x1, kernel_size=1)

    # 두 번째 branch 정의하기
    # 각 branch의 padding은 출력 형상을 입력과 일치시키기 위해 설정한 것이다
    self.branch2 = nn.Sequential(
        conv_block(in_channels, red_3x3, kernel_size=1),
        conv_block(red_3x3, out_3x3, kernel_size=3, padding=1)
    )

    self.branch3 = nn.Sequential(
        conv_block(in_channels, red_5x5, kernel_size=1),
        conv_block(red_5x5, out_5x5, kernel_size=5, padding=2)
    )

    self.branch4 = nn.Sequential(
        nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
        conv_block(in_channels, out_1x1pool, kernel_size=1)
    )

  def forward(self, x):
    # x의 차원은 (Batch_size, Channel, Width, Height)로 구성되어 있다
    # 각 branch의 출력을 channel방향으로 concatenate하여 다음 Layer로 전달한다
    x = torch.cat([self.branch1(x), self.branch2(x), self.branch3(x), self.branch4(x)],1)
    return x

In [7]:
# GoogLeNet만의 특수한 구조 Auxiliary classifier
# auxiliary classifier에 의한 학습 시 loss에는 0.3을 곱하여 영향을 최소화한다

class InceptionAux(nn.Module):
  def __init__(self, in_channels, num_classes):
    super(InceptionAux, self).__init__()

    self.conv = nn.Sequential(
        nn.AvgPool2d(kernel_size=5, stride=3),
        conv_block(in_channels, 128, kernel_size=1)
    )

    self.fc = nn.Sequential(
        nn.Linear(2048,1024),
        nn.ReLU(),
        nn.Dropout(),
        nn.Linear(1024, num_classes)
    )

  def forward(self, x):
    x = self.conv(x)
    x = x.view(x.shape[0],-1)
    x = self.fc(x)
    return x

In [8]:
# GoogLeNet의 network in network에 사용되는 여러 모듈을 정의하였다
# 이제 GoogLeNet을 정의해보자
# aux_logits : 해당 GoogLeNet모델에 Auxiliary Classifier를 적용할지 여부를 선택한다
# True이면 해당 모델 내에 Auxiliary Classifier를 설치한다

class GoogLeNet(nn.Module):
  def __init__(self, aux_logits=True, num_classes=10, init_weights=True):
    super(GoogLeNet, self).__init__()

    # aux_logits값이 True 또는 False이어야 한다
    assert aux_logits == True or aux_logits == False
    self.aux_logits = aux_logits

    # GoogLeNet의 전체 Layer 정의하기
    # Auxiliary Classifier는 inception 4b, 4e와 연결 되어있다
    self.conv1 = conv_block(3,64,kernel_size=7, stride=2, padding=3)
    self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.conv2 = conv_block(64,192, kernel_size=3, stride=1, padding=1)
    self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.inception3a = Inception_block(192, 64, 96, 128, 16, 32, 32)
    self.inception3b = Inception_block(256, 128, 128, 192, 32, 96, 64)
    self.maxpool3 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.inception4a = Inception_block(480, 192, 96, 208, 16, 48, 64)

    self.inception4b = Inception_block(512, 160, 112, 224, 24, 64, 64)
    self.inception4c = Inception_block(512, 128, 128, 256, 24, 64, 64)
    self.inception4d = Inception_block(512, 112, 144, 288, 32, 64, 64)

    self.inception4e = Inception_block(528, 256, 160, 320, 32, 128, 128 )
    self.maxpool4 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.inception5a = Inception_block(832, 256, 160, 320, 32, 128, 128)
    self.inception5b = Inception_block(832, 384, 192, 384, 48, 128, 128)

    self.avgpool = nn.AvgPool2d(kernel_size=7, stride=1)
    self.dropout = nn.Dropout(p=0.4)
    self.fc1 = nn.Linear(1024, num_classes)

    # GoogLeNet에 Auxiliary Classifier를 설정할지 여부에 따라 모델을 다르게 만든다
    if self.aux_logits:
      self.aux1 = InceptionAux(512, num_classes) # 입력채널 수가 Inception_4b와 같다
      self.aux2 = InceptionAux(528, num_classes) # 입력채널 수가 Inception_4e와 같다
    else :
      self.aux1 = self.aux2 = None

    # weight initialization
    if init_weights:
      self._initialize_weights()

  def forward(self, x):
    x = self.conv1(x)
    x = self.maxpool1(x)
    x = self.conv2(x)
    x = self.maxpool2(x)
    x = self.inception3a(x)
    x = self.inception3b(x)
    x = self.maxpool3(x)
    x = self.inception4a(x)

    # 만약 Auxiliary Classifier가 적용되어 있고, 학습 모드라면
    # 첫 번째 Auxiliary Classifier의 결과를 저장한다

    if self.aux_logits and self.training:
      aux1 = self.aux1(x)

    x = self.inception4b(x)
    x = self.inception4c(x)
    x = self.inception4d(x)

    # 만약 Auxiliary Classifier가 적용되어 있고, 학습 모드라면
    # 두 번째 Auxiliary Classifier의 결과를 저장한다
    if self.aux_logits and self.training:
        aux2 = self.aux2(x)

    x = self.inception4e(x)
    x = self.maxpool4(x)
    x = self.inception5a(x)
    x = self.inception5b(x)
    x = self.avgpool(x)

    x = x.view(x.shape[0], -1)

    x = self.dropout(x)
    x = self.fc1(x) # 실제 Classifier
    
    if self.aux_logits and self.training:
      return (x, aux1, aux2)
    else:
      return x

  # GoogLeNet모델의 가중치를 초기화 해주는 인스턴스 함수를 정의한다
  def _initialize_weights(self):
    for m in self.modules():
      if isinstance(m, nn.Conv2d):
        # 모델의 가중치를 초기화하는 함수 (다시 공부 필요)
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        if m.bias is not None:
          nn.init.constant_(m.bias,0)
      elif isinstance(m, nn.BatchNorm2d):
        nn.init.constant_(m.weight,1)
        nn.init.constant_(m.bias,0)
      elif isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, 0, 0.01)
        nn.init.constant_(m.bias,0)


In [9]:
# Device를 정의 및 모델을 생성한다
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = GoogLeNet(aux_logits=True, num_classes=10, init_weights=True).to(device)

In [10]:
# model이 올바르게 정의되었는지 확인하기 위하여 임의의 input을 통과시켜 보자
x = torch.randn(3, 3, 224, 224).to(device)
output = model(x)
print(output)

(tensor([[-0.2224, -0.1502,  0.0301, -0.4078, -0.2194,  0.1947, -0.0490,  0.0588,
          0.1062, -0.1923],
        [-0.0570,  0.0353, -0.1919,  0.0025,  0.0260,  0.2307, -0.1039,  0.2329,
          0.0445, -0.0623],
        [-0.3132, -0.1867,  0.1086, -0.0915, -0.0176, -0.1184, -0.0013,  0.2148,
          0.0528,  0.1562]], device='cuda:0', grad_fn=<AddmmBackward0>), tensor([[-0.0499,  0.1645, -0.0970,  0.1916,  0.0415,  0.2317, -0.1784,  0.0272,
          0.2067, -0.0881],
        [ 0.0105,  0.0901,  0.2227,  0.0448,  0.0789,  0.0290,  0.0025, -0.0504,
          0.0767, -0.0581],
        [ 0.1101,  0.1109, -0.0905, -0.0021,  0.2313, -0.0588,  0.0480,  0.0091,
          0.0940, -0.0291]], device='cuda:0', grad_fn=<AddmmBackward0>), tensor([[-0.0520,  0.2617,  0.0231, -0.1939,  0.0200, -0.0372,  0.0735, -0.0772,
         -0.2404,  0.0233],
        [-0.1309,  0.0207, -0.0321,  0.0853, -0.0133, -0.0062,  0.0178,  0.0188,
         -0.0639,  0.0666],
        [-0.1521,  0.1329,  0.0118,  

In [11]:
# model을 print하여 전체 구조 파악
print(model)

GoogLeNet(
  (conv1): conv_block(
    (conv_layer): Sequential(
      (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (conv2): conv_block(
    (conv_layer): Sequential(
      (0): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
  )
  (maxpool2): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (inception3a): Inception_block(
    (branch1): conv_block(
      (conv_layer): Sequential(
        (0): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1))
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
      )
    )
    (branch2): Sequential(
 

In [12]:
# torchsummary이용하여 임의의 입력값이 input되었을 때 어떠한 형태로 처리되는지 확인한다
summary(model, input_size=(3,224,224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,472
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
        conv_block-4         [-1, 64, 112, 112]               0
         MaxPool2d-5           [-1, 64, 56, 56]               0
            Conv2d-6          [-1, 192, 56, 56]         110,784
       BatchNorm2d-7          [-1, 192, 56, 56]             384
              ReLU-8          [-1, 192, 56, 56]               0
        conv_block-9          [-1, 192, 56, 56]               0
        MaxPool2d-10          [-1, 192, 28, 28]               0
           Conv2d-11           [-1, 64, 28, 28]          12,352
      BatchNorm2d-12           [-1, 64, 28, 28]             128
             ReLU-13           [-1, 64, 28, 28]               0
       conv_block-14           [-1, 64,

In [13]:
# 본격적으로 학습을 위한 함수를 제작하자
# 하나의 batch_set의 데이터들의 손실함수가 모두 합산되어 반환되도록 정의
loss_func = nn.CrossEntropyLoss(reduction='sum')
opt = optim.Adam(model.parameters(), lr=0.001)

from torch.optim.lr_scheduler import StepLR
# 30회마다 Learning rate를 10분의 1로 줄여주는 Scheduler 제작
lr_scheduler = StepLR(opt, step_size=30, gamma=0.1)

# optimizer의 현재 Learning rate를 반환하는 함수 제작
def get_lr(opt):
  return opt.param_groups[0]['lr']

# model의 예측과 정답 label을 비교하여 맞춘 개수를 반환한다
def metric_batch(output, target):
  pred = output.argmax(dim=1, keepdim=True)
  corrects = pred.eq(target.view_as(pred)).sum().item()
  return corrects

# batct학습 시 손실함수 이용하여 backpropagation을 하고 나서
# 해당 batch의 총 loss값과 맞은 정답의 개수를 반환하는 함수

def loss_batch(loss_func, outputs, target, opt=None):
  # 만약 Auxiliary Classifier가 적용된 모델이 반환한 값이라면
  # Main Classifier의 반환값, Aux. classifier 1,2의 반환값이 return된다
  if len(outputs) == 3:
    output, aux1, aux2 = outputs
  
    output_loss = loss_func(output, target)
    aux1_loss = loss_func(aux1, target)
    aux2_loss = loss_func(aux2, target)

    # Aux. Classifier의 loss는 0.3을 곱하여 전체 loss에 더한다
    loss = output_loss + 0.3*(aux1_loss + aux2_loss)

    # 해당 batch_dataset에서 model이 맞춘 정답의 개수
    metric_b = metric_batch(output, target)

  else:
    loss = loss_func(outputs, target)
    metric_b = metric_batch(outputs, target)

  if opt is not None:
    opt.zero_grad()
    loss.backward()
    opt.step()

  return loss.item(), metric_b

# 해당 dataloader를 이용해 model을 1 epoch 훈련시키고
# 1epoch동안의 평균 손실함수값과 정확도를 반환하는 함수
def loss_epoch(model, loss_func, dataset_dl, sanity_check=False, opt=None):
  # epoch 동안의 평균 손실함수값
  # epoch 동안의 평균 Precision 저장을 위한 변수 생성
  running_loss = 0.0
  running_metric = 0.0
  len_data = len(dataset_dl.dataset)

  for xb, yb in dataset_dl:
    xb, yb = xb.to(device), yb.to(device)
    output = model(xb)

    loss_b, metric_b = loss_batch(loss_func, output, yb, opt)

    running_loss += loss_b

    if metric_b is not None:
      running_metric += metric_b

    if sanity_check is True:
      break

  loss = running_loss  / len_data
  metric = running_metric / len_data

  return loss, metric

# configuration parameter를 params라는 인자로 전달하면
# 해당 config에 맞게 Train을 해주는 함수를 정의하였다
def train_val(model, params):
  num_epochs=params["num_epochs"]
  loss_func=params["loss_func"]
  opt=params["optimizer"]
  train_dl=params["train_dl"]
  val_dl=params["val_dl"]
  sanity_check=params["sanity_check"]
  lr_scheduler=params["lr_scheduler"]
  path2weights=params["path2weights"]

  # epoch별 평균 loss와 정확도를 저장
  loss_history = {'train':[], 'val':[]}
  metric_history = {'train':[], 'val':[]}

  # 가장 작은 손실함수값을 반환하는 모델의 가중치를 저장한다
  best_model_wts = copy.deepcopy(model.state_dict())
  best_loss = float('inf')

  start_time = time.time()
  for epoch in range(num_epochs):
    current_lr = get_lr(opt)
    print('Epoch {}/{}, current lr={}'.format(epoch, num_epochs - 1, current_lr)) 

    # 학습 모드
    model.train()
    # train_dataset 1 Epoch 훈련
    train_loss, train_metric = loss_epoch(model, loss_func, train_dl, sanity_check, opt)
    
    loss_history['train'].append(train_loss)
    metric_history['train'].append(train_metric)

    # 모델의 성능 평가모드
    model.eval()
    with torch.no_grad():
      val_loss, val_metric = loss_epoch(model, loss_func, val_dl, sanity_check)

    if val_loss < best_loss:
      best_loss = val_loss
      best_model_wts = copy.deepcopy(model.state_dict())

      torch.save(model.state_dict(), path2weights)
      print("Copied best model weights!")

    loss_history['val'].append(val_loss)
    metric_history['val'].append(val_metric)

    lr_scheduler.step()

    print('train loss: %.6f, val loss: %.6f, accuracy: %.2f, time: %.4f min' %(train_loss, val_loss, 100*val_metric, (time.time()-start_time)/60))
    print('-'*10)

  # 학습을 모두 마치기 전 가장 손실함수가 적게 반환된 가중치로 모델을 초기화한다
  model.load_state_dict(best_model_wts)

  return model, loss_history, metric_history



In [14]:
# 학습함수의 configuration으로 적용할 parameter를 만든다
params_train = {
    'num_epochs':50,
    'optimizer':opt,
    'loss_func':loss_func,
    'train_dl':train_dl,
    'val_dl':val_dl,
    'sanity_check':False,
    'lr_scheduler':lr_scheduler,
    'path2weights':'./models/weights.pt',
}

# best_weights를 저장할 디렉토리를 생성한다
def createFolder(dir_path):
  try:
    if not os.path.exists(dir_path):
      os.makedirs(dir_path)
  except OSerror:
    print("Error")
createFolder("./models")

In [15]:
model, loss_hist, metric_hist = train_val(model, params_train)

Epoch 0/49, current lr=0.001
Copied best model weights!
train loss: 2.774134, val loss: 1.772952, accuracy: 30.49, time: 0.8177 min
----------
Epoch 1/49, current lr=0.001
Copied best model weights!
train loss: 2.325948, val loss: 1.643793, accuracy: 41.08, time: 1.6073 min
----------
Epoch 2/49, current lr=0.001
Copied best model weights!
train loss: 2.079945, val loss: 1.369010, accuracy: 49.04, time: 2.3968 min
----------
Epoch 3/49, current lr=0.001
Copied best model weights!
train loss: 1.870917, val loss: 1.363517, accuracy: 53.05, time: 3.1875 min
----------
Epoch 4/49, current lr=0.001
Copied best model weights!
train loss: 1.693205, val loss: 1.312639, accuracy: 54.36, time: 3.9822 min
----------
Epoch 5/49, current lr=0.001
Copied best model weights!
train loss: 1.525838, val loss: 1.248575, accuracy: 56.10, time: 4.7768 min
----------
Epoch 6/49, current lr=0.001
Copied best model weights!
train loss: 1.433418, val loss: 1.010976, accuracy: 64.41, time: 5.5713 min
----------