In [1]:
import sys
import numpy as np
import random
import visdom
import subprocess

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torchvision
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torchvision.models as models
import torch.nn.functional as F
from torch.utils.data.sampler import SubsetRandomSampler

In [2]:
# visdom setting
vis = visdom.Visdom()
vis.close(env="main")

# make plot
loss_plt = vis.line(Y=torch.Tensor(1).zero_(),
                    opts=dict(title = 'VGG_Loss_Tracker',
                              legend=['T_loss', 'V_loss'],
                             showlegend=True
                             )
                   )

def loss_tracker(loss_plot, loss_value, num, name):
    vis.line(X = num,
            Y = loss_value,
            win = loss_plot,
            name = name,
            update = 'append'
            )

Setting up a new session...


In [3]:
# random seed
torch.manual_seed(555)
torch.cuda.manual_seed_all(555)
np.random.seed(555)

In [4]:
# CUDA 설정

GPU_NUM = 1
device = torch.device(f'cuda:{GPU_NUM}' if torch.cuda.is_available() else 'cpu')

#device = torch.device('cuda')
print("설정된 학습용 기기 :",device)

설정된 학습용 기기 : cuda:1


In [5]:
# Hyperparameter
lr = 0.1
epochs = 30
batch_size = 128

In [6]:
# Data 전처리
transform = transforms.Compose([transforms.ToTensor()]
                              )
# Data load
trainset = dsets.CIFAR10('../CIFAR10/',
                         train=True,
                         transform = transform,
                         download=False)

# transforms.Normalize
train_data_mean = trainset.data.mean( axis=(0,1,2) )
train_data_std = trainset.data.std( axis=(0,1,2) )

# 각 pixel은 0~255값을 가지므로 이를 나누어 정규화한다.
train_data_mean /= 255
train_data_std /= 255

transform_test = transforms.Compose([transforms.Resize(40),
                                     transforms.RandomCrop(32),
                                    transforms.ToTensor(),
                               transforms.Normalize(train_data_mean, train_data_std)
                               ])

transform = transforms.Compose([transforms.ToTensor(),
                               transforms.Normalize(train_data_mean, train_data_std)
                               ])

# Normalize 된 dataset으로 reload
trainset = dsets.CIFAR10('../CIFAR10/',
                         train=True,
                         transform = transform_test,
                         download=False)

valset = dsets.CIFAR10('../CIFAR10/',
                         train=True,
                         transform = transform,
                         download=False)

testset = dsets.CIFAR10('../CIFAR10/',
                         train=False,
                         transform = transform,
                         download=False)

# validation set 분류
validation_ratio = 0.15
num_train = len(trainset)
indices = list(range(num_train))
# 설정한 비율만큼 분할 시의 data 갯수
split = int(np.floor(validation_ratio * num_train))
# shuffle
np.random.shuffle(indices)
# data 분할
train_idx, val_idx = indices[split:], indices[:split]
train_sampler = SubsetRandomSampler(train_idx)
val_sampler = SubsetRandomSampler(val_idx)

train_loader = torch.utils.data.DataLoader(dataset = trainset,
                                          batch_size = batch_size,
                                          sampler = train_sampler,
                                          drop_last = True)

val_loader = torch.utils.data.DataLoader(dataset = valset,
                                          batch_size = batch_size,
                                          sampler = val_sampler,
                                          drop_last = True)

test_loader = torch.utils.data.DataLoader(dataset = testset,
                                          batch_size = 4,
                                          shuffle = False,
                                          drop_last = True)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog',
           'frog', 'horse', 'ship', 'truck')

In [7]:
# model output channel 수를 맞춰준다
model = models.resnet18()
model.fc = nn.Linear(512, 10)
model.to(device)
#print(model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [8]:
# model's output channel check
'''
a = torch.Tensor(1,3,32,32).to(device)
out = model(a)
print(out)
'''

'\na = torch.Tensor(1,3,32,32).to(device)\nout = model(a)\nprint(out)\n'

In [9]:
# model 훈련 함수
def train(model, optimizer, criterion, DataLoader, total_batch):
    model.train()    
    running_loss = 0.0
    
    for batch_idx, (data, label) in enumerate(DataLoader):
        data, label = data.to(device), label.to(device)
                
        """
        if batch_idx == 0:
            print(data, label)
        """
        
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, label)
        loss.backward()
        optimizer.step()
        
        running_loss += loss / total_batch
    return running_loss 

"""
def train(model, dataloader, optimizer, criterion, cp_mask):
    model.train()
    running_loss = 0.0
    #EPS = 1e-6
    for batch_idx, (data, label) in enumerate(dataloader):
        data, label = data.to(device), label.to(device)
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, label)
        loss.backward()

        if cp_mask:
            # 0-weight 학습 방지 code
            i = 0
            for name, p in model.named_parameters():
                if 'weight' in name:
                    p.grad.data *= cp_mask[i]
                    i += 1

        optimizer.step()

        running_loss = loss / len(dataloader)
    return running_loss"""



"""# validation loss 함수
def loss_eval(model, criterion, DataLoader, total_batch):
    with torch.no_grad():
        model.eval()
        running_loss = 0.0
        for i, data in enumerate(DataLoader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            
            outputs = model(inputs)
            loss = criterion(outputs, labels) 
            running_loss += loss / total_batch
        return running_loss    
        
# accuracy 계산 함수
def accu_eval(DataLoader):
    with torch.no_grad():
        model.eval()
        correct = 0
        total = 0
        for i, data in enumerate(DataLoader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        return correct, total"""

'# validation loss 함수\ndef loss_eval(model, criterion, DataLoader, total_batch):\n    with torch.no_grad():\n        model.eval()\n        running_loss = 0.0\n        for i, data in enumerate(DataLoader, 0):\n            inputs, labels = data\n            inputs, labels = inputs.to(device), labels.to(device)\n            \n            outputs = model(inputs)\n            loss = criterion(outputs, labels) \n            running_loss += loss / total_batch\n        return running_loss    \n        \n# accuracy 계산 함수\ndef accu_eval(DataLoader):\n    with torch.no_grad():\n        model.eval()\n        correct = 0\n        total = 0\n        for i, data in enumerate(DataLoader, 0):\n            inputs, labels = data\n            inputs, labels = inputs.to(device), labels.to(device)\n            outputs = model(inputs)\n            \n            _, predicted = torch.max(outputs.data, 1)\n            total += labels.size(0)\n            correct += (predicted == labels).sum().item()\n        re

In [10]:
def test(model, dataloader, criterion):
    model.eval()
    correct = 0
    test_loss = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (data, label) in enumerate(dataloader):
            data, label = data.to(device), label.to(device)
            outputs = model(data)
            #test_loss += F.nll_loss(outputs, label, reduction='sum').item() # sum up batch loss
            loss = criterion(outputs, label)
            predicted = outputs.data.max(1, keepdim=True)[1]
            correct += predicted.eq(label.data.view_as(predicted)).sum().item()
            
            test_loss += loss / len(dataloader)
        #test_loss /= len(test_loader.dataset)
            
        accuracy = correct / len(dataloader)

    return accuracy, test_loss

def test(model, dataloader, criterion):
    model.eval()
    correct = 0
    test_loss = 0
    with torch.no_grad():
        for data, label in dataloader:
            data, label = data.to(device), label.to(device)
            outputs = model(data)
            #test_loss += F.nll_loss(outputs, label, reduction='sum').item() # sum up batch loss
            loss = criterion(outputs, label)
            predicted = outputs.data.max(1, keepdim=True)[1]
            correct += predicted.eq(label.data.view_as(predicted)).sum().item()
            
        test_loss = loss / len(dataloader)
        accuracy =  correct / len(dataloader.dataset)
        # 로더 -> 배치 개수 로더.dataset -> 전체 길이, 
    return accuracy, test_loss

In [11]:
criterion = nn.CrossEntropyLoss().to(device)
# 특정 가중치값이 커질수록 오버피팅이 발생할 가능성이 높아지므로
# 이를 해소하기 위해 특정값을 손실함수에 더해주는 것 (가중치 업데이트 쏠림 방지)
optimizer = optim.SGD(model.parameters(), lr = lr, momentum=0.9, weight_decay=3e-4)
lr_sche = optim.lr_scheduler.StepLR(optimizer, step_size=13, gamma=0.5)

In [12]:
len(test_loader)

2500

In [13]:
# Training
t_batch = len(train_loader)
v_batch = len(val_loader)
print('Learning Start!')

for epoch in range(epochs):
    lr_sche.step()
    # model training
    t_running_loss = train(model, optimizer, criterion, train_loader, t_batch)

    # validation loss
    #v_running_loss = loss_eval(model, criterion, val_loader, v_batch)

    # validation accuracy
    #correct, total = accu_eval(val_loader)
    
    correct, v_running_loss = test(model, val_loader, criterion)
    
    # Plot & print
    loss_tracker(loss_plt, torch.Tensor([t_running_loss]), torch.Tensor([epoch]), 'T_loss')
    loss_tracker(loss_plt, torch.Tensor([v_running_loss]), torch.Tensor([epoch]), 'V_loss')

    print('[epoch : %d] (T_loss: %.5f) ' % (epoch + 1, t_running_loss),
          '(V_loss: %5f) ' % (v_running_loss),
          #'(Val Accuract : %d %%)' % (100 * correct / total)
          '(Val Accuract : %d %%)' % (correct)
         )
    
print('Finished Training')

Learning Start!




[epoch : 1] (T_loss: 2.12849)  (V_loss: 1.624769)  (Val Accuract : 49 %)
[epoch : 2] (T_loss: 1.56727)  (V_loss: 1.447676)  (Val Accuract : 61 %)
[epoch : 3] (T_loss: 1.36953)  (V_loss: 1.371854)  (Val Accuract : 64 %)
[epoch : 4] (T_loss: 1.23542)  (V_loss: 1.231578)  (Val Accuract : 72 %)
[epoch : 5] (T_loss: 1.11280)  (V_loss: 1.133960)  (Val Accuract : 76 %)
[epoch : 6] (T_loss: 1.02569)  (V_loss: 1.154103)  (Val Accuract : 76 %)
[epoch : 7] (T_loss: 0.96215)  (V_loss: 1.056794)  (Val Accuract : 81 %)
[epoch : 8] (T_loss: 0.90450)  (V_loss: 1.068637)  (Val Accuract : 81 %)
[epoch : 9] (T_loss: 0.86479)  (V_loss: 1.042346)  (Val Accuract : 83 %)
[epoch : 10] (T_loss: 0.82380)  (V_loss: 0.954766)  (Val Accuract : 85 %)
[epoch : 11] (T_loss: 0.79894)  (V_loss: 1.142243)  (Val Accuract : 79 %)
[epoch : 12] (T_loss: 0.77304)  (V_loss: 1.011878)  (Val Accuract : 85 %)
[epoch : 13] (T_loss: 0.63049)  (V_loss: 0.863099)  (Val Accuract : 90 %)
[epoch : 14] (T_loss: 0.59470)  (V_loss: 0.8538

In [14]:
# model test
correct, total = accu_eval(test_loader)
print('Accuracy (testset) : %.3f %%' % (100*correct / total))

NameError: name 'accu_eval' is not defined

[epoch : 1] (T_loss: 2.11550)  (V_loss: 1.610088)  (Val Accuract : 40 %)
[epoch : 2] (T_loss: 1.48724)  (V_loss: 1.418673)  (Val Accuract : 47 %)
[epoch : 3] (T_loss: 1.32180)  (V_loss: 1.354464)  (Val Accuract : 51 %)
[epoch : 4] (T_loss: 1.18753)  (V_loss: 1.269082)  (Val Accuract : 55 %)