In [2]:
import sys
import numpy as np
import random
import visdom
import subprocess

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torchvision
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data.sampler import SubsetRandomSampler

In [3]:
# visdom setting
vis = visdom.Visdom()
vis.close(env="main")

# make plot
loss_plt = vis.line(Y=torch.Tensor(1).zero_(),
                    opts=dict(title = 'VGG_Loss_Tracker',
                              legend=['T_loss', 'V_loss'],
                             showlegend=True
                             )
                   )

def loss_tracker(loss_plot, loss_value, num, name):
    vis.line(X = num,
            Y = loss_value,
            win = loss_plot,
            name = name,
            update = 'append'
            )

Setting up a new session...


In [4]:
# random seed
torch.manual_seed(555)
torch.cuda.manual_seed_all(555)
np.random.seed(555)

In [5]:
# CUDA 설정

GPU_NUM = 1
device = torch.device(f'cuda:{GPU_NUM}' if torch.cuda.is_available() else 'cpu')

#device = torch.device('cuda')
print("설정된 학습용 기기 :",device)

설정된 학습용 기기 : cuda:1


In [6]:
# Hyperparameter
lr = 0.1
epochs = 3
batch_size = 128

In [7]:
# Data 전처리
transform = transforms.Compose([transforms.ToTensor()]
                              )
# Data load
trainset = dsets.CIFAR10('../CIFAR10/',
                         train=True,
                         transform = transform,
                         download=False)

# transforms.Normalize
train_data_mean = trainset.data.mean( axis=(0,1,2) )
train_data_std = trainset.data.std( axis=(0,1,2) )

# 각 pixel은 0~255값을 가지므로 이를 나누어 정규화한다.
train_data_mean /= 255
train_data_std /= 255

transform_test = transforms.Compose([transforms.Resize(40),
                                     transforms.RandomCrop(32),
                                    transforms.ToTensor(),
                               transforms.Normalize(train_data_mean, train_data_std)
                               ])

transform = transforms.Compose([transforms.ToTensor(),
                               transforms.Normalize(train_data_mean, train_data_std)
                               ])

# Normalize 된 dataset으로 reload
trainset = dsets.CIFAR10('../CIFAR10/',
                         train=True,
                         transform = transform_test,
                         download=False)

valset = dsets.CIFAR10('../CIFAR10/',
                         train=True,
                         transform = transform,
                         download=False)

testset = dsets.CIFAR10('../CIFAR10/',
                         train=False,
                         transform = transform,
                         download=False)

# validation set 분류
validation_ratio = 0.15
num_train = len(trainset)
indices = list(range(num_train))
# 설정한 비율만큼 분할 시의 data 갯수
split = int(np.floor(validation_ratio * num_train))
# shuffle
np.random.shuffle(indices)
# data 분할
train_idx, val_idx = indices[split:], indices[:split]
train_sampler = SubsetRandomSampler(train_idx)
val_sampler = SubsetRandomSampler(val_idx)

train_loader = torch.utils.data.DataLoader(dataset = trainset,
                                          batch_size = batch_size,
                                          sampler = train_sampler,
                                          drop_last = True)

val_loader = torch.utils.data.DataLoader(dataset = valset,
                                          batch_size = batch_size,
                                          sampler = val_sampler,
                                          drop_last = True)

test_loader = torch.utils.data.DataLoader(dataset = testset,
                                          batch_size = 4,
                                          shuffle = False,
                                          drop_last = True)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog',
           'frog', 'horse', 'ship', 'truck')

In [9]:
# model output channel 수를 맞춰준다
model = models.vgg16()
model.fc = nn.Linear(4096, 10)
model.to(device)
print(model)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [None]:
args.prune_iterations
args.prune_type
args.prune_percent

In [8]:
# model's output channel check
'''
a = torch.Tensor(1,3,32,32).to(device)
out = model(a)
print(out)
'''

'\na = torch.Tensor(1,3,32,32).to(device)\nout = model(a)\nprint(out)\n'

In [9]:
# model 훈련 함수
def train(model, optimizer, criterion, DataLoader, total_batch):
    model.train()    
    running_loss = 0.0
    
    for i, data in enumerate(DataLoader, 0):

        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss / total_batch
    return running_loss 
    
# validation loss 함수
def loss_eval(model, criterion, DataLoader, total_batch):
    with torch.no_grad():
        model.eval()
        running_loss = 0.0
        for i, data in enumerate(DataLoader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            
            outputs = model(inputs)
            loss = criterion(outputs, labels) 
            running_loss += loss / total_batch
        return running_loss    
        
# accuracy 계산 함수
def accu_eval(DataLoader):
    with torch.no_grad():
        model.eval()
        correct = 0
        total = 0
        for i, data in enumerate(DataLoader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        return correct, total

In [10]:
criterion = nn.CrossEntropyLoss().to(device)
# 특정 가중치값이 커질수록 오버피팅이 발생할 가능성이 높아지므로
# 이를 해소하기 위해 특정값을 손실함수에 더해주는 것 (가중치 업데이트 쏠림 방지)
optimizer = optim.SGD(model.parameters(), lr = lr, momentum=0.9, weight_decay=3e-4)
lr_sche = optim.lr_scheduler.StepLR(optimizer, step_size=13, gamma=0.5)

In [11]:
# Training
t_batch = len(train_loader)
v_batch = len(val_loader)
print('Learning Start!')

for epoch in range(epochs):
    lr_sche.step()
    # model training
    t_running_loss = train(model, optimizer, criterion, train_loader, t_batch)

    # validation loss
    v_running_loss = loss_eval(model, criterion, val_loader, v_batch)

    # validation accuracy
    correct, total = accu_eval(val_loader)

    # Plot & print
    loss_tracker(loss_plt, torch.Tensor([t_running_loss]), torch.Tensor([epoch]), 'T_loss')
    loss_tracker(loss_plt, torch.Tensor([v_running_loss]), torch.Tensor([epoch]), 'V_loss')

    print('[epoch : %d] (T_loss: %.5f) ' % (epoch + 1, t_running_loss),
          '(V_loss: %5f) ' % (v_running_loss),
          '(Val Accuract : %d %%)' % (100 * correct / total)
         )
    
print('Finished Training')

Learning Start!




[epoch : 1] (T_loss: 2.06267)  (V_loss: 1.584349)  (Val Accuract : 41 %)
[epoch : 2] (T_loss: 1.51753)  (V_loss: 1.387423)  (Val Accuract : 49 %)
[epoch : 3] (T_loss: 1.33654)  (V_loss: 1.316104)  (Val Accuract : 52 %)
[epoch : 4] (T_loss: 1.17735)  (V_loss: 1.215312)  (Val Accuract : 57 %)
[epoch : 5] (T_loss: 1.07808)  (V_loss: 1.204826)  (Val Accuract : 57 %)
[epoch : 6] (T_loss: 0.99805)  (V_loss: 1.020895)  (Val Accuract : 63 %)
[epoch : 7] (T_loss: 0.92629)  (V_loss: 1.098331)  (Val Accuract : 62 %)
[epoch : 8] (T_loss: 0.88024)  (V_loss: 0.951883)  (Val Accuract : 66 %)
[epoch : 9] (T_loss: 0.83776)  (V_loss: 0.944779)  (Val Accuract : 67 %)
[epoch : 10] (T_loss: 0.80634)  (V_loss: 0.866553)  (Val Accuract : 68 %)
[epoch : 11] (T_loss: 0.76882)  (V_loss: 0.996573)  (Val Accuract : 66 %)
[epoch : 12] (T_loss: 0.74905)  (V_loss: 1.041671)  (Val Accuract : 64 %)
[epoch : 13] (T_loss: 0.60526)  (V_loss: 0.835952)  (Val Accuract : 71 %)
[epoch : 14] (T_loss: 0.57611)  (V_loss: 0.7671

In [12]:
# model test
correct, total = accu_eval(test_loader)
print('Accuracy (testset) : %.3f %%' % (100*correct / total))

Accuracy (testset) : 73.440 %
