- reference : https://www.youtube.com/watch?v=qx9uglq80Qs&list=PLSAJwo7mw8jn8iaXwT4MqLbZnS-LJwnBd&index=15

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import argparse
import numpy as np
import time

## Data Preparation
- 데이터셋을 partition이라는 dict에 담아 두는 이유: DataLoader를 train/validate/test 함수 안에서 생성하므로 batch size도 hyperparameter(args)로 바꿔 가며 실험할 수 있음
- 따라서 partition['train']은 train에 해당하는 데이터셋(Dataset 객체)임

In [2]:
# Convert images to tensors, then normalize every RGB channel with
# mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

full_trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                             download=True, transform=transform)

# The global RNG is only seeded later, in the experiment cell, so without an
# explicit generator the 40000/10000 train/val split would differ between
# runs. A dedicated seeded generator keeps the split reproducible.
split_generator = torch.Generator().manual_seed(123)
trainset, valset = torch.utils.data.random_split(
    full_trainset, [40000, 10000], generator=split_generator)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)

# The datasets (not DataLoaders) are stored so that train/validate/test can
# build their own loaders with the batch size taken from `args`.
partition = {'train': trainset, 'val': valset, 'test': testset}

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


## Model Architecture

In [3]:
class MLP(nn.Module):
    """Multi-layer perceptron with optional batch norm, dropout and Xavier init.

    Structure: ``fc1`` (in_dim -> hid_dim), then ``n_layer - 1`` hidden
    ``Linear(hid_dim, hid_dim)`` layers, then ``fc2`` (hid_dim -> out_dim),
    whose output is returned without an activation.

    Args:
        in_dim: size of each input sample.
        out_dim: size of each output sample.
        hid_dim: width of every hidden layer.
        n_layer: number of Linear layers before ``fc2`` (``fc1`` plus
            ``n_layer - 1`` hidden layers).
        act: activation name, one of ``'relu'``, ``'tanh'``, ``'sigmoid'``.
        dropout: dropout probability handed to ``nn.Dropout``.
        use_bn: if True, a ``BatchNorm1d`` follows the activation of every
            hidden layer.
        use_xavier: if True, :meth:`xavier_init` is called at construction.

    Raises:
        ValueError: if ``act`` is not a supported activation name.

    Note:
        After construction ``self.act`` and ``self.dropout`` hold the
        activation / dropout *modules*, not the raw constructor values.
    """

    def __init__(self, in_dim, out_dim, hid_dim, n_layer, act, dropout, use_bn, use_xavier):
        super(MLP, self).__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.hid_dim = hid_dim
        self.n_layer = n_layer
        self.use_bn = use_bn  # boolean
        self.use_xavier = use_xavier

        # ====== Create Linear Layers ====== #
        self.fc1 = nn.Linear(self.in_dim, self.hid_dim)

        self.linears = nn.ModuleList()
        self.bns = nn.ModuleList()
        for _ in range(self.n_layer - 1):
            self.linears.append(nn.Linear(self.hid_dim, self.hid_dim))
            if self.use_bn:
                # One BatchNorm per hidden layer (the output layer has none).
                self.bns.append(nn.BatchNorm1d(self.hid_dim))

        self.fc2 = nn.Linear(self.hid_dim, self.out_dim)

        # ====== Create Activation Function ====== #
        if act == 'relu':
            self.act = nn.ReLU()
        elif act == 'tanh':
            # Assignment, not comparison: `==` here used to leave the
            # activation as the plain string 'tanh'.
            self.act = nn.Tanh()
        elif act == 'sigmoid':
            self.act = nn.Sigmoid()
        else:
            raise ValueError('no valid activation function selected!')

        # ====== Create Regularization Layer ======= #
        self.dropout = nn.Dropout(dropout)  # `dropout` is a float probability
        if self.use_xavier:
            self.xavier_init()

    def forward(self, x):
        """Apply the network to ``x`` and return the output of ``fc2``."""
        x = self.act(self.fc1(x))
        for i, linear in enumerate(self.linears):
            # Order: linear -> activation -> batch norm -> dropout
            x = self.act(linear(x))
            if self.use_bn:
                # Each layer has its own statistics, hence its own BatchNorm.
                # `self.bns` is empty when use_bn is False, so it must not
                # be indexed in that case.
                x = self.bns[i](x)
            # Dropout is applied to the hidden layers only, not after
            # fc1 or fc2.
            x = self.dropout(x)
        x = self.fc2(x)
        return x

    def xavier_init(self):
        """Xavier-normal init for hidden-layer weights; biases set to 0.01."""
        # NOTE(review): only `self.linears` is re-initialized; fc1 and fc2
        # keep PyTorch's default initialization. Confirm this is intended.
        for linear in self.linears:
            nn.init.xavier_normal_(linear.weight)
            nn.init.constant_(linear.bias, 0.01)
            
# Smoke test: make sure the model can be constructed.
model = MLP(in_dim=3072, out_dim=10, hid_dim=100, n_layer=4, act='relu',
            dropout=0.1, use_bn=True, use_xavier=True)

## Train, Validate, Test and Experiment
- experiment 부분을 train, val, test로 나눔으로써 더 구조적이게 코드작성!

In [4]:
# Args: model, datasets, optimizer, loss function -> returns the trained model and epoch metrics.
def train(model, partition, optimizer, loss_fn, args):
    """Train ``model`` for one epoch on ``partition['train']``.

    Args:
        model: network to train; each batch is flattened to
            ``(batch, features)`` before being passed to it.
        partition: dict holding the training dataset under ``'train'``.
        optimizer: optimizer stepped once per batch.
        loss_fn: callable ``loss_fn(outputs, labels)`` returning a scalar loss.
        args: namespace providing ``train_batch_size``; an optional
            ``num_workers`` attribute overrides the default of 2 loader workers.

    Returns:
        Tuple ``(model, train_loss, train_acc)``: the same model object, the
        mean of the per-batch losses, and the accuracy in percent.
    """
    trainloader = torch.utils.data.DataLoader(
        partition['train'],
        batch_size=args.train_batch_size,
        shuffle=True,
        num_workers=getattr(args, 'num_workers', 2))
    model.train()

    # Follow the model's device instead of hard-coding .cuda(), so the same
    # function works for a model on GPU or on CPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    correct = 0
    total = 0
    train_loss = 0.0
    for inputs, labels in trainloader:
        # Reset gradients every iteration (not once per epoch) so they do
        # not accumulate across batches.
        optimizer.zero_grad()

        # Flatten every sample to a vector; for CIFAR-10 this is 3*32*32 = 3072.
        inputs = inputs.view(inputs.size(0), -1).to(device)
        labels = labels.to(device)
        outputs = model(inputs)

        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        predicted = outputs.detach().argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    train_loss = train_loss / len(trainloader)
    train_acc = 100 * correct / total
    return model, train_loss, train_acc

> train 함수는 학습된 model을 반환함. PyTorch에서는 optimizer.step()이 파라미터를 in-place로 갱신하므로 반환값을 받지 않아도 같은 model 객체가 학습되지만, 이후 단계에서 학습된 모델을 사용한다는 점을 호출부에서 명확히 하기 위해 return model을 유지함

In [5]:
# Args: model, datasets, loss function (no optimizer: nothing is trained here).
def validate(model, partition, loss_fn, args):
    """Evaluate ``model`` on ``partition['val']`` without updating it.

    Args:
        model: network to evaluate; it is switched to eval mode.
        partition: dict holding the validation dataset under ``'val'``.
        loss_fn: callable ``loss_fn(outputs, labels)`` returning a scalar loss.
        args: namespace providing ``test_batch_size``; an optional
            ``num_workers`` attribute overrides the default of 2 loader workers.

    Returns:
        Tuple ``(val_loss, val_acc)``: the mean of the per-batch losses and
        the accuracy in percent.
    """
    valloader = torch.utils.data.DataLoader(
        partition['val'],
        batch_size=args.test_batch_size,
        shuffle=False,
        num_workers=getattr(args, 'num_workers', 2))
    model.eval()

    # Follow the model's device instead of hard-coding .cuda().
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    correct = 0
    total = 0
    val_loss = 0
    with torch.no_grad():
        for images, labels in valloader:
            # Flatten every sample to a vector before the MLP sees it.
            images = images.view(images.size(0), -1).to(device)
            labels = labels.to(device)
            outputs = model(images)

            val_loss += loss_fn(outputs, labels).item()
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Average over batches (each batch loss is itself whatever loss_fn reduces to).
    val_loss = val_loss / len(valloader)
    val_acc = 100 * correct / total
    return val_loss, val_acc

In [6]:
# Args: model, datasets (no optimizer and no loss function: only accuracy is computed).
def test(model, partition, args):
    """Return the accuracy (in percent) of ``model`` on ``partition['test']``.

    Args:
        model: network to evaluate; it is switched to eval mode.
        partition: dict holding the test dataset under ``'test'``.
        args: namespace providing ``test_batch_size``; an optional
            ``num_workers`` attribute overrides the default of 2 loader workers.

    Returns:
        Accuracy over the whole test set, as a percentage.
    """
    testloader = torch.utils.data.DataLoader(
        partition['test'],
        batch_size=args.test_batch_size,
        shuffle=False,
        num_workers=getattr(args, 'num_workers', 2))
    model.eval()

    # Follow the model's device instead of hard-coding .cuda().
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in testloader:
            # Flatten every sample to a vector before the MLP sees it.
            images = images.view(images.size(0), -1).to(device)
            labels = labels.to(device)

            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    test_acc = 100 * correct / total
    return test_acc


# The name matches pytest's default `test*` pattern; mark it so pytest does
# not try to collect this evaluation helper as a test case when it is imported.
test.__test__ = False

In [7]:
def experiment(partition, args):
    """Build an MLP from ``args``, train it for ``args.epoch`` epochs, then test it.

    Args:
        partition: dict with the ``'train'``, ``'val'`` and ``'test'`` datasets.
        args: namespace with the model settings (``in_dim``, ``out_dim``,
            ``hid_dim``, ``n_layer``, ``act``, ``dropout``, ``use_bn``,
            ``use_xavier``), the optimizer settings (``optim``, ``lr``, ``l2``)
            and ``epoch``; it is also forwarded to train/validate/test.

    Returns:
        Tuple ``(train_loss, val_loss, train_acc, val_acc, test_acc)`` with
        the metrics of the last epoch. The first four are ``None`` when
        ``args.epoch`` is 0.

    Raises:
        ValueError: if ``args.optim`` is not 'SGD', 'RMSprop' or 'Adam'.
    """
    model = MLP(args.in_dim, args.out_dim, args.hid_dim, args.n_layer, args.act, args.dropout, args.use_bn, args.use_xavier)
    # Use the GPU when one is available; otherwise leave the model on the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    loss_fn = nn.CrossEntropyLoss()
    if args.optim == 'SGD':
        # This branch used to build RMSprop, so 'SGD' silently ran RMSprop.
        optimizer = optim.SGD(model.parameters(), lr=args.lr, weight_decay=args.l2)
    elif args.optim == 'RMSprop':
        optimizer = optim.RMSprop(model.parameters(), lr=args.lr, weight_decay=args.l2)
    elif args.optim == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.l2)
    else:
        raise ValueError('In-valid optimizer choice')

    # Pre-bind the metrics so the return statement is valid even with 0 epochs.
    train_loss = val_loss = train_acc = val_acc = None
    for epoch in range(args.epoch):  # loop over the dataset multiple times
        ts = time.time()
        model, train_loss, train_acc = train(model, partition, optimizer, loss_fn, args)
        val_loss, val_acc = validate(model, partition, loss_fn, args)
        te = time.time()
        print('Epoch {}, Acc(train/val): {:2.2f}/{:2.2f}, Loss(train/val) {:2.2f}/{:2.2f}. Took {:2.2f} sec'.format(epoch, train_acc, val_acc, train_loss, val_loss, te-ts))

    test_acc = test(model, partition, args)
    return train_loss, val_loss, train_acc, val_acc, test_acc

## Experiment
- colab gpu 속도 이점 얻기위해서 배치사이즈 좀 크게해야 할 필요가 있음
- 128~512사이정도로

In [8]:
# ====== Random Seed Initialization ====== #
seed = 123
np.random.seed(seed)
torch.manual_seed(seed)

# No command-line flags are defined; the parser only provides an empty
# Namespace that is filled in attribute by attribute below.
parser = argparse.ArgumentParser()
args = parser.parse_args([])

# ====== Model Capacity ====== #
args.in_dim = 3072
args.out_dim = 10
args.hid_dim = 100
args.act = 'relu'

# ====== Regularization ======= #
args.dropout = 0.2
args.use_bn = True
args.l2 = 0.00001
args.use_xavier = True

# ====== Optimizer & Training ====== #
args.optim = 'RMSprop'  # one of 'SGD', 'RMSprop', 'Adam' (case-sensitive)
args.lr = 0.0015
args.epoch = 10

args.train_batch_size = 256
args.test_batch_size = 1024

# ====== Experiment Variable ====== #
# Every combination of the two swept hyper-parameters is run once.
name_var1 = 'n_layer'
name_var2 = 'hid_dim'
# NOTE(review): 3 appears twice, so the n_layer=3 configurations are run
# twice. Confirm whether a different value (e.g. 2) was intended.
list_var1 = [3, 3, 4]
list_var2 = [500, 300, 700]

# Keep the metrics of every run; `result` alone only holds the last one.
results = []
for var1 in list_var1:
    for var2 in list_var2:
        setattr(args, name_var1, var1)
        setattr(args, name_var2, var2)
        print(args)
        result = experiment(partition, args)
        results.append({name_var1: var1, name_var2: var2, 'result': result})

Namespace(act='relu', dropout=0.2, epoch=10, hid_dim=500, in_dim=3072, l2=1e-05, lr=0.0015, n_layer=3, optim='RMSprop', out_dim=10, test_batch_size=1024, train_batch_size=256, use_bn=True, use_xavier=True)
Epoch 0, Acc(train/val): 35.49/38.25, Loss(train/val) 1.85/1.76. Took 12.53 sec
Epoch 1, Acc(train/val): 45.04/39.72, Loss(train/val) 1.54/1.71. Took 9.62 sec
Epoch 2, Acc(train/val): 49.41/45.21, Loss(train/val) 1.41/1.56. Took 9.51 sec
Epoch 3, Acc(train/val): 52.08/40.97, Loss(train/val) 1.34/1.72. Took 9.47 sec
Epoch 4, Acc(train/val): 55.19/44.75, Loss(train/val) 1.26/1.60. Took 9.43 sec
Epoch 5, Acc(train/val): 57.42/45.29, Loss(train/val) 1.19/2.44. Took 9.42 sec
Epoch 6, Acc(train/val): 59.71/48.89, Loss(train/val) 1.13/1.49. Took 9.43 sec
Epoch 7, Acc(train/val): 62.01/48.82, Loss(train/val) 1.07/1.51. Took 9.52 sec
Epoch 8, Acc(train/val): 63.83/51.79, Loss(train/val) 1.01/1.43. Took 9.61 sec
Epoch 9, Acc(train/val): 65.83/50.73, Loss(train/val) 0.96/1.51. Took 9.62 sec
Nam