# DL(CNN) 과제 자료

출처: https://github.com/chengyangfu/pytorch-vgg-cifar10

VGG16 모델을 CIFAR-10 데이터셋으로 정확도(Prec@1) 40% 이상이 될 때까지 훈련시켜보세요!

In [1]:
import torch
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn as nn
import time

In [2]:
# DON'T TOUCH
from torchvision import models

# Build VGG-16 without pretrained weights (weights=None) and move it to the
# GPU in a single expression, then print the layer structure.
model = models.vgg16(weights=None).cuda()
print(model)
# DON'T TOUCH

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [3]:
# DON'T TOUCH
# Per-channel normalisation shared by the training and validation pipelines.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Training split: random horizontal flip and a random 32x32 crop with padding 4
# are applied before tensor conversion and normalisation. The dataset is
# downloaded into ./data when requested by download=True.
train_loader = torch.utils.data.DataLoader(
    dataset=datasets.CIFAR10(
        root='./data',
        train=True,
        transform=transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomCrop(size=32, padding=4),
            transforms.ToTensor(),
            normalize,
        ]),
        download=True,
    ),
    batch_size=64,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
)

# Validation split: no augmentation, only tensor conversion and normalisation,
# read in a fixed order (shuffle=False).
val_loader = torch.utils.data.DataLoader(
    dataset=datasets.CIFAR10(
        root='./data',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]),
    ),
    batch_size=128,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
)
# DON'T TOUCH

Files already downloaded and verified


  cpuset_checked))


In [4]:
# DON'T TOUCH
def accuracy(output, target, topk=(1,)):
    """Compute precision@k (top-k accuracy, in percent) for each k in ``topk``.

    Args:
        output: Score tensor indexed as ``(batch, classes)``; the top-k
            selection is taken along dimension 1.
        target: Tensor of class indices, one per sample; its first dimension
            is used as the batch size.
        topk: Iterable of k values to evaluate.

    Returns:
        A list with one 0-dim float tensor per entry of ``topk`` (same order),
        each holding the percentage of samples whose target is among the k
        highest-scoring classes.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    # Indices of the maxk highest scores per sample, best first: (batch, maxk).
    _, pred = output.topk(maxk, 1, True, True)
    # Transpose to (maxk, batch) so row r holds every sample's rank-r guess.
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # reshape, not view: ``correct`` is derived from a transposed tensor,
        # so its first k rows are not guaranteed to be contiguous and
        # ``view(-1)`` can raise a RuntimeError when k > 1.
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
# DON'T TOUCH

In [5]:
# DON'T TOUCH
class AverageMeter:
    """Keep the latest value and the running weighted mean of a series.

    Attributes set by ``reset`` and maintained by ``update``:
        val: the most recent value passed to ``update``.
        sum: the accumulated ``val * n`` over all updates.
        count: the accumulated ``n`` over all updates.
        avg: ``sum / count`` as of the last update.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Set every tracked statistic back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the latest value, weighted as ``n`` observations."""
        self.val = val
        self.sum += n * val if False else val * n
        self.count += n
        self.avg = self.sum / self.count
# DON'T TOUCH

In [6]:
# DON'T TOUCH
# Cross-entropy loss module; built first, then moved to the GPU.
criterion = nn.CrossEntropyLoss()
criterion = criterion.cuda()
# DON'T TOUCH

In [7]:
#TODO: Learning rate scheduler! VERY IMPORTANT
def adjust_learning_rate(optimizer, epoch, lr, decay=0.001, min_lr=0.0):
    """Set the optimizer's learning rate for ``epoch`` using linear decay.

    The new rate is ``max(lr - decay * epoch, min_lr)`` and is written into
    every parameter group of ``optimizer``.

    Args:
        optimizer: Object exposing ``param_groups``, a list of dicts that each
            carry an ``'lr'`` entry.
        epoch: Epoch index the rate is computed for.
        lr: Base learning rate (the rate used at epoch 0).
        decay: Amount subtracted from ``lr`` per epoch. Defaults to 0.001, the
            constant this function previously hard-coded.
        min_lr: Lower bound for the resulting rate. Without a floor the linear
            schedule becomes negative as soon as ``decay * epoch > lr`` (for
            example at epoch 1 with ``lr = 0.0001``), and a negative rate
            steps the parameters in the direction that increases the loss.
    """
    new_lr = max(lr - decay * epoch, min_lr)

    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr

In [8]:
#TODO: MOST IMPORTANT
# Base learning rate passed to the optimizer constructed below.
lr = 0.05

#TODO: If you want to study further, it is worth looking up what each of the optimizer arguments means.
# If you do not, you can finish the assignment with the defaults, without setting any of them!
# SGD over all model parameters; only the learning rate is specified here.
optimizer = torch.optim.SGD(model.parameters(), lr)
# The string literal below is not executed as code: it is a fill-in template
# for an SGD optimizer with weight decay, momentum and Nesterov settings. As
# the last expression in the cell, the string itself is echoed as cell output.
"""
optimizer = torch.optim.SGD(model.parameters(), lr,
                            weight_decay = #TODO,
                            momentum = #TODO,
                            nesterov= #TODO
                            )
"""

'\noptimizer = torch.optim.SGD(model.parameters(), lr,\n                            weight_decay = #TODO,\n                            momentum = #TODO,\n                            nesterov= #TODO\n                            )\n'

In [15]:
# Custom Optimizer 1: SGD with Nesterov momentum and a small learning rate.
lr = 0.0001

optimizer = torch.optim.SGD(
    params=model.parameters(),
    lr=lr,
    momentum=0.85,
    nesterov=True,
)

In [18]:
# Custom Optimizer 2: Adam over all model parameters with a small learning rate.
lr = 0.0001
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)

In [19]:
# DON'T TOUCH
def train(train_loader, model, criterion, optimizer, epoch, print_freq=10):
    """Run one training epoch over ``train_loader``.

    For every batch: forward pass, loss, backward pass and one optimizer
    step. Running statistics (batch time, data-loading time, loss, top-1
    precision) are tracked with ``AverageMeter`` and printed periodically.

    Args:
        train_loader: Iterable yielding ``(input, target)`` batches.
        model: Module to train; it is switched to train mode.
        criterion: Callable computing the loss from ``(output, target)``.
        optimizer: Optimizer that updates the model's parameters.
        epoch: Epoch index, used only in the progress line.
        print_freq: Print a progress line every ``print_freq`` batches;
            a falsy value (0 or None) disables the progress output.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to train mode
    model.train()

    # Send each batch to the device the model lives on instead of assuming
    # CUDA, so the same loop also works when the model is kept on the CPU.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    end = time.time()
    # ``images`` rather than ``input`` to avoid shadowing the builtin.
    for i, (images, target) in enumerate(train_loader):

        # measure data loading time
        data_time.update(time.time() - end)

        images = images.to(device)
        target = target.to(device)

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # compute gradient and do one optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        output = output.float()
        loss = loss.float()
        # measure accuracy and record loss, weighted by the batch size so the
        # running averages stay correct when the last batch is smaller
        batch_len = images.size(0)
        prec1 = accuracy(output.detach(), target)[0]
        losses.update(loss.item(), batch_len)
        top1.update(prec1.item(), batch_len)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if print_freq and i % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses, top1=top1))
# DON'T TOUCH

In [20]:
# DON'T TOUCH
# Train for 10 epochs. The learning rate stays at the value the optimizer was
# constructed with, because the scheduler call inside the loop is commented out.
for epoch in range(10):
  # Learning-rate schedule hook, currently disabled:
  # adjust_learning_rate(optimizer, epoch, lr)
  # train for one epoch
  train(train_loader, model, criterion, optimizer, epoch)
# DON'T TOUCH

  cpuset_checked))


Epoch: [0][0/782]	Time 0.398 (0.398)	Data 0.271 (0.271)	Loss 0.5985 (0.5985)	Prec@1 78.125 (78.125)
Epoch: [0][10/782]	Time 0.097 (0.127)	Data 0.000 (0.025)	Loss 0.6153 (0.6707)	Prec@1 79.688 (78.267)
Epoch: [0][20/782]	Time 0.096 (0.113)	Data 0.000 (0.013)	Loss 0.5154 (0.6224)	Prec@1 82.812 (79.688)
Epoch: [0][30/782]	Time 0.096 (0.108)	Data 0.000 (0.009)	Loss 0.7942 (0.6177)	Prec@1 76.562 (79.688)
Epoch: [0][40/782]	Time 0.099 (0.105)	Data 0.000 (0.007)	Loss 0.9516 (0.6359)	Prec@1 62.500 (79.345)
Epoch: [0][50/782]	Time 0.097 (0.104)	Data 0.000 (0.006)	Loss 0.7789 (0.6441)	Prec@1 78.125 (78.891)
Epoch: [0][60/782]	Time 0.096 (0.102)	Data 0.000 (0.005)	Loss 0.7499 (0.6547)	Prec@1 70.312 (78.279)
Epoch: [0][70/782]	Time 0.099 (0.102)	Data 0.000 (0.004)	Loss 0.6043 (0.6412)	Prec@1 81.250 (78.719)
Epoch: [0][80/782]	Time 0.097 (0.101)	Data 0.000 (0.004)	Loss 0.5891 (0.6388)	Prec@1 75.000 (78.742)
Epoch: [0][90/782]	Time 0.097 (0.101)	Data 0.000 (0.003)	Loss 0.4737 (0.6437)	Prec@1 87.500 

In [22]:
# DON'T TOUCH
def validate(val_loader, model, criterion, print_freq=10):
    """Evaluate ``model`` on ``val_loader`` and return the average top-1 precision.

    The model is switched to eval mode and the forward pass and loss are
    computed under ``torch.no_grad()``. Running statistics (batch time, loss,
    top-1 precision) are tracked with ``AverageMeter``; a summary line with
    the final average precision is always printed.

    Args:
        val_loader: Iterable yielding ``(input, target)`` batches.
        model: Module to evaluate.
        criterion: Callable computing the loss from ``(output, target)``.
        print_freq: Print a progress line every ``print_freq`` batches;
            a falsy value (0 or None) disables the per-batch output.

    Returns:
        The sample-weighted average top-1 precision (percent) over the loader.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    # Send each batch to the device the model lives on instead of assuming
    # CUDA, so the same loop also works when the model is kept on the CPU.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    end = time.time()
    # ``images`` rather than ``input`` to avoid shadowing the builtin.
    for i, (images, target) in enumerate(val_loader):
        images = images.to(device)
        target = target.to(device)

        # compute output without building an autograd graph
        with torch.no_grad():
            output = model(images)
            loss = criterion(output, target)

        output = output.float()
        loss = loss.float()

        # measure accuracy and record loss, weighted by the batch size so the
        # averages stay correct when the last batch is smaller
        batch_len = images.size(0)
        prec1 = accuracy(output.detach(), target)[0]
        losses.update(loss.item(), batch_len)
        top1.update(prec1.item(), batch_len)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if print_freq and i % print_freq == 0:
            print('Test: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                      i, len(val_loader), batch_time=batch_time, loss=losses,
                      top1=top1))

    print(' * Prec@1 {top1.avg:.3f}'
          .format(top1=top1))

    return top1.avg
# DON'T TOUCH

## 모델 학습 결과 확인!
10 epoch만 학습시킨 모델의 검증 정확도(Prec@1)가 40%를 넘은 화면을 캡처해서 올려주세요!

In [23]:
# DON'T TOUCH
# Evaluate the trained model on the validation loader. As the last expression
# in the cell, the returned average Prec@1 is also shown as the cell's output.
validate(val_loader,model,criterion)
# DON'T TOUCH

  cpuset_checked))


Test: [0/79]	Time 0.273 (0.273)	Loss 0.4211 (0.4211)	Prec@1 87.500 (87.500)
Test: [10/79]	Time 0.030 (0.065)	Loss 0.3906 (0.4293)	Prec@1 86.719 (87.074)
Test: [20/79]	Time 0.032 (0.050)	Loss 0.3613 (0.4878)	Prec@1 88.281 (85.119)
Test: [30/79]	Time 0.026 (0.044)	Loss 0.4018 (0.4984)	Prec@1 87.500 (85.030)
Test: [40/79]	Time 0.025 (0.041)	Loss 0.5484 (0.4928)	Prec@1 78.906 (85.118)
Test: [50/79]	Time 0.032 (0.039)	Loss 0.6177 (0.4928)	Prec@1 83.594 (85.080)
Test: [60/79]	Time 0.029 (0.038)	Loss 0.6097 (0.4973)	Prec@1 83.594 (85.067)
Test: [70/79]	Time 0.033 (0.037)	Loss 0.5328 (0.5011)	Prec@1 87.500 (85.002)
 * Prec@1 85.020


85.02