# 패키지


In [1]:
import time
import copy

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler

import torchvision
import torchvision.models as models
from torchvision import datasets, transforms

is_cuda = torch.cuda.is_available()

print('Torch Version : {}'.format(torch.__version__))
print('CUDA Available : {}'.format(is_cuda))
if is_cuda :
  cuda_count = torch.cuda.device_count()
  print('CUDA Count : {}'.format(cuda_count))
  for i in range(cuda_count) :
    print('CUDA Device {} : {}'.format(i, torch.cuda.get_device_name(i)))

Torch Version : 1.9.0+cu102
CUDA Available : True
CUDA Count : 1
CUDA Device 0 : Tesla T4


In [2]:
#Batch size
batch_size=128
#Epoch 
num_epochs=3
#learning rate
learning_rate=0.001

# 데이터 로드

In [3]:
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

train_set=torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=data_transforms['train'])
test_set =torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=data_transforms['val'])

dataloaders=dict()
dataloaders['train']= torch.utils.data.DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
dataloaders['val']= torch.utils.data.DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False)

dataset_sizes = {x: len(dataloaders[x].dataset) for x in ['train', 'val']}

print("train 개수",dataset_sizes['train'])
print("test 개수",dataset_sizes['val'])

class_names = train_set.classes
print("class_names:",class_names)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=0.0, max=170498071.0), HTML(value='')))


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
train 개수 50000
test 개수 10000
class_names: ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']


# 모델

In [4]:
device = 'cuda' if is_cuda else 'cpu'

## AlexNet

In [8]:
AlexNet = models.alexnet(num_classes = 10).to(device)

criterion = nn.CrossEntropyLoss()
optimizer_alex = optim.SGD(AlexNet.parameters(), lr=0.001, momentum=0.9)

for epoch in range(num_epochs):  # loop over the dataset multiple times

    running_loss = 0.0
    start_time = time.time()
    for i, data in enumerate(dataloaders['train'], 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data[0].to(device), data[1].to(device)

        # zero the parameter gradients
        optimizer_alex.zero_grad()

        # forward + backward + optimize
        output = AlexNet(inputs)
        loss = criterion(output, labels)
        loss.backward()
        optimizer_alex.step()

        #Time
        end_time = time.time()
        time_taken = end_time - start_time

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
            print('Time:',time_taken)
            running_loss = 0.0

print('Finished Training of AlexNet')

[1,  2000] loss: 2.234
Time: 49.322383880615234
[1,  4000] loss: 2.093
Time: 98.53961944580078
[1,  6000] loss: 1.991
Time: 147.67357873916626
[1,  8000] loss: 1.911
Time: 196.80434226989746
[1, 10000] loss: 1.829
Time: 245.88856029510498
[1, 12000] loss: 1.812
Time: 295.0717992782593
[2,  2000] loss: 1.725
Time: 49.03990864753723
[2,  4000] loss: 1.724
Time: 98.07988286018372
[2,  6000] loss: 1.705
Time: 147.0668694972992
[2,  8000] loss: 1.646
Time: 196.06444764137268
[2, 10000] loss: 1.601
Time: 244.96015548706055
[2, 12000] loss: 1.610
Time: 293.98670625686646
[3,  2000] loss: 1.564
Time: 48.99040222167969
[3,  4000] loss: 1.527
Time: 98.14871001243591
[3,  6000] loss: 1.518
Time: 147.51282000541687
[3,  8000] loss: 1.480
Time: 196.4264476299286
[3, 10000] loss: 1.470
Time: 245.4590084552765
[3, 12000] loss: 1.450
Time: 294.4606418609619
Finished Training of AlexNet


## VGGNet

In [5]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    train_loss_list=[]
    val_acc_list=[]

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0
            iteration_count=0
            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                iteration_count+=len(inputs)
                print('Iteration {}/{}'.format(iteration_count,dataset_sizes[phase]))
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))
            
            if phase=="train":
              train_loss_list.append(epoch_loss)
            elif phase=="val":
              val_acc_list.append(epoch_acc)


            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model,train_loss_list,val_acc_list

In [6]:
vgg16 = models.vgg16(pretrained=True)
num_ftrs = vgg16.classifier[6].in_features
vgg16.classifier[6] = nn.Linear(num_ftrs, len(class_names))
vgg16 = vgg16.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
#SGD
optimizer_vgg = optim.SGD(vgg16.parameters(), lr=learning_rate, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_vgg, step_size=7, gamma=0.1)

vgg16,_,_ = train_model(vgg16, criterion, optimizer_vgg, exp_lr_scheduler, num_epochs=num_epochs)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth


HBox(children=(FloatProgress(value=0.0, max=553433881.0), HTML(value='')))


Epoch 0/2
----------
Iteration 128/50000


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Iteration 256/50000
Iteration 384/50000
Iteration 512/50000
Iteration 640/50000
Iteration 768/50000
Iteration 896/50000
Iteration 1024/50000
Iteration 1152/50000
Iteration 1280/50000
Iteration 1408/50000
Iteration 1536/50000
Iteration 1664/50000
Iteration 1792/50000
Iteration 1920/50000
Iteration 2048/50000
Iteration 2176/50000
Iteration 2304/50000
Iteration 2432/50000
Iteration 2560/50000
Iteration 2688/50000
Iteration 2816/50000
Iteration 2944/50000
Iteration 3072/50000
Iteration 3200/50000
Iteration 3328/50000
Iteration 3456/50000
Iteration 3584/50000
Iteration 3712/50000
Iteration 3840/50000
Iteration 3968/50000
Iteration 4096/50000
Iteration 4224/50000
Iteration 4352/50000
Iteration 4480/50000
Iteration 4608/50000
Iteration 4736/50000
Iteration 4864/50000
Iteration 4992/50000
Iteration 5120/50000
Iteration 5248/50000
Iteration 5376/50000
Iteration 5504/50000
Iteration 5632/50000
Iteration 5760/50000
Iteration 5888/50000
Iteration 6016/50000
Iteration 6144/50000
Iteration 6272/5000

## ResNet

In [9]:
resnet = models.resnet18(num_classes = 10).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer_res = optim.SGD(resnet.parameters(), lr=0.001, momentum=0.9)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_res, step_size=7, gamma=0.1)

resnet, _, _ = train_model(resnet, criterion, optimizer_res, exp_lr_scheduler, num_epochs=num_epochs)

Epoch 0/2
----------
Iteration 128/50000
Iteration 256/50000
Iteration 384/50000
Iteration 512/50000
Iteration 640/50000
Iteration 768/50000
Iteration 896/50000
Iteration 1024/50000
Iteration 1152/50000
Iteration 1280/50000
Iteration 1408/50000
Iteration 1536/50000
Iteration 1664/50000
Iteration 1792/50000
Iteration 1920/50000
Iteration 2048/50000
Iteration 2176/50000
Iteration 2304/50000
Iteration 2432/50000
Iteration 2560/50000
Iteration 2688/50000
Iteration 2816/50000
Iteration 2944/50000
Iteration 3072/50000
Iteration 3200/50000
Iteration 3328/50000
Iteration 3456/50000
Iteration 3584/50000
Iteration 3712/50000
Iteration 3840/50000
Iteration 3968/50000
Iteration 4096/50000
Iteration 4224/50000
Iteration 4352/50000
Iteration 4480/50000
Iteration 4608/50000
Iteration 4736/50000
Iteration 4864/50000
Iteration 4992/50000
Iteration 5120/50000
Iteration 5248/50000
Iteration 5376/50000
Iteration 5504/50000
Iteration 5632/50000
Iteration 5760/50000
Iteration 5888/50000
Iteration 6016/50000

## DenseNet

In [6]:
densenet = models.densenet161(num_classes = 10)
densenet = densenet.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
#SGD
optimizer_dense = optim.SGD(densenet.parameters(), lr=learning_rate, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_dense, step_size=7, gamma=0.1)

densenet,_,_ = train_model(densenet, criterion, optimizer_dense, exp_lr_scheduler,num_epochs=num_epochs)

Epoch 0/2
----------
Iteration 128/50000


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


RuntimeError: ignored

# 평가

In [9]:
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in dataloaders['val']:
        images = images.to(device)
        labels = labels.to(device)
        outputs = AlexNet(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Accuracy of the model on the test images: {} %'.format(100 * correct / total))
    print()

# GPU 메모리 확보를 위해 정확도 측정후 모델 삭제
del(AlexNet)

Accuracy of the model on the test images: 60.0 %



In [7]:
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in dataloaders['val']:
        images = images.to(device)
        labels = labels.to(device)
        outputs = vgg16(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Accuracy of the model on the test images: {} %'.format(100 * correct / total))
    print()

Accuracy of the model on the test images: 89.63 %



In [10]:
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in dataloaders['val']:
        images = images.to(device)
        labels = labels.to(device)
        outputs = resnet(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Accuracy of the model on the test images: {} %'.format(100 * correct / total))

Accuracy of the model on the test images: 45.71 %


- DenseNet GPU 메모리 초과 문제로 확인 어려움
