## 데이터 확인 및 전처리

In [33]:
import os
import numpy as np
import cv2
import copy
import random
import time
import matplotlib.pyplot as plt
from ipywidgets import interact

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data

import torchvision
import torchvision.models as models
import torchvision.datasets as Datasets
from torchvision import transforms
from torchsummary import summary

In [2]:
# Root folders of the training and test images.
train_path, test_path = './data/train', './data/test'

In [3]:
# Untransformed view of the training folder, used below only to inspect the data.
train_dataset = Datasets.ImageFolder(root=train_path)

In [4]:
# Show the class names held by the dataset (displayed as the cell output).
train_dataset.classes

['Cat', 'Dog']

In [5]:
# Show the mapping from class name to integer label.
train_dataset.class_to_idx

{'Cat': 0, 'Dog': 1}

In [6]:
# List every (image path, class index) pair held by the dataset.
train_dataset.imgs

[('./data/train\\Cat\\0.jpg', 0),
 ('./data/train\\Cat\\1.jpg', 0),
 ('./data/train\\Cat\\10.jpg', 0),
 ('./data/train\\Cat\\11.jpg', 0),
 ('./data/train\\Cat\\12.jpg', 0),
 ('./data/train\\Cat\\13.jpg', 0),
 ('./data/train\\Cat\\14.jpg', 0),
 ('./data/train\\Cat\\15.jpg', 0),
 ('./data/train\\Cat\\16.jpg', 0),
 ('./data/train\\Cat\\17.jpg', 0),
 ('./data/train\\Cat\\18.jpg', 0),
 ('./data/train\\Cat\\19.jpg', 0),
 ('./data/train\\Cat\\2.jpg', 0),
 ('./data/train\\Cat\\20.jpg', 0),
 ('./data/train\\Cat\\21.jpg', 0),
 ('./data/train\\Cat\\22.jpg', 0),
 ('./data/train\\Cat\\23.jpg', 0),
 ('./data/train\\Cat\\24.jpg', 0),
 ('./data/train\\Cat\\25.jpg', 0),
 ('./data/train\\Cat\\26.jpg', 0),
 ('./data/train\\Cat\\27.jpg', 0),
 ('./data/train\\Cat\\28.jpg', 0),
 ('./data/train\\Cat\\29.jpg', 0),
 ('./data/train\\Cat\\3.jpg', 0),
 ('./data/train\\Cat\\30.jpg', 0),
 ('./data/train\\Cat\\31.jpg', 0),
 ('./data/train\\Cat\\32.jpg', 0),
 ('./data/train\\Cat\\33.jpg', 0),
 ('./data/train\\Cat\\34

In [7]:
@interact(index=(0, len(train_dataset.imgs)-1))
def image_show(index=0):
    """Display one training image with its class as the plot title.

    Args:
        index: Position in ``train_dataset.imgs``; the ``interact`` slider
            spans every valid index.

    Raises:
        FileNotFoundError: If OpenCV cannot read the image file.
    """
    image_path, label = train_dataset.imgs[index]

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals an unreadable/missing file by returning None
        # rather than raising, which would otherwise surface as an opaque
        # cvtColor error on the next line.
        raise FileNotFoundError(f'could not read image: {image_path}')

    # Convert from OpenCV's BGR channel order to the RGB order used for display.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    plt.title('cat' if label == 0 else 'dog')
    plt.imshow(image)
    plt.show()


interactive(children=(IntSlider(value=0, description='index', max=528), Output()), _dom_classes=('widget-inter…

In [8]:
# Settings shared by the training and evaluation pipelines.
IMAGE_SIZE = (256, 256)
# Per-channel normalisation constants (presumably the ImageNet statistics
# expected by torchvision's pretrained models -- confirm).
NORM_MEAN = (0.485, 0.456, 0.406)
NORM_STD = (0.229, 0.224, 0.225)

# Training pipeline: resize, light random augmentation, then tensor + normalise.
train_transforms = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomRotation(degrees=5),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])

# Evaluation pipeline: same resize and normalisation, no random augmentation.
test_transforms = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])

In [9]:
# Re-create the datasets, this time attaching the preprocessing pipelines.
train_dataset = Datasets.ImageFolder(root=train_path, transform=train_transforms)
test_dataset = Datasets.ImageFolder(root=test_path, transform=test_transforms)

In [10]:
# NOTE: despite its name, val_ratio is the fraction of samples assigned to the
# TRAINING split below; the remainder becomes the validation split.
val_ratio = 0.9
n_samples = len(train_dataset)
train_len = int(n_samples * val_ratio)
val_len = n_samples - train_len

# Randomly partition the training set into train / validation subsets.
train_data, val_data = data.random_split(train_dataset, lengths=[train_len, val_len])

In [11]:
# Both splits were produced from the same train_dataset by random_split, so take
# a private deep copy of the validation split before touching its transform
# (presumably so the training split keeps train_transforms -- confirm).
val_data = copy.deepcopy(val_data)
# Validation images use the evaluation pipeline, which has no random rotation/flip.
val_data.dataset.transform = test_transforms

In [12]:
# Report the number of samples in the train, validation and test sets.
print(*(len(split) for split in (train_data, val_data, test_dataset)))

476 53 12


In [13]:
# Mini-batch size used by every DataLoader.
BATCH_SIZE = 128

# Use CUDA when PyTorch reports it as available, otherwise fall back to the CPU.
_device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
DEVICE = torch.device(_device_name)
DEVICE

device(type='cpu')

In [47]:
# One DataLoader per phase, keyed by phase name; only the training data is shuffled.
dataloaders = {
    'train': data.DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True),
    'val': data.DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=False),
    'test': data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False),
}

## Build Model

- self-implemented model (built from `model.py`)

In [48]:
from model import VGG, get_vgg_layers

-  features : 모델 유형 정의

In [49]:
# Channel width of each of the five convolutional stages.
_STAGE_WIDTHS = (64, 128, 256, 512, 512)


def _make_vgg_config(convs_per_stage):
    """Build a VGG layer config from the number of conv layers in each stage.

    Each stage contributes its channel width once per conv layer, followed by
    an 'M' marker.
    """
    config = []
    for width, n_convs in zip(_STAGE_WIDTHS, convs_per_stage):
        config.extend([width] * n_convs)
        config.append('M')
    return config


vgg11_config = _make_vgg_config((1, 1, 2, 2, 2))

vgg13_config = _make_vgg_config((2, 2, 2, 2, 2))

vgg16_config = _make_vgg_config((2, 2, 3, 3, 3))

vgg19_config = _make_vgg_config((2, 2, 4, 4, 4))

In [52]:
# Two output classes: cat or dog.
OUTPUT_DIM = 2

# VGG-11 (with batch normalisation), built from the config list.
vgg11_layers = get_vgg_layers(vgg11_config, batch_norm=True)
model = VGG(vgg11_layers, output_dim=OUTPUT_DIM)
# The model is only moved to DEVICE later in the notebook, so it is presumably
# still on the CPU here. torchsummary defaults to device="cuda" and would feed
# CUDA inputs to it on a GPU machine; pin the summary pass to the CPU instead.
summary(model=model, input_size=(3, 256, 256), device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 256, 256]           1,792
       BatchNorm2d-2         [-1, 64, 256, 256]             128
              ReLU-3         [-1, 64, 256, 256]               0
         MaxPool2d-4         [-1, 64, 128, 128]               0
            Conv2d-5        [-1, 128, 128, 128]          73,856
       BatchNorm2d-6        [-1, 128, 128, 128]             256
              ReLU-7        [-1, 128, 128, 128]               0
         MaxPool2d-8          [-1, 128, 64, 64]               0
            Conv2d-9          [-1, 256, 64, 64]         295,168
      BatchNorm2d-10          [-1, 256, 64, 64]             512
             ReLU-11          [-1, 256, 64, 64]               0
           Conv2d-12          [-1, 256, 64, 64]         590,080
      BatchNorm2d-13          [-1, 256, 64, 64]             512
             ReLU-14          [-1, 256,

In [51]:
# Two output classes: cat or dog.
OUTPUT_DIM = 2

# VGG-13 (with batch normalisation), built from the config list.
vgg13_layers = get_vgg_layers(vgg13_config, batch_norm=True)
model = VGG(vgg13_layers, output_dim=OUTPUT_DIM)
# The model is only moved to DEVICE later in the notebook, so it is presumably
# still on the CPU here. torchsummary defaults to device="cuda" and would feed
# CUDA inputs to it on a GPU machine; pin the summary pass to the CPU instead.
summary(model=model, input_size=(3, 256, 256), device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 256, 256]           1,792
       BatchNorm2d-2         [-1, 64, 256, 256]             128
              ReLU-3         [-1, 64, 256, 256]               0
            Conv2d-4         [-1, 64, 256, 256]          36,928
       BatchNorm2d-5         [-1, 64, 256, 256]             128
              ReLU-6         [-1, 64, 256, 256]               0
         MaxPool2d-7         [-1, 64, 128, 128]               0
            Conv2d-8        [-1, 128, 128, 128]          73,856
       BatchNorm2d-9        [-1, 128, 128, 128]             256
             ReLU-10        [-1, 128, 128, 128]               0
           Conv2d-11        [-1, 128, 128, 128]         147,584
      BatchNorm2d-12        [-1, 128, 128, 128]             256
             ReLU-13        [-1, 128, 128, 128]               0
        MaxPool2d-14          [-1, 128,

In [27]:
# Two output classes: cat or dog.
OUTPUT_DIM = 2

# VGG-16 (with batch normalisation), built from the config list.
vgg16_layers = get_vgg_layers(vgg16_config, batch_norm=True)
model = VGG(vgg16_layers, output_dim=OUTPUT_DIM)
# The model is only moved to DEVICE later in the notebook, so it is presumably
# still on the CPU here. torchsummary defaults to device="cuda" and would feed
# CUDA inputs to it on a GPU machine; pin the summary pass to the CPU instead.
summary(model=model, input_size=(3, 256, 256), device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 256, 256]           1,792
       BatchNorm2d-2         [-1, 64, 256, 256]             128
              ReLU-3         [-1, 64, 256, 256]               0
            Conv2d-4         [-1, 64, 256, 256]          36,928
       BatchNorm2d-5         [-1, 64, 256, 256]             128
              ReLU-6         [-1, 64, 256, 256]               0
         MaxPool2d-7         [-1, 64, 128, 128]               0
            Conv2d-8        [-1, 128, 128, 128]          73,856
       BatchNorm2d-9        [-1, 128, 128, 128]             256
             ReLU-10        [-1, 128, 128, 128]               0
           Conv2d-11        [-1, 128, 128, 128]         147,584
      BatchNorm2d-12        [-1, 128, 128, 128]             256
             ReLU-13        [-1, 128, 128, 128]               0
        MaxPool2d-14          [-1, 128,

In [28]:
# Two output classes: cat or dog.
OUTPUT_DIM = 2

# VGG-19 (with batch normalisation), built from the config list.
vgg19_layers = get_vgg_layers(vgg19_config, batch_norm=True)
model = VGG(vgg19_layers, output_dim=OUTPUT_DIM)
# The model is only moved to DEVICE later in the notebook, so it is presumably
# still on the CPU here. torchsummary defaults to device="cuda" and would feed
# CUDA inputs to it on a GPU machine; pin the summary pass to the CPU instead.
summary(model=model, input_size=(3, 256, 256), device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 256, 256]           1,792
       BatchNorm2d-2         [-1, 64, 256, 256]             128
              ReLU-3         [-1, 64, 256, 256]               0
            Conv2d-4         [-1, 64, 256, 256]          36,928
       BatchNorm2d-5         [-1, 64, 256, 256]             128
              ReLU-6         [-1, 64, 256, 256]               0
         MaxPool2d-7         [-1, 64, 128, 128]               0
            Conv2d-8        [-1, 128, 128, 128]          73,856
       BatchNorm2d-9        [-1, 128, 128, 128]             256
             ReLU-10        [-1, 128, 128, 128]               0
           Conv2d-11        [-1, 128, 128, 128]         147,584
      BatchNorm2d-12        [-1, 128, 128, 128]             256
             ReLU-13        [-1, 128, 128, 128]               0
        MaxPool2d-14          [-1, 128,

- use pretrained model

In [67]:
# To use the pretrained model, out_features must be changed from 1000 to 2.
pretrained11_model = models.vgg11_bn(pretrained=True)
# 1: everything from the pooling layer onward is replaced.
pretrained11_model.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
# 2: the classifier head is replaced to suit this project (two classes).
pretrained11_model.classifier = nn.Sequential(
    nn.Flatten(),
    nn.Linear(512, 256),
    nn.ReLU(),
    nn.Linear(256, 128),
    # NOTE(review): no activation between these two Linear layers -- confirm
    # that this is intended.
    nn.Linear(128, 64),
    nn.ReLU(),
    # The head ends in raw logits: the notebook's criterion is
    # nn.CrossEntropyLoss, which applies log-softmax itself, so a trailing
    # nn.Softmax would normalise twice. argmax over logits is unchanged;
    # apply softmax explicitly wherever probabilities are needed.
    nn.Linear(64, 2),
)

# The freshly built model is on the CPU; torchsummary defaults to
# device="cuda", which would mismatch on a GPU machine.
summary(pretrained11_model, input_size=(3, 256, 256), device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 256, 256]           1,792
       BatchNorm2d-2         [-1, 64, 256, 256]             128
              ReLU-3         [-1, 64, 256, 256]               0
         MaxPool2d-4         [-1, 64, 128, 128]               0
            Conv2d-5        [-1, 128, 128, 128]          73,856
       BatchNorm2d-6        [-1, 128, 128, 128]             256
              ReLU-7        [-1, 128, 128, 128]               0
         MaxPool2d-8          [-1, 128, 64, 64]               0
            Conv2d-9          [-1, 256, 64, 64]         295,168
      BatchNorm2d-10          [-1, 256, 64, 64]             512
             ReLU-11          [-1, 256, 64, 64]               0
           Conv2d-12          [-1, 256, 64, 64]         590,080
      BatchNorm2d-13          [-1, 256, 64, 64]             512
             ReLU-14          [-1, 256,

In [None]:
# To use the pretrained model, out_features must be changed from 1000 to 2.
pretrained13_model = models.vgg13_bn(pretrained=True)
# 1: everything from the pooling layer onward is replaced.
pretrained13_model.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
# 2: the classifier head is replaced to suit this project (two classes).
pretrained13_model.classifier = nn.Sequential(
    nn.Flatten(),
    nn.Linear(512, 256),
    nn.ReLU(),
    nn.Linear(256, 128),
    # NOTE(review): no activation between these two Linear layers -- confirm
    # that this is intended.
    nn.Linear(128, 64),
    nn.ReLU(),
    # The head ends in raw logits: the notebook's criterion is
    # nn.CrossEntropyLoss, which applies log-softmax itself, so a trailing
    # nn.Softmax would normalise twice. argmax over logits is unchanged;
    # apply softmax explicitly wherever probabilities are needed.
    nn.Linear(64, 2),
)

# The freshly built model is on the CPU; torchsummary defaults to
# device="cuda", which would mismatch on a GPU machine.
summary(pretrained13_model, input_size=(3, 256, 256), device='cpu')

In [None]:
# To use the pretrained model, out_features must be changed from 1000 to 2.
pretrained16_model = models.vgg16_bn(pretrained=True)
# 1: everything from the pooling layer onward is replaced.
pretrained16_model.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
# 2: the classifier head is replaced to suit this project (two classes).
pretrained16_model.classifier = nn.Sequential(
    nn.Flatten(),
    nn.Linear(512, 256),
    nn.ReLU(),
    nn.Linear(256, 128),
    # NOTE(review): no activation between these two Linear layers -- confirm
    # that this is intended.
    nn.Linear(128, 64),
    nn.ReLU(),
    # The head ends in raw logits: the notebook's criterion is
    # nn.CrossEntropyLoss, which applies log-softmax itself, so a trailing
    # nn.Softmax would normalise twice. argmax over logits is unchanged;
    # apply softmax explicitly wherever probabilities are needed.
    nn.Linear(64, 2),
)

# The freshly built model is on the CPU; torchsummary defaults to
# device="cuda", which would mismatch on a GPU machine.
summary(pretrained16_model, input_size=(3, 256, 256), device='cpu')

In [68]:
# To use the pretrained model, out_features must be changed from 1000 to 2.
pretrained19_model = models.vgg19_bn(pretrained=True)
# 1: everything from the pooling layer onward is replaced.
pretrained19_model.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
# 2: the classifier head is replaced to suit this project (two classes).
pretrained19_model.classifier = nn.Sequential(
    nn.Flatten(),
    nn.Linear(512, 256),
    nn.ReLU(),
    nn.Linear(256, 128),
    # NOTE(review): no activation between these two Linear layers -- confirm
    # that this is intended.
    nn.Linear(128, 64),
    nn.ReLU(),
    # The head ends in raw logits: the notebook's criterion is
    # nn.CrossEntropyLoss, which applies log-softmax itself, so a trailing
    # nn.Softmax would normalise twice. argmax over logits is unchanged;
    # apply softmax explicitly wherever probabilities are needed.
    nn.Linear(64, 2),
)

# The freshly built model is on the CPU; torchsummary defaults to
# device="cuda", which would mismatch on a GPU machine.
summary(pretrained19_model, input_size=(3, 256, 256), device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 256, 256]           1,792
       BatchNorm2d-2         [-1, 64, 256, 256]             128
              ReLU-3         [-1, 64, 256, 256]               0
            Conv2d-4         [-1, 64, 256, 256]          36,928
       BatchNorm2d-5         [-1, 64, 256, 256]             128
              ReLU-6         [-1, 64, 256, 256]               0
         MaxPool2d-7         [-1, 64, 128, 128]               0
            Conv2d-8        [-1, 128, 128, 128]          73,856
       BatchNorm2d-9        [-1, 128, 128, 128]             256
             ReLU-10        [-1, 128, 128, 128]               0
           Conv2d-11        [-1, 128, 128, 128]         147,584
      BatchNorm2d-12        [-1, 128, 128, 128]             256
             ReLU-13        [-1, 128, 128, 128]               0
        MaxPool2d-14          [-1, 128,

## training

In [None]:
from runs import train, evaluate, predict
from utils import epoch_time, normalize_image, show_correct_images

In [54]:
# Move the model to the target device BEFORE constructing the optimizer, as the
# torch.optim documentation recommends, so the optimizer is built over the
# parameters in their final location.
model = model.to(DEVICE)

# Loss for the two-class problem, placed on the same device as the model.
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(DEVICE)

# Adam with a small learning rate (1e-5).
optimizer = optim.Adam(model.parameters(), lr=0.00001)

In [55]:
# Display the module structure of the model selected for training.
model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU(inplace=True)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU(inplace=True)
    (11): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (13): ReLU(inplace=True)
    (14): MaxPool2d(ke

In [44]:
# Directory for saved checkpoints. exist_ok=True keeps the cell re-runnable:
# without it, a second run raises FileExistsError once the folder exists.
os.makedirs('./trained_model', exist_ok=True)

In [None]:
NUM_EPOCHS = 5

# Lowest validation loss seen so far; starts at a large sentinel value.
best_loss = 1e9

for epoch in range(NUM_EPOCHS):
    start_time = time.monotonic()

    # One pass over the training data, then one evaluation pass.
    train_loss, train_acc = train(model, dataloaders, optimizer, criterion, DEVICE)
    val_loss, val_acc = evaluate(model, dataloaders, criterion, DEVICE)

    # Checkpoint only when the validation loss improves on the best so far.
    improved = val_loss < best_loss
    if improved:
        best_loss = val_loss
        torch.save(model.state_dict(), './trained_model/vgg_model.pth')

    end_time = time.monotonic()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    # Per-epoch report: timing, then train and validation metrics.
    report_lines = (
        f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s',
        f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%',
        f'\tVal Loss: {val_loss:.3f} | Val Acc: {val_acc*100:.2f}%',
    )
    for line in report_lines:
        print(line)

## Test

In [None]:
# Run the model through predict() and collect images, true labels and
# per-class scores.
images, labels, probs = predict(model, dataloaders)

# Predicted class = index of the highest score along dim 1.
pred_labels = probs.argmax(dim=1)
corrects = labels.eq(pred_labels)

# Keep only the correctly classified samples as (image, label, scores) triples.
correct_exam = [
    (image, label, prob)
    for image, label, prob, correct in zip(images, labels, probs, corrects)
    if correct
]

# Most confident predictions first: order by each sample's highest score.
correct_exam.sort(key=lambda sample: sample[2].max(dim=0).values, reverse=True)

In [None]:
# Number of correctly classified examples to display.
N_IMAGES = 5
# Class names taken from the test dataset, passed along for display.
classes = test_dataset.classes
show_correct_images(correct_exam, classes, N_IMAGES)