In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data
from torch.utils.data import DataLoader, Dataset
from torchsummary import summary


import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
from PIL import Image

import numpy as np
import copy
import os
import random
import time
import cv2
from ipywidgets import interact
from collections import namedtuple
import matplotlib.pyplot as plt


# 데이터 확인 및 전처리

In [2]:
# Source folders, one per class.
cat_dir = './data/Cat/'
dog_dir = './data/Dog/'

# Every directory entry becomes a joined path; each class list is sorted
# so the starting order does not depend on os.listdir's ordering.
cat_image_path = [os.path.join(cat_dir, name) for name in os.listdir(cat_dir)]
cat_image_path.sort()
dog_image_path = [os.path.join(dog_dir, name) for name in os.listdir(dog_dir)]
dog_image_path.sort()

# Cats first, then dogs. Entries for which cv2.imread returns None are dropped.
image_file_path = cat_image_path + dog_image_path
correct_image_path = list(
    filter(lambda path: cv2.imread(path) is not None, image_file_path)
)

# Fixed seed so the shuffled order (and therefore the split) is repeatable.
random.seed(29)
random.shuffle(correct_image_path)

# First 400 paths -> train, last 10 -> test, everything in between -> val.
train_image_files, val_image_files, test_image_files = (
    correct_image_path[:400],
    correct_image_path[400:-10],
    correct_image_path[-10:],
)
print(*map(len, (train_image_files, val_image_files, test_image_files)))
print(train_image_files[:5])

400 92 10
['./data/Cat/cat.4.jpg', './data/Dog/dog.20.jpg', './data/Cat/cat.158.jpg', './data/Cat/cat.75.jpg', './data/Cat/cat.152.jpg']


In [3]:
@interact(index=(0, len(train_image_files)-1))
def image_show(index=0):
    """Display one training image chosen by the slider.

    Args:
        index: Position inside ``train_image_files``; the slider range set in
            the decorator spans 0 .. len(train_image_files) - 1.

    Prints the array shape of the loaded image and draws it with matplotlib,
    titled with the part of the file name before its first dot.
    """
    file_path = train_image_files[index]
    bgr_pixels = cv2.imread(file_path)
    print('image shape: ', bgr_pixels.shape)
    # Convert the loaded array from BGR to RGB channel order before plotting.
    rgb_pixels = cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2RGB)
    file_name = file_path.split('/')[-1]
    plt.title(file_name.split('.')[0])
    plt.imshow(rgb_pixels)
    plt.tight_layout()
    plt.show()

interactive(children=(IntSlider(value=0, description='index', max=399), Output()), _dom_classes=('widget-inter…

In [4]:
from utils import build_trasnforms, MyDataset

In [5]:
# Loader / preprocessing settings used by the dataset and dataloader cells.
batch_size = 32
image_size = 224
# Three-value normalisation statistics handed to build_trasnforms as mean/std.
mean, std = (0.485, 0.456, 0.406), (0.229, 0.224, 0.225)

In [6]:
# Train and validation datasets are built the same way: each gets its own
# transform object created from the shared settings, and only the file list
# and the `phase` argument differ.
train_dataset = MyDataset(
    train_image_files,
    transforms=build_trasnforms(image_size=image_size, mean=mean, std=std),
    phase='train',
)
val_dataset = MyDataset(
    val_image_files,
    transforms=build_trasnforms(image_size=image_size, mean=mean, std=std),
    phase='val',
)

In [7]:
# Fetch the first training sample once through normal indexing instead of
# calling the __getitem__ dunder twice, which loaded and transformed the
# image two times just to print it. A sample is an (image, label) pair.
sample_image, sample_label = train_dataset[0]
print(sample_image.size(), sample_label)

torch.Size([3, 224, 224]) 0


In [8]:
# One DataLoader per phase, keyed by phase name. Only the training loader
# shuffles; validation keeps the dataset order.
dataloaders = {
    'train': DataLoader(train_dataset, batch_size=batch_size, shuffle=True),
    'val': DataLoader(val_dataset, batch_size=batch_size, shuffle=False),
}

# Pull a single training batch as a sanity check of tensor shape and labels.
train_batches = iter(dataloaders['train'])
image, label = next(train_batches)
print(image.shape, label)

torch.Size([32, 3, 224, 224]) tensor([0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1,
        0, 0, 0, 0, 1, 0, 1, 0])


# modeling

### build model

In [9]:
from model import BasicBlock, Bottleneck, ResNet

- resnet config

In [10]:
# Record type describing one ResNet variant; fields are, in order,
# `block`, `n_blocks` and `channels`.
ResNet_config = namedtuple('ResNetConfig', 'block n_blocks channels')

- resnet 18

In [11]:
# ResNet-18: BasicBlock, block counts 2-2-2-2.
resnet18_config = ResNet_config(
    block=BasicBlock,
    n_blocks=[2, 2, 2, 2],
    channels=[64, 128, 256, 512],
)

- resnet 34

In [12]:
# ResNet-34: BasicBlock, block counts 3-4-6-3.
resnet34_config = ResNet_config(
    block=BasicBlock,
    n_blocks=[3, 4, 6, 3],
    channels=[64, 128, 256, 512],
)

- resnet 50

In [13]:
# ResNet-50: Bottleneck, block counts 3-4-6-3.
resnet50_config = ResNet_config(
    block=Bottleneck,
    n_blocks=[3, 4, 6, 3],
    channels=[64, 128, 256, 512],
)

- resnet 101

In [14]:
# ResNet-101: Bottleneck, block counts 3-4-23-3.
resnet101_config = ResNet_config(
    block=Bottleneck,
    n_blocks=[3, 4, 23, 3],
    channels=[64, 128, 256, 512],
)

- resnet 152

In [15]:
# ResNet-152: Bottleneck, block counts 3-8-36-3.
resnet152_config = ResNet_config(
    block=Bottleneck,
    n_blocks=[3, 8, 36, 3],
    channels=[64, 128, 256, 512],
)

In [16]:
# Build the ResNet-18 variant with a 2-way output; the bare name on the
# last line makes the notebook print the module structure.
model = ResNet(
    resnet18_config,
    output_dim=2,
    zero_init_residual=True,
)
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kerne

In [17]:
# torchsummary defaults to device="cuda" and creates its dummy input on the
# GPU whenever one is visible, which fails for a model whose weights are on
# the CPU. Pass the device the parameters actually live on, and reuse
# `image_size` instead of repeating the literal 224.
summary(
    model=model,
    input_size=(3, image_size, image_size),
    device=next(model.parameters()).device.type,
)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]          36,864
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
       BasicBlock-11           [-1, 64, 56, 56]               0
           Conv2d-12           [-1, 64, 56, 56]          36,864
      BatchNorm2d-13           [-1, 64, 56, 56]             128
             ReLU-14           [-1, 64,

In [18]:
# Rebind `model` to the ResNet-152 variant (same 2-way output); the bare
# name on the last line makes the notebook print the module structure.
model = ResNet(
    resnet152_config,
    output_dim=2,
    zero_init_residual=True,
)
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [19]:
# torchsummary defaults to device="cuda" and creates its dummy input on the
# GPU whenever one is visible, which fails for a model whose weights are on
# the CPU. Pass the device the parameters actually live on, and reuse
# `image_size` instead of repeating the literal 224.
summary(
    model=model,
    input_size=(3, image_size, image_size),
    device=next(model.parameters()).device.type,
)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]           4,096
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]          16,384
      BatchNorm2d-12          [-1, 256, 56, 56]             512
           Conv2d-13          [-1, 256, 56, 56]          16,384
      BatchNorm2d-14          [-1, 256,

### pretrained model

In [None]:
# select model

# pretrain_model = models.resnet18(weights=True)
# pretrain_model = models.resnet34(weights=True)
# pretrain_model = models.resnet50(weights=True)
# pretrain_model = models.resnet101(weights=True)
# pretrain_model = models.resnet152(weights=True)

In [29]:
# Load ResNet-50 with pretrained weights.
# NOTE(review): `pretrained=True` is the legacy torchvision flag; newer
# releases prefer the `weights=` argument. Kept as-is for compatibility with
# the installed version; confirm before switching.
pretrain_model = models.resnet50(pretrained=True)

# Number of target classes: 2 for this task; change it to fit the project.
OUTPUT_DIM = 2

# Replace the classifier head. The input width is read from the existing
# `fc` layer rather than hard-coded to 2048, so this cell keeps working if a
# different ResNet backbone (with a different feature width) is loaded above.
fc_in_features = pretrain_model.fc.in_features
pretrain_model.fc = nn.Sequential(
    nn.Linear(fc_in_features, 1000),
    nn.ReLU(inplace=True),
    nn.Linear(1000, OUTPUT_DIM),
)



In [30]:
# torchsummary defaults to device="cuda" and creates its dummy input on the
# GPU whenever one is visible, which fails for a model whose weights are on
# the CPU. Pass the device the parameters actually live on.
summary(
    model=pretrain_model,
    input_size=(3, 224, 224),
    device=next(pretrain_model.parameters()).device.type,
)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]           4,096
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]          16,384
      BatchNorm2d-12          [-1, 256, 56, 56]             512
           Conv2d-13          [-1, 256, 56, 56]          16,384
      BatchNorm2d-14          [-1, 256,