In [1]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn.functional as F
import torchsummary
import torch.nn as nn

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from torch.utils.data import random_split
from torch import optim
from torch.optim.lr_scheduler import StepLR

### GPU 사용

In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"

torch.manual_seed(777)

if device == "cuda":
  torch.cuda.manual_seed(777)

print(device)

cuda


#### CIFAR10 Train, Test 이미지 얻기

In [3]:
# == Cifar10 데이터 얻기 ==
def get_data(flag = True):
    train = dsets.CIFAR10(root = '/data',
                          train = True,
                          download = True)
    test = dsets.CIFAR10(root = '/data',
                        train = False,
                        download = True)
    return train, test

#### 이미지 전처리 & Augmentation

* 1. cifar10은 image의 width와 height가 똑같기 때문에 바로 224x224로 randomly crop

* 2. Horizontal Flip

* 3. Color Augmentation

* 4. subtract the mean activity over the training set from each pixel (normalization)




In [4]:
train, test = get_data(flag = True)

train.transform = transforms.ToTensor()
test.transform = transforms.ToTensor()

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:10<00:00, 15955921.19it/s]


Extracting /data/cifar-10-python.tar.gz to /data
Files already downloaded and verified


In [5]:
train_meanRGB = [np.mean(x.numpy(), axis = (1, 2)) for x, _ in train]
train_stdRGB = [np.std(x.numpy(), axis = (1, 2)) for x, _ in train]

meanR = np.mean([m[0] for m in train_meanRGB])
meanG = np.mean([m[1] for m in train_meanRGB])
meanB = np.mean([m[2] for m in train_meanRGB])

stdR = np.mean([s[0] for s in train_stdRGB])
stdG = np.mean([s[1] for s in train_stdRGB])
stdB = np.mean([s[2] for s in train_stdRGB])

train_mean = [meanR, meanG, meanB]
train_std = [stdR, stdG, stdB]

test_meanRGB = [np.mean(x.numpy(), axis = (1, 2)) for x, _ in test]
test_stdRGB = [np.std(x.numpy(), axis = (1, 2)) for x, _ in test]

meanR = np.mean([m[0] for m in test_meanRGB])
meanG = np.mean([m[1] for m in test_meanRGB])
meanB = np.mean([m[2] for m in test_meanRGB])

stdR = np.mean([s[0] for s in test_stdRGB])
stdG = np.mean([s[1] for s in test_stdRGB])
stdB = np.mean([s[2] for s in test_stdRGB])


test_mean = [meanR, meanG, meanB]
test_std = [stdR, stdG, stdB]

print(' == == == == Train == == == ==')
print('각 Channel당 pixel Mean 값 : ', train_mean)
print('각 Channel당 pixel Std 값 : ', train_std)
print(' == == == == == == == == == ==')

print(' == == == == Test == == == ==')
print('각 Channel당 pixel Mean 값 : ', test_mean)
print('각 Channel당 pixel Std 값 : ', test_std)
print(' == == == == == == == == == ==')


 == == == == Train == == == ==
각 Channel당 pixel Mean 값 :  [0.49139965, 0.48215845, 0.4465309]
각 Channel당 pixel Std 값 :  [0.20220213, 0.19931543, 0.20086348]
 == == == == == == == == == ==
 == == == == Test == == == ==
각 Channel당 pixel Mean 값 :  [0.49421427, 0.48513138, 0.45040908]
각 Channel당 pixel Std 값 :  [0.20189482, 0.19902097, 0.20103233]
 == == == == == == == == == ==


In [6]:
train_tf = transforms.Compose([
    transforms.Resize(224),
    transforms.RandomHorizontalFlip(p = 0.5),
    transforms.ColorJitter(brightness = 0.2, contrast = 0.2, saturation = 0.2, hue = 0.2),
    transforms.ToTensor(),
    transforms.Normalize(train_mean, train_std)
])

test_tf = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(test_mean, test_std)
])

# train, test 데이터셋에 대한 변환 적용
train.transform = train_tf
test.transform = test_tf

#### 모델 설계


In [None]:
# == F.pad 이해하기 위함 ==

sample_tensor = torch.randn(5, 3, 32, 32)

# == channel을 앞 뒤로 0으로 padding ==
out = F.pad(sample_tensor[:, :, ::2, ::2], (0, 0, 0, 0, 3, 3), "constant", 0)

out[:,:,0]

In [30]:
class shortcut_option_A(nn.Module):
  def __init__(self, lambd):
    super(shortcut_option_A, self).__init__()
    self.lambd = lambd

  def forward(self, x):
    return self.lambd(x)

class BasicBlock(nn.Module):

  '''
  BasicBlock은 resent18, 34
  '''
  expansion = 1

  def __init__(self, in_channels, out_channels, stride = 1):
    super().__init__()

    # == Residual Block ==
    self.residual_function = nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size = 3, stride = stride, padding = 1, bias = False),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace = True),

        nn.Conv2d(out_channels, out_channels * BasicBlock.expansion, kernel_size = 3, padding = 1, bias = False),
        nn.BatchNorm2d(out_channels * BasicBlock.expansion)
    )

    # == Shortcut(Identity) ==
    self.shortcut = nn.Sequential()

    # == 만약 dimension이 맞지 않을 시에는 option(A)를 선택한다고 했음 ==
    # == stride 2를 적용하고, channel의 size를 맞춤(앞, 뒤로 맞추기 위해 //4를 해준거임)
    if stride != 1 or out_channels * BasicBlock.expansion != out_channels:
      self.shortcut == shortcut_option_A(lambda x :
                                         F.pad(x[:, :, ::2, ::2]), (0, 0, 0, 0, out_channels // 4, out_channels //4, "constant", 0))

    def forward(self, x):
      return nn.ReLU(inplace = True)(self.residual_function(x) + self.shortcut(x))



class BottleNeck(nn.Module):
    expansion = 4

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()

        self.residual_function = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),

            nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),

            nn.Conv2d(out_channels, out_channels * BottleNeck.expansion, kernel_size=1, bias=False),
            nn.BatchNorm2d(out_channels * BottleNeck.expansion)
        )

        self.shortcut = nn.Sequential()

        if stride != 1 or in_channels != out_channels * BottleNeck.expansion:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels * BottleNeck.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * BottleNeck.expansion)
            )

    def forward(self, x):
        return nn.ReLU(inplace=True)(self.residual_function(x) + self.shortcut(x))

In [39]:
class ResNet(nn.Module):

  def __init__(self, block, num_block, num_classes = 10):
      super(ResNet, self).__init__()

      self.in_channels = 64

      self.conv1 = nn.Sequential(
          nn.Conv2d(3, 64, kernel_size = 7, stride  = 2, padding = 3, bias = False),
          nn.BatchNorm2d(64),
          nn.ReLU(inplace = True),
          nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1)
      )
      self.conv2 = self._make_layer(block, 64, num_block[0], 1)
      self.conv3 = self._make_layer(block, 128, num_block[1], 2)
      self.conv4 = self._make_layer(block, 256, num_block[2], 2)
      self.conv5 = self._make_layer(block, 512, num_block[3], 2)

      self.avg_pool = nn.AdaptiveAvgPool2d((1,1))
      self.fc = nn.Linear(512 * block.expansion, num_classes)


  def _make_layer(self, block, out_channels, num_blocks, stride):
      strides = [stride] + [1] * (num_blocks - 1)
      layers = []

      for stride in strides:
        layers.append(block(self.in_channels, out_channels, stride))
        self.in_channels = out_channels * block.expansion

      return nn.Sequential(*layers)

  def forward(self, x):
      output = self.conv1(x)
      output = self.conv2(output)
      x = self.conv3(output)
      x = self.conv4(x)
      x = self.conv5(x)
      x = self.avg_pool(x)
      x = x.view(x.size(0), -1)
      x = self.fc(x)
      return x

  def initialize_weights(self):
      for m in self.modules():
          if isinstance(m, nn.Conv2d):
              nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
              if m.bias is not None:
                  nn.init.constant_(m.bias, 0)
          elif isinstance(m, nn.BatchNorm2d):
              nn.init.constant_(m.weight, 1)
              nn.init.constant_(m.bias, 0)
          elif isinstance(m, nn.Linear):
              nn.init.normal_(m.weight, 0, 0.01)
              nn.init.constant_(m.bias, 0)

def resnet18():
    return ResNet(BasicBlock, [2,2,2,2])

def resnet34():
    return ResNet(BasicBlock, [3, 4, 6, 3])

def resnet50():
    return ResNet(BottleNeck, [3,4,6,3])

def resnet101():
    return ResNet(BottleNeck, [3, 4, 23, 3])

def resnet152():
    return ResNet(BottleNeck, [3, 8, 36, 3])

In [40]:
model = resnet50().to(device)
torchsummary.summary(model, input_size = (3, 224, 224), device = 'cuda')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]           4,096
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]          16,384
      BatchNorm2d-12          [-1, 256, 56, 56]             512
           Conv2d-13          [-1, 256, 56, 56]          16,384
      BatchNorm2d-14          [-1, 256,

### 훈련 & 검증

* batch size : 256

* learning rate : 0.1

* momentum : 0.9

* weight decay : 0.0001



In [41]:
batch_size = 64 # 256은 너무 큼
learning_rate = 0.1
momentum = 0.9
weight_decay = 0.0001
epochs = 10

In [42]:
# == split train, valud ==

train_ratio = 0.8
val_ratio = 0.2

train_size = int(len(train) * train_ratio)
val_size = len(train) - train_size

split_train, split_train = random_split(train, [train_size, val_size])

In [43]:
train_loader = torch.utils.data.DataLoader(dataset = split_train,
                                           batch_size=  batch_size,
                                           shuffle = True,
                                           drop_last = True)

val_loader = torch.utils.data.DataLoader(dataset = split_train,
                                        batch_size = batch_size,
                                        shuffle = True,
                                        drop_last = True)

In [44]:
optimizer = optim.SGD(model.parameters(), lr = learning_rate, momentum = momentum, weight_decay = weight_decay)
criterion = nn.CrossEntropyLoss().to(device)

In [45]:
# Step 4: 반복적인 학습
for epoch in range(epochs):
    model.train()  # 모델을 학습 모드로 설정


    '''
    1. for문 1번 반복할때마다 batch_idx는 batch_size만큼 증가
     ex) 1 iter: 0 ~ 127
         2 iter: 128 ~ 255

         전체 훈련 데이터를 다 쓰기전까지 반복.
    '''
    for batch_idx, (data, target) in enumerate(train_loader):
        # 데이터와 레이블을 GPU로 이동 (if available)
        # data = data.view(-1, 3, 224, 224) # multi-crop 대체
        data, target = data.to(device), target.to(device)

        # Forward 연산
        output = model(data)

        # 손실 계산
        loss = criterion(output, target)

        # Backward 연산 및 매개변수 업데이트
        optimizer.zero_grad() # 미분값 중복 방지
        loss.backward() # 오차역전파
        optimizer.step() # parameter 업데이트

        # 일정 간격으로 손실 출력


        if batch_idx % 100 == 0:
            #print(len(train_loader)) 항상 312로 동일한 값
            print(batch_idx)
            print('Epoch {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch + 1, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item())) # 100.은 실수형을 말한다.(실수형으로 출력하기 위함)
                # len(train_loader)는 배치 개수를 나타낸다. 여기서는 312개(312번 가중치 update)

    # 검증 데이터셋을 사용한 모델 평가
    model.eval()  # 모델을 평가 모드로 설정
    val_loss = 0
    correct = 0

    # 미분값 계산 x
    with torch.no_grad():
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)

            # Forward 계산
            output = model(data)

            val_loss += criterion(output, target).item() # 손실함수 계산
            pred = output.argmax(dim=1, keepdim=True) # 예측값(행마다)에서 가장 큰 값의 인덱스 가져옴, (n, 1)형식
            correct += pred.eq(target.view_as(pred)).sum().item() # target을 pred와 같은 shape으로 만들어준다.
            '''
            .eq()는 두 tensor의 shape이 동일해야 비교 연산이 가능하다.
            '''

    val_loss /= len(val_loader.dataset) # 검증 손실함수
    val_accuracy = 100. * correct / len(val_loader.dataset) # 검증 정확도

    # 검증 결과 출력
    print('\nValidation set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        val_loss, correct, len(val_loader.dataset), val_accuracy))

0
100

Validation set: Average loss: 0.0377, Accuracy: 1193/10000 (11.93%)

0
100

Validation set: Average loss: 0.0336, Accuracy: 1823/10000 (18.23%)

0
100

Validation set: Average loss: 0.0318, Accuracy: 2321/10000 (23.21%)

0
100

Validation set: Average loss: 0.0307, Accuracy: 2540/10000 (25.40%)

0
100

Validation set: Average loss: 0.0290, Accuracy: 3012/10000 (30.12%)

0
100

Validation set: Average loss: 0.0284, Accuracy: 3445/10000 (34.45%)

0
100

Validation set: Average loss: 0.0270, Accuracy: 3500/10000 (35.00%)

0
100

Validation set: Average loss: 0.0262, Accuracy: 3734/10000 (37.34%)

0
100

Validation set: Average loss: 0.0258, Accuracy: 3915/10000 (39.15%)

0
100

Validation set: Average loss: 0.0249, Accuracy: 4222/10000 (42.22%)



#### 모델 테스트

In [47]:
test_loader = torch.utils.data.DataLoader(test, batch_size = batch_size, shuffle = False)

In [48]:
model.eval()
correct = 0
total = 0

with torch.no_grad():
  for images, labels in test_loader:
    images = images.to(device)
    labels = labels.to(device)

    outputs = model(images)
    _, predicted = torch.max(outputs.data, 1) # 가장 높은값의 인덱스, 값 반환 --> 여기서는 가장 높은 값만 반환했다.

    total += labels.size(0) # 전체 데이터 예측 개수(len(cifar_test_loader) 와 같다)
    correct += (predicted == labels.to(torch.long)).sum().item() # predicted, labels의 텐서 타입이 일치하지 않아서 long으로 바꾸어줌.
    # 맞으면 correct에 true, 틀리면 false 반환한다. 맞은 것만 개수 센다.

accuracy = 100 * correct / total
print(f'Accuracy : {accuracy}')

Accuracy : 42.63
