## ResNet
- CNN에서 가장 많이 사용되는 모델
- **스킵 커넥션** 기법을 이용해 VGG 모델의 기울기 소실 문제를 어느 정도 해결함.
- 입력 x를 자기 자신으로 미분하면 1이 되므로, 은닉층의 출력 F(x)에 입력 x를 더하면(H(x) = F(x) + x) 역전파 시 기울기에 항상 1이 더해진다(dH/dx = dF/dx + 1). 따라서 층이 깊어져도 기울기가 입력 쪽까지 전달되는 경로가 확보된다.

+) 스킵 커넥션 : 은닉층을 거치지 않은 입력값을 은닉층의 출력값과 더하는 구조

< 장점 >
- 층을 깊게 쌓을 수 있다.
- VGG에 비해 학습이 안정적이다.
- 기울기 소실 문제를 어느 정도 해결한다.

< 단점 >
- 가중치가 늘어나기 때문에 계산량이 많아진다.
- VGG에 비해 과적합이 일어나기 쉽다.

### Batch Normalization
- 배치마다 데이터 분포가 달라지는 불균형 문제를 완화한다.
- 배치마다 값의 분포가 서로 다르면 각 층의 출력 분포도 달라져 학습에 악영향을 준다. -> 배치 단위로 평균과 분산을 이용해 정규화하여 값의 분포를 일정하게 맞춰준다.


In [None]:
import torch
import torch.nn as nn

In [None]:
### skip-connection
class BasicBlock(nn.Module):
  """Residual block: two conv + batch-norm stages with a projected skip path.

  The main branch is conv -> BN -> ReLU -> conv -> BN. The block input is
  projected to ``out_channels`` by a 1x1 convolution and added to the main
  branch output; a final ReLU is applied to the sum.

  Args:
    in_channels: Number of channels of the input feature map.
    out_channels: Number of channels produced by the block.
    kernel_size: Kernel size of the two main-branch convolutions, either an
      int or a (height, width) pair. Use odd sizes: the padding is
      ``kernel_size // 2``, which keeps the spatial size only for odd kernels,
      and the skip addition needs matching shapes.
  """

  def __init__(self, in_channels, out_channels, kernel_size = 3):
    super().__init__()

    # Derive the padding from the kernel size so the main branch keeps the
    # input's height/width. A fixed padding of 1 is only correct for 3x3.
    if isinstance(kernel_size, int):
      padding = kernel_size // 2
    else:
      padding = tuple(k // 2 for k in kernel_size)

    self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size = kernel_size, padding = padding)
    self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size = kernel_size, padding = padding)
    # 1x1 convolution used on the skip path to change the channel count.
    self.downsample = nn.Conv2d(in_channels, out_channels, kernel_size = 1)
    self.bn1 = nn.BatchNorm2d(num_features = out_channels)
    self.bn2 = nn.BatchNorm2d(num_features = out_channels)
    self.relu = nn.ReLU()

  def forward(self, x):
    """Apply the residual block to ``x`` and return the activated sum."""

    skip_x = x

    x = self.conv1(x)
    x = self.bn1(x)
    x = self.relu(x)
    x = self.conv2(x)
    x = self.bn2(x)

    # The input has in_channels channels while the main branch now has
    # out_channels, so project the skip path to the same channel count.
    skip_x = self.downsample(skip_x)

    x += skip_x
    x = self.relu(x)

    return x

In [None]:
class ResNet(nn.Module):
  """Small residual classifier: three BasicBlocks followed by a 3-layer MLP.

  Each residual block is followed by 2x2 average pooling. The first linear
  layer expects 4096 flattened features, i.e. 256 channels on a 4x4 map,
  which is what a 3-channel 32x32 input yields after three poolings.

  Args:
    num_classes: Size of the final output layer (number of logits).
  """

  def __init__(self, num_classes = 10):
    super().__init__()

    # Convolutional feature extractor: 3 -> 64 -> 128 -> 256 channels.
    self.b1 = BasicBlock(in_channels=3, out_channels=64)
    self.b2 = BasicBlock(in_channels=64, out_channels=128)
    self.b3 = BasicBlock(in_channels=128, out_channels=256)

    # One pooling module shared by all three stages (it has no parameters).
    self.pool = nn.AvgPool2d(kernel_size=2, stride=2)

    # Classifier head: 4096 -> 2048 -> 512 -> num_classes.
    self.fc1 = nn.Linear(in_features=4096, out_features=2048)
    self.fc2 = nn.Linear(in_features=2048, out_features=512)
    self.fc3 = nn.Linear(in_features=512, out_features=num_classes)
    self.relu = nn.ReLU()

  def forward(self, x):
    """Return the class logits for a batch of images ``x``."""

    # Every residual stage is followed by the same pooling step.
    for stage in (self.b1, self.b2, self.b3):
      x = self.pool(stage(x))

    # Keep the batch dimension and flatten everything else for the MLP.
    features = torch.flatten(x, start_dim=1)

    hidden = self.relu(self.fc1(features))
    hidden = self.relu(self.fc2(hidden))

    # No activation on the last layer: the outputs are raw logits.
    return self.fc3(hidden)

In [None]:
import tqdm

In [None]:
from torchvision.datasets.cifar import CIFAR10
from torchvision.transforms import Compose, ToTensor, RandomHorizontalFlip, RandomCrop, Normalize
from torch.utils.data import DataLoader
from torch.optim.adam import Adam

In [None]:
# Preprocessing pipeline: random 32x32 crop (after 4-pixel padding), random
# horizontal flip, conversion to tensor, then per-channel normalization.
transforms = Compose(transforms=[
    RandomCrop(size=(32, 32), padding=4),
    RandomHorizontalFlip(p=0.5),
    ToTensor(),
    Normalize(
        mean=(0.4914, 0.4822, 0.4465),
        std=(0.247, 0.243, 0.261),
    ),
])

In [None]:
# Training split: uses the augmenting `transforms` pipeline (random crop/flip).
train = CIFAR10(root = './', train = True, download = True, transform = transforms)

# Test split: evaluation must be deterministic, so no random crop or flip is
# applied here. Only tensor conversion and the same normalization statistics
# as the training pipeline are used.
test_transforms = Compose([
    ToTensor(),
    Normalize(mean = (0.4914, 0.4822, 0.4465), std = (0.247, 0.243, 0.261))
])
test = CIFAR10(root = './', train = False, download = True, transform = test_transforms)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./cifar-10-python.tar.gz to ./
Files already downloaded and verified


In [None]:
# Mini-batches of 32 samples; only the training loader shuffles its data.
tr_loader = DataLoader(dataset=train, batch_size=32, shuffle=True)
te_loader = DataLoader(dataset=test, batch_size=32, shuffle=False)

In [None]:
# Use the first CUDA device when one is available, otherwise fall back to CPU.
if torch.cuda.is_available():
  device = 'cuda:0'
else:
  device = 'cpu'

In [None]:
device

'cuda:0'

In [None]:
# Build the classifier with 10 output logits.
model = ResNet(num_classes = 10)

In [None]:
# Move the model to the selected device; as the last expression of the cell,
# the returned module is also displayed.
model.to(device)

ResNet(
  (b1): BasicBlock(
    (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (downsample): Conv2d(3, 64, kernel_size=(1, 1), stride=(1, 1))
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (b2): BasicBlock(
    (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (downsample): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1))
    (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (b3): BasicBlock(
    (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), p

In [None]:
# Learning rate passed to the optimizer.
lr = 1e-4

In [None]:
# Adam optimizer over all parameters of `model`, using the learning rate `lr`.
optim = Adam(model.parameters(), lr = lr)

In [None]:
# The loss module holds no per-batch state, so build it once instead of
# re-instantiating it for every batch.
criterion = nn.CrossEntropyLoss()

for epoch in range(30):

  # ---- training pass ----
  model.train()
  tr_acc, tr_cnt, tr_loss = 0., 0., 0.
  for image, label in tr_loader:

    optim.zero_grad()

    image = image.to(device)
    label = label.to(device)

    pred = model(image)

    loss = criterion(pred, label)
    loss.backward()

    optim.step()

    # Predicted class is the index of the largest logit in each row.
    cls = pred.argmax(dim = 1)
    corr = (cls == label).sum().item()

    tr_acc += corr           # running count of correct predictions
    tr_cnt += len(label)     # running count of samples seen
    tr_loss += loss.item()   # running sum of per-batch mean losses

  tr_acc /= tr_cnt           # fraction of correct training predictions
  tr_loss /= len(tr_loader)  # mean of the per-batch mean losses

  # ---- evaluation pass ----
  model.eval()
  te_acc, te_cnt, te_loss = 0., 0., 0.

  # No gradients are needed while evaluating.
  with torch.no_grad():

    for image, label in te_loader:

      image = image.to(device)
      label = label.to(device)

      pred = model(image)
      loss = criterion(pred, label)

      cls = pred.argmax(dim = 1)
      corr = (cls == label).sum().item()

      te_acc += corr
      te_cnt += len(label)
      te_loss += loss.item()

  te_acc /= te_cnt
  te_loss /= len(te_loader)

  print(f"{epoch + 1} Epoch Train Accuracy : {tr_acc} / Test Accuracy : {te_acc}")

1 Epoch Train Accuracy : 0.50976 / Test Accuracy : 0.6179
2 Epoch Train Accuracy : 0.68172 / Test Accuracy : 0.7198
3 Epoch Train Accuracy : 0.74914 / Test Accuracy : 0.7632
4 Epoch Train Accuracy : 0.7853 / Test Accuracy : 0.7917
5 Epoch Train Accuracy : 0.81246 / Test Accuracy : 0.8033
6 Epoch Train Accuracy : 0.83218 / Test Accuracy : 0.8131
7 Epoch Train Accuracy : 0.8474 / Test Accuracy : 0.834
8 Epoch Train Accuracy : 0.86362 / Test Accuracy : 0.8291
9 Epoch Train Accuracy : 0.87448 / Test Accuracy : 0.8425
10 Epoch Train Accuracy : 0.8849 / Test Accuracy : 0.8474
11 Epoch Train Accuracy : 0.89442 / Test Accuracy : 0.8586
12 Epoch Train Accuracy : 0.90214 / Test Accuracy : 0.8463
13 Epoch Train Accuracy : 0.91092 / Test Accuracy : 0.8543
14 Epoch Train Accuracy : 0.91664 / Test Accuracy : 0.8659
15 Epoch Train Accuracy : 0.92296 / Test Accuracy : 0.874
16 Epoch Train Accuracy : 0.92854 / Test Accuracy : 0.8678
17 Epoch Train Accuracy : 0.9346 / Test Accuracy : 0.8663
18 Epoch Tra