---
title: Day5. VGG와 ResNet 학습하기 실습(Hands-On)
---

In [3]:
import torch
import torch.nn as nn
import tqdm

from torchvision.datasets.cifar import CIFAR10
from torchvision.transforms import Compose, ToTensor
from torchvision.transforms import RandomHorizontalFlip, RandomCrop
from torchvision.transforms import Normalize
from torch.utils.data.dataloader import DataLoader
from torch.optim.adam import Adam

In [2]:
transforms = Compose([
   RandomCrop((32, 32), padding=4),
   RandomHorizontalFlip(p=0.5),
   ToTensor(),
   Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.247, 0.243, 0.261))
])

In [3]:
train = CIFAR10(root="data", train=True, download=True, transform=transforms)
test = CIFAR10(root="data", train=False, download=True, transform=transforms)
train_loader = DataLoader(train, batch_size=32, shuffle=True)
test_loader = DataLoader(test, batch_size=32, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


## ResNet

In [4]:
class BasicBlock(nn.Module):
   def __init__(self, in_channels, out_channels, kernel_size=3):
       super(BasicBlock, self).__init__()
       self.c1 = nn.Conv2d(in_channels, out_channels,kernel_size=kernel_size, padding=1)
       self.c2 = nn.Conv2d(out_channels, out_channels,kernel_size=kernel_size, padding=1)
       self.downsample = nn.Conv2d(in_channels, out_channels,kernel_size=1)
       self.bn1 = nn.BatchNorm2d(num_features=out_channels)
       self.bn2 = nn.BatchNorm2d(num_features=out_channels)
       self.relu = nn.ReLU()
   def forward(self, x):
       x_ = x
       x = self.c1(x)
       x = self.bn1(x)
       x = self.relu(x)
       x = self.c2(x)
       x = self.bn2(x)
       x_ = self.downsample(x_)
       x += x_
       x = self.relu(x)
       return x

In [5]:
class ResNet(nn.Module):
   def __init__(self, num_classes=10):
       super(ResNet, self).__init__()
       self.b1 = BasicBlock(in_channels=3, out_channels=64)
       self.b2 = BasicBlock(in_channels=64, out_channels=128)
       self.b3 = BasicBlock(in_channels=128, out_channels=256)
       self.pool = nn.AvgPool2d(kernel_size=2, stride=2)
       self.fc1 = nn.Linear(in_features=4096, out_features=2048)
       self.fc2 = nn.Linear(in_features=2048, out_features=512)
       self.fc3 = nn.Linear(in_features=512, out_features=num_classes)

       self.relu = nn.ReLU()
   def forward(self, x):
       x = self.b1(x)
       x = self.pool(x)
       x = self.b2(x)
       x = self.pool(x)
       x = self.b3(x)
       x = self.pool(x)
       x = torch.flatten(x, start_dim=1)
       x = self.fc1(x)
       x = self.relu(x)
       x = self.fc2(x)
       x = self.relu(x)
       x = self.fc3(x)
       return x

In [6]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model = ResNet(num_classes=10)
model.to(device)

ResNet(
  (b1): BasicBlock(
    (c1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (c2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (downsample): Conv2d(3, 64, kernel_size=(1, 1), stride=(1, 1))
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (b2): BasicBlock(
    (c1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (c2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (downsample): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1))
    (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (b3): BasicBlock(
    (c1): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))


In [7]:
lr = 1e-4
optim = Adam(model.parameters(), lr=lr)

for epoch in range(1):
   iterator = tqdm.tqdm(train_loader)
   for data, label in iterator:
       optim.zero_grad()
       preds = model(data.to(device))
       loss = nn.CrossEntropyLoss()(preds, label.to(device))
       loss.backward()
       optim.step()
       iterator.set_description(f"epoch:{epoch+1} loss:{loss.item()}")

torch.save(model.state_dict(), "ResNet.pth")

epoch:1 loss:1.2963857650756836: 100%|██████████| 1563/1563 [02:35<00:00, 10.08it/s]


In [9]:
model.load_state_dict(torch.load("ResNet.pth", map_location=device))
num_corr = 0
with torch.no_grad():
   for data, label in test_loader:
       output = model(data.to(device))
       preds = output.data.max(1)[1]
       corr = preds.eq(label.to(device).data).sum().item()
       num_corr += corr
   print(f"Accuracy:{(num_corr/len(test)) * 100.0}")

Accuracy:62.23


## VGG 전이학습

In [7]:
import torch
import torch.nn as nn
from torchvision.models.vgg import vgg16, VGG16_Weights

device = "cuda" if torch.cuda.is_available() else "cpu"

model = vgg16(weights=VGG16_Weights.DEFAULT) # vgg 16 모델을 불러옴

fc = nn.Sequential(                     # 분류층을 정의
       nn.Linear(512 * 7 * 7, 4096),
       nn.ReLU(),
       nn.Dropout(), # 드롭아웃을 추가
       nn.Linear(4096, 4096),
       nn.ReLU(),
       nn.Dropout(),
       nn.Linear(4096, 10),
   )

model.classifier = fc # 분류층을 모델에 붙임
model.to(device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [13]:
lr = 1e-4
optim = Adam(model.parameters(), lr=lr)

for epoch in range(1):
   iterator = tqdm.tqdm(train_loader) # 학습 로그 출력
   for data, label in iterator:
       optim.zero_grad()
       preds = model(data.to(device)) # 모델의 예측값 출력
       loss = nn.CrossEntropyLoss()(preds, label.to(device))
       loss.backward()
       optim.step()
       iterator.set_description(f"epoch:{epoch+1} loss:{loss.item()}")
torch.save(model.state_dict(), "CIFAR_pretrained.pth") # 모델 저장

epoch:1 loss:0.28979870676994324: 100%|██████████| 1563/1563 [14:09<00:00,  1.84it/s]


In [14]:
model.load_state_dict(torch.load("CIFAR_pretrained.pth", map_location=device))
num_corr = 0
with torch.no_grad():
   for data, label in test_loader:
       output = model(data.to(device))
       preds = output.data.max(1)[1]
       corr = preds.eq(label.to(device).data).sum().item()
       num_corr += corr
   print(f"Accuracy:{(num_corr/len(test))*100.0}")

Accuracy:81.58999999999999


### 분류기의 파라메터 확인

In [4]:
transforms = Compose([
   RandomCrop((32, 32), padding=4),
   RandomHorizontalFlip(p=0.5),
   ToTensor(),
   Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.247, 0.243, 0.261))
])

In [5]:
train = CIFAR10(root="data", train=True, download=True, transform=transforms)
test = CIFAR10(root="data", train=False, download=True, transform=transforms)
train_loader = DataLoader(train, batch_size=32, shuffle=True)
test_loader = DataLoader(test, batch_size=32, shuffle=False)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data\cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:36<00:00, 4632622.12it/s]


Extracting data\cifar-10-python.tar.gz to data
Files already downloaded and verified


In [6]:
print(train.data.shape)

(50000, 32, 32, 3)


In [8]:
from torchinfo import summary
summary(model, input_size=(1, 3, 32, 32))

Layer (type:depth-idx)                   Output Shape              Param #
VGG                                      [1, 10]                   --
├─Sequential: 1-1                        [1, 512, 1, 1]            --
│    └─Conv2d: 2-1                       [1, 64, 32, 32]           1,792
│    └─ReLU: 2-2                         [1, 64, 32, 32]           --
│    └─Conv2d: 2-3                       [1, 64, 32, 32]           36,928
│    └─ReLU: 2-4                         [1, 64, 32, 32]           --
│    └─MaxPool2d: 2-5                    [1, 64, 16, 16]           --
│    └─Conv2d: 2-6                       [1, 128, 16, 16]          73,856
│    └─ReLU: 2-7                         [1, 128, 16, 16]          --
│    └─Conv2d: 2-8                       [1, 128, 16, 16]          147,584
│    └─ReLU: 2-9                         [1, 128, 16, 16]          --
│    └─MaxPool2d: 2-10                   [1, 128, 8, 8]            --
│    └─Conv2d: 2-11                      [1, 256, 8, 8]            29