# Chapter 05 이미지 처리 능력이 탁월한 CNN

## 5.2 CNN 모델 구현
* Conv -> Pooling -> Conv -> Dropout -> Pooling -> Dense -> Dropout -> Dense 구조로 구현

필요한 모듈 임포트

In [20]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms, datasets

CUDA 사용가능 여부

In [21]:
USE_CUDA = torch.cuda.is_available()
DEVICE = torch.device("cuda" if USE_CUDA else "cpu")

하이퍼파라미터인 이폭과 배치크기 설정

In [22]:
EPOCHS = 40
BATCH_SIZE = 64

데이터 불러오기 및 전처리

In [30]:
train_loader = torch.utils.data.DataLoader(
    datasets.FashionMNIST('./.data',
                          train=True,
                          download=True,
                          transform=transforms.Compose([
                                transforms.ToTensor(),
                                transforms.Normalize((0.1307,), (0.3081,))
                          ])),
    batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    datasets.FashionMNIST('./.data',
                          train=False,
                          download=True,
                          transform=transforms.Compose([
                                transforms.ToTensor(),
                                transforms.Normalize((0.1307,), (0.3081,))
                          ])),
    batch_size=BATCH_SIZE, shuffle=True)

* 만들 CNN 모델의 커널 크기는 5x5, 컨볼루션 계층 2개
* nn.Conv2d 모듈은 자신을 바로 부를 수 있는 인스턴스, 그냥 함수로 생각해도 무방
* 즉, self.conv1, self.conv2와 같은 CNN 모델의 내부 변수들은 함수로 취급될 수 있음

In [37]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5) # 채널 1, 커널 갯수 10, 커널 사이즈 5x5
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)

        # 드롭아웃 함수 사용하지 않고 모듈로 드롭아웃 인스턴스 만듬
        self.drop = nn.Dropout2d()

        # 일반 신경망 정의
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    # 입력부터 출력까지의 데이터가 지나갈 길 만들기
    def forward(self, x):
        # Conv층
        x = F.relu(F.max_pool2d(self.conv1(x), 2))  # 두번째 입력은 커널 크기
        x = F.relu(F.max_pool2d(self.conv2(x), 2))

        # 밀집층에 넣기위해 펼쳐줌
        x = x.view(-1, 320)

        x = F.relu(self.fc1(x))
        x = self.drop(x)
        x = self.fc2(x)
        return x

CNN 모델의 인스턴스와 최적화 함수 만들기

In [38]:
model = CNN().to(DEVICE)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

모델 훈련 코드

In [39]:
def train(model, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(DEVICE), target.to(DEVICE)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()

        if batch_idx % 200 == 0:
            print(f"Train Epoch: {epoch} [{batch_idx*len(data)}/{len(train_loader.dataset)}\
            ({100.*batch_idx/len(train_loader):.0f}%)]\tLoss:{loss.item():.6f}")

평가 코드

In [40]:
def evaluate(model, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(DEVICE), target.to(DEVICE)
            output = model(data)

            # 배치 오차 합산
            test_loss += F.cross_entropy(output, target,
                                         reduction='sum').item()

            # 가장 높은 값을 가진 인덱스가 바로 예측값
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    test_accuracy = 100. * correct / len(test_loader.dataset)
    return test_loss, test_accuracy

코드 실행

In [41]:
for epoch in range(1, EPOCHS + 1):
    train(model, train_loader, optimizer, epoch)
    test_loss, test_accuracy = evaluate(model, test_loader)

    print(f'[{epoch}] Test Loss: {test_loss:4f}, Accuracy: {test_accuracy:.2f}%')

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


[1] Test Loss: 0.626241, Accuracy: 76.19%
[2] Test Loss: 0.532268, Accuracy: 79.27%
[3] Test Loss: 0.493996, Accuracy: 82.32%
[4] Test Loss: 0.456888, Accuracy: 83.90%
[5] Test Loss: 0.421468, Accuracy: 85.07%
[6] Test Loss: 0.402588, Accuracy: 85.35%
[7] Test Loss: 0.391149, Accuracy: 85.97%
[8] Test Loss: 0.374953, Accuracy: 86.36%
[9] Test Loss: 0.368853, Accuracy: 86.71%
[10] Test Loss: 0.353006, Accuracy: 87.47%
[11] Test Loss: 0.350171, Accuracy: 87.32%
[12] Test Loss: 0.345960, Accuracy: 87.92%
[13] Test Loss: 0.329854, Accuracy: 87.82%
[14] Test Loss: 0.329779, Accuracy: 88.19%
[15] Test Loss: 0.331546, Accuracy: 88.23%
[16] Test Loss: 0.333916, Accuracy: 88.03%
[17] Test Loss: 0.320396, Accuracy: 88.24%
[18] Test Loss: 0.311278, Accuracy: 88.84%
[19] Test Loss: 0.315168, Accuracy: 88.56%
[20] Test Loss: 0.302553, Accuracy: 89.13%
[21] Test Loss: 0.298838, Accuracy: 89.51%
[22] Test Loss: 0.307221, Accuracy: 88.97%
[23] Test Loss: 0.305540, Accuracy: 88.82%
[24] Test Loss: 0.30

### 전체 코드 쳐보기

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms, datasets

USE_CUDA = torch.cuda.is_available()
DEVICE = torch.device("cuda" if USE_CUDA else "cpu")

EPOCHS = 40
BATCH_SIZE = 64

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('./.data',
                   train=True,
                   download=True,
                   transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size = BATCH_SIZE, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('./.data',
                   train=True,
                   download=True,
                   transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3801,))
                   ])),
    batch_size = BATCH_SIZE, shuffle=True
)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return x

model = Net().to(DEVICE)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

def train(model, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(DEVICE), target.to(DEVICE)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()

def evaluate(model, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(DEVICE), target.to(DEVICE)
            output = model(data)

            # 배치 오차 합산
            test_loss += F.cross_entropy(output, target,
                                         reduction='sum').item()
            
            # 가장 높은 값을 가진 인덱스가 예측값임
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    test_accuracy = 100. * correct / len(test_loader.dataset)
    return test_loss, test_accuracy

for epoch in range(1, EPOCHS + 1):
    train(model, train_loader, optimizer, epoch)
    test_loss, test_accuracy = evaluate(model, test_loader)

    print(f'[{epoch}] Test Loss: {test_loss:.4f}, Accuracy: {test_accuracy:.2f}%')
