In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init

import torchvision.datasets as datasets
import torchvision.transforms as transforms

from torch.utils.data import DataLoader

import numpy as np 
import matplotlib.pyplot as plt

In [1]:
# Training hyper-parameters used by the cells below.
batch_size = 100        # number of samples per mini-batch
learning_rate = 2e-4    # step size passed to the optimizer
num_epoch = 10          # number of full passes over the training set

In [3]:
# MNIST training split, stored under ../Data/ (download=True is passed so the
# files can be fetched if needed). Each image goes through transforms.ToTensor();
# labels are left untouched (target_transform=None).
mnist_train = datasets.MNIST(
    root="../Data/",
    train=True,
    transform=transforms.ToTensor(),
    target_transform=None,
    download=True,
)

# MNIST test split, configured identically apart from train=False.
mnist_test = datasets.MNIST(
    root="../Data/",
    train=False,
    transform=transforms.ToTensor(),
    target_transform=None,
    download=True,
)

In [4]:
# Training batches: reshuffled every epoch, loaded by two worker processes.
# drop_last=True discards a trailing incomplete batch, so every batch that is
# yielded holds exactly `batch_size` samples.
train_loader = DataLoader(
    mnist_train,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
    drop_last=True,
)

# Test batches: same settings, but iterated in a fixed order (shuffle=False).
test_loader = DataLoader(
    mnist_test,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
    drop_last=True,
)

In [5]:
class CNN(nn.Module):
    """Small convolutional classifier producing 10 class scores per image.

    The network is a stack of three 5x5 convolutions (1 -> 16 -> 32 -> 64
    channels) with ReLU activations and two 2x2 max-pooling stages, followed
    by a two-layer fully connected head (64*3*3 -> 100 -> 10).

    The first linear layer expects 64*3*3 features, which is what the
    convolutional stack yields for a single-channel 28x28 input:
    28 -> 24 -> 20 -> (pool) 10 -> 6 -> (pool) 3.
    """

    def __init__(self):
        super(CNN, self).__init__()

        # Convolutional feature extractor (no padding, so each 5x5 conv
        # shrinks the spatial size by 4; each pool halves it).
        self.layer = nn.Sequential(
            nn.Conv2d(in_channels=1,out_channels=16,kernel_size=5),
            nn.ReLU(),

            nn.Conv2d(in_channels=16,out_channels=32,kernel_size=5),
            nn.ReLU(),

            nn.MaxPool2d(kernel_size=2,stride=2),

            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5),
            nn.ReLU(),

            nn.MaxPool2d(kernel_size=2,stride=2)
        )
        # Classification head mapping the flattened features to 10 logits.
        self.fc_layer = nn.Sequential(
            nn.Linear(64*3*3,100),
            nn.ReLU(),
            nn.Linear(100,10)
        )

    def forward(self,x):
        """Compute class scores for a batch of images.

        Args:
            x: Tensor of shape (N, 1, 28, 28); N may be any batch size.

        Returns:
            Tensor of shape (N, 10) holding unnormalized class scores.
        """
        out = self.layer(x)
        # Flatten per sample using the batch dimension of the input itself,
        # rather than a module-level constant, so partial batches and
        # single-image inference work as well.
        out = out.view(x.size(0),-1)
        out = self.fc_layer(out)
        return out

In [6]:
# Run on the first CUDA GPU when one is available; otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [7]:
# Build the network, then move its parameters to the selected device.
model = CNN()
model = model.to(device)

In [8]:
# Cross-entropy loss between the model's class scores and the integer labels.
loss_func = nn.CrossEntropyLoss()

# Adam optimizer over all model parameters, using the configured learning rate.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [9]:
# Training loop: runs `num_epoch` passes over `train_loader` and records a
# loss sample in `loss_arr` at the logging steps.
loss_arr = []

# Put the model in training mode explicitly, so this cell behaves the same
# even if a cell that calls model.eval() was executed before it.
model.train()

for i in range(num_epoch):
    for j,[image,label] in enumerate(train_loader):
        x = image.to(device)
        y = label.to(device)

        # PyTorch accumulates gradients on every backward pass, so clear
        # them before computing the gradients for this batch.
        optimizer.zero_grad()

        # Call the module itself rather than .forward() so that registered
        # hooks are run as well.
        output = model(x)

        loss = loss_func(output,y) # compute the loss
        loss.backward() # compute gradients of the loss
        optimizer.step() # update the model weights

        # Log whenever the step index is a multiple of 1000; this always
        # includes the first batch (j == 0) of each epoch.
        if j % 1000 == 0:
            print(loss)
            loss_arr.append(loss.cpu().detach().numpy())

tensor(2.3089, grad_fn=<NllLossBackward0>)
tensor(0.1180, grad_fn=<NllLossBackward0>)
tensor(0.1222, grad_fn=<NllLossBackward0>)
tensor(0.0715, grad_fn=<NllLossBackward0>)
tensor(0.0139, grad_fn=<NllLossBackward0>)
tensor(0.0322, grad_fn=<NllLossBackward0>)
tensor(0.0324, grad_fn=<NllLossBackward0>)
tensor(0.0086, grad_fn=<NllLossBackward0>)
tensor(0.0324, grad_fn=<NllLossBackward0>)
tensor(0.0388, grad_fn=<NllLossBackward0>)


In [13]:
# Evaluate the trained model on the test set and report the accuracy.
correct = 0
total = 0

# Put every layer of the model in eval mode.
# -> This disables behaviors that are only meant for training, such as
#    Dropout or the batch-statistics updates of BatchNorm.
model.eval()

# Disable torch's autograd engine: gradients are not tracked inside this block.
with torch.no_grad():
    for image, label in test_loader:
        x = image.to(device)
        y = label.to(device)

        # Call the module itself rather than .forward() so that registered
        # hooks are run as well.
        output = model(x)

        # Index of the highest score along the class dimension = predicted label.
        _,output_index = torch.max(output,1)

        total += label.size(0)

        # Accumulate as a Python int so the final percentage is computed in
        # double precision instead of carrying float32 rounding noise.
        correct += (output_index == y).sum().item()

    print("Accuracy of Test Data: {}%".format(100*correct/total))

Accuracy of Test Data: 99.16999816894531%
