In [1]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn.init
import matplotlib.pyplot as plt
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.nn as nn
import pandas as pd
import numpy as np

class CNN(torch.nn.Module):

    def __init__(self):
        super(CNN, self).__init__()
        # 첫번째층
        # ImgIn shape=(?, 28, 28, 1)
    
        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2))

        # 두번째층
 
        self.layer2 = torch.nn.Sequential(
            torch.nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2))

        # 전결합층 7x7x64 inputs -> 10 outputs
        self.fc = torch.nn.Linear(7 * 7 * 64, 10, bias=True)

        # 전결합층 한정으로 가중치 초기화
        torch.nn.init.xavier_uniform_(self.fc.weight)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.view(out.size(0), -1)   # 전결합층을 위해서 Flatten
        out = self.fc(out)
        return out

class CustomDataset(torch.utils.data.Dataset): 
  def __init__(self, phase='train'):
   # 데이터 읽기
   if phase=='train':
        mnist = dsets.MNIST(root='MNIST_data/', # 다운로드 경로 지정
                          train=True, # True를 지정하면 훈련 데이터로 다운로드
                          transform=transforms.ToTensor(), # 텐서로 변환
                          download=True)
   if phase=='val':
        mnist = dsets.MNIST(root='MNIST_data/', # 다운로드 경로 지정
                          train=False,
                          transform=transforms.ToTensor(), # 텐서로 변환
                          download=True)

   self.x_data=mnist.data
   self.y_data=mnist.targets

  def __len__(self):
    return len(self.x_data)

  def __getitem__(self, idx): 
    x = self.x_data[idx:idx+1]/255
    y = self.y_data[idx]
    return x, y

device = 'cuda' if torch.cuda.is_available() else 'cpu'

batch_=100
dataloader ={ 'train' :  DataLoader(CustomDataset(phase='train'), batch_size=batch_, shuffle=True),
             'val' :  DataLoader(CustomDataset(phase='val'), batch_size=batch_, shuffle=False)
 }

model = CNN().to(device) # 모델 구축
loss_fn =  torch.nn.CrossEntropyLoss()  # 손실함수 
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) # 옵티마이저 매개변수 갱신

nb_epochs = 10

for epoch in range(0, nb_epochs):
  avg_cost = 0
  sample_size = 0
  avg_cost_ = 0
  sample_size_ = 0

  # Train
  for batch_idx, samples in enumerate(dataloader['train']):
    x_train, y_train = samples
    x_train = x_train.to(device)
    y_train = y_train.to(device)

    pred = model(x_train)
    loss = loss_fn(pred, y_train)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    avg_cost+=loss
    sample_size+=1
  
  #Validation
  for batch_idx, samples in enumerate(dataloader['val']):
    x_train, y_train = samples
    x_train = x_train.to(device)
    y_train = y_train.to(device)

    pred = model(x_train)
    loss = loss_fn(pred, y_train)

    avg_cost_+=loss
    sample_size_+=1

  print("epoch :" , epoch, "loss_train", avg_cost/sample_size, "loss_val", avg_cost_/sample_size_)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to MNIST_data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting MNIST_data/MNIST/raw/train-images-idx3-ubyte.gz to MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting MNIST_data/MNIST/raw/train-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to MNIST_data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting MNIST_data/MNIST/raw/t10k-images-idx3-ubyte.gz to MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw

epoch : 0 loss_train tensor(0.2401, device='cuda:0', grad_fn=<DivBackward0>) loss_val tensor(0.0624, device='cuda:0', grad_fn=<DivBackward0>)
epoch : 1 loss_train tensor(0.0626, device='cuda:0', grad_fn=<DivBackward0>) loss_val tensor(0.0521, device='cuda:0', grad_fn=<DivBackward0>)
epoch : 2 loss_train tensor(0.0460, device='cuda:0', grad_fn=<DivBackward0>) loss_val tensor(0.0439, device='cuda:0', grad_fn=<DivBackward0>)
epoch : 3 loss_train tensor(0.0362, device='cuda:0', grad_fn=<DivBackward0>) loss_val tensor(0.0335, device='cuda:0', grad_fn=<DivBackward0>)
epoch : 4 loss_train tensor(0.0298, device='cuda:0', grad_fn=<DivBackward0>) loss_val tensor(0.0354, device='cuda:0', grad_fn=<DivBackward0>)
epoch : 5 loss_train tensor(0.0256, device='cuda:0', grad_fn=<DivBackward0>) loss_val tensor(0.0337, device='cuda:0', grad_fn=<DivBackward0>)
epoch : 6 loss_train tensor(0.0214, device='cuda:0', grad_fn=<Div