In [None]:
# MNIST
# DataLoader, Transformation
# Multilayer Neural Net, activation function
# Loss and Optimizer
# Training loop (batch training)
# Model evaluation
# GPU support

In [None]:
# 0. basic import

In [3]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.utils.data import Dataset, DataLoader # class

import matplotlib.pyplot as plt
import numpy as np

In [4]:
# Pick the compute device: CUDA when torch reports it as available, CPU otherwise.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
print("device: ", device)

device:  cuda


In [5]:
# Hyperparameters for the model and the training run.
input_size = 28 * 28    # 28x28 = 784 input features
hidden_size = 100       # width of the hidden layer
num_classes = 10        # number of output classes
num_epochs = 2          # passes over the training loader
batch_size = 100        # samples per batch for both loaders
learning_rate = 1e-3    # step size passed to the optimizer

In [6]:
# 1. prepare dataset

In [7]:
# Arguments shared by both MNIST splits.
mnist_common = dict(
    root='./MNIST_data',              # directory used for the downloaded files
    transform=transforms.ToTensor(),  # convert each image to a tensor
)

# Training split (train=True); download is requested for this split.
train_dataset = dsets.MNIST(train=True, download=True, **mnist_common)

# Test split (train=False); no download is requested here.
test_dataset = dsets.MNIST(train=False, **mnist_common)

In [8]:
# Batch iterators over the two splits: the training loader shuffles,
# the test loader does not.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [9]:
# Pull a single batch from the test loader for inspection.
# With batch_size = 100 and one colour channel, a batch of images is
# expected to have shape (100, 1, 28, 28).
examples = iter(test_loader)

# Use the built-in next(): the loader iterator implements __next__, and the
# old `.next()` method alias is not available in current PyTorch releases.
example_data, example_targets = next(examples)

# Optional preview of the first six digits (uncomment to display):
# for i in range(6):
#     plt.subplot(2, 3, i + 1)                     # 2 rows x 3 columns
#     plt.imshow(example_data[i][0], cmap='gray')  # each image, first channel
# plt.show()

In [10]:
# 2. Define Model

In [11]:
class NeuralNet(nn.Module):
    """Fully connected network with one hidden layer: Linear -> ReLU -> Linear.

    The output of ``forward`` is the raw result of the second linear layer;
    no softmax is applied inside the model.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """Create the layers.

        Args:
            input_size: number of features per input sample.
            hidden_size: number of units in the hidden layer.
            num_classes: number of output values per sample.
        """
        super().__init__()
        self.l1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Return ``l2(relu(l1(x)))`` for the input ``x``."""
        hidden = self.relu(self.l1(x))
        return self.l2(hidden)

In [12]:
# Build the network from the hyperparameters, then move it to the selected device.
model = NeuralNet(
    input_size=input_size,
    hidden_size=hidden_size,
    num_classes=num_classes,
)
model = model.to(device)

In [13]:
# 3. Define Loss, Optimizer

In [14]:
# Cross-entropy loss on the model outputs; Adam updates all model parameters.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=learning_rate,
)

In [15]:
# 4. Training loop

In [16]:
# Training loop: for every batch, run a forward pass, compute the loss,
# back-propagate, and take one optimizer step.
model.train()  # make the training mode explicit
n_total_steps = len(train_loader)
log_every = 100  # print the current batch loss every `log_every` steps

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Flatten each batch to (batch, input_size) before the linear layers.
        # `input_size` is used instead of a literal 784 so the reshape always
        # agrees with the model definition.
        images = images.reshape(-1, input_size).to(device)
        labels = labels.to(device)

        # forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % log_every == 0:
            # Two adjacent f-strings instead of a backslash continuation:
            # a continuation inside the literal embeds the next line's
            # indentation in the printed message.
            print(f'epoch {epoch+1} / {num_epochs}, step {i+1}/{n_total_steps}, '
                  f'loss = {loss.item():.4f}')

epoch 1 / 2, step 100/600,                 loss = 0.3799
epoch 1 / 2, step 200/600,                 loss = 0.3669
epoch 1 / 2, step 300/600,                 loss = 0.3611
epoch 1 / 2, step 400/600,                 loss = 0.2685
epoch 1 / 2, step 500/600,                 loss = 0.2774
epoch 1 / 2, step 600/600,                 loss = 0.1854
epoch 2 / 2, step 100/600,                 loss = 0.2561
epoch 2 / 2, step 200/600,                 loss = 0.2541
epoch 2 / 2, step 300/600,                 loss = 0.1080
epoch 2 / 2, step 400/600,                 loss = 0.2452
epoch 2 / 2, step 500/600,                 loss = 0.2045
epoch 2 / 2, step 600/600,                 loss = 0.1236


In [22]:
# 5. Test
# Evaluate on the test loader: count how many predicted classes match the labels.
model.eval()  # make the evaluation mode explicit
n_correct = 0
n_samples = 0

# Gradients are not needed for evaluation.
with torch.no_grad():
    for images, labels in test_loader:
        # Same flattening as in training: (batch, input_size).
        images = images.reshape(-1, input_size).to(device)
        labels = labels.to(device)

        outputs = model(images)

        # Index of the largest output per sample is the predicted class.
        # argmax replaces torch.max(outputs.data, 1), whose values were unused
        # and whose `.data` access is unnecessary under no_grad().
        prediction = outputs.argmax(dim=1)
        n_samples += labels.size(0)
        n_correct += (prediction == labels).sum().item()

acc = 100.0 * n_correct / n_samples
# Report the number of samples actually evaluated rather than a hard-coded count.
print(f'Accuracy of the network on the {n_samples} test images: {acc} %')

Accuracy of the network on the 10000 test images: 94.88 %


## Debug

1. Typo in the reshape line (`-` written instead of `=`):   
   wrong: `images - images.reshape(-1, 28*28).to(device)`   
   fixed: `images = images.reshape(-1, 28*28).to(device)`


2. forward: `relu` and `l2` were applied to the input `x` instead of the previous output `out`.

   Wrong:

        def forward(self, x):
            out = self.l1(x)
            out = self.relu(x)
            out = self.l2(x)

   Fixed:

            out = self.relu(out)
            out = self.l2(out)