In [1]:
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

In [2]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Fix every random number generator used below so repeated runs match.
torch.manual_seed(777)
np.random.seed(777)
if device != 'cpu':
    # Seed the generators of all visible CUDA devices as well.
    torch.cuda.manual_seed_all(777)

In [3]:
# hyperparameters
lr = 0.001
training_epochs = 15
batch_size = 100

In [4]:
# MNIST train / test splits, with each image converted to a tensor by ToTensor().
# download=True fetches the files into `root` only when they are not already
# there, so this cell also works on a fresh kernel / machine instead of raising
# a "dataset not found" error.
dataset_train = torchvision.datasets.MNIST(root='', train=True, download=True,
                                           transform=transforms.ToTensor())
dataset_test = torchvision.datasets.MNIST(root='', train=False, download=True,
                                          transform=transforms.ToTensor())

In [5]:
# Training batches are reshuffled every epoch; drop_last=True keeps every
# training batch at exactly `batch_size` samples.
dataloader_train = DataLoader(dataset_train, batch_size=batch_size, shuffle=True, drop_last=True)

# Evaluation must visit every test sample exactly once: no shuffling (order is
# irrelevant for metrics) and no dropped tail batch, which would silently
# exclude samples whenever batch_size does not divide the test-set size.
dataloader_test = DataLoader(dataset_test, batch_size=batch_size, shuffle=False, drop_last=False)

In [6]:
# Fully connected classifier: flattened 28*28 input -> 256 -> 256 -> 10 outputs,
# with a ReLU after each of the two hidden layers.
model = nn.Sequential(
    nn.Linear(28*28, 256),
    nn.ReLU(),
    nn.Linear(256, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)
# Move the parameters to the selected device.
model = model.to(device)

In [7]:
# Re-initialise the weight matrix of every Linear layer from a standard normal
# distribution; biases and non-Linear layers are left untouched.
# NOTE(review): unit-variance weights give very large initial logits (the
# recorded first-epoch training loss is ~176) -- presumably a deliberate
# baseline; confirm before changing.
for layer in model.children():
    if not isinstance(layer, nn.Linear):
        continue
    nn.init.normal_(layer.weight, mean=0.0, std=1.0)

In [8]:
# Train the model with Adam on cross-entropy loss and evaluate it on the test
# loader after every epoch.
#
# Metrics are accumulated per sample (not per batch), so the reported loss and
# accuracy stay correct for any batch size and for a ragged final batch.
# Accuracy is printed as a percentage.
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

print('Start training...')

for epoch in range(1, training_epochs+1):

    # ---- training pass ----
    model.train()
    running_loss = 0.0       # sum of per-sample losses
    running_correct = 0      # number of correctly classified samples
    running_total = 0        # number of samples seen

    for X, y in dataloader_train:
        # Flatten each image to a 784-vector and move the batch to the device.
        X, y = X.view(-1, 28*28).to(device), y.to(device)
        y_pred = model(X)
        loss = criterion(y_pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        n_samples = y.size(0)
        # criterion returns the batch mean; scale back to a per-sample sum.
        running_loss += loss.item() * n_samples
        # Tensor .sum() is vectorised; .item() keeps the counter a Python int
        # instead of a device tensor.
        running_correct += (y_pred.argmax(dim=1) == y).sum().item()
        running_total += n_samples

    # ---- evaluation pass ----
    model.eval()
    running_test_loss = 0.0
    running_test_correct = 0
    running_test_total = 0

    with torch.no_grad():
        for X, y in dataloader_test:
            X, y = X.view(-1, 28*28).to(device), y.to(device)
            y_pred = model(X)
            loss = criterion(y_pred, y)

            n_samples = y.size(0)
            running_test_loss += loss.item() * n_samples
            running_test_correct += (y_pred.argmax(dim=1) == y).sum().item()
            running_test_total += n_samples

    # Guard against an empty loader so the report never divides by zero.
    train_count = max(running_total, 1)
    test_count = max(running_test_total, 1)

    print(f'epoch {epoch}'
          f'    training loss:{running_loss / train_count:.3f}'
          f'    training acc:{100.0 * running_correct / train_count:.3f}'
          f'    test loss:{running_test_loss / test_count:.3f}'
          f'    test acc:{100.0 * running_test_correct / test_count:.3f}')

print('Finished Training')

Start training...
epoch 1    training loss:176.040    training acc:72.640    test loss:48.500    test acc:86.610
epoch 2    training loss:38.305    training acc:88.550    test loss:31.462    test acc:90.020
epoch 3    training loss:24.103    training acc:91.527    test loss:24.760    test acc:91.100
epoch 4    training loss:16.790    training acc:93.077    test loss:20.484    test acc:92.250
epoch 5    training loss:11.952    training acc:94.418    test loss:18.159    test acc:92.890
epoch 6    training loss:8.808    training acc:95.243    test loss:16.572    test acc:93.370
epoch 7    training loss:6.505    training acc:95.965    test loss:14.989    test acc:93.740
epoch 8    training loss:4.728    training acc:96.728    test loss:13.755    test acc:93.980
epoch 9    training loss:3.446    training acc:97.323    test loss:12.740    test acc:94.630
epoch 10    training loss:2.591    training acc:97.683    test loss:13.305    test acc:94.260
epoch 11    training loss:1.854    training a