In [1]:
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

In [2]:
# Select the compute device: use CUDA when a GPU is visible, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Fix the seed of every RNG used in this notebook so that runs are repeatable.
torch.manual_seed(777)
np.random.seed(777)
if device == 'cuda':
    # Seed every visible GPU, not only the current one.
    torch.cuda.manual_seed_all(777)

In [3]:
# MNIST train/test splits, with each image converted to a float tensor by ToTensor().
# download=True fetches the files only when they are missing under `root`,
# so the notebook also runs on a fresh checkout / fresh kernel.
dataset_train = torchvision.datasets.MNIST(root='', train=True, download=True,
                                           transform=transforms.ToTensor())
dataset_test = torchvision.datasets.MNIST(root='', train=False, download=True,
                                          transform=transforms.ToTensor())

In [4]:
# hyperparameters

# optimisation
lr = 0.001             # learning rate handed to the optimizer
batch_size = 100       # samples per mini-batch
training_epochs = 15   # full passes over the training set

# regularisation
drop_prob = 0.3        # probability of zeroing an activation in dropout

In [5]:
# ReLU and Dropout hold no learnable parameters, so one instance of each
# is reused after both hidden layers.
dropout = nn.Dropout(p=drop_prob)
relu = nn.ReLU()

# Fully connected network: 28*28 input features -> 256 -> 256 -> 10 outputs.
model = nn.Sequential(
    nn.Linear(28*28, 256),
    relu,
    dropout,
    nn.Linear(256, 256),
    relu,
    dropout,
    nn.Linear(256, 10),
)
model = model.to(device)

In [6]:
def xavier(m: nn.Module) -> None:
    """Apply Xavier (Glorot) uniform initialisation to a linear layer's weight.

    Meant to be passed to ``Module.apply``. Modules that are not ``nn.Linear``
    are left untouched, and the bias of a linear layer is not modified.

    Args:
        m: module to (possibly) re-initialise in place.
    """
    if isinstance(m, nn.Linear):
        # The initialiser fills the tensor in place without recording the
        # operation for autograd, so the parameter is passed directly
        # instead of going through `.data`.
        nn.init.xavier_uniform_(m.weight)

# `apply` calls the function on every submodule and on the model itself,
# then returns the model, which the notebook displays as the cell output.
model.apply(fn=xavier)

Sequential(
  (0): Linear(in_features=784, out_features=256, bias=True)
  (1): ReLU()
  (2): Dropout(p=0.3, inplace=False)
  (3): Linear(in_features=256, out_features=256, bias=True)
  (4): ReLU()
  (5): Dropout(p=0.3, inplace=False)
  (6): Linear(in_features=256, out_features=10, bias=True)
)

In [7]:
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one full pass over `loader` and return `(loss, accuracy)`.

    Args:
        model: network called on batches flattened to 28*28 features.
        loader: iterable yielding `(X, y)` batches.
        criterion: loss function; assumed to return the mean loss of the batch
            (the default reduction of `nn.CrossEntropyLoss`).
        device: device the batches are moved to.
        optimizer: when given, the model is put in training mode (dropout
            active) and updated after every batch. When omitted, the model is
            put in eval mode (dropout disabled) and no gradients are tracked.

    Returns:
        Tuple `(mean loss per sample, accuracy in percent)` over all samples
        seen. Both are 0.0 when the loader yields no batches.
    """
    is_training = optimizer is not None
    model.train(is_training)  # train(False) is equivalent to eval()

    total_loss = 0.0
    total_correct = 0
    total_samples = 0

    with torch.set_grad_enabled(is_training):
        for X, y in loader:
            X, y = X.view(-1, 28*28).to(device), y.to(device)
            y_pred = model(X)
            loss = criterion(y_pred, y)

            if is_training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # Weight the batch-mean loss by the batch size so that a smaller
            # final batch does not skew the epoch average.
            n_batch = y.size(0)
            total_loss += loss.item() * n_batch
            # Tensor.sum() reduces on the device in a single call; .item()
            # keeps the running counter a plain Python int.
            total_correct += (y_pred.argmax(dim=1) == y).sum().item()
            total_samples += n_batch

    # Divide by the number of samples (not the number of batches) so the
    # accuracy is a true percentage for any batch size.
    denom = max(total_samples, 1)
    return total_loss / denom, 100.0 * total_correct / denom


optimizer = torch.optim.Adam(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

dataloader_train = DataLoader(dataset_train, batch_size=batch_size, shuffle=True, drop_last=True)
# Evaluation needs neither shuffling nor dropping the last partial batch:
# every test sample should be scored exactly once.
dataloader_test = DataLoader(dataset_test, batch_size=batch_size, shuffle=False, drop_last=False)

print('Start training...')

for epoch in range(1, training_epochs+1):
    # Training metrics are accumulated while dropout is active and the weights
    # are still changing, so they are a running estimate over the epoch.
    train_loss, train_acc = _run_epoch(model, dataloader_train, criterion, device, optimizer)
    test_loss, test_acc = _run_epoch(model, dataloader_test, criterion, device)

    print(f'epoch {epoch}'
          f'    training loss:{train_loss:.3f}'
          f'    training acc:{train_acc:.3f}'
          f'    test loss:{test_loss:.3f}'
          f'    test acc:{test_acc:.3f}')

print('Finished Training')

Start training...
epoch 1    training loss:0.328    training acc:90.073    test loss:0.118    test acc:96.300
epoch 2    training loss:0.141    training acc:95.780    test loss:0.091    test acc:97.020
epoch 3    training loss:0.107    training acc:96.675    test loss:0.080    test acc:97.520
epoch 4    training loss:0.089    training acc:97.165    test loss:0.068    test acc:97.900
epoch 5    training loss:0.077    training acc:97.595    test loss:0.066    test acc:98.000
epoch 6    training loss:0.068    training acc:97.833    test loss:0.069    test acc:97.830
epoch 7    training loss:0.061    training acc:98.057    test loss:0.068    test acc:97.760
epoch 8    training loss:0.055    training acc:98.268    test loss:0.065    test acc:98.030
epoch 9    training loss:0.051    training acc:98.347    test loss:0.065    test acc:97.970
epoch 10    training loss:0.048    training acc:98.483    test loss:0.060    test acc:98.260
epoch 11    training loss:0.045    training acc:98.513    tes

* dropout 적용으로 overfitting이 어느 정도 완화되었음