In [1]:
'''
1. Import Libraries for Data Engineering
'''

import os
import torch
import torchvision
from torchvision import datasets
import torchvision.transforms as transforms

In [2]:
root = os.path.join('~', '.torch', 'mnist')
transform = transforms.Compose([transforms.ToTensor(), lambda x: x.view(-1)])

In [3]:
train_dataset = datasets.MNIST(root=root, download=True, train=True, transform=transform)
test_dataset = datasets.MNIST(root=root, download=True, train=False, transform=transform)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to /Users/hongjinseog/.torch/mnist/MNIST/raw/train-images-idx3-ubyte.gz


100.0%


Extracting /Users/hongjinseog/.torch/mnist/MNIST/raw/train-images-idx3-ubyte.gz to /Users/hongjinseog/.torch/mnist/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to /Users/hongjinseog/.torch/mnist/MNIST/raw/train-labels-idx1-ubyte.gz


100.0%


Extracting /Users/hongjinseog/.torch/mnist/MNIST/raw/train-labels-idx1-ubyte.gz to /Users/hongjinseog/.torch/mnist/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz


6.0%

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to /Users/hongjinseog/.torch/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz


100.0%


Extracting /Users/hongjinseog/.torch/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz to /Users/hongjinseog/.torch/mnist/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz


100.0%

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to /Users/hongjinseog/.torch/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz
Extracting /Users/hongjinseog/.torch/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz to /Users/hongjinseog/.torch/mnist/MNIST/raw






In [4]:
'''
3. Import Libraries for Model Engineering
'''

from sklearn.metrics import accuracy_score
import torch.nn as nn
import torch.optim as optimizers

import numpy as np

np.random.seed(123)
torch.manual_seed(123)

<torch._C.Generator at 0x105b19f90>

In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [6]:
device

device(type='cpu')

In [7]:
'''
4. Set Hyperparameters
'''
input_size = 784 # 28x28
hidden_size = 200
output_dim = 10  # output layer dimensionality
EPOCHS = 30
batch_size = 100
learning_rate = 5e-4

In [8]:
train_ds = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_ds = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [9]:
class Feed_Forward_Net(nn.Module):
    def __init__(self, input_size, hidden_size, output_dim):
        super().__init__()
        self.l1 = nn.Linear(input_size, hidden_size)
        self.a1 = nn.Sigmoid()
        self.l2 = nn.Linear(hidden_size, hidden_size)
        self.a2 = nn.Sigmoid()
        self.l3 = nn.Linear(hidden_size, hidden_size)
        self.a3 = nn.Sigmoid()
        self.l4 = nn.Linear(hidden_size, output_dim)
        
        self.layers = [self.l1, self.a1, self.l2, self.a2, self.l3, self.a3, self.l4]
        
    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        return x

In [10]:
model = Feed_Forward_Net(input_size=input_size, hidden_size=hidden_size, output_dim=output_dim).to(device)

In [11]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [12]:
criterion = nn.CrossEntropyLoss()

In [13]:
def compute_loss(t, y):
    return criterion(y, t)

In [14]:
def train_step(x, t):
    model.train()
    preds = model(x)
    loss = compute_loss(t, preds)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    return loss, preds

In [15]:
def test_step(x, t):
    model.eval()
    preds = model(x)
    loss = compute_loss(t, preds)
    return loss, preds

In [18]:
for epoch in range(EPOCHS):
    train_loss = 0.
    train_acc = 0.
    
    for (x, t) in train_ds:
        x, t = x.to(device), t.to(device)
        loss, preds = train_step(x, t)
        train_loss += loss.item()
        train_acc += accuracy_score(t.tolist(), preds.argmax(dim=-1).tolist())
        
    train_loss /= len(train_ds)
    train_acc /= len(train_ds)
    
    print("Epoch : {}, Loss : {:.3f}, Acc : {:.3f}".format(epoch+1, train_loss, train_acc))

Epoch : 1, Loss : 0.087, Acc : 0.975
Epoch : 2, Loss : 0.075, Acc : 0.977
Epoch : 3, Loss : 0.064, Acc : 0.980
Epoch : 4, Loss : 0.059, Acc : 0.982
Epoch : 5, Loss : 0.050, Acc : 0.984
Epoch : 6, Loss : 0.049, Acc : 0.985
Epoch : 7, Loss : 0.048, Acc : 0.985
Epoch : 8, Loss : 0.043, Acc : 0.986
Epoch : 9, Loss : 0.037, Acc : 0.988
Epoch : 10, Loss : 0.035, Acc : 0.989
Epoch : 11, Loss : 0.039, Acc : 0.988
Epoch : 12, Loss : 0.038, Acc : 0.988
Epoch : 13, Loss : 0.037, Acc : 0.988
Epoch : 14, Loss : 0.034, Acc : 0.989
Epoch : 15, Loss : 0.036, Acc : 0.988
Epoch : 16, Loss : 0.035, Acc : 0.989
Epoch : 17, Loss : 0.033, Acc : 0.990
Epoch : 18, Loss : 0.033, Acc : 0.989
Epoch : 19, Loss : 0.033, Acc : 0.990
Epoch : 20, Loss : 0.032, Acc : 0.990
Epoch : 21, Loss : 0.031, Acc : 0.990
Epoch : 22, Loss : 0.031, Acc : 0.990
Epoch : 23, Loss : 0.028, Acc : 0.991
Epoch : 24, Loss : 0.026, Acc : 0.992
Epoch : 25, Loss : 0.025, Acc : 0.992
Epoch : 26, Loss : 0.025, Acc : 0.992
Epoch : 27, Loss : 0.

In [19]:
test_loss = 0.
test_acc = 0.

for (x, t) in test_ds:
    x, t = x.to(device), t.to(device)
    loss, preds = test_step(x, t)
    test_loss += loss.item()
    test_acc += accuracy_score(t.tolist(), preds.argmax(dim=-1).tolist())
    
test_loss /= len(test_ds)
test_acc /= len(test_ds)

print('test_loss : {:.3f}, test_acc : {:.3f}'.format(test_loss, test_acc))


test_loss : 0.114, test_acc : 0.974
