In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import numpy as np


In [2]:
# 1. Hyper Parameter
# Model dimensions
train_size = 28 * 28     # input features per sample (a 28x28 image, flattened)
hidden1_size = 500       # units in the single hidden layer
num_classes = 10         # number of output scores produced by the network

# Optimisation settings
learning_rate = 1e-3     # step size handed to the optimizer
batch_size = 100         # samples per mini-batch in both data loaders
ephoc_size = 5           # number of full passes over the training set


In [3]:
# 2. Data load
# MNIST Dataset
# Both splits live under ./data and are converted to tensors on access.
# Only the training split is constructed with download=True.
train_dataset = dsets.MNIST(root='./data', train=True, download=True,
                            transform=transforms.ToTensor())
test_dataset = dsets.MNIST(root='./data', train=False,
                           transform=transforms.ToTensor())

# Data Loader (Input Pipeline)
# Training batches are reshuffled every epoch; test batches keep dataset order.
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           shuffle=True,
                                           batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          shuffle=False,
                                          batch_size=batch_size)

0it [00:00, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


9920512it [00:03, 2859824.91it/s]                             


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz


0it [00:00, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


32768it [00:00, 40898.40it/s]                           
0it [00:00, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


1654784it [00:01, 993718.01it/s]                            
0it [00:00, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


8192it [00:00, 13103.78it/s]            

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz
Processing...
Done!





In [4]:
# 3. Build the Model
class FeedForwardNN(nn.Module):
    """Feed-forward classifier with one hidden layer: Linear -> ReLU -> Linear.

    Args:
        train_size: number of input features per sample.
        hidden1_size: number of units in the hidden layer.
        num_classes: number of output scores per sample.
    """

    def __init__(self, train_size, hidden1_size, num_classes):
        super().__init__()
        # Attribute names are part of the state_dict keys; keep them stable.
        self.linear1 = nn.Linear(train_size, hidden1_size)   # train_size -> hidden1_size
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden1_size, num_classes)  # hidden1_size -> num_classes

    def forward(self, x):
        """Compute class scores for a batch of flattened inputs.

        Args:
            x: tensor whose last dimension equals ``train_size``.

        Returns:
            Tensor whose last dimension equals ``num_classes``. These are the
            raw outputs of the second linear layer; no softmax is applied here.
        """
        hidden = self.relu(self.linear1(x))
        return self.linear2(hidden)
        

In [5]:
# 4. Generate a model
# Instantiate the network with the dimensions chosen in the hyper-parameter cell.
model = FeedForwardNN(
    train_size=train_size,
    hidden1_size=hidden1_size,
    num_classes=num_classes,
)

In [6]:
# 5. Init loss function and Optimizer
# Cross-entropy loss over the model's output scores.
loss_function = nn.CrossEntropyLoss()
# Adam updates every parameter of `model` using the configured learning rate.
optimizer = torch.optim.Adam(params=model.parameters(),
                             lr=learning_rate)

In [7]:
# 6. Train
# Runs `ephoc_size` passes over `train_loader`, updating `model` in place with
# `optimizer` and printing the mini-batch loss every 100 batches.
model.train()  # explicit, so re-running this cell after evaluation still trains in train mode
for ephoc in range(ephoc_size):
    print("ephoc: ",ephoc)
    for idx, (images, labels) in enumerate(train_loader):
        # Flatten each image to a vector of `train_size` features, matching the
        # input width the model was built with. Tensors are passed directly:
        # the Variable wrapper is deprecated and no longer needed for autograd.
        images = images.view(-1, train_size)

        # Forward, backward, parameter update
        optimizer.zero_grad()          # clear gradients left from the previous batch
        outputs = model(images)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()

        if idx%100 == 0:
            print("ephoc[",ephoc,"] \t", "loss:", loss.item())
            

ephoc:  0
ephoc[ 0 ] 	 loss: 2.310422658920288
ephoc[ 0 ] 	 loss: 0.40802592039108276
ephoc[ 0 ] 	 loss: 0.19996318221092224
ephoc[ 0 ] 	 loss: 0.15013495087623596
ephoc[ 0 ] 	 loss: 0.1843675971031189
ephoc[ 0 ] 	 loss: 0.17182865738868713
ephoc:  1
ephoc[ 1 ] 	 loss: 0.1467081606388092
ephoc[ 1 ] 	 loss: 0.17056721448898315
ephoc[ 1 ] 	 loss: 0.1366962045431137
ephoc[ 1 ] 	 loss: 0.11327894032001495
ephoc[ 1 ] 	 loss: 0.14209286868572235
ephoc[ 1 ] 	 loss: 0.10156629234552383
ephoc:  2
ephoc[ 2 ] 	 loss: 0.0877559632062912
ephoc[ 2 ] 	 loss: 0.08177069574594498
ephoc[ 2 ] 	 loss: 0.03787003457546234
ephoc[ 2 ] 	 loss: 0.07844266295433044
ephoc[ 2 ] 	 loss: 0.11255628615617752
ephoc[ 2 ] 	 loss: 0.09957398474216461
ephoc:  3
ephoc[ 3 ] 	 loss: 0.03837532177567482
ephoc[ 3 ] 	 loss: 0.05395949259400368
ephoc[ 3 ] 	 loss: 0.009709489531815052
ephoc[ 3 ] 	 loss: 0.021260881796479225
ephoc[ 3 ] 	 loss: 0.016461573541164398
ephoc[ 3 ] 	 loss: 0.0791126936674118
ephoc:  4
ephoc[ 4 ] 	 loss:

In [8]:
#Test the model
# Counts how many test samples the model classifies correctly and prints the
# accuracy as a whole-number percentage.
model.eval()  # switch the module to inference mode before scoring
total = 0
correct = 0
# Gradients are not needed for scoring; no_grad avoids building autograd graphs.
with torch.no_grad():
    for images, labels in test_loader:
        # Flatten each image to the input width the model was built with.
        images = images.view(-1, train_size)

        outputs = model(images)
        # Index of the highest score along dim 1 is the predicted class.
        _, predicted = torch.max(outputs, 1)
        total += len(predicted)
        # .item() keeps `correct` a plain Python int so the percentage below
        # does not depend on tensor division semantics.
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))

Accuracy of the network on the 10000 test images: 98 %
