In [1]:
import numpy as np

from torchvision import datasets
import torchvision.transforms as transforms

from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler

import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
numClass = 10
batchSize = 64 # 32, 64, 128, 256, 512
validSize = 0.2
dropOut = 0.2
optimizerType = 'RMSprop' # Adam, RMSprop, SGD
learningRate = 0.001

In [3]:
trainData = datasets.MNIST(root='data', train=True, download=True, transform=transforms.ToTensor())
testData = datasets.MNIST(root='data', train=False, download=True, transform=transforms.ToTensor())

numTrain, sets = len(trainData), list(range(len(trainData)))
np.random.shuffle(sets)

splitIndex = int(np.floor(validSize * numTrain))
trainIndex, validIndex = sets[splitIndex:], sets[:splitIndex]

trainLoader = DataLoader(trainData, batch_size=batchSize, 
                         sampler=SubsetRandomSampler(trainIndex), num_workers=0)
validLoader = DataLoader(trainData, batch_size=batchSize, 
                         sampler=SubsetRandomSampler(validIndex), num_workers=0)
testLoader = DataLoader(testData, batch_size=batchSize, num_workers=0)

In [4]:
class pytorchMLP(nn.Module):
    def __init__(self):
        super(pytorchMLP, self).__init__()
        
        self.fc1 = nn.Linear(784, 512) # with one hidden layer
        self.fc2 = nn.Linear(512, numClass)
        self.droput = nn.Dropout(dropOut)
        
    def forward(self, x):
        x = x.view(-1, 784)
        x = F.relu(self.fc1(x))
        x = self.droput(x)
        x = self.fc2(x)
        
        return x

In [5]:
model = pytorchMLP()
criterion = nn.CrossEntropyLoss()

if optimizerType == 'Adam':
    optimizer = torch.optim.Adam(model.parameters(), lr=learningRate)
    
if optimizerType == 'RMSprop':
    optimizer = torch.optim.RMSprop(model.parameters(), lr=learningRate)

if optimizerType == 'SGD':
    optimizer = torch.optim.SGD(model.parameters(), lr=learningRate)

In [6]:
patience, badCounter = 5, 0
epochs, validLossMin = 50, np.Inf
for epoch in range(epochs):
    trainLoss, validLoss = 0, 0

    model.train()
    for data, label in trainLoader:
        optimizer.zero_grad()
        output = model(data)
        
        loss = criterion(output, label)
        
        loss.backward() # to backpropagate the error
        optimizer.step()

        trainLoss += loss.item() * data.size(0)
        
    model.eval()
    for data, label in validLoader:
        output = model(data)

        loss = criterion(output, label)

        validLoss = loss.item() * data.size(0)
    
    trainLoss = trainLoss / len(trainLoader.sampler)
    validLoss = validLoss / len(validLoader.sampler)
    
    print('Epoch {} \tTrain Loss: {:.4f} \tValid Loss: {:.4f}'.format(
        epoch + 1, trainLoss, validLoss))
        
    if validLoss < validLossMin:
        print('---------------------------------------------------------------------------------')
        print('Valid Loss decreased from {:.4f} to {:.4f}.  Best Model updated and saved.'.format(
            validLossMin, validLoss))
        print('---------------------------------------------------------------------------------')
        torch.save(model.state_dict(), 'bestmodel.pt')
        validLossMin = validLoss
        
    else:
        badCounter += 1
        
    if badCounter > patience:
        print('Early Stopping')
        break

Epoch 1 	Train Loss: 0.2522 	Valid Loss: 0.0007
---------------------------------------------------------------------------------
Valid Loss decreased from inf to 0.0007.  Best Model updated and saved.
---------------------------------------------------------------------------------
Epoch 2 	Train Loss: 0.1153 	Valid Loss: 0.0001
---------------------------------------------------------------------------------
Valid Loss decreased from 0.0007 to 0.0001.  Best Model updated and saved.
---------------------------------------------------------------------------------
Epoch 3 	Train Loss: 0.0782 	Valid Loss: 0.0002
Epoch 4 	Train Loss: 0.0596 	Valid Loss: 0.0003
Epoch 5 	Train Loss: 0.0480 	Valid Loss: 0.0000
---------------------------------------------------------------------------------
Valid Loss decreased from 0.0001 to 0.0000.  Best Model updated and saved.
---------------------------------------------------------------------------------
Epoch 6 	Train Loss: 0.0384 	Valid Loss: 0.000

In [7]:
model.load_state_dict(torch.load('bestmodel.pt'))

testLoss = 0.0
classCorrect = [0. for i in range(numClass)]
classTotal = [0. for i in range(numClass)]
model.eval()
for data, target in testLoader:
    output = model(data)

    loss = criterion(output, target)

    testLoss += loss.item() * data.size(0)

    __, predictions = torch.max(output, 1)
    correct = np.squeeze(predictions.eq(target.data.view_as(predictions)))
    for i in range(len(target)):
        label = target.data[i]
        classCorrect[label] += correct[i].item()
        classTotal[label] += 1

testLoss = testLoss / len(testLoader.sampler)
print('Test Loss: {:.4f}\n'.format(testLoss))

for i in range(numClass):
    print('Test Accuracy of {:2d}:   {:.2f}%  ({:.0f} / {:.0f})'.format(
        i, 100 * classCorrect[i] / classTotal[i],
        np.sum(classCorrect[i]), np.sum(classTotal[i])))

print('\nBest Model\nBatch size: {}, Valid Size: {}'.format(batchSize, validSize))
print('Drop out: {}, Optimizer: {}, Learning Rate: {}'.format(
    dropOut, optimizerType, learningRate))
print('\nTest Accuracy of Overall:   {:.2f}%  ({:.0f} / {:.0f})'.format(
    100 * np.sum(classCorrect) / np.sum(classTotal),
    np.sum(classCorrect), np.sum(classTotal)))

Test Loss: 0.0679

Test Accuracy of  0:   99.08%  (971 / 980)
Test Accuracy of  1:   98.94%  (1123 / 1135)
Test Accuracy of  2:   98.55%  (1017 / 1032)
Test Accuracy of  3:   97.43%  (984 / 1010)
Test Accuracy of  4:   98.27%  (965 / 982)
Test Accuracy of  5:   97.87%  (873 / 892)
Test Accuracy of  6:   96.66%  (926 / 958)
Test Accuracy of  7:   97.08%  (998 / 1028)
Test Accuracy of  8:   97.74%  (952 / 974)
Test Accuracy of  9:   97.52%  (984 / 1009)

Best Model
Batch size: 64, Valid Size: 0.2
Drop out: 0.2, Optimizer: RMSprop, Learning Rate: 0.001

Test Accuracy of Overall:   97.93%  (9793 / 10000)
