In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets


import os
import random
import numpy as np

In [2]:
seed = 1234
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True

In [3]:
data_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,),(0.3081,))
])

In [4]:
train_data = datasets.MNIST('data',train=True,download=True,transform=data_transforms)
test_data = datasets.MNIST('data',train=False,download=True,transform=data_transforms)

In [5]:
n_train_examples = int(len(train_data)*0.9)
n_valid_examples = len(train_data) - n_train_examples

train_data,valid_data = torch.utils.data.random_split(train_data,[n_train_examples,n_valid_examples])

In [6]:
print("Number of training examples: {}".format(len(train_data)))
print("Number of validation examples: {}".format(len(valid_data)))
print("Number of training examples: {}".format(len(test_data)))

Number of training examples: 54000
Number of validation examples: 6000
Number of training examples: 10000


In [7]:
BATCH_SIZE = 64
train_iterator = torch.utils.data.DataLoader(train_data,shuffle=True,batch_size=BATCH_SIZE)
valid_iterator = torch.utils.data.DataLoader(valid_data,shuffle=True,batch_size=BATCH_SIZE)
test_iterator = torch.utils.data.DataLoader(test_data,shuffle=True,batch_size=BATCH_SIZE)


In [8]:
class MLP(nn.Module):
    def __init__(self,hidden_neurons):
        super().__init__()
        self.hidden_neurons = hidden_neurons
        self.input_fc = nn.Linear(28*28,hidden_neurons[0])
        self.fcs = nn.ModuleList([nn.Linear(hidden_neurons[i],hidden_neurons[i+1]) for i in range(len(hidden_neurons)-1)])
        self.output_fc = nn.Linear(hidden_neurons[-1],10)
    
    def forward(self,x):
        #flatten 
        x = x.view(x.shape[0],-1)
        x = F.relu(self.input_fc(x))
        for i in range(len(self.hidden_neurons)-1):
            x = F.relu(self.fcs[i](x))
        x = self.output_fc(x)
        return x
            

In [9]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [10]:
model = MLP([1000,500,250]).to(device)

In [11]:
model

MLP(
  (input_fc): Linear(in_features=784, out_features=1000, bias=True)
  (fcs): ModuleList(
    (0): Linear(in_features=1000, out_features=500, bias=True)
    (1): Linear(in_features=500, out_features=250, bias=True)
  )
  (output_fc): Linear(in_features=250, out_features=10, bias=True)
)

In [12]:
optimizer = optim.Adam(model.parameters())

In [13]:
criterion = nn.CrossEntropyLoss()

In [14]:
def calculate_accuracy(yhat,y):
    preds = yhat.max(1,keepdim=True)[1]
    correct = preds.eq(y.view_as(preds)).sum()
    acc = correct.float()/preds.shape[0]
    return acc

In [15]:
def train(model,device,iterator,optimizer,criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.train()
    
    for x,y in iterator:
        x = x.to(device)
        y = y.to(device)
        
        optimizer.zero_grad()
        yhat = model(x)
        
        acc = calculate_accuracy(yhat,y)
        
        loss = criterion(yhat,y)
        
        loss.backward()
        
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc  += acc.item()
        
    return epoch_loss/len(iterator),epoch_acc/len(iterator)
    
    

In [16]:
def evaluate(model,device,iterator,criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    with torch.no_grad():
        for (x,y) in iterator:
            
            x = x.to(device)
            y = y.to(device)
            
            yhat = model(x)
            
            loss = criterion(yhat,y)
            
            acc = calculate_accuracy(yhat,y)
            
            epoch_loss += loss.item()
            epoch_acc += acc.item()
    return epoch_loss/len(iterator) ,epoch_acc/len(iterator)

In [17]:
EPOCHS = 10
SAVE_DIR = 'models'
MODEL_SAVE_PATH = os.path.join(SAVE_DIR, 'mlp-mnist.pt')

best_valid_loss = float('inf')

if not os.path.isdir(f'{SAVE_DIR}'):
    os.makedirs(f'{SAVE_DIR}')

for epoch in range(EPOCHS):
    train_loss, train_acc = train(model, device, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, device, valid_iterator, criterion)
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), MODEL_SAVE_PATH)
    
    print(f'| Epoch: {epoch+1:02} | Train Loss: {train_loss:.3f} | Train Acc: {train_acc*100:05.2f}% | Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:05.2f}% |')

| Epoch: 01 | Train Loss: 0.221 | Train Acc: 93.20% | Val. Loss: 0.152 | Val. Acc: 95.27% |
| Epoch: 02 | Train Loss: 0.095 | Train Acc: 97.07% | Val. Loss: 0.119 | Val. Acc: 96.81% |
| Epoch: 03 | Train Loss: 0.070 | Train Acc: 97.88% | Val. Loss: 0.093 | Val. Acc: 97.46% |
| Epoch: 04 | Train Loss: 0.055 | Train Acc: 98.31% | Val. Loss: 0.102 | Val. Acc: 97.24% |
| Epoch: 05 | Train Loss: 0.045 | Train Acc: 98.58% | Val. Loss: 0.095 | Val. Acc: 97.37% |
| Epoch: 06 | Train Loss: 0.038 | Train Acc: 98.80% | Val. Loss: 0.115 | Val. Acc: 97.31% |
| Epoch: 07 | Train Loss: 0.033 | Train Acc: 98.94% | Val. Loss: 0.119 | Val. Acc: 97.32% |
| Epoch: 08 | Train Loss: 0.028 | Train Acc: 99.14% | Val. Loss: 0.092 | Val. Acc: 97.71% |
| Epoch: 09 | Train Loss: 0.025 | Train Acc: 99.22% | Val. Loss: 0.110 | Val. Acc: 97.29% |
| Epoch: 10 | Train Loss: 0.025 | Train Acc: 99.29% | Val. Loss: 0.121 | Val. Acc: 97.55% |


In [18]:

model.load_state_dict(torch.load(MODEL_SAVE_PATH))

test_loss, test_acc = evaluate(model, device, valid_iterator, criterion)

print(f'| Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:05.2f}% |')

| Test Loss: 0.103 | Test Acc: 98.15% |
