In [1]:
import torch
import numpy as np
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.nn as nn
import time

In [2]:
# Run on the first CUDA GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Optimisation hyperparameters shared by the cells below.
batch_size = 128      # samples per mini-batch
num_epochs = 10       # full passes over the training set
learning_rate = 0.1   # SGD step size

In [3]:
# MNIST splits stored under ./data; ToTensor turns every image into a tensor.
# Only the training split asks for a download -- no download flag is passed for
# the test split, so it presumably reuses the files fetched here.
train_dataset = datasets.MNIST(
    root='data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = datasets.MNIST(
    root='data',
    train=False,
    transform=transforms.ToTensor(),
)

# Mini-batch iterators: training batches are reshuffled, test batches keep
# the dataset order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [4]:
# Sanity check: pull a single batch from the training loader and report the
# tensor shapes; nothing is printed if the loader yields no batches.
first_batch = next(iter(train_loader), None)
if first_batch is not None:
    image, label = first_batch
    print('Image batch dimensions:', image.shape)
    print('Image label dimensions:', label.shape)

Image batch dimensions: torch.Size([128, 1, 28, 28])
Image label dimensions: torch.Size([128])


In [5]:
class Multilayer_Perceptron(torch.nn.Module):
    """Two-hidden-layer perceptron with batch normalisation.

    Architecture: Linear -> BatchNorm1d -> ReLU -> Linear -> BatchNorm1d ->
    ReLU -> Linear -> log_softmax.

    Args:
        num_features: Size of each (flattened) input vector.
        num_classes: Number of output classes.
        hidden_sizes: Optional ``(width_1, width_2)`` pair giving the widths of
            the two hidden layers. When omitted, the module-level names
            ``num_hidden_1`` and ``num_hidden_2`` are used, which must then be
            defined before the model is instantiated.
    """

    def __init__(self, num_features, num_classes, hidden_sizes=None):
        super().__init__()

        if hidden_sizes is None:
            # Backward-compatible fallback: read the widths from module-level
            # variables, as the two-argument constructor always did.
            hidden_sizes = (num_hidden_1, num_hidden_2)
        hidden_1, hidden_2 = hidden_sizes

        self.linear_1 = nn.Linear(num_features, hidden_1)
        self.linear_1_bn = nn.BatchNorm1d(hidden_1)
        self.linear_2 = nn.Linear(hidden_1, hidden_2)
        self.linear_2_bn = nn.BatchNorm1d(hidden_2)
        self.linear_out = nn.Linear(hidden_2, num_classes)

    def forward(self, x):
        """Map a ``(batch, num_features)`` tensor to ``(batch, num_classes)`` log-probabilities."""
        out = F.relu(self.linear_1_bn(self.linear_1(x)))
        out = F.relu(self.linear_2_bn(self.linear_2(out)))
        out = self.linear_out(out)
        # Note: these are *log*-probabilities, not probabilities.
        log_proba = F.log_softmax(out, dim=1)
        return log_proba

In [6]:
# Seed PyTorch's global RNG *before* the model is built so that the random
# weight initialisation is covered by the seed (seeding afterwards, as this
# cell used to do, leaves the initial weights dependent on prior RNG use).
torch.manual_seed(1)

# Widths of the two hidden layers.
num_hidden_1 = 128
num_hidden_2 = 256

# 784 input features = one flattened 28x28 image; 10 output classes.
model = Multilayer_Perceptron(784, 10)
model = model.to(device)

# Plain SGD over all model parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

<torch._C.Generator at 0x7fd470f97b70>

In [7]:
def metric_acc(net, data_loader, device=None):
    """Return the classification accuracy of ``net`` on ``data_loader``, in percent.

    Every batch is flattened to ``(batch, -1)`` before being passed to ``net``;
    the predicted class is the index of the largest output along dim 1.

    Args:
        net: Module producing one score per class for each input row.
        data_loader: Iterable yielding ``(features, targets)`` tensor pairs.
        device: Device the batches are moved to. When omitted, the device of
            the first parameter of ``net`` is used (CPU if it has none).

    Returns:
        A 0-dim float tensor holding the accuracy as a percentage (0-100).

    Raises:
        ValueError: If ``data_loader`` yields no examples.

    Side effects:
        Puts ``net`` into evaluation mode and leaves it there; call
        ``net.train()`` before resuming training.
    """
    if device is None:
        first_param = next(net.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    net.eval()
    correct_pred, num_examples = 0, 0
    with torch.no_grad():
        for features, targets in data_loader:
            # Flatten per sample instead of hard-coding 28*28 so any input
            # size the network accepts can be evaluated.
            features = features.view(features.size(0), -1).to(device)
            targets = targets.to(device)
            scores = net(features)
            _, predicted = torch.max(scores, 1)
            num_examples += targets.size(0)
            correct_pred += (predicted == targets).sum()

    if num_examples == 0:
        # Without this guard the code below would fail with an opaque
        # AttributeError ('int' object has no attribute 'float').
        raise ValueError("metric_acc: data_loader yielded no examples")
    return correct_pred.float() / num_examples * 100

In [8]:
# Train with mini-batch SGD. The loss is logged every 50 batches; after each
# epoch the training-set accuracy and the elapsed wall-clock time are printed.
start_time = time.time()
for epoch in range(num_epochs):
    # metric_acc() at the end of the epoch switches the model to eval mode,
    # so training mode has to be re-enabled every epoch.
    model.train()
    for batch_ind, (features, targets) in enumerate(train_loader):
        # Flatten each 28x28 image into a 784-dimensional row vector.
        features = features.view(-1, 28*28).to(device)
        targets = targets.to(device)

        log_proba = model(features)
        # The model already returns log-probabilities (log_softmax), so the
        # matching loss is NLL. F.cross_entropy expects raw logits and would
        # apply log_softmax a second time.
        loss = F.nll_loss(log_proba, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch_ind % 50 == 0:
            # .item() extracts a plain Python float for logging.
            print('Epoch %03d/%03d | Batch %03d/%03d | Loss: %.4f' % ((epoch+1), num_epochs , batch_ind, 
                  len(train_loader), loss.item()))

    # metric_acc() disables gradient tracking itself, so no extra
    # torch.no_grad() context is needed here.
    print('Epoch: %03d/%03d training accuracy: %.2f%%' % (
          epoch+1, num_epochs, 
          metric_acc(model, train_loader)))
    print('Time elapsed: %.2f min' % ((time.time() - start_time)/60))

Epoch 001/010 | Batch 000/469 | Loss: 2.3975
Epoch 001/010 | Batch 050/469 | Loss: 0.2771
Epoch 001/010 | Batch 100/469 | Loss: 0.3463
Epoch 001/010 | Batch 150/469 | Loss: 0.1911
Epoch 001/010 | Batch 200/469 | Loss: 0.2405
Epoch 001/010 | Batch 250/469 | Loss: 0.1149
Epoch 001/010 | Batch 300/469 | Loss: 0.1684
Epoch 001/010 | Batch 350/469 | Loss: 0.0981
Epoch 001/010 | Batch 400/469 | Loss: 0.1273
Epoch 001/010 | Batch 450/469 | Loss: 0.0774
Epoch: 001/010 training accuracy: 97.02%
Time elapsed: 0.13 min
Epoch 002/010 | Batch 000/469 | Loss: 0.0826
Epoch 002/010 | Batch 050/469 | Loss: 0.0355
Epoch 002/010 | Batch 100/469 | Loss: 0.0540
Epoch 002/010 | Batch 150/469 | Loss: 0.0854
Epoch 002/010 | Batch 200/469 | Loss: 0.1458
Epoch 002/010 | Batch 250/469 | Loss: 0.1168
Epoch 002/010 | Batch 300/469 | Loss: 0.1241
Epoch 002/010 | Batch 350/469 | Loss: 0.0869
Epoch 002/010 | Batch 400/469 | Loss: 0.1485
Epoch 002/010 | Batch 450/469 | Loss: 0.0965
Epoch: 002/010 training accuracy: 98

In [9]:
# Accuracy of the trained model on the held-out test split, in percent.
test_accuracy = metric_acc(model, test_loader)
print('Test accuracy: %.2f%%' % test_accuracy)

Test accuracy: 98.12%
