In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

In [None]:
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [None]:
# Build the MNIST train/test datasets (downloaded on first use); every image is
# handed back as a tensor because of the ToTensor transform.
transform = transforms.ToTensor()
mnist_kwargs = dict(root='../data/cnn_files', download=True, transform=transform)
train_data = datasets.MNIST(train=True, **mnist_kwargs)
test_data = datasets.MNIST(train=False, **mnist_kwargs)

In [None]:
train_data

In [None]:
test_data

In [None]:
type(train_data)

In [None]:
# this is a tuple related to the first image of dataset: first item of turple is the image and second item is the label
train_data[0]

In [None]:
image, label = train_data[0]

In [None]:
image.shape
# the 1 in the shape indicates that the image is greyscale; for RGB the 1 would change to 3

In [None]:
label

In [None]:
plt.imshow(image.reshape((28,28)), cmap='gray')
# the reshape is because the imshow method accept 2D matrix
# cmap='gray' is added because imshow has a default 'viridis' color-mapping which leads to having not acctual colors

In [None]:
# Seed before building the loaders so the shuffled training order is reproducible.
torch.manual_seed(101)
train_loader = DataLoader(
    dataset=train_data,
    batch_size=100,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test_data,
    batch_size=500,
    shuffle=False,
)

In [None]:
# let's have a visual sense of the dataset
from torchvision.utils import make_grid

In [None]:
for images, labels in train_loader:
    break     # in this way we just retrieve the first batch

In [None]:
images.shape

In [None]:
labels.shape

In [None]:
# labels of the first 12 samples in the batch, spaced to line up with the image grid
first_labels = labels[:12].tolist()
labels_to_print = '          '.join(str(value) for value in first_labels)
print(labels_to_print)

# the same 12 samples laid out in a single row
im = make_grid(images[:12], nrow=12)
plt.figure(figsize=(18,4))
# imshow wants the channel axis last, so reorder from channel-height-width to height-width-channel
plt.imshow(im.numpy().transpose(1, 2, 0))

In [None]:
class MultilayerPerceptron(nn.Module):
    """Fully connected classifier with two hidden ReLU layers.

    Args:
        in_size: number of input features per sample (28*28 by default).
        out_size: number of output classes.
        layers: sizes of the two hidden layers. Any indexable sequence works;
            only the first two entries are used.
    """

    # The default for `layers` is a tuple rather than a list so that the
    # default object can never be mutated and shared between instances.
    def __init__(self, in_size=28*28, out_size=10, layers=(120, 84)):
        super().__init__()
        first_hidden, second_hidden = layers[0], layers[1]
        self.fc1 = nn.Linear(in_size, first_hidden)
        self.fc2 = nn.Linear(first_hidden, second_hidden)
        self.fc3 = nn.Linear(second_hidden, out_size)

    def forward(self, X):
        """Return log-probabilities over the classes for a batch of flattened inputs.

        X is expected to be 2D ([batch, in_size]); the log-softmax is taken
        over dim=1, i.e. across the class scores of each sample.
        """
        X = F.relu(self.fc1(X))
        X = F.relu(self.fc2(X))
        X = F.log_softmax(self.fc3(X), dim=1)
        return X

In [None]:
torch.manual_seed(101)
model = MultilayerPerceptron()
model

In [None]:
for param in model.parameters():
    print(param.numel())

In [None]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
import time
start_time = time.time()

epochs = 10
# Per-epoch history. Losses are plain Python floats (sample-weighted mean over
# the epoch) and correct counts are plain ints, so nothing here keeps an
# autograd graph alive and everything can be plotted directly.
train_loss_list = []
test_loss_list = []
train_correct_list = []
test_correct_list = []

for i in range(epochs):
    # ---- training pass ----
    model.train()
    train_corrects = 0
    train_seen = 0
    train_loss_sum = 0.0

    for batch_num, (X_train, y_train) in enumerate(train_loader, start=1):
        batch_size = X_train.size(0)

        # flatten [batch, 1, 28, 28] to [batch, 784]; the batch size is read from
        # the tensor so a smaller final batch does not break the reshape
        y_pred = model(X_train.view(batch_size, -1))
        loss = criterion(y_pred, y_train)

        predicted = y_pred.argmax(dim=1)
        train_corrects += (predicted == y_train).sum().item()
        train_seen += batch_size
        # assumes criterion returns the batch mean (the nn.CrossEntropyLoss default),
        # so multiplying by the batch size recovers the summed loss
        train_loss_sum += loss.item() * batch_size

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch_num % 200 == 0:
            accuracy_till_here = (train_corrects / train_seen) * 100
            print(f'Epoch:{i}, batch:{batch_num}, loss:{loss.item()}, accuracy:{accuracy_till_here}')

    train_loss_list.append(train_loss_sum / train_seen)
    train_correct_list.append(train_corrects)

    # ---- validation pass over the whole test set ----
    model.eval()
    test_corrects = 0
    test_seen = 0
    test_loss_sum = 0.0

    with torch.no_grad():
        for X_test, y_test in test_loader:
            batch_size = X_test.size(0)

            y_eval = model(X_test.view(batch_size, -1))

            predicted = y_eval.argmax(dim=1)
            test_corrects += (predicted == y_test).sum().item()
            test_seen += batch_size
            # accumulate inside the loop so the recorded loss covers every
            # test batch, not only the last one
            test_loss_sum += criterion(y_eval, y_test).item() * batch_size

    test_loss_list.append(test_loss_sum / test_seen)
    test_correct_list.append(test_corrects)


total_time = time.time() - start_time
print(f'Duration: {total_time/60} mins')
        

In [None]:
# float() accepts both plain numbers and 0-d tensors (including ones that still
# require grad, which cannot be converted to NumPy directly), so the history
# lists can be plotted whichever of the two they contain.
fig, ax = plt.subplots()
ax.plot([float(value) for value in train_loss_list], label='Training loss')
ax.plot([float(value) for value in test_loss_list], label='Validation loss')
ax.set(xlabel='Epoch', ylabel='Loss', title='Loss per epoch')
ax.legend();

In [None]:
# Accuracy in percent for each epoch. Dataset sizes come from the datasets
# themselves instead of hardcoded 60000/10000, and int() turns either a 0-d
# tensor or a plain int count into a Python number.
train_acc_per_epoch = [100 * int(t) / len(train_data) for t in train_correct_list]
test_acc_per_epoch = [100 * int(t) / len(test_data) for t in test_correct_list]

In [None]:
# Training vs. validation accuracy per epoch, labelled so the figure reads on its own.
fig, ax = plt.subplots()
ax.plot(train_acc_per_epoch, label='Training acc')
ax.plot(test_acc_per_epoch, label='Validation acc')
ax.set(xlabel='Epoch', ylabel='Accuracy (%)', title='Accuracy per epoch')
ax.legend();

In [None]:
# accuracy on the whole test data
test_load_all = DataLoader(test_data, batch_size=10000, shuffle=False)

# Predictions and targets are gathered batch by batch, so the result stays
# correct even if the batch size above is smaller than the test set.
batch_predictions = []
batch_targets = []

model.eval()
with torch.no_grad():
    for X_test, y_test in test_load_all:
        y_eval = model(X_test.view(len(X_test), -1))
        batch_predictions.append(y_eval.argmax(dim=1))
        batch_targets.append(y_test)

# `predicted` and `y_test` hold the full test set from here on; the
# confusion-matrix cell below relies on these names.
predicted = torch.cat(batch_predictions)
y_test = torch.cat(batch_targets)
corrects = (predicted == y_test).sum()
100*corrects.item()/len(y_test)

In [None]:
# sklearn expects (y_true, y_pred): rows are the true digits, columns the predicted ones
confusion_matrix(y_test, predicted)