In [None]:
import numpy as np
import matplotlib.pylab as plt
import math
%matplotlib inline

# Fix NumPy's global random state so that code drawing from it (e.g. the
# np.random.permutation call used for mini-batch shuffling) is repeatable.
# NOTE: this does not seed PyTorch, which keeps its own generator.
np.random.seed(42)

# MNIST dataset import

In [None]:
from sklearn.datasets import fetch_openml

# Fetch MNIST from OpenML; data_home='./' keeps the download cache next to the notebook.
# as_frame=False asks for plain NumPy arrays. Recent scikit-learn releases return
# pandas objects by default, which breaks the positional indexing (X[i]) and the
# .reshape(...) calls used further down in this notebook.
mnist = fetch_openml('mnist_784', data_home='./', as_frame=False)
X = mnist['data']
print(X.shape)

In [None]:
# Preview the first n*n samples as 28x28 grayscale thumbnails.
n = 5
fig, axs = plt.subplots(n, n, figsize=(n, n))

# axs.flat walks the grid row by row, so the k-th panel shows sample k.
for k, panel in enumerate(axs.flat):
    panel.axis('off')
    panel.imshow(X[k].reshape(28, 28), cmap='gray')

In [None]:
# Standardise each feature column: subtract its mean and divide by its
# standard deviation. The 0.01 added to the denominator keeps constant
# columns (std == 0) from causing a division by zero.
# NOTE(review): the statistics are taken over all of X, i.e. before the
# train/test split performed later in the notebook.
X = X.astype('float')
Xm, Xs = np.mean(X, axis=0), np.std(X, axis=0)
X = (X - Xm) / (Xs + 0.01)

In [None]:
# Labels conversion to one-hot encoding and train-test splitting
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

Y = mnist['target']

X_n = X[:]
# np.asarray guarantees an ndarray (and therefore .reshape) even when the
# loader handed back a pandas Series.
Y_n = np.asarray(Y)

print('original', Y_n)
# The keyword that requests dense output was renamed between scikit-learn
# releases (sparse -> sparse_output). Using the default sparse result and
# densifying it with .toarray() works on every version.
t = OneHotEncoder(categories='auto')
Y_work = t.fit_transform(Y_n.reshape(-1, 1)).toarray()
print('one hot', Y_work)

# Stratify on the raw labels so train and test keep the same class balance.
# No random_state is passed, so the shuffle draws from NumPy's global RNG.
X_train, X_test, Y_train, Y_test = train_test_split(X_n, Y_work, test_size=0.2, stratify=Y_n)

# The last field reports the *test* label shape (it previously printed Y_train.shape).
print('X train shape :', X_train.shape, ', Labels train shape :', Y_train.shape,
     '\nX test shape :', X_test.shape, ', Labels test shape :', Y_test.shape)

# Simple feed-forward neural network in PyTorch

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
# To force CPU execution, replace this block with: device = torch.device('cpu')
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
# Data conversion to float32 torch tensors on the selected device.
# torch.as_tensor accepts both NumPy arrays and tensors, so re-running this
# cell is harmless (torch.from_numpy would raise TypeError the second time,
# because the names would already hold tensors).
X_train = torch.as_tensor(X_train, dtype=torch.float32, device=device)
X_test = torch.as_tensor(X_test, dtype=torch.float32, device=device)
Y_train = torch.as_tensor(Y_train, dtype=torch.float32, device=device)
Y_test = torch.as_tensor(Y_test, dtype=torch.float32, device=device)

In [None]:
class Net(nn.Module):
    """Fully connected classifier: 784 inputs -> 800 ReLU units -> 10 output scores."""

    def __init__(self, device):
        """Build the layer stack and move its parameters to ``device``."""
        super().__init__()

        # Layers are created in forward order and stored under ``fc``.
        layers = [
            nn.Linear(784, 800),
            nn.ReLU(),
            nn.Linear(800, 10),
        ]
        self.fc = nn.Sequential(*layers).to(device)

    def forward(self, x):
        """Return the raw output scores (no softmax applied) for the batch ``x``."""
        stack = self.fc
        return stack(x)

In [None]:
# loss function
def compute_loss(y_true, y_hat):
    """Summed (not averaged) cross-entropy between targets and raw scores.

    ``y_hat`` is passed through a log-softmax over its last dimension, the
    result is weighted element-wise by ``y_true`` and the negated total over
    all elements is returned as a 0-d tensor.
    """
    log_probs = torch.log_softmax(y_hat, dim=-1)
    weighted = y_true * log_probs
    return -weighted.sum()

#accuracy calculation
def calculate_accuracy(pred_one_hot, label_one_hot):
    """Fraction of rows whose highest-scoring class matches the label's.

    Args:
        pred_one_hot: 2-D tensor of per-class scores, one row per sample.
        label_one_hot: 2-D tensor of the same layout holding the targets;
            the position of each row's maximum is taken as the true class.

    Returns:
        A 0-d NumPy array holding the accuracy in [0, 1] (NaN for an empty batch).
    """
    prediction = torch.argmax(pred_one_hot, dim=1)
    labels = torch.argmax(label_one_hot, dim=1)

    # Average over the number of rows. Dividing by label_one_hot.sum() is only
    # correct when every label row sums to exactly 1, so it silently gives a
    # wrong figure for scaled or multi-hot targets.
    acc = (prediction == labels).to(torch.float32).mean()

    return acc.cpu().numpy()

#minibatches
def iterate_minibatches(X, y, batchsize):
    """Yield ``(X_batch, y_batch)`` pairs covering the data once in random order.

    A fresh permutation of the row indices is drawn from NumPy's global RNG
    on every call. The final batch is shorter when ``len(X)`` is not a
    multiple of ``batchsize``.
    """
    order = np.random.permutation(np.arange(len(X)))
    n_rows = len(order)
    for lo in range(0, n_rows, batchsize):
        hi = lo + batchsize
        picked = order[lo:hi]
        yield X[picked], y[picked]

In [None]:
# Seed PyTorch too: the NumPy seed set at the top of the notebook does not
# cover torch's weight initialisation, so without this every fresh run
# starts from different weights.
torch.manual_seed(42)

net = Net(device)
optimizer = torch.optim.Adam(net.parameters(), lr=0.5e-3)

batch_size = 512

# Per-epoch history of loss and accuracy on the train and test sets.
L_train, L_test, acc_train, acc_test = [], [], [], []

In [None]:
# Print the module tree of the network as a quick check of its layers.
print(net)

In [None]:
N_EPOCHS = 15

for epoch in range(N_EPOCHS):
    # ---- train ----
    net.train(True)
    L = 0.
    n_correct = 0
    for X_batch, y_batch in iterate_minibatches(X_train, Y_train, batch_size):
        optimizer.zero_grad()
        y_h = net(X_batch)
        loss = compute_loss(y_batch, y_h)
        loss.backward()
        optimizer.step()

        # compute_loss returns a sum over the batch, so totals can simply be
        # accumulated and divided by the dataset size afterwards.
        L += loss.item()
        # Running train accuracy, measured with the weights as they were
        # when each batch was seen.
        n_correct += (y_h.argmax(dim=1) == y_batch.argmax(dim=1)).sum().item()
    L_train.append(L / Y_train.shape[0])
    acc_train.append(n_correct / Y_train.shape[0])

    # ---- test ----
    net.train(False)
    # The forward pass lives inside no_grad so no autograd graph is built
    # for the whole test set.
    with torch.no_grad():
        y_h = net(X_test)
        L = compute_loss(Y_test, y_h).item()
    L_test.append(L / Y_test.shape[0])
    acc_test.append(calculate_accuracy(y_h, Y_test))

    print("{} epoch loss. Train : {}, Test : {}".format(
        epoch,
        np.round(L_train[-1], 2),
        np.round(L_test[-1], 2)
    ))

In [None]:
# Side-by-side learning curves: loss on the left, accuracy on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))

panels = (
    (ax1, 'Loss', L_train, L_test),
    (ax2, 'Accuracy', acc_train, acc_test),
)
for axis, title, train_curve, test_curve in panels:
    axis.plot(train_curve, label='train')
    axis.plot(test_curve, label='test')
    axis.grid()
    axis.set_title(title)
    axis.legend()

plt.show()