## <font color='green'> <div align="center">In the name of God </div></font>

### <font color='red'> Author: Sayed Kamaledin Ghiasi-Shirazi</font> <a href="http://profsite.um.ac.ir/~k.ghiasi">(http://profsite.um.ac.ir/~k.ghiasi)</a> 

# A multi-layer feedforward neural network in PyTorch

### importing general modules

In [1]:
import numpy as np
import scipy.io as sio
import matplotlib as mpl
import matplotlib.pyplot as plt

### importing PyTorch modules

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from   torch.utils.tensorboard import SummaryWriter

## Defining the MLFF network by inheriting from nn.Module

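Note that the following command is crucial: wrapping the layers in `nn.ModuleList` registers them as sub-modules of the network, so their parameters are returned by `net.parameters()` (and therefore seen by the optimizer) and are moved by `net.to(device)`. Layers kept in a plain Python list would not be registered.

```python
self.layers = nn.ModuleList(layersList)
```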

In [22]:
class MLFF(nn.Module):
    """Multi-layer feed-forward network made of a stack of ``nn.Linear`` layers.

    The network has ``num_hidden_layers`` hidden layers of width
    ``num_hidden_neurons`` followed by one output layer, i.e.
    ``num_hidden_layers + 1`` linear layers in total.

    Args:
        num_hidden_layers: number of hidden layers (>= 0). With 0 the network
            is a single ``Linear(num_input_neurons, num_output_neurons)``.
        num_input_neurons: size of each input vector.
        num_hidden_neurons: width of every hidden layer.
        num_output_neurons: size of the output vector.
        activation: optional callable applied after every layer except the
            last one (e.g. ``F.relu``). The default ``None`` applies no
            non-linearity, in which case the stacked layers compose into a
            single affine map of the input.

    Raises:
        ValueError: if ``num_hidden_layers`` is negative.
    """

    def __init__(self, num_hidden_layers, num_input_neurons, 
                 num_hidden_neurons, num_output_neurons, activation=None):
        super().__init__()
        if num_hidden_layers < 0:
            raise ValueError('num_hidden_layers must be non-negative, got {0}'.format(num_hidden_layers))
        self.layersCount =  num_hidden_layers+1
        self.activation = activation
        # Widths of consecutive layers: input -> hidden * k -> output.
        # Building the layers from adjacent pairs also handles k == 0 correctly
        # (one Linear(input, output)) instead of overwriting the first layer.
        widths = ([num_input_neurons]
                  + [num_hidden_neurons] * num_hidden_layers
                  + [num_output_neurons])
        layersList = [nn.Linear(fan_in, fan_out)
                      for fan_in, fan_out in zip(widths[:-1], widths[1:])]
        # nn.ModuleList registers the layers as sub-modules of this network.
        self.layers  = nn.ModuleList(layersList)
    
    def forward(self, x):
        """Pass ``x`` through all layers and return the output of the last one."""
        last = self.layersCount - 1
        for i, layer in enumerate(self.layers):
            x = layer(x)
            # The output layer is never followed by the activation.
            if self.activation is not None and i < last:
                x = self.activation(x)
        return x

In [29]:
# Five hidden layers of 100 units, mapping 784 inputs to 10 outputs.
net = MLFF(
    num_hidden_layers=5,
    num_input_neurons=784,
    num_hidden_neurons=100,
    num_output_neurons=10,
)
print(net)

MLFF(
  (layers): ModuleList(
    (0): Linear(in_features=784, out_features=100, bias=True)
    (1): Linear(in_features=100, out_features=100, bias=True)
    (2): Linear(in_features=100, out_features=100, bias=True)
    (3): Linear(in_features=100, out_features=100, bias=True)
    (4): Linear(in_features=100, out_features=100, bias=True)
    (5): Linear(in_features=100, out_features=10, bias=True)
  )
)


### Choosing device

In [30]:
# Use the first CUDA GPU when PyTorch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


## Loading MNIST

In [31]:
# Read the four MNIST arrays from their MATLAB files. The image arrays are
# divided by 255 (presumably to scale 8-bit pixel values into [0, 1] -- confirm
# against the data files).
MnistTrainX = sio.loadmat('../../datasets/mnist/MnistTrainX')['MnistTrainX'] / 255
MnistTrainY = sio.loadmat('../../datasets/mnist/MnistTrainY')['MnistTrainY']
MnistTestX = sio.loadmat('../../datasets/mnist/MnistTestX')['MnistTestX'] / 255
MnistTestY = sio.loadmat('../../datasets/mnist/MnistTestY')['MnistTestY']

# Keep at most the first N training samples (lower N for quicker experiments).
N = 60000
MnistTrainX, MnistTrainY = MnistTrainX[:N, :], MnistTrainY[:N, :]

# Short aliases; the label arrays are squeezed to drop their singleton axis.
XTrain, yTrain = MnistTrainX, MnistTrainY.squeeze()
XTest, yTest = MnistTestX, MnistTestY.squeeze()

# From here on N is the number of training samples actually kept and dim the
# number of features per sample.
N, dim = XTrain.shape

## Optimization

In [32]:
# Optimizer settings
learning_rate = 0.1
batch_size = 100

# Training schedule
num_epochs = 2
report_after_X_iterations = 100   # report/log once every this many mini-batches

# Number of full mini-batches per epoch (floor division drops any remainder).
num_batches = N // batch_size

In [49]:
# Train `net` with plain mini-batch SGD on the training set and, every
# `report_after_X_iterations` mini-batches, log to TensorBoard:
#   * accuracy on the whole training set and the current mini-batch loss,
#   * an image of the first neuron's input weights (red = positive, green = negative),
#   * a histogram of every layer's weights.
net.to(device)

tensorboard_comment = 'MLFF'
# SummaryWriter ignores `comment` whenever `log_dir` is given, so the writer is
# created with the default log directory (runs/<datetime>_<hostname>) to which
# the comment is appended. Each run therefore gets its own tagged sub-directory
# under `runs`.
tb = SummaryWriter(comment = tensorboard_comment)

optimizer = optim.SGD(net.parameters(), lr= learning_rate)
criterion = nn.CrossEntropyLoss()

try:
    for epoch in range (num_epochs):
        for itr in range (num_batches):
            batch = slice(itr * batch_size, (itr + 1) * batch_size)
            X = torch.tensor (MnistTrainX[batch, :], dtype=torch.float).to(device)
            # reshape(-1) keeps the labels 1-D even for a batch of one sample.
            T = torch.tensor (MnistTrainY[batch].reshape(-1), dtype = torch.long).to(device)
            if epoch == 0 and itr == 0:
                tb.add_graph (net, X)
            output = net(X)
            loss = criterion(output, T)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if itr % report_after_X_iterations != 0:
                continue

            print('\n---- iteration #{0} of {1} at epoch #{2} of {3} ---- :'.format(
                itr, num_batches, epoch, num_epochs))

            # Accuracy over all N training samples. Gradients are not needed
            # here, so autograd bookkeeping is switched off. Stepping by
            # batch_size up to N also covers a final partial batch.
            score = 0.0
            with torch.no_grad():
                for start in range(0, N, batch_size):
                    X_eval = torch.tensor(MnistTrainX[start:start + batch_size, :],
                                          dtype=torch.float).to(device)
                    T_eval = MnistTrainY[start:start + batch_size].reshape(-1)
                    prediction = torch.argmax(net(X_eval), dim=1).cpu().numpy()
                    score += np.sum(prediction == T_eval)
            score /= N
            score *= 100

            # Loss of the mini-batch used for the last update (computed before the step).
            loss_value = loss.item()
            print('Loss = {0}, Accuracy on training data = {1}%'.format(loss_value, score))
            total_itr = epoch * num_batches + itr
            print (total_itr)
            tb.add_scalar ('Training score', scalar_value= score, global_step = total_itr)
            tb.add_scalar ('Training loss', scalar_value=loss_value, global_step = total_itr)

            # Visualise the input weights of the first neuron of the first layer
            # as a 28x28 image: channel 0 holds positive weights, channel 1 the
            # magnitude of negative weights, both scaled by the largest magnitude.
            data = net.layers[0].weight[0,:].detach().cpu().numpy()
            data_pos = np.maximum(data,0)
            data_neg = np.maximum(-data,0)
            max_val = np.maximum (np.max(data_pos), np.max(data_neg))
            if max_val > 0:   # avoid 0/0 when every weight is exactly zero
                data_pos /= max_val
                data_neg /= max_val
            img = np.zeros ([3,28,28])
            img[0,:,:] = np.reshape (data_pos, [28,28])
            img[1,:,:] = np.reshape (data_neg, [28,28])
            tb.add_image  ('Layer1Visualization', img, global_step = total_itr)
            for i,layer in enumerate (net.layers):
                tb.add_histogram (f'layer{i}.weight', layer.weight, global_step = total_itr)
finally:
    # Flush and close the event file even if training fails or is interrupted.
    tb.close()


---- iteration #0 of 600 at epoch #0 of 2 ---- :
Loss = 0.11023735255002975, Accuracy on training data = 89.53999999999999%
0

---- iteration #100 of 600 at epoch #0 of 2 ---- :
Loss = 0.32644739747047424, Accuracy on training data = 91.48333333333333%
100

---- iteration #200 of 600 at epoch #0 of 2 ---- :
Loss = 0.3714450001716614, Accuracy on training data = 90.565%
200

---- iteration #300 of 600 at epoch #0 of 2 ---- :
Loss = 0.25609225034713745, Accuracy on training data = 90.76666666666667%
300

---- iteration #400 of 600 at epoch #0 of 2 ---- :
Loss = 0.32117268443107605, Accuracy on training data = 91.62%
400

---- iteration #500 of 600 at epoch #0 of 2 ---- :
Loss = 0.35292351245880127, Accuracy on training data = 91.21166666666667%
500

---- iteration #0 of 600 at epoch #1 of 2 ---- :
Loss = 0.17817768454551697, Accuracy on training data = 90.34%
600

---- iteration #100 of 600 at epoch #1 of 2 ---- :
Loss = 0.3193376362323761, Accuracy on training data = 91.63333333333334%

In [46]:
# Input weights of the first neuron in the first layer, as a NumPy array.
net.layers[0].weight.detach()[0].cpu().numpy()

array([ 3.29613201e-02,  2.52459645e-02, -2.86724027e-02,  3.42800468e-03,
       -1.54972672e-02,  1.26734935e-02,  3.49265374e-02, -1.58633254e-02,
       -1.52948759e-02,  2.25275755e-03,  2.91853063e-02,  9.53777507e-03,
        3.21108066e-02,  2.92489794e-03,  1.48941884e-02,  3.07525545e-02,
       -2.92217527e-02, -1.95769966e-02, -2.53907964e-02, -6.14722818e-03,
       -1.38979983e-02,  9.36683267e-04,  3.07190977e-02, -8.10556114e-04,
        1.22003891e-02, -3.24045382e-02, -3.31997201e-02,  8.14040005e-03,
       -7.08421692e-03, -2.07496174e-02,  7.12541118e-03, -2.75545213e-02,
       -5.00779366e-03, -1.74469538e-02,  1.64579898e-02, -3.53653952e-02,
       -1.83401126e-02,  3.25091667e-02,  5.36939176e-03, -7.96594005e-03,
       -3.80959660e-02, -1.78201532e-04,  3.20014134e-02,  1.35507493e-03,
       -9.31689143e-03, -1.21201621e-02,  3.32228988e-02, -2.45643649e-02,
       -2.07378231e-02,  3.34474333e-02, -3.30965519e-02, -3.38512771e-02,
        3.10086198e-02,  