In [2]:
import torch 
import numpy as np
import matplotlib.pyplot as plt

In [3]:
class MyMLP(torch.nn.Module):
    def __init__(self,bias = True):
        super().__init__()
        self.layers = torch.nn.Sequential(
            torch.nn.Linear(5,11,bias=bias),
            torch.nn.ReLU(),
            torch.nn.Linear(11,16,bias=bias),
            torch.nn.ReLU(),
            torch.nn.Linear(16,13,bias=bias),
            torch.nn.ReLU(),
            torch.nn.Linear(13,8,bias=bias),
            torch.nn.ReLU(),
            torch.nn.Linear(8,4,bias=bias),
            torch.nn.Softmax(dim=1)
        )
    def forward(self,x):
        return self.layers.forward(x)


In order to calculate the exact number of parameters for each layer suppose to have the number of neurons in each layer:
$d_1, d_2,\cdots , d_n$

This means that each neuron in the $k^{th}$ layer is connected to $d_{k+1}$ neurons. So the total number of parameters (bias excluded) 
needed in the $k^{th}$ layer is $d_k * d_{k+1}$

So overall the total number of parameters is 
$$|W| = \sum_{i = 1} ^ {n-1} d_i * d_{i+1}$$

If we consider also the bias as an additional weight the formula becomes 
$$|W| = \sum_{i = 1} ^ {n-1} (d_i + 1) * d_{i+1}$$

In [4]:
mlp = MyMLP(bias = False)

import numpy as np
#looping through parameters
#actually the method parameters returns biases and weights as different tensors
#for matrices `torch.linalg.norm` returns the Frobenious norm, for vector the euclidean norm is returned
for p in mlp.parameters():
    print(p.shape, '\t',f"Frobenius norm: {torch.linalg.norm(p) : .3f} \t L1 norm {torch.linalg.vector_norm(p, 1) : .3f}")

from torchinfo import summary
summary(mlp)

torch.Size([11, 5]) 	 Frobenius norm:  1.768 	 L1 norm  3.002
torch.Size([16, 11]) 	 Frobenius norm:  2.257 	 L1 norm  2.991
torch.Size([13, 16]) 	 Frobenius norm:  2.080 	 L1 norm  2.204
torch.Size([8, 13]) 	 Frobenius norm:  1.647 	 L1 norm  1.515
torch.Size([4, 8]) 	 Frobenius norm:  1.165 	 L1 norm  1.247


Layer (type:depth-idx)                   Param #
MyMLP                                    --
├─Sequential: 1-1                        --
│    └─Linear: 2-1                       55
│    └─ReLU: 2-2                         --
│    └─Linear: 2-3                       176
│    └─ReLU: 2-4                         --
│    └─Linear: 2-5                       208
│    └─ReLU: 2-6                         --
│    └─Linear: 2-7                       104
│    └─ReLU: 2-8                         --
│    └─Linear: 2-9                       32
│    └─Softmax: 2-10                     --
Total params: 575
Trainable params: 575
Non-trainable params: 0

In [5]:
#generate 10 random datapoints
x = torch.rand(10,5)

In [6]:
#forward pass through the network
y = mlp.forward(x)
print(y)

tensor([[0.2496, 0.2500, 0.2520, 0.2484],
        [0.2489, 0.2502, 0.2540, 0.2470],
        [0.2492, 0.2500, 0.2534, 0.2473],
        [0.2491, 0.2502, 0.2530, 0.2478],
        [0.2494, 0.2500, 0.2528, 0.2478],
        [0.2491, 0.2501, 0.2535, 0.2473],
        [0.2496, 0.2500, 0.2518, 0.2486],
        [0.2489, 0.2501, 0.2543, 0.2466],
        [0.2494, 0.2501, 0.2522, 0.2483],
        [0.2492, 0.2500, 0.2535, 0.2472]], grad_fn=<SoftmaxBackward0>)


In [7]:
#argmax returns the index for each row of the largest value
#we are supposing that the example is classified according to 

y_class = torch.argmax(y, axis = 1)
print(f"Neural network classification: {y_class}")
y_true = torch.randint_like(y_class,0,4)
print(f"Random integer tensor to test the accuracy score against: {y_true}")

Neural network classification: tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
Random integer tensor to test the accuracy score against: tensor([1, 2, 3, 1, 1, 1, 1, 2, 0, 0])


In [10]:
def accuracy(y_pred,y_true):
    y_eq = (y_pred == y_true)
    return float(torch.sum(y_eq)/y_eq.shape[0])

In [11]:
accuracy(y_class,y_true)

0.20000000298023224