In [2]:
import torch 
import numpy as np
import matplotlib.pyplot as plt

In [22]:
class MyMLP(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = torch.nn.Sequential(
            torch.nn.Linear(5,11),
            torch.nn.ReLU(),
            torch.nn.Linear(11,16),
            torch.nn.ReLU(),
            torch.nn.Linear(16,13),
            torch.nn.ReLU(),
            torch.nn.Linear(13,8),
            torch.nn.ReLU(),
            torch.nn.Linear(8,4),
            torch.nn.Softmax(dim=1)
        )
    def forward(self,x):
        return self.layers.forward(x)


In order to calculate the exact number of parameters for each layer suppose to have the number of neurons in each layer:
$d_1, d_2,\cdots , d_n$

This means that each neuron in the $k^{th}$ layer is connected to $d_{k+1}$ neurons. So the total number of parameters (bias excluded) 
needed in the $k^{th}$ layer is $d_k * d_{k+1}$

So overall the total number of parameters is 
$$|W| = \sum_{i = 1} ^ {n-1} d_i * d_{i+1}$$

If we consider also the bias as an additional weight the formula becomes 
$$|W| = \sum_{i = 1} ^ {n-1} (d_i + 1) * d_{i+1}$$

In [23]:
mlp = MyMLP()
for p in mlp.parameters():
    print(torch.linalg.norm(p))

tensor(1.9922, grad_fn=<CopyBackwards>)
tensor(0.9338, grad_fn=<CopyBackwards>)
tensor(2.3889, grad_fn=<CopyBackwards>)
tensor(0.8140, grad_fn=<CopyBackwards>)
tensor(2.1699, grad_fn=<CopyBackwards>)
tensor(0.4521, grad_fn=<CopyBackwards>)
tensor(1.6237, grad_fn=<CopyBackwards>)
tensor(0.4475, grad_fn=<CopyBackwards>)
tensor(1.3018, grad_fn=<CopyBackwards>)
tensor(0.4139, grad_fn=<CopyBackwards>)


In [27]:
#generate 10 random datapoints
x = torch.rand(10,5)

In [32]:
y = mlp.forward(x)
print(y)

tensor([[0.2712, 0.2812, 0.2726, 0.1749],
        [0.2733, 0.2779, 0.2732, 0.1756],
        [0.2737, 0.2774, 0.2735, 0.1754],
        [0.2731, 0.2784, 0.2734, 0.1751],
        [0.2734, 0.2785, 0.2730, 0.1750],
        [0.2718, 0.2800, 0.2728, 0.1754],
        [0.2740, 0.2776, 0.2733, 0.1751],
        [0.2709, 0.2813, 0.2725, 0.1753],
        [0.2731, 0.2777, 0.2734, 0.1757],
        [0.2744, 0.2770, 0.2735, 0.1751]], grad_fn=<SoftmaxBackward0>)


In [37]:
y_class = torch.argmax(y, axis = 1)
print(y_class)
y_true = torch.randint_like(y_class,0,4)

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])


In [38]:
def accuracy(y_pred,y_true):
    y_eq = (y_pred == y_true)
    y_eq.int()
    return torch.sum(y_eq)/y_eq.shape[0]

In [40]:
accuracy(y_class,y_true)

tensor(0.4000)