# Workshop. Neural networks' tools (Pytorch)

<p style='text-align: right;font-style: italic; color: red;'>Designed by: Mr. Abdelkrime Aries</p>


In [1]:
import torch
from torch import Tensor, nn, optim

torch.__version__

'2.3.1+cu121'

In [2]:
# !pip install pytorch-lightning
# import pytorch_lightning as pl

In [3]:
import pandas     as pd
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import classification_report

pd.__version__

'2.2.2'

In [4]:
from typing import Literal, List

## I. Data preparation

In [5]:
train = pd.read_csv('data/sat.trn', delimiter=' ', header=None)

X_train = train.iloc[:, :-1].values
Y_train = train.iloc[:,  -1].values

lbin = LabelBinarizer()

X_train = X_train / 255.
Y_train = lbin.fit_transform(Y_train)

X_train = Tensor(X_train)
Y_train = Tensor(Y_train)

X_train.shape, Y_train.shape

(torch.Size([4435, 36]), torch.Size([4435, 6]))

In [6]:
test = pd.read_csv('data/sat.tst', delimiter=' ', header=None)

X_test = test.iloc[:, :-1].values
Y_test = test.iloc[:,  -1].values

X_test = X_test / 255.
# Y_test = lbin.transform(Y_test)

X_test = Tensor(X_test)
# Y_test = Tensor(Y_test)

X_test.shape, Y_test.shape

(torch.Size([2000, 36]), (2000,))

## II. High level

### II.1. Sequential model

In [7]:
nn1 = nn.Sequential(
   nn.Linear(X_train.shape[1], 10),
   nn.ReLU(),
   nn.Linear(10, 10),
   nn.ReLU(),
   nn.Linear(10, Y_train.shape[1]),
   nn.Softmax(dim=1)
   )

nn1

Sequential(
  (0): Linear(in_features=36, out_features=10, bias=True)
  (1): ReLU()
  (2): Linear(in_features=10, out_features=10, bias=True)
  (3): ReLU()
  (4): Linear(in_features=10, out_features=6, bias=True)
  (5): Softmax(dim=1)
)

### II.2. Model training

In [8]:
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(nn1.parameters(), lr=0.01)

for t in range(10000):
    Y_pred = nn1(X_train)
    loss = loss_fn(Y_pred, Y_train)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print('loss = ', loss.detach().numpy())

loss =  1.7896159
loss =  1.7865982
loss =  1.7832615
loss =  1.7794069
loss =  1.7750536
loss =  1.7703464
loss =  1.7654735
loss =  1.7607359
loss =  1.7562562
loss =  1.7522346
loss =  1.748649
loss =  1.7455068
loss =  1.7428652
loss =  1.7408514
loss =  1.7396108
loss =  1.7391522
loss =  1.7392162
loss =  1.7394158
loss =  1.7394489
loss =  1.7391423
loss =  1.7384669
loss =  1.7375276
loss =  1.7365077
loss =  1.7356277
loss =  1.7350872
loss =  1.734874
loss =  1.7346165
loss =  1.7339579
loss =  1.7329522
loss =  1.7318529
loss =  1.7307603
loss =  1.7296734
loss =  1.7286721
loss =  1.7276855
loss =  1.726452
loss =  1.7249248
loss =  1.7232678
loss =  1.7214612
loss =  1.7194595
loss =  1.7173939
loss =  1.7152103
loss =  1.7127011
loss =  1.7098697
loss =  1.7067337
loss =  1.7031845
loss =  1.6993324
loss =  1.6951602
loss =  1.6905235
loss =  1.6855289
loss =  1.6800091
loss =  1.6741067
loss =  1.6677102
loss =  1.6609961
loss =  1.6539458
loss =  1.6466838
loss =  1.639

### II.3. Model testing

In [9]:
print(classification_report(Y_test, lbin.inverse_transform(nn1(X_test)), zero_division=0))

              precision    recall  f1-score   support

           1       0.65      0.99      0.78       461
           2       0.00      0.00      0.00       224
           3       0.87      0.93      0.90       397
           4       0.57      0.56      0.57       211
           5       0.00      0.00      0.00       237
           7       0.60      0.84      0.70       470

    accuracy                           0.67      2000
   macro avg       0.45      0.56      0.49      2000
weighted avg       0.52      0.67      0.58      2000



## III. High level with a custom class

### III.1. Custom Layer

In [10]:
# MyLayer in here
class MyLayer(nn.Linear):
    def __init__(self, 
                 nb_in: int, nb_out: int, 
                 bias: bool = True, act: Literal['relu', 'sigmoid', 'linear'] = 'linear'):
        assert nb_in   > 0
        assert nb_out  > 0
        super().__init__(nb_in, nb_out, bias=bias)

        self.act = lambda x: x
        if act == 'relu':
            self.act = nn.ReLU()
        elif act == 'sigmoid':
            self.act = nn.Sigmoid()

    def forward(self, X):
        return self.act(super().forward(X))


MyLayer(3, 2)

MyLayer(in_features=3, out_features=2, bias=True)

In [11]:
# Must print an 'Exception' or 'AssertionError'

try:
    ml1 = MyLayer(0, 2)
except Exception as e:
    print(repr(e))

print('end')

AssertionError()
end


In [27]:
l2ts = [
    MyLayer(3, 2, bias=False, act='relu'),
    MyLayer(3, 2, bias=True, act='sigmoid'),
    MyLayer(3, 1)
    ]

XX = Tensor([[1, 2, 3], [4, 5, 6]])

for l in l2ts:
    print('===============================')
    print(l)
    print('-------------------------------')
    print('bias=', l.bias)
    weight = Tensor(l.weight)
    print('output=', l(XX))


MyLayer(
  in_features=3, out_features=2, bias=False
  (act): ReLU()
)
-------------------------------
bias= None
output= tensor([[1.0666, 2.2770],
        [3.3874, 4.6740]], grad_fn=<ReluBackward0>)
MyLayer(
  in_features=3, out_features=2, bias=True
  (act): Sigmoid()
)
-------------------------------
bias= Parameter containing:
tensor([ 0.2190, -0.3305], requires_grad=True)
output= tensor([[0.9160, 0.4117],
        [0.9911, 0.3067]], grad_fn=<SigmoidBackward0>)
MyLayer(in_features=3, out_features=1, bias=True)
-------------------------------
bias= Parameter containing:
tensor([-0.2814], requires_grad=True)
output= tensor([[0.1506],
        [0.6035]], grad_fn=<AddmmBackward0>)


### III.2. Custom Net

In [13]:
class MyMLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.ModuleList()
        self.locked = False
    
    def add_layer(self, layer: MyLayer):
        if self.locked:
            raise Exception('You cannot add more layers')
        out_nbr = None
        if len(self.layers):
            out_nbr = self.layers[-1].weight.shape[0]
        in_nbr = layer.weight.shape[1]
        if out_nbr is not None and out_nbr != in_nbr:
            raise Exception(f'The last layer outputs ({out_nbr}) must be the same as this layer input {in_nbr}')
        self.layers.append(layer)
        return self
        
    def compile(self, nb_in=1, nb_out=1, bias=True, multiclass=False, lr=1.):
        if len(self.layers):
            nb_in = self.layers[-1].weight.shape[0]
        self.layers.append(MyLayer(nb_in, nb_out, bias=bias, act='sigmoid'))

        self.loss = nn.BCELoss()
        if multiclass and nb_out > 1:
            self.layers[-1].act = nn.Softmax(dim=1)
            self.loss = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.parameters(), lr=lr)
        self.locked = True

    def forward(self, X):
        Z = X 
        for layer in self.layers:
            Z = layer(Z)
        return Z
    
    def backward(self, X, Y):
        Y_pred = self.forward(X)
        loss = self.loss(Y_pred, Y)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss.detach().numpy()

    def fit(self, X, Y, epochs=20):
        for epoch in range(epochs):
            loss = self.backward(X, Y)
            print('epoch', epoch, ', loss =', loss)

    def __call__(self, X):
        return self.forward(X)


### III.3. Model training

In [14]:
nn2 = MyMLP()
nn2.add_layer(MyLayer(X_train.shape[1], 10, act='relu'))\
   .add_layer(MyLayer(10, 10, act='relu'))\
   .compile(nb_out=Y_train.shape[1], lr=0.01, multiclass=True)

nn2

MyMLP(
  (layers): ModuleList(
    (0): MyLayer(
      in_features=36, out_features=10, bias=True
      (act): ReLU()
    )
    (1): MyLayer(
      in_features=10, out_features=10, bias=True
      (act): ReLU()
    )
    (2): MyLayer(
      in_features=10, out_features=6, bias=True
      (act): Softmax(dim=1)
    )
  )
  (loss): CrossEntropyLoss()
)

In [15]:
nn2.fit(X_train, Y_train, epochs=10000)

epoch 0 , loss = 1.7879705
epoch 1 , loss = 1.7857032
epoch 2 , loss = 1.783593
epoch 3 , loss = 1.7813249
epoch 4 , loss = 1.7788433
epoch 5 , loss = 1.7761974
epoch 6 , loss = 1.7733734
epoch 7 , loss = 1.7704092
epoch 8 , loss = 1.7673396
epoch 9 , loss = 1.7642145
epoch 10 , loss = 1.7610705
epoch 11 , loss = 1.7579694
epoch 12 , loss = 1.7548261
epoch 13 , loss = 1.7515924
epoch 14 , loss = 1.7483813
epoch 15 , loss = 1.745105
epoch 16 , loss = 1.7419765
epoch 17 , loss = 1.7392184
epoch 18 , loss = 1.7369286
epoch 19 , loss = 1.7351764
epoch 20 , loss = 1.7339135
epoch 21 , loss = 1.7328278
epoch 22 , loss = 1.7316134
epoch 23 , loss = 1.730138
epoch 24 , loss = 1.7284712
epoch 25 , loss = 1.7267965
epoch 26 , loss = 1.7252388
epoch 27 , loss = 1.7236698
epoch 28 , loss = 1.721944
epoch 29 , loss = 1.7200108
epoch 30 , loss = 1.7179209
epoch 31 , loss = 1.7155979
epoch 32 , loss = 1.7128627
epoch 33 , loss = 1.7096949
epoch 34 , loss = 1.706355
epoch 35 , loss = 1.7030119
epoch 3

### III.4. Model testing

In [16]:
print(classification_report(Y_test, lbin.inverse_transform(nn2(X_test)), zero_division=0))

              precision    recall  f1-score   support

           1       0.60      1.00      0.75       461
           2       0.00      0.00      0.00       224
           3       0.84      0.96      0.90       397
           4       0.00      0.00      0.00       211
           5       0.00      0.00      0.00       237
           7       0.58      0.96      0.72       470

    accuracy                           0.65      2000
   macro avg       0.34      0.49      0.39      2000
weighted avg       0.44      0.65      0.52      2000



## IV. Low level

### IV.1. Activation functions

In [17]:
class SimpleSigmoid(nn.Module):
    def forward(self, X: Tensor) -> Tensor:
        return 1/(1+torch.exp(-X))

class SimpleReLU(nn.Module):
    def forward(self, X: Tensor) -> Tensor:
        return torch.where(X > 0., X, 0.)
    

class SimpleSoftmax(nn.Module):
    def forward(self, X: Tensor) -> Tensor:
        H = torch.exp(X)
        return H/H.sum(axis=1).view(-1, 1)

tensor([[0.7311, 0.2689, 0.5000],
        [0.3775, 0.5498, 0.9933]])
tensor([[1.0000, 0.0000, 0.0000],
        [0.0000, 0.2000, 5.0000]])
tensor([[0.6652, 0.0900, 0.2447],
        [0.0040, 0.0081, 0.9878]])


In [None]:
XX = Tensor([[1., -1., 0.], [-0.5, 0.2, 5]])
sigmoid = SimpleSigmoid()
print(sigmoid(XX))
relu = SimpleReLU()
print(relu(XX))
softmax = SimpleSoftmax()
print(softmax(XX))

### IV.2. Loss functions

In [18]:
class SimpleBCE(nn.Module):
    def forward(self, H: Tensor, Y: Tensor) -> Tensor:
        return torch.mean(- Y * torch.log(H) - (1-Y) * torch.log(1-H))
    
class SimpleCE(nn.Module):
    def forward(self, H: Tensor, Y: Tensor) -> Tensor:
        return torch.mean(- Y * torch.log(H))

### IV.3. Optimization functions

In [19]:
class SimpleGD(optim.Optimizer):
    def __init__(self, params, lr=0.001):
        super().__init__(params, defaults={'lr': lr})

    def step(self):
        for group in self.param_groups: 
            for p in group['params']:  
                p.data -= group['lr'] * p.grad


### IV.4. Custom Layer

In [20]:
# SimpleLayer in here
class SimpleLayer(nn.Module):
    def __init__(self, 
                 nb_in: int, nb_out: int, 
                 bias: bool = True, act: Literal['relu', 'sigmoid', 'linear'] = 'linear'):
        assert nb_in   > 0
        assert nb_out  > 0
        super().__init__()

        self.W = nn.parameter.Parameter(torch.zeros([nb_in, nb_out]))
        self.b = torch.zeros([1   , nb_out])
        if bias:
            self.b = nn.parameter.Parameter(self.b)

        self.act = lambda x: x
        if act == 'relu':
            self.act = SimpleReLU()
        elif act == 'sigmoid':
            self.act = SimpleSigmoid()

    def randomize(self):
        self.W.data = torch.normal(torch.zeros(self.W.shape), 1.0)
        if self.b.requires_grad:
            self.b.data = torch.normal(torch.zeros(self.b.shape), 1.0)
            

    def forward(self, X):
        return self.act(torch.matmul(X, self.W) + self.b)


SimpleLayer(3, 2)

SimpleLayer()

In [21]:
sl = SimpleLayer(3, 2, bias=False)

sl.randomize()
sl.b, sl.W, list(sl.parameters())

(tensor([[0., 0.]]),
 Parameter containing:
 tensor([[ 0.3438,  0.0652],
         [-1.6695,  0.4227],
         [-0.2163, -0.1763]], requires_grad=True),
 [Parameter containing:
  tensor([[ 0.3438,  0.0652],
          [-1.6695,  0.4227],
          [-0.2163, -0.1763]], requires_grad=True)])

### IV.5. Custom Net

In [22]:
class SimpleMLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.ModuleList()
        self.locked = False
    
    def add_layer(self, layer: SimpleLayer):
        if self.locked:
            raise Exception('You cannot add more layers')
        out_nbr = None
        if len(self.layers):
            out_nbr = self.layers[-1].W.shape[1]
        in_nbr = layer.W.shape[0]
        if out_nbr is not None and out_nbr != in_nbr:
            raise Exception(f'The last layer outputs ({out_nbr}) must be the same as this layer input {in_nbr}')
        self.layers.append(layer)
        return self
        
    def compile(self, nb_in=1, nb_out=1, bias=True, multiclass=False, lr=1.):
        if len(self.layers):
            nb_in = self.layers[-1].W.shape[1]
        self.layers.append(SimpleLayer(nb_in, nb_out, bias=bias, act='sigmoid'))

        self.loss = SimpleBCE()
        if multiclass and nb_out > 1:
            self.layers[-1].act = SimpleSoftmax()
            self.loss = SimpleCE()
        self.optimizer = SimpleGD(self.parameters(), lr=lr)
        self.locked = True

    def forward(self, X):
        Z = X 
        for layer in self.layers:
            Z = layer(Z)
        return Z
    
    def backward(self, X, Y):
        Y_pred = self.forward(X)
        loss = self.loss(Y_pred, Y)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss.detach().numpy()

    def fit(self, X, Y, epochs=20):
        for epoch in range(epochs):
            loss = self.backward(X, Y)
            print('epoch', epoch, ', loss =', loss)

    def randomize(self):
        for layer in self.layers:
            layer.randomize()

    def __call__(self, X):
        return self.forward(X)

In [23]:
# Result:
# tensor([[0.8401],
#         [0.8428]], grad_fn=<MulBackward0>)
# 1.0020916
# Parameter containing:
# tensor([[0.5149],
#         [0.5659]], requires_grad=True)

nn3t = SimpleMLP()
nn3t.add_layer(SimpleLayer(2, 2, act='sigmoid'))\
    .add_layer(SimpleLayer(2, 2, act='sigmoid'))\
    .compile()

# print(nn3)

with torch.no_grad():
    nn3t.layers[0].W += torch.Tensor([[0.5, 0.3], [0.2, 0.4]])
    nn3t.layers[0].b += torch.Tensor([[-0.3, 0.5]])
    nn3t.layers[1].W += torch.Tensor([[0.3, -0.1], [0.5, -0.3]])
    nn3t.layers[1].b += torch.Tensor([[-0.3, -0.2]])
    nn3t.layers[2].W  += torch.Tensor([[0.7], [0.7]])
    nn3t.layers[2].b  += torch.Tensor([[1.]])

XX = Tensor([[2, -1], [3, 5]])
YY = Tensor([[0], [1]])

print(nn3t.forward(XX))

loss = nn3t.backward(XX, YY)

print(loss)

nn3t.layers[2].W

tensor([[0.8401],
        [0.8428]], grad_fn=<MulBackward0>)
1.0020916


Parameter containing:
tensor([[0.5149],
        [0.5659]], requires_grad=True)

### IV.6. Model training

In [24]:
nn3 = SimpleMLP()
nn3.add_layer(SimpleLayer(X_train.shape[1], 10, act='relu'))\
   .add_layer(SimpleLayer(10, 10, act='relu'))\
   .compile(nb_out=Y_train.shape[1], lr=0.01, multiclass=True)

nn3.randomize()

list(nn3.parameters())

[Parameter containing:
 tensor([[-7.9130e-01, -1.1062e+00,  1.2261e+00, -4.0962e-01,  9.6866e-01,
           3.4700e-01, -1.0460e+00, -8.1427e-01,  2.6797e-01,  1.0544e-01],
         [-8.4003e-01,  1.2223e+00, -5.0533e-01, -2.1352e-01, -1.0827e+00,
           7.4370e-01, -1.0925e+00, -1.0600e+00,  2.5281e+00, -6.6703e-01],
         [-1.4188e+00, -1.1239e+00, -5.5355e-01,  1.4297e+00, -4.2401e-01,
           1.7553e+00,  3.9301e-01, -1.0623e+00, -2.5051e-01, -9.5105e-01],
         [-5.4394e-01, -8.0084e-01,  7.7697e-01,  1.0352e+00,  8.3369e-01,
           1.3322e+00, -5.5778e-01, -7.4700e-01,  6.1482e-01,  7.8317e-01],
         [ 1.4810e+00, -7.3949e-01, -2.7877e-01,  1.1923e+00,  7.8069e-01,
          -8.5321e-01,  4.5081e-01, -6.9470e-01, -1.8059e+00, -7.3234e-01],
         [-1.0353e+00,  2.5881e-01, -7.3341e-01, -1.3627e-01, -1.7801e+00,
          -1.3768e+00, -1.8250e+00,  1.7271e-01,  5.7075e-01, -1.0558e+00],
         [ 5.7186e-01, -1.8567e+00, -1.9635e-01, -7.4316e-01, -2.8962e-

In [25]:
nn3.fit(X_train, Y_train, epochs=10000)

epoch 0 , loss = 1.2665638
epoch 1 , loss = 1.190225
epoch 2 , loss = 1.1174388
epoch 3 , loss = 1.0526997
epoch 4 , loss = 0.9989691
epoch 5 , loss = 0.95644945
epoch 6 , loss = 0.9240329
epoch 7 , loss = 0.8993058
epoch 8 , loss = 0.88022375
epoch 9 , loss = 0.86498606
epoch 10 , loss = 0.85239273
epoch 11 , loss = 0.8416047
epoch 12 , loss = 0.83211774
epoch 13 , loss = 0.8235697
epoch 14 , loss = 0.81570715
epoch 15 , loss = 0.8083552
epoch 16 , loss = 0.8013998
epoch 17 , loss = 0.7947614
epoch 18 , loss = 0.78837943
epoch 19 , loss = 0.78221047
epoch 20 , loss = 0.77622294
epoch 21 , loss = 0.7703964
epoch 22 , loss = 0.76471734
epoch 23 , loss = 0.7591726
epoch 24 , loss = 0.75375307
epoch 25 , loss = 0.74845284
epoch 26 , loss = 0.74326783
epoch 27 , loss = 0.73819184
epoch 28 , loss = 0.7332196
epoch 29 , loss = 0.7283488
epoch 30 , loss = 0.72358054
epoch 31 , loss = 0.7189147
epoch 32 , loss = 0.7143472
epoch 33 , loss = 0.70987684
epoch 34 , loss = 0.70550555
epoch 35 , los

### IV.7. Model testing

In [26]:
print(classification_report(Y_test, lbin.inverse_transform(nn3(X_test)), zero_division=0))

              precision    recall  f1-score   support

           1       0.71      0.96      0.82       461
           2       0.99      0.84      0.91       224
           3       0.66      0.96      0.78       397
           4       0.00      0.00      0.00       211
           5       0.00      0.00      0.00       237
           7       0.67      0.86      0.75       470

    accuracy                           0.71      2000
   macro avg       0.51      0.60      0.54      2000
weighted avg       0.56      0.71      0.62      2000

