# Workshop. Neural networks' tools (Pytorch)

<p style='text-align: right;font-style: italic; color: red;'>Designed by: Mr. Abdelkrime Aries</p>


In [1]:
import torch
from torch import Tensor, nn, optim

torch.__version__

'2.3.1+cu121'

In [2]:
# !pip install pytorch-lightning
# import pytorch_lightning as pl

In [3]:
import pandas     as pd
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import classification_report

pd.__version__

'2.2.2'

In [4]:
from typing import Literal, List

## I. Data preparation

In [5]:
train = pd.read_csv('data/sat.trn', delimiter=' ', header=None)

X_train = train.iloc[:, :-1].values
Y_train = train.iloc[:,  -1].values

lbin = LabelBinarizer()

X_train = X_train / 255.
Y_train = lbin.fit_transform(Y_train)

X_train = Tensor(X_train)
Y_train = Tensor(Y_train)

X_train.shape, Y_train.shape

(torch.Size([4435, 36]), torch.Size([4435, 6]))

In [6]:
test = pd.read_csv('data/sat.tst', delimiter=' ', header=None)

X_test = test.iloc[:, :-1].values
Y_test = test.iloc[:,  -1].values

X_test = X_test / 255.
# Y_test = lbin.transform(Y_test)

X_test = Tensor(X_test)
# Y_test = Tensor(Y_test)

X_test.shape, Y_test.shape

(torch.Size([2000, 36]), (2000,))

## II. High level

### II.1. Sequential model

In [7]:
nn1 = nn.Sequential(
   nn.Linear(X_train.shape[1], 10),
   nn.ReLU(),
   nn.Linear(10, 10),
   nn.ReLU(),
   nn.Linear(10, Y_train.shape[1]),
   nn.Softmax(dim=1)
   )

nn1

Sequential(
  (0): Linear(in_features=36, out_features=10, bias=True)
  (1): ReLU()
  (2): Linear(in_features=10, out_features=10, bias=True)
  (3): ReLU()
  (4): Linear(in_features=10, out_features=6, bias=True)
  (5): Softmax(dim=1)
)

### II.2. Model training

In [8]:
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(nn1.parameters(), lr=0.01)

for t in range(1000):
    Y_pred = nn1(X_train)
    loss = loss_fn(Y_pred, Y_train)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if not t%100: #print every 100 iterations
        print('t=', t, ', loss = ', loss.detach().numpy())

t= 0 , loss =  1.7860535
t= 100 , loss =  1.4005458
t= 200 , loss =  1.3745211
t= 300 , loss =  1.3695382
t= 400 , loss =  1.3673468
t= 500 , loss =  1.3660754
t= 600 , loss =  1.3653035
t= 700 , loss =  1.3109933
t= 800 , loss =  1.2944173
t= 900 , loss =  1.2906961


### II.3. Model testing

In [9]:
print(classification_report(Y_test, lbin.inverse_transform(nn1(X_test)), zero_division=0))

              precision    recall  f1-score   support

           1       0.80      0.99      0.88       461
           2       0.00      0.00      0.00       224
           3       0.81      0.96      0.88       397
           4       0.00      0.00      0.00       211
           5       0.52      0.75      0.61       237
           7       0.71      0.93      0.81       470

    accuracy                           0.73      2000
   macro avg       0.47      0.60      0.53      2000
weighted avg       0.57      0.73      0.64      2000



## III. High level with a custom class

### III.1. Custom Layer

In [10]:
# MyLayer in here
class MyLayer(nn.Linear):
    def __init__(self, 
                 nb_in: int, nb_out: int, 
                 bias: bool = True, act: Literal['relu', 'sigmoid', 'linear'] = 'linear'):
        assert nb_in   > 0
        assert nb_out  > 0
        super().__init__(nb_in, nb_out, bias=bias)

        self.act = lambda x: x
        if act == 'relu':
            self.act = nn.ReLU()
        elif act == 'sigmoid':
            self.act = nn.Sigmoid()

    def forward(self, X):
        return self.act(super().forward(X))


MyLayer(3, 2)

MyLayer(in_features=3, out_features=2, bias=True)

In [11]:
# Must print an 'Exception' or 'AssertionError'

try:
    ml1 = MyLayer(0, 2)
except Exception as e:
    print(repr(e))

print('end')

AssertionError()
end


In [12]:
l2ts = [
    MyLayer(3, 2, bias=False, act='relu'),
    MyLayer(3, 2, bias=True, act='sigmoid'),
    MyLayer(3, 1)
    ]

XX = Tensor([[1, 2, 3], [4, 5, 6]])

for l in l2ts:
    print('===============================')
    print(l)
    print('-------------------------------')
    print('bias=', l.bias)
    weight = Tensor(l.weight)
    print('output=', l(XX))


MyLayer(
  in_features=3, out_features=2, bias=False
  (act): ReLU()
)
-------------------------------
bias= None
output= tensor([[1.2344, 0.6513],
        [3.2725, 2.1922]], grad_fn=<ReluBackward0>)
MyLayer(
  in_features=3, out_features=2, bias=True
  (act): Sigmoid()
)
-------------------------------
bias= Parameter containing:
tensor([ 0.0439, -0.5674], requires_grad=True)
output= tensor([[0.7324, 0.0676],
        [0.8473, 0.0082]], grad_fn=<SigmoidBackward0>)
MyLayer(in_features=3, out_features=1, bias=True)
-------------------------------
bias= Parameter containing:
tensor([-0.3595], requires_grad=True)
output= tensor([[0.0351],
        [0.0075]], grad_fn=<AddmmBackward0>)


### III.2. Custom Net

In [13]:
class MyMLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.ModuleList()
        self.locked = False
    
    def add_layer(self, layer: MyLayer):
        if self.locked:
            raise Exception('You cannot add more layers')
        out_nbr = None
        if len(self.layers):
            out_nbr = self.layers[-1].weight.shape[0]
        in_nbr = layer.weight.shape[1]
        if out_nbr is not None and out_nbr != in_nbr:
            raise Exception(f'The last layer outputs ({out_nbr}) must be the same as this layer input {in_nbr}')
        self.layers.append(layer)
        return self
        
    def compile(self, nb_in=1, nb_out=1, bias=True, multiclass=False, lr=1.):
        if len(self.layers):
            nb_in = self.layers[-1].weight.shape[0]
        self.layers.append(MyLayer(nb_in, nb_out, bias=bias, act='sigmoid'))

        self.loss = nn.BCELoss()
        if multiclass and nb_out > 1:
            self.layers[-1].act = nn.Softmax(dim=1)
            self.loss = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.parameters(), lr=lr)
        self.locked = True

    def forward(self, X):
        Z = X 
        for layer in self.layers:
            Z = layer(Z)
        return Z
    
    def backward(self, X, Y):
        Y_pred = self.forward(X)
        loss = self.loss(Y_pred, Y)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss.detach().numpy()

    def fit(self, X, Y, epochs=20, pr:int = 100):
        for epoch in range(epochs):
            loss = self.backward(X, Y)
            if not epoch%pr: # print every pr
                print('epoch', epoch, ', loss =', loss)

    def __call__(self, X):
        return self.forward(X)


### III.3. Model training

In [14]:
nn2 = MyMLP()
nn2.add_layer(MyLayer(X_train.shape[1], 10, act='relu'))\
   .add_layer(MyLayer(10, 10, act='relu'))\
   .compile(nb_out=Y_train.shape[1], lr=0.01, multiclass=True)

nn2

MyMLP(
  (layers): ModuleList(
    (0): MyLayer(
      in_features=36, out_features=10, bias=True
      (act): ReLU()
    )
    (1): MyLayer(
      in_features=10, out_features=10, bias=True
      (act): ReLU()
    )
    (2): MyLayer(
      in_features=10, out_features=6, bias=True
      (act): Softmax(dim=1)
    )
  )
  (loss): CrossEntropyLoss()
)

In [15]:
nn2.fit(X_train, Y_train, epochs=1000)

epoch 0 , loss = 1.8015405
epoch 100 , loss = 1.3338568
epoch 200 , loss = 1.276798
epoch 300 , loss = 1.2276777
epoch 400 , loss = 1.209966
epoch 500 , loss = 1.2046121
epoch 600 , loss = 1.20211
epoch 700 , loss = 1.1997224
epoch 800 , loss = 1.1977218
epoch 900 , loss = 1.1965315


### III.4. Model testing

In [16]:
print(classification_report(Y_test, lbin.inverse_transform(nn2(X_test)), zero_division=0))

              precision    recall  f1-score   support

           1       0.91      0.99      0.95       461
           2       0.95      0.90      0.93       224
           3       0.81      0.96      0.88       397
           4       0.00      0.00      0.00       211
           5       0.83      0.72      0.77       237
           7       0.71      0.93      0.80       470

    accuracy                           0.82      2000
   macro avg       0.70      0.75      0.72      2000
weighted avg       0.74      0.82      0.78      2000



## IV. Low level

### IV.1. Activation functions

In [17]:
class SimpleSigmoid(nn.Module):
    def forward(self, X: Tensor) -> Tensor:
        return 1/(1+torch.exp(-X))

class SimpleReLU(nn.Module):
    def forward(self, X: Tensor) -> Tensor:
        return torch.where(X > 0., X, 0.)
    

class SimpleSoftmax(nn.Module):
    def forward(self, X: Tensor) -> Tensor:
        H = torch.exp(X)
        return H/H.sum(axis=1).view(-1, 1)

In [18]:
XX = Tensor([[1., -1., 0.], [-0.5, 0.2, 5]])
sigmoid = SimpleSigmoid()
print(sigmoid(XX))
relu = SimpleReLU()
print(relu(XX))
softmax = SimpleSoftmax()
print(softmax(XX))

tensor([[0.7311, 0.2689, 0.5000],
        [0.3775, 0.5498, 0.9933]])
tensor([[1.0000, 0.0000, 0.0000],
        [0.0000, 0.2000, 5.0000]])
tensor([[0.6652, 0.0900, 0.2447],
        [0.0040, 0.0081, 0.9878]])


### IV.2. Loss functions

In [19]:
class SimpleBCE(nn.Module):
    def forward(self, H: Tensor, Y: Tensor) -> Tensor:
        return torch.mean(- Y * torch.log(H) - (1-Y) * torch.log(1-H))
    
class SimpleCE(nn.Module):
    def forward(self, H: Tensor, Y: Tensor) -> Tensor:
        return torch.mean(- Y * torch.log(H))

### IV.3. Optimization functions

In [20]:
class SimpleGD(optim.Optimizer):
    def __init__(self, params, lr=0.001):
        super().__init__(params, defaults={'lr': lr})

    def step(self):
        for group in self.param_groups: 
            for p in group['params']:  
                p.data -= group['lr'] * p.grad


### IV.4. Custom Layer

In [21]:
# SimpleLayer in here
class SimpleLayer(nn.Module):
    def __init__(self, 
                 nb_in: int, nb_out: int, 
                 bias: bool = True, act: Literal['relu', 'sigmoid', 'linear'] = 'linear'):
        assert nb_in   > 0
        assert nb_out  > 0
        super().__init__()

        self.W = nn.parameter.Parameter(torch.zeros([nb_in, nb_out]))
        self.b = torch.zeros([1   , nb_out])
        if bias:
            self.b = nn.parameter.Parameter(self.b)

        self.act = lambda x: x
        if act == 'relu':
            self.act = SimpleReLU()
        elif act == 'sigmoid':
            self.act = SimpleSigmoid()

    def randomize(self):
        self.W.data = torch.normal(torch.zeros(self.W.shape), 1.0)
        if self.b.requires_grad:
            self.b.data = torch.normal(torch.zeros(self.b.shape), 1.0)
            

    def forward(self, X):
        return self.act(torch.matmul(X, self.W) + self.b)


SimpleLayer(3, 2)

SimpleLayer()

In [22]:
sl = SimpleLayer(3, 2, bias=False)

sl.randomize()
sl.b, sl.W, list(sl.parameters())

(tensor([[0., 0.]]),
 Parameter containing:
 tensor([[-1.3187, -0.8510],
         [-1.2403, -1.0491],
         [ 1.1244, -0.0342]], requires_grad=True),
 [Parameter containing:
  tensor([[-1.3187, -0.8510],
          [-1.2403, -1.0491],
          [ 1.1244, -0.0342]], requires_grad=True)])

### IV.5. Custom Net

In [23]:
class SimpleMLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.ModuleList()
        self.locked = False
    
    def add_layer(self, layer: SimpleLayer):
        if self.locked:
            raise Exception('You cannot add more layers')
        out_nbr = None
        if len(self.layers):
            out_nbr = self.layers[-1].W.shape[1]
        in_nbr = layer.W.shape[0]
        if out_nbr is not None and out_nbr != in_nbr:
            raise Exception(f'The last layer outputs ({out_nbr}) must be the same as this layer input {in_nbr}')
        self.layers.append(layer)
        return self
        
    def compile(self, nb_in=1, nb_out=1, bias=True, multiclass=False, lr=1.):
        if len(self.layers):
            nb_in = self.layers[-1].W.shape[1]
        self.layers.append(SimpleLayer(nb_in, nb_out, bias=bias, act='sigmoid'))

        self.loss = SimpleBCE()
        if multiclass and nb_out > 1:
            self.layers[-1].act = SimpleSoftmax()
            self.loss = SimpleCE()
        self.optimizer = SimpleGD(self.parameters(), lr=lr)
        self.locked = True

    def forward(self, X):
        Z = X 
        for layer in self.layers:
            Z = layer(Z)
        return Z
    
    def backward(self, X, Y):
        Y_pred = self.forward(X)
        loss = self.loss(Y_pred, Y)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss.detach().numpy()

    def fit(self, X, Y, epochs=20, pr:int = 100):
        for epoch in range(epochs):
            loss = self.backward(X, Y)
            if not epoch%pr: # print every pr
                print('epoch', epoch, ', loss =', loss)

    def randomize(self):
        for layer in self.layers:
            layer.randomize()

    def __call__(self, X):
        return self.forward(X)

In [24]:
# Result:
# tensor([[0.8401],
#         [0.8428]], grad_fn=<MulBackward0>)
# 1.0020916
# Parameter containing:
# tensor([[0.5149],
#         [0.5659]], requires_grad=True)

nn3t = SimpleMLP()
nn3t.add_layer(SimpleLayer(2, 2, act='sigmoid'))\
    .add_layer(SimpleLayer(2, 2, act='sigmoid'))\
    .compile()

# print(nn3)

with torch.no_grad():
    nn3t.layers[0].W += torch.Tensor([[0.5, 0.3], [0.2, 0.4]])
    nn3t.layers[0].b += torch.Tensor([[-0.3, 0.5]])
    nn3t.layers[1].W += torch.Tensor([[0.3, -0.1], [0.5, -0.3]])
    nn3t.layers[1].b += torch.Tensor([[-0.3, -0.2]])
    nn3t.layers[2].W  += torch.Tensor([[0.7], [0.7]])
    nn3t.layers[2].b  += torch.Tensor([[1.]])

XX = Tensor([[2, -1], [3, 5]])
YY = Tensor([[0], [1]])

print(nn3t.forward(XX))

loss = nn3t.backward(XX, YY)

print(loss)

nn3t.layers[2].W

tensor([[0.8401],
        [0.8428]], grad_fn=<MulBackward0>)
1.0020916


Parameter containing:
tensor([[0.5149],
        [0.5659]], requires_grad=True)

### IV.6. Model training

In [25]:
nn3 = SimpleMLP()
nn3.add_layer(SimpleLayer(X_train.shape[1], 10, act='relu'))\
   .add_layer(SimpleLayer(10, 10, act='relu'))\
   .compile(nb_out=Y_train.shape[1], lr=0.01, multiclass=True)

nn3.randomize()

list(nn3.parameters())

[Parameter containing:
 tensor([[-1.0324e+00,  6.2962e-02,  1.7376e+00, -1.0988e+00, -5.4936e-01,
           9.8362e-01, -6.1377e-01, -1.1112e-01,  9.5957e-01,  1.1741e+00],
         [-2.3289e-01,  9.5027e-01,  5.2685e-01, -5.6911e-01,  1.0022e+00,
          -3.1141e-01,  9.8217e-01,  1.6803e-02,  1.0775e+00,  3.2616e-01],
         [-6.1324e-01, -6.5029e-01, -5.3696e-01, -1.3392e-01,  7.8460e-01,
          -7.0812e-01,  2.8266e-01,  4.3198e-02, -4.9156e-01,  1.2444e+00],
         [-1.8798e+00, -3.5944e-01, -1.8772e+00, -3.4254e-02,  1.6008e+00,
           2.0627e+00,  1.2528e+00,  1.6610e+00,  1.2891e+00, -1.1635e+00],
         [ 2.8728e-01, -5.6467e-01,  1.2480e+00, -2.5756e-03, -1.5971e+00,
          -5.7729e-01, -3.5158e-01,  1.1920e+00, -9.4963e-01,  7.0126e-01],
         [-5.2769e-01,  5.0802e-01,  1.1877e+00,  2.2736e-01, -8.0788e-01,
           3.0933e-01, -1.8719e+00, -9.1211e-01,  2.2809e-01,  4.5248e-01],
         [-3.0328e-01, -9.5507e-01,  6.4631e-01, -7.5942e-01, -8.6839e-

In [26]:
nn3.fit(X_train, Y_train, epochs=1000)

epoch 0 , loss = 2.7024145
epoch 100 , loss = 0.528085
epoch 200 , loss = 0.39478248
epoch 300 , loss = 0.35853994
epoch 400 , loss = 0.3345934
epoch 500 , loss = 0.3168753
epoch 600 , loss = 0.30264002
epoch 700 , loss = 0.29065415
epoch 800 , loss = 0.28024605
epoch 900 , loss = 0.27096522


### IV.7. Model testing

In [27]:
print(classification_report(Y_test, lbin.inverse_transform(nn3(X_test)), zero_division=0))

              precision    recall  f1-score   support

           1       0.67      0.89      0.76       461
           2       0.99      0.55      0.71       224
           3       0.10      0.18      0.13       397
           4       0.00      0.00      0.00       211
           5       0.05      0.01      0.01       237
           7       0.20      0.20      0.20       470

    accuracy                           0.35      2000
   macro avg       0.33      0.30      0.30      2000
weighted avg       0.34      0.35      0.33      2000

