# Workshop. Neural networks' tools (Tensorflow)

<p style='text-align: right;font-style: italic; color: red;'>Designed by: Mr. Abdelkrime Aries</p>

In [1]:
# Sometimes, TensorFlow throws errors when there is no GPU. 
# To stop these messages, we can use this code:
import logging, os
logging.disable(logging.WARNING)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [2]:
import tensorflow as tf
from tensorflow import keras

tf.__version__

'2.17.0'

In [3]:
import pandas     as pd
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import classification_report

pd.__version__

'2.2.2'

In [4]:
from typing import Literal, List

## I. Data Preparation

In [5]:
train = pd.read_csv('data/sat.trn', delimiter=' ', header=None)

X_train = train.iloc[:, :-1].values
Y_train = train.iloc[:,  -1].values

lbin = LabelBinarizer()

X_train = X_train / 255.
Y_train = lbin.fit_transform(Y_train)

X_train = tf.constant(X_train, dtype=tf.float32)
Y_train = tf.constant(Y_train, dtype=tf.float32)

X_train.shape, Y_train.shape

(TensorShape([4435, 36]), TensorShape([4435, 6]))

In [6]:
test = pd.read_csv('data/sat.tst', delimiter=' ', header=None)

X_test = test.iloc[:, :-1].values
Y_test = test.iloc[:,  -1].values

X_test = X_test / 255.
# Y_test = lbin.transform(Y_test)

X_test = tf.constant(X_test, dtype=tf.float32)

X_test.shape, Y_test.shape

(TensorShape([2000, 36]), (2000,))

## II. Keras

### II.1. Sequential model

In [7]:
nn1 = keras.Sequential()
nn1.add(keras.Input(shape=(X_train.shape[1],)))
nn1.add(keras.layers.Dense(10, activation='relu'))
nn1.add(keras.layers.Dense(10, activation='relu'))
nn1.add(keras.layers.Dense(Y_train.shape[1], activation='softmax'))


nn1.summary()

### II.2. Model training

In [8]:
# this is just to print every 10 iterations
class PrintEveryPrEpochs(keras.callbacks.Callback):
    def __init__(self, pr:int=10):
        self.pr = pr

    def on_epoch_end(self, epoch, logs=None):
        if not epoch%self.pr:
            print('epoch =', epoch, ', loss=', logs['loss'])

In [9]:
nn1.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.01), 
    loss=keras.losses.CategoricalCrossentropy()
    )
nn1.fit(X_train, Y_train, epochs=100, callbacks=[PrintEveryPrEpochs()], verbose=0)

epoch = 0 , loss= 1.2577202320098877
epoch = 10 , loss= 0.4249204695224762
epoch = 20 , loss= 0.3979230523109436
epoch = 30 , loss= 0.37447887659072876
epoch = 40 , loss= 0.36111515760421753
epoch = 50 , loss= 0.36925023794174194
epoch = 60 , loss= 0.3617419898509979
epoch = 70 , loss= 0.35329291224479675
epoch = 80 , loss= 0.3386724293231964
epoch = 90 , loss= 0.3415277898311615


<keras.src.callbacks.history.History at 0x77ef418e53f0>

### II.3. Model testing

In [10]:
print(classification_report(Y_test, lbin.inverse_transform(nn1(X_test).numpy()), zero_division=0))

              precision    recall  f1-score   support

           1       0.95      0.99      0.97       461
           2       0.92      0.98      0.95       224
           3       0.91      0.90      0.90       397
           4       0.55      0.65      0.59       211
           5       0.87      0.80      0.83       237
           7       0.88      0.79      0.83       470

    accuracy                           0.86      2000
   macro avg       0.85      0.85      0.85      2000
weighted avg       0.87      0.86      0.87      2000



## III. High level with a custom class

### III.1. Custom Layer

In [11]:
# MyLayer in here
class MyLayer(keras.layers.Dense):
    def __init__(self, 
                 nb_in:int, nb_out: int, 
                 bias: bool = True, act: Literal['relu', 'sigmoid', 'linear'] = 'linear'):
        assert nb_in   > 0
        assert nb_out  > 0
        super().__init__(nb_out, use_bias=bias, activation=act)

        self.build((nb_in,))


MyLayer(3, 2)

<MyLayer name=my_layer, built=True>

In [12]:
# Must print an 'Exception' or 'AssertionError'

try:
    ml1 = MyLayer(0, 2)
except Exception as e:
    print(repr(e))

print('end')

AssertionError()
end


In [13]:
l2ts = [
    MyLayer(3, 2, bias=False, act='relu'),
    MyLayer(3, 2, bias=True, act='sigmoid'),
    MyLayer(3, 1)
    ]

XX = tf.constant([[1, 2, 3], [4, 5, 6]])

for l in l2ts:
    print('===============================')
    print(l)
    print('-------------------------------')
    print('bias=', l.bias)
    weight = l.kernel
    print('output=', l(XX))

<MyLayer name=my_layer_1, built=True>
-------------------------------
bias= None
output= tf.Tensor(
[[2.9173596 2.136314 ]
 [6.4983883 3.548113 ]], shape=(2, 2), dtype=float32)
<MyLayer name=my_layer_2, built=True>
-------------------------------
bias= <KerasVariable shape=(2,), dtype=float32, path=my_layer_2/bias>
output= tf.Tensor(
[[0.9942501  0.08129059]
 [0.9999962  0.00186742]], shape=(2, 2), dtype=float32)
<MyLayer name=my_layer_3, built=True>
-------------------------------
bias= <KerasVariable shape=(1,), dtype=float32, path=my_layer_3/bias>
output= tf.Tensor(
[[-0.9461639]
 [-1.1059246]], shape=(2, 1), dtype=float32)


### III.2. Custom Net

In [14]:
class MyMLP(keras.Model):
    def __init__(self):
        super().__init__()
        self.layers_list = []
        self.locked = False
    
    def add_layer(self, layer: MyLayer):
        if self.locked:
            raise Exception('You cannot add more layers')
        out_nbr = None
        if len(self.layers_list):
            out_nbr = self.layers_list[-1].kernel.shape[1]
        in_nbr = layer.kernel.shape[0]
        if out_nbr is not None and out_nbr != in_nbr:
            raise Exception(f'The last layer outputs ({out_nbr}) must be the same as this layer input {in_nbr}')
        self.layers_list.append(layer)
        return self
        
    def compile(self, nb_in=1, nb_out=1, bias=True, multiclass=False, lr=1.):
        if len(self.layers_list):
            nb_in = self.layers_list[-1].kernel.shape[1]

        loss = keras.losses.BinaryCrossentropy()
        act = 'sigmoid'
        if multiclass and nb_out > 1:
            act='softmax'
            loss = keras.losses.CategoricalCrossentropy()
        self.layers_list.append(MyLayer(nb_in, nb_out, bias=bias, act=act))
        optimizer = keras.optimizers.Adam(learning_rate=0.01)
        self.locked = True
        super().compile(optimizer=optimizer, loss=loss)

    def forward(self, X):
        Z = X 
        for layer in self.layers_list:
            Z = layer(Z)
        return Z

    def __call__(self, X):
        return self.forward(X)

### III.3. Model training

In [15]:
nn2 = MyMLP()
nn2.add_layer(MyLayer(X_train.shape[1], 10, act='relu'))\
   .add_layer(MyLayer(10, 10, act='relu'))\
   .compile(nb_out=Y_train.shape[1], lr=0.01, multiclass=True)

nn2.summary()

In [16]:
nn2.fit(X_train, Y_train, epochs=100, callbacks=[PrintEveryPrEpochs()], verbose=0)

epoch = 0 , loss= 1.3791676759719849
epoch = 10 , loss= 0.4372740387916565
epoch = 20 , loss= 0.40486615896224976
epoch = 30 , loss= 0.3750939965248108
epoch = 40 , loss= 0.36997494101524353
epoch = 50 , loss= 0.3551260530948639
epoch = 60 , loss= 0.35428428649902344
epoch = 70 , loss= 0.3431343138217926
epoch = 80 , loss= 0.33802530169487
epoch = 90 , loss= 0.3494139611721039


<keras.src.callbacks.history.History at 0x77ef405bf8e0>

### III.4. Model testing

In [17]:
print(classification_report(Y_test, lbin.inverse_transform(nn2(X_test).numpy()), zero_division=0))

              precision    recall  f1-score   support

           1       0.99      0.94      0.96       461
           2       0.96      0.93      0.95       224
           3       0.86      0.91      0.89       397
           4       0.43      0.24      0.31       211
           5       0.85      0.73      0.78       237
           7       0.71      0.90      0.80       470

    accuracy                           0.83      2000
   macro avg       0.80      0.78      0.78      2000
weighted avg       0.82      0.83      0.82      2000



## IV. Low level

### IV.1. Activation functions

In [18]:
def simple_sigmoid(X):
    return 1/(1+tf.math.exp(-X))

def simple_ReLU(X):
    return tf.where(X > 0., X, 0.)
    
def simple_softmax(X):
    H = tf.math.exp(X)
    # return H/tf.math.reduce_sum(H, axis=0)
    return H/tf.reshape(tf.math.reduce_sum(H, axis=1), (-1, 1))


In [19]:
XX = tf.constant([[1., -1., 0.], [-0.5, 0.2, 5]])
print(simple_sigmoid(XX))
print(simple_ReLU(XX))
print(simple_softmax(XX))

tf.Tensor(
[[0.7310586  0.26894143 0.5       ]
 [0.37754068 0.54983395 0.9933072 ]], shape=(2, 3), dtype=float32)
tf.Tensor(
[[1.  0.  0. ]
 [0.  0.2 5. ]], shape=(2, 3), dtype=float32)
tf.Tensor(
[[0.66524094 0.09003058 0.24472848]
 [0.00403705 0.00812962 0.9878334 ]], shape=(2, 3), dtype=float32)


### IV.2. Loss functions

In [20]:
class SimpleBCE(keras.Loss):
    def call(self, H, Y):
        return tf.reduce_mean(- Y * tf.math.log(H) - (1-Y) * tf.math.log(1-H))
    
class SimpleCE(keras.Loss):
    def call(self, H, Y):
        return tf.reduce_mean(- Y * tf.math.log(H))

### IV.3. Optimization functions

In [21]:
class SimpleGD(keras.Optimizer):
    def __init__(self, learning_rate=0.001):
        super().__init__(learning_rate=learning_rate)
    def apply_gradients(self, grads_and_vars):
        for grads, vars in grads_and_vars:
            vars.assign_sub(self.learning_rate * grads)

### IV.4. Custom Layer

In [22]:
# SimpleLayer in here
class SimpleLayer(object):
    def __init__(self, 
                 nb_in: int, nb_out: int, 
                 bias: bool = True, act: Literal['relu', 'sigmoid', 'linear'] = 'linear'):
        assert nb_in   > 0
        assert nb_out  > 0
        super().__init__()

        self.W = tf.Variable(tf.zeros([nb_in, nb_out]))
        self.trainable_weights = [self.W]
        self.b = tf.zeros([1   , nb_out])
        if bias:
            self.b = tf.Variable(self.b)
            self.trainable_weights.append(self.b)

        self.act = lambda x: x
        if act == 'relu':
            self.act = simple_ReLU
        elif act == 'sigmoid':
            self.act = simple_sigmoid

    def randomize(self):
        self.W.assign(tf.random.normal(self.W.shape, mean=0.0, stddev=0.1))
        if isinstance(self.b, tf.Variable):
            self.b.assign(tf.random.normal(self.b.shape, mean=0.0, stddev=0.1))
            
    def forward(self, X):
        return self.act(tf.matmul(X, self.W) + self.b)
    
    def __call__(self, X):
        return self.forward(X)


SimpleLayer(3, 2)

<__main__.SimpleLayer at 0x77ef405705b0>

In [23]:
sl = SimpleLayer(3, 2, bias=False)

sl.randomize()
sl.b, sl.W, sl.trainable_weights

(<tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[0., 0.]], dtype=float32)>,
 <tf.Variable 'Variable:0' shape=(3, 2) dtype=float32, numpy=
 array([[ 0.07582887,  0.08336047],
        [-0.15432923,  0.02495772],
        [ 0.18534607, -0.06679763]], dtype=float32)>,
 [<tf.Variable 'Variable:0' shape=(3, 2) dtype=float32, numpy=
  array([[ 0.07582887,  0.08336047],
         [-0.15432923,  0.02495772],
         [ 0.18534607, -0.06679763]], dtype=float32)>])

### IV.5. Custom Net

In [24]:
class SimpleMLP(object):
    def __init__(self):
        super().__init__()
        self.layers = []
        self.locked = False
        self.trainable_weights = []
    
    def add_layer(self, layer: SimpleLayer):
        if self.locked:
            raise Exception('You cannot add more layers')
        out_nbr = None
        if len(self.layers):
            out_nbr = self.layers[-1].W.shape[1]
        in_nbr = layer.W.shape[0]
        if out_nbr is not None and out_nbr != in_nbr:
            raise Exception(f'The last layer outputs ({out_nbr}) must be the same as this layer input {in_nbr}')
        self.layers.append(layer)
        self.trainable_weights.extend(layer.trainable_weights)
        return self
        
    def compile(self, nb_in=1, nb_out=1, bias=True, multiclass=False, lr=1.):
        if len(self.layers):
            nb_in = self.layers[-1].W.shape[1]
        out_layer = SimpleLayer(nb_in, nb_out, bias=bias, act='sigmoid')
        self.loss = SimpleBCE()
        if multiclass and nb_out > 1:
            out_layer.act = simple_softmax
            self.loss = SimpleCE()
        self.layers.append(out_layer)
        self.trainable_weights.extend(self.layers[-1].trainable_weights)
        self.optimizer = SimpleGD(learning_rate=lr)
        self.locked = True
        

    def forward(self, X):
        Z = X 
        for layer in self.layers:
            Z = layer(Z)
        return Z
    
    def backward(self, X, Y):
        with tf.GradientTape() as tape:
            Y_pred = self.forward(X)
            loss   = self.loss(Y_pred, Y)
        grads = tape.gradient(loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        return loss.numpy()
    
    def fit(self, X, Y, epochs=20, pr: int=100):
        for epoch in range(epochs):
            loss = self.backward(X, Y)
            if not epoch%pr:
                print('epoch', epoch, ' loss =', loss)
    
    def randomize(self):
        for layer in self.layers:
            layer.randomize()
            
    def __call__(self, X):
        return self.forward(X)

In [25]:
# Result:
# tf.Tensor(
# [[0.8400944]
#  [0.8428117]], shape=(2, 1), dtype=float32)
# learning_rate 1.0
# 1.0020916
# <tf.Variable 'Variable:0' shape=(2, 1) dtype=float32, numpy=
# array([[0.51494634],
#        [0.5659208 ]], dtype=float32)>

nn3t = SimpleMLP()
nn3t.add_layer(SimpleLayer(2, 2, act='sigmoid'))\
    .add_layer(SimpleLayer(2, 2, act='sigmoid'))\
    .compile()


nn3t.layers[0].W.assign_add(tf.constant([[0.5, 0.3], [0.2, 0.4]]))
nn3t.layers[0].b.assign_add(tf.constant([[-0.3, 0.5]]))
nn3t.layers[1].W.assign_add(tf.constant([[0.3, -0.1], [0.5, -0.3]]))
nn3t.layers[1].b.assign_add(tf.constant([[-0.3, -0.2]]))
nn3t.layers[2].W.assign_add(tf.constant([[0.7], [0.7]]))
nn3t.layers[2].b.assign_add(tf.constant([[1.]]))

XX = tf.constant([[2, -1], [3, 5]], dtype=tf.float32)
YY = tf.constant([[0], [1]], dtype=tf.float32)

print(nn3t.forward(XX))

loss = nn3t.backward(XX, YY)

print(loss)

nn3t.layers[2].W

tf.Tensor(
[[0.8400944]
 [0.8428117]], shape=(2, 1), dtype=float32)
1.0020916


<tf.Variable 'Variable:0' shape=(2, 1) dtype=float32, numpy=
array([[0.51494634],
       [0.5659208 ]], dtype=float32)>

### IV.6. Model training

In [26]:
nn3 = SimpleMLP()
nn3.add_layer(SimpleLayer(X_train.shape[1], 10, act='relu'))\
   .add_layer(SimpleLayer(10, 10, act='relu'))\
   .compile(nb_out=Y_train.shape[1], lr=0.01, multiclass=True)

nn3.randomize()

list(nn3.trainable_weights)

[<tf.Variable 'Variable:0' shape=(36, 10) dtype=float32, numpy=
 array([[ 0.18425314, -0.11320397,  0.04093604,  0.03161915,  0.07488319,
         -0.09128364,  0.0349329 , -0.07464761, -0.02969515, -0.00922264],
        [ 0.21519005,  0.03331221,  0.11816205, -0.10681549,  0.00058334,
          0.09853227, -0.13442342,  0.13662682,  0.2041689 ,  0.1015119 ],
        [ 0.07625891, -0.00866228,  0.02954775,  0.02247149,  0.07250103,
          0.08763894,  0.12947802,  0.01101634, -0.0978509 , -0.07510026],
        [-0.11247783,  0.09608839, -0.1563275 , -0.00083876,  0.10808287,
         -0.07713726,  0.16384551,  0.0305205 ,  0.03858315,  0.00707863],
        [ 0.06424464,  0.09899114, -0.22343753,  0.08916724,  0.0366289 ,
         -0.19540405, -0.02137367,  0.13679968, -0.10654328, -0.13445829],
        [ 0.18460286, -0.01618042,  0.00453827,  0.00556519, -0.04105875,
          0.01501153,  0.02578078, -0.05715035,  0.06434871, -0.06879394],
        [ 0.07068237,  0.00382259, -0.1124

In [27]:
nn3.fit(X_train, Y_train, epochs=1000)

epoch 0  loss = 0.30071974
epoch 100  loss = 0.2998662
epoch 200  loss = 0.2990634
epoch 300  loss = 0.29830793
epoch 400  loss = 0.2975965
epoch 500  loss = 0.29692635
epoch 600  loss = 0.29629487
epoch 700  loss = 0.2956996
epoch 800  loss = 0.29513833
epoch 900  loss = 0.29460898


### IV.7. Model testing

In [28]:
print(classification_report(Y_test, lbin.inverse_transform(nn3(X_test).numpy()), zero_division=0))

              precision    recall  f1-score   support

           1       0.00      0.00      0.00       461
           2       0.00      0.00      0.00       224
           3       0.00      0.00      0.00       397
           4       0.00      0.00      0.00       211
           5       0.00      0.00      0.00       237
           7       0.23      1.00      0.38       470

    accuracy                           0.23      2000
   macro avg       0.04      0.17      0.06      2000
weighted avg       0.06      0.23      0.09      2000

