# Explanatory Notebook

This notebook tries to explain how one of our minimal learning steps works.

- There are no extra features here (momentum, Tikhonov regularization, etc.).
- The class written below is not identical to ours; it is just more minimal.
- The last class in the notebook is as minimal as the first but has a lot of print statements; just watch the output.

In [1]:
# Relative path (two directory levels up) used as the project root.
project_dir = '../../'
# Data folder inside the project root (not used by the cells shown here).
data_dir = project_dir + 'data/'

import sys
# Make modules under <project_dir>/NN/ importable from this notebook;
# presumably this is where the `utils` package imported below lives.
sys.path.append(project_dir + 'NN/')

import numpy as np
import importlib
import time
import matplotlib.pyplot as plt

In [2]:
from utils.activations import actv_funcs, dactv_funcs

class Layer:
    """
    One fully-connected layer: a weight matrix, a bias vector and an
    activation function applied to the current input matrix.

    Parameters
    ----------
    unit_number : int
        Number of units (rows of the weight matrix).
    input_matrix : array-like, 2-D
        Input of the layer; its second dimension gives the number of
        input features (columns of the weight matrix).
    func : tuple (name, slope)
        Name passed to `actv_funcs` / `dactv_funcs` and the second
        argument forwarded to the returned callables.
    starting_points : float
        Weights and biases are drawn uniformly from
        [-starting_points, starting_points).
    """
    def __init__(self, unit_number, input_matrix,
                 func=("sigmoid", 1), starting_points=0.1):

        self.input = np.array(input_matrix)
        self.unit_number = unit_number

        # Columns of the input matrix = features seen by every unit.
        n_inputs = self.input.shape[1]

        # Weights are drawn before biases; keep this order so that a seeded
        # generator reproduces the same initialisation.
        low, high = -starting_points, starting_points
        self.weight = np.random.uniform(low, high,
                                        size=(unit_number, n_inputs))
        self.bias = np.random.uniform(low, high, size=unit_number)

        # Activation name and slope are looked up at call time, so changing
        # `self.function` / `self.slope` later affects both callables.
        self.function, self.slope = func

        def _activation(x):
            return actv_funcs(self.function)(x, self.slope)

        def _activation_derivative(x):
            return dactv_funcs(self.function)(x, self.slope)

        self.func = _activation
        self.der_func = _activation_derivative

    @property
    def net(self):
        """
        Weighted input of the layer: `input . weight^T + bias`,
        recomputed from the current input, weights and bias on every access.
        """
        weighted_sum = self.input.dot(self.weight.T)
        return weighted_sum + self.bias

    @property
    def out(self):
        """
        Output of the layer: the activation function applied to `net`.
        """
        return self.func(self.net)

In [3]:
"""
Implementation of the Multi Layer Perceptron.
"""
class MLP:
    """
    Minimal Multi Layer Perceptron trained with full-batch gradient steps.

    Parameters
    ----------
    structure : sequence of int
        Number of units of each layer, from the first to the output layer.
    func : sequence or None
        One entry per layer; each entry is either an activation name or a
        (name, slope) pair. A bare name gets slope 1. ``None`` uses
        ("sigmoid", 1) for every layer.
    starting_points : sequence of float or None
        One entry per layer, bounding the uniform random initialisation of
        that layer. ``None`` uses 0.1 for every layer.
    filename, epoch_to_restore :
        Accepted for signature compatibility; not used by this class.
    """
    def __init__(self, structure = [], func = None, starting_points=None,
                 filename = None, epoch_to_restore = -1):
        self.network = []
        # Copy so the instance never aliases the caller's (or the shared
        # default) list.
        self.structure = list(structure)
        if func is None:
            func = ["sigmoid"] * len(self.structure)
        self.func = [f if isinstance(f, (tuple, list)) else (f, 1) for f in func]
        # `is None` (not `== None`) so array-like values are handled safely.
        if starting_points is None:
            self.starting_points = [0.1] * len(self.structure)
        else:
            self.starting_points = starting_points
        self.epoch = 0  # number of learning steps performed

    def __getattr__(self, attr):
        """
        Collect attribute `attr` from every layer, first layer first.

        Only called when normal lookup fails. `network` itself and dunder
        names are refused with AttributeError: otherwise an instance without
        `network` (e.g. created via `__new__`) recurses forever, and protocol
        probes such as copy/pickle hooks would receive a list.
        """
        if attr == "network" or (attr.startswith("__") and attr.endswith("__")):
            raise AttributeError(attr)
        return [getattr(lay, attr) for lay in self.network]

    def train(self, input_data, labels, epoch, eta=0.1):
        """
        Build a freshly initialised network on `input_data` and run `epoch`
        full-batch learning steps with learning rate `eta`.

        Every call starts from new random weights and resets `self.epoch`.
        """
        self.eta = eta  # learning rate

        self.create_net(input_data)
        self.epoch = 0

        for _ in range(epoch):
            self.feedforward()
            self.learning_step(labels)
            self.epoch += 1

    def predict(self, data):
        """Feed `data` through the network and return the last layer output."""
        # Convert so that plain nested lists work like in Layer.__init__.
        self.network[0].input = np.asarray(data)
        self.feedforward()
        return self.network[-1].out

    def create_net(self, input_data):
        """
        (Re)build `self.network`: one Layer per entry of `self.structure`,
        each fed with the output of the previous one.
        """
        # Start from scratch; appending to an existing network would stack a
        # second copy of the layers on top of the first.
        self.network = []
        for layer, num_unit in enumerate(self.structure):
            layer_input = input_data if layer == 0 else self.network[-1].out
            self.network.append(Layer(num_unit, layer_input,
                                      starting_points=self.starting_points[layer],
                                      func=self.func[layer]))

    def feedforward(self):
        """Propagate: set each layer's input to the previous layer's output."""
        for lay_prev, lay_next in zip(self.network[:-1], self.network[1:]):
            lay_next.input = lay_prev.out

    def learning_step(self, labels):
        """
        One backpropagation step over the whole batch.

        Walks the layers from last to first, computes each layer's delta and
        adds `eta * gradient` to its weights and bias (delta is built from
        `labels - out`, hence the `+=`).
        """
        delta = None
        downstream_weight = None
        for layer in reversed(self.network):
            slope = layer.der_func(layer.net)
            if delta is None:
                # Output layer: error times activation derivative.
                delta = (labels - layer.out) * slope
            else:
                # Hidden layer: delta of the following layer sent back
                # through that layer's weights.
                delta = np.matmul(delta, downstream_weight) * slope

            # Snapshot before the in-place update below, so the previous
            # layer is backpropagated through the weights used in the
            # forward pass, not the already-updated ones.
            downstream_weight = layer.weight.copy()

            # Equals the sum over samples of outer(delta_i, input_i).
            grad_W = np.matmul(delta.T, layer.input)
            grad_b = np.sum(delta, axis=0)

            layer.weight += self.eta * grad_W
            layer.bias += self.eta * grad_b

## Define the dataset

In [4]:
# The four combinations of two binary inputs, one sample per row.
input_data = np.array([[0, 0],
                       [0, 1],
                       [1, 0],
                       [1, 1]])
# One target per sample, as a column vector (target is 1 when both inputs
# are equal, 0 otherwise).
labels = np.array([1, 0, 0, 1]).reshape(-1, 1)

# Let's train the model and evaluate the results

In [5]:
# Two units in the first layer, one output unit per label column.
structure=[2,np.shape(labels)[1]]
act_func=["sigmoid","sigmoid"]
# Initial weights/biases are drawn uniformly in [-2, 2) for both layers.
start=[2,]*2

netw = MLP(structure, func=act_func,
           starting_points=start)

N_epoch = 2000
learning_rate=2
netw.train(input_data, labels,
           epoch = N_epoch, eta = learning_rate)

# Predict once and reuse the result for both the printout and the check.
predictions = netw.predict(input_data)
print(predictions)

# Success means every prediction is within 0.5 of its target. The absolute
# value is required: without it a prediction far BELOW its target (e.g. 0
# for a target of 1) gives a negative difference and would pass the test.
if (np.abs(predictions - labels) < 0.5).all():
    print('\nXOR learned.')
else: print('\nXOR not learned, local minima reached... Please try again.')

[[0.98128081]
 [0.02129464]
 [0.01794481]
 [0.98310307]]

XOR learned.


## Going deep into one epoch: print each learning step!

Define a verbose class that prints what really happens.

In [6]:
"""
Implementation of the Multi Layer Perceptron.
"""
class verbose_MLP:
    """
    Multi Layer Perceptron that prints every intermediate quantity of the
    forward pass and of the backpropagation step.

    Parameters
    ----------
    structure : sequence of int
        Number of units of each layer, from the first to the output layer.
    func : sequence or None
        One entry per layer; each entry is either an activation name or a
        (name, slope) pair. A bare name gets slope 1. ``None`` uses
        ("sigmoid", 1) for every layer.
    starting_points : sequence of float or None
        One entry per layer, bounding the uniform random initialisation of
        that layer. ``None`` uses 0.1 for every layer.
    filename, epoch_to_restore :
        Accepted for signature compatibility; not used by this class.
    """
    def __init__(self, structure = [], func = None, starting_points=None,
                 filename = None, epoch_to_restore = -1):
        self.network = []
        # Copy so the instance never aliases the caller's (or the shared
        # default) list.
        self.structure = list(structure)
        if func is None:
            func = ["sigmoid"] * len(self.structure)
        self.func = [f if isinstance(f, (tuple, list)) else (f, 1) for f in func]
        # `is None` (not `== None`) so array-like values are handled safely.
        if starting_points is None:
            self.starting_points = [0.1] * len(self.structure)
        else:
            self.starting_points = starting_points
        self.epoch = 0  # number of learning steps performed

    def __getattr__(self, attr):
        """
        Collect attribute `attr` from every layer, first layer first.

        Only called when normal lookup fails. `network` itself and dunder
        names are refused with AttributeError: otherwise an instance without
        `network` (e.g. created via `__new__`) recurses forever, and protocol
        probes such as copy/pickle hooks would receive a list.
        """
        if attr == "network" or (attr.startswith("__") and attr.endswith("__")):
            raise AttributeError(attr)
        return [getattr(lay, attr) for lay in self.network]

    @staticmethod
    def _indent(value):
        """Return `str(value)` with every line prefixed by a tab."""
        return '\t' + str(value).replace('\n', '\n\t')

    def _print_layer(self, index, layer, is_output):
        """Print size, input, weights, bias, net and output of one layer."""
        print(f"\nLayer {index}")
        print(f" - {layer.weight.shape[0]} Neuron")
        print(f" - {layer.weight.shape[1]} Input Features")
        print(f"\n -----> Input in layer {index}:")
        print(self._indent(layer.input))
        print(f"\n -----> Weights in layer {index}:")
        print(self._indent(layer.weight))
        print(f"\n -----> Bias in layer {index}:")
        print(self._indent(layer.bias))
        print(f"\n -----> input * W.T + bias in layer {index}:")
        print(self._indent(layer.net))
        if is_output:
            print(f"\n -----> Output (actv_f(net)) from layer {index} = output of the net:")
        else:
            print(f"\n -----> Output (actv_f(net)) from layer {index}:")
        print(self._indent(layer.out))

    def train(self, input_data, labels, epoch, eta=0.1):
        """
        Build a freshly initialised network on `input_data` and run `epoch`
        full-batch learning steps with learning rate `eta`, printing each one.

        Every call starts from new random weights and resets `self.epoch`.
        """
        self.eta = eta  # learning rate

        self.create_net(input_data)
        self.epoch = 0

        for i in range(epoch):
            print(f"Epoch {i}")
            self.feedforward()
            self.learning_step(labels)
            self.epoch += 1

    def predict(self, data):
        """Feed `data` through the network and return the last layer output."""
        # Convert so that plain nested lists work like in Layer.__init__.
        self.network[0].input = np.asarray(data)
        self.feedforward()
        return self.network[-1].out

    def create_net(self, input_data):
        """
        (Re)build `self.network`: one Layer per entry of `self.structure`,
        each fed with the output of the previous one.
        """
        # Start from scratch; appending to an existing network would stack a
        # second copy of the layers on top of the first.
        self.network = []
        for layer, num_unit in enumerate(self.structure):
            layer_input = input_data if layer == 0 else self.network[-1].out
            self.network.append(Layer(num_unit, layer_input,
                                      starting_points=self.starting_points[layer],
                                      func=self.func[layer]))

    def feedforward(self):
        """
        Propagate the input through the network, printing every layer.

        Works for any number of layers, including a single one (there is
        then nothing to fill, only the output layer is printed).
        """
        print("\nFILLING THE NETWORK:")
        last_index = len(self.network) - 1
        for index, layer in enumerate(self.network):
            is_output = index == last_index
            self._print_layer(index, layer, is_output)
            if not is_output:
                print(f"\nFill the layer {index + 1} input with output from layer {index}.")
                self.network[index + 1].input = layer.out

    def learning_step(self, labels):
        """
        One backpropagation step over the whole batch, printing the error,
        the deltas and the (rounded) gradients of every layer.

        Walks the layers from last to first and adds `eta * gradient` to each
        layer's weights and bias (delta is built from `labels - out`, hence
        the `+=`).
        """
        print("\nLEARNING (BACKPROPAGATION)")
        delta = None
        downstream_weight = None
        for index in range(len(self.network) - 1, -1, -1):
            layer = self.network[index]
            print(f"\nLayer {index}")
            if delta is None:
                # Output layer: error times activation derivative.
                error = labels - layer.out
                print(f"\n -----> labels-out of layer {index}:")
                print(self._indent(error))
                delta = error * layer.der_func(layer.net)
            else:
                # Hidden layer: delta of the following layer sent back
                # through that layer's weights.
                delta = np.matmul(delta, downstream_weight) * layer.der_func(layer.net)
            print(f"\n -----> delta of layer {index}:")
            print(self._indent(delta))

            # Snapshot before the in-place update below, so the previous
            # layer is backpropagated through the weights used in the
            # forward pass, not the already-updated ones.
            downstream_weight = layer.weight.copy()

            # Per-sample outer products, computed once and reused for both
            # the printout and the batch gradient.
            outer_products = np.array([np.outer(d, x)
                                       for d, x in zip(delta, layer.input)])
            print("\n -----> (Rounded results)")
            print(f"\n -----> delta x inputs of layer {index} (tensor prod.):")
            print(self._indent(np.round(outer_products, decimals = 3)))

            grad_W = np.sum(outer_products, axis=0)  # batch gradient

            print(f"\n -----> grad = sum(delta x inputs) of layer {index}:")
            print(self._indent(np.round(grad_W, decimals = 3)))

            grad_b = np.sum(delta, axis=0)

            layer.weight += self.eta * grad_W
            layer.bias += self.eta * grad_b

## Define a new model with 3 hidden neurons and just one output for the task

In [7]:
# Three units in the first layer, one output unit per label column.
structure = [3, labels.shape[1]]
act_func = ["sigmoid"] * 2
# Initial weights/biases are drawn uniformly in [-2, 2) for both layers.
start = [2] * 2

# A single epoch is enough: the point is to read the printed trace.
N_epoch = 1
learning_rate = 1

netw = verbose_MLP(structure, func=act_func, starting_points=start)
print(f"Let's train the model with [epoch: {N_epoch}] and [eta: {learning_rate}]\n")
netw.train(input_data, labels, epoch=N_epoch, eta=learning_rate)

Let's train the model with [epoch: 1] and [eta: 1]

Epoch 0

FILLING THE NETWORK:

Layer 0
 - 3 Neuron
 - 2 Input Features

 -----> Input in layer 0:
	[[0 0]
	 [0 1]
	 [1 0]
	 [1 1]]

 -----> Weights in layer 0:
	[[-1.15603991 -1.09376286]
	 [-0.03910188  1.84727646]
	 [-0.16393459 -1.18237623]]

 -----> Bias in layer 0:
	[-1.71924976 -1.89543105  1.27512321]

 -----> input * W.T + bias in layer 0:
	[[-1.71924976 -1.89543105  1.27512321]
	 [-2.81301262 -0.04815458  0.09274698]
	 [-2.87528968 -1.93453293  1.11118862]
	 [-3.96905254 -0.08725646 -0.07118761]]

 -----> Output (actv_f(net)) from layer 0:
	[[0.15196782 0.13062646 0.7816185 ]
	 [0.05662504 0.48796368 0.52317014]
	 [0.05338869 0.1262497  0.75235064]
	 [0.01854106 0.47819971 0.48221061]]

Fill the layer 1 input with output from layer 0.

Layer 1
 - 1 Neuron
 - 3 Input Features

 -----> Input in layer 1:
	[[0.15196782 0.13062646 0.7816185 ]
	 [0.05662504 0.48796368 0.52317014]
	 [0.05338869 0.1262497  0.75235064]
	 [0.01854106 0