# 5. Python implementation of an ANN from scratch with back propagation
_Author: Maurice Snoeren_<br>
This notebook implements an ANN from scratch in Python with visualization. It will be used to gain extra insight into the weights and biases and into how the optimizer is optimizing the model.

In [1]:
import numpy as np

class ANN:
    """Feed-forward neural network with an output layer and optional hidden layers.

    The network owns the output-layer weight matrix ``Wy`` and bias vector
    ``by``. Hidden layers are separate objects that are appended with
    ``add_hidden_layer``; each must provide ``forward_propagation(x)``,
    ``back_propagation(delta, weights)``, and the attributes
    ``num_hidden_nodes`` and ``h``, because those are what this class uses.
    """

    def __init__(self, num_input_nodes, num_output_nodes, output_activation): # construct the ANN object
        """Create a network without hidden layers.

        Args:
            num_input_nodes: Number of input nodes.
            num_output_nodes: Number of output nodes.
            output_activation: Object with ``forward(z)`` and ``derivative(z)``
                methods, applied to the output layer.
        """
        self.num_input_nodes   = num_input_nodes   # Hold the number of input nodes
        self.num_output_nodes  = num_output_nodes  # Hold the number of output nodes
        self.output_activation = output_activation # Hold the activation function of the output layer
        self.hidden_layers     = [] # Hold all the hidden_layer classes
        self.Wy                = np.random.rand(num_input_nodes, num_output_nodes) # Hold output layer weight matrix
        self.by                = np.zeros((1, num_output_nodes)) # Biases vector of the output nodes
        self.x                 = [] # Hold the input vector that is used for calculation
        self.zy                = [] # Hold the summation of the input and bias with the weights
        self.y                 = [] # hold the output vector

    def get_weight_matrix(self): # getter for the weight matrix
        """Return the output-layer weight matrix ``Wy``."""
        return self.Wy

    def set_weight_matrix(self, Wy): # setter for the weight matrix
        """Replace the output-layer weight matrix ``Wy``."""
        self.Wy = Wy

    def get_biases_vector(self): # getter for the bias vector
        """Return the output-layer bias vector ``by``."""
        return self.by

    def set_biases_vector(self, by): # setter for the bias vector
        """Replace the output-layer bias vector ``by``."""
        self.by = by

    def add_hidden_layer(self, hidden_layer): # add a new hidden layer to the ANN
        """Append a hidden layer and re-initialize ``Wy`` to match its size.

        The output layer is fed by the last hidden layer, so ``Wy`` is drawn
        again with ``hidden_layer.num_hidden_nodes`` rows. Any previously set
        output weights are discarded.
        """
        self.hidden_layers.append(hidden_layer) # Add the HiddenLayer class to the array
        self.Wy = np.random.rand(hidden_layer.num_hidden_nodes, self.num_output_nodes) # Re-initializes the output matrix
                                                                                       # based on number of hidden nodes

    def get_total_hidden_layers(self): # return how many hidden layers are configured
        """Return the number of configured hidden layers."""
        return len(self.hidden_layers)

    def get_hidden_layer(self, i): # returns the hidden layer given the index (no checks performed!)
        """Return the hidden layer at index ``i`` (no bounds checking)."""
        return self.hidden_layers[i]

    def forward_propagation(self, x):
        """Compute the network output for input ``x`` and cache intermediates.

        Stores ``x``, the output pre-activation ``zy`` and the output ``y`` on
        the instance, because ``back_propagation`` reads them afterwards.

        Args:
            x: Input array; assumed to be a row vector of shape
                (1, num_input_nodes) as in the notebook examples.

        Returns:
            The activated output ``y``.
        """
        self.x = x # store the input that we have used for the calculation

        # Pass the signal through every hidden layer in order. When there are
        # no hidden layers the loop does not run and the raw input feeds the
        # output layer directly.
        layer_input = x
        for hidden_layer in self.hidden_layers:
            layer_input = hidden_layer.forward_propagation(layer_input)

        self.zy = np.dot(layer_input, self.Wy) + self.by   # weighted sum plus bias
        self.y  = self.output_activation.forward(self.zy)  # apply the activation function

        return self.y # return the output activation vector of all the nodes

    def cost_function(self, input_example, output_desired):
        """Return the element-wise squared-error cost ``0.5 * (y - desired)**2``.

        Runs a forward pass on ``input_example`` first, so the cached ``x``,
        ``zy`` and ``y`` are overwritten.
        """
        self.forward_propagation(input_example)  # Perform first the forward propagation calculation
        J = 0.5 * ( self.y - output_desired )**2 # Calculate the cost function
        return J

    def back_propagation(self, input_example, output_desired):
        """Compute the gradients of the squared-error cost for one example.

        Args:
            input_example: Input array passed to ``forward_propagation``.
            output_desired: Target output, broadcastable against ``y``.

        Returns:
            A dict with keys ``'dJ_dWy'`` and ``'dJ_dby'`` (output layer) and
            ``'dJ_dWh'`` and ``'dJ_dbh'`` (lists with one entry per hidden
            layer, ordered from first to last hidden layer; empty when the
            network has no hidden layers).
        """
        # The forward pass refreshes self.x, self.zy and self.y used below.
        self.forward_propagation(input_example)

        # Derivative of the cost 0.5 * (y - desired)**2 with respect to y,
        # chained with the derivative of the output activation.
        dJ_dy  = self.y - output_desired
        dy_dzy = self.output_activation.derivative(self.zy)
        delta  = np.multiply(dJ_dy, dy_dzy) # error term propagated back into the network

        # The output bias gradient equals the output delta whether or not
        # hidden layers exist, so it is set before branching. Previously it
        # was only assigned in the hidden-layer branch, which raised a
        # NameError for networks without hidden layers.
        dJ_dby = delta

        dJ_dWh = [] # Hold the weight gradients of the hidden layers
        dJ_dbh = [] # Hold the bias gradients of the hidden layers

        if not self.hidden_layers: # without hidden layers the input feeds the output layer directly
            dJ_dWy = np.dot(self.x.transpose(), delta)

        else:
            # The output layer is fed by the activation of the last hidden layer.
            dzy_dWy = self.hidden_layers[-1].h
            dJ_dWy  = np.dot(dzy_dWy.transpose(), delta)

            weights = self.Wy # weights connecting the current layer to the one after it
            for hidden_layer in reversed(self.hidden_layers): # loop the hidden layers from back to start
                result  = hidden_layer.back_propagation(delta, weights)
                delta   = result['delta'] # delta to hand to the layer before this one
                weights = result['W']     # weights to hand to the layer before this one
                dJ_dWh.append(result['dJ_dWh'])
                dJ_dbh.append(result['dJ_dbh'])

        # Gradients were collected back-to-front; reverse them so index i
        # matches hidden layer i.
        return {'dJ_dWh': list(reversed(dJ_dWh)), 'dJ_dWy': dJ_dWy,
                'dJ_dbh': list(reversed(dJ_dbh)), 'dJ_dby': dJ_dby}

In [3]:
class ANNHiddenLayer:
    """A single fully connected hidden layer with its own weights and biases.

    The layer caches its last input ``x``, pre-activation ``zh`` and output
    ``h`` during the forward pass; the backward pass reads those cached values.
    """

    def __init__(self, num_input_nodes, num_hidden_nodes, activation):
        """Set up the layer with random weights and zero biases.

        Args:
            num_input_nodes: Number of nodes in the previous layer.
            num_hidden_nodes: Number of nodes in this layer.
            activation: Object with ``forward(z)`` and ``derivative(z)``
                methods, used for all nodes of this layer.
        """
        # Layer configuration
        self.num_input_nodes  = num_input_nodes
        self.num_hidden_nodes = num_hidden_nodes
        self.activation       = activation

        # Trainable parameters: weights in [0, 1) from np.random.rand, biases zero
        self.Wh = np.random.rand(num_input_nodes, num_hidden_nodes)
        self.bh = np.zeros((1, num_hidden_nodes))

        # Values cached by the most recent forward pass
        self.x  = []  # input of this layer
        self.zh = []  # weighted sum plus bias
        self.h  = []  # activated output

    def get_weight_matrix(self):
        """Return the hidden weight matrix ``Wh``."""
        return self.Wh

    def set_weight_matrix(self, Wh):
        """Replace the hidden weight matrix ``Wh``."""
        self.Wh = Wh

    def get_biases_vector(self):
        """Return the hidden bias vector ``bh``."""
        return self.bh

    def set_biases_vector(self, bh):
        """Replace the hidden bias vector ``bh``."""
        self.bh = bh

    def forward_propagation(self, x):
        """Compute and cache this layer's output for input ``x``.

        Returns:
            The activated output ``h``.
        """
        self.x = x
        pre_activation = np.dot(x, self.Wh) + self.bh
        self.zh = pre_activation
        self.h = self.activation.forward(pre_activation)
        return self.h

    def back_propagation(self, prev_delta, prev_W):
        """Compute this layer's gradients from the following layer's delta.

        Args:
            prev_delta: Delta of the layer that comes after this one.
            prev_W: Weight matrix connecting this layer to that following layer.

        Returns:
            A dict with this layer's ``'delta'``, its weight matrix ``'W'``
            (both needed by the layer before this one), and the gradients
            ``'dJ_dWh'`` and ``'dJ_dbh'``.
        """
        # Pull the error back through the following layer's weights, then
        # scale it by the local slope of the activation function.
        backpropagated = np.dot(prev_delta, prev_W.T)
        local_slope = self.activation.derivative(self.zh)
        layer_delta = backpropagated * local_slope

        # The weight gradient pairs the cached layer input with the delta;
        # the bias gradient is the delta itself.
        weight_gradient = np.dot(self.x.T, layer_delta)

        return {
            'delta': layer_delta,
            'W': self.Wh,
            'dJ_dWh': weight_gradient,
            'dJ_dbh': layer_delta,
        }

In [1]:
class ANNSigmoidActivation:
    """Sigmoid activation function and its derivative."""

    def forward(self, input_vector):
        """Return the element-wise sigmoid ``1 / (1 + exp(-z))``."""
        return 1/(1 + np.exp(-input_vector))

    def derivative(self, input_vector):
        """Return the element-wise derivative of the sigmoid at ``input_vector``.

        The derivative ``exp(-z) / (1 + exp(-z))**2`` is symmetric in ``z``,
        so it is evaluated on ``-|z|``. The exponent is then never positive
        and cannot overflow; the direct formula produced ``inf / inf = nan``
        for large negative inputs.
        """
        decay = np.exp(-np.abs(input_vector))
        return decay / ((1 + decay)**2)

class ANNReLUActivation:
    """Rectified linear unit (ReLU) activation function and its derivative."""

    def forward(self, input_vector):
        """Return the element-wise maximum of zero and ``input_vector``."""
        return np.maximum(0, input_vector)

    def derivative(self, input_vector):
        """Return 1 where ``input_vector`` is positive and 0 where it is not.

        The result is a new array; ``input_vector`` itself is left untouched.
        """
        gradient = input_vector * 1  # multiplying by one yields a fresh copy
        # Decide both masks on the unmodified values before overwriting anything.
        is_positive = gradient > 0
        is_non_positive = gradient <= 0
        gradient[is_non_positive] = 0
        gradient[is_positive] = 1
        return gradient

Everything is in place and we can now try the class and see whether the calculation is done correctly.

In [2]:
#!pip install pygame
import pygame

# Open a plain 600x500 window and keep it alive until the user closes it.
pygame.init()
scr = pygame.display.set_mode((600,500))
pygame.display.set_caption('Pygame Window')
clock = pygame.time.Clock()  # used to cap the frame rate of the loop below
done = False
try:
    while not done:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:  # the window close button was pressed
                done = True
        pygame.display.flip()  # refresh the window every frame, not once after the loop
        clock.tick(60)         # limit to 60 iterations per second instead of busy-spinning
finally:
    # Shut pygame down even if the loop is interrupted, so the window is
    # closed rather than left open after the cell finishes.
    pygame.quit()

pygame 2.1.2 (SDL 2.0.18, Python 3.9.12)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [5]:
# Pick the activation function that every layer in this example shares.
# sa = ANNSigmoidActivation()  # alternative: use the sigmoid activation instead
sa = ANNReLUActivation()

# Network with four input nodes and two output nodes; the output nodes use
# the activation function selected above.
ann = ANN(4, 2, sa)

# Two hidden layers of ten nodes each. The first takes the four network
# inputs, the second takes the ten outputs of the first hidden layer.
for layer_inputs, layer_nodes in ((4, 10), (10, 10)):
    ann.add_hidden_layer(ANNHiddenLayer(layer_inputs, layer_nodes, sa))

# Example input vector x with one value per input node.
x = np.array([[0.1, 0.1, 0.1, 0.1]])

# Run one forward pass and show what the untrained network produces.
prediction = ann.forward_propagation(x)
print("output: " + str(prediction))

output: [[5.62306227 4.88301714]]


In [6]:
# Train the network built above on a single sample with plain gradient descent.
x = np.array([[1, 1, 1, 1]]) # input sample
y = np.array([[1, 0]])       # desired output
alpha = 0.0001               # learning rate

for epoch in range(10000):
    gradients = ann.back_propagation(x, y) # perform back propagation - 1 epoch with 1 data sample

    # update the output layer
    ann.set_weight_matrix(ann.get_weight_matrix() - alpha*gradients['dJ_dWy']) # update output weight matrix Wy
    ann.set_biases_vector(ann.get_biases_vector() - alpha*gradients['dJ_dby']) # update output biases vector by

    # update the hidden layers; a separate index name is used so the epoch
    # counter of the outer loop is not overwritten
    for layer_index in range(ann.get_total_hidden_layers()):
        hl = ann.get_hidden_layer(layer_index)
        wm = gradients['dJ_dWh'][layer_index] # weight gradient of this hidden layer
        bm = gradients['dJ_dbh'][layer_index] # bias gradient of this hidden layer
        hl.set_weight_matrix( hl.get_weight_matrix() - alpha*wm )
        hl.set_biases_vector( hl.get_biases_vector() - alpha*bm )

# Evaluate the trained network on the training sample.
result = ann.forward_propagation(x)
cost = np.mean(ann.cost_function(x, y)) # mean of the per-output squared-error costs

print( "Final result: " + str(result) )
print( "Costs       : " + str(cost))

Final result: [[1.00000000e+00 4.71844785e-16]]
Costs       : 8.388002686789728e-30
