<a href="https://colab.research.google.com/github/syphaxAouadene/Cours_programmation_concurrente/blob/main/my_first_CNN_for_handwritten_digit_classification_version_1_0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import time
from IPython.display import clear_output
import json
import matplotlib.pyplot as plt
%pylab inline
import os

Populating the interactive namespace from numpy and matplotlib


In [2]:
from mlxtend.data import loadlocal_mnist
import platform

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# CNN operations

In [4]:
def multiplication(imaget, f):
    if len(imaget.shape) == 1 :
        imaget = imaget.reshape((imaget.shape[0], 1))
    result = imaget * f
    somme = 0
    for i in range(imaget.shape[0]):
        for j in range(imaget.shape[1]):
            somme = somme + result[i, j]
    return somme


def convolution(img, f):
    result = np.zeros((img.shape[0] - f.shape[0] + 1, img.shape[1] - f.shape[1] + 1))
    for i in range(result.shape[0]):
        for j in range(result.shape[1]):
            imaget = img[i:f.shape[0]+i, j:f.shape[1]+j]
            multi = multiplication(imaget, f)
            result[i, j] = multi
    return result


def ReLU_convolution(convolved_map):
    result = np.zeros(convolved_map.shape)
    for i in range(result.shape[0]):
        for j in range(result.shape[1]):
            result[i, j] = np.max([convolved_map[i,j], 0])
    return result


def get_convolved_layer_from_previous_layer(previous_layer, nbr_filter, size_filter):
    """
    don't forget to add bias term corresponding to each filter !
    - previous_layer = is a list that contains each feature map of the previous_layer
    - nbr_filter = is an integer that represents how many filter do we want to use, (ex. 6)
    - size_filter = is an integer that represents the shape of each filter (if size_filter=3 then shape_filter=(3, 3))
    
    this function return a list that contains each convolved map 
    (you should know that convolved_map = convolution between feature_map and filter)
    """
    convolved_layer = []
    filters = [initialize_filter(size_filter, size_filter) for i in range(nbr_filter)]
    biais = initialize_filter(nbr_filter, 1)
    for f in filters:
        somme = 0
        bias = 0
        for feature_map in previous_layer:
            somme = somme + convolution(feature_map, f)
        somme = somme + bias
        convolved_layer.append(ReLU_convolution(somme))
    return convolved_layer, filters, biais


def max_pooling(convolved_map, size_of_pooling_kernel, stride):
#     result_of_pooling has to have shape = ((input_width - kernel_width) + 2*padding/stride) + 1
    result = np.zeros((int((convolved_map.shape[0]-size_of_pooling_kernel)/stride)+1, int((convolved_map.shape[0]-size_of_pooling_kernel)/stride)+1))
    for i in range(0, result.shape[0], stride):
        for j in range(0, result.shape[1], stride):
            imaget = convolved_map[i:size_of_pooling_kernel+i, j:size_of_pooling_kernel+j]
            result[i, j] = np.max(imaget)
    return result


def mean_pooling(convolved_map, size_of_pooling_kernel, stride):
    #     result_of_pooling has to have shape = ((input_width - kernel_width) + 2*padding/stride) + 1
    result = np.zeros((int((convolved_map.shape[0]-size_of_pooling_kernel)/stride)+1, int((convolved_map.shape[0]-size_of_pooling_kernel)/stride)+1))
    for i in range(0, result.shape[0], stride):
        for j in range(0, result.shape[1], stride):
            imaget = convolved_map[i:size_of_pooling_kernel+i, j:size_of_pooling_kernel+j]
            result[i, j] = np.mean(imaget)
    return result
    
    
def min_pooling(convolved_map, size_of_pooling_kernel, stride):
    #     result_of_pooling has to have shape = ((input_width - kernel_width) + 2*padding/stride) + 1
    result = np.zeros((int((convolved_map.shape[0]-size_of_pooling_kernel)/stride)+1, int((convolved_map.shape[0]-size_of_pooling_kernel)/stride)+1))
    for i in range(0, result.shape[0], stride):
        for j in range(0, result.shape[1], stride):
            imaget = convolved_map[i:size_of_pooling_kernel+i, j:size_of_pooling_kernel+j]
            result[i, j] = np.min(imaget)
    return result


def pooling(convolved_layer, type_of_pooling, size_of_pooling_kernel, stride):
    """
    convolved_layer : is a list that contains each convolved_map from previous_layer
    type_of_pooling : should be either 'MAX_POOLING' or 'MEAN_POOLING' or 'MIN_POOLING'
    size_of_pooling_kernel : is an integer that represents the shape of kernel 
                            (if size_of_pooling_kernel=2 then shape_kernel=(2, 2))
    this function return a list that contains each pooled_map
    """
    pooled_layer = []
    switcher = {
        'MAX_POOLING': max_pooling,
        'MEAN_POOLING': mean_pooling,
        'MIN_POOLING': min_pooling
    }
    # Get the function from switcher dictionary
    pooling_operation = switcher.get(type_of_pooling, lambda: "Invalid type_of_pooling !")
    # Execute the function
    for convolved_map in convolved_layer:
        pooled_layer.append(pooling_operation(convolved_map, size_of_pooling_kernel, stride))
    return pooled_layer


def initialize_filter(filter_width, filter_height):
    """
    cette fonction s'occupe de l'initialisation d'un filtre aléatoirement selon la distribution normale
    """
    return np.random.randn(filter_width, filter_height)


# def show_image(img):
#     plt.imshow(img, cmap=plt.cm.binary)
#     plt.show()
    
    
def show_multiple_images(images, nbr_of_images=5):
    for img in images[:nbr_of_images]:
        show_image(img)
        time.sleep(1)
        clear_output(wait=True)
        
        
# def flatten(layer):
#     """
#     #arguments:
#     layer : is a list of feature_maps
#     #returns : list of all numbers that contained in the feature maps in the layer
#     """
#     result = []
#     for matrix in layer:
#         result = result + list(matrix.flatten())
#     result = np.array(result).reshape((1, len(result)))
#     return result

# def fully_connected_layer(input_layer, nbr_neurons, activation_function='ReLU'):
#     current_layer = []
#     input_layer = np.array(input_layer)
#     for i in range(nbr_neurons):
#         bias = 0
#         weights = np.random.randn(len(input_layer), 1)
#         current_layer.append(np.max([multiplication(input_layer, weights) + bias, 0]))
#     return current_layer


# def softmax(data):
#     output = []
#     for value in data:
#         proba_value = np.exp(value)/(np.sum(np.exp(data)))
#         output.append(proba_value)
#     return output
        
# def categoricalCrossEntropy(generated_values, target_values):
#     somme = 0
#     for i in range(len(generated_values)):
#         somme = somme + target_values[i] * np.log(generated_values[i])
#     return (-1) * somme  

#######################################################################################
#######  Fully_connected_layer_functions
#######################################################################################

# def update_weights(dL_dY, weights, inputs, lrate):
#     """
#     arguments :
#     dL_dY : un vecteur des dérivées de la couche supérieure par rapport a la couche de sortie Y de dimension n_outputs
#     weights : la matrice des poids de dimension (n_inputs x n_outputs)
#     inputs : le vecteur de sortie de la couche précedente de dimension n_inputs
#     lrate : learning rate (scalar)
#     """
#     dL_dW = []
#     for xi in inputs:
#         dL_dW = dL_dW + xi * dL_dY
#     new_weights = flatten(weights) - lrate * dL_dW
#     new_weights = new_weights.reshape(weights.shape)
#     return new_weights

# def calcul_dL_dX(dL_dY, weights):
#     return np.dot(dL_dY, np.transpose(weights))


# def fcl(inputs_layer, nbr_neurons, weights, biais, activation_type='ReLU'):
#     current_layer = []
#     current_layer = flatten(np.dot(inputs_layer, weights) + biais)[0]
#     output_layer = activation_function(current_layer, activation_type)
#     return output_layer
   
# def activation_function(layer, type_of_activation='relu'):
#     type_of_activation = type_of_activation.lower()
#     switcher = {
#         'relu': ReLU,
#         'tanh': tanh,
#         'segmoid': segmoid
#     }
#     # Get the function from switcher dictionary
#     activation_type = switcher.get(type_of_activation, lambda: "Invalid type_of_activation_function, please choose either 'ReLU' or 'tanh' or 'segmoid' !")
#     return activation_type(layer)
    
    
# def ReLU(layer):
#     layer = np.array(layer)
#     result = []
#     for y in layer:
#         result.append(np.max([y, 0]))
#     return result

# def tanh(layer):
#     layer = np.array(layer)
#     result = []
#     for y in layer:
#         r = (np.exp(y)-np.exp(-1*y))/(np.exp(y)+np.exp(-1*y))
#         result.append(r)
#     return result

# def segmoid(layer):
#     layer = np.array(layer)
#     result = []
#     for y in layer:
#         r = 1/(1+np.exp(-1*y))
#         result.append(r)
#     return result

def back_error_from_end_to_output_fcl(y_hat, y):
    """
    arguments : y_hat = list des outputs calculés par le forward, et y = list des targets
    cette fonction va calculer l'erreur de son origine(end_of_network) jusqu'à le output de fully_connected_layer
    soit ce bout de network :
    X ---> softmax(X) ---> CCE(y_hat, y) ---> Loss
    alors cette fonction va retourner la dérivée de l'erreur Loss par rapport à X
    càd elle return dL_dX
    """
    return y_hat - y


def unflatten(vector, pooled_layer):
    pooled_layer = np.array(pooled_layer)
    vector = vector.reshape(pooled_layer.shape)
    return vector
        
        

# Fully Connected Layer Operations

In [21]:
def activation_function(layer, type_of_activation='relu'):
    type_of_activation = type_of_activation.lower()
    switcher = {
        'relu': ReLU,
        'tanh': tanh,
        'segmoid': segmoid
    }
    # Get the function from switcher dictionary
    activation_type = switcher.get(type_of_activation, lambda: "Invalid type_of_activation_function, please choose either 'ReLU' or 'tanh' or 'segmoid' !")
    return activation_type(layer)
    

def ReLU(layer):
    return layer * (layer > 0)


def d_ReLU(layer):
    return 1. * (layer > 0)


def tanh(layer):
    r = (np.exp(layer)-np.exp(-1*layer))/(np.exp(layer)+np.exp(-1*layer))   
    return np.array(r)


def d_tanh(layer):
    return 1 - tanh(layer) * tanh(layer)


def segmoid(layer):
    return np.array(1/(1+np.exp(-1*layer)))


def d_segmoid(vector):
    """
    cette fontion prend un vector en entrée et retourne la dérivée de segmoid par rapport a ce vector
    """
    return segmoid(vector) * (1 - segmoid(vector))


def softmax(data):
    proba_values = np.exp(data)/(np.sum(np.exp(data)))   
    return np.array(proba_values)


def categoricalCrossEntropy(generated_values, target_values):
    somme = 0
    for i in range(len(generated_values)):
        somme = somme + target_values[i] * np.log(generated_values[i])
    return (-1) * somme 


def normelize(img):
    return img/255


def flatten(img):
    img = np.array(img) 
    return img.flatten()


def show_image(img):
    plt.imshow(img, cmap=plt.cm.binary)
    plt.show()
    

def init_params(my_network):
    nbr_layers = len(my_network) - 1
    W, B = [], []
    for i in range(nbr_layers):
        W.append(np.random.randn(my_network[i+1], my_network[i]))
        B.append(np.random.randn(my_network[i+1], 1))
    return W, B


def forward_pass(img, W, B):
    """
    here we will use this notation :
    Z[i] = W[i].X + B[i]
    A[i] = activation_function(Z[i])
    Z is a list that carries all the output of each layer
    A is a list that carries all the output of each activation function
    """
    act_functions = activation_functions_fcl[1:-1] # we omit the first element and the last one because the first activation will be None, and the last one will always be 'softmax'
    act_functions = [type_of_activation.lower() for type_of_activation in act_functions] # lawercase all the items
    switcher = {
        'relu': ReLU,
        'tanh': tanh,
        'segmoid': segmoid
    }
    # Get the function from switcher dictionary
    activation_types = [switcher.get(type_of_activation, lambda: "Invalid type_of_activation_function") for type_of_activation in act_functions]
    Z, A = [], [img]
    for i in range(len(W)):
        if i == len(W)-1: # we have to use softmax as activation layer because we're in the last layer
            Z.append(np.dot(W[i], A[i]) + B[i])
            A.append(softmax(Z[i]))
        else: # we're in hidden layer
            Z.append(np.dot(W[i], A[i]) + B[i])
            A.append(activation_types[i](Z[i]))
    return Z, A


def one_hot(y):
    return np.eye(10)[y].reshape(10, 1)


def update_W_and_B(W, dL_dW, B, dL_dB, lr):
    """
    this function update the weights and Biais of myNetwork
    arguments : 
    - W : it is a list that contains each Weight vector ([W1, W2, ...])
    - dL_dW : derivatives of loss with respect to Weights (it is a list that contains Weights derivatives vectors [dL_dW1, dL_dW2, ...])
    - B : it is a list that contains each Biais vector ([B1, B2, ...])
    - dL_dB : derivatives of loss with respect to Biais (it is a list that contains Biais derivatives vectors [dL_dB1, dL_dB2, ...])
    - lr : learning rate (real number)
    """
    new_W = []
    new_B = []
    for w, dw in zip(W, dL_dW):
        w = w - lr * dw
        new_W.append(w)
    for b, db in zip(B, dL_dB):
        b = b - lr * db
        new_B.append(b)
    return new_W, new_B


def compute_accuracy(x_val, y_val, W, B):
    '''
        This function does a forward pass of x_validation, then checks if the indices
        of the maximum value in the output equals the indices in the label
        y. Then it sums over each prediction and calculates the accuracy.
    '''
    predictions = []

    for x, y in zip(x_val, y_val):
        # prepare the input image
        X = flatten(x)
        X = X.reshape(len(X), 1)
        Y = one_hot(y)
        # forward-propagation
        Z, A = forward_pass(X, W, B)
        output = A[-1]
        pred = np.argmax(output)
        predictions.append(pred == np.argmax(Y))

    return np.mean(predictions)


def classify(img, W, B):
    """
    cette fonction recois une seule image en parametre
    et elle reçois les poids W et les Biais B et la liste des fonctions d'activations
    et elle retourne la catégorie de l'image en entier 0..9
    """
    predictions = []
    X = flatten(img)
    X = X.reshape(len(X), 1)
    # forward-propagation
    Z, A = forward_pass(X, W, B)
    output = A[-1]
    pred = np.argmax(output)
    return pred


def show_accuracies(train_images, train_labels, val_images, val_labels, test_images, test_labels, W, B):
    """
    this function compute accuracy for each train-set, validation-set, and test-set
    then print them all.
    arguments : train_images, train_labels, val_images, val_labels, test_images, test_labels, W, B
    """
    train_accuracy = compute_accuracy(train_images, train_labels, W, B)
    val_accuracy = compute_accuracy(val_images, val_labels, W, B)
    test_accuracy = compute_accuracy(test_images, test_labels, W, B)
    print("Accuracies :\n\
    - train accuracy = {} %\n\
    - val accuracy = {} %\n\
    - test accuracy = {} %".format(train_accuracy*100, val_accuracy*100, test_accuracy*100))


def backpro_pass(dL_dZ, A, Z, W, indice, indx_act_func):
    # we omit the first element and the last one because the first activation will be None, and the last one will always be 'softmax'
    act_functions = activation_functions[1:-1] 
    # lawercase all the items
    act_functions = [type_of_activation.lower() for type_of_activation in act_functions] 
    switcher = {
        'relu': d_ReLU,
        'tanh': d_tanh,
        'segmoid': d_segmoid
    }
    # Get the function from switcher dictionary
    activation_types = [switcher.get(type_of_activation, lambda: "Invalid type_of_activation_function") for type_of_activation in act_functions]
    
    dl_dw = np.dot(dL_dZ, np.transpose(A[indice]))
    dl_db = dL_dZ
    dl_dz = 0
    if indice*(-1) != len(Z)+1:
      dl_da = np.dot(np.transpose(W[indice+1]), dL_dZ)
      da_dz = activation_types[indx_act_func](Z[indice])
      dl_dz = dl_da * da_dz
    return dl_dw, dl_db, dl_dz

# Here we will upload the dataset and normelize it then shuffle it then split it

In [22]:
images_path = '/content/drive/MyDrive/Colab Notebooks/mnist_data/train-images.idx3-ubyte'
labels_path = '/content/drive/MyDrive/Colab Notebooks/mnist_data/train-labels.idx1-ubyte'
test_images_path = '/content/drive/MyDrive/Colab Notebooks/mnist_data/test-images.idx3-ubyte'
test_labels_path = '/content/drive/MyDrive/Colab Notebooks/mnist_data/test-labels.idx1-ubyte'
test_images, test_labels = loadlocal_mnist(test_images_path, test_labels_path)
train_images, train_labels = loadlocal_mnist(images_path, labels_path)

# group all the images in one list
# then normelize all the images
images = np.concatenate([train_images, test_images])
labels = np.concatenate([train_labels, test_labels])
images = normelize(images)

# shuffle all the images and all labels randomly
random.seed(12)
indices = np.arange(len(labels))
np.random.shuffle(indices)
labels = labels[indices]
images = images[indices]

# change shape of the images
images = images.reshape(len(images), 28, 28)

# split the data into train, validation and test 
train_images, val_images, test_images = images[:60000], images[60000:65000], images[65000:]
train_labels, val_labels, test_labels = labels[:60000], labels[60000:65000], labels[65000:]

# Learning  ------>  GO FOR LAUNCH !

In [25]:
def input_layer(dict):
  dict['type_of_layer'] = 'input'
  return dict

def convolution_layer(dict):
  dict['type_of_layer'] = 'convolution'
  return dict

def pooling_layer(dict):
  dict['type_of_layer'] = 'pooling'
  return dict

def flatten_layer():
  dict = {'type_of_layer': 'flatten'}
  return dict

def fcl(dict):
  dict['type_of_layer'] = 'fcl'
  return dict

In [65]:
numbers_of_epochs = 5
my_cnn = [input_layer({
              'width_image': 28,
              'height_image': 28,
              'nbr_channels': 1   # 1 --> means gray scale, and 3 --> means rgb
              }
          ),
          convolution_layer({
              'nbr_of_kernels':6, 
              'kernel_size':5, 
              'padding':0, 
              'stride':1, 
              'type_of_activation_function':'relu'
              }
          ), 
          pooling_layer({
              'type_of_pooling' : 'MAX_POOLING',
              'kernel_size' : 5,
              'stride' : 2
              } 
          ),
          # convolution_layer({
          #     'nbr_of_kernels':6, 
          #     'kernel_size':5, 
          #     'padding':0, 
          #     'stride':1, 
          #     'type_of_activation_function':'relu'
          #     }
          # ), 
          # pooling_layer({
          #     'type_of_pooling' : 'MAX_POOLING',
          #     'kernel_size' : 5,
          #     'stride' : 2
          #     } 
          # ),
          flatten_layer(),
          fcl({
              'network' : [20, 10], # 20 neurons in hidden layer, and 10 neurons in output layer
              'act_functions' : [None, 'tanh', 'softmax'], # 'tanh' will be the activation function in the hidden layer, and 'softmax' in the last layer
              'learning_rate' : 0.01
                }
          )
          ]
my_cnn

[{'height_image': 28,
  'nbr_channels': 1,
  'type_of_layer': 'input',
  'width_image': 28},
 {'kernel_size': 5,
  'nbr_of_kernels': 6,
  'padding': 0,
  'stride': 1,
  'type_of_activation_function': 'relu',
  'type_of_layer': 'convolution'},
 {'kernel_size': 5,
  'stride': 2,
  'type_of_layer': 'pooling',
  'type_of_pooling': 'MAX_POOLING'},
 {'type_of_layer': 'flatten'},
 {'act_functions': [None, 'tanh', 'softmax'],
  'learning_rate': 0.01,
  'network': [20, 10],
  'type_of_layer': 'fcl'}]

In [67]:
W, B = [], []
activation_functions = []
for layer in my_cnn:
  if layer['type_of_layer'] == 'fcl':
    activation_functions_fcl = layer['act_functions']
    break
losses = []
for epoch in range(numbers_of_epochs):
  epoch_losses = []
  start_epoch_time = time.time()
  for i in range(len(train_labels[:1000])):
    if i%100 == 0:
      print('image -----> ', i)
    img = np.array(train_images[i])
    Y = one_hot(train_labels[i])
    Z = []  # Z est une list qui contient chaque resultat d'une operation comme 'convolution', 'pooling', ..etc ---> avant d'appliquer l'activation 
    A = []  # A est une list qui contient chaque resultat final de chaque couche en ordre A=[input, convolved_layer, ...., output_layer_fcl] ---> A[couche] = act_funct(Z[couche])
    for layer in my_cnn:

      if layer['type_of_layer'] in ['input']:
        # print('i have to add None to my weights and Biais because we are in the input layer')
        W.append(None)
        B.append(None)
        A.append([img])
      elif layer['type_of_layer'] in ['convolution']:
        # print('i have to initialize weight and biais then add them to W')
        # filters = [initialize_filter(layer['kernel_size'], layer['kernel_size']) for i in range(layer['nbr_of_kernels'])]
        # biais = initialize_filter(layer['nbr_of_kernels'], 1)

        convolved_layer, filters, biais = get_convolved_layer_from_previous_layer([img], layer['nbr_of_kernels'], layer['kernel_size'])
        W.append(filters)
        B.append(biais)
        A.append(convolved_layer)

      elif layer['type_of_layer'] in ['pooling']:
        # print('i add None to the W because there is no Weights neither Biais in this layer')
        W.append(None)
        B.append(None)
        pooled_layer = pooling(A[-1], layer['type_of_pooling'], layer['kernel_size'], layer['stride'])
        A.append(pooled_layer)
      elif layer['type_of_layer'] in ['flatten']:
        # print('i add None to the W because there is no Weights neither Biais in this layer')
        W.append(None)
        B.append(None)
        A.append(flatten(A[-1]))
      elif layer['type_of_layer'] in ['fcl']:
        # print('i have to retreive the architecture of the fcl then initialize W and B')
        my_network_fcl = [len(A[-1])] + layer['network']  # we add the number of neurons of the fcl's input layer
        W_fcl, B_fcl = init_params(my_network_fcl)
        for w_fcl, b_fcl in zip(W_fcl, B_fcl):
          W.append(w_fcl)
          B.append(b_fcl)

        # prepare the input image
        X = flatten(A[-1]).reshape(len(A[-1]), 1)
    
        # forward-propagation
        Z_fcl, A_fcl = forward_pass(X, W_fcl, B_fcl)
        for z_fcl in Z_fcl:
          Z.append(z_fcl)
        loss = categoricalCrossEntropy(A_fcl[-1], Y)
        A[-1] = X
        for a_fcl in A_fcl[1:]:
          A.append(a_fcl)
        epoch_losses.append(loss)
      else:
        print('you certainly made an error in your configuration of network')
    # print(mean(losses))
  # Test my model at epoch = gama    
    # if (epoch % 5 == 0):
        # accuracy = compute_accuracy(val_images, val_labels, W, B)
        # accuracies.append(accuracy)
        # print('---------------------------------------------------------------------------> Accuracy : ',accuracies[-1])

  print('epoch ',epoch,' -------> loss : ',np.array(epoch_losses).mean(), ' | time : ',(time.time() - start_epoch_time))
  losses.append(np.array(epoch_losses).mean())


image ----->  0
image ----->  100
image ----->  200
image ----->  300
image ----->  400
image ----->  500
image ----->  600
image ----->  700
image ----->  800
image ----->  900
epoch  0  -------> loss :  7.2127309904294234  | time :  137.6916446685791
image ----->  0
image ----->  100
image ----->  200
image ----->  300
image ----->  400
image ----->  500
image ----->  600
image ----->  700
image ----->  800
image ----->  900
epoch  1  -------> loss :  7.450973238334995  | time :  137.17038011550903
image ----->  0
image ----->  100


KeyboardInterrupt: ignored

# Brouillon

In [None]:
my_list = [a = 2, b = {'h' : 5}]

SyntaxError: ignored

In [None]:
convolution1 = {
    'caracteristics' : {
        'kernel_size' : 5,
        'nbr_of_kernels' : 6,
        'padding' : 0,
        'stride' : 1
    },
    'weights' : {
        'type_of_init' : 'NORMAL_DISTRIBUTION',
        'W' : np.zeros((2,2))
    }
}
convolution1

{'caracteristics': {'kernel_size': 5,
  'nbr_of_kernels': 6,
  'padding': 0,
  'stride': 1},
 'weights': {'W': array([[0., 0.],
         [0., 0.]]), 'type_of_init': 'NORMAL_DISTRIBUTION'}}

In [None]:
convolution1['weights']['W'] = initialize_filter(convolution1['caracteristics']['kernel_size'], convolution1['caracteristics']['kernel_size'])

In [None]:
convolution1

{'caracteristics': {'kernel_size': 5,
  'nbr_of_kernels': 6,
  'padding': 0,
  'stride': 1},
 'weights': {'W': array([[-0.39524818, -0.1670707 ,  1.35749159,  1.84381627,  1.14554518],
         [ 1.16060183,  0.29525999, -1.09154292, -1.53366747, -0.25814321],
         [ 3.24020311, -0.86692194, -0.28928828, -0.08947808, -0.20620548],
         [ 0.13602014, -1.64640102,  0.3204804 ,  1.45943377, -0.08902425],
         [ 1.39445204, -1.01226396, -0.84211526, -0.35216059,  0.3105174 ]]),
  'type_of_init': 'NORMAL_DISTRIBUTION'}}

In [None]:
# Initialize weights and biais of my_network using normal distribution
W, B = init_params(my_network)

# training
start_time = time.time()
losses = []
accuracies = []
for epoch in range(number_epochs):
    epoch_losses = np.array([])
    start_epoch_time = time.time()
    for i in range(len(train_labels)):
        # prepare the input image
        X = flatten(train_images[i])
        X = X.reshape(len(X), 1)
        Y = one_hot(train_labels[i])
        # forward-propagation
        Z, A = forward_pass(X, W, B)
        loss = categoricalCrossEntropy(A[-1], Y)
        epoch_losses = np.concatenate([epoch_losses, loss])

        

        # backpropagation
        dL_dZ2 = A[-1] - Y
        dL_dW, dL_dB, dL_dZ = [], [], [dL_dZ2]
        # here the variable indice has for aim to keep truck to which layer are we
        # and the variable indx_act_func has the objectif to tell us which activation function should we use in each layer
        indice, indx_act_func = 0, -1
        for layer in range(len(my_network)-1):
          dl_dw, dl_db, dl_dz = backpro_pass(dL_dZ[-1], A, Z, W, indice - 2, indx_act_func)
          dL_dW.append(dl_dw)
          dL_dB.append(dl_db)
          dL_dZ.append(dl_dz)
          indice = indice - 1 
          indx_act_func = indx_act_func - 1
          
        # update weights W and Biais B  
        dL_dW.reverse()
        dL_dB.reverse()
        W, B = update_W_and_B(W, dL_dW, B, dL_dB, lr)
      
    # Test my model at epoch = gama    
    if (epoch % 5 == 0):
        accuracy = compute_accuracy(val_images, val_labels, W, B)
        accuracies.append(accuracy)
        print('---------------------------------------------------------------------------> Accuracy : ',accuracies[-1])

    print('epoch ',epoch,' -------> loss : ',np.array(epoch_losses).mean(), ' | time : ',(time.time() - start_epoch_time))
    losses.append(np.array(epoch_losses).mean())

print("--- %s seconds ---" % (time.time() - start_time), ' | time : ',(time.time() - start_time)) 
fig, ax = plt.subplots(2)
fig.suptitle('Graph of accuracy and loss')
ax[0].plot(losses)
ax[1].plot(accuracies)

---------------------------------------------------------------------------> Accuracy :  0.8266
epoch  0  -------> loss :  0.9008753403207422  | time :  11.954740047454834
epoch  1  -------> loss :  0.5117232454953177  | time :  11.885202169418335
epoch  2  -------> loss :  0.4440902588144349  | time :  11.783049821853638
epoch  3  -------> loss :  0.40813659364859706  | time :  11.741705417633057
epoch  4  -------> loss :  0.3830731204785044  | time :  11.751951932907104
---------------------------------------------------------------------------> Accuracy :  0.8964
epoch  5  -------> loss :  0.3646046786631581  | time :  12.014232635498047
epoch  6  -------> loss :  0.34978962074251735  | time :  11.79801893234253
epoch  7  -------> loss :  0.3370311651941264  | time :  11.982735633850098
epoch  8  -------> loss :  0.3267060959503052  | time :  11.86242151260376
epoch  9  -------> loss :  0.318313245392431  | time :  11.713358402252197
-------------------------------------------------

KeyboardInterrupt: ignored

In [None]:
show_accuracies(train_images, train_labels, val_images, val_labels, test_images, test_labels, W, B)

Accuracies :
    - train accuracy = 91.315 %
    - val accuracy = 90.42 %
    - test accuracy = 91.2 %
