<a href="https://colab.research.google.com/github/syphaxAouadene/Cours_programmation_concurrente/blob/main/mnist_classification_using_tensorFlow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#**MNIST CLASSIFICATION USING TENSORFLOW**
# Accuracies :
##    - train accuracy = 95.79166666666666 %
##    - val accuracy = 95.1 %
##    - test accuracy = 95.38 %

In [23]:
import numpy as np
import pandas as pd
import time
from IPython.display import clear_output
import json
import matplotlib.pyplot as plt
%pylab inline
import os
from scipy import signal
from mlxtend.data import loadlocal_mnist
import platform
import tensorflow as tf

Populating the interactive namespace from numpy and matplotlib


`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"


In [24]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [25]:
def one_hot(y):
    y = int(y)
    return tf.reshape(tf.eye(10)[y], (10, 1))

# Upload_data

In [26]:
def normelize(img):
    return (img/255) - 0.5

In [27]:
images_path = '/content/drive/MyDrive/Colab Notebooks/mnist_data/train-images.idx3-ubyte'
labels_path = '/content/drive/MyDrive/Colab Notebooks/mnist_data/train-labels.idx1-ubyte'
test_images_path = '/content/drive/MyDrive/Colab Notebooks/mnist_data/test-images.idx3-ubyte'
test_labels_path = '/content/drive/MyDrive/Colab Notebooks/mnist_data/test-labels.idx1-ubyte'
test_images, test_labels = loadlocal_mnist(test_images_path, test_labels_path)
train_images, train_labels = loadlocal_mnist(images_path, labels_path)

# group all the images in one list
# then normelize all the images
images = np.concatenate([train_images, test_images])
labels = np.concatenate([train_labels, test_labels])
images = normelize(images)

# shuffle all the images and all labels randomly
# random.seed(1331)
indices = np.arange(len(labels))
np.random.shuffle(indices)
labels = labels[indices]
images = images[indices]

# change shape of the images
images = images.reshape(len(images), 1, 28, 28)

# split the data into train, validation and test 
train_images, val_images, test_images = images[:60000], images[60000:65000], images[65000:]
train_labels, val_labels, test_labels = labels[:60000], labels[60000:65000], labels[65000:]

In [28]:
# --------------------------------------#
#shifting computation to GPU

gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    gpu = gpus[0]
    tf.config.experimental.set_memory_growth(gpu, True)
    tf.config.set_visible_devices(gpu, 'GPU')

#--------------------------------------#

In [29]:
train_images = tf.convert_to_tensor(train_images)
val_images = tf.convert_to_tensor(val_images)
test_images = tf.convert_to_tensor(test_images)
train_labels = tf.convert_to_tensor(train_labels)
val_labels = tf.convert_to_tensor(val_labels)
test_labels = tf.convert_to_tensor(test_labels)

train_images = tf.cast(train_images, tf.float32)
val_images = tf.cast(val_images, tf.float32)
test_images = tf.cast(test_images, tf.float32)
train_labels = tf.cast(train_labels, tf.float32)
val_labels = tf.cast(val_labels, tf.float32)
test_labels = tf.cast(test_labels, tf.float32)

In [30]:
train_labels.shape

TensorShape([60000])

In [31]:
def input_layer(dict):
    dict['type_of_layer'] = 'input'
    return dict

def convolution_layer(dict):
    dict['type_of_layer'] = 'convolution'
    return dict

def pooling_layer(dict):
    dict['type_of_layer'] = 'pooling'
    return dict

def flatten_layer():
    dict = {'type_of_layer': 'flatten'}
    return dict

def fcl(dict):
    dict['type_of_layer'] = 'fcl'
    return dict

In [32]:
def initialize_filters(nbr_of_filters, filter_depth, filter_size):
    """
    cette fonction s'occupe de l'initialisation d'un filtre aléatoirement selon la distribution normale
    """
    # ca serait bien d'ajouter d'autres choix d'initialisations
    return tf.random.normal((nbr_of_filters, filter_depth, filter_size, filter_size))/9.0

def initialize_weights(nbr_of_neurons, nbr_neurons_prev_layer):
    """
    this function initialize a matrix of weights and return it
    """
    # ca serait bien d'ajouter d'autres choix d'initialisations
    return tf.random.normal((nbr_of_neurons, nbr_neurons_prev_layer))/9.0

In [33]:
def input_init_W_and_B(my_cnn, num_layer):
    w, b = None, None
    return w, b


def convolution_init_W_and_B(my_cnn, num_layer):
    global my_network
    nbr_of_filters = my_cnn[num_layer]['nbr_of_kernels']
    filter_depth = my_cnn[num_layer - 1]['depth']
    filter_size =  my_cnn[num_layer]['kernel_size']
    height_prev_map = my_cnn[num_layer - 1]['height']
    width_prev_map = my_cnn[num_layer - 1]['width']
    pad = my_cnn[num_layer]['padding']
    stride = my_cnn[num_layer]['stride']
    
    # add new informations to the layer such as : depth, height and width of the image at this level
    my_network[num_layer]['depth'] = nbr_of_filters
    my_network[num_layer]['height'] = int(((height_prev_map - filter_size + 2*pad)/stride) + 1)
    my_network[num_layer]['width'] = int(((width_prev_map - filter_size + 2*pad)/stride) + 1)

    w = initialize_filters(nbr_of_filters, filter_depth, filter_size)
    b = initialize_weights(nbr_of_filters, 1)
    return w, b


def pooling_init_W_and_B(my_cnn, num_layer):
    global my_network
    height_prev_map = my_cnn[num_layer - 1]['height']
    width_prev_map = my_cnn[num_layer - 1]['width']
    pad = my_cnn[num_layer]['padding']
    stride = my_cnn[num_layer]['stride']
    filter_size = my_cnn[num_layer]['kernel_size']

    # add new informations to the layer such as : depth, height and width of the image at this level
    my_network[num_layer]['depth'] = my_cnn[num_layer - 1]['depth']
    my_network[num_layer]['height'] = int(((height_prev_map - filter_size + 2*pad)/stride) + 1)
    my_network[num_layer]['width'] = int(((width_prev_map - filter_size + 2*pad)/stride) + 1)

    # there is no weights neither biais in this pooling layer, so we return None to each of them
    w, b = None, None 
    return w, b


def flatten_init_W_and_B(my_cnn, num_layer):
    global my_network
    depth_prev_map = my_cnn[num_layer - 1]['depth']
    height_prev_map = my_cnn[num_layer - 1]['height']
    width_prev_map = my_cnn[num_layer - 1]['width']

    # add new informations to the layer such as : depth (i.e. nbrs of neurons in this flatten layer at this level)
    my_network[num_layer]['depth'] = height_prev_map * width_prev_map * depth_prev_map

    # there is no weights neither biais in this pooling layer, so we return None to each of them
    w, b = None, None 
    return w, b


def fcl_init_W_and_B(my_cnn, num_layer):
    global my_network
    nbr_neurons = my_cnn[num_layer]['nbr_of_neurons']
    nbr_neurons_prev_layer = my_cnn[num_layer - 1]['depth']
    # add new informations to the layer such as : depth (i.e. nbrs of neurons in this flatten layer at this level)
    my_network[num_layer]['depth'] = nbr_neurons

    # initialize neurons and biais of this particular layer
    w = initialize_weights(nbr_neurons, nbr_neurons_prev_layer)
    b = initialize_weights(nbr_neurons, 1)
    return w, b


def initialization(my_cnn):
    my_cnn_architecture = [my_cnn[layer]['type_of_layer'] for layer in range(len(my_cnn))]
    switcher = {
        'input': input_init_W_and_B,
        'convolution': convolution_init_W_and_B,
        'pooling': pooling_init_W_and_B,
        'flatten': flatten_init_W_and_B,
        'fcl' : fcl_init_W_and_B
    }
    # Get the function from switcher dictionary
    operation_types = [switcher.get(type_of_layer, lambda: "Invalid type_of_layer") for type_of_layer in my_cnn_architecture]
    W, B = [], []
    for i in range(len(my_cnn)):
        w, b = operation_types[i](my_cnn, i) # variable i represents num_layer
        W.append(w)
        B.append(b)
    return W, B

def ReLU(layer):
    r = tf.nn.relu(layer)
    return r


def d_ReLU(layer):
    f = tf.cast(layer>0, tf.float32)
    return 1.0 * f

In [34]:
train_images.shape

    

TensorShape([60000, 1, 28, 28])

In [35]:
def max_pooling(prev_layer, size_of_pooling_kernel, pad, stride):
#     result_of_pooling has to have shape = ((input_width - kernel_width + 2*padding)/stride) + 1
    
    x = tf.transpose(prev_layer, [0, 2, 3, 1])
    p, arg = tf.nn.max_pool_with_argmax(x, size_of_pooling_kernel, strides=[stride], padding='VALID', data_format='NHWC', output_dtype=tf.dtypes.int32)
    p = tf.transpose(p, [0, 3, 1, 2])
    arg = tf.transpose(arg, [0, 3, 1, 2])
    return p, arg


In [36]:
def flatten(img):
    flattened_img = img.flatten()
    length = len(flattened_img)
    flattened_img = flattened_img.reshape(length, 1)
    return flattened_img

In [37]:
def forward_propagation(img, my_cnn, W, B):
    """
    - img : np.array(shape=(1, 1, 28, 28))
    - W : is a list of numpy arrays
    - B : is a list of numpy arrays
    """
    my_cnn_architecture = [my_cnn[layer]['type_of_layer'] for layer in range(len(my_cnn))]
    switcher = {
        'convolution': convolution_operation,
        'pooling': pooling_operation,
        'flatten': flatten_operation,
        'fcl' : fcl_operation
    }
    # Get the function from switcher dictionary
    operation_types = [switcher.get(type_of_layer, lambda: "Invalid type_of_layer") for type_of_layer in my_cnn_architecture]
    Z, A = [img], [img]
    dP_dC = []
    for i in range(1, len(my_cnn)):
        z, a, dp_dc = operation_types[i](my_cnn[i], A, W, B, i)
        Z.append(z)
        A.append(a)
        if type(dp_dc) != type(None):
            dP_dC.append(dp_dc)
    return Z, A, dP_dC


def convolution_operation(layer, A, W, B, num_layer): 
    prev_layer = A[-1]
    
    filters = W[num_layer]
    biais = B[num_layer]
    pad = layer['padding']
    stride = layer['stride']
    z = get_convolved_layer(prev_layer, filters, biais, pad=pad, stride=stride, mode='valid')
    a = ReLU(z)
    return z, a, None


def pooling_operation(layer, A, W, B, layer_num):
    """
    convolved_layer : is a list that contains each convolved_map from previous_layer
    type_of_pooling : should be either 'MAX_POOLING' or 'MEAN_POOLING' or 'MIN_POOLING'
    size_of_pooling_kernel : is an integer that represents the shape of kernel 
                            (if size_of_pooling_kernel=2 then shape_kernel=(2, 2))
    this function return a list that contains each pooled_map
    """
    
    prev_layer = A[-1]
    batch_size = prev_layer.shape[0]
    type_of_pooling = layer['type_of_pooling']
    size_of_pooling_kernel = layer['kernel_size']
    stride = layer['stride']
    pad = layer['padding']
    switcher = {
        'MAX_POOLING': max_pooling,
        'MEAN_POOLING': mean_pooling,
        'MIN_POOLING': min_pooling
    }
    # Get the function from switcher dictionary
    pooling_function = switcher.get(type_of_pooling, lambda: "Invalid type_of_pooling !")
    pooled_layer, dP_dC = pooling_function(prev_layer, size_of_pooling_kernel, pad, stride)
    return pooled_layer, pooled_layer, dP_dC


def flatten_operation(layer, A, W, B, layer_num):
    
    a = tf.reshape(A[-1], [-1]) 
    a = tf.reshape(a, a.shape+[1])
    a = tf.cast(a, tf.float32)
    return a, a, None


def fcl_operation(layer, A, W, B, layer_num):
    
    input_fcl = A[-1]
    weights_fcl = W[layer_num]
    biais_fcl = B[layer_num]
    type_of_activation = layer['type_of_activation'].lower()
    switcher = {
        'relu': ReLU,
        'tanh': tanh,
        'segmoid': segmoid,
        'softmax': softmax
    }
    # Get the function from switcher dictionary
    activation_type = switcher.get(type_of_activation, lambda: "Invalid type_of_activation_function, please choose either 'ReLU' or 'tanh' or 'segmoid' or 'softmax' !")
    
    output_fcl = tf.matmul(weights_fcl, input_fcl) + biais_fcl
    output = activation_type(output_fcl)
    return output_fcl, output, None

In [38]:
def tanh(layer):
    r = (np.exp(layer)-np.exp(-1*layer))/(np.exp(layer)+np.exp(-1*layer))   
    return np.array(r)


def d_tanh(layer):
    return 1 - tanh(layer) * tanh(layer)


def segmoid(layer):
    return np.exp(layer)/(1 + np.exp(layer))


def d_segmoid(vector):
    """
    cette fontion prend un vector en entrée et retourne la dérivée de segmoid par rapport a ce vector
    """
    return segmoid(vector) * (1 - segmoid(vector))


# def softmax(x):
#     maxi = np.max(x)
#     return np.exp(x-maxi)/np.sum(np.exp(x-maxi))
def softmax(x):
    return tf.nn.softmax(x, axis=0)

def categoricalCrossEntropy(generated_values, target_values):
    generated_values = [[1.0e-12] if r[0]==0.0 else r for r in generated_values]
    somme = 0
    for i in range(len(generated_values)):
        somme = somme + target_values[i] * np.log(generated_values[i])
    return (-1) * somme 


In [39]:
def full_convolution(img, f):
    return signal.convolve(img, f, mode='full')


def unflatten(vector, pooled_layer):
    vector = vector.reshape(pooled_layer.shape)
    return vector


def mean_pooling(convolved_map, size_of_pooling_kernel, stride):
    #     result_of_pooling has to have shape = ((input_width - kernel_width) + 2*padding/stride) + 1
    result = np.zeros((int((convolved_map.shape[0]-size_of_pooling_kernel)/stride)+1, int((convolved_map.shape[0]-size_of_pooling_kernel)/stride)+1))
    for i in range(0, result.shape[0], stride):
        for j in range(0, result.shape[1], stride):
            imaget = convolved_map[i:size_of_pooling_kernel+i, j:size_of_pooling_kernel+j]
            result[i, j] = np.mean(imaget)
    return result
    
    
def min_pooling(convolved_map, size_of_pooling_kernel, stride):
    #     result_of_pooling has to have shape = ((input_width - kernel_width) + 2*padding/stride) + 1
    result = np.zeros((int((convolved_map.shape[0]-size_of_pooling_kernel)/stride)+1, int((convolved_map.shape[0]-size_of_pooling_kernel)/stride)+1))
    for i in range(0, result.shape[0], stride):
        for j in range(0, result.shape[1], stride):
            imaget = convolved_map[i:size_of_pooling_kernel+i, j:size_of_pooling_kernel+j]
            result[i, j] = np.min(imaget)
    return result

In [40]:
def update_W_and_B(W, dL_dW, B, dL_dB, lr, optimizer='sgd', V_w=0, V_b=0):
    """
    this function update the weights and Biais of myNetwork
    arguments : 
    - W : it is a list that contains each Weight vector ([W1, W2, ...])
    - dL_dW : derivatives of loss with respect to Weights (it is a list that contains Weights derivatives vectors [dL_dW1, dL_dW2, ...])
    - B : it is a list that contains each Biais vector ([B1, B2, ...])
    - dL_dB : derivatives of loss with respect to Biais (it is a list that contains Biais derivatives vectors [dL_dB1, dL_dB2, ...])
    - lr : learning rate (real number)
    """
    params = W, dL_dW, B, dL_dB, lr
    if optimizer == 'sgd':
        new_W, new_B = sgd(params)
        return new_W, new_B, _, _
    elif optimizer == 'momentum':
        new_W, new_B, new_V_w, new_V_b = momentum(params, V_w, V_b)
        return new_W, new_B, new_V_w, new_V_b
    else:
        print('optimizer not understood !')
        return -1

def sgd(params):
    W, dL_dW, B, dL_dB, lr = params
    new_W, new_B = [], []
    for w, dw in zip(W, dL_dW):
        try: 
            w = w - lr * dw
        except:
            w = None
        new_W.append(w)
    for b, db in zip(B, dL_dB):
        try:
            b = b - lr * db
        except:
            b = None
        new_B.append(b)
    return new_W, new_B

def momentum(params, V_w, V_b):
    W, dL_dW, B, dL_dB, lr = params
    new_W, new_B, new_V_w, new_V_b = [], [], [], []
    for w, dw, v_w in zip(W, dL_dW, V_w):
        try: 
            v_w = 0.9*v_w + lr*dw
            w = w - v_w
        except:
            w = None
            v_w = None
        new_V_w.append(v_w)
        new_W.append(w)
    for b, db, v_b in zip(B, dL_dB, V_b):
        try:
            v_b = 0.9*v_b + lr*db
            b = b - v_b
        except:
            b = None
            v_b = None
        new_B.append(b)
        new_V_b.append(v_b)
    return new_W, new_B, new_V_w, new_V_b

In [41]:
def backpro_input(my_cnn, dL_dZ, W, Z, A, num_layer, dP_dC):
    return None, None, None

def backpro_convolution(my_cnn, dL_dZ, W, Z, A, num_layer, dP_dC):

    F = W[num_layer]
    dl_dz = d_ReLU(Z[num_layer]) * dL_dZ[-1]
    dL_dF = np.zeros(W[num_layer].shape)
    dL_dX = np.zeros(A[num_layer-1].shape)
    filter_size = my_cnn[num_layer].get('kernel_size')
    batch_size = A[num_layer-1].shape[0]
    dL_dB = np.zeros((my_cnn[num_layer]['depth'], 1))
  
    for n in range(dl_dz.shape[0]):
        for d in range(dl_dz.shape[1]):
            dL_dB[d] = np.sum(dl_dz[n][d])
    X = A[num_layer - 1]
    if num_layer>1:
        dL_dX = get_convolved_layer(dl_dz, F, mode='full')
    
    dL_dF = get_convolved_layer(X, dl_dz, mode='backpro')
    dL_dX = tf.cast(dL_dX, tf.float32)
    
    return dL_dF, dL_dB, dL_dX
    


def backpro_pooling(my_cnn, dL_dZ, W, Z, A, num_layer, dP_dC):
    
    if num_layer>1:
        if my_cnn[num_layer - 1].get('type_of_layer') == 'pooling':
            print('here i have to calculate max indices matrix')
        else:
            prev_layer = A[num_layer-1]
            dl_dz = dL_dZ[-1]
            dp_dc = dP_dC
            #############################
            tensor = tf.zeros(prev_layer.shape)
            tensor = tf.reshape(tensor, -1)
            tensor = tf.reshape(tensor, (tensor.shape[0], 1))
            tensor = tf.cast(tensor, tf.float32)
            indices = tf.reshape(dp_dc, -1)
            indices = tf.reshape(indices, (indices.shape[0], 1))
            updates = tf.reshape(dl_dz, -1)
            updates = tf.reshape(updates, (updates.shape[0], 1))
            res = tf.tensor_scatter_nd_update(tensor, indices, updates)
            res = tf.reshape(res, prev_layer.shape)
            #############################
    
    return None, None, res


def backpro_flatten(my_cnn, dL_dZ, W, Z, A, num_layer, dP_dC):
    if num_layer>1:
        dl_dz = unflatten(dL_dZ[-1].numpy(), A[num_layer-1].numpy())
    else:
        dl_dz = 0 # if flatten is 1-th layer (just after 'input_layer') then we don't have to continue the backpro
    dl_dz = tf.convert_to_tensor(dl_dz)
    return None, None, dl_dz


def backpro_fcl(my_cnn, dL_dZ, W, Z, A, num_layer, dP_dC):
    type_of_activation = my_cnn[num_layer-1].get('type_of_activation')
    
    switcher = {
        'relu': d_ReLU,
        'tanh': d_tanh,
        'segmoid': d_segmoid
    }
    # Get the function from switcher dictionary
    if type_of_activation != None:
        activation_type = switcher.get(type_of_activation, lambda: None)

    
    dl_dw = tf.matmul(dL_dZ[-1], tf.transpose(A[num_layer - 1]))
    dl_db = dL_dZ[-1]
    dl_da = tf.matmul(tf.transpose(W[num_layer]), dL_dZ[-1])
    
    if type_of_activation == None:
        dl_dz = dl_da
    else:
        da_dz = activation_type(Z[num_layer - 1])
        dl_dz = dl_da * da_dz
    return dl_dw, dl_db, dl_dz



def backpropagation(my_cnn, dL_dZ, W, Z, A, dP_dC):
    
    my_cnn_architecture = [my_cnn[layer]['type_of_layer'] for layer in range(len(my_cnn))]

    switcher = {
        'convolution': backpro_convolution,
        'pooling': backpro_pooling,
        'flatten': backpro_flatten,
        'fcl' : backpro_fcl,
        'input' : backpro_input
    }
    # Get the function from switcher dictionary
    operation_types = [switcher.get(type_of_layer, lambda: "Invalid type_of_layer") for type_of_layer in my_cnn_architecture]
    
    dL_dW, dL_dB = [], []
    
    for num_layer in range(len(my_cnn)-1, -1, -1): # iterate through all layers from output to input
        if my_cnn[num_layer].get('type_of_layer') == 'pooling':
            dl_dw, dl_db, dl_dz = operation_types[num_layer](my_cnn, dL_dZ, W, Z, A, num_layer, dP_dC[-1])
            dP_dC = dP_dC[:-1]
        else:
            dl_dw, dl_db, dl_dz = operation_types[num_layer](my_cnn, dL_dZ, W, Z, A, num_layer, _)
        
        dL_dW.append(dl_dw)
        dL_dB.append(dl_db)
        dL_dZ.append(dl_dz)
    
    return dL_dW, dL_dB

In [42]:
def compute_accuracy(my_cnn, x_val, y_val, W, B):
    '''
        This function does a forward pass of x_validation, then checks if the indices
        of the maximum value in the output equals the indices in the label
        y. Then it sums over each prediction and calculates the accuracy.
    '''
    predictions = []

    for x, y in zip(x_val, y_val):
        X = tf.reshape(x, (1,) + x.shape)
        Y = one_hot(y)
        
        Z, A, dP_dC = forward_propagation(X, my_cnn, W, B)
        output = A[-1]
        pred = np.argmax(output)
        predictions.append(pred == np.argmax(Y))

    return np.mean(predictions)

def show_accuracies(my_cnn, train_images, train_labels, val_images, val_labels, test_images, test_labels, W, B):
    """
    this function compute accuracy for each train-set, validation-set, and test-set
    then print them all.
    arguments : train_images, train_labels, val_images, val_labels, test_images, test_labels, W, B
    """
    train_accuracy = compute_accuracy(my_cnn, train_images, train_labels, W, B)
    val_accuracy = compute_accuracy(my_cnn, val_images, val_labels, W, B)
    test_accuracy = compute_accuracy(my_cnn, test_images, test_labels, W, B)
    print("Accuracies :\n\
    - train accuracy = {} %\n\
    - val accuracy = {} %\n\
    - test accuracy = {} %".format(train_accuracy*100, val_accuracy*100, test_accuracy*100))

In [44]:
def tf_rot180(w):
    """
    Roate by 180 degrees
    """
    return tf.reverse(w, axis=[0, 1])


def tf_pad_to_full_conv2d(x, w_size):
    """
    Pad x, such that using a 'VALID' convolution in tensorflow is the same
    as using a 'FULL' convolution. See
    http://deeplearning.net/software/theano/library/tensor/nnet/conv.html#theano.tensor.nnet.conv2d
    for description of 'FULL' convolution.
    """
    return tf.pad(x, [[0, 0],
                      [0, 0],
                      [w_size - 1, w_size - 1],
                      [w_size - 1, w_size - 1]
                      ])



def get_convolved_layer(prev_layer, filters, biais=0, pad=0, stride=1, mode='valid'):
    '''
    prev_layer : is 4 dimension np array with shape(batch_size, nbr_channels, height, width)
    filters : is a 4 dimension np array with shape(nbr_of_filters, filter_depth, filter_size, filter_size)
    biais : is a 2 dimension np array with shape(nbr_of_filters, 1)
    '''
    if mode == 'backpro':
        
        # --------------------------------------------------
        X = prev_layer
        X = tf.transpose(X, [1, 0, 2, 3])
        dl_dz = tf.transpose(filters, [2, 3, 0, 1])
        new_filters = tf.nn.conv2d(X, dl_dz, strides=[1,1], padding='VALID', data_format='NCHW')
        new_filters = tf.transpose(new_filters, [1, 0, 2, 3])
        convolved_layer = new_filters
        # --------------------------------------------------
        
    
    elif mode == 'valid':

        f = tf.transpose(filters, [2, 3, 1, 0])
        c = tf.nn.conv2d(prev_layer, f, [1], padding='VALID', data_format='NCHW')
        convolved_layer = c
    
    elif mode == 'full':
        dl_dz = prev_layer
        f = tf.transpose(filters, [2, 3, 0, 1])
        f = tf_rot180(f)
        dl_dz = tf_pad_to_full_conv2d(dl_dz, f.shape[0])
        dL_dX = tf.nn.conv2d(input=dl_dz,
                                filters=f,
                                strides=[1],
                                padding='VALID',
                                data_format='NCHW')

        convolved_layer = dL_dX
    else:
        print('Erreur : mode not understood ! --> mode has to be "valid" or "full"')
    return convolved_layer

In [43]:
# here you can define your own cnn architecture :
# you can choose any number of layers you want

my_network = [input_layer({
              'width': 28,
              'height': 28,
              'depth': 1   # 1 --> means gray scale, and 3 --> means rgb
              }
          ),
          convolution_layer({
              'nbr_of_kernels':32, 
              'kernel_size':3, 
              'padding':0, 
              'stride':1, 
              'type_of_activation':'relu'
              }
          ), 
          pooling_layer({
              'type_of_pooling' : 'MAX_POOLING',
              'kernel_size' : 2,
              'padding':0,
              'stride' : 2
              } 
          ),
        #   convolution_layer({
        #       'nbr_of_kernels':24, 
        #       'kernel_size':5, 
        #       'padding':0, 
        #       'stride':1, 
        #       'type_of_activation':'relu'
        #       }
        #   ), 
        #   pooling_layer({
        #       'type_of_pooling' : 'MAX_POOLING',
        #       'kernel_size' : 2,
        #       'padding':0,
        #       'stride' : 2
        #       } 
        #   ),
        #   convolution_layer({
        #       'nbr_of_kernels':48, 
        #       'kernel_size':5, 
        #       'padding':0, 
        #       'stride':1, 
        #       'type_of_activation':'relu'
        #       }
        #   ), 
        #   pooling_layer({
        #       'type_of_pooling' : 'MAX_POOLING',
        #       'kernel_size' : 2,
        #       'padding':0,
        #       'stride' : 2
        #       } 
        #   ),
        #   convolution_layer({
        #       'nbr_of_kernels':62, 
        #       'kernel_size':5, 
        #       'padding':0, 
        #       'stride':1, 
        #       'type_of_activation':'relu'
        #       }
        #   ), 
        #   pooling_layer({
        #       'type_of_pooling' : 'MAX_POOLING',
        #       'kernel_size' : 2,
        #       'padding':0,
        #       'stride' : 2
        #       } 
        #   ),
        #   convolution_layer({
        #       'nbr_of_kernels':120, 
        #       'kernel_size':5, 
        #       'padding':0, 
        #       'stride':1, 
        #       'type_of_activation':'relu'
        #       }
        #   ), 
        #   pooling_layer({
        #       'type_of_pooling' : 'MAX_POOLING',
        #       'kernel_size' : 2,
        #       'padding':0,
        #       'stride' : 2
        #       } 
        #   ),
        #   convolution_layer({
        #       'nbr_of_kernels':150, 
        #       'kernel_size':5, 
        #       'padding':0, 
        #       'stride':1, 
        #       'type_of_activation':'relu'
        #       }
        #   ), 
        #   pooling_layer({
        #       'type_of_pooling' : 'MAX_POOLING',
        #       'kernel_size' : 2,
        #       'padding':0,
        #       'stride' : 2
        #       } 
        #   ),
        #   convolution_layer({
        #       'nbr_of_kernels':200, 
        #       'kernel_size':5, 
        #       'padding':0, 
        #       'stride':1, 
        #       'type_of_activation':'relu'
        #       }
        #   ), 
        #   pooling_layer({
        #       'type_of_pooling' : 'MAX_POOLING',
        #       'kernel_size' : 2,
        #       'padding':0,
        #       'stride' : 2
        #       } 
        #   ),
          flatten_layer(),
#           fcl({
#               'nbr_of_neurons' : 100, # 20 neurons in hidden layer
#               'type_of_activation' : 'tanh', # 'tanh' will be the activation function in the hidden layer, and 'softmax' in the last layer
#               'learning_rate' : 0.001
#                 }
        #   ),
          fcl({
              'nbr_of_neurons' : 100, # nbr of neurons in output_layer layer
              'type_of_activation' : 'relu', # 'tanh' will be the activation function in the hidden layer, and 'softmax' in the last layer
          }
          ),
          fcl({
              'nbr_of_neurons' : 10, # nbr of neurons in output_layer layer
              'type_of_activation' : 'softmax', # 'tanh' will be the activation function in the hidden layer, and 'softmax' in the last layer
          }
          )
          ]
# define the hyper-parameters of your model
hyper_params = {
    'nbr_of_epochs': 20,
    'learning_rate': 0.0001,
    'optimizer': 'momentum',
    'batch_learning': False,
    'batch_size': 10,
    'drop-out': False,
    'drop-out_value': 0.1 
    }

In [45]:
# first of all, let's initialize our weights/filters and Biais of our network
my_cnn = my_network
W, B = initialization(my_cnn)
V_w, V_b = [], []
for w, b in zip(W, B):
    try:
        V_w.append(tf.zeros(w.shape))
        V_b.append(tf.zeros(b.shape))
    except:
        V_w.append(None)
        V_b.append(None)
nbr_of_epochs = hyper_params['nbr_of_epochs']
learning_rate = hyper_params['learning_rate']
optimizer = hyper_params['optimizer']
all_losses = []

for e in range(nbr_of_epochs):
    losses = []
    start_time = time.time()
    for i in range(len(train_images)):
        X = tf.reshape(train_images[i], (1,)+train_images[i].shape)
        Y = one_hot(train_labels[i])
        # forward propagation
        Z, A, dP_dC = forward_propagation(X, my_network, W, B)
        loss = categoricalCrossEntropy(A[-1], Y)
        losses.append(loss)
        # Backpropagation
        dL_dZ = [A[-1] - Y]
        dL_dW, dL_dB = backpropagation(my_network, dL_dZ, W, Z, A , dP_dC)
        # update weights W and Biais B  
        dL_dW.reverse()
        dL_dB.reverse()
        
        W, B, V_w, V_b = update_W_and_B(W, dL_dW, B, dL_dB, learning_rate, optimizer, V_w, V_b)
    acc = compute_accuracy(my_cnn, val_images, val_labels, W, B)
    nbr_minutes = (int(time.time()-start_time))//60
    nbr_seconds = (int(time.time()-start_time))%60
    print('Loss ---->',round(mean(losses), 5),' |  Validation Accuracy ----->',round(acc,4),' | Time ---->',nbr_minutes,'min',nbr_seconds,'s')

Loss ----> 0.44026  |  Validation Accuracy -----> 0.8954  | Time ----> 34 min 27 s
Loss ----> 0.2371  |  Validation Accuracy -----> 0.915  | Time ----> 34 min 6 s
Loss ----> 0.18188  |  Validation Accuracy -----> 0.9324  | Time ----> 33 min 55 s
Loss ----> 0.15335  |  Validation Accuracy -----> 0.9308  | Time ----> 33 min 51 s
Loss ----> 0.13409  |  Validation Accuracy -----> 0.9382  | Time ----> 33 min 58 s
Loss ----> 0.12166  |  Validation Accuracy -----> 0.9466  | Time ----> 34 min 1 s
Loss ----> 0.11382  |  Validation Accuracy -----> 0.933  | Time ----> 33 min 52 s
Loss ----> 0.10488  |  Validation Accuracy -----> 0.9368  | Time ----> 33 min 50 s
Loss ----> 0.09895  |  Validation Accuracy -----> 0.941  | Time ----> 33 min 46 s
Loss ----> 0.09483  |  Validation Accuracy -----> 0.9496  | Time ----> 33 min 52 s
Loss ----> 0.09322  |  Validation Accuracy -----> 0.9394  | Time ----> 33 min 48 s
Loss ----> 0.0936  |  Validation Accuracy -----> 0.9538  | Time ----> 33 min 58 s
Loss ----> 

In [46]:
show_accuracies(my_cnn, train_images, train_labels, val_images, val_labels, test_images, test_labels, W, B)

Accuracies :
    - train accuracy = 95.79166666666666 %
    - val accuracy = 95.1 %
    - test accuracy = 95.38 %
