<div class="alert alert-block alert-info" align="center">
    <h1>
        Imports
    </h1>
</div>

In [1]:
import numpy as np

<div class="alert alert-block alert-info" align="center">
    <h1>
        Déclarations
    </h1>
</div>

<div align="center"><h1> Fonctions utiles </h1></div>

In [2]:
def plot_histories(eta, epochs, cost_history, accuracy_history):
    """Plot the per-epoch cost and accuracy curves on a single 5x5 figure.

    Parameters
    ----------
    eta : learning rate, shown in the figure title.
    epochs : number of epochs; the x axis is ``range(epochs)``.
    cost_history : sequence of cost values, one per epoch.
    accuracy_history : sequence of accuracy values, one per epoch.

    Relies on ``plt`` and ``HandlerLine2D`` being available in the notebook
    namespace (matplotlib).
    """
    epoch_axis = range(epochs)

    fig, ax = plt.subplots(figsize = (5, 5))
    ax.set_title(r"$\eta$ :{}".format(eta))
    ax.set_xlabel('Epochs')
    ax.set_ylabel(r'$J(\theta)$')

    # Unpacking with a trailing comma extracts the single Line2D that
    # ax.plot returns inside a list.
    cost_line, = ax.plot(epoch_axis, cost_history, label = 'Cost')
    accuracy_line, = ax.plot(epoch_axis, accuracy_history, label = 'Accuracy')

    # Only the cost curve gets the custom 4-marker legend handler.
    plt.legend(handler_map = {cost_line: HandlerLine2D(numpoints = 4)})

def plot_decision_boundary(func, X, y):
    """Draw the decision surface of ``func`` with the samples on top.

    Parameters
    ----------
    func : callable receiving an (n_points, 2) array of grid coordinates and
        returning one value per point (its result is reshaped to the grid).
    X : array with two columns; column 0 is drawn on the horizontal axis and
        column 1 on the vertical axis.
    y : values used to colour the scattered samples.

    Relies on ``plt`` and ``ListedColormap`` being available in the notebook
    namespace (matplotlib).
    """
    # Plot window: bounding box of the samples, enlarged by 0.1 on each side.
    lower = X.min(axis = 0) - 0.1
    upper = X.max(axis = 0) + 0.1
    amin, bmin = lower
    amax, bmax = upper

    # 101 x 101 evaluation grid covering the window.
    aa, bb = np.meshgrid(np.linspace(amin, amax, 101),
                         np.linspace(bmin, bmax, 101))
    grid_points = np.c_[aa.ravel(), bb.ravel()]
    cc = func(grid_points).reshape(aa.shape)

    surface_cmap = plt.cm.RdBu
    sample_cmap = ListedColormap(['#FF0000', '#0000FF'])

    fig, ax = plt.subplots()
    contour = plt.contourf(aa, bb, cc, cmap = surface_cmap, alpha = 0.8)

    colorbar = fig.colorbar(contour)
    colorbar.set_label("$P(y = 1)$")
    colorbar.set_ticks([0, 0.25, 0.5, 0.75, 1])

    plt.scatter(X[:, 0], X[:, 1], c = y, cmap = sample_cmap)
    plt.xlim(amin, amax)
    plt.ylim(bmin, bmax)
    plt.title("Decision Boundary")

def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, applied element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def sigmoid_prime(x):
    """Derivative of the logistic function, evaluated at pre-activation ``x``."""
    s = sigmoid(x)
    return s * (1.0 - s)

def tanh(x):
    """Hyperbolic tangent activation; a thin wrapper around ``np.tanh``."""
    activated = np.tanh(x)
    return activated

def tanh_prime(x):
    """Derivative of tanh evaluated at the pre-activation ``x``.

    The back-propagation code calls the derivative with the pre-activation Z
    (the same convention as ``sigmoid_prime``), so the derivative is
    ``1 - tanh(x)**2``. The previous ``1 - x**2`` is only valid when ``x`` is
    already the tanh output, and it becomes negative as soon as ``|x| > 1``.
    """
    return 1 - np.tanh(x) ** 2

def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    rectified = np.maximum(0, x)
    return rectified

def relu_prime(x):
    """Derivative of ReLU: 1 where ``x > 0``, 0 elsewhere.

    Returns a new array and leaves ``x`` untouched. The previous version
    overwrote ``x`` in place, which destroyed the pre-activation array handed
    in by the caller, and it could not handle scalars.

    Floating-point inputs keep their dtype; any other input yields float64.
    """
    values = np.asarray(x)
    if np.issubdtype(values.dtype, np.floating):
        out_dtype = values.dtype
    else:
        out_dtype = np.float64
    return (values > 0).astype(out_dtype)

def leakyrelu(x):
    """Leaky ReLU with slope 0.01 on the negative side.

    Returns ``x`` where ``x > 0`` and ``0.01 * x`` elsewhere, which matches
    the 0.01 / 1 slopes returned by ``leakyrelu_prime``. The previous
    ``np.maximum(0.01, x)`` clamped every value below 0.01 to the constant
    0.01 instead of scaling it.
    """
    values = np.asarray(x)
    # For x > 0, 0.01*x < x; for x < 0, 0.01*x > x. So the element-wise max
    # selects the right branch on both sides.
    return np.maximum(0.01 * values, values)

def leakyrelu_prime(x):
    """Derivative of leaky ReLU: 1 where ``x > 0``, 0.01 elsewhere.

    Returns a new array and leaves ``x`` untouched. The previous version
    overwrote ``x`` in place, and on integer arrays the 0.01 slope was
    truncated to 0.

    Floating-point inputs keep their dtype; any other input yields float64.
    """
    values = np.asarray(x)
    if np.issubdtype(values.dtype, np.floating):
        out_dtype = values.dtype
    else:
        out_dtype = np.float64
    return np.where(values > 0, 1.0, 0.01).astype(out_dtype)

def softmax(x):
    """Softmax along axis 0 (one distribution per column).

    Each column is shifted by its own maximum before exponentiation. Shifting
    by the global maximum, as the previous version did, makes every entry of
    a column far below that maximum underflow to 0, so the column becomes
    0/0 = NaN. Softmax is invariant to a per-column shift, so all other
    results are unchanged.
    """
    shifted = x - np.max(x, axis = 0, keepdims = True)
    expx = np.exp(shifted)
    return expx / expx.sum(axis = 0, keepdims = True)

<div align="center"><h1> Classes </h1></div>

In [13]:
class Conv3x3:
    """Convolution layer made of ``num_filters`` filters of size 3x3."""

    def __init__(self, num_filters):
        self.num_filters = num_filters

        # One 3x3 kernel per filter -> shape (num_filters, 3, 3).
        # Dividing by 9 reduces the variance of the initial values.
        self.filters = np.random.randn(num_filters, 3, 3) / 9

    def iterate_regions(self, image):
        """Yield ``(region, i, j)`` for every 3x3 window of a 2-D ``image``.

        Valid padding is used: ``(i, j)`` is the top-left corner of the
        window and ranges over ``(h - 2) x (w - 2)`` positions.
        """
        rows, cols = image.shape

        for top in range(rows - 2):
            for left in range(cols - 2):
                yield image[top:top + 3, left:left + 3], top, left

    def forward(self, input):
        """Apply every filter to the 2-D array ``input``.

        Returns an array of shape ``(h - 2, w - 2, num_filters)``.
        """
        rows, cols = input.shape
        feature_maps = np.zeros((rows - 2, cols - 2, self.num_filters))

        for patch, top, left in self.iterate_regions(input):
            # Broadcasting the (3, 3) patch against (num_filters, 3, 3) and
            # summing over the two kernel axes gives one response per filter.
            responses = patch * self.filters
            feature_maps[top, left] = responses.sum(axis=(1, 2))

        return feature_maps

class MaxPool2:
    """Max-pooling layer with a 2x2 window and no overlap."""

    def iterate_regions(self, image):
        """Yield ``(region, i, j)`` for each non-overlapping 2x2 window.

        ``image`` is a 3-D array ``(h, w, num_filters)``; each region keeps
        the filter axis. A trailing odd row or column is ignored.
        """
        height, width, _ = image.shape
        pooled_h, pooled_w = height // 2, width // 2

        for i in range(pooled_h):
            top = i * 2
            for j in range(pooled_w):
                left = j * 2
                yield image[top:top + 2, left:left + 2], i, j

    def forward(self, input):
        """Pool ``input`` of shape ``(h, w, num_filters)``.

        Returns an array of shape ``(h // 2, w // 2, num_filters)`` holding,
        for each filter, the maximum of every 2x2 window.
        """
        height, width, num_filters = input.shape
        pooled = np.zeros((height // 2, width // 2, num_filters))

        for window, i, j in self.iterate_regions(input):
            # Maximum over the two spatial axes, one value per filter.
            pooled[i, j] = np.amax(window, axis=(0, 1))

        return pooled
    
class Flatten:
    """Flattening layer: collapses its input into a 1-D array."""

    def forward(self, input):
        """Return a flattened copy of ``input`` (``ndarray.flatten``)."""
        flat = input.flatten()
        return flat
    
class MyLayer:
    """Description and state of one layer of the network.

    Keyword arguments (all optional, default ``None``):
    - ``input``: number of neurons of the previous layer.
    - ``output``: number of neurons of this layer.
    - ``activation``: activation name for dense layers ("relu", "sigmoid",
      "tanh", "leakyrelu" or "softmax").
    - ``type``: 'RNN' for a dense layer, 'CNN' for a convolutional-stack layer.
    - ``couche``: CNN operator name ("Conv3x3", "MaxPool2" or "flatten").
    - ``paramCouche``: operator parameter; for "Conv3x3" it is the number of
      filters (converted with ``int``).

    Attributes set here:
    - ``parameters``: dict holding 'W', 'b', 'A', 'Z'.
    - ``derivatives``: dict holding 'dW', 'db', 'dA', 'dZ'.
    - ``activation_func`` / ``backward_activation_func``: dense activation and
      its derivative, or ``None`` when not applicable or unknown.
    - ``outputCNN`` / ``activationCNNFunc``: CNN operator instance and its
      bound ``forward`` method, or ``None``.
    """

    def __init__(self, *args, **kwargs):
        self.input = kwargs.get("input", None)  # neurons at layer i-1
        self.output = kwargs.get("output", None)  # neurons at layer i (this layer)
        self.activ_function_curr = kwargs.get("activation", None)  # activation name
        self.paramCouche = kwargs.get("paramCouche", None)  # parameter of the Conv3x3 operator
        self.type = kwargs.get("type", None)
        self.couche = kwargs.get("couche", None)
        self.parameters = {}
        self.derivatives = {}
        self.activation_func = None
        # Always defined, so that reading it on a layer without a known dense
        # activation gives None instead of raising AttributeError.
        self.backward_activation_func = None
        self.activationCNNFunc = None
        self.outputCNN = None

        if self.type == 'RNN':
            self._bind_activation()
        elif self.type == 'CNN':
            self._bind_cnn_operator()

    def _bind_activation(self):
        """Attach the activation / derivative pair named by ``activation``."""
        # NOTE(review): softmax is registered as its own derivative, exactly
        # as before; this is not the softmax Jacobian. Confirm it is only
        # used on the output layer, whose gradient is computed directly.
        known = {
            "relu": (relu, relu_prime),
            "sigmoid": (sigmoid, sigmoid_prime),
            "tanh": (tanh, tanh_prime),
            "leakyrelu": (leakyrelu, leakyrelu_prime),
            "softmax": (softmax, softmax),
        }
        pair = known.get(self.activ_function_curr)
        if pair is not None:
            self.activation_func, self.backward_activation_func = pair

    def _bind_cnn_operator(self):
        """Instantiate the CNN operator named by ``couche`` and bind its forward."""
        if self.couche == "Conv3x3":
            self.outputCNN = Conv3x3(int(self.paramCouche))
        elif self.couche == "MaxPool2":
            self.outputCNN = MaxPool2()
        elif self.couche == "flatten":
            self.outputCNN = Flatten()

        # No backward pass is bound: the CNN operators only expose forward().
        if self.outputCNN is not None:
            self.activationCNNFunc = self.outputCNN.forward

    def initParams(self):
        """Initialise ``parameters['W']`` (output x input) and ``parameters['b']`` (output x 1).

        The global NumPy seed is reset to 30 on every call, so the drawn
        values are reproducible; as a consequence two layers with the same
        shape receive the same initial values.
        """
        seed = 30
        np.random.seed(seed)
        # W is scaled by sqrt(2 / input); b is drawn with a 0.1 scale.
        self.parameters['W'] = np.random.randn(self.output, self.input) * np.sqrt(2 / self.input)
        self.parameters['b'] = np.random.randn(self.output, 1) * 0.1

    def _store_parameter(self, key, value):
        """Store a private copy of ``value`` under ``parameters[key]``."""
        self.parameters[key] = np.copy(value)

    def _store_derivative(self, key, value):
        """Store a private copy of ``value`` under ``derivatives[key]``.

        The same copy is also kept under ``parameters[key]``, where the
        setters used to write, so existing readers of that dict keep working.
        """
        copied = np.copy(value)
        self.derivatives[key] = copied
        self.parameters[key] = copied

    def setW(self, matW):
        """Set the weight matrix W (copied)."""
        self._store_parameter('W', matW)

    def setA(self, matA):
        """Set the activation A (copied)."""
        self._store_parameter('A', matA)

    def setZ(self, matZ):
        """Set the pre-activation Z (copied)."""
        self._store_parameter('Z', matZ)

    def setB(self, matB):
        """Set the bias b (copied)."""
        self._store_parameter('b', matB)

    def setdW(self, matdW):
        """Set the gradient dW (copied)."""
        self._store_derivative('dW', matdW)

    def setdA(self, matdA):
        """Set the gradient dA (copied)."""
        self._store_derivative('dA', matdA)

    def setdZ(self, matdZ):
        """Set the gradient dZ (copied)."""
        self._store_derivative('dZ', matdZ)

    def setdB(self, matdB):
        """Set the gradient db (copied)."""
        self._store_derivative('db', matdB)

class MyNeuralNetwork:
    """Sequential network: an optional stack of CNN layers feeding dense layers.

    Storage convention (the one ``addLayer`` builds, used by every method):
    - ``self.CNN[0]`` is a placeholder describing the network input; the real
      CNN layers are ``self.CNN[1:]`` (``nbCNNlayers`` of them).
    - ``self.layers`` holds the ``nbLayers`` dense layers (type 'RNN') at
      indices ``0 .. nbLayers - 1``; the last one is the output layer.

    Several methods previously indexed ``self.layers[self.nbLayers]``, which
    is always out of range under this convention.
    """

    def __init__(self):
        self.nbLayers = 0
        self.nbCNNlayers = 0
        self.layers = []  # dense layers
        self.CNN = []  # CNN layers (index 0 is the input placeholder)
        self._dense_input = None  # activation fed to the first dense layer

    # ------------------------------------------------------------------ helpers
    def _output_layer(self):
        """Return the last dense layer, or raise if there is none."""
        if not self.layers:
            raise ValueError("the network has no dense layer; add one with addLayer() first")
        return self.layers[-1]

    def _input_of(self, index):
        """Return the activation that was fed to dense layer ``index``."""
        if index == 0:
            return self._dense_input
        return self.layers[index - 1].parameters['A']

    # ------------------------------------------------------------------ display
    def info(self):
        """Print sizes, operator/activation, W and b of every layer."""
        print(f'Content of the network :')
        for i, cnn_layer in enumerate(self.CNN):
            print(f'\n\tLayer n° {i} du CNN => ')
            print(f'\t\tInput : {cnn_layer.input}\n\t\tOutput : {cnn_layer.output}')
            # Index 0 is the input placeholder: it has no operator and no parameters.
            if i != 0:
                print(f'\t\tCouche : {cnn_layer.activationCNNFunc}')
                print(f'\t\tW shape : {cnn_layer.parameters["W"].shape}\n\t\tW data :\n{cnn_layer.parameters["W"]}')
                print(f'\t\tb shape : {cnn_layer.parameters["b"].shape}\n\t\tb data :\n{cnn_layer.parameters["b"]}')

        for i, dense_layer in enumerate(self.layers):
            print(f'\n\tLayer n° {i} du NN => ')
            print(f'\t\tInput : {dense_layer.input}\n\t\tOutput : {dense_layer.output}')
            print(f'\t\tCouche : {dense_layer.activation_func}')
            print(f'\t\tW shape : {dense_layer.parameters["W"].shape}\n\t\tW data :\n{dense_layer.parameters["W"]}')
            print(f'\t\tb shape : {dense_layer.parameters["b"].shape}\n\t\tb data :\n{dense_layer.parameters["b"]}')

    # ------------------------------------------------------------- construction
    def addLayer(self, layer):
        """Append ``layer`` to the CNN stack or to the dense stack.

        - type 'CNN': the first CNN layer also creates the input placeholder
          from ``layer.input``; every CNN layer's ``input`` is then set to the
          ``output`` of the layer before it.
        - type 'RNN': the first dense layer takes its ``input`` from the last
          CNN layer when one exists, otherwise it keeps the ``input`` given
          by the caller (dense-only network; previously an IndexError). Later
          dense layers take the ``output`` of the previous dense layer.

        ``layer.initParams()`` is called in both cases.

        Raises
        ------
        ValueError: unknown layer type (previously ignored silently), or a
            first dense layer without CNN layers and without ``input``.
        """
        if layer.type == 'CNN':
            self.nbCNNlayers += 1
            if self.nbCNNlayers == 1:
                self.CNN.append(MyLayer(output = layer.input))
            self.CNN.append(layer)
            layer.input = self.CNN[self.nbCNNlayers - 1].output
            layer.initParams()
        elif layer.type == 'RNN':
            if self.layers:
                layer.input = self.layers[-1].output
            elif self.CNN:
                layer.input = self.CNN[-1].output
            elif layer.input is None:
                raise ValueError("the first dense layer needs an 'input' size when the network has no CNN layer")
            self.nbLayers += 1
            self.layers.append(layer)
            layer.initParams()
        else:
            raise ValueError("unknown layer type {!r}; expected 'CNN' or 'RNN'".format(layer.type))

    def set_parametersW_b (self, numlayer, matX, matb):
        """Overwrite W and b of dense layer ``numlayer`` with copies of ``matX`` / ``matb``."""
        target = self.layers[numlayer]
        target.parameters['W'] = np.copy(matX)
        target.parameters['b'] = np.copy(matb)

    # ------------------------------------------------------------------ forward
    def forward_propagation(self, X):
        """Propagate ``X`` through the CNN stack (if any), then the dense layers.

        CNN layers are applied by calling their bound ``forward`` in order and
        the result is reshaped to a single column before the dense layers.
        Without CNN layers ``X`` is fed to the dense layers as is. Every dense
        layer stores its ``Z = W . A_prev + b`` and ``A = activation(Z)``.

        NOTE(review): the CNN operators work on one image at a time, so the
        CNN path assumes ``X`` is a single 2-D image, and the flattened length
        must equal the declared input size of the first dense layer - confirm
        against the intended data pipeline.
        """
        activation = X
        if self.nbCNNlayers > 0:
            self.CNN[0].setA(X)
            for l in range(1, self.nbCNNlayers + 1):
                activation = self.CNN[l].activationCNNFunc(activation)
                self.CNN[l].setA(activation)
            # One image -> one sample: dense layers expect (features, samples).
            activation = np.reshape(activation, (-1, 1))

        # Kept for backward_propagation, which needs the input of dense layer 0.
        self._dense_input = np.asarray(activation)

        for layer in self.layers:
            layer.setZ(np.dot(layer.parameters['W'], activation) + layer.parameters['b'])
            layer.setA(layer.activation_func(layer.parameters['Z']))
            activation = layer.parameters['A']

    def cost_function(self, y):
        """Mean binary cross-entropy between the output activation and ``y``.

        1e-8 is added inside both logarithms to avoid ``log(0)``.
        """
        y_hat = self._output_layer().parameters['A']
        losses = -(y * np.log(y_hat + 1e-8) + (1 - y) * np.log(1 - y_hat + 1e-8))
        return losses.mean()

    # ----------------------------------------------------------------- backward
    def backward_propagation(self, y):
        """Compute dZ, dW and db for every dense layer (after a forward pass).

        The output layer uses ``dZ = A - y``. Earlier layers use
        ``dZ = W_next^T . dZ_next * g'(Z)``.

        NOTE(review): as before, db is averaged over the m samples while dW is
        not - confirm whether dW should also be divided by m. CNN layers have
        no backward pass.
        """
        last = self.nbLayers - 1
        output_layer = self._output_layer()

        output_A = output_layer.parameters['A']
        dZ = output_A - y
        output_layer.derivatives['dZ'] = dZ
        output_layer.derivatives['dW'] = np.dot(dZ, np.transpose(self._input_of(last)))
        m = output_A.shape[1]  # number of columns of A
        output_layer.derivatives['db'] = np.sum(dZ, axis=1, keepdims=True) / m

        for l in range(last - 1, -1, -1):
            layer = self.layers[l]
            following = self.layers[l + 1]
            # The derivative receives a copy of Z so that an implementation
            # working in place cannot corrupt the stored pre-activation.
            slope = layer.backward_activation_func(np.copy(layer.parameters["Z"]))
            dZ = np.dot(np.transpose(following.parameters['W']), following.derivatives['dZ']) * slope
            layer.derivatives['dZ'] = dZ

            previous_A = self._input_of(l)
            layer.derivatives["dW"] = np.dot(dZ, np.transpose(previous_A))
            m = previous_A.shape[1]  # number of columns of A
            layer.derivatives['db'] = np.sum(dZ, axis=1, keepdims=True) / m

    def update_parameters(self, eta) :
        """Gradient-descent step ``W -= eta * dW``, ``b -= eta * db`` on every dense layer."""
        for layer in self.layers:
            layer.parameters['W'] -= eta * layer.derivatives['dW']
            layer.parameters["b"] -= eta * layer.derivatives["db"]

    # --------------------------------------------------------------- evaluation
    def convert_prob_into_class(self,probs):
        """Return a thresholded copy of ``probs``: 1 where > 0.5, else 0."""
        probs = np.copy(probs)  # keep the caller's y_hat intact
        probs[probs > 0.5] = 1
        probs[probs <= 0.5] = 0
        return probs

    def plot_W_b_epoch (self,epoch,parameter_history):
        """Show the W matrix of every dense layer as recorded at ``epoch``.

        ``parameter_history`` is the list returned by ``fit``; the matrices
        are read from its 'W1' .. 'W<nbLayers>' entries. Relies on ``plt``.
        """
        mat = [parameter_history[epoch]['W' + str(l)] for l in range(1, self.nbLayers + 1)]
        max_size_layer = max((value.shape[1] for value in mat), default=0)
        # A height of at least 1 avoids a zero-sized figure for narrow layers.
        plt.figure(figsize=((self.nbLayers + 1) * 3, max(1, int(max_size_layer / 2))))
        for nb_w, value in enumerate(mat):
            plt.subplot(1, len(mat), nb_w + 1)
            plt.matshow(value, cmap = plt.cm.gist_rainbow, fignum=False, aspect='auto')
            plt.colorbar()
        thelegend = "Epoch " + str(epoch)
        plt.title(thelegend)

    def accuracy(self,y_hat, y):
        """Fraction of samples (columns) classified correctly."""
        if self._output_layer().activation_func == softmax:
            # With softmax the classes are spread over the rows: compare the
            # row index of the maximum of y_hat and of the one-hot y.
            y_hat_encoded = np.argmax(y_hat, axis=0)
            y_encoded = np.argmax(y, axis=0)
            return (y_hat_encoded == y_encoded).mean()
        # Otherwise (e.g. sigmoid for binary classification) threshold the
        # probabilities into classes.
        y_hat_ = self.convert_prob_into_class(y_hat)
        return (y_hat_ == y).all(axis=0).mean()

    def predict(self, x):
        """Run a forward pass on ``x`` and return the output-layer activation."""
        self.forward_propagation(x)
        return self._output_layer().parameters['A']

    # ----------------------------------------------------------------- training
    def next_batch(self,X, y, batchsize):
        """Yield shuffled ``(X_batch, y_batch)`` pairs of at most ``batchsize`` rows.

        ``X`` and ``y`` are transposed first so that samples are rows, which
        lets a single permutation reorder both; batches are yielded in that
        row-per-sample layout.
        """
        X = np.transpose(X)
        y = np.transpose(y)

        m = len(y)
        # Random permutation so that batches contain random samples.
        indices = np.random.permutation(m)
        X = X[indices]
        y = y[indices]
        for start in np.arange(0, X.shape[0], batchsize):
            yield (X[start:start + batchsize], y[start:start + batchsize])

    def fit(self, X, y, *args,**kwargs):
        """Iterate over mini-batches for several epochs and record statistics.

        Keyword arguments: ``epochs`` (20), ``verbose`` (False), ``eta``
        (0.01), ``batchsize`` (32).

        Returns ``(layers, cost_history, accuracy_history, parameter_history)``.
        ``parameter_history`` has one dict per epoch with, for every dense
        layer numbered from 1, the mean/max/min/std of W and b and copies of
        A, Z, W and b.

        NOTE(review): the back-propagation and parameter-update steps were
        commented out in this method and remain disabled here, so the weights
        are not updated; presumably this is because the CNN layers have no
        backward pass yet - re-enable once that is settled.
        """
        epochs = kwargs.get("epochs",20)
        verbose = kwargs.get("verbose",False)
        eta = kwargs.get("eta",0.01)  # unused while the update step is disabled
        batchsize = kwargs.get("batchsize",32)

        cost_history = []
        accuracy_history = []
        parameter_history = []
        for i in range(1, epochs + 1):
            # Per-mini-batch cost and accuracy for this epoch.
            cost_batch = []
            accuracy_batch = []
            for (batchX, batchy) in self.next_batch(X, y, batchsize):
                # Back to the (features, samples) layout.
                batchX = np.transpose(batchX)
                if self._output_layer().activation_func == softmax:
                    # Multi-class: y is one-hot encoded, one row per sample.
                    batchy = np.transpose(batchy.reshape((batchy.shape[0], batchy.shape[1])))
                else:
                    # Binary classification: y has no second dimension.
                    batchy = np.transpose(batchy.reshape((batchy.shape[0], 1)))

                self.forward_propagation(batchX)

                cost_batch.append(self.cost_function(batchy))
                # The forward pass above already produced the prediction.
                y_hat = self._output_layer().parameters['A']
                accuracy_batch.append(self.accuracy(y_hat, batchy))

            # Statistics on W and b plus copies of A, Z, W, b. Keys are
            # numbered from 1; dense layer number l is self.layers[l - 1].
            # (Every key used to record the same out-of-range layer.)
            save_values = {}
            save_values["epoch"] = i
            for l in range(1, self.nbLayers + 1):
                params = self.layers[l - 1].parameters
                suffix = str(l)
                save_values["layer" + suffix] = l
                save_values["Wmean" + suffix] = np.mean(params['W'])
                save_values["Wmax" + suffix] = np.amax(params['W'])
                save_values["Wmin" + suffix] = np.amin(params['W'])
                save_values["Wstd" + suffix] = np.std(params['W'])
                save_values["bmean" + suffix] = np.mean(params['b'])
                save_values["bmax" + suffix] = np.amax(params['b'])
                save_values["bmin" + suffix] = np.amin(params['b'])
                save_values["bstd" + suffix] = np.std(params['b'])
                # A, Z, W and b must be copied, otherwise they are references.
                for key in ('A', 'Z', 'W', 'b'):
                    save_values[key + suffix] = np.copy(params[key])

            parameter_history.append(save_values)
            # Epoch value = average over the mini-batches.
            current_cost = np.average(cost_batch)
            cost_history.append(current_cost)
            current_accuracy = np.average(accuracy_batch)
            accuracy_history.append(current_accuracy)

            if(verbose == True):
                print("Epoch : #%s/%s - %s/%s - cost : %.4f - accuracy : %.4f"%(i,epochs,X.shape[1],X.shape[1], float(current_cost), current_accuracy))

        return self.layers, cost_history, accuracy_history, parameter_history

<div align="center"><h1> Applications </h1></div>

In [14]:
# Demo network: three CNN stages (convolution, pooling, flatten) followed by
# two dense layers, then a dump of its content.
myNetwork = MyNeuralNetwork()

layer_specs = (
    dict(type = "CNN", input = 4, output = 10, couche = "Conv3x3", paramCouche = "8"),
    dict(type = "CNN", output = 3, couche = "MaxPool2"),
    dict(type = "CNN", output = 2, couche = "flatten"),
    dict(type = "RNN", output = 10, activation = "relu"),
    dict(type = "RNN", output = 5, activation = "softmax"),
)

# Each layer is built right before it is added, so construction and
# registration alternate in the same order as explicit one-per-line calls.
for spec in layer_specs:
    myNetwork.addLayer(MyLayer(**spec))

myNetwork.info()

Content of the network :

	Layer n° 0 du CNN => 
		Input : None
		Output : 4

	Layer n° 1 du CNN => 
		Input : 4
		Output : 10
		Couche : <bound method Conv3x3.forward of <__main__.Conv3x3 object at 0x000001E12CFB5388>>
		W shape : (10, 4)
		W data :
[[-0.89382021  1.08039224 -0.68639629  0.3327359 ]
 [-0.07120333  0.21481422 -1.22043974  1.12083168]
 [ 0.09496203 -0.78266501  1.11597399  0.07601252]
 [-0.5402634  -0.54814105  0.97852771  0.53767345]
 [-0.20198188  0.38068329 -1.47353744  0.66311181]
 [-0.00408566 -0.33055755 -0.29911699  0.75270251]
 [-2.05299092  1.14331415  1.00798771 -0.47005285]
 [ 0.69651281 -1.20241052 -0.77791915 -0.93030086]
 [ 0.57311069 -0.73876987 -0.48735876 -0.60299016]
 [ 0.78734368 -1.35847214 -0.49596641  0.60039578]]
		b shape : (10, 1)
		b data :
[[ 0.10506998]
 [-0.05368745]
 [-0.00291927]
 [ 0.1394056 ]
 [-0.03978674]
 [ 0.10191274]
 [ 0.18988813]
 [-0.03548496]
 [-0.03870165]
 [-0.19273054]]

	Layer n° 2 du CNN => 
		Input : 10
		Output : 3
		Couc