# Réseaux à convolution

Dans ce notebook, nous créerons un réseau à convolution.  Mais avant de commencer, assurez-vous d'avoir bien compris les notebooks sur la **convolution**, la **batch-norm** et le **dropout**.

Ici, nous utiliserons pour l'essentiel le code dans les fichiers suivants :

    model/Model.py
    layers/Conv.py 
    utils/model_loss.py
    layers/MaxPool.py
    
Comme au tp1, la classe **Model** "crée" un réseau de neurones en ajoutant successivement des couches et une fonction de perte.


In [None]:
import random
import numpy as np
from utils.data_utils import load_CIFAR10
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# To automatically reload external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

In [None]:
def create_toy_data():
    """Build a small, reproducible random dataset for the toy models.

    Reads the module-level globals ``num_inputs``, ``channels`` and
    ``input_size`` to size the input array.

    Returns:
        A tuple ``(X, y)``: ``X`` is a seeded Gaussian array of shape
        ``(num_inputs, channels, input_size, input_size)`` scaled by 10 and
        ``y`` is the fixed label vector ``[0, 1, 2, 1, 2]``.
    """
    np.random.seed(1)
    shape = (num_inputs, channels, input_size, input_size)
    inputs = 10 * np.random.randn(*shape)
    labels = np.array([0, 1, 2, 1, 2])
    return inputs, labels

def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000, num_dev=500):
    """
    Load the CIFAR-10 dataset, split it and preprocess the images.

    Preprocessing moves the last axis of the images to position 1
    (presumably NHWC -> NCHW; confirm against load_CIFAR10) and centres every
    split by subtracting the mean image of the training split.

    Input :
    - num_training : number of images to put in the training set
    - num_validation : number of images to put in the validation set
    - num_test : number of images to put in the test set
    - num_dev : number of images to put in the dev set (drawn at random,
      without replacement, from the training set)

    Output :
    - X_train, y_train : training data and targets
    - X_val, y_val : validation data and targets
    - X_test, y_test : test data and targets
    - X_dev, y_dev : dev data and targets
    """
    # Load the CIFAR-10 data
    cifar10_dir = 'datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Split into training, validation, test and dev sets.
    # The validation images are the ones right after the training images.
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]
    mask = np.random.choice(num_training, num_dev, replace=False)
    X_dev = X_train[mask]
    y_dev = y_train[mask]

    X_train = X_train.transpose(0, 3, 1, 2)
    X_test = X_test.transpose(0, 3, 1, 2)
    X_val = X_val.transpose(0, 3, 1, 2)
    X_dev = X_dev.transpose(0, 3, 1, 2)

    # Normalisation: the statistics come from the training split only and the
    # same mean image is removed from every split, so that validation/test
    # data are preprocessed exactly like the data the model was trained on.
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image
    X_dev -= mean_image

    return X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev

## Modèle simple

Commençons avec un modèle très simple à une couche cachée.  Pour la fonction:

    cross_entropy_loss
    
vous pouvez récupérer votre code du tp1


In [None]:
from model.Model import Model
from layers.Conv import Conv2DNaive
from utils.model_loss import cross_entropy_loss_npdl

# Hyper-parameters of the one-layer toy network and of its toy inputs
# (create_toy_data reads num_inputs, channels and input_size).
num_filters = 3
filter_size = 4
channels = 1
num_inputs = 5
input_size = 4
padding = 0
stride = 1

# Seed NumPy's RNG before the layer is built
# (presumably so that the layer's initial weights are reproducible).
np.random.seed(0)
model = Model()
layer = Conv2DNaive(num_filters, filter_size=filter_size, channels=channels, 
                        stride=stride, padding=padding, weight_scale=1e-1)
model.add(layer)
model.add_loss(cross_entropy_loss_npdl)

In [None]:
##############################################################################
# TODO: Take the "naive" convolution code you wrote in the convolution       #
#   notebook and put it in the "forward" method of the Conv2DNaive class     #
#   in the file Conv.py.                                                     #
#   If there is no bug, the following test should pass                       #
##############################################################################
X, y = create_toy_data()
scores = model.forward_npdl(X).reshape(num_inputs, num_filters)
correct_scores = np.asarray([[2.26107191, 1.90356006, -10.7996371 ],
                             [-4.03277981, 6.72254124, 0.36232013],
                             [-0.33115169, 3.30740614, 8.24078811],
                             [-0.78558615, 3.5691293, -5.41204248],
                             [7.16759091, -7.01187203, -4.51633761]])

print('Your scores:')
print(scores)
print()
print('correct scores:')
print(correct_scores)
print()

# The difference should be quite small, in principle below 1e-7.
print('Difference between your scores and correct scores: ', np.sum(np.abs(scores - correct_scores)))

In [None]:
# Loss and score gradients of the toy model; the third argument (0.1) is
# passed as `reg` in the other calls of this notebook.
loss, dScores, _ = model.calculate_loss(scores, y, 0.1)
correct_loss = 2.47808382744825

# The error should be below roughly 1e-12.
print('Loss: ', loss)
print('Correct loss: ', correct_loss)
print('Difference between your loss and correct loss: ', np.sum(np.abs(loss - correct_loss)))

In [None]:
##############################################################################
# TODO: Implement the backward method (back-propagation) of the              #
# Conv2DNaive layer class.                                                   #
##############################################################################

# dScores is reshaped to 4 dimensions (5, 3, 1, 1) before being back-propagated.
_ = model.backward_npdl(dScores.reshape(5, 3, 1, 1))

In [None]:
from utils.gradients import evaluate_numerical_gradient

# Returns the maximum relative error between the gradient arrays given as arguments.
# For each parameter, the relative error should be below roughly 1e-8.
def rel_error(x, y):
    """Return the largest element-wise relative error between ``x`` and ``y``.

    Each element's error is ``|x - y| / max(1e-8, |x| + |y|)``; the 1e-8 floor
    on the denominator avoids a division by zero when both values are zero.
    """
    numerator = np.abs(x - y)
    denominator = np.maximum(1e-8, np.abs(x) + np.abs(y))
    return np.max(numerator / denominator)

# Compare the gradients reported by the model with numerical estimates,
# one parameter of one layer at a time.
gradients = model.gradients()
model_params = model.parameters()

for layer_name, layer_params in model_params.items():
    for param_name in layer_params:
        numeric_grad = evaluate_numerical_gradient(X, y, model, layer_name, param_name, reg=0.1)
        model_grad = gradients[layer_name][param_name]
        label = layer_name + '-' + param_name

        print('%s max relative error: %e' % (label, rel_error(numeric_grad, model_grad)))

# Test de différentes implantations de la convolution

    version naive
    version "matricée"
    version "optimisée"

La version optimisée implique l'utilisation de code **"cython"**


In [None]:
from model.Model import Model
from layers.Conv import Conv2DNaive, Conv2DMat, Conv2DCython
from utils.model_loss import cross_entropy_loss_npdl

# Convolution hyper-parameters shared by the three one-layer toy models
num_filters = 3
filter_size = 4
channels = 1

# Size of the toy inputs (read by create_toy_data) and convolution geometry
num_inputs = 5
input_size = 4
padding = 0
stride = 1

def create_toy_model():
    """Build the one-layer toy network based on the naive convolution.

    NumPy's RNG is re-seeded with 0 first. The layer is configured from the
    module-level globals ``num_filters``, ``filter_size``, ``channels``,
    ``stride`` and ``padding``.

    Returns:
        A ``Model`` holding one ``Conv2DNaive`` layer and the
        ``cross_entropy_loss_npdl`` loss.
    """
    np.random.seed(0)
    net = Model()
    conv_options = dict(filter_size=filter_size, channels=channels,
                        stride=stride, padding=padding, weight_scale=1e-1)
    net.add(Conv2DNaive(num_filters, **conv_options))
    net.add_loss(cross_entropy_loss_npdl)
    return net

def create_toy_model_mat():
    """Build the one-layer toy network based on the matricized convolution.

    NumPy's RNG is re-seeded with 0 first. The layer is configured from the
    module-level globals ``num_filters``, ``filter_size``, ``channels``,
    ``stride`` and ``padding``.

    Returns:
        A ``Model`` holding one ``Conv2DMat`` layer and the
        ``cross_entropy_loss_npdl`` loss.
    """
    np.random.seed(0)
    net = Model()
    conv_options = dict(filter_size=filter_size, channels=channels,
                        stride=stride, padding=padding, weight_scale=1e-1)
    net.add(Conv2DMat(num_filters, **conv_options))
    net.add_loss(cross_entropy_loss_npdl)
    return net

def create_toy_model_fast():
    """Build the one-layer toy network based on the Cython convolution.

    NumPy's RNG is re-seeded with 0 first. The layer is configured from the
    module-level globals ``num_filters``, ``filter_size``, ``channels``,
    ``stride`` and ``padding``.

    Returns:
        A ``Model`` holding one ``Conv2DCython`` layer and the
        ``cross_entropy_loss_npdl`` loss.
    """
    np.random.seed(0)
    net = Model()
    conv_options = dict(filter_size=filter_size, channels=channels,
                        stride=stride, padding=padding, weight_scale=1e-1)
    net.add(Conv2DCython(num_filters, **conv_options))
    net.add_loss(cross_entropy_loss_npdl)
    return net

# One model per convolution implementation, plus the shared toy data
model_naive = create_toy_model()
model_mat = create_toy_model_mat()
model_fast = create_toy_model_fast()
X, y = create_toy_data()

<font size="4">CNN avec matricisation des entrées</font>

Ici le code "forward" et "backward" de la convolution "matricée" vous est fourni... à un détail près : **il manque, dans les 2 cas, la fonction d'activation**.  À vous de l'ajouter à ces deux fonctions (cf. classe **Conv2DMat**).


In [None]:
##############################################################################
# TODO: Implement the forward method (forward propagation) of the            #
# Conv2DMat layer class (matricized convolution).                            #
##############################################################################

scores = model_mat.forward_npdl(X).reshape(num_inputs, num_filters)
correct_scores = np.asarray([[2.26107191, 1.90356006, -10.7996371 ],
                             [-4.03277981, 6.72254124, 0.36232013],
                             [-0.33115169, 3.30740614, 8.24078811],
                             [-0.78558615, 3.5691293, -5.41204248],
                             [7.16759091, -7.01187203, -4.51633761]])

print('Your scores:')
print(scores)
print()
print('correct scores:')
print(correct_scores)
print()

# The difference should be quite small, in principle below 1e-7.
print('Difference between your scores and correct scores: ', np.sum(np.abs(scores - correct_scores)))

In [None]:
# Loss and score gradients of the matricized model
loss, dScores_mat, _ = model_mat.calculate_loss(scores, y, 0.1)
correct_loss = 2.47808382744825

# The error should be below roughly 1e-12.
print('Loss: ', loss)
print('Correct loss: ', correct_loss)
print('Difference between your loss and correct loss: ', np.sum(np.abs(loss - correct_loss)))

In [None]:
##############################################################################
# TODO: Implement the backward method (back-propagation) of the              #
# Conv2DMat layer class.                                                     #
##############################################################################

# dScores_mat is reshaped to 4 dimensions (5, 3, 1, 1) before being back-propagated.
_ = model_mat.backward_npdl(dScores_mat.reshape(5, 3, 1, 1))

In [None]:
# Compare the gradients reported by the matricized model with numerical
# estimates, one parameter of one layer at a time.
gradients = model_mat.gradients()
model_params = model_mat.parameters()

for layer_name, layer_params in model_params.items():
    for param_name in layer_params:
        numeric_grad = evaluate_numerical_gradient(X, y, model_mat, layer_name, param_name, reg=0.1)
        model_grad = gradients[layer_name][param_name]
        label = layer_name + '-' + param_name

        print('%s max relative error: %e' % (label, rel_error(numeric_grad, model_grad)))

<font size="4">CNN vectorisé, utilise cython</font>

Ici le code "forward" et "backward" de la convolution "cythonisée" vous est fourni... à un détail près : **il manque, dans les 2 cas, la fonction d'activation**.  À vous de l'ajouter à ces deux fonctions (cf. la classe **Conv2DCython**).

In [None]:
##############################################################################
# TODO: Implement the forward method (forward propagation) of the            #
# Conv2DCython layer class.                                                  #
##############################################################################

scores = model_fast.forward_npdl(X).reshape(num_inputs, num_filters)
correct_scores = np.asarray([[2.26107191, 1.90356006, -10.7996371 ],
                             [-4.03277981, 6.72254124, 0.36232013],
                             [-0.33115169, 3.30740614, 8.24078811],
                             [-0.78558615, 3.5691293, -5.41204248],
                             [7.16759091, -7.01187203, -4.51633761]])

print('Your scores:')
print(scores)
print()
print('correct scores:')
print(correct_scores)
print()

# The difference should be quite small, in principle below 1e-7.
print('Difference between your scores and correct scores: ', np.sum(np.abs(scores - correct_scores)))

In [None]:
# Loss and score gradients of the Cython model
loss, dScores_fast, _ = model_fast.calculate_loss(scores, y, 0.1)
correct_loss = 2.47808382744825

# The error should be below roughly 1e-12.
print('Loss: ', loss)
print('Correct loss: ', correct_loss)
print('Difference between your loss and correct loss: ', np.sum(np.abs(loss - correct_loss)))

In [None]:
##############################################################################
# TODO: Implement the backward method (back-propagation) of the              #
# Conv2DCython layer class.                                                  #
##############################################################################

# dScores_fast is reshaped to 4 dimensions (5, 3, 1, 1) before being back-propagated.
_ = model_fast.backward_npdl(dScores_fast.reshape(5, 3, 1, 1))

In [None]:
# Compare the gradients reported by the Cython model with numerical
# estimates, one parameter of one layer at a time.
gradients = model_fast.gradients()
model_params = model_fast.parameters()

# The error should be below 1e-5
for layer_name, layer_params in model_params.items():
    for param_name in layer_params:
        numeric_grad = evaluate_numerical_gradient(X, y, model_fast, layer_name, param_name, reg=0.1)
        model_grad = gradients[layer_name][param_name]
        label = layer_name + '-' + param_name

        print('%s max relative error: %e' % (label, rel_error(numeric_grad, model_grad)))

<font size="4">Comparaison des performances</font>

Maintenant, voyons à quel point le type d'implantation d'une convolution peut avoir un impact sur la rapidité d'exécution.

<font size="3">Forward</font>

In [None]:
# Rebuild the three models before timing the forward pass
model_naive = create_toy_model()
model_mat = create_toy_model_mat()
model_fast = create_toy_model_fast()

In [None]:
# Time the forward pass of each convolution implementation on the same input X
print("version naive...")
%timeit model_naive.forward_npdl(X)
print("version matricée...")
%timeit model_mat.forward_npdl(X)
print("version rapide cythonisée...")
%timeit model_fast.forward_npdl(X)

<font size="3">Rétro-propagation</font>

In [None]:
# Rebuild the three models before comparing and timing the backward pass
model_naive = create_toy_model()
model_mat = create_toy_model_mat()
model_fast = create_toy_model_fast()

In [None]:
# Forward pass and loss for each implementation. Each model's loss is
# computed by that same model from its own scores, so that the comparison
# between implementations actually compares their outputs.
scores_naive = model_naive.forward_npdl(X).reshape(num_inputs, num_filters)
loss_naive, dScores_naive, _ = model_naive.calculate_loss(scores_naive, y, 0.1)

scores_mat = model_mat.forward_npdl(X).reshape(num_inputs, num_filters)
loss_mat, dScores_mat, _ = model_mat.calculate_loss(scores_mat, y, 0.1)

scores_fast = model_fast.forward_npdl(X).reshape(num_inputs, num_filters)
loss_fast, dScores_fast, _ = model_fast.calculate_loss(scores_fast, y, 0.1)

In [None]:
# In principle, the losses and the gradients should be the same
# for the three implementations.

# Sum of the pairwise absolute differences between the three losses
loss_gap = (np.abs(loss_naive - loss_mat)
            + np.abs(loss_naive - loss_fast)
            + np.abs(loss_mat - loss_fast))
print("Erreur!" if loss_gap > 1e-6 else "Loss bonne!")

# Sum of the pairwise mean absolute differences between the score gradients
grad_pairs = [(dScores_mat, dScores_naive),
              (dScores_fast, dScores_naive),
              (dScores_mat, dScores_fast)]
grad_gap = sum(np.abs(left - right).ravel().mean() for left, right in grad_pairs)
print("Erreur!" if grad_gap > 1e-6 else "Gradients bons!")


In [None]:
print("version naive...")
%timeit _ = model.backward_npdl(dScores_naive.reshape(5, 3, 1, 1))
print("version matricée...")
%timeit _ = model_mat.backward_npdl(dScores_mat.reshape(5, 3, 1, 1))
print("version rapide cythonisée...")
%timeit _ = model_fast.backward_npdl(dScores_fast.reshape(5, 3, 1, 1))

<font size="5">CNN à 2 couches</font>

Ici nous testerons des réseaux à 2 couches convolutives.  En principe, si le code des dernières cellules fonctionne, le code des prochaines cellules devrait fonctionner!


In [None]:
from model.Model import Model
from layers.Conv import Conv2DNaive, Conv2DMat, Conv2DCython
from utils.model_loss import cross_entropy_loss_npdl

# Hyper-parameters of the first convolution of the two-layer toy models
num_filters = 4
filter_size = 3
channels = 2

# Size of the toy inputs (read by create_toy_data) and convolution geometry
num_inputs = 5
input_size = 5
padding = 1
stride = 2

# Number of filters of the second (output) convolution
num_classes = 3

def create_toy_model():
    """Build the two-layer toy network based on the naive convolution.

    NumPy's RNG is re-seeded with 0 first. The first layer is configured from
    the module-level globals ``num_filters``, ``filter_size``, ``channels``,
    ``stride`` and ``padding``; the second one has ``num_classes`` filters of
    size 3 over ``num_filters`` channels.

    Returns:
        A ``Model`` holding the two ``Conv2DNaive`` layers and the
        ``cross_entropy_loss_npdl`` loss.
    """
    np.random.seed(0)
    net = Model()
    first_conv = Conv2DNaive(num_filters, filter_size=filter_size, channels=channels,
                             stride=stride, padding=padding, weight_scale=1e-1)
    second_conv = Conv2DNaive(num_classes, filter_size=3, channels=num_filters,
                              weight_scale=1e-1)

    for conv in (first_conv, second_conv):
        net.add(conv)
    net.add_loss(cross_entropy_loss_npdl)
    return net

def create_toy_model_mat():
    """Build the two-layer toy network based on the matricized convolution.

    NumPy's RNG is re-seeded with 0 first. The first layer is configured from
    the module-level globals ``num_filters``, ``filter_size``, ``channels``,
    ``stride`` and ``padding``; the second one has ``num_classes`` filters of
    size 3 over ``num_filters`` channels.

    Returns:
        A ``Model`` holding the two ``Conv2DMat`` layers and the
        ``cross_entropy_loss_npdl`` loss.
    """
    np.random.seed(0)
    net = Model()
    first_conv = Conv2DMat(num_filters, filter_size=filter_size, channels=channels,
                           stride=stride, padding=padding, weight_scale=1e-1)
    second_conv = Conv2DMat(num_classes, filter_size=3, channels=num_filters,
                            weight_scale=1e-1)
    for conv in (first_conv, second_conv):
        net.add(conv)
    net.add_loss(cross_entropy_loss_npdl)
    return net

def create_toy_model_fast():
    """Build the two-layer toy network based on the Cython convolution.

    NumPy's RNG is re-seeded with 0 first. The first layer is configured from
    the module-level globals ``num_filters``, ``filter_size``, ``channels``,
    ``stride`` and ``padding``; the second one has ``num_classes`` filters of
    size 3 over ``num_filters`` channels.

    Returns:
        A ``Model`` holding the two ``Conv2DCython`` layers and the
        ``cross_entropy_loss_npdl`` loss.
    """
    np.random.seed(0)
    net = Model()
    first_conv = Conv2DCython(num_filters, filter_size=filter_size, channels=channels,
                              stride=stride, padding=padding, weight_scale=1e-1)
    second_conv = Conv2DCython(num_classes, filter_size=3, channels=num_filters,
                               weight_scale=1e-1)
    for conv in (first_conv, second_conv):
        net.add(conv)
    net.add_loss(cross_entropy_loss_npdl)
    return net

def create_toy_data():
    """Build a small, reproducible random dataset for the two-layer models.

    Reads the module-level globals ``num_inputs``, ``channels`` and
    ``input_size`` to size the input array.

    Returns:
        A tuple ``(X, y)``: ``X`` is a seeded Gaussian array of shape
        ``(num_inputs, channels, input_size, input_size)`` scaled by 10 and
        ``y`` is the fixed label vector ``[0, 1, 2, 1, 2]``.
    """
    np.random.seed(1)
    shape = (num_inputs, channels, input_size, input_size)
    inputs = 10 * np.random.randn(*shape)
    labels = np.array([0, 1, 2, 1, 2])
    return inputs, labels

# One two-layer model per convolution implementation, plus the shared toy data
model_naive = create_toy_model()
model_mat = create_toy_model_mat()
model_fast = create_toy_model_fast()
X, y = create_toy_data()

In [None]:
# Forward pass and loss for each implementation, each model being scored
# on its own output.
scores_naive = model_naive.forward_npdl(X).reshape(num_inputs, num_classes)
loss_naive, dScores_naive, _ = model_naive.calculate_loss(scores_naive, y, 0.1)

scores_mat = model_mat.forward_npdl(X).reshape(num_inputs, num_classes)
loss_mat, dScores_mat, _ = model_mat.calculate_loss(scores_mat, y, 0.1)

scores_fast = model_fast.forward_npdl(X).reshape(num_inputs, num_classes)
loss_fast, dScores_fast, _ = model_fast.calculate_loss(scores_fast, y, 0.1)

In [None]:
# In principle, the losses and the gradients should be the same
# for the three implementations.

# Sum of the pairwise absolute differences between the three losses
loss_gap = (np.abs(loss_naive - loss_mat)
            + np.abs(loss_naive - loss_fast)
            + np.abs(loss_mat - loss_fast))
print("Erreur!" if loss_gap > 1e-6 else "Loss bonne!")

# Sum of the pairwise mean absolute differences between the score gradients
grad_pairs = [(dScores_mat, dScores_naive),
              (dScores_fast, dScores_naive),
              (dScores_mat, dScores_fast)]
grad_gap = sum(np.abs(left - right).ravel().mean() for left, right in grad_pairs)
print("Erreur!" if grad_gap > 1e-6 else "Gradients bons!")


In [None]:
# Back-propagate each model's own score gradients, reshaped to (5, 3, 1, 1)
_ = model_naive.backward_npdl(dScores_naive.reshape(5, 3, 1, 1))
_ = model_mat.backward_npdl(dScores_mat.reshape(5, 3, 1, 1))
_ = model_fast.backward_npdl(dScores_fast.reshape(5, 3, 1, 1))

In [None]:
# Gradient check for the naive model, the error should be below 1e-4.
# The gradients, the parameters and the numerical estimate all come from
# model_naive, so this cell really checks the naive implementation.
gradients = model_naive.gradients()
model_params = model_naive.parameters()

for layer_name, layer_params in model_params.items():
    for param_name in layer_params:
        grad_num = evaluate_numerical_gradient(X, y, model_naive, layer_name, param_name, reg=0.1)
        max_error = rel_error(grad_num, gradients[layer_name][param_name])

        print('%s max relative error: %e' % (layer_name + '-' + param_name, max_error))

In [None]:
# Gradient check for the matricized model, the error should be below 1e-4
gradients = model_mat.gradients()
model_params = model_mat.parameters()

for layer_name, layer_params in model_params.items():
    for param_name in layer_params:
        numeric_grad = evaluate_numerical_gradient(X, y, model_mat, layer_name, param_name, reg=0.1)
        model_grad = gradients[layer_name][param_name]
        label = layer_name + '-' + param_name

        print('%s max relative error: %e' % (label, rel_error(numeric_grad, model_grad)))

In [None]:
# Gradient check for the Cython model.
# The values may differ slightly from the ones obtained for model_naive
# and model_mat (side effect of Cython).
# As long as they stay below 1e-4, this is fine.
gradients = model_fast.gradients()
model_params = model_fast.parameters()

for layer_name, layer_params in model_params.items():
    for param_name in layer_params:
        numeric_grad = evaluate_numerical_gradient(X, y, model_fast, layer_name, param_name, reg=0.1)
        model_grad = gradients[layer_name][param_name]
        label = layer_name + '-' + param_name

        print('%s max relative error: %e' % (label, rel_error(numeric_grad, model_grad)))

<font size="4">Comparaison des performances</font>

<font size="3">Propagation avant</font>

Les différentes implantations de la convolution devraient entraîner des temps d'exécution différents.

In [None]:
# Rebuild the three two-layer models before timing them
model_naive = create_toy_model()
model_mat = create_toy_model_mat()
model_fast = create_toy_model_fast()

In [None]:
# Time the forward pass of each two-layer model on the same input X
print("CNN 2 couches, conv naive")
%timeit model_naive.forward_npdl(X)
print("CNN 2 couches, conv mat")
%timeit model_mat.forward_npdl(X)
print("CNN 2 couches, conv fast")
%timeit model_fast.forward_npdl(X)

<font size="3">Rétro-propagation</font>

In [None]:
# Forward pass and score gradients for each implementation. Each model
# computes its loss from its own scores, so that the gradients timed
# afterwards belong to the model that is back-propagated.
scores_naive = model_naive.forward_npdl(X).reshape(num_inputs, num_classes)
_, dScores_naive, _ = model_naive.calculate_loss(scores_naive, y, 0.1)

scores_mat = model_mat.forward_npdl(X).reshape(num_inputs, num_classes)
_, dScores_mat, _ = model_mat.calculate_loss(scores_mat, y, 0.1)

scores_fast = model_fast.forward_npdl(X).reshape(num_inputs, num_classes)
_, dScores_fast, _ = model_fast.calculate_loss(scores_fast, y, 0.1)

In [None]:
# Time the backward pass of each two-layer model on its own score gradients
print("CNN 2 couches, gradient conv naive")
%timeit _ = model_naive.backward_npdl(dScores_naive.reshape(5, 3, 1, 1))
print("CNN 2 couches, gradient conv mat")
%timeit _ = model_mat.backward_npdl(dScores_mat.reshape(5, 3, 1, 1))
print("CNN 2 couches, gradient conv fast")
%timeit _ = model_fast.backward_npdl(dScores_fast.reshape(5, 3, 1, 1))

<font size="5">MaxPool</font>

<font size="3">Propagation avant</font>

In [None]:
##############################################################################
# TODO: Implement the forward method (forward propagation) of the            #
# MaxPool2DNaive layer class.                                                #
##############################################################################
import numpy as np
from layers.MaxPool import MaxPool2DNaive

# Deterministic input: evenly spaced values from -0.3 to 0.4
X_shape = (2, 3, 4, 4)
X = np.linspace(-0.3, 0.4, num=np.prod(X_shape)).reshape(X_shape)

layer = MaxPool2DNaive(pooling_size=(2,2), stride=(2,2))

out = layer.forward_npdl(X)

correct_out = np.array([[[[-0.26315789, -0.24842105],
                          [-0.20421053, -0.18947368]],
                         [[-0.14526316, -0.13052632],
                          [-0.08631579, -0.07157895]],
                         [[-0.02736842, -0.01263158],
                          [ 0.03157895,  0.04631579]]],
                        [[[ 0.09052632,  0.10526316],
                          [ 0.14947368,  0.16421053]],
                         [[ 0.20842105,  0.22315789],
                          [ 0.26736842,  0.28210526]],
                         [[ 0.32631579,  0.34105263],
                          [ 0.38526316,  0.4       ]]]])

# Mean absolute difference between the layer output and the expected output.
# The absolute value keeps errors of opposite signs from cancelling out;
# the result should be below roughly 1e-8.
print('difference: ', np.abs(out - correct_out).mean())

<font size="3">Rétro-propagation</font>

In [None]:
##############################################################################
# TODO: Implement the backward method (back-propagation) of the              #
# MaxPool2DNaive layer class.                                                #
##############################################################################

X = np.random.randn(2, 2, 4, 4)

layer = MaxPool2DNaive(pooling_size=(2,2), stride=(2,2))

# The forward output itself is fed back as the upstream gradient
out = layer.forward_npdl(X)
dX = layer.backward_npdl(out)

print("Inputs")
print(X)
# Should return the max of each pooling window
print("Outputs")
print(out)
# Should put the output elements back at the same location as in X
print("Gradients")
print(dX)

<font size="5">ConvNet à N couches</font>

Dans les cellules qui suivent, vous devez compléter la fonction 

    create_Nlayer_cnn(num_filter_layer1, num_filter_layer2, fc_size, init_weight_scale)
    
avec au moins **3 couches convolutives**, du *max pooling*, du *dropout*, des opérations *batchNorm* et atteindre les performances mentionnées plus loin.

In [None]:
from layers.BatchNorm import SpatialBatchNorm
from layers.Conv import Conv2DCython
from layers.Dense import Dense
from layers.Flatten import Flatten
from layers.MaxPool import MaxPool2DCython
from layers.Dropout import Dropout
from model.Model import Model
from utils.model_loss import cross_entropy_loss_npdl

# convolution parameters, change as needed
filter_size = 5
channels = 3
stride = 1
p_dropout = 0.1
# padding passed to the filter_size convolutions: half of (filter_size - 1)
pad = (filter_size - 1) // 2

# dense parameters
num_classes = 10

def create_Nlayer_cnn(num_filter_layer1, num_filter_layer2, fc_size, init_weight_scale):
    """Assemble the multi-layer convolutional network used for CIFAR-10.

    The network is made of two blocks (convolution, spatial batch-norm with
    ReLU, dropout, 2x2 max pooling) followed by two convolutions used as the
    classifier head and a flatten layer. It reads the module-level globals
    ``filter_size``, ``channels``, ``pad``, ``p_dropout`` and ``num_classes``.

    Args:
        num_filter_layer1: number of filters of the first convolution.
        num_filter_layer2: number of filters of the second convolution.
        fc_size: number of filters of the first head convolution.
        init_weight_scale: ``weight_scale`` given to every convolution.

    Returns:
        The ``Model`` with all layers added in order and
        ``cross_entropy_loss_npdl`` as loss.
    """
    model = Model()

    # Layers are created in the order in which they are stacked.
    layers = [
        # first block
        Conv2DCython(num_filter_layer1, filter_size=filter_size, channels=channels,
                     padding=pad, weight_scale=init_weight_scale),
        SpatialBatchNorm(num_filter_layer1, activation='relu'),
        Dropout(drop_rate=p_dropout),
        MaxPool2DCython(pooling_size=2, stride=2),
        # second block
        Conv2DCython(num_filter_layer2, filter_size=filter_size, channels=num_filter_layer1,
                     padding=pad, weight_scale=init_weight_scale),
        SpatialBatchNorm(num_filter_layer2, activation='relu'),
        Dropout(drop_rate=p_dropout),
        MaxPool2DCython(pooling_size=2, stride=2),
        # head: the size-8 filter presumably covers the whole feature map of a
        # 32x32 input after the two 2x2 poolings -- TODO confirm
        Conv2DCython(fc_size, filter_size=8, channels=num_filter_layer2,
                     weight_scale=init_weight_scale, activation='relu'),
        Dropout(drop_rate=p_dropout),
        Conv2DCython(num_classes, filter_size=1, channels=fc_size,
                     weight_scale=init_weight_scale),
        Flatten(),
    ]

    for layer in layers:
        model.add(layer)
    model.add_loss(cross_entropy_loss_npdl)

    return model

<font size="3">Validation de la perte</font>

En augmentant la régularisation, la loss devrait augmenter

In [None]:
model = create_Nlayer_cnn(32, 16, 50, 1e-2)

# Random inputs and random labels in [0, 10)
N = 50
X = np.random.randn(N, 3, 32, 32)
y = np.random.randint(10, size=N)

scores = model.forward_npdl(X)

# Same scores evaluated without, then with, regularization
loss, grads, _ = model.calculate_loss(scores, y, reg=0.0)
print('Initial loss (no regularization): ', loss)

loss, grads, _ = model.calculate_loss(scores, y, reg=0.1)
print('Initial loss (with regularization): ', loss)

<font size="3">Sur-apprendre sur un petit ensemble de données</font>

In [None]:
# Load the CIFAR-10 splits and report the shape of each array
X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev = get_CIFAR10_data()
for label, array in (
    ('Train data shape: ', X_train),
    ('Train labels shape: ', y_train),
    ('Validation data shape: ', X_val),
    ('Validation labels shape: ', y_val),
    ('Test data shape: ', X_test),
    ('Test labels shape: ', y_test),
    ('dev data shape: ', X_dev),
    ('dev labels shape: ', y_dev),
):
    print(label, array.shape)

In [None]:
# Here we train on a small training set to make sure that the model
# is able to overfit.
from model.Solver import epoch_solver_npdl, Adam

# Keep only the first N training examples
N = 100
X_train_small = X_train[:N]
y_train_small = y_train[:N]

model = create_Nlayer_cnn(32, 16, 400, 1e-2)

optimizer = Adam(5e-4, model)
    
# The 5th positional argument (1e-2) is where `reg` is passed in the later
# training cells of this notebook.
loss_history, train_accuracy_history, val_accuracy_history = epoch_solver_npdl(X_train_small, 
                                                                          y_train_small,
                                                                          X_val,
                                                                          y_val,
                                                                          1e-2,
                                                                          optimizer,
                                                                          batch_size=10,
                                                                          epochs=10)

In [None]:
# Top panel: loss history
plt.subplot(2, 1, 1)
plt.plot(loss_history, '-o')
plt.xlabel('iteration')
plt.ylabel('loss')

# Bottom panel: training and validation accuracy histories
plt.subplot(2, 1, 2)
plt.plot(train_accuracy_history, '-o')
plt.plot(val_accuracy_history, '-o')
plt.legend(['train', 'val'], loc='upper left')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.show()

<font size="3">Entraînement complet</font>

- Entraînez votre réseau pour 3 epochs.  Vous devriez avoir une justesse en validation d'**au moins 48\%** lorsqu'entraîné sur 6,000 données.
- Entraînez votre réseau pour 1 autre epoch mais sur **100% des données**.  Vous devriez avoir une justesse en validation d'**au moins 58\%**.

In [None]:
from model.Solver import epoch_solver_npdl, Adam

reg = 1e-2 # adjust as needed
lr = 5e-4  # adjust as needed
# Scale of the initial weights. It is kept separate from `reg` so that tuning
# the regularization does not silently change the weight initialization.
weight_scale = 1e-2
model = create_Nlayer_cnn(32, 16, 400, weight_scale)

optimizer = Adam(lr, model)

# change back to full X_train y_train for complete dataset
loss_history, train_accuracy_history, val_accuracy_history = epoch_solver_npdl(X_train[:6000], 
                                                                          y_train[:6000],
                                                                          X_val,
                                                                          y_val,
                                                                          reg,
                                                                          optimizer,
                                                                          batch_size=100,
                                                                          epochs=3)

In [None]:
# Top panel: loss history of the run on the first 6,000 training images
plt.subplot(2, 1, 1)
plt.plot(loss_history, '-o')
plt.xlabel('iteration')
plt.ylabel('loss')
plt.title('Train : 6,000 images')

# Bottom panel: training and validation accuracy histories
plt.subplot(2, 1, 2)
plt.plot(train_accuracy_history, '-o')
plt.plot(val_accuracy_history, '-o')
plt.legend(['train', 'val'], loc='upper left')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.show()

In [None]:
# Train again (1 epoch) with 100% of the training data,
# reusing the existing optimizer.
loss_history, train_accuracy_history, val_accuracy_history = epoch_solver_npdl(X_train, 
                                                                          y_train,
                                                                          X_val,
                                                                          y_val,
                                                                          reg,
                                                                          optimizer,
                                                                          batch_size=100,
                                                                          epochs=1)

In [None]:
# Top panel: loss history of the run on the whole training set.
# The title reports the actual number of training images instead of a
# hard-coded figure.
plt.subplot(2, 1, 1)
plt.plot(loss_history, '-o')
plt.xlabel('iteration')
plt.ylabel('loss')
plt.title('Train : {:,} images'.format(X_train.shape[0]))

# Bottom panel: training and validation accuracy histories
plt.subplot(2, 1, 2)
plt.plot(train_accuracy_history, '-o')
plt.plot(val_accuracy_history, '-o')
plt.legend(['train', 'val'], loc='upper left')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.show()

## Visualisation des filtres
Vous pouvez visualiser les filtres de la première couche du réseau entraîné :

In [None]:
from visualization.utils import visualize_as_grid

def show_net_weights(model):
    """Display the weights ``W`` of layer ``'L0'`` of ``model`` as an image grid.

    Args:
        model: object whose ``parameters()`` returns a nested mapping
            containing ``['L0']['W']``.
    """
    first_layer_weights = model.parameters()['L0']['W']
    # Move axis 1 to the end (presumably channels-first -> channels-last,
    # the layout expected for images -- TODO confirm)
    channels_last = first_layer_weights.transpose(0, 2, 3, 1)
    grid = visualize_as_grid(channels_last, padding=3)
    plt.imshow(grid.astype('uint8'))
    plt.gca().axis('off')
    plt.show()


show_net_weights(model)