# Deep Learning

La première étape intermédiaire de notre projet est d'utiliser les algorithmes ci-dessous sur le célèbre dataset CIFAR-10.

Les algorithmes à étudier :

**Modèles précédents**
- Modèle Linéaire
- Perceptron Multicouches

**Nouveaux modèles**
- Conv Net(s)
- ResNets / HighwayNets
- RNN(s)

Pour chacun des algorithmes cités, il faut étudier :
1. L'influence de tous les hyperparamètres des modèles
    - Structure
    - Fonctions d'activation
    - etc.
2. Les paramètres des algorithmes d'apprentissage
    - Learning Rate
    - Momentum
    - etc.

----

In [172]:
import os
import numpy as np
from numpy.random import seed
import tensorflow as tf
from tensorflow.keras.layers import Flatten, Dense, Conv2D, BatchNormalization, Input
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.metrics import categorical_accuracy
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.activations import relu, softmax, tanh
from tensorflow.keras.initializers import he_normal, glorot_uniform
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.regularizers import l2
from tensorflow.random import set_seed

In [154]:
# Report the TensorFlow build and the GPU device name seen by this kernel.
print(f"Version de TensorFlow : {tf.__version__}")
print(f"Nom du GPU : {tf.test.gpu_device_name()}")

# Reset the global Keras state, then switch JIT compilation off.
tf.keras.backend.clear_session()
tf.config.optimizer.set_jit(False)

Version de TensorFlow : 2.6.0
Nom du GPU : /device:GPU:0


2021-12-17 00:14:17.708714: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2021-12-17 00:14:17.709113: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


## Importation du dataset

In [None]:
# Load CIFAR-10 through Keras and unpack the train and test splits.
cifar10_splits = cifar10.load_data()
(x_train, y_train), (x_test, y_test) = cifar10_splits

In [None]:
# Per-image shape, read from the data (every axis of x_train after the first).
IMG_SIZE = x_train.shape[1:]
# Number of target classes used for one-hot encoding and the softmax layer.
NUM_CLASSES = 10

In [None]:
# Convert the images to float32 and scale them by 1/256.
x_train, x_test = (images.astype('float32') / 256 for images in (x_train, x_test))

# One-hot encode the labels over NUM_CLASSES columns.
y_train, y_test = (to_categorical(labels, num_classes=NUM_CLASSES) for labels in (y_train, y_test))

In [None]:
# Root directory of every TensorBoard run written by this notebook.
LOG_DIR = "logs"

## Fixer les seeds

In [None]:
# Seed both random generators with the same value.
RANDOM_SEED = 42
set_seed(RANDOM_SEED)  # TensorFlow
seed(RANDOM_SEED)  # NumPy

# Modèle linéaire

In [None]:
# Training settings read by linear_model(): number of epochs and whether
# the training data is shuffled.
EPOCHS, SHUFFLE = 30, True

## Trouver le meilleur modèle

In [157]:
def linear_model(activation_function, kernel_initializer, learning_rate, momentum, batch_size, version=''):
    """Build, compile and train the baseline dense classifier.

    The network flattens the image, applies one fully connected layer with as
    many units as there are input values, and ends with a softmax layer over
    ``NUM_CLASSES``. It is trained with SGD for ``EPOCHS`` epochs on the
    module-level ``x_train``/``y_train`` arrays, validated on
    ``x_test``/``y_test``, and logged to TensorBoard under ``LOG_DIR/linear``.

    Args:
        activation_function: Activation of the hidden dense layer. Its
            ``__name__`` is embedded in the TensorBoard run name.
        kernel_initializer: Kernel initializer of the hidden dense layer. Its
            ``__name__`` is embedded in the TensorBoard run name.
        learning_rate: Learning rate given to the SGD optimizer.
        momentum: Momentum given to the SGD optimizer.
        batch_size: Mini-batch size passed to ``fit``.
        version: Optional suffix appended to the TensorBoard run name.

    Returns:
        A ``(model, history)`` tuple: the trained Keras model and the object
        returned by ``fit``.
    """
    # Derive every dimension from IMG_SIZE so that the input layer and the
    # dense layer width cannot disagree.
    height, width, channels = IMG_SIZE
    input_layer = Input(shape=(height, width, channels))

    hidden_layers = Flatten()(input_layer)
    hidden_layers = Dense(height * width * channels,
                          activation=activation_function,
                          kernel_initializer=kernel_initializer)(hidden_layers)

    output_layer = Dense(NUM_CLASSES, activation=softmax)(hidden_layers)
    # Named `model` so the local does not shadow this function's own name.
    model = Model(input_layer, output_layer)

    model.compile(loss=categorical_crossentropy,
                  optimizer=SGD(learning_rate=learning_rate,
                                momentum=momentum),
                  metrics=[categorical_accuracy])
    log_name = os.path.join(LOG_DIR,
                            "linear",
                            f"linear_model_ep_{EPOCHS}_bs_{batch_size}_opt_SGD_lr_{learning_rate}_mo_{momentum}_ki_{kernel_initializer.__name__}_af_{activation_function.__name__}{version}")
    history = model.fit(x_train,
                        y_train,
                        batch_size=batch_size,
                        epochs=EPOCHS,
                        validation_data=(x_test, y_test),
                        shuffle=SHUFFLE,
                        callbacks=[TensorBoard(log_name, histogram_freq=1)])
    return model, history

Les hyperparamètres que nous souhaitons ajuster :
- learning_rates = [0.01, 0.05, 0.1, 0.2]
- kernel_initializers = [glorot_uniform, he_normal]
- activation_functions = [tanh, relu]
- batch_sizes = [64, 128, 256]
- momentums = [0, 0.5, 0.9]

In [None]:
# Search space of the first grid search: optimizer settings first,
# then the hidden-layer settings.
momentums = [0, 0.5, 0.9]
learning_rates = [0.01, 0.05, 0.1, 0.2]
batch_sizes = [64, 128, 256]
activation_functions = [tanh, relu]
kernel_initializers = [glorot_uniform, he_normal]

In [None]:
# Grid Search V1
from itertools import product

best_model_accuracy = 0
counter = 0
linear_models = {}  # run name -> final validation accuracy

# product() varies its last argument fastest, which gives the same visiting
# order as nesting the loops momentum > batch size > activation >
# initializer > learning rate.
for mo, bs, af, ki, lr in product(momentums, batch_sizes, activation_functions,
                                  kernel_initializers, learning_rates):
    print(f"Iteration n°{counter} | af: {af.__name__} - ki: {ki.__name__} - SGD lr: {lr} / mo: {mo} - bs: {bs}")
    model_name = f"af_{af.__name__}_ki_{ki.__name__}_sgd_lr_{lr}_mo_{mo}_bs_{bs}"
    model, history = linear_model(af, ki, lr, mo, bs)
    # Rank the runs on validation accuracy, as Grid Search V2 does. Training
    # accuracy would favour the models that overfit the most.
    accuracy = round(history.history["val_categorical_accuracy"][-1], 3)
    print(f"Accuracy : {accuracy}")
    linear_models[model_name] = accuracy
    # Only a strict improvement is written to disk.
    if accuracy > best_model_accuracy:
        model.save(f"models/linear/{str(accuracy)+'_'+model_name}.keras")
        best_model_accuracy = accuracy
    counter += 1

Après une trentaine d'epochs, nous avons remarqué que les pires résultats sont obtenus avec un learning rate égal à 0.1 ou 0.2. Nous allons donc supprimer ces valeurs dans la prochaine version. Nous allons aussi en profiter pour associer chaque fonction d'activation à l'initialiseur de kernel conseillé durant le cours (tanh -> glorot_uniform et relu -> he_normal).

**Résultat:**

Les meilleurs modèles que nous avons ont en commun un batch size de 64, un learning rate de 0.05, peu de momentum et enfin le combo relu/he_normal. 

In [None]:
# Rank the runs from best to worst accuracy and show the five best.
best_models_sorted = sorted(
    linear_models.items(),
    key=lambda name_and_accuracy: name_and_accuracy[1],
    reverse=True,
)
top_five = best_models_sorted[:5]
print(top_five)

In [None]:
# Reload the best model of the first grid search. The grid-search cells save
# their models as "<accuracy>_<model_name>.keras", so the accuracy is the
# prefix of the file name, not its suffix.
# NOTE(review): confirm that the file on disk was not renamed by hand.
best_linear_model = load_model('models/linear/0.708_af_relu_ki_HeNormal_sgd_lr_0.05_mo_0_bs_64.keras')

In [None]:
# Evaluate the reloaded model on the test split.
test_loss, test_accuracy = best_linear_model.evaluate(x_test, y_test)
print(f"Test loss : {round(test_loss, 2)}")
# Scale to percent before rounding: round(acc, 2) * 100 can print float
# artefacts such as 56.99999999999999.
print(f"Test accuracy : {round(test_accuracy * 100)}%")

Pour l'instant, notre meilleur modèle prédit correctement 51% du temps, ce qui n'est pas mal sur 10 catégories.
Nous allons donc continuer à chercher le meilleur modèle en restant proches de ces paramètres.

In [173]:
# Rebind the epoch count for the following runs. linear_model() reads EPOCHS
# when it is called, so every later call trains for 45 epochs.
EPOCHS = 45

In [174]:
# Second search: batch size, activation and initializer are fixed.
batch_size = 64
activation_function = relu
kernel_initializer = he_normal

# Only the SGD learning rate and momentum are still varied.
momentums = [0, 0.10, 0.25]
learning_rates = [0.04, 0.05, 0.06]

In [178]:
# Panda fine-tuning
# NOTE: this experiment is kept for reference only. Every line is commented
# out so that running the cell is a no-op.
#learning_rate = 0.02
#momentum = 0.9


#input_layer = Input(shape=(32, 32, 3))

#hidden_layers = Flatten()(input_layer)
#hidden_layers = Dense(IMG_SIZE[0] * IMG_SIZE[1] * IMG_SIZE[2], activation=activation_function, kernel_initializer=kernel_initializer)(hidden_layers)

#output_layer = Dense(NUM_CLASSES, activation=softmax,
#                     kernel_regularizer = l2(0.01),
#                     bias_regularizer = l2(0.01))(hidden_layers)
#linear_model = Model(input_layer, output_layer)

#linear_model.compile(loss=categorical_crossentropy,
#                     optimizer=SGD(learning_rate=learning_rate,
#                                   momentum=momentum),
#                     metrics=categorical_accuracy)
#log_name = os.path.join(LOG_DIR,
#                        "linear",
#                        f"linear_model_ep_{EPOCHS}_bs_{batch_size}_opt_SGD_lr_{learning_rate}_mo_{momentum}_ki_{kernel_initializer.__name__}_af_{activation_function.__name__}_pft")
#history = linear_model.fit(x_train,
#                           y_train,
#                           batch_size=batch_size,
#                          epochs=EPOCHS,
#                           validation_data=(x_test, y_test),
#                           shuffle=SHUFFLE,
#                           callbacks=[TensorBoard(log_name, histogram_freq=1)])
#accuracy = round(history.history["val_categorical_accuracy"][-1], 3)
#print(f"Accuracy : {accuracy}")

2021-12-17 00:56:04.488204: I tensorflow/core/profiler/lib/profiler_session.cc:131] Profiler session initializing.
2021-12-17 00:56:04.488220: I tensorflow/core/profiler/lib/profiler_session.cc:146] Profiler session started.
2021-12-17 00:56:04.488292: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session tear down.


Epoch 1/45
  3/782 [..............................] - ETA: 19s - loss: 3.5146 - categorical_accuracy: 0.1042 

2021-12-17 00:56:05.988019: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-12-17 00:56:06.113635: I tensorflow/core/profiler/lib/profiler_session.cc:131] Profiler session initializing.
2021-12-17 00:56:06.113645: I tensorflow/core/profiler/lib/profiler_session.cc:146] Profiler session started.
2021-12-17 00:56:06.141936: I tensorflow/core/profiler/lib/profiler_session.cc:66] Profiler session collecting data.
2021-12-17 00:56:06.142250: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session tear down.
2021-12-17 00:56:06.143566: I tensorflow/core/profiler/rpc/client/save_profile.cc:136] Creating directory: logs/linear/linear_model_ep_45_bs_64_opt_SGD_lr_0.02_mo_0.9_ki_HeNormal_af_relu_pft/train/plugins/profile/2021_12_17_00_56_06

2021-12-17 00:56:06.143970: I tensorflow/core/profiler/rpc/client/save_profile.cc:142] Dumped gzipped tool data for trace.json.gz to logs/linear/linear_model



2021-12-17 00:56:16.150216: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/45
Epoch 3/45
Epoch 4/45
Epoch 5/45
Epoch 6/45
Epoch 7/45
Epoch 8/45
Epoch 9/45
Epoch 10/45
Epoch 11/45
Epoch 12/45
Epoch 13/45
Epoch 14/45
Epoch 15/45
Epoch 16/45
Epoch 17/45
Epoch 18/45
Epoch 19/45
Epoch 20/45
Epoch 21/45
Epoch 22/45
Epoch 23/45
Epoch 24/45
Epoch 25/45
Epoch 26/45
Epoch 27/45
Epoch 28/45
Epoch 29/45
Epoch 30/45
Epoch 31/45
Epoch 32/45
Epoch 33/45
Epoch 34/45
Epoch 35/45
Epoch 36/45
Epoch 37/45
Epoch 38/45
Epoch 39/45
Epoch 40/45
Epoch 41/45
Epoch 42/45
Epoch 43/45
Epoch 44/45
Epoch 45/45
Accuracy : 0.478


In [None]:
#model_name = f"af_{activation_function.__name__}_ki_{kernel_initializer.__name__}_sgd_lr_{0.05}_mo_{0.10}_bs_{batch_size}"
#linear_model.save(f"models/linear/{str(accuracy)+'_'+model_name}.keras")

In [163]:
# Grid Search V2

best_model_accuracy = 0
counter = 0
linear_models = {}

# The activation and initializer are fixed here, so their names are resolved once.
af_name = activation_function.__name__
ki_name = kernel_initializer.__name__

# Every (learning rate, momentum) pair, with the learning rate varying slowest.
search_space = [(lr, mo) for lr in learning_rates for mo in momentums]

for counter, (lr, mo) in enumerate(search_space, start=1):
    print(f"Iteration n°{counter} | af: {af_name} - ki: {ki_name} - SGD lr: {lr} / mo: {mo} - bs: {batch_size}")
    model_name = f"af_{af_name}_ki_{ki_name}_sgd_lr_{lr}_mo_{mo}_bs_{batch_size}"
    model, history = linear_model(activation_function, kernel_initializer, lr, mo, batch_size, version='_v2')

    # Score each run on the validation accuracy of its last epoch.
    final_val_accuracy = history.history["val_categorical_accuracy"][-1]
    accuracy = round(final_val_accuracy, 3)
    print(f"Accuracy : {accuracy}")
    linear_models[model_name] = accuracy

    # Only a strict improvement over the best run so far is saved.
    if accuracy > best_model_accuracy:
        model.save(f"models/linear/{accuracy}_{model_name}.keras")
        best_model_accuracy = accuracy

Iteration n°1 | af: relu - ki: HeNormal - SGD lr: 0.04 / mo: 0 - bs: 64


2021-12-17 00:17:58.827727: I tensorflow/core/profiler/lib/profiler_session.cc:131] Profiler session initializing.
2021-12-17 00:17:58.827742: I tensorflow/core/profiler/lib/profiler_session.cc:146] Profiler session started.
2021-12-17 00:17:58.827824: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session tear down.


Epoch 1/50
  7/782 [..............................] - ETA: 14s - loss: 5.4403 - categorical_accuracy: 0.0960

2021-12-17 00:17:59.926755: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-12-17 00:18:00.002039: I tensorflow/core/profiler/lib/profiler_session.cc:131] Profiler session initializing.
2021-12-17 00:18:00.002052: I tensorflow/core/profiler/lib/profiler_session.cc:146] Profiler session started.
2021-12-17 00:18:00.029046: I tensorflow/core/profiler/lib/profiler_session.cc:66] Profiler session collecting data.
2021-12-17 00:18:00.029373: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session tear down.
2021-12-17 00:18:00.030466: I tensorflow/core/profiler/rpc/client/save_profile.cc:136] Creating directory: logs/linear/linear_model_ep_50_bs_64_opt_SGD_lr_0.04_mo_0_ki_HeNormal_af_relu_v2/train/plugins/profile/2021_12_17_00_18_00

2021-12-17 00:18:00.030903: I tensorflow/core/profiler/rpc/client/save_profile.cc:142] Dumped gzipped tool data for trace.json.gz to logs/linear/linear_model_ep



2021-12-17 00:18:09.655789: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50


Epoch 50/50
Accuracy : 0.391
Iteration n°2 | af: relu - ki: HeNormal - SGD lr: 0.04 / mo: 0.1 - bs: 64


2021-12-17 00:26:33.098810: I tensorflow/core/profiler/lib/profiler_session.cc:131] Profiler session initializing.
2021-12-17 00:26:33.098824: I tensorflow/core/profiler/lib/profiler_session.cc:146] Profiler session started.
2021-12-17 00:26:33.098878: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session tear down.


Epoch 1/50
  8/782 [..............................] - ETA: 12s - loss: 4.8078 - categorical_accuracy: 0.0996

2021-12-17 00:26:33.724586: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-12-17 00:26:33.810521: I tensorflow/core/profiler/lib/profiler_session.cc:131] Profiler session initializing.
2021-12-17 00:26:33.810529: I tensorflow/core/profiler/lib/profiler_session.cc:146] Profiler session started.
2021-12-17 00:26:33.836342: I tensorflow/core/profiler/lib/profiler_session.cc:66] Profiler session collecting data.
2021-12-17 00:26:33.836642: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session tear down.
2021-12-17 00:26:33.837645: I tensorflow/core/profiler/rpc/client/save_profile.cc:136] Creating directory: logs/linear/linear_model_ep_50_bs_64_opt_SGD_lr_0.04_mo_0.1_ki_HeNormal_af_relu_v2/train/plugins/profile/2021_12_17_00_26_33

2021-12-17 00:26:33.838088: I tensorflow/core/profiler/rpc/client/save_profile.cc:142] Dumped gzipped tool data for trace.json.gz to logs/linear/linear_model_



2021-12-17 00:26:45.032421: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50

KeyboardInterrupt: 

## MLP

Hyperparameter tuning using Tensorflow and Tensorboard : https://www.tensorflow.org/tensorboard/hyperparameter_tuning_with_hparams

In [None]:
# NOTE(review): `hp` is not imported in the cells visible here. Presumably it
# is the TensorBoard HParams API (tensorboard.plugins.hparams.api) used by the
# tutorial linked above. Confirm.

# Fixed training settings for the MLP experiments.
EPOCHS = 200
BATCH_SIZE = 256
SHUFFLE = True
HPARAM_DIR = os.path.join(LOG_DIR, "mlp", "hparam_tuning")

# Hyperparameter domains declared for the MLP search.
OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['adam', 'sgd', 'rmsprop']))
NUM_UNITS = hp.HParam('num_units', hp.Discrete([8, 16, 32, 64, 128]))
NUM_LAYERS = hp.HParam('num_layers', hp.Discrete(range(2, 6)))

In [None]:
# Register the hyperparameters and the "accuracy" metric with the summary
# writer bound to HPARAM_DIR.
with tf.summary.create_file_writer(HPARAM_DIR).as_default():
    hp.hparams_config(
        hparams=[NUM_UNITS, NUM_LAYERS, OPTIMIZER],
        metrics=[hp.Metric("accuracy", display_name='Accuracy')],
    )


In [None]:
def mlp_model(hparams: dict) -> tf.keras.callbacks.History:
    """Train one MLP for a single hyperparameter combination.

    The network flattens the image and stacks ``hparams[NUM_LAYERS]`` ReLU
    dense layers of ``hparams[NUM_UNITS]`` units each, followed by a softmax
    layer over ``NUM_CLASSES``. It is compiled with ``hparams[OPTIMIZER]`` and
    trained on the module-level ``x_train``/``y_train`` arrays, with
    ``x_test``/``y_test`` as validation data.

    Args:
        hparams: Mapping from the ``NUM_LAYERS``, ``NUM_UNITS`` and
            ``OPTIMIZER`` HParam objects to the values chosen for this run.
            This assumes the calling convention of the TensorBoard HParams
            tutorial; no caller is visible here, so confirm against it.

    Returns:
        The object returned by ``fit``. Since the model is compiled with the
        "accuracy" metric, the validation accuracy of each epoch is expected
        under ``history.history["val_accuracy"]``.
    """
    # Read the single combination to train. One call trains one model, so
    # looping over the whole search space is left to the caller.
    num_layers = hparams[NUM_LAYERS]
    num_units = hparams[NUM_UNITS]
    optimizer = hparams[OPTIMIZER]

    input_ = tf.keras.layers.Input(shape=IMG_SIZE)
    hidden_layers = tf.keras.layers.Flatten()(input_)
    for _ in range(num_layers):
        hidden_layers = tf.keras.layers.Dense(num_units, activation="relu")(hidden_layers)
    output_ = tf.keras.layers.Dense(NUM_CLASSES, activation="softmax")(hidden_layers)

    mlp = tf.keras.models.Model(input_, output_)
    mlp.compile(loss=tf.keras.losses.CategoricalCrossentropy(),
                optimizer=optimizer,
                metrics=["accuracy"])

    # One log directory per combination, so that runs do not overwrite each other.
    run_dir = os.path.join(
        HPARAM_DIR,
        f"mlp_ep_{EPOCHS}_bs_{BATCH_SIZE}_opt_{optimizer}_layers_{num_layers}_units_{num_units}")
    history = mlp.fit(x_train,
                      y_train,
                      batch_size=BATCH_SIZE,
                      epochs=EPOCHS,
                      validation_data=(x_test, y_test),
                      shuffle=SHUFFLE,
                      callbacks=[tf.keras.callbacks.TensorBoard(run_dir, histogram_freq=1),
                                 hp.KerasCallback(run_dir, hparams)])
    return history