![imagenes](logo.png)

### Regularización en Keras

In [None]:
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (5, 5)

# Cargamos los datos

En deep learning no se suele hacer validación cruzada (a menos que el tamaño del dataset y el tiempo de entrenamiento lo permitan). En lugar de eso, se hacen simples separaciones entre datos de entrenamiento y de validación.

In [None]:
from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [None]:
x_train.shape

In [None]:
x_train[0].shape

In [None]:
x_train[0]

In [None]:
def dibujar_numero(i):
    """Display training sample ``i`` as a grayscale image titled with its label.

    Reads the image from the global ``x_train`` and the label from the
    global ``y_train``; draws on the current matplotlib figure.
    """
    imagen, etiqueta = x_train[i], y_train[i]
    plt.imshow(imagen, cmap="gray")
    plt.title("Número {}".format(etiqueta))
    
    
dibujar_numero(20)    

In [None]:
import os

# Select TensorFlow as the Keras backend.
# NOTE(review): Keras appears to read KERAS_BACKEND when the package is first
# imported, and `keras.datasets` is already imported in an earlier cell, so
# this assignment probably has no effect here -- TODO confirm and move it
# above the first keras import.
os.environ["KERAS_BACKEND"] = "tensorflow"

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten

In [None]:
np.unique(y_train)

In [None]:
x_train[0]

In [None]:
x_train[0].shape

In [None]:
# Flatten every image into a 784-element (28*28) vector, one row per sample,
# to match the input_shape=(784,) declared on the first Dense layer.
x_train_plano = x_train.reshape(x_train.shape[0],28*28)
x_test_plano = x_test.reshape(x_test.shape[0],28*28)

In [None]:
x_train_plano[0].shape

In [None]:
x_train_plano[0]

In [None]:
from keras.utils import to_categorical

In [None]:
y_train[:10]

In [None]:
# One-hot encode the class labels so they can be used with the
# categorical_crossentropy loss the models are compiled with.
y_train_one_hot = to_categorical(y_train)
y_test_one_hot = to_categorical(y_test)

In [None]:
y_train_one_hot[:10]

In [None]:
# Baseline classifier without any regularization:
# 784 inputs -> Dense(50, relu) -> Dense(250, relu) -> softmax output.
modelo = Sequential()
modelo.add(Dense(50, activation="relu", input_shape=(784,)))
modelo.add(Dense(250, activation="relu"))
# One output unit per distinct label found in y_train.
modelo.add(Dense(np.unique(y_train).shape[0], activation="softmax"))

# Plain SGD with cross-entropy on the one-hot labels; accuracy is tracked as
# the only extra metric.
modelo.compile(optimizer="sgd", loss="categorical_crossentropy",
               metrics=["accuracy"])

modelo.summary()

In [None]:
# Train the baseline for 50 epochs with batches of 1000 samples; verbose=0
# silences the per-epoch log and the trailing semicolon keeps the notebook
# from echoing the value returned by fit().
modelo.fit(x_train_plano, y_train_one_hot, epochs=50, batch_size=1000, verbose=0);

Vamos a ver ahora cómo se compara el funcionamiento del modelo respecto a los datos de entrenamiento y los de test.

In [None]:
# Accumulates one entry per experiment:
# experiment name -> [train accuracy, test accuracy].
RESULTADOS = {}

In [None]:
modelo.metrics_names

In [None]:
evaluacion_train = modelo.evaluate(x_train_plano, y_train_one_hot)
evaluacion_train

In [None]:
evaluacion_test = modelo.evaluate(x_test_plano, y_test_one_hot)
evaluacion_test

In [None]:
# evaluate() returned [loss, accuracy] for this model (see metrics_names
# above), so index 1 is the accuracy on each split.
RESULTADOS["sin_regularizacion"] = [evaluacion_train[1], evaluacion_test[1]]

### Regularización l1 o l2

Keras permite regularizar los pesos, los sesgos (bias) y las activaciones de forma independiente, pasando los parámetros `kernel_regularizer`, `bias_regularizer` y `activity_regularizer` respectivamente.

Keras tiene los penalizadores `l1`, `l2` y `l1_l2` (elasticnet)

In [None]:
from keras import regularizers

# Same architecture as the unregularized model, but the 250-unit hidden layer
# carries an L2 penalty (factor 0.01) on its weights (kernel) only.
modelo_l2 = Sequential()
modelo_l2.add(Dense(50, activation="relu", input_shape=(784,)))
modelo_l2.add(Dense(250, activation="relu", kernel_regularizer=regularizers.l2(0.01)))
modelo_l2.add(Dense(np.unique(y_train).shape[0], activation="softmax"))

# Identical optimizer, loss and metric to the other models so that the
# accuracies are comparable.
modelo_l2.compile(optimizer="sgd", loss="categorical_crossentropy",
               metrics=["accuracy"])

modelo_l2.summary()

In [None]:
# Same training budget as the baseline: 50 epochs, batches of 1000, silent.
modelo_l2.fit(x_train_plano, y_train_one_hot, verbose=0, epochs=50, batch_size=1000);

In [None]:
# Accuracy (index 1 of evaluate()'s result) of the L2 model on the training set.
acc_train = modelo_l2.evaluate(x_train_plano, y_train_one_hot)[1]
acc_train

In [None]:
# Accuracy (index 1 of evaluate()'s result) of the L2 model on the test set.
# This must be `modelo_l2`, not the unregularized `modelo`, so that the
# train/test pair stored under "regularizacion_l2" comes from the same model.
acc_test = modelo_l2.evaluate(x_test_plano, y_test_one_hot)[1]
acc_test

In [None]:
RESULTADOS["regularizacion_l2"] = [acc_train, acc_test]

Hacemos lo mismo pero con regularización l1

In [None]:
# Same architecture again, now with an L1 penalty (factor 0.01) on the
# weights (kernel) of the 250-unit hidden layer.
modelo_l1 = Sequential()
modelo_l1.add(Dense(50, activation="relu", input_shape=(784,)))
modelo_l1.add(Dense(250, activation="relu", kernel_regularizer=regularizers.l1(0.01)))
modelo_l1.add(Dense(np.unique(y_train).shape[0], activation="softmax"))

modelo_l1.compile(optimizer="sgd", loss="categorical_crossentropy",
               metrics=["accuracy"])

# Train with the same settings used for the other models.
modelo_l1.fit(x_train_plano, y_train_one_hot, verbose=0, epochs=50, batch_size=1000)

# Index 1 of evaluate()'s result is the accuracy metric.
acc_train = modelo_l1.evaluate(x_train_plano, y_train_one_hot)[1]
acc_test = modelo_l1.evaluate(x_test_plano, y_test_one_hot)[1]

RESULTADOS["regularizacion_l1"] = [acc_train, acc_test]

In [None]:
print(acc_train, acc_test)

In [None]:
RESULTADOS

### Dropout

Ahora vamos a añadir Dropout a la misma red. Dropout simplemente ignora un porcentaje `p` de las unidades (neuronas) en cada iteración del entrenamiento (forward prop y backprop)

In [None]:
from keras.layers import Dropout

In [None]:
# Baseline architecture plus a Dropout layer (rate 0.2) placed between the
# 250-unit hidden layer and the softmax output.
modelo_dropout = Sequential()
modelo_dropout.add(Dense(50, activation="relu", input_shape=(784,)))
modelo_dropout.add(Dense(250, activation="relu"))
modelo_dropout.add(Dropout(0.2))
modelo_dropout.add(Dense(np.unique(y_train).shape[0], activation="softmax"))

modelo_dropout.compile(optimizer="sgd", loss="categorical_crossentropy",
               metrics=["accuracy"])

modelo_dropout.summary()

Vemos que el número de parámetros a entrenar es el mismo: Dropout no añade pesos a la red.

In [None]:
# Same training budget as the other models: 50 epochs, batches of 1000, silent.
modelo_dropout.fit(x_train_plano, y_train_one_hot, verbose=0, epochs=50, batch_size=1000);

In [None]:
# Train and test accuracy (index 1 of evaluate()'s result) of the dropout model.
acc_train = modelo_dropout.evaluate(x_train_plano, y_train_one_hot)[1]
acc_test = modelo_dropout.evaluate(x_test_plano, y_test_one_hot)[1]

In [None]:
print(acc_train, acc_test)

In [None]:
RESULTADOS["regularizacion_dropout"] = [acc_train, acc_test]

### Normalización en bloques (batch normalization)

In [None]:
from keras.layers import BatchNormalization

In [None]:
# Baseline architecture plus a BatchNormalization layer applied to the output
# of the 250-unit hidden layer, right before the softmax output.
modelo_bnorm = Sequential()
modelo_bnorm.add(Dense(50, activation="relu", input_shape=(784,)))
modelo_bnorm.add(Dense(250, activation="relu"))
modelo_bnorm.add(BatchNormalization())
modelo_bnorm.add(Dense(np.unique(y_train).shape[0], activation="softmax"))

modelo_bnorm.compile(optimizer="sgd", loss="categorical_crossentropy",
               metrics=["accuracy"])

modelo_bnorm.summary()

In [None]:
# Train with the shared settings, then record train/test accuracy
# (index 1 of evaluate()'s result) for the batch-normalized model.
modelo_bnorm.fit(x_train_plano, y_train_one_hot, verbose=0, epochs=50, batch_size=1000)
acc_train = modelo_bnorm.evaluate(x_train_plano, y_train_one_hot)[1]
acc_test = modelo_bnorm.evaluate(x_test_plano, y_test_one_hot)[1]

RESULTADOS["batch_normalization"] = [acc_train, acc_test]
print(acc_train, acc_test)

### Batch Normalization + Dropout

Una práctica común es poner normalización batch y dropout juntos en una capa.

In [None]:
# Baseline architecture with both regularizers after the 250-unit hidden
# layer: BatchNormalization first, then Dropout (rate 0.2), then softmax.
modelo_bnorm_drop = Sequential()
modelo_bnorm_drop.add(Dense(50, activation="relu", input_shape=(784,)))
modelo_bnorm_drop.add(Dense(250, activation="relu"))
modelo_bnorm_drop.add(BatchNormalization())
modelo_bnorm_drop.add(Dropout(0.2))
modelo_bnorm_drop.add(Dense(np.unique(y_train).shape[0], activation="softmax"))

modelo_bnorm_drop.compile(optimizer="sgd", loss="categorical_crossentropy",
               metrics=["accuracy"])

# Train with the shared settings, then record train/test accuracy
# (index 1 of evaluate()'s result).
modelo_bnorm_drop.fit(x_train_plano, y_train_one_hot, verbose=0, epochs=50, batch_size=1000)
acc_train = modelo_bnorm_drop.evaluate(x_train_plano, y_train_one_hot)[1]
acc_test = modelo_bnorm_drop.evaluate(x_test_plano, y_test_one_hot)[1]

RESULTADOS["batch_normalization + dropout"] = [acc_train, acc_test]
print(acc_train, acc_test)

Ahora metemos los resultados en un dataframe para inspeccionarlos

In [None]:
import pandas as pd

# One row per experiment (the keys of RESULTADOS), with the stored
# [train, test] accuracy pair spread over two named columns.
resultados = pd.DataFrame.from_dict(
    RESULTADOS,
    orient="index",
    columns=["acc_train", "acc_test"],
)
# Relative accuracy drop from train to test: 1 - test / train.
resultados["pct_diff"] = 1 - resultados["acc_test"] / resultados["acc_train"]