**Experimento con LeNet sobre MNIST (Optimización)**

**Lectura del corpus mnist, partición train y test y normalización**

In [1]:
from tensorflow import keras
from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

print('training set', x_train.shape)
print('test set', x_test.shape)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

# Normalize [0..255]-->[0..1]
x_train /= 255
x_test /= 255

# convert class vectors to binary class matrices
num_classes=10
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
training set (60000, 28, 28)
test set (10000, 28, 28)



**Optimización con conjunto de validación**


Hacemos la partición entrenamiento/validación (80%/20%)

In [2]:
from sklearn.model_selection import train_test_split

x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2, random_state=42)

print('training set', x_train.shape)
print('val set', x_val.shape)

training set (48000, 28, 28)
val set (12000, 28, 28)


**Optimización del modelo en función del learning rate**

In [4]:
from keras.optimizers import SGD

batch_size=128
epochs=5

LR=[0.0001,0.001,0.01,0.1]
best_acc=0.0
for lr in LR:
  M = keras.Sequential()
  M.add(keras.layers.Conv2D(filters=6, kernel_size=(5,5), activation='tanh', input_shape=(28,28,1)))
  M.add(keras.layers.AveragePooling2D(pool_size=(2,2), strides=2))
  M.add(keras.layers.Conv2D(filters=16, kernel_size=(5,5), activation='tanh'))
  M.add(keras.layers.AveragePooling2D(pool_size=(2,2), strides=2))
  M.add(keras.layers.Flatten())
  M.add(keras.layers.Dense(units=120, activation='tanh'))
  M.add(keras.layers.Dense(units=84, activation='tanh'))
  M.add(keras.layers.Dense(units=10, activation = 'softmax'))
  sgd=SGD(learning_rate=lr)
  M.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
  print("Learning rate:",lr)
  H = M.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_val, y_val))

  if H.history['val_accuracy'][-1]>best_acc:
        best_acc=H.history['val_accuracy'][-1]
        bestlr=lr

print("Best acc",best_acc)
print("Best learning rate",bestlr)

Learning rate: 0.0001
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Learning rate: 0.001
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Learning rate: 0.01
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Learning rate: 0.1
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Best acc 0.9788333177566528
Best learning rate 0.1



EARLY STOPPING

El número de épocas a emplear se puede ajustar en función de cómo evoluciona la convergencia sobre el conjunto de validación.


In [5]:
from keras.optimizers import SGD

batch_size=128
epochs=100

callback = keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0.01, patience=2)

LR=[0.1]
best_acc=0.0
for lr in LR:
  M = keras.Sequential()
  M.add(keras.layers.Conv2D(filters=6, kernel_size=(5,5), activation='tanh', input_shape=(28,28,1)))
  M.add(keras.layers.AveragePooling2D(pool_size=(2,2), strides=2))
  M.add(keras.layers.Conv2D(filters=16, kernel_size=(5,5), activation='tanh'))
  M.add(keras.layers.AveragePooling2D(pool_size=(2,2), strides=2))
  M.add(keras.layers.Flatten())
  M.add(keras.layers.Dense(units=120, activation='tanh'))
  M.add(keras.layers.Dense(units=84, activation='tanh'))
  M.add(keras.layers.Dense(units=10, activation = 'softmax'))
  sgd=SGD(learning_rate=lr)
  M.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
  print("Learning rate:",lr)
  H = M.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_val, y_val),callbacks=[callback])

  if H.history['val_accuracy'][-1]>best_acc:
        best_acc=H.history['val_accuracy'][-1]
        bestlr=lr

print("Best acc",best_acc)
print("Best learning rate",bestlr)

Learning rate: 0.1
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Best acc 0.9815000295639038
Best learning rate 0.1


**Optimización del modelo en función de diferentes optimizadores**

In [6]:
from keras.optimizers import SGD,Adam,Adagrad

batch_size=128
epochs=100
lr=0.1

opt=[]
opt.append(SGD(learning_rate=lr))
opt.append(Adam(learning_rate=lr))
opt.append(Adagrad(learning_rate=lr))

best_acc=0.0
for optim in opt:
    print("Optimizador:",optim)
    M = keras.Sequential()
    M.add(keras.layers.Conv2D(filters=6, kernel_size=(5,5), activation='tanh', input_shape=(28,28,1)))
    M.add(keras.layers.AveragePooling2D(pool_size=(2,2), strides=2))
    M.add(keras.layers.Conv2D(filters=16, kernel_size=(5,5), activation='tanh'))
    M.add(keras.layers.AveragePooling2D(pool_size=(2,2), strides=2))
    M.add(keras.layers.Flatten())
    M.add(keras.layers.Dense(units=120, activation='tanh'))
    M.add(keras.layers.Dense(units=84, activation='tanh'))
    M.add(keras.layers.Dense(units=10, activation = 'softmax'))
    M.compile(loss='categorical_crossentropy',
              optimizer=optim,
              metrics=['accuracy'])
    H = M.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_val, y_val),callbacks=[callback])

    if H.history['val_accuracy'][-1]>best_acc:
        best_acc=H.history['val_accuracy'][-1]
        bestopt=optim

print("=============================")
print("Best acc",best_acc)
print("Best optim",bestopt)
print("=============================")

Optimizador: <keras.src.optimizers.sgd.SGD object at 0x7863100ef190>
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Optimizador: <keras.src.optimizers.adam.Adam object at 0x7863100ef1f0>
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Optimizador: <keras.src.optimizers.adagrad.Adagrad object at 0x7863100efaf0>
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Best acc 0.9858333468437195
Best optim <keras.src.optimizers.adagrad.Adagrad object at 0x7863100efaf0>
