# Keras model fitting

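This notebook trains a simple neural network (an MLP) on the MNIST digits dataset using Keras, tunes its hyperparameters with KerasTuner, and assesses its performance on the test set.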

In [None]:
import shutil

import keras_tuner as kt
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

This notebook is parameterized to work with [Papermill](https://papermill.readthedocs.io).
The following cell contains the default values of the parameters.

In [None]:
n_epochs = 2  # number of training epochs, passed as `epochs` to `tuner.search`
n_trials = 1  # number of hyperparameter configurations to try, passed as the tuner's `max_trials`
max_units = 10  # upper bound of the "units" hyperparameter (hidden Dense layer width) in `build_model`

First we load the MNIST training and test datasets and rescale the pixel values by 1/255.

In [None]:
# Load the MNIST train/test splits shipped with the Keras datasets module.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Cast both image arrays to float32 and divide by 255 (MNIST pixels are 8-bit,
# so the values should end up in [0, 1]).
X_train, X_test = (
    images.astype(np.float32) / 255.0 for images in (X_train, X_test)
)

print(f"Number of training samples: {len(X_train)}.")
print(f"Number of test samples: {len(X_test)}.")

This dataset contains images of handwritten digits. Here are the first ten training images.

In [None]:
# Show the first ten training digits side by side, each titled with its label.
# The figure is bound to `fig` rather than `_`, because assigning `_` in a
# notebook shadows IPython's "last output" variable.
fig, axes = plt.subplots(1, 10, figsize=(12, 5))
for ax, digit, label in zip(axes, X_train, y_train):
    # The images are single-channel, so use a grayscale colormap instead of
    # matplotlib's false-colour default.
    ax.imshow(digit, cmap="gray")
    ax.set_title(str(label))
    ax.axis("off")

Then we define an MLP model, using KerasTuner to enable hyperparameter optimization.

In [None]:
def build_model(hp):
    """Build and compile the MLP classifier for one hyperparameter configuration.

    The search space declared here is:

    * ``units``: width of the hidden dense layer, from ``min(max_units, 32)``
      up to ``max_units`` in steps of 32;
    * ``dropout``: dropout rate after the hidden layer, between 0.1 and 0.5;
    * ``learning_rate``: Adam learning rate, log-sampled between 1e-4 and 1e-2.

    Note that this function reads two notebook-level globals: ``max_units``
    (a notebook parameter) and ``X_train`` (only for the shape of one sample).

    Parameters
    ----------
    hp : keras_tuner.HyperParameters
        Container used to declare and retrieve the hyperparameter values.

    Returns
    -------
    tf.keras.Sequential
        Model compiled with sparse categorical cross-entropy loss, the Adam
        optimizer and the ``accuracy`` metric.
    """
    hp_units = hp.Int(
        "units", min_value=min(max_units, 32), max_value=max_units, step=32
    )
    hp_dropout = hp.Float("dropout", min_value=0.1, max_value=0.5)

    model = tf.keras.Sequential(
        [
            # Declare the input shape with an explicit Input object: passing
            # `input_shape=` to a layer is deprecated in Keras 3 and emits a warning.
            tf.keras.Input(shape=X_train.shape[1:]),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(hp_units, activation="relu"),
            tf.keras.layers.Dropout(hp_dropout),
            # One output per class, matching the 10 digits.
            tf.keras.layers.Dense(10, activation="softmax"),
        ]
    )

    lr = hp.Float("learning_rate", min_value=1e-4, max_value=1e-2, sampling="log")
    model.compile(
        # "sparse" variant of the loss: it is fed the raw label array, not one-hot vectors.
        loss="sparse_categorical_crossentropy",
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
        metrics=["accuracy"],
    )
    return model

In [None]:
# Sanity-check the architecture by building the model once from a fresh
# HyperParameters container (no values set by a tuner) and printing its layers.
default_hp = kt.HyperParameters()
model = build_model(default_hp)
model.summary()

In [None]:
# Start from a clean slate: remove the TensorBoard logs and the tuner project
# directory left by a previous run. Uses shutil instead of a shell `rm` so the
# cell also works on platforms without a POSIX shell; missing directories are ignored.
for stale_dir in ("logs", "mlp_tuning"):
    shutil.rmtree(stale_dir, ignore_errors=True)

In [None]:
# Random search over the space declared in `build_model`, ranking the trials
# by validation accuracy. The seed fixes the tuner's sampling.
tuner = kt.RandomSearch(
    hypermodel=build_model,
    objective="val_accuracy",
    max_trials=n_trials,
    project_name="mlp_tuning",
    seed=42,
)

Fitting time depends on the number of epochs per trial (`n_epochs`) and on the number of tested configurations (`n_trials`).

In [None]:
# Stop training once the validation loss has gone 5 epochs without improving.
# With the default n_epochs = 2 this window is never reached.
stop_early = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=5,
)

# Write training logs to the "logs" directory for TensorBoard.
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir="logs")

In [None]:
# Training options shared by every trial; the last 20% of the training data
# is held out for validation.
fit_options = {
    "epochs": n_epochs,
    "batch_size": 32,
    "validation_split": 0.2,
    "callbacks": [stop_early, tensorboard_callback],
}

# Run the hyperparameter search.
tuner.search(X_train, y_train, **fit_options)

Finally, we check the accuracy of the best model on the test dataset.

In [None]:
# Retrieve the best model found by the search and score it on the test set.
model = tuner.get_best_models(num_models=1)[0]

# `evaluate` returns the loss followed by the compiled metric (accuracy).
# The loss is bound to a real name rather than `_`, because assigning `_` in
# a notebook shadows IPython's "last output" variable.
test_loss, accuracy = model.evaluate(X_test, y_test)
print(f"MLP test accuracy is {accuracy * 100:.2f}%.")

We can investigate the results via TensorBoard, using the integrated reverse proxy to access it.

In [None]:
# Point TensorBoard at the notebook server's reverse proxy. `%PORT%` is a
# placeholder, presumably replaced by TensorBoard with its actual port - TODO confirm.
# Keep this comment on its own lines: text after the value would become part of it.
%env TENSORBOARD_PROXY_URL /user-redirect/proxy/%PORT%/

In [None]:
# Load the TensorBoard notebook extension, which provides the `%tensorboard` magic.
%load_ext tensorboard

In [None]:
# Launch TensorBoard on the "logs" directory written by the TensorBoard callback.
%tensorboard --logdir logs