# Chapter 10: Introduction to Artificial Neural Networks with Keras

### 10. Train a deep MLP on the MNIST dataset (you can load it using keras.datasets.mnist.load_data(). See if you can get over 98% precision. Try searching for the optimal learning rate by using the approach presented in this chapter (i.e., by growing the learning rate exponentially, plotting the error, and finding the point where the error shoots up). Try adding all the bells and whistles—save checkpoints, use early stopping, and plot learning curves using TensorBoard.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os

import tensorflow as tf
from tensorflow import keras
from keras import (
    Sequential,
    layers,
    callbacks,
    datasets,
    activations,
    optimizers,
    losses,
    metrics,
)
import keras_tuner as kt


Let's first download the dataset and split it into training, validation and test sets. The train set returned by `tf.keras.datasets.mnist.load_data()` has 60k images. We will use 50k for training and 10k for validation. At the same time we will normalize the values of the pixels to make sure all the possible values lay between 0 and 1.

In [2]:
def preprocessing(dataset, percentage=0.7, target=False):
    """This function normalizes the dataset and divides the dataset 
    into training and validation datasets

    Args:
        dataset (array): Dataset of images to train the model   
        percentage (float, optional): The percentage of the dataset that will be used to train. Defaults to 0.7.
        target (boolean): True if the dataset contains the targets of the model. Defaults to False.

    Returns:
        train_dataset (array): dataset that will be used to train the model
        valid_dataset (array): dataset that will be used to validate the results of the model
    """
    
    max_value = np.max(dataset)
    threshold = round(len(dataset) * percentage)
    train_dataset = dataset[:threshold]/max_value if not target else dataset[:threshold]
    valid_dataset = dataset[threshold:]/max_value if not target else dataset[threshold:]
    return train_dataset, valid_dataset


def get_mnist():
    """This function downloads the mnist dataset from Keras,
    and splits the data into training, validation and testing sets.

    Returns:
        train_set : array containing the input and target values for the training
        valid_set : array containing the input and target values for the validation in training
        test_set : array containing the input and target values for the testing of the model
    """
    (x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()

    x_train, x_valid = preprocessing(x_train)
    y_train, y_valid = preprocessing(y_train, target=True)

    train_set = [x_train, y_train]
    valid_set = [x_valid, y_valid]
    test_set = [x_test, y_test]
    
    return train_set, valid_set, test_set

In [3]:
def train_model(train_set, valid_set, test_set, neurons=100, epochs=25, lrate=1e-4):
    x_train, y_train = train_set
    x_valid, y_valid = valid_set
    x_test, y_test = test_set
    

    model = Sequential(
        [
            layers.Flatten(input_shape=[28, 28]),
            layers.Dense(neurons, activation=activations.relu),
            layers.Dense(neurons, activation=activations.relu),
            layers.Dense(neurons, activation=activations.relu),
            layers.Dense(10, activation=activations.softmax),
        ]
    )

    # Compilation
    optimizer = optimizers.SGD(learning_rate=lrate)
    model.compile(
        loss=[losses.sparse_categorical_crossentropy],
        metrics=["accuracy"],
        optimizer=optimizer,
    )

    # Define Callbacks
    logdir = os.path.join("..", "logs", "chapter_10")
    if not os.path.exists(logdir):
        os.makedirs(logdir)
    tensorboard_cb = callbacks.TensorBoard(log_dir=logdir)

    early_stop_cb = callbacks.EarlyStopping(patience=5)

    model.fit(
        x_train,
        y_train,
        validation_data=[x_valid, y_valid],
        callbacks=[tensorboard_cb, early_stop_cb],
        batch_size=32,
        epochs=epochs,
        verbose=0,
    )

    evaluation = model.evaluate(x_test, y_test, verbose=0)
    print(f'The accuracy of the model is {evaluation[1]:.4f}')
    print(f'Learning rate: {lrate}')
    print(f'\n')


In [4]:
def model_builder(hp):
    
    hp_units = hp.Int('units', min_value=50, max_value=300, step=50)
    
    model = Sequential(
        [
            layers.Flatten(input_shape=[28, 28]),
            layers.Dense(hp_units, activation=activations.relu),
            layers.Dense(hp_units, activation=activations.relu),
            layers.Dense(hp_units, activation=activations.relu),
            layers.Dense(10, activation=activations.softmax),
        ]
    )
    
    hp_learning_rate = hp.Choice('learning_rate', values=[8e-3, 9e-3, 1e-2, 2e-2, 3e-2, 4e-2])
    
    # Compilation
    optimizer = optimizers.SGD(learning_rate=hp_learning_rate)
    model.compile(
        loss=[losses.sparse_categorical_crossentropy],
        metrics=["accuracy"],
        optimizer=optimizer,
    )
    
    return model

In [5]:
train_set, valid_set, test_set = get_mnist()
x_train, y_train = train_set

In [6]:
tuner = kt.Hyperband(
    model_builder,
    objective='val_accuracy',
    max_epochs=10,
    factor=3,
    directory='tuner_kt',
    project_name='homl_chapter_10'
)

INFO:tensorflow:Reloading Oracle from existing project tuner_kt/homl_chapter_10/oracle.json
Metal device set to: Apple M1 Pro

systemMemory: 32.00 GB
maxCacheSize: 10.67 GB

INFO:tensorflow:Reloading Tuner from tuner_kt/homl_chapter_10/tuner0.json


In [7]:
early_stop_cb = callbacks.EarlyStopping(monitor='val_loss', patience=5)

tuner.search(x_train, y_train, epochs=50, validation_split=0.2, callbacks=[early_stop_cb])

# Get the optimal hyperparameters
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
The hyperparameter search is complete. The optimal number of units in the first densely-connected
layer is {best_hps.get('units')} and the optimal learning rate for the optimizer is {best_hps.get('learning_rate')}.
""")

Trial 23 Complete [00h 00m 25s]
val_accuracy: 0.9258333444595337

Best val_accuracy So Far: 0.9688094854354858
Total elapsed time: 00h 03m 46s

Search: Running Trial #24

Value             |Best Value So Far |Hyperparameter
50                |300               |units
0.008             |0.04              |learning_rate
4                 |10                |tuner/epochs
0                 |4                 |tuner/initial_epoch
1                 |2                 |tuner/bracket
0                 |2                 |tuner/round

Epoch 1/4
Epoch 2/4
Epoch 3/4