# Deep Neural Network in Keras

In this notebook, we improve on our [intermediate neural net](https://github.com/the-deep-learners/deep-learning-illustrated/blob/master/notebooks/intermediate_net_in_keras.ipynb) by applying the theory we've covered since.

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/the-deep-learners/deep-learning-illustrated/blob/master/notebooks/deep_net_in_keras.ipynb)

#### Load dependencies

In [None]:
# Uncomment and run once if KerasTuner is not installed in this environment (e.g. on Colab).
#!pip install keras-tuner

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout # new!
from tensorflow.keras.layers import BatchNormalization # new!
from tensorflow.keras.optimizers import SGD, Adam

from tensorflow.keras.utils import to_categorical
import time
import kerastuner as kt

#### Load data

In [None]:
# Fetch the MNIST train and test splits, each an (images, labels) pair.
train_split, test_split = mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

#### Preprocess data

In [None]:
# Flatten every image into one feature vector and cast to float32 so the pixel
# values can be rescaled to fractions afterwards. The sample count is taken
# from the array itself instead of being hard-coded, so this cell also works
# when only a subset of the data has been loaded.
X_train = X_train.reshape(X_train.shape[0], -1).astype('float32')
X_test = X_test.reshape(X_test.shape[0], -1).astype('float32')

In [None]:
# Rescale pixel intensities from the 0-255 range down to 0-1.
# The division happens in place, so re-running this cell unguarded would shrink
# the data a second time; only scale arrays that still hold raw intensities.
if X_train.max() > 1:
    X_train /= 255
if X_test.max() > 1:
    X_test /= 255

In [None]:
n_classes = 10  # one class per digit, 0-9

# One-hot encode the integer labels. Re-running the cell would otherwise feed
# the already-encoded matrix back into to_categorical and produce an array of
# the wrong rank, so only 1-D label vectors are converted.
if y_train.ndim == 1:
    y_train = to_categorical(y_train, n_classes)
if y_test.ndim == 1:
    y_test = to_categorical(y_test, n_classes)

#### Define Model Builder Function

In [None]:
def model_builder(hp):
  """Build and compile the dense classifier for one tuner trial.

  The network has three ReLU hidden layers, each followed by batch
  normalization, then a dropout layer and a 10-unit softmax output.

  Args:
    hp: hyperparameter container supplied by the tuner. It is queried for
      'l1_units', 'l2_units' and 'l3_units' (each 32 or 64), 'do_rate'
      (0.1 to 0.3, linear sampling) and 'learning_rate' (1e-2, 1e-3 or 1e-4).

  Returns:
    The compiled Sequential model (categorical cross-entropy loss, Adam
    optimizer, accuracy metric).
  """
  net = Sequential()

  # Only the first hidden layer declares the flattened 784-value input.
  hidden_specs = (
      ('l1_units', {'input_shape': (784,)}),
      ('l2_units', {}),
      ('l3_units', {}),
  )
  for hp_name, extra_kwargs in hidden_specs:
    width = hp.Choice(hp_name, values=[32, 64])
    net.add(Dense(units=width, activation='relu', **extra_kwargs))
    net.add(BatchNormalization())

  # Dropout is applied once, after the last hidden layer.
  dropout_rate = hp.Float('do_rate', min_value=0.1, max_value=0.3, sampling='linear')
  net.add(Dropout(dropout_rate))

  net.add(Dense(10, activation='softmax'))

  step_size = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
  net.compile(
      loss='categorical_crossentropy',
      optimizer=Adam(learning_rate=step_size),
      metrics=['accuracy'],
  )
  return net

#### Create Tuners


For an overview of the tuners that KerasTuner provides, see https://keras-team.github.io/keras-tuner/documentation/tuners/#tuners

In [None]:
# Seed handed to every tuner below so that runs are reproducible.
SEED = 1

# Upper bound on the epochs the Hyperband tuner may train a single model for.
HYPERBAND_MAX_EPOCHS = 40

# Number of hyperparameter combinations (trials) tested by the RandomSearch
# and BayesianOptimization tuners.
MAX_TRIALS = 10

# Number of models built and fitted per trial, for robustness.
EXECUTION_PER_TRIAL = 2

# Passed to the BayesianOptimization tuner as num_initial_points.
BAYESIAN_NUM_INITIAL_POINTS = 1

# One output directory per search, suffixed with the current Unix time in
# whole seconds.
RANDOM_DIR = f"random-search-{int(time.time())}"
HYPERBAND_DIR = f"hyperband-search-{int(time.time())}"
BAYESIAN_DIR = f"bayesian-search-{int(time.time())}"

In [None]:
# All three tuners optimize validation accuracy of models from model_builder
# and share the seed, executions-per-trial setting and project name; each one
# writes to its own directory.

# Random search over MAX_TRIALS hyperparameter combinations.
random_tuner = kt.RandomSearch(
    model_builder,
    objective='val_accuracy',
    max_trials=MAX_TRIALS,
    executions_per_trial=EXECUTION_PER_TRIAL,
    seed=SEED,
    directory=RANDOM_DIR,
    project_name='deep_net_tuner',
)

# Hyperband is bounded by a per-model epoch cap rather than a trial count.
hyper_tuner = kt.Hyperband(
    model_builder,
    objective='val_accuracy',
    max_epochs=HYPERBAND_MAX_EPOCHS,
    executions_per_trial=EXECUTION_PER_TRIAL,
    seed=SEED,
    directory=HYPERBAND_DIR,
    project_name='deep_net_tuner',
)

# Bayesian optimization over MAX_TRIALS combinations.
bayesian_tuner = kt.BayesianOptimization(
    model_builder,
    objective='val_accuracy',
    max_trials=MAX_TRIALS,
    num_initial_points=BAYESIAN_NUM_INITIAL_POINTS,
    executions_per_trial=EXECUTION_PER_TRIAL,
    seed=SEED,
    directory=BAYESIAN_DIR,
    project_name='deep_net_tuner',
)

#### Search for Best Parameters using RandomSearch tuner

In [None]:
# Run the random search, holding out 20% of the training data for validation.
random_tuner.search(
    X_train,
    y_train,
    validation_split=0.2,
    epochs=20,
    batch_size=128,
    verbose=1,
)

In [None]:
# Take the top-ranked hyperparameter set from the random search and report it.
best_random_hps = random_tuner.get_best_hyperparameters(num_trials=1)[0]

# The empty first and last entries reproduce the blank lines around the report.
print("\n".join([
    "",
    "The random search is complete",
    f"The optimal number of units in the first layer is {best_random_hps.get('l1_units')}.",
    f"optimal number of units in the second layer is {best_random_hps.get('l2_units')}.",
    f"optimal number of units in the third layer is {best_random_hps.get('l3_units')}.",
    f"optimal dropout rate is {best_random_hps.get('do_rate')}.",
    f"optimal learning rate for the optimizer is {best_random_hps.get('learning_rate')}.",
    "",
]))

### Hyperband Search

__CAUTION__: This search will run for quite a while.

In [None]:
# Stop training a model early, based on its validation loss, with a patience of 5 epochs.
stop_early = tf.keras.callbacks.EarlyStopping(patience=5, monitor='val_loss')

# NOTE(review): Hyperband appears to schedule each trial's epoch budget itself
# (bounded by max_epochs) — confirm whether the epochs=20 passed here has any effect.
hyper_tuner.search(
    X_train,
    y_train,
    callbacks=[stop_early],
    validation_split=0.2,
    epochs=20,
    batch_size=128,
    verbose=1,
)

In [None]:
# Take the top-ranked hyperparameter set from the Hyperband search and report it.
best_hyper_hps = hyper_tuner.get_best_hyperparameters(num_trials=1)[0]

# The empty first and last entries reproduce the blank lines around the report.
print("\n".join([
    "",
    "The hyperband search is complete",
    f"The optimal number of units in the first layer is {best_hyper_hps.get('l1_units')}.",
    f"optimal number of units in the second layer is {best_hyper_hps.get('l2_units')}.",
    f"optimal number of units in the third layer is {best_hyper_hps.get('l3_units')}.",
    f"optimal dropout rate is {best_hyper_hps.get('do_rate')}.",
    f"optimal learning rate for the optimizer is {best_hyper_hps.get('learning_rate')}.",
    "",
]))

### Bayesian Search

In [None]:
# Run the Bayesian optimization search, holding out 20% of the training data for validation.
bayesian_tuner.search(
    X_train,
    y_train,
    validation_split=0.2,
    epochs=20,
    batch_size=128,
    verbose=1,
)

In [None]:
# Take the top-ranked hyperparameter set from the Bayesian search and report it.
best_bayesian_hps = bayesian_tuner.get_best_hyperparameters(num_trials=1)[0]

# The empty first and last entries reproduce the blank lines around the report.
print("\n".join([
    "",
    "The bayesian optimization search is complete",
    f"The optimal number of units in the first layer is {best_bayesian_hps.get('l1_units')}.",
    f"optimal number of units in the second layer is {best_bayesian_hps.get('l2_units')}.",
    f"optimal number of units in the third layer is {best_bayesian_hps.get('l3_units')}.",
    f"optimal dropout rate is {best_bayesian_hps.get('do_rate')}.",
    f"optimal learning rate for the optimizer is {best_bayesian_hps.get('learning_rate')}.",
    "",
]))

In [None]:
# Choose which search's outcome the final model is built from: the random
# search here. Point both names at another tuner/result pair to switch.
tuner, best_hps = random_tuner, best_random_hps

In [None]:
# Build the model with the optimal hyperparameters and train it on the data for 50 epochs
model = tuner.hypermodel.build(best_hps)
history = model.fit(X_train, y_train, epochs=50, validation_split=0.2)

val_acc_per_epoch = history.history['val_accuracy']
best_epoch = val_acc_per_epoch.index(max(val_acc_per_epoch)) + 1
print('Best epoch: %d' % (best_epoch,))

In [None]:
hypermodel = tuner.hypermodel.build(best_hps)

# Retrain the model
hypermodel.fit(X_train, y_train, epochs=best_epoch, validation_split=0.2, verbose=1)

In [None]:
# Score the retrained model on the test set, which took no part in tuning or training.
generalization_result = hypermodel.evaluate(X_test, y_test)
print(f"[test loss, test accuracy]: {generalization_result}")