Keras metrics:
* https://keras.io/api/metrics/
* https://machinelearningmastery.com/custom-metrics-deep-learning-keras-python/
* https://neptune.ai/blog/keras-metrics

In the preferred organization, the final layer has a linear activation. For historical reasons, the outputs in this form are referred to as logits. The loss function then takes an additional argument, from_logits = True, which tells it to apply the output non-linearity (softmax for a multi-class output, sigmoid for a single-unit binary output) inside the loss calculation. This allows for an optimized, more numerically stable implementation.

In [None]:
!pip install keras_tuner

In [None]:
import keras
from keras import backend as K
from keras.models import Sequential
from keras.layers import Activation
from keras.layers.core import Dense
from keras.optimizers import Adam
from keras.metrics import categorical_crossentropy

from sklearn.metrics import confusion_matrix
import itertools

import keras_tuner as kt
from keras_tuner import RandomSearch, HyperModel, BayesianOptimization, Hyperband
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.optimizers import Adam, SGD
import tensorflow as tf

### Functions

In [None]:
class ClfHyperModel(kt.HyperModel):
    """Keras Tuner hypermodel for a one-hidden-layer binary classifier.

    The network is ``Dense(units) -> Dropout -> Dense(1, linear)``. Because
    the final layer is linear it emits logits, so both the loss and the AUC
    metric are configured with ``from_logits=True``.

    Hyperparameters registered in ``build``: ``units``, ``dense_activation``,
    ``dropout``, ``learning_rate`` and ``optimizer``. ``fit`` additionally
    registers ``batch_size``.
    """

    def __init__(self, input_shape):
        """Store the per-sample input shape handed to the first Dense layer."""
        super().__init__()
        self.input_shape = input_shape

    def build(self, hp):
        """Build and compile a model for the hyperparameter set ``hp``.

        Args:
            hp: ``keras_tuner.HyperParameters`` instance to sample from.

        Returns:
            A compiled ``Sequential`` model whose single output is a logit.
        """
        model = Sequential()

        model.add(
            Dense(
                units=hp.Int('units', 8, 32, 4, default=8),
                activation=hp.Choice(
                    'dense_activation',
                    values=['relu', 'tanh', 'softmax'],
                    default='relu'),
                # Use the shape given to this instance rather than whatever
                # global happens to be called ``input_shape``.
                input_shape=self.input_shape,
            )
        )

        model.add(
            Dropout(
                hp.Float(
                    'dropout',
                    min_value=0.0,
                    max_value=0.3,
                    default=0.0,
                    step=0.05)
            )
        )
        # Linear output: the model produces logits, not probabilities.
        model.add(Dense(1, activation='linear'))

        hp_learning_rate = hp.Choice('learning_rate',
                                     values=[1e-2, 1e-3, 1e-4])
        hp_optimizer = hp.Choice('optimizer', values=["Adam", "SGD"])

        # Map names to classes so only the selected optimizer is instantiated.
        optimizer_classes = {"Adam": Adam, "SGD": SGD}
        optimizer = optimizer_classes[hp_optimizer](learning_rate=hp_learning_rate)

        model.compile(
            optimizer=optimizer,
            # from_logits=True makes the loss apply the sigmoid itself.
            loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
            # An explicit name keeps the logged key stable ("auc"/"val_auc")
            # across repeated builds; unnamed metrics are auto-suffixed.
            metrics=[tf.keras.metrics.AUC(name="auc", from_logits=True)],
        )

        return model

    def fit(self, hp, model, *args, **kwargs):
        """Train ``model`` with a tunable ``batch_size``.

        All other positional and keyword arguments are forwarded unchanged to
        ``model.fit``; its return value is returned as-is.
        """
        return model.fit(
            *args,
            batch_size=hp.Choice("batch_size", [16, 32, 64, 128]),
            **kwargs,
        )

### Baseline NN

In [None]:
# Baseline network: two ReLU hidden layers followed by a single linear output
# unit (i.e. the model emits a raw, un-squashed score per sample).
n_inputs = X_train_nn.shape[1]

nn_model = Sequential()
nn_model.add(Dense(n_inputs, activation='relu', input_shape=(n_inputs,)))
nn_model.add(Dense(32, activation='relu'))
nn_model.add(Dense(1, activation='linear'))

In [None]:
# nn_model ends in a linear unit, so its outputs are logits. The loss must
# therefore apply the sigmoid itself (from_logits=True), and accuracy must
# threshold at 0 (logit > 0 <=> probability > 0.5). The metric keeps the name
# 'accuracy' so the history keys stay 'accuracy' / 'val_accuracy'.
nn_model.compile(
    Adam(learning_rate=0.0001),
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy', threshold=0.0)],
)

In [None]:
# Train the baseline model for 100 epochs, validating on the held-out split
# after each epoch; `history` keeps the per-epoch logs.
history = nn_model.fit(
    X_train_nn,
    y_train_nn,
    validation_data=(X_val_nn, y_val_nn),
    epochs=100,
    batch_size=25,
    verbose=2,
)

In [None]:
# Show which per-epoch series were recorded by the last fit() call.
history.history.keys()
# Example output recorded by the author:
# dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])

In [None]:
# Plot every recorded training/validation series against the epoch index,
# with the y-axis limited to [0, 1].
pd.DataFrame(data=history.history).plot(figsize=(8, 5))
plt.ylim(0, 1)
plt.grid(True)
plt.show()

In [None]:
# Training vs. validation loss per epoch (epochs numbered from 1).
history_dict = history.history
loss, val_loss = history_dict['loss'], history_dict['val_loss']

epochs = range(1, 1 + len(loss))

plt.plot(epochs, loss, 'bo', label='Training loss')   # blue dots
plt.plot(epochs, val_loss, 'b', label='Validation loss')  # blue line
plt.ylabel('Loss')
plt.xlabel('Epochs')
plt.legend()
plt.show()

In [None]:
# Training vs. validation accuracy per epoch; reuses `history_dict` and
# `epochs` from the loss-plot cell.
acc, val_acc = history_dict['accuracy'], history_dict['val_accuracy']

plt.plot(epochs, acc, 'bo', label='Training accuracy')   # blue dots
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')  # blue line
plt.ylabel('Accuracy')
plt.xlabel('Epochs')
plt.legend()
plt.show()

### Callback model

In [None]:
# Deeper network used with callbacks: an input-width ReLU layer, four
# narrowing ReLU hidden layers, and one linear output unit.
n_inputs = X_train_nn.shape[1]

nn_model_cb = Sequential([
    Dense(n_inputs, activation='relu', input_shape=(n_inputs,)),
    *[Dense(width, activation='relu') for width in (32, 20, 15, 10)],
    Dense(1, activation='linear'),
])

In [None]:
# nn_model_cb ends in a linear unit, so its outputs are logits. The loss must
# apply the sigmoid itself (from_logits=True) and accuracy must threshold at 0
# (logit > 0 <=> probability > 0.5). The metric keeps the name 'accuracy' so
# the history keys are unchanged.
nn_model_cb.compile(
    Adam(learning_rate=0.0001),
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy', threshold=0.0)],
)

# Save the model to disk only when the monitored quantity improves, and stop
# after 10 epochs without improvement, restoring the best weights.
checkpoint_cb = keras.callbacks.ModelCheckpoint("my_keras_model.h5", save_best_only=True)
early_stopping_cb = keras.callbacks.EarlyStopping(patience=10,
                                                  restore_best_weights=True)

history = nn_model_cb.fit(X_train_nn, y_train_nn,
                          batch_size=25,
                          epochs=100,
                          verbose=2,
                          validation_data=(X_val_nn, y_val_nn),
                          callbacks=[checkpoint_cb, early_stopping_cb])

# Reload the best checkpoint from disk and score it on the test split.
model_cb = keras.models.load_model("my_keras_model.h5")  # rollback to best model
cb_test = model_cb.evaluate(X_test, y_test)

In [None]:
class PrintValTrainRatioCallback(keras.callbacks.Callback):
    """Print the ratio of validation loss to training loss after each epoch."""

    def on_epoch_end(self, epoch, logs=None):
        """Print ``val_loss / loss`` for the epoch that just finished.

        Nothing is printed when either value is missing from ``logs`` (e.g.
        training without validation data) or when the training loss is zero,
        since the ratio is undefined in those cases.
        """
        logs = logs or {}
        train_loss = logs.get("loss")
        val_loss = logs.get("val_loss")
        if train_loss is None or val_loss is None or train_loss == 0:
            return
        print("\nval/train: {:.2f}".format(val_loss / train_loss))

In [None]:
# Create the ratio-printing callback; the bare name on the last line makes the
# notebook display the object.
# NOTE(review): it is not passed to any fit() call in the cells visible here.
val_train_ratio_cb = PrintValTrainRatioCallback()
val_train_ratio_cb

In [None]:
# Plot every series recorded by the most recent fit() stored in `history`,
# with the y-axis limited to [0, 1].
pd.DataFrame(data=history.history).plot(figsize=(8, 5))
plt.ylim(0, 1)
plt.grid(True)
plt.show()

In [None]:
nn_model_predictions_cb = model_cb.predict(X_test, batch_size=200, verbose=0)
# The network has a single linear output unit, so predictions have one column
# and argmax over axis 1 would always return 0. Treating the output as a
# logit, class 1 corresponds to logit > 0 (probability > 0.5).
# NOTE(review): assumes the model was trained with a from_logits loss.
nn_model_class_predictions_cb = (nn_model_predictions_cb > 0).astype(int).ravel()

### Hyperparameter NN

In [None]:
# Per-sample input shape: a 1-tuple holding the length of X_train_nn's
# second axis.
input_shape = (X_train_nn.shape[1],)
hypermodel = ClfHyperModel(input_shape=input_shape)

In [None]:
# Random-search tuner: 20 trials, each run twice, maximising "val_auc".
# Results are written to my_dir/helloworld and any previous contents are
# overwritten (the other tuners in this notebook use the same location).
tuner_rs = RandomSearch(
    hypermodel=hypermodel,
    objective=kt.Objective("val_auc", direction="max"),
    max_trials=20,
    executions_per_trial=2,
    seed=42,
    directory="my_dir",
    project_name="helloworld",
    overwrite=True,
)

In [None]:
# Run the random search: up to 40 epochs per execution, cut short by early
# stopping after 2 epochs without improvement.
tuner_rs.search(
    X_train_nn,
    y_train_nn,
    validation_data=(X_val_nn, y_val_nn),
    epochs=40,
    callbacks=[tf.keras.callbacks.EarlyStopping(patience=2)],
    verbose=1,
)

In [None]:
# Take the hyperparameter set of the single best random-search trial.
best_hps=tuner_rs.get_best_hyperparameters(num_trials=1)[0]

# Report every tuned value: the first five are registered in
# ClfHyperModel.build, batch_size in ClfHyperModel.fit.
print(f"""
units = {best_hps.get('units')}
optimizer = {best_hps.get('optimizer')}
activation = {best_hps.get('dense_activation')}
dropout = {best_hps.get('dropout')}
batch_size = {best_hps.get('batch_size')}
learning_rate = {best_hps.get('learning_rate')}
""")

In [None]:
# Rebuild a fresh (untrained) model from the best hyperparameters.
tuned_model = tuner_rs.hypermodel.build(best_hps)

early_stopping_cb = keras.callbacks.EarlyStopping(patience=10,
                                                  restore_best_weights=True)
# Use a checkpoint dedicated to this model. Reusing the callback created for
# another model would overwrite that model's file and start from its stale
# "best score so far" state.
tuned_checkpoint_cb = keras.callbacks.ModelCheckpoint("tuned_keras_model.h5",
                                                      save_best_only=True)

history_tm = tuned_model.fit(
    x=X_train_nn,
    y=y_train_nn,
    # batch_size is one of the tuned hyperparameters; without this the model
    # would silently train with Keras' default batch size.
    batch_size=best_hps.get('batch_size'),
    epochs=100,
    verbose=2,
    validation_data=(X_val_nn, y_val_nn),
    callbacks=[tuned_checkpoint_cb, early_stopping_cb],
)

In [None]:
# Plot every series recorded while training the tuned model, with the
# y-axis limited to [0, 1].
pd.DataFrame(data=history_tm.history).plot(figsize=(8, 5))
plt.ylim(0, 1)
plt.grid(True)
plt.show()

In [None]:
tnn_model_predictions = tuned_model.predict(X_test, batch_size=100, verbose=0)
# The tuned model has a single linear output unit, so predictions have one
# column and argmax over axis 1 would always return 0. Treating the output as
# a logit, class 1 corresponds to logit > 0 (probability > 0.5).
# NOTE(review): assumes the model was trained with a from_logits loss.
tnn_model_class_predictions = (tnn_model_predictions > 0).astype(int).ravel()

### Bayesian optimisation

In [None]:
# Bayesian-optimisation tuner: 20 trials, each run twice, maximising
# "val_auc". It writes to my_dir/helloworld with overwrite=True, i.e. the
# same location the other tuners in this notebook use.
tuner_bo = BayesianOptimization(
    hypermodel=hypermodel,
    objective=kt.Objective("val_auc", direction="max"),
    max_trials=20,
    executions_per_trial=2,
    seed=42,
    directory="my_dir",
    project_name="helloworld",
    overwrite=True,
)

tuner_bo.search(
    X_train_nn,
    y_train_nn,
    validation_data=(X_val_nn, y_val_nn),
    epochs=10,
    verbose=1,
)

# Fetch the best model found and score it on the test split.
best_model_bo = tuner_bo.get_best_models(num_models=1)[0]
best_model_bo.evaluate(X_test, y_test)

In [None]:
early_stopping_cb = keras.callbacks.EarlyStopping(patience=10,
                                                  restore_best_weights=True)
# Use a checkpoint dedicated to this model. Reusing the callback created for
# another model would overwrite that model's file and start from its stale
# "best score so far" state.
bo_checkpoint_cb = keras.callbacks.ModelCheckpoint("bo_keras_model.h5",
                                                   save_best_only=True)

# Continue training the best Bayesian-optimisation model, keeping the batch
# size that was tuned alongside the other hyperparameters.
history_bo = best_model_bo.fit(
    x=X_train_nn,
    y=y_train_nn,
    batch_size=tuner_bo.get_best_hyperparameters(num_trials=1)[0].get('batch_size'),
    epochs=100,
    verbose=2,
    validation_data=(X_val_nn, y_val_nn),
    callbacks=[bo_checkpoint_cb, early_stopping_cb],
)

In [None]:
# Plot every series recorded while refitting the Bayesian-optimisation
# model, with the y-axis limited to [0, 1].
pd.DataFrame(data=history_bo.history).plot(figsize=(8, 5))
plt.ylim(0, 1)
plt.grid(True)
plt.show()

### Hyperband

In [None]:
# Hyperband tuner: at most 10 epochs per model, each trial run twice,
# maximising "val_auc". It writes to my_dir/helloworld with overwrite=True,
# i.e. the same location the other tuners in this notebook use.
tuner_hb = Hyperband(
    hypermodel=hypermodel,
    objective=kt.Objective("val_auc", direction="max"),
    max_epochs=10,
    executions_per_trial=2,
    seed=42,
    directory="my_dir",
    project_name="helloworld",
    overwrite=True,
)
tuner_hb.search(
    X_train_nn,
    y_train_nn,
    validation_data=(X_val_nn, y_val_nn),
    epochs=10,
    verbose=1,
)
# Fetch the best model found and score it on the test split.
best_model_hb = tuner_hb.get_best_models(num_models=1)[0]
best_model_hb.evaluate(X_test, y_test)

### Batch normalisation

In [None]:
# Same layer widths as the callback model, with batch normalisation after
# every ReLU layer and a sigmoid output (the model emits probabilities).
bn_model = Sequential()
bn_model.add(Dense(n_inputs, activation='relu', input_shape=(n_inputs,)))
bn_model.add(BatchNormalization())
bn_model.add(Dense(32, activation='relu'))
bn_model.add(BatchNormalization())
bn_model.add(Dense(20, activation='relu'))
bn_model.add(BatchNormalization())
bn_model.add(Dense(15, activation='relu'))
bn_model.add(BatchNormalization())
bn_model.add(Dense(10, activation='relu'))
bn_model.add(BatchNormalization())
bn_model.add(Dense(1, activation='sigmoid'))

In [None]:
# The sigmoid output already yields probabilities, so the plain
# 'binary_crossentropy' loss is the matching choice here.
bn_model.compile(Adam(learning_rate = 0.0001), loss = 'binary_crossentropy', metrics = [keras.metrics.AUC()])

# Use a checkpoint dedicated to this model. Reusing the callback created for
# another model would overwrite that model's file and start from its stale
# "best score so far" state.
bn_checkpoint_cb = keras.callbacks.ModelCheckpoint("bn_keras_model.h5", save_best_only=True)
early_stopping_cb = keras.callbacks.EarlyStopping(patience=10,
                                                  restore_best_weights=True)

history_bn = bn_model.fit(X_train_nn, y_train_nn,
                          batch_size=25,
                          epochs=100,
                          verbose=2,
                          validation_data=(X_val_nn, y_val_nn),
                          callbacks=[bn_checkpoint_cb, early_stopping_cb])

# Reload the best checkpoint so that `model_bn` is actually defined before it
# is evaluated (it was previously referenced without ever being assigned).
model_bn = keras.models.load_model("bn_keras_model.h5")  # rollback to best model
bn_test = model_bn.evaluate(X_test, y_test)

In [None]:
# Plot every series recorded while training the batch-normalised model,
# with the y-axis limited to [0, 1].
pd.DataFrame(data=history_bn.history).plot(figsize=(8, 5))
plt.ylim(0, 1)
plt.grid(True)
plt.show()

In [None]:
nn_model_predictions_bn = model_bn.predict(X_test, batch_size=200, verbose=0)
# The batch-normalised network has a single sigmoid output unit, so
# predictions have one column of probabilities and argmax over axis 1 would
# always return 0. Threshold at 0.5 to obtain the 0/1 class labels instead.
nn_model_class_predictions_bn = (nn_model_predictions_bn > 0.5).astype(int).ravel()

### Multiclass

In [None]:
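#nn_model.compile(Adam(learning_rate = 0.0001), loss = tf.keras.losses.CategoricalCrossentropy(from_logits = True), metrics = ['accuracy']) # multiclass: one linear output unit per class, one-hot labels (the binary equivalent is BinaryCrossentropy(from_logits = True))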