# Keras Tuner

Use the KerasTuner package to search for the best model hyperparameters (layer sizes, number of hidden blocks, dropout rate, and learning rate).

# Versions

- V2: Switching from BatchNormalization to Dropout
- V1: Original - build_model01()

# References

- https://www.kaggle.com/fchollet/keras-kerastuner-best-practices
- https://www.kaggle.com/fchollet/moa-keras-kerastuner-best-practices
- https://keras.io/keras_tuner/
- https://keras.io/guides/keras_tuner/getting_started/

In [None]:
import os
import time

import pandas as pd
import numpy as np
from pathlib import Path

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import random

from sklearn.preprocessing import StandardScaler

from sklearn.metrics import roc_auc_score
from sklearn import model_selection
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold,StratifiedKFold, GroupKFold

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import layers
from tensorflow.keras.initializers import RandomUniform

# Configuration

In [None]:
class Config:
    """Notebook-wide settings shared by the cells below."""

    # Run identification.
    competition: str = "TPS_202111"
    debug: bool = False

    # Seed used for the RNGs, the hold-out split and the K-fold split.
    seed: int = 42
    # Number of cross-validation folds.
    n_folds: int = 5

    # Training-loop settings.
    epochs: int = 100
    batch_size: int = 1024

In [None]:
def seed_everything(seed=Config.seed):
    """Seed the NumPy, Python and TensorFlow random number generators.

    Args:
        seed: Value handed to each generator; defaults to ``Config.seed``.
    """
    for seed_fn in (np.random.seed, random.seed, tf.random.set_seed):
        seed_fn(seed)

In [None]:
# Directory the train, test and sample-submission CSV files are read from.
data_dir = Path('../input/tabular-playground-series-nov-2021')

In [None]:
# Load the three competition CSV files.
# For a quick smoke test, pass nrows=10000 (train) / nrows=1000 (test)
# to the corresponding read_csv call.
train_df = pd.read_csv(data_dir / "train.csv")
test_df = pd.read_csv(data_dir / "test.csv")
sample_submission = pd.read_csv(data_dir / "sample_submission.csv")

# Report the size of both data sets.
print(f"train data: Rows={train_df.shape[0]}, Columns={train_df.shape[1]}")
print(f"test data : Rows={test_df.shape[0]}, Columns={test_df.shape[1]}")

In [None]:
# Preview the first rows of the training data.
train_df.head()

# Feature Engineering

In [None]:
# Every column except the row identifier and the label is a model input.
non_feature_columns = ('id', 'target')
features = [name for name in train_df.columns if name not in non_feature_columns]

# Standardize/Normalize the Data

In [None]:
# Learn the scaling statistics from the training features, then apply the same
# fitted scaler to both the training and the test features.
# NOTE(review): the scaler is fitted on all training rows before any
# train/validation split, so validation rows contribute to the statistics.
scaler = StandardScaler()
scaler.fit(train_df[features])

train_df[features] = scaler.transform(train_df[features])
test_df[features] = scaler.transform(test_df[features])

# Extract Target and Drop Unused Columns

In [None]:
# Separate the label from the model inputs; "id" is dropped from both frames.
y = train_df["target"]

X = train_df.drop(columns=["id", "target"])
test = test_df.drop(columns=["id"])

In [None]:
# Hold out 20% of the rows for validation; the fixed seed makes the split
# repeatable between runs.
x_train, x_valid, y_train, y_valid = train_test_split(X, y,
                                                      test_size=0.2,
                                                      random_state=Config.seed)

# Models

In [None]:
import keras_tuner as kt

def make_model(hp):
    """Build and compile a dense binary classifier for one Keras Tuner trial.

    Architecture: input -> Dense("dense_01") -> Dropout -> ``num_block`` x
    (Dense("units_<i>") -> Dropout) -> Dense(1, sigmoid).

    Args:
        hp: Keras Tuner ``HyperParameters`` object; the search space is
            declared on it and the trial's values are read from it.

    Returns:
        A compiled ``keras.Model`` using binary cross-entropy loss, the Adam
        optimizer, and the metrics ``roc_auc`` and ``acc``.

    Note:
        The input width is read from the module-level ``X`` frame. The model
        summary is printed on every call.
    """
    # `shape` must be a tuple: `(n)` is just the integer n, `(n,)` is a 1-tuple.
    inputs = keras.Input(shape=(X.shape[1],))

    x = layers.Dense(
        units=hp.Int("dense_01", min_value=128, max_value=256, step=32),
        activation='relu',
    )(inputs)

    # Every Dropout layer requests the same hyperparameter name
    # ('dense_dropout'), so one dropout rate is tuned and shared by all of them.
    x = layers.Dropout(
        hp.Float('dense_dropout', min_value=0., max_value=0.7)
    )(x)

    # Number of additional Dense + Dropout blocks stacked after the first layer.
    num_block = hp.Int('num_block', min_value=1, max_value=3, step=1)

    for i in range(num_block):
        x = layers.Dense(
            units=hp.Int("units_" + str(i), min_value=32, max_value=256, step=32),
            activation='relu',
        )(x)
        x = layers.Dropout(
            hp.Float('dense_dropout', min_value=0., max_value=0.7)
        )(x)

    # Single sigmoid unit: the predicted probability of the positive class.
    outputs = keras.layers.Dense(1, activation="sigmoid")(x)

    model = keras.Model(inputs, outputs)

    # Named 'roc_auc' so the logged keys are 'roc_auc' / 'val_roc_auc'.
    roc_auc = tf.keras.metrics.AUC(name='roc_auc', curve='ROC')

    model.compile(
        loss="binary_crossentropy",
        optimizer=keras.optimizers.Adam(
            hp.Choice("learning_rate", values=[1e-2, 1e-3, 1e-4])
        ),
        metrics=[roc_auc, "acc"]
    )

    model.summary()
    return model

In [None]:
# Random search over the space declared in make_model; trials are ranked by
# validation accuracy.
tuner = kt.tuners.RandomSearch(
    hypermodel=make_model,
    objective='val_acc',
    max_trials=100,
    overwrite=True,
)

# Print the hyperparameters registered so far.
tuner.search_space_summary()

In [None]:
# Stop a trial early when validation accuracy stops improving.
# NOTE(review): with baseline=0.9, a trial whose val_acc never exceeds 0.9 is
# stopped after `patience` epochs - confirm this threshold is reachable here.
callbacks = [
    keras.callbacks.EarlyStopping(
        monitor='val_acc',
        mode='max',
        patience=3,
        baseline=0.9,
    )
]

# tuner.search() takes the same arguments as model.fit().
tuner.search(
    x_train, y_train,
    validation_split=0.2,
    callbacks=callbacks,
    batch_size=Config.batch_size,
    verbose=1,
    # Read from Config (currently 100) instead of repeating the literal.
    epochs=Config.epochs,
)

In [None]:
# Print a summary of the search results.
tuner.results_summary()

# Best Model

In [None]:
# Take the top-ranked hyperparameter set and build a fresh model from it.
best_hp = tuner.get_best_hyperparameters()[0]
best_model = make_model(best_hp)

banner = "=" * 20
print(banner, " Best Model ", banner)
best_model.summary()

In [None]:
# best_model.save("best_model")

In [None]:
best_hp = tuner.get_best_hyperparameters()[0]

print(f"""
The hyperparameter search is complete. The optimal number of units in the first densely-connected
layer is {best_hp.get('dense_01')} and the optimal learning rate for the optimizer
is {best_hp.get('learning_rate')}.
""")

print(f"First Dense Layer: {best_hp.get('dense_01')}")

# Report only the hidden blocks the best model actually builds. Reading
# 'units_1' / 'units_2' unconditionally can raise KeyError when the best trial
# never registered them, and would otherwise list layers that are not used.
for block_idx in range(best_hp.get('num_block')):
    units_name = f"units_{block_idx}"
    print(f"Hidden Block {block_idx} ({units_name}): {best_hp.get(units_name)}")

print(f"Best Learning Rate: {best_hp.get('learning_rate')}")

# Find the best epoch value

In [None]:
# Rebuild the best configuration and train it for a fixed 50 epochs, keeping
# the per-epoch history so the best epoch count can be read from it.
# NOTE(review): no batch_size is passed here, so Keras' default is used rather
# than the Config.batch_size used during the search - confirm this is intended.
best_hp = tuner.get_best_hyperparameters()[0]
model = make_model(best_hp)
history = model.fit(x_train, y_train, validation_split=0.2, epochs=50)

In [None]:
# Pick the (1-based) epoch with the highest validation accuracy; on ties the
# earliest such epoch wins.
val_acc_per_epoch = history.history['val_acc']
best_epoch, _ = max(
    enumerate(val_acc_per_epoch, start=1),
    key=lambda epoch_and_acc: epoch_and_acc[1],
)
print(f"Best epoch: {best_epoch}")

# Train the production model

In [None]:
# Train a fresh model with the best hyperparameters for exactly best_epoch
# epochs, this time without a validation split.
best_model = make_model(best_hp)
best_model.fit(x_train, y_train, epochs=best_epoch)

# Predict Validation

In [None]:
# Predict on the hold-out validation rows with the retrained model.
preds_valid = best_model.predict(x_valid)

In [None]:
# Score the hold-out predictions with ROC AUC.
auc = roc_auc_score(y_valid,  preds_valid)
print(f"Validation AUC Score: {auc}")

# Cross Validation

In [None]:
seed_everything()

final_test_predictions = []   # one array of test-set predictions per fold
final_valid_predictions = {}  # row position -> out-of-fold prediction
scores = []                   # validation ROC AUC per fold
histories = []                # Keras History object per fold

kf = StratifiedKFold(n_splits=Config.n_folds, random_state=Config.seed, shuffle=True)

for fold, (train_idx, valid_idx) in enumerate(kf.split(X=X, y=y)):
    print(10*"=", f"Fold={fold+1}", 10*"=")
    start_time = time.time()

    # kf.split() yields positional indices, so select rows by position.
    x_train = X.iloc[train_idx]
    x_valid = X.iloc[valid_idx]

    y_train = y.iloc[train_idx]
    y_valid = y.iloc[valid_idx]

    # A fresh, untrained model per fold so no weights carry over between folds.
    best_model = make_model(best_hp)

    # make_model names its AUC metric 'roc_auc', so the validation key that
    # Keras logs is 'val_roc_auc'.
    early_stopping_cb = keras.callbacks.EarlyStopping(monitor="val_roc_auc",
                                                      mode='max',
                                                      verbose=1,
                                                      restore_best_weights=True,
                                                      patience=3)

    lr_scheduler_cb = keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.2,
        patience=10,
        mode='min'
    )

    # Fit on this fold's training rows only; the validation rows must stay
    # unseen for the out-of-fold score to be meaningful.
    history = best_model.fit(
        x_train, y_train,
        callbacks=[early_stopping_cb, lr_scheduler_cb],
        batch_size=Config.batch_size,
        validation_data=(x_valid, y_valid),
        epochs=Config.epochs
    )
    histories.append(history)

    # Predictions for OOF
    print("--- Predicting OOF ---")
    preds_valid = best_model.predict(x_valid)[:, -1]
    final_valid_predictions.update(dict(zip(valid_idx, preds_valid)))

    auc = roc_auc_score(y_valid, preds_valid)
    scores.append(auc)

    # Run time covers training and out-of-fold scoring, not test prediction.
    run_time = time.time() - start_time

    # Predictions for Test Data
    print("--- Predicting Test Data ---")
    test_preds = best_model.predict(test_df[features])[:, -1]
    final_test_predictions.append(test_preds)
    print(f"Fold={fold+1}, auc: {auc:.8f}, Run Time: {run_time:.2f}")


# Scores

In [None]:
# Summarise the per-fold AUCs; "Adjusted" is the mean minus one standard deviation.
mean_score = np.mean(scores)
std_score = np.std(scores)
print(f"Scores -> Adjusted: {mean_score - std_score:.8f} , mean: {mean_score:.8f}, std: {std_score:.8f}")

# History

In [None]:
def plot_history(history, metric, val_metric, title):
    """Plot a training metric and its validation counterpart per epoch.

    Args:
        history: Object whose ``history`` dict and ``epoch`` list are read
            (as returned by ``model.fit``).
        metric: Key of the training series in ``history.history``.
        val_metric: Key of the validation series in ``history.history``.
        title: Human-readable metric name; used in the figure title and as
            the y-axis label.
    """
    train_values = history.history[metric]
    valid_values = history.history[val_metric]

    epochs = history.epoch

    plt.figure(figsize=(11, 4))

    plt.plot(epochs, train_values, label=metric, color="r")
    plt.plot(epochs, valid_values, label=val_metric, color="b")

    plt.xlabel("Epoch")
    # Label the axis with the plotted metric; it was hard-coded to "Loss",
    # which mislabelled the accuracy and AUC plots.
    plt.ylabel(title)
    plt.legend()
    plt.grid(True)
    plt.title(f"Training and Validation {title}")

    plt.show()


In [None]:
# List the metric names recorded in the most recent training history.
history.history.keys()

In [None]:
# (training key, plot title) pairs, drawn in this order for every fold; the
# validation key is the training key prefixed with "val_".
curves_to_plot = (
    ("acc", "Accuracy"),
    ("loss", "Loss"),
    ("roc_auc", "AUC"),
)

for fold_number, fold_history in enumerate(histories, start=1):
    print(20*'=', f"Fold = {fold_number}", 20*'=')

    for train_key, plot_title in curves_to_plot:
        plot_history(fold_history, train_key, "val_" + train_key, plot_title)

    plt.show()


# Submission File

In [None]:
# Stack the per-fold test predictions as columns and average across folds.
fold_predictions = np.column_stack(final_test_predictions)
sample_submission['target'] = np.mean(fold_predictions, axis=1)

# Write the same frame under both file names.
for output_name in ("test_pred_2.csv", "submission.csv"):
    sample_submission.to_csv(output_name, index=None)

sample_submission