Click [here]() to access the associated Medium article.

# Setup


In [1]:
!pip install -q keras-tuner tensorflow pandas scikit-learn seaborn

[33mDEPRECATION: textract 1.6.5 has a non-standard dependency specifier extract-msg<=0.29.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of textract or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063[0m[33m
[0m

In [2]:
import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from tensorflow import keras

import keras_tuner

# Set random seed for reproducibility
SEED = 42

Using TensorFlow backend


# Data

In [3]:
# Load the data
data = sns.load_dataset("titanic")

# Preprocess the data
data = data.dropna()  # remove rows with missing values
data = pd.get_dummies(data)  # convert categorical variables to dummy variables

# Split into features and target
X = data.drop("survived", axis=1)
y = data["survived"]

# Standardize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED
)

# Building the Model

In [4]:
def build_model(hp: keras_tuner.HyperParameters):
    model = keras.Sequential()

    # Tune the number of units in the first Dense layer
    # Choose an optimal value between 8-64
    hp_units = hp.Int("units", min_value=8, max_value=64, step=8)

    model.add(keras.layers.Dense(units=hp_units, activation="relu"))
    model.add(keras.layers.Dense(1, activation="sigmoid"))

    # Tune the learning rate for the optimizer
    # Choose an optimal value from 0.01, 0.001, or 0.0001
    hp_learning_rate = hp.Choice("learning_rate", values=[1e-2, 1e-3, 1e-4])

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=hp_learning_rate),
        loss=keras.losses.BinaryCrossentropy(from_logits=True),
        metrics=["accuracy"],
    )

    return model

# Defining the Tuner

In [5]:
tuner = keras_tuner.BayesianOptimization(
    build_model,
    objective="val_accuracy",
    max_trials=5,
    seed=SEED,
    max_retries_per_trial=3,
    directory="search",
    project_name="titanic",
    overwrite=True,
)

# Summary of the search space
tuner.search_space_summary()



Search space summary
Default search space size: 2
units (Int)
{'default': None, 'conditions': [], 'min_value': 8, 'max_value': 64, 'step': 8, 'sampling': 'linear'}
learning_rate (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01, 0.001, 0.0001], 'ordered': True}


# Hyperparameter Search

In [6]:
tuner.search(
    X_train,
    y_train,
    epochs=10,
    validation_split=0.2,
    callbacks=[keras.callbacks.TensorBoard("./search/tb_logs")],
)

Trial 5 Complete [00h 00m 01s]
val_accuracy: 1.0

Best val_accuracy So Far: 1.0
Total elapsed time: 00h 00m 03s


# Best Model

In [7]:
# Get the best model
best_model = tuner.get_best_models(num_models=1)[0]
best_model.build(X_train.shape)

# Summary of the best model
best_model.summary()



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (145, 48)                 1488      
                                                                 
 dense_1 (Dense)             (145, 1)                  49        
                                                                 
Total params: 1537 (6.00 KB)
Trainable params: 1537 (6.00 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


# Summary of Search Results

In [8]:
tuner.results_summary()

Results summary
Results in search/titanic
Showing 10 best trials
Objective(name="val_accuracy", direction="max")

Trial 0 summary
Hyperparameters:
units: 48
learning_rate: 0.01
Score: 1.0

Trial 1 summary
Hyperparameters:
units: 32
learning_rate: 0.01
Score: 1.0

Trial 3 summary
Hyperparameters:
units: 40
learning_rate: 0.01
Score: 1.0

Trial 4 summary
Hyperparameters:
units: 64
learning_rate: 0.01
Score: 1.0

Trial 2 summary
Hyperparameters:
units: 64
learning_rate: 0.001
Score: 0.8620689511299133


# Evaluation

In [9]:
# Evaluate the model on the test data
score = best_model.evaluate(X_test, y_test, verbose=0)

print("Test loss:", score[0])
print("Test accuracy:", score[1])

Test loss: 0.10603184998035431
Test accuracy: 0.9729729890823364


# Visualization by TensorBoard

In [None]:
!tensorboard --logdir search/tb_logs