# INTRODUCTION

## Libraries

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import random

from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder, MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score
from sklearn.compose import ColumnTransformer


import tensorflow as tf
from tensorflow.keras import layers, models, Sequential, regularizers
from tensorflow.keras.layers import SimpleRNN, Dense, Dropout, Embedding, LSTM, GRU
from tensorflow.keras.optimizers.legacy import Adam, RMSprop, SGD
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from tensorflow.keras.utils import timeseries_dataset_from_array
from tensorflow.data import Dataset, AUTOTUNE

from keras.regularizers import L1, L2, L1L2

import keras_tuner as kt

%load_ext tensorboard

## Import data & column groups

In [2]:
DAILY_DATA_PATH = "data.v3/daily" 

df = pd.read_parquet(os.path.join(DAILY_DATA_PATH, "daily_flights_and_weather_merged.parquet"))

# Flights column groups
flights_terminal_cols = ['flights_arr_A', 'flights_arr_B', 'flights_arr_C', 'flights_arr_D', 'flights_arr_E',
                         'flights_dep_A', 'flights_dep_B', 'flights_dep_C', 'flights_dep_D', 'flights_dep_E']

flights_non_terminal_cols = ['flights_total', 'flights_cancel', 'flights_delay', 'flights_ontime',
                             'flights_arr_ontime', 'flights_arr_delay', 'flights_arr_cancel',
                             'flights_dep_ontime', 'flights_dep_delay', 'flights_dep_cancel']

flights_percentage_cols = ['flights_cancel_pct', 'flights_delay_pct', 'flights_ontime_pct',
                            'flights_arr_delay_pct', 'flights_arr_ontime_pct', 'flights_arr_cancel_pct',
                            'flights_dep_delay_pct', 'flights_dep_ontime_pct', 'flights_dep_cancel_pct']

# Date column groups
date_cols = ['date', 'covid', 'ordinal_date', 'year', 'month', 'day_of_month', 'day_of_week', 'season', 'holiday', 'halloween', 'xmas_eve', 'new_years_eve', 'jan_2', 'jan_3', 'day_before_easter', 'days_until_xmas', 'days_until_thanksgiving', 'days_until_july_4th', 'days_until_labor_day', 'days_until_memorial_day']

# Weather column groups
weather_cols = ['wx_temperature_max', 'wx_temperature_min', 'wx_apcp', 'wx_prate', 'wx_asnow', 'wx_frozr', 'wx_vis', 'wx_gust', 'wx_maxref', 'wx_cape', 'wx_lftx', 'wx_wind_speed', 'wx_wind_direction']

# Lag column groups
lag_cols =  ['flights_total_lag_1', 'flights_total_lag_2', 'flights_total_lag_3', 'flights_total_lag_4', 'flights_total_lag_5', 'flights_total_lag_6', 'flights_total_lag_7', 'flights_cancel_lag_1', 'flights_cancel_lag_2', 'flights_cancel_lag_3', 'flights_cancel_lag_4', 'flights_cancel_lag_5', 'flights_cancel_lag_6', 'flights_cancel_lag_7']

# DATA SPLITS

In [4]:
# Select features and targets
train_features = ['random'] + date_cols + weather_cols + lag_cols
targets = flights_non_terminal_cols + flights_percentage_cols

# Create X and y
X = df[train_features].drop('date', axis=1)
y = df[targets]

print(f"Feature names: {X.columns.tolist()}")
print(f"Target columns: {y.columns.tolist()}", end="\n\n")
print("Unique data types in X", X.dtypes.value_counts(), sep = '\n')

# Identify categorical and numeric columns in X
categorical_cols = X.select_dtypes(include=['object', 'category']).columns.tolist()
numeric_cols = X.select_dtypes(include = ['float64', 'float32', 'int32', 'int64']).columns.tolist()

print(f"Categorical columns to one-hot-encode: {categorical_cols}")

Feature names: ['random', 'covid', 'ordinal_date', 'year', 'month', 'day_of_month', 'day_of_week', 'season', 'holiday', 'halloween', 'xmas_eve', 'new_years_eve', 'jan_2', 'jan_3', 'day_before_easter', 'days_until_xmas', 'days_until_thanksgiving', 'days_until_july_4th', 'days_until_labor_day', 'days_until_memorial_day', 'wx_temperature_max', 'wx_temperature_min', 'wx_apcp', 'wx_prate', 'wx_asnow', 'wx_frozr', 'wx_vis', 'wx_gust', 'wx_maxref', 'wx_cape', 'wx_lftx', 'wx_wind_speed', 'wx_wind_direction', 'flights_total_lag_1', 'flights_total_lag_2', 'flights_total_lag_3', 'flights_total_lag_4', 'flights_total_lag_5', 'flights_total_lag_6', 'flights_total_lag_7', 'flights_cancel_lag_1', 'flights_cancel_lag_2', 'flights_cancel_lag_3', 'flights_cancel_lag_4', 'flights_cancel_lag_5', 'flights_cancel_lag_6', 'flights_cancel_lag_7']
Target columns: ['flights_total', 'flights_cancel', 'flights_delay', 'flights_ontime', 'flights_arr_ontime', 'flights_arr_delay', 'flights_arr_cancel', 'flights_dep_

## Train Test Split - "flights_ontime"

In [5]:
# Split data into train and test sets
X_train_full, X_test, y_train_full, y_test = train_test_split(X, y['flights_ontime'], test_size=0.1, random_state=42)

# Split data into X_train_rull and y_train_full into train and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_train_full, y_train_full, test_size=0.1, random_state=42)

# Print shapes
print("X_train_full shape:", X_train_full.shape)
print("y_train_full shape:", y_train_full.shape)

print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)

print("X_Test shape:", X_test.shape)
print("y_Test shape:", y_test.shape)

X_train_full shape: (1516, 47)
y_train_full shape: (1516,)
X_train shape: (1364, 47)
y_train shape: (1364,)
X_Test shape: (169, 47)
y_Test shape: (169,)


# DENSE NETWORK PREPROCESS

In [18]:
# Fit transformers to the training data
f_scaler = StandardScaler()
f_scaler.fit(X_train[numeric_cols])

ohe = OneHotEncoder(sparse_output=False, handle_unknown='ignore') # Some observed holidays may not be in the training data
ohe.fit(X_train[categorical_cols])

t_scaler = StandardScaler()
t_scaler.fit(y_train.values.reshape(-1, 1)) # reshape y_train to be 2D

# Define preprocessor
def preprocess(features, target, set_global_scaler = False):
    global global_targer_scaler

    scaled_features = f_scaler.transform(features[numeric_cols])
    encoded_features = ohe.transform(features[categorical_cols])
    scaled_target = t_scaler.transform(target.values.reshape(-1, 1))
    processed_features = np.concatenate([scaled_features, encoded_features], axis=1)

    if set_global_scaler:
        global_targer_scaler = t_scaler

    return processed_features, scaled_target

# Preprocess the data
X_train_d, y_train_d = preprocess(X_train, y_train, set_global_scaler=True)
X_val_d, y_val_d = preprocess(X_val, y_val)

# PREDICT `flights_ontime` WITH 1 NEURON

The goal of this section is to simulate linear regression using a neural newtork with one neuron and no activation function. We'll use L2 regularization to simulate ridge regression and compare results to those from Sklearn's lasso regression.

## Create TensorFlow datasets (not timeseries)

In [19]:
# TensorFlow datasets
train_ds_flights_ontime_d = Dataset.from_tensor_slices((X_train_d, y_train_d)).shuffle(len(X_train_d))
val_ds_flights_ontime_d = Dataset.from_tensor_slices((X_val_d, y_val_d)).shuffle(len(X_val_d))

# Batch and prefetch
batch_size = 32
train_ds_flights_ontime_d = train_ds_flights_ontime_d.batch(batch_size).prefetch(AUTOTUNE)
val_ds_flights_ontime_d = val_ds_flights_ontime_d.batch(batch_size).prefetch(AUTOTUNE)


## Create R-squared metric

In [24]:
from keras import backend as K

def r_squared(y_true, y_pred):
    y_true_inv = tf.numpy_function(global_targer_scaler.inverse_transform, [y_true], tf.float32)
    y_pred_inv = tf.numpy_function(global_targer_scaler.inverse_transform, [y_pred], tf.float32)
    SS_res =  K.sum(K.square(y_true_inv - y_pred_inv)) 
    SS_tot = K.sum(K.square(y_true_inv - K.mean(y_true_inv))) 
    return ( 1 - SS_res/(SS_tot + K.epsilon()) )

## 1-Neuron "linear regression" Model fit

In [47]:
# Callbacks & Tensorboard Setup
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Keras Tuner Design
def model_builder(hp):
    learning_rate = hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG', default=1e-3)
    l1_regularization = hp.Float('l1_regularization', min_value=1e-5, max_value=1e-1, sampling='LOG', default=1e-2)
    l2_regularization =  hp.Float('l2_regularization', min_value=1e-5, max_value=1e-1, sampling='LOG', default=1e-2)

    model = Sequential([
        Dense(units = 1, 
            input_dim=X_train_d.shape[1], 
            kernel_regularizer=L1L2(l1_regularization, l2_regularization))
    ])

    model.compile(optimizer=Adam(learning_rate=learning_rate), 
                loss='mean_squared_error', 
                metrics=['mean_absolute_error', r_squared])
    return model

# Create a Keras Tuner
dense_lr_tuner = kt.RandomSearch(
    hypermodel = model_builder,
    objective='val_loss',
    max_trials=20,
    executions_per_trial=2,
    directory='logs/flights_ontime/dense_lr/',
    project_name='tuner',
    overwrite = True
)

# Search for best hyperparameters
dense_lr_tuner.search(train_ds_flights_ontime_d, 
             validation_data=val_ds_flights_ontime_d, 
             epochs=500, 
             callbacks=[early_stopping])

Trial 81 Complete [00h 00m 05s]
val_loss: 0.5952489674091339

Best val_loss So Far: 0.4128335863351822
Total elapsed time: 00h 11m 25s

Search: Running Trial #82

Value             |Best Value So Far |Hyperparameter
0.00017155        |0.0080984         |learning_rate
0.06706           |5.9607e-05        |l1_regularization
4.1022e-05        |0.00017501        |l2_regularization

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoc

## 1-Neuron LR model performance

In [None]:
# Print hyperparameters for the 10 best trials
dense_lr_tuner.results_summary(num_trials=10)

Results summary
Results in logs/flights_ontime/dense_lr/tuner
Showing 10 best trials
Objective(name="val_loss", direction="min")

Trial 07 summary
Hyperparameters:
learning_rate: 0.008838843007600083
l1_regularization: 3.7685531425442215e-05
l2_regularization: 1.372387064892845e-05
Score: 0.41170233488082886

Trial 03 summary
Hyperparameters:
learning_rate: 0.006377919119726471
l1_regularization: 0.0005978766454671086
l2_regularization: 1.5554077728561435e-05
Score: 0.42575952410697937

Trial 01 summary
Hyperparameters:
learning_rate: 0.005682201189419717
l1_regularization: 0.0003781487250481495
l2_regularization: 0.009060936985623536
Score: 0.43453511595726013

Trial 00 summary
Hyperparameters:
learning_rate: 0.0006588156968074904
l1_regularization: 1.3806428858696358e-05
l2_regularization: 7.501389510842734e-05
Score: 0.4362606108188629

Trial 05 summary
Hyperparameters:
learning_rate: 0.00019955413624931227
l1_regularization: 4.368943790310963e-05
l2_regularization: 0.00023083171717

In [None]:
# Get best hyperparameters
best_hps = dense_lr_tuner.get_best_hyperparameters(num_trials = 1)[0]
learning_rate = best_hps.get('learning_rate')
l1_amount = best_hps.get('l1_regularization')
l2_amount = best_hps.get('l2_regularization')

print(f"""
Optimal Hyperparameters:
- Learning Rate: {learning_rate:.3f}
- L1 Regularization: {l1_amount:.5f}
- L2 Regularization: {l2_amount:.5f}
""")

# Get best trial
best_trial = dense_lr_tuner.oracle.get_best_trials(num_trials=1)[0]

# Best trial metrics
val_loss = best_trial.metrics.get_best_value('val_loss')
val_mae = best_trial.metrics.get_best_value('val_mean_absolute_error')
val_r2 = best_trial.metrics.get_best_value('val_r_squared')

print(f"""Loss and Metrics for Best Trial:
- Validation Loss: {val_loss:.2f}
- Validation MAE: {val_mae:.2f}
- Validation R^2: {val_r2:.3f}
""")

# Tensorboard directory setup
!rm -rf ./logs/flights_ontime/dense_lr/tensorboard/ 
log_dir = "logs/flights_ontime/dense_lr/tensorboard/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Train the model with the optimal hyperparameters
model = dense_lr_tuner.hypermodel.build(best_hps)
history = model.fit(train_ds_flights_ontime_d, 
                    validation_data=val_ds_flights_ontime_d, 
                    epochs=500, 
                    callbacks=[early_stopping, tensorboard_callback],
                    verbose=0)


Optimal Hyperparameters:
- Learning Rate: 0.009
- L1 Regularization: 0.00004
- L2 Regularization: 0.00001

Loss and Metrics for Best Trial:
- Validation Loss: 0.41
- Validation MAE: 0.44
- Validation R^2: 0.522



##  1-Neuron LR TensorBoard

In [46]:
%tensorboard --logdir logs/flights_ontime/dense_lr/tensorboard

Reusing TensorBoard on port 6009 (pid 92391), started 0:00:02 ago. (Use '!kill 92391' to kill it.)

## FORECASTING WITH A SHALLOW DENSE NETWORK

In [38]:
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

def build_model(hp):
    n_hidden = hp.Int('n_hidden', min_value=1, max_value=2, default=2)
    n_neurons = hp.Int('n_neurons', min_value=1, max_value=32, default=16)
    learning_rate = hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG', default=1e-3)
    dropout_rate = hp.Float('dropout_rate', min_value=0.02, max_value=0.03, default=0.0)
    l2_regularization =  hp.Float('l2_regularization', min_value=1e-5, max_value=1e-1, sampling='LOG', default=1e-2)

    model = Sequential()
    model.add(Dense(units=n_neurons, 
                    activation='relu', 
                    kernel_regularizer=L2(l2_regularization)))
    
    for layer in range(n_hidden-1):
        model.add(Dense(units=n_neurons, 
                        activation='relu', 
                        kernel_regularizer=L2(l2_regularization)))
        model.add(Dropout(dropout_rate))

    if n_hidden > 0:
        model.add(Dense(units=n_neurons, 
                        activation='relu', 
                        kernel_regularizer=L2(l2_regularization)))
        model.add(Dropout(dropout_rate))

    model.add(Dense(1))

    model.compile(optimizer=Adam(learning_rate=learning_rate), 
                  loss='mean_squared_error', 
                  metrics=['mean_absolute_error'])
    
    return model

tuner = kt.BayesianOptimization(
    hypermodel = build_model,
    objective='val_loss',
    max_trials=100,
    executions_per_trial=2,
    num_initial_points=2,
    directory = "flights_ontime",
    project_name = "flights_ontime_shallow_dense_fit",
    overwrite=True
)

dense_shallow_tuner = kt.RandomSearch(
    hypermodel = build_model,
    objective='val_loss',
    max_trials=100,
    executions_per_trial=2,
    directory = "logs/flights_ontime/dense_shallow/",
    project_name = "tuner",
    overwrite=True
)

dense_shallow_tuner.search(train_ds_flights_ontime_d, 
             epochs=500, 
             validation_data=val_ds_flights_ontime_d, 
             callbacks=[early_stopping])

Trial 5 Complete [00h 00m 03s]
val_loss: 0.4693162441253662

Best val_loss So Far: 0.4221741557121277
Total elapsed time: 00h 00m 13s


## Shallow Dense NN model perfomance

In [39]:
# Print hyperparameters for the 10 best trials
dense_shallow_tuner.results_summary(num_trials=10)

Results summary
Results in flights_ontime/flights_ontime_shallow_dense_fit
Showing 10 best trials
Objective(name="val_loss", direction="min")

Trial 3 summary
Hyperparameters:
n_hidden: 1
n_neurons: 32
learning_rate: 0.00041697231056181166
dropout_rate: 0.02562061484564263
l2_regularization: 1.4209495255444217e-05
optimizer: rmsprop
Score: 0.4221741557121277

Trial 0 summary
Hyperparameters:
n_hidden: 2
n_neurons: 6
learning_rate: 0.0021840106813786914
dropout_rate: 0.023538920452711963
l2_regularization: 0.0001279036661122155
optimizer: rmsprop
Score: 0.46542906761169434

Trial 4 summary
Hyperparameters:
n_hidden: 2
n_neurons: 4
learning_rate: 0.0008024046237377111
dropout_rate: 0.025398744946381654
l2_regularization: 0.007335252091684161
optimizer: adam
Score: 0.4693162441253662

Trial 1 summary
Hyperparameters:
n_hidden: 1
n_neurons: 21
learning_rate: 0.006439485147843038
dropout_rate: 0.02250353639255253
l2_regularization: 0.08388836483286709
optimizer: adam
Score: 0.48005479574203

In [None]:
best_hps = dense_shallow_tuner.get_best_hyperparameters(num_trials = 1)[0]

# Get best hyperparameters
n_hidden = best_hps.get('n_hidden')
n_neurons = best_hps.get('n_neurons')
learning_rate = best_hps.get('learning_rate')
dropout_rate = best_hps.get('dropout_rate')
l2_amount = best_hps.get('l2_regularization')

print(f"""
Optimal Hyperparameters:
- Number of Hidden Layers: {n_hidden}
- Number of Neurons: {n_neurons}
- Learning Rate: {learning_rate:.3f}
- Dropout Rate: {dropout_rate:.3f}
- L2 Regularization: {l2_amount:.5f}
""")

# Get best trial
best_trial = dense_shallow_tuner.oracle.get_best_trials(num_trials=1)[0]

# Best trial metrics
val_loss = best_trial.metrics.get_best_value('val_loss')
val_mae = best_trial.metrics.get_best_value('val_mean_absolute_error')

print(f"""Loss and Metrics for Best Trial:
- Validation Loss: {val_loss:.2f}
- Validation MAE: {val_mae:.2f}
""")

# Tensorboard directory setup
!rm -rf ./logs/flights_ontime/dense_shallow/tensorboard/ 
log_dir = "logs/flights_ontime/dense_shallow/tensorboard/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Change early stopping patience
early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)


# Train the model with the optimal hyperparameters
model = dense_shallow_tuner.hypermodel.build(best_hps)
history = model.fit(train_ds_flights_ontime_d, 
                    validation_data=val_ds_flights_ontime_d, 
                    epochs=500, 
                    callbacks=[early_stopping, tensorboard_callback],
                    verbose=0)

# TIMESERIES WITH RNN

## Remove lag variables from X train, val, and test sets

In [None]:
rnn_X_train_full = X_train_full.drop(lag_cols, axis=1)
rnn_X_train = X_train.drop(lag_cols, axis=1)
rnn_X_val = X_val.drop(lag_cols, axis=1)
rnn_X_test = X_test.drop(lag_cols, axis=1)

## RNN column transformers

In [None]:
rnn_numeric_cols = [col for col in numeric_cols if col not in lag_cols]

# Fit transformers to the training data
rnn_f_scaler = StandardScaler()
rnn_f_scaler.fit(rnn_X_train[rnn_numeric_cols])

# Create a function to preprocess TensorFlow datasets
def rnn_preprocess(features, target):
    scaled_features = rnn_f_scaler.transform(features[rnn_numeric_cols])
    encoded_features = ohe.transform(features[categorical_cols])
    scaled_target = t_scaler.transform(target.values.reshape(-1, 1))
    processed_features = np.concatenate([scaled_features, encoded_features], axis=1)
    return processed_features, scaled_target

# Transform the data
X_train_rnn, y_train_rnn = rnn_preprocess(X_train, y_train)
X_val_rnn, y_val_rnn = rnn_preprocess(X_val, y_val)
X_test_rnn, y_test_rnn = rnn_preprocess(X_test, y_test)

## Create timeseries datasets

In [None]:
seed_value = 42
np.random.seed(seed_value)
tf.random.set_seed(seed_value)

seq_length = 7
batch_size = 32

train_rnn = timeseries_dataset_from_array(
    data = X_train_rnn, 
    targets = y_train_rnn,
    sequence_length = seq_length,
    sequence_stride = 1,
    shuffle = True,
    batch_size = batch_size
)

val_rnn = timeseries_dataset_from_array(
    data = X_val_rnn, 
    targets = y_val_rnn[seq_length-1:],
    sequence_length = seq_length,
    sequence_stride = 1,
    shuffle = True,
    batch_size = batch_size
)

test_rnn = timeseries_dataset_from_array(
    data = X_test_rnn, 
    targets = y_test_rnn[seq_length-1:],
    sequence_length = seq_length,
    sequence_stride = 1,
    shuffle = True,
    batch_size = batch_size
)

## Forecasting Using a single neuron RNN

In [None]:
np.random.seed(seed_value)
tf.random.set_seed(seed_value)
random.seed(seed_value)

# The validation MAE still varies from one run to another. GPU may impart some randomness to the results.


model = Sequential([
    SimpleRNN(
        units=32,
        input_shape=(None, X_train_rnn.shape[1]),
        kernel_regularizer=regularizers.l2(0.01),
        recurrent_regularizer=regularizers.l2(0.01),
        activation='relu'
        ),
        Dropout(0.5),
        ])


opt = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=False)
# opt = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)

model.compile(optimizer=opt, loss='mean_squared_error', metrics=['mean_absolute_error'])

# Clear any logs from previous runs
!rm -rf ./logs/rnn_1_flights_ontime/

log_dir = "logs/rnn_1_flights_ontime/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)
early_stopping = EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True)

history = model.fit(
    train_rnn,
    epochs=5000,
    validation_data=val_rnn,
    callbacks=[tensorboard_callback, early_stopping]
)    

In [None]:
# !kill 220
%tensorboard --logdir logs/rnn_1_flights_ontime

## Forecasting using a shallow RNN

In [None]:
from keras.regularizers import l1, l2, l1_l2

early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)


def build_model(hp):
    n_hidden = hp.Int('n_hidden', min_value=1, max_value=2, default=2)
    n_neurons = hp.Int('n_neurons', min_value=1, max_value=32, default=16)
    learning_rate = hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG', default=1e-3)
    dropout_rate = hp.Float('dropout_rate', min_value=0.02, max_value=0.03, default=0.0)
    regularization_type = hp.Choice('regularization_type', values=['l1', 'l2', 'l1_l2'], default='l2')
    l1_regularization = hp.Float('l1_regularization', min_value=1e-4, max_value=1e-1, sampling='LOG', default=1e-2)
    l2_regularization = hp.Float('l2_regularization', min_value=1e-4, max_value=1e-1, sampling='LOG', default=1e-2)


    if regularization_type == 'l1':
        regularizer = l1(l1_regularization)
    elif regularization_type == 'l2':
        regularizer = l2(l2_regularization)
    else:
        regularizer = l1_l2(l1=l1_regularization, l2=l2_regularization)

   
    optimizer = hp.Choice('optimizer', values=['adam', 'rmsprop', 'sgd'], default='adam')
    if optimizer == 'adam':
        opt = Adam(learning_rate=learning_rate)
    elif optimizer == 'rmsprop':
        opt = RMSprop(learning_rate=learning_rate)
    else:
        opt = SGD(learning_rate=learning_rate)

    model = Sequential()
    model.add(SimpleRNN(units=n_neurons, input_shape=(None, X_train_rnn.shape[1]), activation='relu', return_sequences=True, kernel_regularizer=regularizer))
    for layer in range(n_hidden-1):
        model.add(SimpleRNN(units=n_neurons, activation='relu', return_sequences=True, kernel_regularizer=regularizer))
        model.add(Dropout(dropout_rate))
    if n_hidden > 0:
        model.add(SimpleRNN(units=n_neurons, activation='relu', kernel_regularizer=regularizer))
        model.add(Dropout(dropout_rate))
    model.add(Dense(1))
    model.compile(optimizer=opt, loss='mean_squared_error', metrics=['mean_absolute_error'])
    return model

# tuner = kt.BayesianOptimization(
#     build_model,
#     objective='val_loss',
#     max_trials=50,
#     num_initial_points=2,
#     overwrite=True
# )

tuner = kt.RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=5,
    overwrite=True
)

tuner.search(train_rnn, epochs=500, validation_data=val_rnn, callbacks=[early_stopping])

In [None]:
best_3hps = tuner.get_best_hyperparameters(num_trials=3)

print("Best 3 hyperparameter sets:")
print(best_3hps[0].values)
print(best_3hps[1].values)
print(best_3hps[2].values, '\n')


best_trial = tuner.oracle.get_best_trials(1)[0]

best_trial.summary()

print("\nBest trial validation loss", best_trial.metrics.get_last_value('val_loss'))
print("Best trial validation MAE", best_trial.metrics.get_last_value('mean_absolute_error'))

## Next Steps
1. add TensorBoard to RNN
2. Add L1 and L2 regularization to Keras Tuner 
3. Tune for 500 trials (overnight)