# INTRODUCTION

## Libraries

In [22]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import random

from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder, MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score
from sklearn.compose import ColumnTransformer


import tensorflow as tf
from tensorflow.keras import layers, models, Sequential, regularizers
from tensorflow.keras.layers import SimpleRNN, Dense, Dropout, Embedding, LSTM, GRU
from tensorflow.keras.optimizers.legacy import Adam, RMSprop, SGD
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from tensorflow.keras.utils import timeseries_dataset_from_array
from tensorflow.data import Dataset, AUTOTUNE

from keras.regularizers import L1, L2, L1L2

import keras_tuner as kt

%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


## Import data & column groups

In [23]:
DAILY_DATA_PATH = "data.v3/daily" 

df = pd.read_parquet(os.path.join(DAILY_DATA_PATH, "daily_flights_and_weather_merged.parquet"))

# Flights column groups
flights_terminal_cols = ['flights_arr_A', 'flights_arr_B', 'flights_arr_C', 'flights_arr_D', 'flights_arr_E',
                         'flights_dep_A', 'flights_dep_B', 'flights_dep_C', 'flights_dep_D', 'flights_dep_E']

flights_non_terminal_cols = ['flights_total', 'flights_cancel', 'flights_delay', 'flights_ontime',
                             'flights_arr_ontime', 'flights_arr_delay', 'flights_arr_cancel',
                             'flights_dep_ontime', 'flights_dep_delay', 'flights_dep_cancel']

flights_percentage_cols = ['flights_cancel_pct', 'flights_delay_pct', 'flights_ontime_pct',
                            'flights_arr_delay_pct', 'flights_arr_ontime_pct', 'flights_arr_cancel_pct',
                            'flights_dep_delay_pct', 'flights_dep_ontime_pct', 'flights_dep_cancel_pct']

# Date column groups
date_cols = ['date', 'covid', 'ordinal_date', 'year', 'month', 'day_of_month', 'day_of_week', 'season', 'holiday', 'halloween', 'xmas_eve', 'new_years_eve', 'jan_2', 'jan_3', 'day_before_easter', 'days_until_xmas', 'days_until_thanksgiving', 'days_until_july_4th', 'days_until_labor_day', 'days_until_memorial_day']

# Weather column groups
weather_cols = ['wx_temperature_max', 'wx_temperature_min', 'wx_apcp', 'wx_prate', 'wx_asnow', 'wx_frozr', 'wx_vis', 'wx_gust', 'wx_maxref', 'wx_cape', 'wx_lftx', 'wx_wind_speed', 'wx_wind_direction']

# Lag column groups
lag_cols =  ['flights_total_lag_1', 'flights_total_lag_2', 'flights_total_lag_3', 'flights_total_lag_4', 'flights_total_lag_5', 'flights_total_lag_6', 'flights_total_lag_7', 'flights_cancel_lag_1', 'flights_cancel_lag_2', 'flights_cancel_lag_3', 'flights_cancel_lag_4', 'flights_cancel_lag_5', 'flights_cancel_lag_6', 'flights_cancel_lag_7']

## Train Test Split - "flights_ontime"

In [25]:
# Select features and targets
train_features = ['random'] + date_cols + weather_cols + lag_cols
targets = flights_non_terminal_cols + flights_percentage_cols

# Create X and y
X = df[train_features].drop('date', axis=1)
y = df[targets]

# Split data into train and test sets
X_train_full, X_test, y_train_full, y_test = train_test_split(X, y['flights_ontime'], test_size=0.1, random_state=42)

# Split data into X_train_rull and y_train_full into train and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_train_full, y_train_full, test_size=0.1, random_state=42)

# Print shapes
print("X_train_full shape:", X_train_full.shape)
print("y_train_full shape:", y_train_full.shape)

print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)

print("X_Test shape:", X_test.shape)
print("y_Test shape:", y_test.shape)

X_train_full shape: (1516, 47)
y_train_full shape: (1516,)
X_train shape: (1364, 47)
y_train shape: (1364,)
X_Test shape: (169, 47)
y_Test shape: (169,)


# PREPROCESS FOR DENSE NETWORK

In [28]:
print(f"Feature names: {X.columns.tolist()}")
print(f"Target columns: {y.columns.tolist()}", end="\n\n")
print("Unique data types in X", X.dtypes.value_counts(), sep = '\n')

# Identify categorical and numeric columns in X
categorical_cols = X.select_dtypes(include=['object', 'category']).columns.tolist()
numeric_cols = X.select_dtypes(include = ['float64', 'float32', 'int32', 'int64']).columns.tolist()

print(f"\nCategorical columns to one-hot-encode: {categorical_cols}")

# Fit transformers to the training data
f_scaler = StandardScaler()
f_scaler.fit(X_train[numeric_cols])

ohe = OneHotEncoder(sparse_output=False, handle_unknown='ignore') # Some observed holidays may not be in the training data
ohe.fit(X_train[categorical_cols])

t_scaler = StandardScaler()
t_scaler.fit(y_train.values.reshape(-1, 1)) # reshape y_train to be 2D

# Define preprocessor
def preprocess(features, target, set_global_scaler = False):
    global global_targer_scaler

    scaled_features = f_scaler.transform(features[numeric_cols])
    encoded_features = ohe.transform(features[categorical_cols])
    scaled_target = t_scaler.transform(target.values.reshape(-1, 1))
    processed_features = np.concatenate([scaled_features, encoded_features], axis=1)

    if set_global_scaler:
        global_targer_scaler = t_scaler

    return processed_features, scaled_target

# Preprocess the data
X_train_d, y_train_d = preprocess(X_train, y_train, set_global_scaler=True)
X_val_d, y_val_d = preprocess(X_val, y_val)

Feature names: ['random', 'covid', 'ordinal_date', 'year', 'month', 'day_of_month', 'day_of_week', 'season', 'holiday', 'halloween', 'xmas_eve', 'new_years_eve', 'jan_2', 'jan_3', 'day_before_easter', 'days_until_xmas', 'days_until_thanksgiving', 'days_until_july_4th', 'days_until_labor_day', 'days_until_memorial_day', 'wx_temperature_max', 'wx_temperature_min', 'wx_apcp', 'wx_prate', 'wx_asnow', 'wx_frozr', 'wx_vis', 'wx_gust', 'wx_maxref', 'wx_cape', 'wx_lftx', 'wx_wind_speed', 'wx_wind_direction', 'flights_total_lag_1', 'flights_total_lag_2', 'flights_total_lag_3', 'flights_total_lag_4', 'flights_total_lag_5', 'flights_total_lag_6', 'flights_total_lag_7', 'flights_cancel_lag_1', 'flights_cancel_lag_2', 'flights_cancel_lag_3', 'flights_cancel_lag_4', 'flights_cancel_lag_5', 'flights_cancel_lag_6', 'flights_cancel_lag_7']
Target columns: ['flights_total', 'flights_cancel', 'flights_delay', 'flights_ontime', 'flights_arr_ontime', 'flights_arr_delay', 'flights_arr_cancel', 'flights_dep_

# PREDICT WITH 1 NEURON "LINEAR MODEL"

The goal of this section is to simulate linear regression using a neural newtork with one neuron and no activation function. We'll use L2 regularization to simulate ridge regression and compare results to those from Sklearn's lasso regression.

## Create TensorFlow datasets (not timeseries)

In [10]:
# TensorFlow datasets
train_ds_flights_ontime_d = Dataset.from_tensor_slices((X_train_d, y_train_d)).shuffle(len(X_train_d))
val_ds_flights_ontime_d = Dataset.from_tensor_slices((X_val_d, y_val_d)).shuffle(len(X_val_d))

# Batch and prefetch
batch_size = 32
train_ds_flights_ontime_d = train_ds_flights_ontime_d.batch(batch_size).prefetch(AUTOTUNE)
val_ds_flights_ontime_d = val_ds_flights_ontime_d.batch(batch_size).prefetch(AUTOTUNE)


## Create R-squared metric

In [11]:
from keras import backend as K

def r_squared(y_true, y_pred):
    y_true_inv = tf.numpy_function(global_targer_scaler.inverse_transform, [y_true], tf.float32)
    y_pred_inv = tf.numpy_function(global_targer_scaler.inverse_transform, [y_pred], tf.float32)
    SS_res =  K.sum(K.square(y_true_inv - y_pred_inv)) 
    SS_tot = K.sum(K.square(y_true_inv - K.mean(y_true_inv))) 
    return ( 1 - SS_res/(SS_tot + K.epsilon()) )

## 1-Neuron Model fit

In [12]:
# Callbacks & Tensorboard Setup
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Keras Tuner Design
def model_builder(hp):
    learning_rate = hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG', default=1e-3)
    l1_regularization = hp.Float('l1_regularization', min_value=1e-5, max_value=1e-1, sampling='LOG', default=1e-2)
    l2_regularization =  hp.Float('l2_regularization', min_value=1e-5, max_value=1e-1, sampling='LOG', default=1e-2)

    model = Sequential([
        Dense(units = 1, 
            input_dim=X_train_d.shape[1], 
            kernel_regularizer=L1L2(l1_regularization, l2_regularization))
    ])

    model.compile(optimizer=Adam(learning_rate=learning_rate), 
                loss='mean_squared_error', 
                metrics=['mean_absolute_error', r_squared])
    return model

# Create a Keras Tuner
dense_lr_tuner = kt.RandomSearch(
    hypermodel = model_builder,
    objective='val_loss',
    max_trials=20,
    executions_per_trial=2,
    directory='logs/flights_ontime/dense_lr/',
    project_name='tuner',
    overwrite = True
)

# Search for best hyperparameters
dense_lr_tuner.search(train_ds_flights_ontime_d, 
             validation_data=val_ds_flights_ontime_d, 
             epochs=500, 
             callbacks=[early_stopping])

Trial 20 Complete [00h 00m 14s]
val_loss: 0.4528622478246689

Best val_loss So Far: 0.4294688552618027
Total elapsed time: 00h 02m 33s


## 1-Neuron model performance

In [13]:
# Print hyperparameters for the 10 best trials
dense_lr_tuner.results_summary(num_trials=10)

Results summary
Results in logs/flights_ontime/dense_lr/tuner
Showing 10 best trials
Objective(name="val_loss", direction="min")

Trial 12 summary
Hyperparameters:
learning_rate: 0.0015511343794524048
l1_regularization: 1.9021653410411186e-05
l2_regularization: 0.0003770290850368308
Score: 0.4294688552618027

Trial 17 summary
Hyperparameters:
learning_rate: 0.0033946715472269115
l1_regularization: 0.00044896982083573897
l2_regularization: 0.004091965398247103
Score: 0.43117551505565643

Trial 14 summary
Hyperparameters:
learning_rate: 0.0037561240060219256
l1_regularization: 3.319105045503531e-05
l2_regularization: 0.00667842270347455
Score: 0.4329136162996292

Trial 01 summary
Hyperparameters:
learning_rate: 0.005441667012386122
l1_regularization: 3.906857576488224e-05
l2_regularization: 0.002584585097692256
Score: 0.4344650208950043

Trial 05 summary
Hyperparameters:
learning_rate: 0.00038311174343190024
l1_regularization: 1.2901888244893854e-05
l2_regularization: 0.00073348476971748

In [14]:
# Get best hyperparameters
best_hps = dense_lr_tuner.get_best_hyperparameters(num_trials = 1)[0]
learning_rate = best_hps.get('learning_rate')
l1_amount = best_hps.get('l1_regularization')
l2_amount = best_hps.get('l2_regularization')

print(f"""
Optimal Hyperparameters:
- Learning Rate: {learning_rate:.3f}
- L1 Regularization: {l1_amount:.5f}
- L2 Regularization: {l2_amount:.5f}
""")

# Get best trial
best_trial = dense_lr_tuner.oracle.get_best_trials(num_trials=1)[0]

# Best trial metrics
val_loss = best_trial.metrics.get_best_value('val_loss')
val_mae = best_trial.metrics.get_best_value('val_mean_absolute_error')
val_r2 = best_trial.metrics.get_best_value('val_r_squared')

print(f"""Loss and Metrics for Best Trial:
- Validation Loss: {val_loss:.2f}
- Validation MAE: {val_mae:.2f}
- Validation R^2: {val_r2:.3f}
""")

# Tensorboard directory setup
!rm -rf ./logs/flights_ontime/dense_lr/tensorboard/ 
log_dir = "logs/flights_ontime/dense_lr/tensorboard/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Train the model with the optimal hyperparameters
model = dense_lr_tuner.hypermodel.build(best_hps)
history = model.fit(train_ds_flights_ontime_d, 
                    validation_data=val_ds_flights_ontime_d, 
                    epochs=500, 
                    callbacks=[early_stopping, tensorboard_callback],
                    verbose=0)


Optimal Hyperparameters:
- Learning Rate: 0.002
- L1 Regularization: 0.00002
- L2 Regularization: 0.00038

Loss and Metrics for Best Trial:
- Validation Loss: 0.43
- Validation MAE: 0.44
- Validation R^2: 0.633



##  1-Neuron TensorBoard

In [15]:
%tensorboard --logdir logs/flights_ontime/dense_lr/tensorboard

## PREDICT WITH A SHALLOW DENSE NETWORK

In [16]:
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

def build_model(hp):
    n_hidden = hp.Int('n_hidden', min_value=1, max_value=2, default=2)
    n_neurons = hp.Int('n_neurons', min_value=1, max_value=32, default=16)
    learning_rate = hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG', default=1e-3)
    dropout_rate = hp.Float('dropout_rate', min_value=0.02, max_value=0.03, default=0.0)
    l2_regularization =  hp.Float('l2_regularization', min_value=1e-5, max_value=1e-1, sampling='LOG', default=1e-2)

    model = Sequential()
    model.add(Dense(units=n_neurons, 
                    activation='relu', 
                    kernel_regularizer=L2(l2_regularization)))
    
    for layer in range(n_hidden-1):
        model.add(Dense(units=n_neurons, 
                        activation='relu', 
                        kernel_regularizer=L2(l2_regularization)))
        model.add(Dropout(dropout_rate))

    if n_hidden > 0:
        model.add(Dense(units=n_neurons, 
                        activation='relu', 
                        kernel_regularizer=L2(l2_regularization)))
        model.add(Dropout(dropout_rate))

    model.add(Dense(1))

    model.compile(optimizer=Adam(learning_rate=learning_rate), 
                  loss='mean_squared_error', 
                  metrics=['mean_absolute_error'])
    
    return model

tuner = kt.BayesianOptimization(
    hypermodel = build_model,
    objective='val_loss',
    max_trials=100,
    executions_per_trial=2,
    num_initial_points=2,
    directory = "flights_ontime",
    project_name = "flights_ontime_shallow_dense_fit",
    overwrite=True
)

dense_shallow_tuner = kt.RandomSearch(
    hypermodel = build_model,
    objective='val_loss',
    max_trials=100,
    executions_per_trial=2,
    directory = "logs/flights_ontime/dense_shallow/",
    project_name = "tuner",
    overwrite=True
)

dense_shallow_tuner.search(train_ds_flights_ontime_d, 
             epochs=500, 
             validation_data=val_ds_flights_ontime_d, 
             callbacks=[early_stopping])

Trial 100 Complete [00h 00m 03s]
val_loss: 0.4783097803592682

Best val_loss So Far: 0.4009020924568176
Total elapsed time: 00h 07m 53s


## Shallow Dense NN model perfomance

In [17]:
# Print hyperparameters for the 10 best trials
dense_shallow_tuner.results_summary(num_trials=10)

Results summary
Results in logs/flights_ontime/dense_shallow/tuner
Showing 10 best trials
Objective(name="val_loss", direction="min")

Trial 010 summary
Hyperparameters:
n_hidden: 2
n_neurons: 30
learning_rate: 0.006715083131370706
dropout_rate: 0.023519556922148722
l2_regularization: 0.0001572568838800907
Score: 0.4009020924568176

Trial 017 summary
Hyperparameters:
n_hidden: 2
n_neurons: 3
learning_rate: 0.002605398610948284
dropout_rate: 0.02128017052812929
l2_regularization: 1.733380806051542e-05
Score: 0.40239638090133667

Trial 074 summary
Hyperparameters:
n_hidden: 1
n_neurons: 20
learning_rate: 0.001468267917477422
dropout_rate: 0.02327180712115707
l2_regularization: 2.5663603752396305e-05
Score: 0.4088706821203232

Trial 023 summary
Hyperparameters:
n_hidden: 1
n_neurons: 20
learning_rate: 0.0077158177708831835
dropout_rate: 0.023931066081892202
l2_regularization: 0.0001342648611028202
Score: 0.41031041741371155

Trial 007 summary
Hyperparameters:
n_hidden: 2
n_neurons: 21
lea

In [18]:
best_hps = dense_shallow_tuner.get_best_hyperparameters(num_trials = 1)[0]

# Get best hyperparameters
n_hidden = best_hps.get('n_hidden')
n_neurons = best_hps.get('n_neurons')
learning_rate = best_hps.get('learning_rate')
dropout_rate = best_hps.get('dropout_rate')
l2_amount = best_hps.get('l2_regularization')

print(f"""
Optimal Hyperparameters:
- Number of Hidden Layers: {n_hidden}
- Number of Neurons: {n_neurons}
- Learning Rate: {learning_rate:.3f}
- Dropout Rate: {dropout_rate:.3f}
- L2 Regularization: {l2_amount:.5f}
""")

# Get best trial
best_trial = dense_shallow_tuner.oracle.get_best_trials(num_trials=1)[0]

# Best trial metrics
val_loss = best_trial.metrics.get_best_value('val_loss')
val_mae = best_trial.metrics.get_best_value('val_mean_absolute_error')

print(f"""Loss and Metrics for Best Trial:
- Validation Loss: {val_loss:.2f}
- Validation MAE: {val_mae:.2f}
""")

# Tensorboard directory setup
!rm -rf ./logs/flights_ontime/dense_shallow/tensorboard/ 
log_dir = "logs/flights_ontime/dense_shallow/tensorboard/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Change early stopping patience
early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)


# Train the model with the optimal hyperparameters
model = dense_shallow_tuner.hypermodel.build(best_hps)
history = model.fit(train_ds_flights_ontime_d, 
                    validation_data=val_ds_flights_ontime_d, 
                    epochs=500, 
                    callbacks=[early_stopping, tensorboard_callback],
                    verbose=0)


Optimal Hyperparameters:
- Number of Hidden Layers: 2
- Number of Neurons: 30
- Learning Rate: 0.007
- Dropout Rate: 0.024
- L2 Regularization: 0.00016

Loss and Metrics for Best Trial:
- Validation Loss: 0.40
- Validation MAE: 0.41



# PREDICT WITH RNN

## Remove lag variables from X train, val, and test sets

In [19]:
rnn_X_train_full = X_train_full.drop(lag_cols, axis=1)
rnn_X_train = X_train.drop(lag_cols, axis=1)
rnn_X_val = X_val.drop(lag_cols, axis=1)
rnn_X_test = X_test.drop(lag_cols, axis=1)

## RNN column transformers

In [20]:
rnn_numeric_cols = [col for col in numeric_cols if col not in lag_cols]

# Fit transformers to the training data
rnn_f_scaler = StandardScaler()
rnn_f_scaler.fit(rnn_X_train[rnn_numeric_cols])

# Create a function to preprocess TensorFlow datasets
def rnn_preprocess(features, target):
    scaled_features = rnn_f_scaler.transform(features[rnn_numeric_cols])
    encoded_features = ohe.transform(features[categorical_cols])
    scaled_target = t_scaler.transform(target.values.reshape(-1, 1))
    processed_features = np.concatenate([scaled_features, encoded_features], axis=1)
    return processed_features, scaled_target

# Transform the data
X_train_rnn, y_train_rnn = rnn_preprocess(X_train, y_train)
X_val_rnn, y_val_rnn = rnn_preprocess(X_val, y_val)
X_test_rnn, y_test_rnn = rnn_preprocess(X_test, y_test)

## Create timeseries datasets

In [21]:
seed_value = 42
np.random.seed(seed_value)
tf.random.set_seed(seed_value)

seq_length = 7
batch_size = 32

train_rnn = timeseries_dataset_from_array(
    data = X_train_rnn, 
    targets = y_train_rnn,
    sequence_length = seq_length,
    sequence_stride = 1,
    shuffle = True,
    batch_size = batch_size
)

val_rnn = timeseries_dataset_from_array(
    data = X_val_rnn, 
    targets = y_val_rnn[seq_length-1:],
    sequence_length = seq_length,
    sequence_stride = 1,
    shuffle = True,
    batch_size = batch_size
)

test_rnn = timeseries_dataset_from_array(
    data = X_test_rnn, 
    targets = y_test_rnn[seq_length-1:],
    sequence_length = seq_length,
    sequence_stride = 1,
    shuffle = True,
    batch_size = batch_size
)

## Predict Using a single RNN

RETURN HERE: 
1. COPY SHALLOW DENSE NETWORK BUILD FUNCTION. EXPLORE THE SAME ARCHITECTURE HYPERPARAMETERS, BUT WITH AN RNN. 
2. ADD TENSOBOARD AND EARLY STOPPING
3. ADD L1 AND L2 REGULARIZATION
4. LEARN THE TENSORBOARD FEATURES
5. WRITE CONCLUSIONS


In [34]:
 
model = Sequential([
    SimpleRNN(
        units=1,
        input_shape=(None, X_train_rnn.shape[1]),
        kernel_regularizer=regularizers.L2(0.01),
        recurrent_regularizer=regularizers.L2(0.01),
        activation='relu'
        )
        ])


opt = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=False)

model.compile(optimizer=opt, 
              loss='mean_squared_error', 
              metrics=['mean_absolute_error'])

# Clear any logs from previous runs
!rm -rf ./logs/rnn_1_flights_ontime/

log_dir = "logs/rnn_1_flights_ontime/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)
early_stopping = EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True)

history = model.fit(
    train_rnn,
    epochs=100,
    validation_data=val_rnn,
    callbacks=[tensorboard_callback, early_stopping]
)    

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100


In [32]:
# !kill 220
%tensorboard --logdir logs/rnn_1_flights_ontime

Reusing TensorBoard on port 6009 (pid 9089), started 0:00:24 ago. (Use '!kill 9089' to kill it.)

## Predict using a shallow RNN

In [35]:
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

def build_model(hp):
    n_hidden = hp.Int('n_hidden', min_value=1, max_value=2, default=2)
    n_neurons = hp.Int('n_neurons', min_value=1, max_value=32, default=16)
    learning_rate = hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG', default=1e-3)
    dropout_rate = hp.Float('dropout_rate', min_value=0.0, max_value=0.5, default=0.0)
    recurrent_dropout_rate = hp.Float('recurrent_dropout_rate', min_value=0.0, max_value=0.5, default=0.0)
    kernel_reg = hp.Float('kernel_reg', min_value=1e-4, max_value=1e-1, sampling='LOG', default=1e-2)
    recurr_reg = hp.Float('recurr_reg', min_value=1e-4, max_value=1e-1, sampling='LOG', default=1e-2)

    model = Sequential()

    # Input layer with dropout
    model.add(Dropout(dropout_rate, 
                      input_shape=(None, X_train_rnn.shape[1])))
                     
    # model.add(SimpleRNN(units=n_neurons, 
    #                     input_shape=(None, X_train_rnn.shape[1]), 
    #                     activation='relu', 
    #                     return_sequences=True, 
    #                     kernel_regularizer=L2(kernel_reg),
    #                     recurrent_regularizer=L2(recurr_reg))

    # First n-1 Hidden layers
    for _ in range(n_hidden-1):
        model.add(SimpleRNN(units=n_neurons, 
                            activation='relu', 
                            return_sequences=True,
                            kernel_regularizer=L2(kernel_reg),
                            recurrent_regularizer=L2(recurr_reg),
                            dropout = dropout_rate,
                            recurrent_dropout = recurrent_dropout_rate))

    # Last hidden layer
    if n_hidden > 0:
        model.add(SimpleRNN(units=n_neurons, 
                            activation='relu', 
                            kernel_regularizer=L2(kernel_reg),
                            recurrent_regularizer=L2(recurr_reg),
                            dropout = dropout_rate,
                            recurrent_dropout = recurrent_dropout_rate))

    # Output layer
    model.add(Dense(1))

    # Compile the model
    model.compile(optimizer=Adam(learning_rate=learning_rate), 
                  loss='mean_squared_error', 
                  metrics=['mean_absolute_error'])
    
    return model


# tuner = kt.BayesianOptimization(
#     build_model,
#     objective='val_loss',
#     max_trials=50,
#     num_initial_points=2,
#     overwrite=True
# )

tuner = kt.RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=5,
    overwrite=True
)

tuner.search(train_rnn, epochs=500, validation_data=val_rnn, callbacks=[early_stopping])

Trial 5 Complete [00h 00m 48s]
val_loss: 1.3270539045333862

Best val_loss So Far: 1.239393711090088
Total elapsed time: 00h 01m 11s


In [36]:
best_3hps = tuner.get_best_hyperparameters(num_trials=3)

print("Best 3 hyperparameter sets:")
print(best_3hps[0].values)
print(best_3hps[1].values)
print(best_3hps[2].values, '\n')


best_trial = tuner.oracle.get_best_trials(1)[0]

best_trial.summary()

print("\nBest trial validation loss", best_trial.metrics.get_last_value('val_loss'))
print("Best trial validation MAE", best_trial.metrics.get_last_value('mean_absolute_error'))

Best 3 hyperparameter sets:
{'n_hidden': 2, 'n_neurons': 28, 'learning_rate': 0.0038915899689643918, 'dropout_rate': 0.1347496912624052, 'recurrent_dropout_rate': 0.011123302279362424, 'kernel_reg': 0.004347669530569911, 'recurr_reg': 0.019211177831550998}
{'n_hidden': 1, 'n_neurons': 1, 'learning_rate': 0.0002590015503494759, 'dropout_rate': 0.38334649514249525, 'recurrent_dropout_rate': 0.10174476464789312, 'kernel_reg': 0.0005400562198482222, 'recurr_reg': 0.011842609343404916}
{'n_hidden': 1, 'n_neurons': 15, 'learning_rate': 0.0014267123289125915, 'dropout_rate': 0.273838857755164, 'recurrent_dropout_rate': 0.42722199443089204, 'kernel_reg': 0.0007982790969186325, 'recurr_reg': 0.08684008944452379} 

Trial 1 summary
Hyperparameters:
n_hidden: 2
n_neurons: 28
learning_rate: 0.0038915899689643918
dropout_rate: 0.1347496912624052
recurrent_dropout_rate: 0.011123302279362424
kernel_reg: 0.004347669530569911
recurr_reg: 0.019211177831550998
Score: 1.239393711090088

Best trial validati

## Next Steps
1. add TensorBoard to RNN
2. Add L1 and L2 regularization to Keras Tuner 
3. Tune for 500 trials (overnight)