# Hyperparameter tuning - deep learning models

In [None]:
import time
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import *
from keras.losses import MeanSquaredError
from keras.metrics import RootMeanSquaredError
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
import keras_tuner
from utils import train_test_split, X_Y_split_DL

In [None]:
# Run once if KerasTuner is missing or outdated: pip install keras-tuner --upgrade

In [None]:
# Load the pre-processed dataset; the path is relative to the working directory.
csv_path = 'data_droped_nov18_dummy_final.csv'
data = pd.read_csv(csv_path)

In [None]:
# Index the frame by timestamp; parsing to a DatetimeIndex enables the
# calendar features derived in later cells.
data = data.set_index('datetime')
data.index = pd.to_datetime(data.index)

# Drop the 'Unnamed: *' columns (presumably indexes saved by earlier CSV
# round-trips) and the 'diff' column.
unused_columns = ['Unnamed: 0.2', 'Unnamed: 0.1', 'Unnamed: 0', 'diff']
data = data.drop(columns=unused_columns)

# Fix the column order: target first, then the static car-park attributes and
# the dummy-encoded columns. Columns not listed here are dropped by reindex.
column_order = [
    'lots_available', 'total_lot', 'carpark_number', 'x_coord', 'y_coord',
    'car_park_decks', 'gantry_height',
    'BASEMENT CAR PARK', 'COVERED CAR PARK', 'MECHANISED AND SURFACE CAR PARK',
    'MULTI-STOREY CAR PARK', 'SURFACE CAR PARK',
    '7AM-10.30PM', '7AM-7PM', 'NO', 'WHOLE DAY', 'NO.1',
    'SUN & PH FR 1PM-10.30PM', 'SUN & PH FR 7AM-10.30PM', 'NO.2',
    'YES', 'N', 'Y',
]
data = data.reindex(columns=column_order)

In [None]:
# Derive calendar features from the DatetimeIndex:
# day_of_week is 0 (Monday) .. 6 (Sunday), hour_of_day is 0 .. 23.
timestamps = data.index
data["day_of_week"] = timestamps.dayofweek
data["hour_of_day"] = timestamps.hour

In [None]:
# Feature selection based on the RFE algorithm.
# The target 'lots_available' is listed first.
features = [
    'lots_available',
    'day_of_week',
    'hour_of_day',
    'total_lot',
    'carpark_number',
    'x_coord',
    'y_coord',
    'car_park_decks',
    'gantry_height',
    'MULTI-STOREY CAR PARK',
    'WHOLE DAY',
    'NO.1',
    'SUN & PH FR 7AM-10.30PM',
]

In [None]:
# Keep only the selected features, in the order given by `features`.
# The explicit copy makes `data` an independent frame rather than a selection
# of the wider table.
data = data.loc[:, features].copy()

In [None]:
# Blank out every row at this timestamp, then drop all rows that contain a
# NaN. This removes the timestamp and any other incomplete row.
excluded_timestamp = '2016-02-19 11:15:00'
data.loc[excluded_timestamp, :] = np.nan
data = data.dropna()

In [None]:
# Two-stage hold-out using the project's own `train_test_split` (from utils):
# first separate the test split from the full data, then split the remainder
# into the training and validation sets used for tuning.
test_steps = 673
val_steps = 480
Train, Test = train_test_split(data, test_step_size=test_steps)
train, val = train_test_split(Train, test_step_size=val_steps)

In [None]:
# Min-max scale every column independently. Each scaler is fitted on the
# training split only and then applied unchanged to the validation and test
# splits, so no statistics leak from held-out data into training.
for column in Train.columns:
    scaler = MinMaxScaler()

    # scikit-learn expects 2-D input, so each column goes in as (n, 1) and is
    # flattened back to 1-D before being written into the frame.
    scaled_train = scaler.fit_transform(train[column].values.reshape(-1, 1)).ravel()
    scaled_val = scaler.transform(val[column].values.reshape(-1, 1)).ravel()
    scaled_test = scaler.transform(Test[column].values.reshape(-1, 1)).ravel()

    train[column] = scaled_train
    val[column] = scaled_val
    Test[column] = scaled_test

In [None]:
# Build model inputs and labels for each split with the project's
# `X_Y_split_DL` helper (from utils), using the same settings throughout:
# a window of one step and column 0 as the label column.
split_options = {'window_size': 1, 'label_col_no': 0}
X_train, Y_train = X_Y_split_DL(train, **split_options)
X_val, Y_val = X_Y_split_DL(val, **split_options)
X_test, Y_test = X_Y_split_DL(Test, **split_options)

## LSTM

In [None]:
def build_model_lstm(hp):
    """Build and compile a two-layer stacked LSTM regressor for KerasTuner.

    The tuner calls this once per trial. The units of each LSTM layer
    (100, 150 or 200) and the dropout rate after each layer (0 to 0.5 in
    steps of 0.1) are sampled from ``hp``.

    Args:
        hp: KerasTuner hyperparameter container for the current trial.

    Returns:
        A compiled ``Sequential`` model with MSE loss, the Adam optimizer
        and RMSE as metric. The input shape is taken from the module-level
        ``X_train``.
    """
    # Sample in the order the layers are stacked, so the hyperparameters are
    # registered in the same sequence as before.
    first_units = hp.Int('first_layer_units', min_value=100, max_value=200, step=50)
    first_dropout = hp.Float('first_dropout_rate', min_value=0, max_value=0.5, step=0.1)
    second_units = hp.Int('second_layer_units', min_value=100, max_value=200, step=50)
    second_dropout = hp.Float('second_dropout_rate', min_value=0, max_value=0.5, step=0.1)

    sample_shape = (X_train.shape[1], X_train.shape[2])

    model = Sequential()
    # The first recurrent layer returns the full sequence to feed the second.
    model.add(LSTM(units=first_units, return_sequences=True, input_shape=sample_shape))
    model.add(Dropout(first_dropout))
    model.add(LSTM(units=second_units, return_sequences=False))
    model.add(Dropout(second_dropout))
    # Single linear unit: one regression output per sample.
    model.add(Dense(1, activation='linear'))

    model.compile(
        loss=MeanSquaredError(),
        optimizer=Adam(),
        metrics=[RootMeanSquaredError()],
    )
    return model

In [None]:
# Random search over the LSTM hyperparameters.
# - Trials are ranked by RMSE on the validation data passed to `search`
#   ("val_" prefix). Ranking on the training metric would reward overfitting
#   and bias the search towards low dropout.
# - A dedicated project_name keeps this tuner's trials and checkpoints apart
#   from other tuners in the notebook. With the default name, a second tuner
#   would reload this one's finished trials from ./untitled_project.
tuner_lstm = keras_tuner.RandomSearch(
    hypermodel=build_model_lstm,
    objective=keras_tuner.Objective("val_root_mean_squared_error", direction="min"),
    project_name="lstm_random_search",
)

In [None]:
# Stop a trial once val_loss has not improved for 3 consecutive epochs.
stop_early = EarlyStopping(monitor='val_loss', patience=3)

# Run the LSTM search (at most 10 epochs per trial) and report wall-clock time.
start_time = time.time()
tuner_lstm.search(
    X_train,
    Y_train,
    epochs=10,
    validation_data=(X_val, Y_val),
    callbacks=[stop_early],
)
elapsed_seconds = time.time() - start_time
print("--- %s seconds ---" % elapsed_seconds)

In [None]:
# Print the best LSTM trials (10 is the library default, written out here).
tuner_lstm.results_summary(num_trials=10)

In [None]:
# Rebuild the best LSTM model found by the search.
# It is stored under a model-specific name because the GRU section rebinds
# `best_model`; `best_model` is kept as an alias for cells that use it.
best_model_lstm = tuner_lstm.get_best_models(num_models=1)[0]
best_model = best_model_lstm

## GRU

In [None]:
def build_model_gru(hp):
    """Build and compile a two-layer stacked GRU regressor for KerasTuner.

    The tuner calls this once per trial. The units of each GRU layer
    (100, 150 or 200) and the dropout rate after each layer (0 to 0.5 in
    steps of 0.1) are sampled from ``hp``.

    Args:
        hp: KerasTuner hyperparameter container for the current trial.

    Returns:
        A compiled ``Sequential`` model with MSE loss, the Adam optimizer
        and RMSE as metric. The input shape is taken from the module-level
        ``X_train``.
    """
    # Sample in the order the layers are stacked, so the hyperparameters are
    # registered in the same sequence as before.
    first_units = hp.Int('first_layer_units', min_value=100, max_value=200, step=50)
    first_dropout = hp.Float('first_dropout_rate', min_value=0, max_value=0.5, step=0.1)
    second_units = hp.Int('second_layer_units', min_value=100, max_value=200, step=50)
    second_dropout = hp.Float('second_dropout_rate', min_value=0, max_value=0.5, step=0.1)

    sample_shape = (X_train.shape[1], X_train.shape[2])

    model = Sequential()
    # The first recurrent layer returns the full sequence to feed the second.
    model.add(GRU(units=first_units, return_sequences=True, input_shape=sample_shape))
    model.add(Dropout(first_dropout))
    model.add(GRU(units=second_units, return_sequences=False))
    model.add(Dropout(second_dropout))
    # Single linear unit: one regression output per sample.
    model.add(Dense(1, activation='linear'))

    model.compile(
        loss=MeanSquaredError(),
        optimizer=Adam(),
        metrics=[RootMeanSquaredError()],
    )
    return model

In [None]:
# Random search over the GRU hyperparameters.
# - Trials are ranked by RMSE on the validation data passed to `search`
#   ("val_" prefix). Ranking on the training metric would reward overfitting
#   and bias the search towards low dropout.
# - A dedicated project_name is required here. With the default name, this
#   tuner would reload the finished trials of any earlier tuner stored in
#   ./untitled_project, and no GRU model would ever be trained.
tuner_gru = keras_tuner.RandomSearch(
    hypermodel=build_model_gru,
    objective=keras_tuner.Objective("val_root_mean_squared_error", direction="min"),
    project_name="gru_random_search",
)

In [None]:
# Stop a trial once val_loss has not improved for 3 consecutive epochs.
stop_early = EarlyStopping(monitor='val_loss', patience=3)

# Run the GRU search (at most 10 epochs per trial) and report wall-clock time.
start_time = time.time()
tuner_gru.search(
    X_train,
    Y_train,
    epochs=10,
    validation_data=(X_val, Y_val),
    callbacks=[stop_early],
)
elapsed_seconds = time.time() - start_time
print("--- %s seconds ---" % elapsed_seconds)

In [None]:
# Print the best GRU trials (10 is the library default, written out here).
tuner_gru.results_summary(num_trials=10)

In [None]:
# Rebuild the best GRU model found by the search.
# It is stored under a model-specific name so it stays distinguishable from
# the best models of other sections; `best_model` is kept as an alias for
# cells that use it.
best_model_gru = tuner_gru.get_best_models(num_models=1)[0]
best_model = best_model_gru

## Seq2seq

- Due to limited computational resources, this study simply adopted the hyperparameter (hp) values reported in the literature instead of tuning them.
- For more details, please see the original thesis in the repository.