# Hyperpamaters Tuning

In [1]:
import sys
sys.path.append('..')

In [2]:
import numpy as np
import random
import tensorflow
import keras_tuner

from tensorflow import keras
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, TimeDistributed, RepeatVector
from keras.optimizers import Adam
from lib.read_data import read_and_join_output_file
from lib.deeplearning import get_train_test_datasets,  get_sets_shapes

In [3]:
RANDOM_SEED = 31
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
tensorflow.random.set_seed(RANDOM_SEED)

## Preparing the Dataset
The dataset is prepared as explained in the /ml/deeplearning.ipynb notebook. Please refer to it for more details. As a summary:
* The train and test sets are split by Township-Ranges, i.e. some Township-Ranges data are either fully in the train or test set.
* The target value is the value of that variable for 2021
* Data are imputed using a custom pipeline

The resulting train and test sets are of shape [number of Township-Ranges, 7 years (2014-2020), the number of features].
We do not create a validation dataset as we use Keras internal cross-validation mechanism to shuffle the data points (i.e., the Township-Ranges) and keep some for the validation at each training epoch.

In [5]:
test_size=0.15
target_variable="GSE_GWE"
# Load the data from the ETL output files
X = read_and_join_output_file()
# Split the input pandas Dataframe into training and test datasets, applies the impute pipeline
# transformation and reshapes the datasets to 3D (samples, time, features) numpy arrays
X_train, X_test, y_train, y_test, _, _ = get_train_test_datasets(X, target_variable=target_variable,
                                                                                           test_size=test_size, random_seed=RANDOM_SEED)
nb_features = X_train.shape[-1]
get_sets_shapes(X_train, X_test)

Unnamed: 0_level_0,Unnamed: 1_level_0,TOTALDRILLDEPTH_AVG,WELLYIELD_AVG,STATICWATERLEVEL_AVG,TOPOFPERFORATEDINTERVAL_AVG,BOTTOMOFPERFORATEDINTERVAL_AVG,TOTALCOMPLETEDDEPTH_AVG,VEGETATION_BLUE_OAK-GRAY_PINE,VEGETATION_CALIFORNIA_COAST_LIVE_OAK,VEGETATION_CANYON_LIVE_OAK,VEGETATION_HARD_CHAPARRAL,...,POPULATION_DENSITY,PCT_OF_CAPACITY,GROUNDSURFACEELEVATION_AVG,AVERAGE_YEARLY_PRECIPITATION,SHORTAGE_COUNT,GSE_GWE,WELL_COUNT_AGRICULTURE,WELL_COUNT_DOMESTIC,WELL_COUNT_INDUSTRIAL,WELL_COUNT_PUBLIC
TOWNSHIP_RANGE,YEAR,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
T01N R02E,2014,0.000000,0.000489,0.020868,0.052288,0.076699,0.127841,0.010798,0.002749,0.000000,0.000633,...,0.391791,0.776467,0.043092,0.286941,0.0,0.083371,0.021277,0.013889,0.0,0.0
T01N R02E,2015,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.010798,0.002749,0.000000,0.000633,...,0.394044,0.776467,0.037376,0.301232,0.0,0.081869,0.000000,0.000000,0.0,0.0
T01N R02E,2016,0.000000,0.003259,0.036728,0.084967,0.058252,0.056818,0.010798,0.002749,0.000000,0.000633,...,0.395968,0.776467,0.016622,0.357881,0.0,0.071257,0.000000,0.013889,0.0,0.0
T01N R02E,2017,0.066667,0.006410,0.025876,0.082789,0.064725,0.074495,0.010798,0.002749,0.000000,0.000633,...,0.406050,0.776467,0.031660,0.689154,0.0,0.070044,0.000000,0.041667,0.0,0.0
T01N R02E,2018,0.053651,0.000652,0.063022,0.077124,0.053592,0.064015,0.010798,0.002749,0.000000,0.000633,...,0.405447,0.776467,0.051869,0.252603,0.0,0.067062,0.021277,0.013889,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
T32S R30E,2016,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.033178,0.000000,0.002023,0.003535,...,0.004489,0.496289,0.058099,0.118655,0.0,0.667084,0.000000,0.000000,0.0,0.0
T32S R30E,2017,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.033178,0.000000,0.002023,0.003535,...,0.004477,0.496289,0.058099,0.180043,0.0,0.566686,0.000000,0.000000,0.0,0.0
T32S R30E,2018,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.033178,0.000000,0.002023,0.003535,...,0.004494,0.496289,0.058099,0.084816,0.0,0.597051,0.000000,0.000000,0.0,0.0
T32S R30E,2019,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.033178,0.000000,0.002023,0.003535,...,0.004511,0.580893,0.058099,0.168764,0.0,0.608404,0.000000,0.000000,0.0,0.0


## Hyperparameters Tuning
For each of the 3 LSTM models architectures (from simplest to most complex), we use the Keras BayesianOptimization hyperparameters tuner to estimate the best values for the following hyperparameters:
* the number of units for each *LSTM* or *Dense* unit
* the activation function (*sigmoid*, *tanh*, *relu*) used for all layers, except the output layer which is fixed to a *linear* activation function.
* the learning rate
* the size of the validation dataset
* the batch size
* the number of epochs
## Simple Model Hyper-parameter Tuning
![Simple LSTM Model](../doc/images/deeplearning-architecture-1.jpg)

In [6]:
class Model1(keras_tuner.HyperModel):
    def build(self, hp):
        model = Sequential()
        hp_units = hp.Int("units", min_value=10, max_value=300, step=10)
        hp_activ = hp.Choice("activation", values=["tanh", "sigmoid"])
        model.add(LSTM(units=hp_units, activation=hp_activ, input_shape=(7, nb_features)))
        model.add(Dense(1, activation="linear"))
        hp_learning_rate = hp.Choice("learning_rate", values=[1e-2, 1e-3, 1e-4])
        model.compile(loss="mse", optimizer=Adam(learning_rate=hp_learning_rate), metrics=[keras.metrics.RootMeanSquaredError()])
        return model

    def fit(self, hp, model, *args, **kwargs):
        return model.fit(
            *args,
            validation_split=hp.Choice("validation_split", values=[0.05, 0.1, 0.15, 0.2]),
            batch_size=hp.Int("batch_size", min_value=32, max_value=192, step=16),
            epochs=hp.Int("epochs", min_value=30, max_value=500, step=5),
            shuffle=True,
            **kwargs,
        )

In [7]:
stop_early = tensorflow.keras.callbacks.EarlyStopping(monitor='val_root_mean_squared_error', patience=10, verbose=1)
tuner = keras_tuner.BayesianOptimization(Model1(),
                             objective=keras_tuner.Objective("val_root_mean_squared_error", direction="min"),
                             max_trials=250,
                             beta=3.2,
                             seed=RANDOM_SEED,
                             overwrite=True,
                             directory="keras_tuner",
                             project_name="model1_tuner")
tuner.search(X_train, y_train, callbacks=[stop_early])

Trial 250 Complete [00h 00m 05s]
val_root_mean_squared_error: 0.09763942658901215

Best val_root_mean_squared_error So Far: 0.0824510008096695
Total elapsed time: 01h 05m 47s
INFO:tensorflow:Oracle triggered exit


### Best Model Hyperparameters

In [8]:
# Get the optimal hyperparameters
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]
print(f"""
The hyperparameter search is complete.
validation_split: {best_hps.get('validation_split')}
lstm_units: {best_hps.get('units')}
lstm_activation: {best_hps.get('activation')}
learning_rate: {best_hps.get('learning_rate')}
batch_size: {best_hps.get('batch_size')}
epochs: {best_hps.get('epochs')}
""")


The hyperparameter search is complete.
validation_split: 0.15
lstm_units: 60
lstm_activation: tanh
learning_rate: 0.01
batch_size: 112
epochs: 30



### Hyperparameters Tuning Summary

In [9]:
tuner.results_summary()

Results summary
Results in keras_tuner\model1_tuner
Showing 10 best trials
<keras_tuner.engine.objective.Objective object at 0x00000220D05A6130>
Trial summary
Hyperparameters:
units: 60
activation: tanh
learning_rate: 0.01
validation_split: 0.15
batch_size: 112
epochs: 30
Score: 0.0824510008096695
Trial summary
Hyperparameters:
units: 60
activation: tanh
learning_rate: 0.01
validation_split: 0.15
batch_size: 112
epochs: 30
Score: 0.08390998095273972
Trial summary
Hyperparameters:
units: 60
activation: tanh
learning_rate: 0.01
validation_split: 0.15
batch_size: 112
epochs: 30
Score: 0.08468343317508698
Trial summary
Hyperparameters:
units: 60
activation: tanh
learning_rate: 0.01
validation_split: 0.15
batch_size: 112
epochs: 30
Score: 0.08546236157417297
Trial summary
Hyperparameters:
units: 60
activation: tanh
learning_rate: 0.01
validation_split: 0.15
batch_size: 112
epochs: 30
Score: 0.08679324388504028
Trial summary
Hyperparameters:
units: 60
activation: tanh
learning_rate: 0.01
val

## Model2 Hyper-parameter tuning
![LSTM Model With Dense Layer](../doc/images/deeplearning-architecture-2.jpg)

In [10]:
class Model2(keras_tuner.HyperModel):
    def build(self, hp):
        model = Sequential()
        lstm_units = hp.Int("lstm_units", min_value=10, max_value=300, step=10)
        lstm_activ = hp.Choice("lstm_activation", values=["tanh", "sigmoid"])
        model.add(LSTM(units=lstm_units, activation=lstm_activ, input_shape=(7, nb_features)))
        dense_units = hp.Int("dense_units", min_value=11, max_value=101, step=2)
        dense_activation = hp.Choice("dense_activation", values=["relu", "tanh", "sigmoid"])
        model.add(Dense(dense_units, activation=dense_activation))
        hp_dropout = hp.Float("dropout_rate", min_value=0.05, max_value=0.25, step=0.05)
        model.add(Dropout(hp_dropout))
        model.add(Dense(1, activation="linear"))
        hp_learning_rate = hp.Choice("learning_rate", values=[1e-2, 1e-3, 1e-4])
        model.compile(loss="mse", optimizer=Adam(learning_rate=hp_learning_rate), metrics=[keras.metrics.RootMeanSquaredError()])
        return model

    def fit(self, hp, model, *args, **kwargs):
        return model.fit(
            *args,
            validation_split=hp.Choice("validation_split", values=[0.05, 0.1, 0.15, 0.2]),
            batch_size=hp.Int("batch_size", min_value=32, max_value=192, step=16),
            epochs=hp.Int("epochs", min_value=30, max_value=500, step=5),
            shuffle=True,
            **kwargs,
        )

In [11]:
stop_early = tensorflow.keras.callbacks.EarlyStopping(monitor="val_root_mean_squared_error", patience=10, verbose=1)
tuner = keras_tuner.BayesianOptimization(Model2(),
                              objective=keras_tuner.Objective("val_root_mean_squared_error", direction="min"),
                              max_trials=400,
                              beta=3.2,
                              seed=RANDOM_SEED,
                              overwrite=True,
                              directory="keras_tuner",
                              project_name="model2_tuner")
tuner.search(X_train, y_train, callbacks=[stop_early])

Trial 400 Complete [00h 00m 06s]
val_root_mean_squared_error: 0.08002614974975586

Best val_root_mean_squared_error So Far: 0.06468775868415833
Total elapsed time: 03h 14m 13s
INFO:tensorflow:Oracle triggered exit


### Best Model Hyperparameters

In [12]:
# Get the optimal hyperparameters
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]
print(f"""
The hyperparameter search is complete.
validation_split: {best_hps.get('validation_split')}
lstm_units: {best_hps.get('lstm_units')}
lstm_activation: {best_hps.get('lstm_activation')}
dense_units: {best_hps.get('dense_units')}
dense_activation: {best_hps.get('dense_activation')}
dropout_rate: {best_hps.get('dropout_rate')}
learning_rate: {best_hps.get('learning_rate')}
batch_size: {best_hps.get('batch_size')}
epochs: {best_hps.get('epochs')}
""")


The hyperparameter search is complete.
validation_split: 0.05
lstm_units: 10
lstm_activation: sigmoid
dense_units: 11
dense_activation: relu
dropout_rate: 0.05
learning_rate: 0.01
batch_size: 32
epochs: 500



### Hyperparameters Tuning Summary

In [13]:
tuner.results_summary()

Results summary
Results in keras_tuner\model2_tuner
Showing 10 best trials
<keras_tuner.engine.objective.Objective object at 0x00000220D8130B50>
Trial summary
Hyperparameters:
lstm_units: 10
lstm_activation: sigmoid
dense_units: 11
dense_activation: relu
dropout_rate: 0.05
learning_rate: 0.01
validation_split: 0.05
batch_size: 32
epochs: 500
Score: 0.06468775868415833
Trial summary
Hyperparameters:
lstm_units: 10
lstm_activation: sigmoid
dense_units: 11
dense_activation: relu
dropout_rate: 0.1
learning_rate: 0.01
validation_split: 0.05
batch_size: 32
epochs: 500
Score: 0.06481722742319107
Trial summary
Hyperparameters:
lstm_units: 10
lstm_activation: sigmoid
dense_units: 11
dense_activation: relu
dropout_rate: 0.15000000000000002
learning_rate: 0.01
validation_split: 0.05
batch_size: 32
epochs: 415
Score: 0.06549284607172012
Trial summary
Hyperparameters:
lstm_units: 10
lstm_activation: sigmoid
dense_units: 11
dense_activation: relu
dropout_rate: 0.15000000000000002
learning_rate: 0.01

## Model3 Hyper-parameter tuning
![Encoder-Decoder LSTM Model](../doc/images/deeplearning-architecture-3.jpg)

In [7]:
class Model3(keras_tuner.HyperModel):
    def build(self, hp):
        model = Sequential()
        lstm_units = hp.Int("lstm_units", min_value=10, max_value=300, step=10)
        lstm_activ = hp.Choice("lstm_activation", values=["tanh", "sigmoid"])
        model.add(LSTM(units=lstm_units, activation="sigmoid", input_shape=(7, nb_features)))
        model.add(RepeatVector(1))
        lstm_units_2 = hp.Int("2nd_lstm_units", min_value=10, max_value=300, step=10)
        lstm_activ_2 = hp.Choice("2nd_lstm_activation", values=["tanh", "sigmoid"])
        model.add(LSTM(units=lstm_units_2, activation="sigmoid", return_sequences=True))
        dense_units = hp.Int("dense_units", min_value=11, max_value=101, step=2)
        dense_activation = hp.Choice("dense_activation", values=["relu", "tanh", "sigmoid"])
        model.add(TimeDistributed(Dense(dense_units, activation=dense_activation)))
        hp_dropout = hp.Float("dropout_rate", min_value=0.05, max_value=0.25, step=0.05)
        model.add(Dropout(hp_dropout))
        model.add(Dense(1, activation="linear"))
        hp_learning_rate = hp.Choice("learning_rate", values=[1e-2, 1e-3, 1e-4])
        model.compile(loss="mse", optimizer=Adam(learning_rate=hp_learning_rate), metrics=[keras.metrics.RootMeanSquaredError()])
        return model

    def fit(self, hp, model, *args, **kwargs):
        return model.fit(
            *args,
            validation_split=hp.Choice("validation_split", values=[0.05, 0.1, 0.15, 0.2]),
            batch_size=hp.Int("batch_size", min_value=32, max_value=192, step=16),
            epochs=hp.Int("epochs", min_value=30, max_value=500, step=5),
            shuffle=True,
            **kwargs,
        )

In [8]:
stop_early = tensorflow.keras.callbacks.EarlyStopping(monitor="val_root_mean_squared_error", patience=10, verbose=1)
tuner = keras_tuner.BayesianOptimization(Model3(),
                              objective=keras_tuner.Objective("val_root_mean_squared_error", direction="min"),
                              max_trials=400,
                              beta=3.2,
                              seed=RANDOM_SEED,
                              overwrite=True,
                              directory="keras_tuner",
                              project_name="model3_tuner")
tuner.search(X_train, y_train, callbacks=[stop_early])

Trial 500 Complete [00h 00m 18s]
val_root_mean_squared_error: 0.1150362491607666

Best val_root_mean_squared_error So Far: 0.06309118866920471
Total elapsed time: 07h 29m 36s
INFO:tensorflow:Oracle triggered exit


### Best Model Hyperparameters

In [9]:
# Get the optimal hyperparameters
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]
print(f"""
The hyperparameter search is complete.
validation_split: {best_hps.get('validation_split')}
lstm_units: {best_hps.get('lstm_units')}
lstm_activation: {best_hps.get('lstm_activation')}
2nd_lstm_units: {best_hps.get('2nd_lstm_units')}
2nd_lstm_activation: {best_hps.get('2nd_lstm_activation')}
dense_units: {best_hps.get('dense_units')}
dense_activation: {best_hps.get('dense_activation')}
dropout_rate: {best_hps.get('dropout_rate')}
learning_rate: {best_hps.get('learning_rate')}
batch_size: {best_hps.get('batch_size')}
epochs: {best_hps.get('epochs')}
""")


The hyperparameter search is complete.
validation_split: 0.05
lstm_units: 300
lstm_activation: sigmoid
2nd_lstm_units: 300
2nd_lstm_activation: tanh
dense_units: 11
dense_activation: sigmoid
dropout_rate: 0.05
learning_rate: 0.01
batch_size: 32
epochs: 30



### Hyperparameters Tuning Summary

In [10]:
tuner.results_summary()

Results summary
Results in keras_tuner\model3_tuner
Showing 10 best trials
<keras_tuner.engine.objective.Objective object at 0x000002575F038250>
Trial summary
Hyperparameters:
lstm_units: 300
lstm_activation: sigmoid
2nd_lstm_units: 300
2nd_lstm_activation: tanh
dense_units: 11
dense_activation: sigmoid
dropout_rate: 0.05
learning_rate: 0.01
validation_split: 0.05
batch_size: 32
epochs: 30
Score: 0.06309118866920471
Trial summary
Hyperparameters:
lstm_units: 300
lstm_activation: sigmoid
2nd_lstm_units: 300
2nd_lstm_activation: tanh
dense_units: 11
dense_activation: sigmoid
dropout_rate: 0.05
learning_rate: 0.01
validation_split: 0.05
batch_size: 32
epochs: 30
Score: 0.06499446928501129
Trial summary
Hyperparameters:
lstm_units: 300
lstm_activation: sigmoid
2nd_lstm_units: 300
2nd_lstm_activation: sigmoid
dense_units: 11
dense_activation: sigmoid
dropout_rate: 0.05
learning_rate: 0.01
validation_split: 0.05
batch_size: 32
epochs: 30
Score: 0.06594016402959824
Trial summary
Hyperparamete