In [15]:
import warnings
warnings.simplefilter("ignore")
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
import keras_tuner as kt

In [26]:
# Root folder of the input data; the Excel files are read from its `Custom`
# subfolder. The path is relative, so it resolves against the kernel's
# working directory.
DATA_PATH = 'Reservoir_Project/Data'
# Folder passed to the Keras Tuner as its `directory` argument.
HP_TUNING_PATH = 'Reservoir_Project/Hyperparameter_Tuning'

In [43]:
def _load_custom_frame(stem):
    """Read `<DATA_PATH>/Custom/<stem>.xlsx`, using its first column as the index."""
    return pd.read_excel(f'{DATA_PATH}/Custom/{stem}.xlsx', index_col=0)


# Full series plus the three pre-split workbooks, all loaded the same way.
basin_inflow = _load_custom_frame('basin_inflow_no_dates')
basin_inflow_train = _load_custom_frame('basin_inflow_train')
basin_inflow_validation = _load_custom_frame('basin_inflow_validation')
basin_inflow_test = _load_custom_frame('basin_inflow_test')

### Create data window

In [5]:
""" TensorFlow utility class for producing data windows from time series data """

class WindowGenerator():
  """Describes how fixed-size windows are cut from a time series.

  A window spans `input_width + shift` consecutive positions. The inputs are
  the first `input_width` positions; the labels run from position
  `total_window_size - label_width` to the end of the window.
  """

  def __init__(self, input_width, label_width, shift,
               train_df, val_df, test_df,
               label_columns=None):
    """Record the data splits and derive the input/label slices.

    Args:
      input_width: number of leading positions used as model inputs.
      label_width: number of trailing positions used as labels.
      shift: offset added to `input_width` to obtain the total window size.
      train_df, val_df, test_df: the three data splits. Only
        `train_df.columns` is inspected here; the frames are stored as-is.
      label_columns: optional iterable of column names to keep in the labels.
        When None, no `label_columns_indices` attribute is created.
    """
    # Keep references to the three splits (no copies are made).
    self.train_df, self.val_df, self.test_df = train_df, val_df, test_df

    # Column name -> position in the training frame.
    self.column_indices = {
        column: position for position, column in enumerate(train_df.columns)}

    # Label name -> position within the requested label list.
    self.label_columns = label_columns
    if label_columns is not None:
      self.label_columns_indices = {
          column: position for position, column in enumerate(label_columns)}

    # Window geometry.
    self.input_width, self.label_width, self.shift = (
        input_width, label_width, shift)
    window_size = input_width + shift
    self.total_window_size = window_size

    # Inputs occupy the head of the window.
    self.input_slice = slice(0, input_width)
    self.input_indices = np.arange(window_size)[self.input_slice]

    # Labels occupy the tail of the window.
    self.label_start = window_size - label_width
    self.labels_slice = slice(self.label_start, None)
    self.label_indices = np.arange(window_size)[self.labels_slice]

In [6]:
"""
Window
- Given 60 days of history predict 30 days ahead. A season is about 90 days in a CA Water Year.
- Window size: 90
"""

# total_window_size = input_width + shift = 60 + 30 = 90 positions per window.
# With label_width=1 the label slice starts at 90 - 1 = 89, so each window has
# a single label position, 30 steps after the last input position (index 59).
# label_columns=['INFLOW'] restricts the labels to that one column.
window = WindowGenerator(input_width=60, label_width=1, shift=30,
                     train_df=basin_inflow_train, val_df=basin_inflow_validation, 
                     test_df=basin_inflow_test, label_columns=['INFLOW'])

<__main__.WindowGenerator at 0x299736610>

In [7]:
""" Create window of inputs and labels """

def split_window(self, features):
  """Split a batch of full windows into an (inputs, labels) pair.

  `features` is indexed on three axes; the middle axis is cut with
  `self.input_slice` for the inputs and `self.labels_slice` for the labels.
  When `self.label_columns` is set, only those columns (looked up through
  `self.column_indices`) are kept in the labels, stacked on the last axis in
  the order given.
  """
  input_window = features[:, self.input_slice, :]
  label_window = features[:, self.labels_slice, :]

  wanted_columns = self.label_columns
  if wanted_columns is not None:
    per_column = []
    for column_name in wanted_columns:
      column_position = self.column_indices[column_name]
      per_column.append(label_window[:, :, column_position])
    label_window = tf.stack(per_column, axis=-1)

  # Re-declare the known size of the middle axis after slicing.
  input_window.set_shape([None, self.input_width, None])
  label_window.set_shape([None, self.label_width, None])

  return input_window, label_window

# Attach the function defined in this cell to the class as a method.
WindowGenerator.split_window = split_window

In [8]:
""" Create dataset of sliding windows over a time series dataframe """

def make_dataset(self, data, shuffle=True, batch_size=32):
  """Turn a table of consecutive rows into batched (inputs, labels) windows.

  Args:
    data: array-like convertible to a float32 NumPy array (for example one of
      the stored data frames).
    shuffle: forwarded to `timeseries_dataset_from_array`. Defaults to True,
      the value this method previously hard-coded. Pass False when the order
      of the windows matters, e.g. when predictions have to line up with
      consecutive rows.
    batch_size: number of windows per batch. Defaults to 32, the value this
      method previously hard-coded.

  Returns:
    The dataset created by `timeseries_dataset_from_array` with windows of
    `self.total_window_size` rows and stride 1, mapped through
    `self.split_window`.
  """
  data = np.array(data, dtype=np.float32)
  ds = tf.keras.utils.timeseries_dataset_from_array(
      data=data,
      targets=None,  # labels are cut out of each window by split_window
      sequence_length=self.total_window_size,
      sequence_stride=1,
      shuffle=shuffle,
      batch_size=batch_size,)

  # (input_window, label_window) pairs
  ds = ds.map(self.split_window)

  return ds

# Attach the function defined in this cell to the class as a method.
WindowGenerator.make_dataset = make_dataset

In [9]:
@property
def train(self):
  """Windowed dataset for the training split; rebuilt on every access."""
  training_frame = self.train_df
  return self.make_dataset(training_frame)

@property
def val(self):
  """Windowed dataset for the validation split; rebuilt on every access."""
  validation_frame = self.val_df
  return self.make_dataset(validation_frame)

@property
def test(self):
  """Windowed dataset for the test split; rebuilt on every access."""
  test_frame = self.test_df
  return self.make_dataset(test_frame)

# Expose the three properties defined in this cell on the class.
WindowGenerator.train = train
WindowGenerator.val = val
WindowGenerator.test = test

### Model

In [30]:
def build_model(hp):
  """Build and compile a stacked two-layer LSTM regressor for the tuner.

  Args:
    hp: hyperparameter container supplying `Int` and `Choice`; it registers
      'units', 'dropout', 'recurrent_dropout', 'l2' and 'learning_rate'.

  Returns:
    A compiled `keras.Sequential` model: LSTM (returning sequences) -> LSTM ->
    Dense(1), trained with mean absolute error as both loss and metric.
  """
  model = keras.Sequential()

  # --- Search space (registered in this order) ---
  # Width shared by both LSTM layers.
  layer_units = hp.Int('units', min_value=32, max_value=128, step=32)
  # Dropout on the layer inputs.
  input_dropout = hp.Choice("dropout", [0.2, 0.3, 0.4, 0.5])
  # Dropout on the recurrent state between time steps.
  state_dropout = hp.Choice("recurrent_dropout", [0.2, 0.3, 0.4, 0.5])
  # Strength of the L2 penalty on the kernel weights.
  l2_strength = hp.Choice("l2", [0.001, 0.01, 0.02, 0.05])
  # Step size for the Adam optimizer.
  step_size = hp.Choice('learning_rate', values=[0.01, 0.001, 0.0001])

  def tuned_lstm(**extra):
    # Both recurrent layers share the tuned settings; every call creates its
    # own regularizer object.
    return tf.keras.layers.LSTM(
        units=layer_units,
        dropout=input_dropout,
        recurrent_dropout=state_dropout,
        kernel_regularizer=tf.keras.regularizers.l2(l2=l2_strength),
        **extra)

  # --- Layers ---
  # NOTE(review): 39 is presumably the number of feature columns in the
  # training frame - confirm it matches the data fed to the model.
  model.add(tuned_lstm(return_sequences=True, input_shape=[None, 39]))
  model.add(tuned_lstm())
  model.add(keras.layers.Dense(1))

  # --- Compile ---
  mae_loss = tf.keras.losses.MeanAbsoluteError()
  adam = tf.keras.optimizers.legacy.Adam(learning_rate=step_size)
  model.compile(loss=mae_loss,
                optimizer=adam,
                metrics=['mean_absolute_error'])

  return model

In [33]:
# Hyperband search over the hyperparameters registered in build_model.
# The objective is the validation counterpart of the 'mean_absolute_error'
# metric that build_model compiles into every candidate model.
# Tuner state is kept under HP_TUNING_PATH, in the
# 'reservoir_model_hp_tuning' project.
tuner = kt.Hyperband(build_model,
                     objective='val_mean_absolute_error',
                     max_epochs=10,
                     factor=3,
                     directory=HP_TUNING_PATH,
                     project_name='reservoir_model_hp_tuning')

In [34]:
# Epoch budget passed to tuner.search and to the epoch-finding fit.
MAX_EPOCHS = 10
# Stop a run once val_loss has gone 5 epochs without improving.
# NOTE(review): the tuner objective is 'val_mean_absolute_error' while this
# callback watches 'val_loss'; with L2 regularization in the model the two
# presumably differ by the penalty term - confirm this is intended.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, mode='min')

In [None]:
# Run the hyperparameter search. `window.train` and `window.val` each build a
# dataset when accessed, so they are evaluated once here and then reused by the tuner.
tuner.search(window.train, epochs=MAX_EPOCHS, validation_data=window.val, callbacks=[early_stopping])

In [36]:
# Keep the first (best-ranked) hyperparameter set returned by the tuner.
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]

In [None]:
# find the optimal number of training epochs

# Build a fresh model from the best hyperparameters and train it for up to
# MAX_EPOCHS (early stopping may end the run sooner).
model = tuner.hypermodel.build(best_hps)
history = model.fit(window.train, epochs=MAX_EPOCHS, validation_data=window.val, callbacks=[early_stopping])

# One validation-MAE value per completed epoch.
val_mae_per_epoch = history.history['val_mean_absolute_error']
# list.index returns the first occurrence, so ties resolve to the earliest
# epoch; +1 converts the 0-based position into an epoch count.
best_epoch = val_mae_per_epoch.index(min(val_mae_per_epoch)) + 1 # epoch of lowest validation MAE 

In [None]:
# Retrain from scratch with the best hyperparameters for `best_epoch` epochs.
# This rebinds `history`, replacing the record of the epoch-finding run.
# NOTE(review): early stopping is still attached, so training can end before
# `best_epoch` is reached - confirm that is intended.
hypermodel = tuner.hypermodel.build(best_hps)
history = hypermodel.fit(window.train, epochs=best_epoch, validation_data=window.val, callbacks=[early_stopping])

### Predict next 30 days of inflow

In [46]:
""" Inverse scaling for cfs prediction """

# Rebuild the test split from the full series: the last 10% of its rows.
df_size = len(basin_inflow)
test_start = int(df_size * 0.9)
# Slice by position and renumber the rows in one non-mutating chain; the
# previous in-place reset_index acted on a slice of `basin_inflow`.
# This rebinds `basin_inflow_test`; `window` keeps the frame it was given
# when it was constructed.
basin_inflow_test = basin_inflow.iloc[test_start:].reset_index(drop=True)
# obtain absolute maximum inflow value for inverse scaling
# NOTE(review): this is the maximum over the test rows only - confirm it is
# the same factor that was used to scale the 'INFLOW' column of the model data.
INFLOW_ABS_MAX = basin_inflow_test['INFLOW'].abs().max()

def inverse_scaling(predictions, num_days=30, abs_max=None):
    """Print the leading predictions multiplied back up to cfs.

    Args:
        predictions: sequence of rows; the first element of each row is the
            scaled prediction.
        num_days: how many leading rows to print. Defaults to 30, the value
            that was previously hard-coded.
        abs_max: factor each prediction is multiplied by. Defaults to the
            notebook-level `INFLOW_ABS_MAX`, which is looked up at call time.

    Returns:
        None. The function only prints, so a cell ending with this call shows
        no extra output value.
    """
    if abs_max is None:
        abs_max = INFLOW_ABS_MAX

    # With the default num_days this header is identical to the original one.
    print('Next {} Days of Inflow\n'.format(num_days))
    for day, target in enumerate(predictions[:num_days], start=1):
        print("{}. {:.3f} cfs".format(day, target[0] * abs_max))

In [40]:
""" 60-day windows of basin weather and river data """

# Each `window.test` access builds a new dataset through make_dataset, which
# shuffles by default, so the three objects below are independent pipelines.
# take(k) keeps the first k batches: 1, 2 and 3 batches respectively.
# NOTE(review): the names suggest "batch #1 / #2 / #3"; if that was the
# intent, skip(k - 1).take(1) on a single unshuffled dataset would be needed -
# confirm.
windowTestBatch1 = window.test.take(1)
windowTestBatch2 = window.test.take(2)
windowTestBatch3 = window.test.take(3)

In [41]:
""" Predicted Inflows """

# Run the retrained model on each batch selection. The model ends in
# Dense(1), and inverse_scaling reads each prediction as row[0].
predictedInflow1 = hypermodel.predict(windowTestBatch1)
predictedInflow2 = hypermodel.predict(windowTestBatch2)
predictedInflow3 = hypermodel.predict(windowTestBatch3)



In [48]:
# Print the first 30 rescaled predictions from the first batch selection.
# NOTE(review): the recorded output contains a negative flow (item 18); the
# model's final Dense(1) layer places no lower bound on its output.
inverse_scaling(predictedInflow1)

Next 30 Days of Inflow

1. 6096.670 cfs
2. 7168.289 cfs
3. 7115.677 cfs
4. 6249.618 cfs
5. 6506.479 cfs
6. 9250.617 cfs
7. 8197.594 cfs
8. 13122.252 cfs
9. 5557.840 cfs
10. 8920.194 cfs
11. 9449.152 cfs
12. 13048.285 cfs
13. 19556.594 cfs
14. 19501.059 cfs
15. 9529.258 cfs
16. 9934.721 cfs
17. 7019.022 cfs
18. -3893.511 cfs
19. 12713.444 cfs
20. 6187.136 cfs
21. 7742.878 cfs
22. 10527.569 cfs
23. 11802.981 cfs
24. 7219.112 cfs
25. 7104.785 cfs
26. 6460.319 cfs
27. 9677.672 cfs
28. 7686.907 cfs
29. 18663.553 cfs
30. 19070.869 cfs
