# RTP Forecasting: CNN-LSTM search
In this notebook, we do a full search for the best hyperparameters for the LSTM model. To achieve this task, we used keras-tuner with some manual tuning. Six searchs runs was conducted and the best model was chosen and implement in `LSTM_model.ipynb` notebook.

In [1]:
# pip install keras-tuner --upgrade

Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import numpy as np
import time
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
import keras
from tensorflow.keras.models import *
from tensorflow.keras.callbacks import *
from tensorflow.keras.layers import *

%load_ext autoreload
%autoreload 2
%matplotlib inline

2023-11-19 14:34:29.723308: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-11-19 14:34:29.762271: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
print(tf.__version__)

2.12.1


In [4]:
cwd = os.getcwd()

In [5]:
def make_dir(path):
    if os.path.exists(path) is False:
        os.makedirs(path)

In [6]:
#timing callback
class TimeHistory(keras.callbacks.Callback):
    def on_train_begin(self, logs={}):
        self.times = []

    def on_epoch_begin(self, batch, logs={}):
        self.epoch_time_start = time.time()

    def on_epoch_end(self, batch, logs={}):
        self.times.append(time.time() - self.epoch_time_start)

In [8]:
#zones = ['CAPITL', 'CENTRL', 'DUNWOD', 'GENESE', 'HUD VL', 'LONGIL',
#         'MHK VL', 'MILLWD', 'N.Y.C.', 'NORTH', 'WEST']
zone = 'N.Y.C.'
year = 2021

### Data Import

In [9]:
# Read each timeseries (RTP = Real-Time Price, DAP = Day-Ahead Price, LF = Load Forecast)
raw_DAP = pd.read_csv("nyiso/da_lmp_zones_df_2015_2021.csv", index_col=0)
raw_RTP = pd.read_csv("nyiso/rt_lmp_zones_df_2015_2021.csv", index_col=0)
raw_LF = pd.read_csv("nyiso/load_frcstd_df_2015_2021.csv", index_col=0)

In [10]:
# Prepare the dataset as a dataframe
raw_data = pd.concat([raw_DAP.loc[:,zone], raw_LF.loc[:,zone], raw_RTP.loc[:,zone]],
                       axis=1).loc['2017-01-01 05:00:00+00:00':]
raw_data.columns = ['DAP', 'LF', 'RTP']
raw_data.index.names = ['date']
raw_data.to_csv('nyiso/NYISO_'+zone+'_raw.csv')

In [12]:
# We perform log tranformation before feeding the dataset into the model to make enhance the performance
# We only log tranform the price data (RTP and DAP)
log = 1
# log transformation for the forecasting task log10(Y + 1 - min(Y))
if log:
    log_data = raw_data.copy(deep=True)
    log_data.loc[:,"DAP"] = np.log(raw_data.loc[:,"DAP"] + 1 - min(raw_data.loc[:,"DAP"]))
    log_data.loc[:,"RTP"] = np.log(raw_data.loc[:,"RTP"] + 1 - min(raw_data.loc[:,"RTP"]))
    log_data.to_csv('nyiso/NYISO_'+zone+'_log.csv')

### Data Preprocessing

In [14]:
# Split dataset: 2017-2020 years for training and 2021 year for testing
x_train_df = log_data.iloc[:8760*4+24,:]
x_test_df = log_data.iloc[8760*4+24:,:]

y_train_df = log_data.iloc[:8760*4+24,2:]
y_test_df = log_data.iloc[8760*4+24:,2:]

In [15]:
# # Standardization
x_mean, x_std = x_train_df.mean(), x_train_df.std()
y_mean, y_std = y_train_df.mean(), y_train_df.std()

x_train = ((x_train_df - x_mean)/x_std).to_numpy()
x_test = ((x_test_df - x_mean)/x_std).to_numpy()

y_train = ((y_train_df - y_mean)/y_std).to_numpy()
y_test = ((y_test_df - y_mean)/y_std).to_numpy()

print(x_train.shape,y_train.shape,x_test.shape, y_test.shape)

(35064, 3) (35064, 1) (8760, 3) (8760, 1)


### Reshape to (samples, steps, features)
more details about the reshape can be found in `LSTM_model.ipynb` or the report

In [16]:
n_steps_in = 72
n_steps_out = 24

x_train_lstm = np.array([x_train[i:i+n_steps_in] for i in range(0, x_train.shape[0]-n_steps_in-n_steps_out+1)])
y_train_lstm = np.array([y_train[i+n_steps_in:i+n_steps_in+n_steps_out] for i in range(0, y_train.shape[0]-n_steps_in-n_steps_out+1)])

x_test_lstm = np.array([x_test[i:i+n_steps_in] for i in range(0, x_test.shape[0]-n_steps_in-n_steps_out+1)])
y_test_lstm = np.array([y_test[i+n_steps_in:i+n_steps_in+n_steps_out] for i in range(0, y_test.shape[0]-n_steps_in-n_steps_out+1)])

print(x_train_lstm.shape,y_train_lstm.shape,x_test_lstm.shape,y_test_lstm.shape)

(34969, 72, 3) (34969, 24, 1) (8665, 72, 3) (8665, 24, 1)


In [17]:
# These hyperparameter are set manually as it is not part of the model structure,
# so we don't include it to keras-tuner search pool
# minibatch considered are [32, 64]

minibatch_size = 64
num_epochs     = 50
n_trials       = 10

In [18]:
# Function for keras-tuner model builder, the model structure was set during experimntation phase of the project
def build_model(hp):

    hp_neurons = hp.Choice('neurons', values=[16,32,64])
    hp_activation = hp.Choice('activation', values=['relu', 'tanh', 'sigmoid'])
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4, 1e-5])
    hp_loss = hp.Choice('loss', values=['mse','mae'])
    
    lstm_model = keras.models.Sequential()

    lstm_model.add(LSTM(hp_neurons,input_shape=(x_train_lstm.shape[1],x_train_lstm.shape[2]),
               return_sequences=True,activation=hp_activation))
    lstm_model.add(LSTM(hp_neurons,return_sequences=False,
               activation=hp_activation))
    lstm_model.add(Dense(hp_neurons,activation=hp_activation))
    lstm_model.add(Dense(y_train_lstm.shape[-2],activation='linear')) 

    lstm_model.compile(loss='mse',optimizer=tf.keras.optimizers.Adam(learning_rate=hp_learning_rate))
    
    return lstm_model

In [19]:
# Instantiate the Tuner and save the trials in a directory with customized project name
random_tuner = keras_tuner.RandomSearch(build_model, 
                                        max_trials=n_trials,
                                        seed=5,
                                        objective='val_loss', 
                                        max_retries_per_trial=0,
                                        max_consecutive_failed_trials=3,
                                        directory='random_search', 
                                        project_name='lstm_search7')

In [20]:
random_tuner.search_space_summary()

Search space summary
Default search space size: 4
neurons (Choice)
{'default': 16, 'conditions': [], 'values': [16, 32, 64], 'ordered': True}
activation (Choice)
{'default': 'relu', 'conditions': [], 'values': ['relu', 'tanh', 'sigmoid'], 'ordered': False}
learning_rate (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01, 0.001, 0.0001, 1e-05], 'ordered': True}
loss (Choice)
{'default': 'mse', 'conditions': [], 'values': ['mse', 'mae'], 'ordered': False}


In [21]:
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)

random_tuner.search(x_train_lstm, y_train_lstm, 
                    batch_size = minibatch_size,
                    epochs = num_epochs,
                    validation_split=0.2, verbose=1,
                    callbacks=[early_stop],
                    shuffle=False)


Trial 10 Complete [00h 36m 25s]
val_loss: 0.2370855212211609

Best val_loss So Far: 0.19176627695560455
Total elapsed time: 05h 59m 54s


In [22]:
# display the model structure with the best tuning
best_model = random_tuner.get_best_models()[0]
best_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 72, 32)            4608      
                                                                 
 lstm_1 (LSTM)               (None, 32)                8320      
                                                                 
 dense (Dense)               (None, 32)                1056      


2023-11-19 21:10:18.616448: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-11-19 21:10:18.617743: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-11-19 21:10:18.618635: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

                                                                 
 dense_1 (Dense)             (None, 24)                792       
                                                                 
Total params: 14,776
Trainable params: 14,776
Non-trainable params: 0
_________________________________________________________________


In [23]:
# check hyperparameters of the all tuning trials
all_hps = random_tuner.get_best_hyperparameters(num_trials=n_trials)

# print the hyperparameters of the top 3 tuning trials
for it in range(n_trials):
    print("Ranking #", str(it+1).zfill(1), "of best tuning, total trials = ", str(n_trials))
    print(all_hps[it].values)
    print("=============================================================================================")


# get the hyperparameters of the best tuning trial
best_hps = random_tuner.get_best_hyperparameters(num_trials=n_trials)[0]

Ranking # 1 of best tuning, total trials =  10
{'neurons': 32, 'activation': 'tanh', 'learning_rate': 0.001, 'loss': 'mse'}
Ranking # 2 of best tuning, total trials =  10
{'neurons': 32, 'activation': 'tanh', 'learning_rate': 0.0001, 'loss': 'mse'}
Ranking # 3 of best tuning, total trials =  10
{'neurons': 32, 'activation': 'sigmoid', 'learning_rate': 0.001, 'loss': 'mse'}
Ranking # 4 of best tuning, total trials =  10
{'neurons': 32, 'activation': 'tanh', 'learning_rate': 0.01, 'loss': 'mse'}
Ranking # 5 of best tuning, total trials =  10
{'neurons': 16, 'activation': 'sigmoid', 'learning_rate': 0.001, 'loss': 'mae'}
Ranking # 6 of best tuning, total trials =  10
{'neurons': 32, 'activation': 'sigmoid', 'learning_rate': 0.0001, 'loss': 'mse'}
Ranking # 7 of best tuning, total trials =  10
{'neurons': 64, 'activation': 'tanh', 'learning_rate': 0.01, 'loss': 'mse'}
Ranking # 8 of best tuning, total trials =  10
{'neurons': 32, 'activation': 'relu', 'learning_rate': 0.0001, 'loss': 'mae'

In [24]:
# Initiate an empty dataframe to store your results
tune_res = pd.DataFrame()

# Run a for loop to extract all the information we want
for trial in random_tuner.oracle.trials:
    # Get the state for this trial
    trial_state = random_tuner.oracle.trials[trial].get_state()
    
    # Create a Series contaning the hyperparameter values for this trial
    trial_hyperparameters = pd.Series(
        trial_state["hyperparameters"]["values"],
        index = trial_state["hyperparameters"]["values"].keys())
    
    # Create a Series contaning the validation loss for this trial
    trial_loss = pd.Series(trial_state["score"], index = ["val_loss"])
    
    # Combine both Series into one Series
    trial_tune_res = pd.concat([trial_hyperparameters, trial_loss])
    
    # Name the Series (such that we can trace the trial numbers in the final DataFrame)
    trial_tune_res.name = trial
    
    # Add this trial information to the DataFrame
    tune_res = pd.concat([tune_res, trial_tune_res], axis = 1)
    
# Transpose the DataFrame such that each row represents a trial
tune_res = tune_res.T


In [25]:
for s in range(n_trials):
    min_idx = s
    for i in range(s + 1, n_trials):
             
            # For sorting in descending order
            # for minimum element in each loop
        if (tune_res["val_loss"][i] < tune_res["val_loss"][min_idx]):
                min_idx = i
 
        # Arranging min at the correct position
    b, c = tune_res.iloc[s].copy(), tune_res.iloc[min_idx].copy()
    temp = tune_res.iloc[s].copy()
    tune_res.iloc[s] = c
    tune_res.iloc[min_idx] = temp


tune_res

Unnamed: 0,neurons,activation,learning_rate,loss,val_loss
0,32,tanh,0.001,mse,0.191766
1,32,tanh,0.0001,mse,0.193819
2,32,sigmoid,0.001,mse,0.19983
3,32,tanh,0.01,mse,0.200516
4,16,sigmoid,0.001,mae,0.237086
5,32,sigmoid,0.0001,mse,0.256971
6,64,tanh,0.01,mse,0.328912
7,32,relu,0.0001,mae,0.485965
8,16,relu,1e-05,mse,0.505513
9,16,sigmoid,1e-05,mae,0.570843


In [None]:
tune_res.to_csv('tuning_results/lstm_tune_res_mini'+str(minibatch_size)+'_'+str(n_steps_in)+'h.csv')

After running all the different random searches, we will only test the best one for the three different look-back windows, and see which is the best in the testing set. This process is done in another notebook `LSTM_model.ipynb`