In [19]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

import tensorflow as tf
from tensorflow import keras
import keras
from tensorflow.keras.models import *
from tensorflow.keras.callbacks import *
from tensorflow.keras.layers import *
from tensorflow.keras.losses import MeanSquaredError

import numpy as np
import pandas as pd
import cvxpy as cp
from tqdm import tqdm

%load_ext autoreload
%autoreload 2
%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
cwd = os.getcwd()

def make_dir(path):
    if os.path.exists(path) is False:
        os.makedirs(path)

def evaluate_prediction(predictions, actual, model_name):
    errors = predictions - actual
    mse = np.square(errors).mean()
    rmse = np.sqrt(mse)
    mae = np.abs(errors).mean()
    print('MAE: {:.2f}'.format(mae))
    print('RMSE: {:.2f}'.format(rmse))
    print('')
    print('')
    return mae, rmse

In [3]:
sl = pd.read_csv('dataset/ercot_sl_2019_2023.csv')
sl['sced_time_stamp_local'] = pd.to_datetime(sl['sced_time_stamp_local'])
sl.set_index('sced_time_stamp_local', inplace=True)
sl = sl.resample('h').mean()
date_range = pd.date_range(start=sl.index.min(), end=sl.index.max(), freq='h')
sl = sl[~sl.index.duplicated(keep='first')]
sl = sl.reindex(date_range, fill_value=np.nan)
sl.interpolate(method='time', inplace=True)


In [4]:
sl

Unnamed: 0,system_lambda
2019-01-01 00:00:00,14.013557
2019-01-01 01:00:00,15.067093
2019-01-01 02:00:00,15.575292
2019-01-01 03:00:00,16.118229
2019-01-01 04:00:00,16.033707
...,...
2023-12-31 19:00:00,14.422107
2023-12-31 20:00:00,12.597410
2023-12-31 21:00:00,10.170172
2023-12-31 22:00:00,9.872549


In [5]:
dap = pd.read_csv('dataset/hourly_ercot_day_ahead_sl_2019_2023.csv')
dap['timestamp'] = pd.to_datetime(dap['timestamp'])
dap.set_index('timestamp', inplace=True)
date_range = pd.date_range(start=dap.index.min(), end='2024-01-01 23:55:00', freq='h')
dap = dap[~dap.index.duplicated(keep='first')]
dap = dap.reindex(date_range, fill_value=np.nan)
dap.interpolate(method='time', inplace=True)

  dap['timestamp'] = pd.to_datetime(dap['timestamp'])


In [6]:
dap

Unnamed: 0,SystemLambda
2019-01-02 00:00:00,23.9250
2019-01-02 01:00:00,23.3140
2019-01-02 02:00:00,23.3475
2019-01-02 03:00:00,23.0595
2019-01-02 04:00:00,25.2672
...,...
2024-01-01 19:00:00,23.1651
2024-01-01 20:00:00,23.2113
2024-01-01 21:00:00,21.3244
2024-01-01 22:00:00,20.3351


The target variable is the Day Ahead Price, not the current SCED

In [8]:
#prepare the data for training and testing
df = pd.concat([dap, sl], axis=1)
df.columns = ['DAP', 'SCED']
df.dropna(inplace=True)

## log transform
log_data = df.copy(deep=True)
log_data.loc[:,"SCED"] = np.log(df.loc[:,"SCED"] + 1 - min(df.loc[:,"SCED"]))
log_data.loc[:,"DAP"] = np.log(df.loc[:,"DAP"] + 1 - min(df.loc[:,"DAP"]))

#Shift for prediction
df['DAP'] = df['DAP'].shift(-24)

# 3 years training, 1 year validation, 1 year testing
x_train_df_reg = log_data.loc[:'2021-12-31 23:55:00'].iloc[:,:]
x_val_df_reg = log_data.loc['2022-01-01 00:00:00':'2022-12-31 23:55:00'].iloc[:,:]
x_test_df_reg = log_data.loc['2023-01-01 00:00:00':].iloc[:,:]

# Shift back for target variable
df['DAP'] = df['DAP'].shift(24)

#Change target variable to DAP
y_train_df_reg = log_data.loc[:'2021-12-31 23:55:00'].iloc[:, :1]
y_val_df_reg = log_data.loc['2022-01-01 00:00:00':'2022-12-31 23:55:00'].iloc[:, :1]
y_test_df_reg = log_data.loc['2023-01-01 00:00:00':].iloc[:, :1]

x_train_df_reg.reset_index(drop=True, inplace=True)
x_val_df_reg.reset_index(drop=True, inplace=True)
x_test_df_reg.reset_index(drop=True, inplace=True)

# Standardization
x_mean_reg, x_std_reg = x_train_df_reg.mean(), x_train_df_reg.std()
y_mean_reg, y_std_reg = y_train_df_reg.mean(), y_train_df_reg.std()

x_std_reg = x_std_reg +0.00001

x_train_reg = (x_train_df_reg - x_mean_reg)/x_std_reg
x_val_reg = (x_val_df_reg - x_mean_reg)/x_std_reg
x_test_reg = (x_test_df_reg - x_mean_reg)/x_std_reg

y_train_reg = (y_train_df_reg - y_mean_reg)/y_std_reg
y_val_reg = (y_val_df_reg - y_mean_reg)/y_std_reg
y_test_reg = (y_test_df_reg - y_mean_reg)/y_std_reg

# Shift the data for the lags
n_steps_in = 24
n_steps_out = 24

x_train_lstm = np.array([x_train_reg[i:i+n_steps_in] for i in range(0, x_train_reg.shape[0]-n_steps_in-n_steps_out+1)])
y_train_lstm = np.array([y_train_reg[i+n_steps_in:i+n_steps_in+n_steps_out] for i in range(0, y_train_reg.shape[0]-n_steps_in-n_steps_out+1)])

x_val_lstm = np.array([x_val_reg[i:i+n_steps_in] for i in range(0, x_val_reg.shape[0]-n_steps_in-n_steps_out+1)])
y_val_lstm = np.array([y_val_reg[i+n_steps_in:i+n_steps_in+n_steps_out] for i in range(0, y_val_reg.shape[0]-n_steps_in-n_steps_out+1)])

x_test_lstm = np.array([x_test_reg[i:i+n_steps_in] for i in range(0, x_test_reg.shape[0]-n_steps_in-n_steps_out+1)])
y_test_lstm = np.array([y_test_reg[i+n_steps_in:i+n_steps_in+n_steps_out] for i in range(0, y_test_reg.shape[0]-n_steps_in-n_steps_out+1)])

print(x_train_lstm.shape,y_train_lstm.shape,x_val_lstm.shape,y_val_lstm.shape, x_test_lstm.shape,y_test_lstm.shape)

(26233, 24, 2) (26233, 24, 1) (8713, 24, 2) (8713, 24, 1) (8713, 24, 2) (8713, 24, 1)


In [12]:
# set hyperparameters
n_neurons  = 64  # number of neurons in the Dense layer
activation     = 'relu' # activation function
learning_rate  = 0.0005
minibatch_size = 64
num_epochs     = 50

# MLP model
lstm_model = Sequential()
lstm_model.add(LSTM(n_neurons,input_shape=(x_train_lstm.shape[1],x_train_lstm.shape[2]),
               return_sequences=True,activation=activation))
lstm_model.add(LSTM(n_neurons,return_sequences=False,
               activation=activation))
lstm_model.add(Dense(n_neurons,activation=activation))
lstm_model.add(Dense(y_train_lstm.shape[-2],activation='linear')) 

lstm_model.compile(loss='mse',optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate))

lstm_model.summary()

early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

history = lstm_model.fit(x_train_lstm, y_train_lstm, 
                        validation_data = (x_val_lstm, y_val_lstm), 
                        batch_size = minibatch_size,
                        epochs = num_epochs,
                        verbose=1,
                        callbacks=[early_stop],
                        shuffle=False)

# saving trained model
model_path = os.path.join(cwd,'saved_model')
make_dir(model_path)
lstm_model.save(os.path.join(model_path,'DAP_Model_Nathan.h5'))


  super().__init__(**kwargs)


Epoch 1/50
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 13ms/step - loss: 0.9746 - val_loss: 2.0651
Epoch 2/50
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 13ms/step - loss: 0.6745 - val_loss: 0.8880
Epoch 3/50
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 13ms/step - loss: 0.5470 - val_loss: 0.7534
Epoch 4/50
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 12ms/step - loss: 1.8692 - val_loss: 0.8705
Epoch 5/50
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 12ms/step - loss: 0.5410 - val_loss: 0.8179
Epoch 6/50
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 12ms/step - loss: 0.5004 - val_loss: 0.7048
Epoch 7/50
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 11ms/step - loss: 0.7907 - val_loss: 0.7445
Epoch 8/50
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 12ms/step - loss: 0.4827 - val_loss: 0.6403
Epoch 9/50
[1m410/410[0m [32m



In [25]:
# reload the saved model
model_path = os.path.join(cwd,'saved_model')
lstm_model = load_model(
    os.path.join(model_path, 'DAP_Model_Nathan.h5'),
    custom_objects={'mse': MeanSquaredError()}
)

# Make prediciton
y_test_pred = lstm_model.predict(x_test_lstm)

# Evaluate Prediction
evaluate_prediction(y_test_pred , y_test_lstm[:,:,0], 'lstm')

# Rescale to get values before normalization
y_test_pred_rescale = y_test_pred*y_std_reg.values + y_mean_reg.values
y_test_lstm_rescale = y_test_lstm*y_std_reg.values + y_mean_reg.values

# inverse log to get prices in the actual scale
y_test_pred_invlog = np.exp(y_test_pred_rescale) -1 + min(df.loc[:,"SCED"])
y_test_lstm_invlog = np.exp(y_test_lstm_rescale) -1 + min(df.loc[:,"SCED"])

# revaluation after the rescaling
evaluate_prediction(y_test_pred_invlog , y_test_lstm_invlog[:,:,0], 'lstm')



[1m273/273[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step
MAE: 0.36
RMSE: 0.81


MAE: 36.02
RMSE: 205.96




(np.float64(36.01648692183936), np.float64(205.95863538260144))

In [26]:
# add timestamp to the predictions
sl_actual = pd.DataFrame(y_test_lstm_invlog[:,:,0], index=df.loc['2023-01-01 23:00:00':'2023-12-30 23:00:00'].index)
sl_actual.to_csv('hourly_sl_actual_dap_2424h.csv')
sl_pred = pd.DataFrame(y_test_pred_invlog, index=df.loc['2023-01-01 23:00:00':'2023-12-30 23:00:00'].index)
sl_pred.to_csv('hourly_sl_pred_dap_2424h.csv')

In [27]:
sl_pred

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
2023-01-01 23:00:00,54.441825,61.670339,62.917587,54.834967,56.049095,57.209965,64.775844,65.315469,66.806195,67.591239,...,73.020417,74.659347,77.405672,72.922357,75.116128,68.262545,66.396026,70.593870,69.747361,60.689681
2023-01-02 00:00:00,52.823108,60.472906,60.920554,54.450525,55.621390,57.298271,65.695282,67.510059,67.933670,69.414204,...,72.908615,74.074827,76.058192,72.005680,74.570908,66.086152,64.326802,68.864160,67.687925,58.918389
2023-01-02 01:00:00,49.421234,57.568537,57.131366,52.543969,54.224441,56.297604,65.911930,68.624796,68.307236,70.746355,...,71.846992,73.498024,75.093663,71.163402,74.257958,63.667096,61.521416,66.549943,64.938683,55.939720
2023-01-02 02:00:00,46.787231,54.812164,53.673869,50.751936,53.082013,54.710753,65.460542,68.155307,68.385984,71.389848,...,70.128247,72.737526,73.651913,69.316519,73.860195,61.977850,59.260867,65.097385,63.129791,53.939977
2023-01-02 03:00:00,45.396516,53.196014,51.768512,49.541109,52.638914,53.876741,64.576391,66.661120,68.243823,70.973280,...,68.274668,71.676909,71.956075,66.963703,73.494002,60.813417,58.348379,64.816346,63.204060,53.456067
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-30 19:00:00,61.803676,70.870979,69.060470,67.252798,68.938977,71.555955,80.370393,78.574107,77.380208,76.423922,...,68.502839,66.967307,65.862197,70.532335,66.106445,64.999672,64.833146,73.860382,70.787061,67.151112
2023-12-30 20:00:00,62.288007,71.998100,70.129181,68.161596,70.324573,72.631107,81.189945,78.113315,77.135672,75.634354,...,67.308252,65.783053,65.081797,70.207935,65.408085,65.373931,65.169894,74.336304,71.242015,67.810936
2023-12-30 21:00:00,62.002884,72.216360,70.440387,68.067465,70.710282,72.667298,81.306594,77.133715,76.262224,74.459955,...,66.268754,64.966140,64.670425,69.839203,64.959117,65.474974,65.118246,74.500271,71.394876,67.882854
2023-12-30 22:00:00,62.094538,72.729283,71.141398,68.468918,71.379959,72.630662,81.178412,76.114884,75.151450,73.316440,...,65.205939,64.302824,64.317727,69.376285,64.431025,65.661405,65.340098,74.835632,71.814806,68.193691
