# Forecasting Error
We do not need to estimate the LMP cost, only find the amount of error to put in a 7-day forecast

In [2]:
import pandas as pd
import numpy as np

from prophet import Prophet

import plotly_express as px
import plotly.figure_factory as ff
from prophet.plot import plot_plotly, plot_components_plotly

import os

## Import the day-ahead LMP data from ISO NE

In [3]:
# Get list of data files. os.listdir() returns names in arbitrary order, so
# sort them: the files[5:] slice below then selects the same files on every
# machine and the rows of `df` are concatenated in file-name order.
files = sorted(os.listdir("data_ISO_NE"))

# Worksheet names and hour columns used by the workbooks. The first entry of
# each tuple is tried first.
SHEET_NAMES = ("ISONE CA", "ISO NE CA")
HOUR_COLUMNS = ("Hour", "Hr_End")


def _read_iso_ne_workbook(path):
    """Read one ISO NE workbook and return it indexed by hourly timestamp.

    The sheet is the first of SHEET_NAMES present in the workbook and the
    hour column is the first of HOUR_COLUMNS present in that sheet. The
    "Date" column and the hour value minus one are combined into a
    "<date> <hour>:00" timestamp that becomes the index; the hour column is
    dropped.

    Raises
    ------
    ValueError
        If the workbook contains none of the expected sheets.
    KeyError
        If the sheet contains none of the expected hour columns.
    """
    with pd.ExcelFile(path) as workbook:
        sheet_name = next((s for s in SHEET_NAMES if s in workbook.sheet_names), None)
        if sheet_name is None:
            raise ValueError(
                f"{path}: none of the sheets {SHEET_NAMES} found (has {workbook.sheet_names})"
            )
        df_new = pd.read_excel(workbook, sheet_name=sheet_name)

    hour_column = next((c for c in HOUR_COLUMNS if c in df_new.columns), None)
    if hour_column is None:
        raise KeyError(f"{path}: none of the hour columns {HOUR_COLUMNS} found in sheet {sheet_name!r}")

    # The hour column is shifted down by one so that 1 maps to 00:00
    df_new["Date"] = pd.to_datetime(
        df_new["Date"].astype(str) + " " + df_new[hour_column].subtract(1).astype(str) + ":00"
    )
    return df_new.set_index("Date").drop(columns=[hour_column])


# Read every file (skipping the first five of the sorted listing) and
# concatenate once instead of growing the dataframe inside the loop
frames = [_read_iso_ne_workbook(f"data_ISO_NE/{file}") for file in files[5:]]
if not frames:
    raise FileNotFoundError("No workbooks to load from data_ISO_NE after skipping the first five files")
df = pd.concat(frames)

# Drop nan values (there are some in 2016)
df = df.dropna()

# Set the frequency of the time-steps
df.index.freq = "H"

# Let's make temperature be the absolute deviation from a reference value,
# i.e. find extreme hot/cold
deviation_from_temp = 52.5
df["Temp from Mean"] = (df["Dry_Bulb"] - deviation_from_temp).abs()

# Save a way to know if it was high or low temp. A single vectorised
# assignment is used because chained indexing (df[col][mask] = ...) raises
# SettingWithCopyWarning and does nothing under pandas Copy-on-Write.
df["Temp High/Low"] = np.where(df["Dry_Bulb"] < deviation_from_temp, "Low", "High")

df.columns

Index(['DA_Demand', 'RT_Demand', 'DA_LMP', 'DA_EC', 'DA_CC', 'DA_MLC',
       'RT_LMP', 'RT_EC', 'RT_CC', 'RT_MLC', 'Dry_Bulb', 'Dew_Point',
       'System_Load', 'Reg_Service_Price', 'Reg_Capacity_Price',
       'Min_5min_RSP', 'Max_5min_RSP', 'Min_5min_RCP', 'Max_5min_RCP',
       'Temp from Mean', 'Temp High/Low'],
      dtype='object')

## Take a look at the data
Note: clear the plots once you are done looking at them; a notebook with many plots gets slow.
Cell --> Current Outputs --> Clear

In [None]:
# Correlation heat-map of price, demand and the temperature columns
corr_columns = ["DA_LMP", "DA_Demand", "Dry_Bulb", "Dew_Point", "Temp from Mean"]
px.imshow(df[corr_columns].corr(), text_auto=True).show()

# Pairwise scatter plots, all drawn at the same reduced height
scatter_specs = [
    dict(x="DA_LMP", y="DA_Demand", opacity=0.4),
    dict(x="DA_LMP", y="Temp from Mean", color="Temp High/Low", opacity=0.2),
    dict(x="DA_Demand", y='RT_Demand', opacity=0.2),
]
for spec in scatter_specs:
    px.scatter(df, **spec).update_layout(height=350).show()

## Split dataframe for train and test data

In [27]:
def split_df_train_test(df, date, number_train_days=None, print_dates=False):
    """Split a datetime-indexed dataframe into train and test parts at ``date``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index is compared against ``date``.
    date : str or datetime-like
        Cut-off. Rows with ``index < date`` are training rows, rows with
        ``index >= date`` are test rows.
    number_train_days : int or float, optional
        When given, training rows are further restricted to those strictly
        later than ``date - number_train_days`` days. The lower bound is
        exclusive, so a row exactly ``number_train_days`` before ``date`` is
        not included.
    print_dates : bool, default False
        Print the first and last index value of each part. An empty part is
        reported as empty instead of raising ``IndexError``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(df_train, df_test)``
    """
    in_train = df.index < date
    if number_train_days is not None:
        window_start = pd.to_datetime(date) - pd.Timedelta(days=number_train_days)
        in_train = in_train & (df.index > window_start)

    df_train = df[in_train]
    df_test = df[df.index >= date]

    if print_dates:
        for label, part in (("Train", df_train), ("Test", df_test)):
            if part.empty:
                # index[0] / index[-1] would raise IndexError on an empty part
                print(f"{label} data is empty")
            else:
                print(f"{label} data starts: {part.index[0]} ends: {part.index[-1]}")

    return df_train, df_test

# Test on everything from the cut-off date onward; train on the 3*365 days before it
date = "2023-04-01"
number_train_days = 365 * 3

df_train, df_test = split_df_train_test(
    df,
    date,
    number_train_days=number_train_days,
    print_dates=True,
)

Train data starts: 2020-04-01 01:00:00 ends: 2023-03-31 23:00:00
Test data starts: 2023-04-01 00:00:00 ends: 2023-09-30 23:00:00


## Prophet model fitted on a moving window of the last n_days

In [88]:
# Fit on the 31 days before the cut-off date and forecast the following 7 days
date = "2023-08-01"
number_train_days = 31

df_train, df_test = split_df_train_test(df, date, number_train_days, print_dates=True)

# Training frame for Prophet: the index becomes "ds" and DA_LMP becomes "y"
df_prophet = (
    df_train[["DA_LMP", "DA_Demand", "Temp from Mean"]]
    .reset_index()
    .set_axis(["ds", "y", "DA_Demand", "Temp from Mean"], axis=1)
)

model = Prophet()
#model.add_country_holidays(country_name='US')
model.add_regressor("Temp from Mean")
model.add_regressor("DA_Demand")
model.fit(df_prophet)

# Extend the timeline by 7 days of hourly steps and attach the regressor
# values from `df`, aligned on the timestamp
future_dates = (
    model.make_future_dataframe(periods=7*24, freq="H")
    .set_index("ds")
    .assign(**{"Temp from Mean": df["Temp from Mean"], "DA_Demand": df["DA_Demand"]})
    .reset_index()
)

forecast = model.predict(future_dates)

fig = plot_plotly(model, forecast)
fig.show()

fig = plot_components_plotly(model, forecast)
fig.show()

# Forecast error on the timestamps that are present in the test data.
# Built as one non-mutating chain: assigning columns to a selection of
# `forecast` in place is the pattern that triggers SettingWithCopyWarning.
error = (
    forecast[["ds", "yhat"]]
    .set_index("ds")
    .assign(**{"actual LMP": df_test["DA_LMP"]})
    .dropna()
    .assign(error=lambda frame: frame["yhat"] - frame["actual LMP"])
)

print(error["error"].describe())
px.line(error)

INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.


Train data starts: 2023-07-01 01:00:00 ends: 2023-07-31 23:00:00
Test data starts: 2023-08-01 00:00:00 ends: 2023-09-30 23:00:00


count    168.000000
mean       2.091973
std       14.291156
min      -32.342784
25%       -6.944813
50%        4.322627
75%       13.234153
max       28.295430
Name: error, dtype: float64


In [89]:
error

Unnamed: 0_level_0,yhat,actual LMP,error
ds,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-08-01 00:00:00,11.287942,30.85,-19.562058
2023-08-01 01:00:00,11.665024,29.94,-18.274976
2023-08-01 02:00:00,11.715788,29.91,-18.194212
2023-08-01 03:00:00,9.396528,27.94,-18.543472
2023-08-01 04:00:00,8.822381,27.94,-19.117619
...,...,...,...
2023-08-07 19:00:00,52.167892,36.77,15.397892
2023-08-07 20:00:00,46.324412,33.02,13.304412
2023-08-07 21:00:00,39.233068,30.91,8.323068
2023-08-07 22:00:00,40.127417,28.72,11.407417


## Old Prophet model

In [None]:
# Training frame for Prophet: the index becomes "ds" and DA_LMP becomes "y"
prophet_columns = ["ds", "y", "DA_Demand", "Temp from Mean"]
df_prophet = (
    df_train[["DA_LMP", "DA_Demand", "Temp from Mean"]]
    .reset_index()
    .set_axis(prophet_columns, axis=1)
)

# Model with US holidays and the two extra regressors
model = Prophet()
model.add_country_holidays(country_name='US')
for regressor_name in ("Temp from Mean", "DA_Demand"):
    model.add_regressor(regressor_name)
model.fit(df_prophet)

## Create dates to forecast and add regressor data
NOTE: It is assumed we know the regressors, in actuality these are also forecasted

In [None]:
# Extend the timeline by 31 days of hourly steps, then attach the regressor
# values from `df`, aligned on the timestamp
horizon_hours = 31*24
future_dates = (
    model.make_future_dataframe(periods=horizon_hours, freq="H")
    .set_index("ds")
    .assign(**{"Temp from Mean": df["Temp from Mean"], "DA_Demand": df["DA_Demand"]})
    .reset_index()
)

future_dates

## Make the forecast

In [None]:
# Run the fitted model over the rows of `future_dates`; the result is kept in
# `forecast` for the plotting and error cells that follow.
forecast = model.predict(future_dates)

## Plot the results

In [None]:
# Show the forecast plot first, then the component breakdown
for make_figure in (plot_plotly, plot_components_plotly):
    fig = make_figure(model, forecast)
    fig.show()

In [None]:
# Forecast error on the timestamps that are present in the test data.
# Built as one non-mutating chain: assigning columns to a selection of
# `forecast` in place is the pattern that triggers SettingWithCopyWarning.
error = (
    forecast[["ds", "yhat"]]
    .set_index("ds")
    .assign(**{"actual LMP": df_test["DA_LMP"]})  # aligned on timestamp
    .dropna()                                      # keep only rows with an actual value
    .assign(error=lambda frame: frame["yhat"] - frame["actual LMP"])
)

print(error["error"].describe())
px.line(error)

In [11]:
# Display the last timestamp in the loaded data
df.index[-1] 

Timestamp('2023-09-30 23:00:00', freq='H')