# Selection of Calibration Windows for Day-Ahead Electricity Price Forecasting
## by Grzegorz Marcjasz, Tomasz Serafin and Rafał Weron 

In [15]:
# libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import median_abs_deviation

In [44]:
# Load the three raw datasets from CSV files in the working directory.
# The EPEX and PJM files are read with ';' as the field separator; NP2018 is
# read with the default separator and header=None, so its columns are
# labelled by position until they are renamed.
df_day_ahead_epex = pd.read_csv('Day_Ahead_Epex.csv', sep=';')
df_NP2018 = pd.read_csv('NP2018.csv', header=None)
df_PJM = pd.read_csv('PJM.csv', sep=';')

In [38]:
# Preview the first rows of the raw EPEX frame.
df_day_ahead_epex.head()

Unnamed: 0,Date,hour,Spot DE.AT_price,day
0,20150101,1,25.02,5
1,20150101,2,18.29,5
2,20150101,3,16.04,5
3,20150101,4,14.6,5
4,20150101,5,14.95,5


TODO: ask for the column names of NP2018.csv (the file has no header row).

In [45]:
# NP2018.csv was read with header=None, so name its five columns by hand.
# NOTE(review): 'forcast' and 'forcast1' look like placeholder names — confirm
# what the last two columns actually contain before relying on them.
columns = ['Date', 'Hour', 'Price', "forcast", "forcast1"]
df_NP2018.columns = columns
# Preview the first rows with the new column names.
df_NP2018.head()

Unnamed: 0,Date,Hour,Price,forcast,forcast1
0,20130101,1,31.05,42497.0,2798.0
1,20130101,2,30.47,41463.0,2417.0
2,20130101,3,28.92,40812.0,2036.0
3,20130101,4,27.88,40246.0,1706.0
4,20130101,5,26.96,40088.0,1427.0


In [41]:
# Preview the first rows of the raw PJM frame.
df_PJM.head()

Unnamed: 0,date,hour,comed price,rto load forecast,comed load foecast,day
0,20110102,0,17.141179,68590.0,11432.0,1
1,20110102,1,14.179616,65896.0,10862.0,1
2,20110102,2,12.736793,64511.0,10486.0,1
3,20110102,3,11.096262,63935.0,10318.0,1
4,20110102,4,10.690926,64321.0,10208.0,1


### 2. Methodology

#### 2.1. Preliminaries

##### Normalization of df_day_ahead_epex

Normalization before forecast

In [30]:
# Read the EPEX file again into a separate frame that will carry the
# transformed columns (the frame loaded earlier is left as it was).
df_day_ahead_epex2 = pd.read_csv('Day_Ahead_Epex.csv', sep=';')

# Parse the YYYYMMDD 'Date' values and keep only the calendar date
# (`.dt.date` yields Python date objects, without a time component).
df_day_ahead_epex2['Date'] = pd.to_datetime(df_day_ahead_epex2['Date'], format='%Y%m%d').dt.date

# a = median price, b = median absolute deviation of the price.
# NOTE(review): both are computed over every row of the file and stored in the
# notebook-global names `a` and `b`, which the NP2018 and PJM cells also
# assign — whichever of those cells ran last decides what later cells see.
a = df_day_ahead_epex2['Spot DE.AT_price'].median()
b = median_abs_deviation(df_day_ahead_epex2['Spot DE.AT_price'])

# Normalize the prices: P_dh = (price - a) / b
df_day_ahead_epex2['P_dh'] = 1/b * (df_day_ahead_epex2['Spot DE.AT_price'] - a)

# Apply the area hyperbolic sine transformation: X_dh = asinh(P_dh)
df_day_ahead_epex2['X_dh'] = np.arcsinh(df_day_ahead_epex2['P_dh'])

In [32]:
# Preview the EPEX frame, now including the 'P_dh' and 'X_dh' columns.
df_day_ahead_epex2.head()

Unnamed: 0,Date,hour,Spot DE.AT_price,day,P_dh,X_dh
0,2015-01-01,1,25.02,5,-1.071019,-0.930711
1,2015-01-01,2,18.29,5,-1.841924,-1.370622
2,2015-01-01,3,16.04,5,-2.099656,-1.487335
3,2015-01-01,4,14.6,5,-2.264605,-1.556074
4,2015-01-01,5,14.95,5,-2.224513,-1.539758


Inverse transformation after forecast

In [None]:
# Template cell: `model` and `X_test` are not defined in the cells shown here;
# `forecasts` is expected to hold predictions on the transformed (X_dh) scale.
forecasts = model.predict(X_test)

# Apply inverse transformation: X = asinh((P - a) / b)  =>  P = sinh(X) * b + a
# NOTE(review): `a` and `b` are notebook-globals that every normalization cell
# overwrites — confirm they hold the EPEX values when this cell runs.
price_predictions = np.sinh(forecasts) * b + a

##### Normalization of df_NP2018

Before forecast

In [46]:
# Work on a copy so the frame loaded earlier keeps only its raw columns.
df_NP2018_2 = df_NP2018.copy()

# Parse the YYYYMMDD 'Date' values into pandas datetimes. Unlike the EPEX and
# PJM cells, no `.dt.date` conversion is applied here, so the column keeps the
# type returned by pd.to_datetime.
df_NP2018_2['Date'] = pd.to_datetime(df_NP2018_2['Date'], format='%Y%m%d')

# a = median price, b = median absolute deviation of the price.
# NOTE(review): both are computed over every row and stored in the
# notebook-global names `a` and `b`, which the EPEX and PJM cells also
# assign — whichever of those cells ran last decides what later cells see.
a = df_NP2018_2['Price'].median()
b = median_abs_deviation(df_NP2018_2['Price'])

# Normalize the prices: P_dh = (price - a) / b
df_NP2018_2['P_dh'] = 1/b * (df_NP2018_2['Price'] - a)

# Apply the area hyperbolic sine transformation: X_dh = asinh(P_dh)
df_NP2018_2['X_dh'] = np.arcsinh(df_NP2018_2['P_dh'])

In [47]:
# Preview the first 30 rows of the NP2018 frame, including 'P_dh' and 'X_dh'.
df_NP2018_2.head(30)

Unnamed: 0,Date,Hour,Price,forcast,forcast1,P_dh,X_dh
0,2013-01-01,1,31.05,42497.0,2798.0,0.278937,0.275441
1,2013-01-01,2,30.47,41463.0,2417.0,0.16888,0.168088
2,2013-01-01,3,28.92,40812.0,2036.0,-0.125237,-0.124912
3,2013-01-01,4,27.88,40246.0,1706.0,-0.322581,-0.317233
4,2013-01-01,5,26.96,40088.0,1427.0,-0.497154,-0.478665
5,2013-01-01,6,27.84,40360.0,1183.0,-0.330171,-0.324448
6,2013-01-01,7,28.79,41027.0,1012.0,-0.149905,-0.149349
7,2013-01-01,8,28.63,41976.0,908.0,-0.180266,-0.179303
8,2013-01-01,9,28.44,42705.0,848.0,-0.216319,-0.214666
9,2013-01-01,10,28.3,43503.0,819.0,-0.242884,-0.240557


In [None]:
# Template cell: `model` and `X_test` are not defined in the cells shown here;
# `forecasts` is expected to hold predictions on the transformed (X_dh) scale.
forecasts = model.predict(X_test)

# Apply inverse transformation: X = asinh((P - a) / b)  =>  P = sinh(X) * b + a
# NOTE(review): `a` and `b` are notebook-globals that every normalization cell
# overwrites — confirm they hold the NP2018 values when this cell runs.
price_predictions = np.sinh(forecasts) * b + a

##### Normalization of df_PJM

Before forecast

In [34]:
# Read the PJM file again into a separate frame that will carry the
# transformed columns (the frame loaded earlier is left as it was).
df_PJM2 = pd.read_csv('PJM.csv', sep=';')

# Parse the YYYYMMDD 'date' values and store the calendar date in a NEW
# column named 'Date' (capital D); the original 'date' column is kept.
df_PJM2['Date'] = pd.to_datetime(df_PJM2['date'], format='%Y%m%d').dt.date

# a = median price, b = median absolute deviation of the price.
# NOTE(review): both are computed over every row of the file and stored in the
# notebook-global names `a` and `b`, which the EPEX and NP2018 cells also
# assign — whichever of those cells ran last decides what later cells see.
a = df_PJM2['comed price'].median()
b = median_abs_deviation(df_PJM2['comed price'])

# Normalize the prices: P_dh = (price - a) / b
df_PJM2['P_dh'] = 1/b * (df_PJM2['comed price'] - a)

# Apply the area hyperbolic sine transformation: X_dh = asinh(P_dh)
df_PJM2['X_dh'] = np.arcsinh(df_PJM2['P_dh'])

In [35]:
# Preview the first 30 rows of the PJM frame, including 'Date', 'P_dh' and 'X_dh'.
df_PJM2.head(30)

Unnamed: 0,date,hour,comed price,rto load forecast,comed load foecast,day,Date,P_dh,X_dh
0,20110102,0,17.141179,68590.0,11432.0,1,2011-01-02,-1.877857,-1.387638
1,20110102,1,14.179616,65896.0,10862.0,1,2011-01-02,-2.382102,-1.602532
2,20110102,2,12.736793,64511.0,10486.0,1,2011-01-02,-2.627761,-1.693663
3,20110102,3,11.096262,63935.0,10318.0,1,2011-01-02,-2.907083,-1.788647
4,20110102,4,10.690926,64321.0,10208.0,1,2011-01-02,-2.976097,-1.81086
5,20110102,5,11.611351,65539.0,10212.0,1,2011-01-02,-2.819383,-1.759728
6,20110102,6,11.897034,67827.0,10236.0,1,2011-01-02,-2.770741,-1.743342
7,20110102,7,22.768068,70959.0,10350.0,1,2011-01-02,-0.919805,-0.823515
8,20110102,8,24.571483,74338.0,10472.0,1,2011-01-02,-0.61275,-0.579727
9,20110102,9,26.437882,77075.0,10799.0,1,2011-01-02,-0.294971,-0.290853


In [None]:
# Template cell: `model` and `X_test` are not defined in the cells shown here;
# `forecasts` is expected to hold predictions on the transformed (X_dh) scale.
forecasts = model.predict(X_test)

# Apply inverse transformation: X = asinh((P - a) / b)  =>  P = sinh(X) * b + a
# NOTE(review): `a` and `b` are notebook-globals that every normalization cell
# overwrites — confirm they hold the PJM values when this cell runs.
price_predictions = np.sinh(forecasts) * b + a

#### 2.2. Expert Models

##### ARX1

In [7]:
import statsmodels.api as sm

def prepare_data_arx1(data):
    """Build the regressor columns used by the ARX1 model.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'X' (the series being modelled), 'load'
        and 'weekday'. The frame passed in is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with 'X_lag1', 'X_lag2', 'X_lag7', 'X_min' and
        'load_forecast' added, 'weekday' replaced by float 0/1 indicator
        columns named 'weekday_<value>', and every row that contains a
        missing value removed.
    """
    # Work on a copy so the caller's frame does not silently gain columns.
    data = data.copy()

    # NOTE(review): shift(k) lags by k ROWS. The 24-row window/shift used
    # below suggests hourly rows, in which case these are 1-, 2- and 7-hour
    # lags rather than day lags — confirm the intended row frequency.
    data['X_lag1'] = data['X'].shift(1)
    data['X_lag2'] = data['X'].shift(2)
    data['X_lag7'] = data['X'].shift(7)

    # Minimum of 'X' over a 24-row window that ends 24 rows before the
    # current row.
    data['X_min'] = data['X'].rolling(window=24).min().shift(24)

    # 'load' value from 24 rows earlier.
    data['load_forecast'] = data['load'].shift(24)

    # One indicator column per weekday value. dtype=float keeps them numeric:
    # pandas >= 2.0 returns bool indicators by default, and a frame mixing
    # bool and float columns turns into an object array when handed to
    # statsmodels.
    data = pd.get_dummies(data, columns=['weekday'], dtype=float)

    # The shifts and the rolling window leave NaNs in the leading rows.
    return data.dropna()

def fit_arx1(data):
    """Fit the ARX1 model by ordinary least squares.

    Regresses data['X'] on an intercept (added by ``sm.add_constant``), the
    columns 'X_lag1', 'X_lag2', 'X_lag7', 'X_min', 'load_forecast' and the
    seven indicator columns 'weekday_0' ... 'weekday_6'. All of these columns
    must already be present in ``data``.

    Returns the fitted statsmodels results object.
    """
    # NOTE(review): if the seven weekday indicators sum to one on every row,
    # they are perfectly collinear with the intercept — confirm that keeping
    # both is intended.
    weekday_dummies = [f'weekday_{day}' for day in range(7)]
    regressors = ['X_lag1', 'X_lag2', 'X_lag7', 'X_min', 'load_forecast'] + weekday_dummies

    design_matrix = sm.add_constant(data[regressors])
    return sm.OLS(data['X'], design_matrix).fit()

##### AR1

In [8]:
def prepare_data_ar1(data):
    """Build the regressor columns used by the AR1 model.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'X' (the series being modelled) and
        'weekday'. The frame passed in is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with 'X_lag1', 'X_lag2', 'X_lag7' and 'X_min' added,
        'weekday' replaced by float 0/1 indicator columns named
        'weekday_<value>', and every row that contains a missing value
        removed.
    """
    # Work on a copy so the caller's frame does not silently gain columns.
    data = data.copy()

    # NOTE(review): shift(k) lags by k ROWS. The 24-row window/shift used
    # below suggests hourly rows, in which case these are 1-, 2- and 7-hour
    # lags rather than day lags — confirm the intended row frequency.
    data['X_lag1'] = data['X'].shift(1)
    data['X_lag2'] = data['X'].shift(2)
    data['X_lag7'] = data['X'].shift(7)

    # Minimum of 'X' over a 24-row window that ends 24 rows before the
    # current row.
    data['X_min'] = data['X'].rolling(window=24).min().shift(24)

    # One indicator column per weekday value. dtype=float keeps them numeric:
    # pandas >= 2.0 returns bool indicators by default, and a frame mixing
    # bool and float columns turns into an object array when handed to
    # statsmodels.
    data = pd.get_dummies(data, columns=['weekday'], dtype=float)

    # The shifts and the rolling window leave NaNs in the leading rows.
    return data.dropna()

def fit_ar1(data):
    """Fit the AR1 model by ordinary least squares.

    Regresses data['X'] on an intercept (added by ``sm.add_constant``), the
    columns 'X_lag1', 'X_lag2', 'X_lag7', 'X_min' and the seven indicator
    columns 'weekday_0' ... 'weekday_6'. All of these columns must already
    be present in ``data``.

    Returns the fitted statsmodels results object.
    """
    # NOTE(review): if the seven weekday indicators sum to one on every row,
    # they are perfectly collinear with the intercept — confirm that keeping
    # both is intended.
    lag_terms = ['X_lag1', 'X_lag2', 'X_lag7', 'X_min']
    weekday_terms = ['weekday_%d' % day for day in range(7)]

    target = data['X']
    design_matrix = sm.add_constant(data[lag_terms + weekday_terms])
    ols = sm.OLS(target, design_matrix)
    return ols.fit()

In [None]:
# Prepare the data
# NOTE(review): `data` is not assigned in any cell visible here; it must hold a
# frame with 'X' and 'weekday' columns before this cell runs. The name is then
# rebound to the prepared frame, in which 'weekday' has been replaced by
# indicator columns.
data = prepare_data_ar1(data)

# Fit the AR1 model
results = fit_ar1(data)

# Print the model summary
print(results.summary())

##### ARX2

In [9]:
def prepare_data_arx2(data):
    """Build the regressor columns used by the ARX2 model.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'X' (the series being modelled), 'load'
        and 'weekday'. The frame passed in is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with 'X_lag1', 'X_lag2', 'X_lag7', 'X_min', 'X_max',
        'X_last' and 'load_forecast' added, 'weekday' replaced by float 0/1
        indicator columns named 'weekday_<value>', and every row that
        contains a missing value removed.
    """
    # Work on a copy so the caller's frame does not silently gain columns.
    data = data.copy()

    # NOTE(review): shift(k) lags by k ROWS. The 24-row window/shift used
    # below suggests hourly rows, in which case these are 1-, 2- and 7-hour
    # lags rather than day lags — confirm the intended row frequency.
    data['X_lag1'] = data['X'].shift(1)
    data['X_lag2'] = data['X'].shift(2)
    data['X_lag7'] = data['X'].shift(7)

    # Minimum and maximum of 'X' over a 24-row window that ends 24 rows
    # before the current row.
    data['X_min'] = data['X'].rolling(window=24).min().shift(24)
    data['X_max'] = data['X'].rolling(window=24).max().shift(24)

    # 'X' and 'load' values from 24 rows earlier.
    data['X_last'] = data['X'].shift(24)
    data['load_forecast'] = data['load'].shift(24)

    # One indicator column per weekday value. dtype=float keeps them numeric:
    # pandas >= 2.0 returns bool indicators by default, and a frame mixing
    # bool and float columns turns into an object array when handed to
    # statsmodels.
    data = pd.get_dummies(data, columns=['weekday'], dtype=float)

    # The shifts and the rolling windows leave NaNs in the leading rows.
    return data.dropna()

def fit_arx2(data):
    """Fit the ARX2 model by ordinary least squares.

    Regresses data['X'] on an intercept (added by ``sm.add_constant``), the
    columns 'X_lag1', 'X_lag2', 'X_lag7', 'X_min', 'X_max', 'X_last',
    'load_forecast' and the seven indicator columns 'weekday_0' ...
    'weekday_6'. All of these columns must already be present in ``data``.

    Returns the fitted statsmodels results object.
    """
    # NOTE(review): if the seven weekday indicators sum to one on every row,
    # they are perfectly collinear with the intercept — confirm that keeping
    # both is intended.
    regressors = ['X_lag1', 'X_lag2', 'X_lag7', 'X_min', 'X_max', 'X_last', 'load_forecast']
    regressors.extend(f'weekday_{day}' for day in range(7))

    design_matrix = sm.add_constant(data[regressors])
    return sm.OLS(data['X'], design_matrix).fit()

In [10]:
# Prepare the data
# NOTE(review): the recorded output of this cell is a NameError for `data` —
# no visible cell assigns it. It must hold a frame with 'X', 'load' and
# 'weekday' columns before this cell runs. The name is then rebound to the
# prepared frame, in which 'weekday' has been replaced by indicator columns.
data = prepare_data_arx2(data)

# Fit the ARX2 model
results = fit_arx2(data)

# Print the model summary
print(results.summary())

NameError: name 'data' is not defined

##### AR2

In [None]:
def prepare_data_ar2(data):
    """Build the regressor columns used by the AR2 model.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'X' (the series being modelled) and
        'weekday'. The frame passed in is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with 'X_lag1', 'X_lag2', 'X_lag7', 'X_min', 'X_max' and
        'X_last' added, 'weekday' replaced by float 0/1 indicator columns
        named 'weekday_<value>', and every row that contains a missing
        value removed.
    """
    # Work on a copy so the caller's frame does not silently gain columns.
    data = data.copy()

    # NOTE(review): shift(k) lags by k ROWS. The 24-row window/shift used
    # below suggests hourly rows, in which case these are 1-, 2- and 7-hour
    # lags rather than day lags — confirm the intended row frequency.
    data['X_lag1'] = data['X'].shift(1)
    data['X_lag2'] = data['X'].shift(2)
    data['X_lag7'] = data['X'].shift(7)

    # Minimum and maximum of 'X' over a 24-row window that ends 24 rows
    # before the current row.
    data['X_min'] = data['X'].rolling(window=24).min().shift(24)
    data['X_max'] = data['X'].rolling(window=24).max().shift(24)

    # 'X' value from 24 rows earlier.
    data['X_last'] = data['X'].shift(24)

    # One indicator column per weekday value. dtype=float keeps them numeric:
    # pandas >= 2.0 returns bool indicators by default, and a frame mixing
    # bool and float columns turns into an object array when handed to
    # statsmodels.
    data = pd.get_dummies(data, columns=['weekday'], dtype=float)

    # The shifts and the rolling windows leave NaNs in the leading rows.
    return data.dropna()

def fit_ar2(data):
    """Fit the AR2 model by ordinary least squares.

    Regresses data['X'] on an intercept (added by ``sm.add_constant``), the
    columns 'X_lag1', 'X_lag2', 'X_lag7', 'X_min', 'X_max', 'X_last' and the
    seven indicator columns 'weekday_0' ... 'weekday_6'. All of these
    columns must already be present in ``data``.

    Returns the fitted statsmodels results object.
    """
    # NOTE(review): if the seven weekday indicators sum to one on every row,
    # they are perfectly collinear with the intercept — confirm that keeping
    # both is intended.
    price_terms = ['X_lag1', 'X_lag2', 'X_lag7', 'X_min', 'X_max', 'X_last']
    weekday_terms = ['weekday_' + str(day) for day in range(7)]

    target = data['X']
    design_matrix = sm.add_constant(data[[*price_terms, *weekday_terms]])
    return sm.OLS(target, design_matrix).fit()

In [None]:
# Prepare the data
# NOTE(review): `data` is not assigned in any cell visible here; it must hold a
# frame with 'X' and 'weekday' columns before this cell runs. The name is then
# rebound to the prepared frame, in which 'weekday' has been replaced by
# indicator columns.
data = prepare_data_ar2(data)

# Fit the AR2 model
results = fit_ar2(data)

# Print the model summary
print(results.summary())