# Selection of Calibration Windows for Day-Ahead Electricity Price Forecasting
by Grzegorz Marcjasz, Tomasz Serafin and Rafał Weron 

In [226]:
# libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import median_abs_deviation

## Data Preparation


In [276]:
df_day_ahead_epex = pd.read_csv('Day_Ahead_Epex.csv', sep=';')
df_NP2018_complete = pd.read_csv('NP2018_complete.csv')
# remove the first column
df_NP2018_complete = df_NP2018_complete.iloc[:,1:]
df_PJM = pd.read_csv('PJM.csv', sep=';')

In [228]:
def normalize_prices(prices):
    a = np.median(prices)
    b = median_abs_deviation(prices)
    p = 1/b * (prices - a)
    return p, a, b

def apply_vst(p): # apply the area hyperbolic sine transformation to the normalized prices
    X = np.arcsinh(p)
    return X

def inverse_transform(forecast, a, b): # apply the inverse of VST to the forecast to obtain the price prediction
    price_predictions = b * np.sinh(forecast) + a
    return price_predictions

### df_NP2018

In [267]:
df_NP2018_complete['sunday'] = np.where(df_NP2018_complete['Day'] == 7, 1, 0)
df_NP2018_complete['saturday'] = np.where(df_NP2018_complete['Day'] == 6, 1, 0)
df_NP2018_complete['monday'] = np.where(df_NP2018_complete['Day'] ==  1, 1, 0)

In [268]:
price = df_NP2018_complete["Price"]
exogenos = df_NP2018_complete["load"]
sunday = df_NP2018_complete["sunday"]
saturday = df_NP2018_complete["saturday"]
monday = df_NP2018_complete["monday"]
df_NP2018_complete['Min_Comed_Price'] = df_NP2018_complete.groupby('Date')['Price'].transform('min')
df_NP2018_complete['Max_Comed_Price'] = df_NP2018_complete.groupby('Date')['Price'].transform('max')

In [269]:
df_NP2018_complete.head()

Unnamed: 0,Date,Hour,Price,load,placeholder2,Day,sunday,saturday,monday,Min_Comed_Price,Max_Comed_Price
0,20130101,1,31.05,42497.0,2798.0,2,0,0,0,26.96,36.65
1,20130101,2,30.47,41463.0,2417.0,2,0,0,0,26.96,36.65
2,20130101,3,28.92,40812.0,2036.0,2,0,0,0,26.96,36.65
3,20130101,4,27.88,40246.0,1706.0,2,0,0,0,26.96,36.65
4,20130101,5,26.96,40088.0,1427.0,2,0,0,0,26.96,36.65


#### Normalization

In [271]:
# Extract 'comed price' column and convert to numpy array
prices = df_NP2018_complete['Price'].values

# Normalize prices
p, a, b = normalize_prices(prices)

# Apply VST
X = apply_vst(p)

# Update DataFrame
df_NP2018_complete['Price_norm'] = X

In [272]:
df_NP2018_complete["min_norm"] = df_NP2018_complete.groupby('Date')['Price_norm'].transform('min')
df_NP2018_complete["max_norm"] = df_NP2018_complete.groupby('Date')['Price_norm'].transform('max')

In [273]:
df_NP2018_complete.head()

Unnamed: 0,Date,Hour,Price,load,placeholder2,Day,sunday,saturday,monday,Min_Comed_Price,Max_Comed_Price,Price_norm,min_norm,max_norm
0,20130101,1,31.05,42497.0,2798.0,2,0,0,0,26.96,36.65,0.275441,-0.478665,1.103536
1,20130101,2,30.47,41463.0,2417.0,2,0,0,0,26.96,36.65,0.168088,-0.478665,1.103536
2,20130101,3,28.92,40812.0,2036.0,2,0,0,0,26.96,36.65,-0.124912,-0.478665,1.103536
3,20130101,4,27.88,40246.0,1706.0,2,0,0,0,26.96,36.65,-0.317233,-0.478665,1.103536
4,20130101,5,26.96,40088.0,1427.0,2,0,0,0,26.96,36.65,-0.478665,-0.478665,1.103536


### df_PJM

In [277]:
df_PJM['sunday'] = np.where(df_PJM['day'] == 7, 1, 0)
df_PJM['saturday'] = np.where(df_PJM['day'] == 6, 1, 0)
df_PJM['monday'] = np.where(df_PJM['day'] ==  1, 1, 0)

In [278]:
price = df_PJM["comed price"]
exogenos = df_PJM["rto load forecast"]
sunday = df_PJM["sunday"]
saturday = df_PJM["saturday"]
monday = df_PJM["monday"]
df_PJM['Min_Comed_Price'] = df_PJM.groupby('date')['comed price'].transform('min')
df_PJM['Max_Comed_Price'] = df_PJM.groupby('date')['comed price'].transform('max')

#### Normalization

In [281]:
# Extract 'comed price' column and convert to numpy array
prices = df_PJM['comed price'].values

# Normalize prices
p, a, b = normalize_prices(prices)

# Apply VST
X = apply_vst(p)

# Update DataFrame
df_PJM['comed_price_norma'] = X

In [282]:
df_PJM["min_norm"] = df_PJM.groupby('date')['comed_price_norma'].transform('min')
df_PJM["max_norm"] = df_PJM.groupby('date')['comed_price_norma'].transform('max')

In [283]:
df_PJM.head()

Unnamed: 0,date,hour,comed price,rto load forecast,comed load foecast,day,sunday,saturday,monday,Min_Comed_Price,Max_Comed_Price,for_d_1,comed_price_norma,min_norm,max_norm
0,20110102,0,17.141179,68590.0,11432.0,1,0,0,1,10.690926,51.389987,,-1.387638,-1.81086,2.083361
1,20110102,1,14.179616,65896.0,10862.0,1,0,0,1,10.690926,51.389987,17.141179,-1.602532,-1.81086,2.083361
2,20110102,2,12.736793,64511.0,10486.0,1,0,0,1,10.690926,51.389987,14.179616,-1.693663,-1.81086,2.083361
3,20110102,3,11.096262,63935.0,10318.0,1,0,0,1,10.690926,51.389987,12.736793,-1.788647,-1.81086,2.083361
4,20110102,4,10.690926,64321.0,10208.0,1,0,0,1,10.690926,51.389987,11.096262,-1.81086,-1.81086,2.083361


### df_day_ahead_epex

In [290]:
df_day_ahead_epex['sunday'] = np.where(df_day_ahead_epex['day'] == 7, 1, 0)
df_day_ahead_epex['saturday'] = np.where(df_day_ahead_epex['day'] == 6, 1, 0)
df_day_ahead_epex['monday'] = np.where(df_day_ahead_epex['day'] ==  1, 1, 0)

In [292]:
price = df_day_ahead_epex["Spot DE.AT_price"]
# exogenos = df_day_ahead_epex["rto load forecast"]
sunday = df_day_ahead_epex["sunday"]
saturday = df_day_ahead_epex["saturday"]
monday = df_day_ahead_epex["monday"]
df_day_ahead_epex['Min_Comed_Price'] = df_day_ahead_epex.groupby('Date')['Spot DE.AT_price'].transform('min')
df_day_ahead_epex['Max_Comed_Price'] = df_day_ahead_epex.groupby('Date')['Spot DE.AT_price'].transform('max')

In [293]:
df_day_ahead_epex.head()

Unnamed: 0,Date,hour,Spot DE.AT_price,day,sunday,saturday,monday,Min_Comed_Price,Max_Comed_Price
0,20150101,1,25.02,5,0,0,0,0.04,26.9
1,20150101,2,18.29,5,0,0,0,0.04,26.9
2,20150101,3,16.04,5,0,0,0,0.04,26.9
3,20150101,4,14.6,5,0,0,0,0.04,26.9
4,20150101,5,14.95,5,0,0,0,0.04,26.9


## ARX1

### df_PJM

#### Raw Data

In [279]:
load = df_PJM["rto load forecast"]
errors = []
T = 150
for h in range(24):
    p_hour = df_PJM.loc[h::24,'comed price'].values # hourly data selection
    exogenos = df_PJM.loc[h::24,'rto load forecast'].values
    sunday = df_PJM.loc[h::24,'sunday'].values
    monday = df_PJM.loc[h::24,'monday'].values
    saturday = df_PJM.loc[h::24,'saturday'].values
    c_d_min = df_PJM.loc[h::24,'Min_Comed_Price'].values
    h_errors = []
    # Initialize an empty list to store the forecasts
    forecasts = []
    for day in range(T,len(price)//24):
            cal_data = price[(day-T):day].values # calibration data --> normalize it
            # ------------------------------ normalize here -------------------------------
            # also the min and max
            Y = cal_data[7:T]
            X1 = cal_data[6:T-1] 
            X2 = cal_data[5:T-2] 
            X3 = cal_data[0:T-7]
            X4 = c_d_min[7:T]
            X5 = exogenos[7:T]  #C_d_h
            X6 = sunday[7:T] # sunday
            X7 = monday[7:T] # monday
            X8 = saturday[7:T] # saturday
            X0 = np.ones(np.size(X1)) 
            X = np.stack([X0,X1,X2, X3,X4,X5, X6, X7, X8],axis = 1)
            betas = np.dot(np.linalg.inv(np.dot(X.T,X)), np.dot(X.T,Y))
            X_fut = np.array([1,cal_data[T-1],cal_data[T-2],cal_data[T-7],c_d_min[day], exogenos[day],sunday[day],monday[day],saturday[day]])
            forecast = np.dot(X_fut,betas)
            forecasts.append(forecast)
            # transofrmation + reverse transformation
            real = price[day]
            err = real - forecast
            errors1 = (np.abs(err))
            errors.append(errors1)
print("Raw Data Errors for ARX1 (df_PJM):" , np.mean(errors))

Raw Data Errors for ARX1 (df_PJM): 3.095383368363987


#### Normalized

In [284]:
load = df_PJM["rto load forecast"]
errors = []
T = 150
for h in range(24):
    p_hour = df_PJM.loc[h::24,'comed_price_norma'].values # hourly data selection
    exogenos = df_PJM.loc[h::24,'rto load forecast'].values
    sunday = df_PJM.loc[h::24,'sunday'].values
    monday = df_PJM.loc[h::24,'monday'].values
    saturday = df_PJM.loc[h::24,'saturday'].values
    c_d_min = df_PJM.loc[h::24,'min_norm'].values
    h_errors = []
    # Initialize an empty list to store the forecasts
    forecasts = []
    for day in range(T,len(price)//24):
            cal_data = price[(day-T):day].values # calibration data 
            Y = cal_data[7:T]
            X1 = cal_data[6:T-1] 
            X2 = cal_data[5:T-2] 
            X3 = cal_data[0:T-7]
            X4 = c_d_min[7:T]
            X5 = exogenos[7:T]  #C_d_h
            X6 = sunday[7:T] # sunday
            X7 = monday[7:T] # monday
            X8 = saturday[7:T] # saturday
            X0 = np.ones(np.size(X1)) 
            X = np.stack([X0,X1,X2, X3,X4,X5, X6, X7, X8],axis = 1)
            betas = np.dot(np.linalg.inv(np.dot(X.T,X)), np.dot(X.T,Y))
            X_fut = np.array([1,cal_data[T-1],cal_data[T-2],cal_data[T-7],c_d_min[day], exogenos[day],sunday[day],monday[day],saturday[day]])
            forecast = np.dot(X_fut,betas)
            forecasts.append(forecast)
            real = price[day]
            err = real - forecast
            errors1 = (np.abs(err))
            errors.append(errors1)
print("Normalized Errors for ARX1 (df_PJM):" , np.mean(errors))

Normalized Errors for ARX1 (df_PJM): 3.066570716311972


In [None]:
# Convert the list of forecasts to a numpy array
forecasts = np.array(forecasts)

# Apply inverse transformation
price_predictions = inverse_transform(forecasts, a, b)

### df_NP2018_complete

#### Raw Data

In [270]:
load = df_NP2018_complete["load"]
errors = []
T = 150
for h in range(24):
    p_hour = df_NP2018_complete.loc[h::24,'Price'].values # hourly data selection
    exogenos = df_NP2018_complete.loc[h::24,'load'].values
    sunday = df_NP2018_complete.loc[h::24,'sunday'].values
    monday = df_NP2018_complete.loc[h::24,'monday'].values
    saturday = df_NP2018_complete.loc[h::24,'saturday'].values
    c_d_min = df_NP2018_complete.loc[h::24,'Min_Comed_Price'].values
    h_errors = []
    # Initialize an empty list to store the forecasts
    forecasts = []
    for day in range(T,len(price)//24):
            cal_data = price[(day-T):day].values # calibration data 
            Y = cal_data[7:T]
            X1 = cal_data[6:T-1] 
            X2 = cal_data[5:T-2] 
            X3 = cal_data[0:T-7]
            X4 = c_d_min[7:T]
            X5 = exogenos[7:T]  #C_d_h
            X6 = sunday[7:T] # sunday
            X7 = monday[7:T] # monday
            X8 = saturday[7:T] # saturday
            X0 = np.ones(np.size(X1)) 
            X = np.stack([X0,X1,X2, X3,X4,X5, X6, X7, X8],axis = 1)
            betas = np.dot(np.linalg.inv(np.dot(X.T,X)), np.dot(X.T,Y))
            X_fut = np.array([1,cal_data[T-1],cal_data[T-2],cal_data[T-7],c_d_min[day], exogenos[day],sunday[day],monday[day],saturday[day]])
            forecast = np.dot(X_fut,betas)
            forecasts.append(forecast)
            real = price[day]
            err = real - forecast
            errors1 = (np.abs(err))
            errors.append(errors1)
print("Raw Data Errors for ARX1 (df_NP2018_complete):" , np.mean(errors))

Raw Data Errors for ARX1 (df_NP2018_complete): 1.8732773354352787


#### Normalized


In [274]:
df_NP2018_complete.head()

Unnamed: 0,Date,Hour,Price,load,placeholder2,Day,sunday,saturday,monday,Min_Comed_Price,Max_Comed_Price,Price_norm,min_norm,max_norm
0,20130101,1,31.05,42497.0,2798.0,2,0,0,0,26.96,36.65,0.275441,-0.478665,1.103536
1,20130101,2,30.47,41463.0,2417.0,2,0,0,0,26.96,36.65,0.168088,-0.478665,1.103536
2,20130101,3,28.92,40812.0,2036.0,2,0,0,0,26.96,36.65,-0.124912,-0.478665,1.103536
3,20130101,4,27.88,40246.0,1706.0,2,0,0,0,26.96,36.65,-0.317233,-0.478665,1.103536
4,20130101,5,26.96,40088.0,1427.0,2,0,0,0,26.96,36.65,-0.478665,-0.478665,1.103536


In [275]:
load = df_NP2018_complete["load"]
errors = []
T = 150
for h in range(24):
    p_hour = df_NP2018_complete.loc[h::24,'Price'].values # hourly data selection
    exogenos = df_NP2018_complete.loc[h::24,'load'].values
    sunday = df_NP2018_complete.loc[h::24,'sunday'].values
    monday = df_NP2018_complete.loc[h::24,'monday'].values
    saturday = df_NP2018_complete.loc[h::24,'saturday'].values
    c_d_min = df_NP2018_complete.loc[h::24,'min_norm'].values
    h_errors = []
    # Initialize an empty list to store the forecasts
    forecasts = []
    for day in range(T,len(price)//24):
            cal_data = price[(day-T):day].values # calibration data 
            Y = cal_data[7:T]
            X1 = cal_data[6:T-1] 
            X2 = cal_data[5:T-2] 
            X3 = cal_data[0:T-7]
            X4 = c_d_min[7:T]
            X5 = exogenos[7:T]  #C_d_h
            X6 = sunday[7:T] # sunday
            X7 = monday[7:T] # monday
            X8 = saturday[7:T] # saturday
            X0 = np.ones(np.size(X1)) 
            X = np.stack([X0,X1,X2, X3,X4,X5, X6, X7, X8],axis = 1)
            betas = np.dot(np.linalg.inv(np.dot(X.T,X)), np.dot(X.T,Y))
            X_fut = np.array([1,cal_data[T-1],cal_data[T-2],cal_data[T-7],c_d_min[day], exogenos[day],sunday[day],monday[day],saturday[day]])
            forecast = np.dot(X_fut,betas)
            forecasts.append(forecast)
            real = price[day]
            err = real - forecast
            errors1 = (np.abs(err))
            errors.append(errors1)
print("Normalized Errors for ARX1 (df_NP2018_complete):" , np.mean(errors))

Normalized Errors for ARX1 (df_NP2018_complete): 1.9003364181382847


In [116]:
# Convert the list of forecasts to a numpy array
forecasts = np.array(forecasts)

# Apply inverse transformation
price_predictions = inverse_transform(forecasts, a, b)

### df_day_ahead_epex

#### Raw Data

#### Normalized

## ARX2

### df_PJM

#### Raw Data

In [280]:
load = df_PJM["rto load forecast"]
df_PJM["for_d_1"] = df_PJM["comed price"].shift(1)
errors = []
T = 150
for h in range(24):
    p_hour = df_PJM.loc[h::24,'comed price'].values # hourly data selection
    exogenos = df_PJM.loc[h::24,'rto load forecast'].values
    weekday = df_PJM.loc[h::24,'day'].values
    c_d_min = df_PJM.loc[h::24,'Min_Comed_Price'].values
    c_d_max = df_PJM.loc[h::24,'Max_Comed_Price'].values
    for_d_1 = df_PJM.loc[h::24,'for_d_1'].values
    h_errors = []
    # Initialize an empty list to store the forecasts
    forecasts = []
    for day in range(T,len(price)//24):
            cal_data = price[(day-T):day].values # calibration data 
            Y = cal_data[7:T]
            X1 = cal_data[6:T-1] 
            X2 = cal_data[5:T-2] 
            X3 = cal_data[0:T-7]
            X4 = c_d_min[7:T]
            X5 = c_d_max[7:T]  #C_d_h
            X6 = for_d_1[7:T] # c_d-1_24
            X7 = exogenos[7:T]
            X8 = weekday[7:T] 
            X0 = np.ones(np.size(X1)) 
            X = np.stack([X0,X1,X2, X3,X4,X5, X6, X7, X8],axis = 1)
            betas = np.dot(np.linalg.inv(np.dot(X.T,X)), np.dot(X.T,Y))
            X_fut = np.array([1,cal_data[T-1],cal_data[T-2],cal_data[T-7],c_d_min[day], exogenos[day],sunday[day],monday[day],saturday[day]])
            forecast = np.dot(X_fut,betas)
            forecasts.append(forecast)
            real = price[day]
            err = real - forecast
            errors1 = (np.abs(err))
            errors.append(errors1)
print("Raw Data Errors for ARX2 (df_PJM):" , np.mean(errors))

Raw Data Errors for ARX2 (df_PJM): 3914.6397079165254


#### Normalized

In [285]:
df_PJM.head()

Unnamed: 0,date,hour,comed price,rto load forecast,comed load foecast,day,sunday,saturday,monday,Min_Comed_Price,Max_Comed_Price,for_d_1,comed_price_norma,min_norm,max_norm
0,20110102,0,17.141179,68590.0,11432.0,1,0,0,1,10.690926,51.389987,,-1.387638,-1.81086,2.083361
1,20110102,1,14.179616,65896.0,10862.0,1,0,0,1,10.690926,51.389987,17.141179,-1.602532,-1.81086,2.083361
2,20110102,2,12.736793,64511.0,10486.0,1,0,0,1,10.690926,51.389987,14.179616,-1.693663,-1.81086,2.083361
3,20110102,3,11.096262,63935.0,10318.0,1,0,0,1,10.690926,51.389987,12.736793,-1.788647,-1.81086,2.083361
4,20110102,4,10.690926,64321.0,10208.0,1,0,0,1,10.690926,51.389987,11.096262,-1.81086,-1.81086,2.083361


In [286]:
load = df_PJM["rto load forecast"]
df_PJM["for_d_1_norm"] = df_PJM["comed_price_norma"].shift(1)
errors = []
T = 150
for h in range(24):
    p_hour = df_PJM.loc[h::24,'comed_price_norma'].values # hourly data selection
    exogenos = df_PJM.loc[h::24,'rto load forecast'].values
    weekday = df_PJM.loc[h::24,'day'].values
    c_d_min = df_PJM.loc[h::24,'min_norm'].values
    c_d_max = df_PJM.loc[h::24,'max_norm'].values
    for_d_1 = df_PJM.loc[h::24,'for_d_1_norm'].values
    h_errors = []
    # Initialize an empty list to store the forecasts
    forecasts = []
    for day in range(T,len(price)//24):
            cal_data = price[(day-T):day].values # calibration data 
            Y = cal_data[7:T]
            X1 = cal_data[6:T-1] 
            X2 = cal_data[5:T-2] 
            X3 = cal_data[0:T-7]
            X4 = c_d_min[7:T]
            X5 = c_d_max[7:T]  #C_d_h
            X6 = for_d_1[7:T] # c_d-1_24
            X7 = exogenos[7:T]
            X8 = weekday[7:T] 
            X0 = np.ones(np.size(X1)) 
            X = np.stack([X0,X1,X2, X3,X4,X5, X6, X7, X8],axis = 1)
            betas = np.dot(np.linalg.inv(np.dot(X.T,X)), np.dot(X.T,Y))
            X_fut = np.array([1,cal_data[T-1],cal_data[T-2],cal_data[T-7],c_d_min[day], exogenos[day],sunday[day],monday[day],saturday[day]])
            forecast = np.dot(X_fut,betas)
            forecasts.append(forecast)
            real = price[day]
            err = real - forecast
            errors1 = (np.abs(err))
            errors.append(errors1)
print("Normalized Errors for ARX2 (df_PJM): " , np.mean(errors))

Normalized Errors for ARX2 (df_PJM):  93441.96711814604


### df_NP2018_complete

In [243]:
df_NP2018_complete.head()

Unnamed: 0,Date,Hour,Price,load,placeholder2,Day,sunday,saturday,monday,Min_Comed_Price,Max_Comed_Price
0,20130101,1,31.05,42497.0,2798.0,2,0,0,0,26.96,36.65
1,20130101,2,30.47,41463.0,2417.0,2,0,0,0,26.96,36.65
2,20130101,3,28.92,40812.0,2036.0,2,0,0,0,26.96,36.65
3,20130101,4,27.88,40246.0,1706.0,2,0,0,0,26.96,36.65
4,20130101,5,26.96,40088.0,1427.0,2,0,0,0,26.96,36.65


#### Raw Data

In [244]:
load = df_NP2018_complete["load"]
df_NP2018_complete["for_d_1"] = df_NP2018_complete["Price"].shift(1)
errors = []
T = 150
for h in range(24):
    p_hour = df_NP2018_complete.loc[h::24,'Price'].values # hourly data selection
    exogenos = df_NP2018_complete.loc[h::24,'load'].values
    weekday = df_NP2018_complete.loc[h::24,'Day'].values
    c_d_min = df_NP2018_complete.loc[h::24,'Min_Comed_Price'].values
    c_d_max = df_NP2018_complete.loc[h::24,'Max_Comed_Price'].values
    for_d_1 = df_NP2018_complete.loc[h::24,'for_d_1'].values
    h_errors = []
    # Initialize an empty list to store the forecasts
    forecasts = []
    for day in range(T,len(price)//24):
            cal_data = price[(day-T):day].values # calibration data 
            Y = cal_data[7:T]
            X1 = cal_data[6:T-1] 
            X2 = cal_data[5:T-2] 
            X3 = cal_data[0:T-7]
            X4 = c_d_min[7:T]
            X5 = c_d_max[7:T]  #C_d_h
            X6 = for_d_1[7:T] # c_d-1_24
            X7 = exogenos[7:T]
            X8 = weekday[7:T] 
            X0 = np.ones(np.size(X1)) 
            X = np.stack([X0,X1,X2, X3,X4,X5, X6, X7, X8],axis = 1)
            betas = np.dot(np.linalg.inv(np.dot(X.T,X)), np.dot(X.T,Y))
            X_fut = np.array([1,cal_data[T-1],cal_data[T-2],cal_data[T-7],c_d_min[day], exogenos[day],sunday[day],monday[day],saturday[day]])
            forecast = np.dot(X_fut,betas)
            forecasts.append(forecast)
            real = price[day]
            err = real - forecast
            errors1 = (np.abs(err))
            errors.append(errors1)
print("Raw Data Errors for ARX2 (df_NP2018_complete):" , np.mean(errors))

Raw Data Errors for ARX2 (df_NP2018_complete): 1090.7164130160647


#### Normalized

In [252]:
df_NP2018_complete.head()

Unnamed: 0,Date,Hour,Price,load,placeholder2,Day,sunday,saturday,monday,Min_Comed_Price,Max_Comed_Price,for_d_1,Price_norm,min_norm,max_norm
0,20130101,1,31.05,42497.0,2798.0,2,0,0,0,26.96,36.65,,0.275441,-0.478665,1.103536
1,20130101,2,30.47,41463.0,2417.0,2,0,0,0,26.96,36.65,31.05,0.168088,-0.478665,1.103536
2,20130101,3,28.92,40812.0,2036.0,2,0,0,0,26.96,36.65,30.47,-0.124912,-0.478665,1.103536
3,20130101,4,27.88,40246.0,1706.0,2,0,0,0,26.96,36.65,28.92,-0.317233,-0.478665,1.103536
4,20130101,5,26.96,40088.0,1427.0,2,0,0,0,26.96,36.65,27.88,-0.478665,-0.478665,1.103536


In [253]:
load = df_NP2018_complete["load"]
df_NP2018_complete["for_d_1_norm"] = df_NP2018_complete["Price_norm"].shift(1)
errors = []
T = 150
for h in range(24):
    p_hour = df_NP2018_complete.loc[h::24,'Price_norm'].values # hourly data selection
    exogenos = df_NP2018_complete.loc[h::24,'load'].values
    weekday = df_NP2018_complete.loc[h::24,'Day'].values
    c_d_min = df_NP2018_complete.loc[h::24,'min_norm'].values
    c_d_max = df_NP2018_complete.loc[h::24,'max_norm'].values
    for_d_1 = df_NP2018_complete.loc[h::24,'for_d_1_norm'].values
    h_errors = []
    # Initialize an empty list to store the forecasts
    forecasts = []
    for day in range(T,len(price)//24):
            cal_data = price[(day-T):day].values # calibration data 
            Y = cal_data[7:T]
            X1 = cal_data[6:T-1] 
            X2 = cal_data[5:T-2] 
            X3 = cal_data[0:T-7]
            X4 = c_d_min[7:T]
            X5 = c_d_max[7:T]  #C_d_h
            X6 = for_d_1[7:T] # c_d-1_24
            X7 = exogenos[7:T]
            X8 = weekday[7:T] 
            X0 = np.ones(np.size(X1)) 
            X = np.stack([X0,X1,X2, X3,X4,X5, X6, X7, X8],axis = 1)
            betas = np.dot(np.linalg.inv(np.dot(X.T,X)), np.dot(X.T,Y))
            X_fut = np.array([1,cal_data[T-1],cal_data[T-2],cal_data[T-7],c_d_min[day], exogenos[day],sunday[day],monday[day],saturday[day]])
            forecast = np.dot(X_fut,betas)
            forecasts.append(forecast)
            real = price[day]
            err = real - forecast
            errors1 = (np.abs(err))
            errors.append(errors1)
print("Normalized Errors for ARX2 (df_NP2018_complete):" , np.mean(errors))

Normalized Errors for ARX2 (df_NP2018_complete): 32715.448723899073


### df_day_ahead_epex

In [294]:
df_day_ahead_epex.head()

Unnamed: 0,Date,hour,Spot DE.AT_price,day,sunday,saturday,monday,Min_Comed_Price,Max_Comed_Price
0,20150101,1,25.02,5,0,0,0,0.04,26.9
1,20150101,2,18.29,5,0,0,0,0.04,26.9
2,20150101,3,16.04,5,0,0,0,0.04,26.9
3,20150101,4,14.6,5,0,0,0,0.04,26.9
4,20150101,5,14.95,5,0,0,0,0.04,26.9


#### Raw Data

#### Normalized