In [1]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import re
import os
import time
import datetime as dt
import packages.tools as tools

### Estimation/Forecasting Method:

### Note:
* In the paper, a 2-year rolling window was used for training the models, and the rest was out-of-sample data (specifics are a little unclear).
* They had 2 years and 7 months' worth of data. Our (Irish) dataset, on the other hand, spans only 1 year and 6 months, so we will have to use a smaller 'training' window.

### Read in (pre-processed) dataset

In [2]:
# Load the DAM price CSV through the project's preprocessing helper; the preview of
# `dam_prices` shown further down is indexed by DeliveryPeriod with AuctionDateTime
# and EURPrices columns.
path_dam_prices = "Datasets/DAMPrices.csv"
dam_prices = tools.preprocess_price_data(path_dam_prices)

In [3]:
# Display the raw, unprocessed CSV so it can be compared with the preprocessed frame.
pd.read_csv(path_dam_prices)

Unnamed: 0,AreaSet,AuctionDateTime,EURGBPRate,DeliveryPeriod,IntervalDuration,EURPrices
0,SEM-DA,2018-11-11 11:00:00,0.873668,2018-11-11 23:00:00,60,55.089
1,SEM-DA,2018-11-11 11:00:00,0.873668,2018-11-12 00:00:00,60,55.120
2,SEM-DA,2018-11-11 11:00:00,0.873668,2018-11-12 01:00:00,60,46.460
3,SEM-DA,2018-11-11 11:00:00,0.873668,2018-11-12 02:00:00,60,45.710
4,SEM-DA,2018-11-11 11:00:00,0.873668,2018-11-12 03:00:00,60,41.380
5,SEM-DA,2018-11-11 11:00:00,0.873668,2018-11-12 04:00:00,60,44.370
6,SEM-DA,2018-11-11 11:00:00,0.873668,2018-11-12 05:00:00,60,47.290
7,SEM-DA,2018-11-11 11:00:00,0.873668,2018-11-12 06:00:00,60,60.440
8,SEM-DA,2018-11-11 11:00:00,0.873668,2018-11-12 07:00:00,60,63.979
9,SEM-DA,2018-11-11 11:00:00,0.873668,2018-11-12 08:00:00,60,67.190


In [4]:
# Preview the first rows of the preprocessed price frame.
dam_prices.head()

Unnamed: 0_level_0,AuctionDateTime,EURPrices
DeliveryPeriod,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-11-12 00:00:00,2018-11-11 11:00:00,55.089
2018-11-12 01:00:00,2018-11-11 11:00:00,55.12
2018-11-12 02:00:00,2018-11-11 11:00:00,46.46
2018-11-12 03:00:00,2018-11-11 11:00:00,45.71
2018-11-12 04:00:00,2018-11-11 11:00:00,41.38


In [61]:
"""
Parameters:
    data: pd.DataFrame
    target: datetime
    day_lag: int
        - Number of days to go back in order to make forecast, e.g. day_lag=7 indicates a weekly persistent model,
        day_lag=1 indicates a daily persistent model, etc.
"""

def naive(data, target, day_lag):
    """Persistence ("naive") forecast: reuse the prices observed `day_lag` days earlier.

    The rows of `data` whose index date equals `target - day_lag days` are copied
    and re-labelled with 24 hourly timestamps starting at `target`.

    Parameters:
        data: pd.DataFrame
            - Price data with a DatetimeIndex. An "AuctionDateTime" column, if
            present, is left out of the forecast. The frame passed in is NOT modified.
        target: datetime
            - First timestamp of the day to forecast.
        day_lag: int
            - Number of days to go back in order to make forecast, e.g. day_lag=7
            indicates a weekly persistent model, day_lag=1 a daily persistent model.

    Returns:
        pd.DataFrame with 24 rows indexed by "DeliveryPeriod".

    Raises:
        ValueError: if the source day does not contain exactly 24 rows.
    """
    hours_per_day = 24

    # Work on a new frame instead of dropping in place: the caller's DataFrame
    # must keep its columns so that re-running a cell gives the same result.
    prices = data.drop(columns="AuctionDateTime", errors="ignore")

    # Select the observations of the day that is `day_lag` days before the target.
    delivery_day = (target - dt.timedelta(days=day_lag)).date()
    forecast_df = prices.loc[prices.index.date == delivery_day]

    # Fail with an explicit message rather than pandas' generic "Length mismatch".
    if len(forecast_df) != hours_per_day:
        raise ValueError(
            f"Expected {hours_per_day} hourly rows for {delivery_day}, "
            f"found {len(forecast_df)}"
        )

    # Re-label the copied prices with the target day's hourly timestamps.
    # A timedelta frequency is used because the "H" alias is deprecated in recent pandas.
    forecast_df.index = pd.date_range(
        target,
        periods=hours_per_day,
        freq=dt.timedelta(hours=1),
        name="DeliveryPeriod",
    )

    return forecast_df

In [63]:
# Smoke test: forecast 13 Nov 2018 by repeating the prices of the previous day.
test_df = naive(
    data=dam_prices,
    target=dt.datetime(2018, 11, 13),
    day_lag=1,
)
test_df

Unnamed: 0_level_0,EURPrices
DeliveryPeriod,Unnamed: 1_level_1
2018-11-13 00:00:00,55.089
2018-11-13 01:00:00,55.12
2018-11-13 02:00:00,46.46
2018-11-13 03:00:00,45.71
2018-11-13 04:00:00,41.38
2018-11-13 05:00:00,44.37
2018-11-13 06:00:00,47.29
2018-11-13 07:00:00,60.44
2018-11-13 08:00:00,63.979
2018-11-13 09:00:00,67.19


In [8]:
# Evaluate the naive forecaster with a two-day lag via walk-forward validation.
# NOTE(review): walk_forward_validation is neither defined nor imported in the visible
# cells, and this cell's execution count (8) is lower than the cells above it (61, 63);
# confirm the notebook still works after Restart Kernel -> Run All.
naive_params = dict(day_lag=2)
walk_forward_validation(method_function=naive, parameters=naive_params, data=dam_prices,
                        starting_window_size=2, moving_window=True, start=None, end=None)

Unnamed: 0_level_0,Unnamed: 1_level_0,EURPrices_forecast,EURPrices_data,residual,absolute_error,squared_error
DeliveryDay,TimeStepID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-11-14,0,55.089,48.01,-7.079,7.079,50.1122
2018-11-14,1,55.12,35.0,-20.12,20.12,404.814
2018-11-14,2,46.46,6.0,-40.46,40.46,1637.01
2018-11-14,3,45.71,1.03,-44.68,44.68,1996.3
2018-11-14,4,41.38,0.0,-41.38,41.38,1712.3
2018-11-14,5,44.37,0.0,-44.37,44.37,1968.7
2018-11-14,6,47.29,0.0,-47.29,47.29,2236.34
2018-11-14,7,60.44,22.37,-38.07,38.07,1449.32
2018-11-14,8,63.979,47.18,-16.799,16.799,282.206
2018-11-14,9,67.19,61.05,-6.14,6.14,37.6996
