In [16]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import re
import os
import time
import datetime as dt
import packages.tools as tools
from sklearn import metrics

### Estimation/Forecasting Method:

### Note:
* In the paper, a 2-year rolling window was used for training the models, and the rest was out-of-sample data (specifics are a little unclear).
* They had 2 years and 7 months' worth of data. Our (Irish) dataset, on the other hand, spans only 1 year and 6 months, so we will have to use a smaller 'training' window.

### Read in (pre-processed) dataset

In [2]:
# Location of the pre-processed price CSV, relative to the notebook's working directory.
price_path = "Datasets/DAMPrices.csv"
# Parse the file with the project helper. Judging by the preview displayed below, the result
# is indexed by DeliveryPeriod and holds a single EURPrices column.
prices = tools.read_price_data(price_path)

In [3]:
# Peek at the raw CSV columns, for comparison with the frame returned by tools.read_price_data.
pd.read_csv(price_path).head()

Unnamed: 0,AreaSet,AuctionDateTime,EURGBPRate,DeliveryPeriod,IntervalDuration,EURPrices
0,SEM-DA,2018-11-11 11:00:00,0.873668,2018-11-11 23:00:00,60,55.089
1,SEM-DA,2018-11-11 11:00:00,0.873668,2018-11-12 00:00:00,60,55.12
2,SEM-DA,2018-11-11 11:00:00,0.873668,2018-11-12 01:00:00,60,46.46
3,SEM-DA,2018-11-11 11:00:00,0.873668,2018-11-12 02:00:00,60,45.71
4,SEM-DA,2018-11-11 11:00:00,0.873668,2018-11-12 03:00:00,60,41.38


In [4]:
# First rows of the parsed price frame (displayed as the cell's last expression).
prices.head()

Unnamed: 0_level_0,EURPrices
DeliveryPeriod,Unnamed: 1_level_1
2018-11-12 00:00:00,55.089
2018-11-12 01:00:00,55.12
2018-11-12 02:00:00,46.46
2018-11-12 03:00:00,45.71
2018-11-12 04:00:00,41.38


In [32]:
"""
Parameters:
    data: pd.DataFrame
    day_lag: int
        - Number of days to go back in order to make forecast, e.g. day_lag=7 indicates a weekly persistent model,
        day_lag=1 indicates a daily persistent model, etc.
"""

def naive(data, day_lag):
    """
    Persistence ("naive") forecast: repeat the prices observed on an earlier day.

    Parameters:
        data: pd.DataFrame
            - Hourly observations on a DatetimeIndex, oldest row first. The reference day is
            found by counting back `day_lag*24` rows, so this assumes 24 rows per day and a
            history that ends on the last hour of a day.
        day_lag: int
            - Number of days to go back in order to make forecast, e.g. day_lag=7 indicates a
            weekly persistent model, day_lag=1 indicates a daily persistent model, etc.

    Returns:
        pd.DataFrame
            - A copy of the reference day's 24 rows, re-indexed (index name "DeliveryPeriod")
            to the 24 hours of the day that follows the last day present in `data`.

    Raises:
        ValueError: if `day_lag` is smaller than 1, or the reference day does not hold exactly 24 rows.
        IndexError: if `data` has fewer than `day_lag*24` rows.
    """
    if day_lag < 1:
        # 0 or a negative lag would index from the *start* of the history and silently
        # return an unrelated day.
        raise ValueError("day_lag must be a positive number of days, got {!r}".format(day_lag))

    # Calendar date of every row, computed once and reused below.
    dates = data.index.date

    # Make forecasts: take every row belonging to the day found `day_lag*24` rows from the end.
    # .copy() so that re-indexing below can never touch the caller's frame.
    reference_date = dates[-(day_lag * 24)]
    forecast_df = data.loc[dates == reference_date].copy()

    if len(forecast_df) != 24:
        raise ValueError(
            "expected 24 hourly rows on {}, found {}".format(reference_date, len(forecast_df))
        )

    # Create and prepare forecasts dataframe. The forecast describes the day *after* the last
    # observed one, so its index starts at midnight of the following day.
    forecast_start = dt.datetime.combine(dates[-1], dt.time.min) + dt.timedelta(days=1)
    forecast_df.index = pd.date_range(forecast_start, periods=24, freq="h", name="DeliveryPeriod")

    return forecast_df

In [6]:
# Hold out the final calendar day as the test set; everything before it is used for training.

# Midnight at the start of the last day present in the price index
last_day_of_data = dt.datetime.combine(prices.index.date[-1], dt.time.min)

# The 24 hourly timestamps (00:00 through 23:00) of the held-out day
test_data_index = pd.date_range(start=last_day_of_data, periods=24, freq='h')

# Training data + predictors for forecast: label slicing is inclusive, so this runs up to
# and including 23:00 of the day before the held-out day
train_prices = prices.loc[:last_day_of_data - dt.timedelta(hours=1)]

# Test data: the rows for exactly those 24 timestamps
test_prices = prices.loc[test_data_index]

In [69]:
# Daily persistence baseline: forecast the held-out day from the training history.
test_df = naive(data=train_prices, day_lag=1)

# Report RMSE and MAE of the forecast against the held-out prices.
# RMSE is computed as sqrt(MSE) rather than with `squared=False`, which scikit-learn
# deprecated in 1.4 and removed in later releases. Arguments follow the documented
# (y_true, y_pred) order; both metrics are symmetric, so the values are unchanged.
print(np.sqrt(metrics.mean_squared_error(test_prices, test_df)),
    metrics.mean_absolute_error(test_prices, test_df))

19.851775949101715 18.24083333333333
