In [17]:
import sys
sys.path.append("../../")
from Hack import load
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from pathlib import Path
from statsmodels.tsa.ar_model import AutoReg
import numpy as np
import datetime as datetime
from sklearn.metrics import r2_score

In [18]:
# Load the raw system-price table and list its columns.
price_raw = load.systemprice().load()
print(price_raw.keys())

# Resample to the required resolution (full res is probably too noisy).
# '300min' is the same 5-hour bin as the former '300T'; the 'T' alias is
# deprecated in recent pandas releases, while 'min' works in old and new ones.
# Only numeric columns are averaged: newer pandas no longer silently drops
# non-numeric columns in resample reductions.
# NOTE(review): presumably the date/time columns listed above are the
# non-numeric ones -- confirm against the loader.
RESAMPLE_RULE = '300min'
price = price_raw.select_dtypes(include='number').resample(RESAMPLE_RULE).mean()
print(price)

Index(['Settlement Date', 'Settlement Period', 'System Sell Price(£/MWh)',
       'System Buy Price(£/MWh)', 'Net Imbalance Volume(MWh)',
       'local_datetime', 'local_time'],
      dtype='object')
                     Settlement Period  System Sell Price(£/MWh)  \
Date                                                               
2019-04-08 00:00:00                5.5                 49.416000   
2019-04-08 05:00:00               15.5                 60.817000   
2019-04-08 10:00:00               25.5                 44.385000   
2019-04-08 15:00:00               35.5                 37.742812   
2019-04-08 20:00:00               35.9                 51.174000   
...                                ...                       ...   
2021-12-15 02:00:00                7.5                 73.325000   
2021-12-15 07:00:00               17.5                302.500000   
2021-12-15 12:00:00               27.5                287.850000   
2021-12-15 17:00:00               37.5              

In [19]:
# Plot the resampled system buy price over the whole period.
fig, axs = plt.subplots(1, 1)
# Label the axes so the figure can be read on its own. The labels are set
# before drawing so that the plot call remains the last expression of the cell.
axs.set_title("Resampled system buy price")
axs.set_xlabel("Date")
axs.set_ylabel("System Buy Price (£/MWh)")
axs.plot(price.index, price['System Buy Price(£/MWh)'].values)

[<matplotlib.lines.Line2D at 0x146500a30>]

# Model

In [20]:
# Inspect the autocorrelation of the buy price as a function of lag.
# A fresh figure is opened first; the pandas helper then draws on the
# current axes.
plt.figure()
pd.plotting.autocorrelation_plot(series=price.loc[:, 'System Buy Price(£/MWh)'])

<AxesSubplot:xlabel='Lag', ylabel='Autocorrelation'>

In [21]:
# Store the first-order difference of the buy price (each value minus the
# previous one) as the series to model. Differencing once removes the level /
# slow trend; it does not by itself remove seasonal cycles, so this step
# might be improvable.
price['stationary'] = price.loc[:, 'System Buy Price(£/MWh)'].diff(periods=1)

In [22]:
# Chronological train/test split of the differenced series.
t1 = datetime.datetime(year=2021, month=1, day=1)   # training rows are strictly before this instant
t2 = datetime.datetime(year=2021, month=1, day=6)   # test rows run from t1 up to (not including) this instant

# Boolean row masks over the time index.
in_train = price.index < t1
in_test = (price.index >= t1) & (price.index < t2)

# Select the modelled column for each window and discard missing values.
train_data = price.loc[in_train, 'stationary'].dropna()
test_data = price.loc[in_test, 'stationary'].dropna()

In [24]:
# Fit an autoregressive model on the training data for several lag orders,
# forecast over the test window, and compare each forecast with the observations.
fig, axs = plt.subplots(1, 1)
axs.plot(test_data.index, test_data.values, c='black', label='observed')
lags = np.arange(10, 1000, 200)   # candidate lag orders: 10, 210, 410, 610, 810
error = []

# The forecast window does not depend on the lag order, so compute it once:
# it starts right after the training sample and spans len(test_data) steps.
forecast_start = len(train_data)
forecast_end = forecast_start + len(test_data) - 1

for i in lags:
    model = AutoReg(train_data, lags=i)
    model_fitted = model.fit()

    # make predictions
    predictions = model_fitted.predict(start=forecast_start, end=forecast_end, dynamic=False)
    axs.plot(test_data.index, predictions, label=str(i))
    # Mean absolute error of this forecast; the test values are compared
    # positionally (as a plain array), not by index label.
    error.append(np.mean(np.abs(predictions - test_data.values)))

# Label the forecast figure so it can be read on its own.
axs.set_title("AutoReg forecasts of the differenced buy price")
axs.set_xlabel("Date")
axs.set_ylabel("Differenced buy price (£/MWh)")
axs.legend(title="Lags")

# Second figure: forecast error as a function of the lag order.
fig, axs = plt.subplots(1, 1)
axs.scatter(lags, np.asarray(error))
axs.set_title("Mean absolute forecast error by lag order")
axs.set_ylabel("Error")
axs.set_xlabel("Lag")

Text(0.5, 0, 'Lag')