In [1]:
from Hack import load
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from pathlib import Path
from statsmodels.tsa.ar_model import AutoReg
import numpy as np
import datetime as datetime
from sklearn.metrics import r2_score

In [2]:
# Load the system price table through the project loader.
# NOTE(review): the printed output suggests a DataFrame indexed by a "Date"
# timestamp — confirm against Hack.load.
price = load.systemprice().load()
print(price.keys())

# Resample to the required resolution (full res is probably too noisy).
# '180min' is the same 3-hour bin as the former '180T' spelling; the 'T'
# offset alias is deprecated since pandas 2.2 and raises a FutureWarning there,
# while 'min' is accepted by both old and new pandas versions.
price = price.resample('180min').mean()
print(price)

Data/systemprice.csv
Index(['Settlement Period', 'System Sell Price(£/MWh)',
       'System Buy Price(£/MWh)', 'Net Imbalance Volume(MWh)'],
      dtype='object')
                     Settlement Period  System Sell Price(£/MWh)  \
Date                                                               
2019-04-08 00:00:00                3.0                 47.832000   
2019-04-08 03:00:00                8.5                 51.805000   
2019-04-08 06:00:00               14.5                 52.390000   
2019-04-08 09:00:00               20.5                 65.701667   
2019-04-08 12:00:00               26.5                 42.498333   
...                                ...                       ...   
2021-12-15 12:00:00               26.5                320.000000   
2021-12-15 15:00:00               32.5                206.166667   
2021-12-15 18:00:00               38.5                300.191667   
2021-12-15 21:00:00               44.5                242.350000   
2021-12-16 00:00:00  

In [3]:
# Plot the resampled system buy price against time
fig, axs = plt.subplots(1,1)
axs.plot(price.index, price['System Buy Price(£/MWh)'].values)
# Label the axes so the figure can be read on its own when the notebook is skimmed
axs.set_xlabel('Date')
axs.set_ylabel('System Buy Price (£/MWh)')
axs.set_title('System buy price (3-hour mean)');  # trailing ';' keeps the repr out of the cell output

[<matplotlib.lines.Line2D at 0x13d298e50>]

# Model

In [4]:
# Inspect the autocorrelation function of the buy price before modelling.
# A new figure is opened first; autocorrelation_plot is called without an
# explicit `ax`, so it draws on whatever axes are current.
plt.figure()
pd.plotting.autocorrelation_plot(price.loc[:, 'System Buy Price(£/MWh)'])

<AxesSubplot:xlabel='Lag', ylabel='Autocorrelation'>

In [5]:
# First-order difference of the buy price: each value minus the previous
# 3-hour bin. This removes the slowly varying level only; it is not a
# seasonal difference, so any daily/weekly cycle is left in the series
# (this might be improvable, e.g. with a seasonal lag).
# The first row has no predecessor and therefore becomes NaN.
price['stationary'] = price.loc[:, 'System Buy Price(£/MWh)'].diff(periods=1)

In [6]:
# Chronological train/test split of the differenced series
t1 = datetime.datetime(2021, 1, 1)   # training sample: everything strictly before this instant
t2 = datetime.datetime(2021, 1, 6)   # test sample: from t1 (inclusive) up to this instant (exclusive)

# Select each window with a boolean mask on the index and drop NaNs
# (the differencing step leaves one in the first row).
train_data = price.loc[price.index < t1, 'stationary'].dropna()
test_data = price.loc[(price.index >= t1) & (price.index < t2), 'stationary'].dropna()

In [7]:
# Fit an autoregressive model on the training data for several lag orders
# and overlay each out-of-sample forecast on the held-out test data.
fig, axs = plt.subplots(1,1)
axs.plot(test_data.index, test_data.values, c='black', label='observed')

# Forecast window in positional terms: from the first step after the training
# sample to the last test observation. It does not depend on the lag order,
# so it is computed once outside the loop.
forecast_start = len(train_data)
forecast_end = len(train_data) + len(test_data) - 1

lags = np.arange(10, 2000, 400)   # lag orders tried: 10, 410, 810, 1210, 1610
for i in lags:
    model = AutoReg(train_data, lags=i)
    model_fitted = model.fit()

    # make predictions over the test window
    predictions = model_fitted.predict(start=forecast_start, end=forecast_end, dynamic=False)
    # plotted positionally against the test timestamps (one prediction per test row)
    axs.plot(test_data.index, predictions, label=str(i))

# Label the figure so the curves can be interpreted without reading the code
axs.set_xlabel('Date')
axs.set_ylabel('Differenced System Buy Price (£/MWh)')
axs.legend(title='AR lags');  # trailing ';' keeps the Legend repr out of the cell output


<matplotlib.legend.Legend at 0x14269ef40>