# SARIMA forecast models

In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.tsa.arima.model import ARIMA
import os

# Pull data and split into training and test data

In [None]:
# Load the spot-price CSV that sits in the current working directory.
path = os.path.join(os.getcwd(), "priceData2019To2024.csv")
price_df = pd.read_csv(path, sep=";")
# Header cells may carry surrounding whitespace; normalise them before lookup.
price_df.columns = price_df.columns.str.strip()

# Build one timestamp per row from the "Datum" and "von" text columns.
price_df["time"] = pd.to_datetime(
    price_df["Datum"] + " " + price_df["von"],
    format="%d.%m.%Y %H:%M",
)
start_time = price_df["time"].min()
# Elapsed time since the earliest timestamp, expressed in hours.
price_df["hours"] = (price_df["time"] - start_time).dt.total_seconds() / 60 / 60
price_df["price"] = price_df["Spotmarktpreis in ct/kWh"]

# Keep only rows whose price is finite (drops NaN and +/-inf entries).
valid_rows = price_df.loc[np.isfinite(price_df["price"])]
time, price = (
    np.array(valid_rows[column].tolist()) for column in ("hours", "price")
)

In [None]:
SAMPLES_PER_DAY = 24
PREDICTION_HORIZON = 24

# The last 366 days of samples are held out; the PRMSE is computed on them.
nTestSamples = int(366 * SAMPLES_PER_DAY)
# Everything before the hold-out window is available for fitting.
nTrainingSamples = price.size - nTestSamples

trainingData, testData = price[:nTrainingSamples], price[-nTestSamples:]
# Matching time axes (hours since the first sample).
trainingTime, testTime = time[:nTrainingSamples], time[-nTestSamples:]

In [None]:
# Overview of the train/test split on the common time axis.
figure = plt.figure()
plt.plot(trainingTime, trainingData, label="Training data")
plt.plot(testTime, testData, label="Test data")
# The time axis is computed in hours and the price column is given in ct/kWh,
# so the axis labels must state exactly those units.
plt.xlabel("Time [hours]")
plt.ylabel("Price [ct/kWh]")
plt.legend()

In [None]:
# Show the autocorrelation of the training data over ten days' worth of lags.
acf_size = 10 * SAMPLES_PER_DAY
plot_acf(x=trainingData, lags=np.arange(acf_size))

In [None]:
def fitAndPlotForecast(
    trainingData,
    trainingTime,
    testData,
    testTime,
    arimaModel,
    title="Forecast",
    ylabel="Price [ct/kWh]",
):
    """Fit ``arimaModel``, plot its forecast against the held-out data, and return the fit.

    The model is fitted with ``method="innovations_mle"``. A forecast of
    ``2 * PREDICTION_HORIZON`` steps is drawn together with the last
    ``10 * SAMPLES_PER_DAY`` training samples and the corresponding first
    samples of the test set. The fit summary is printed.

    Args:
        trainingData: Series the model was constructed on; only its tail is plotted.
        trainingTime: Time axis matching ``trainingData``.
        testData: Held-out series; its first samples are plotted as the true future.
        testTime: Time axis matching ``testData``.
        arimaModel: Unfitted model object providing ``fit(method=...)``.
        title: Title of the figure.
        ylabel: Label of the y axis.

    Returns:
        The fitted results object returned by ``arimaModel.fit``.
    """
    n_hist = SAMPLES_PER_DAY * 10
    n_pred = PREDICTION_HORIZON * 2

    trainedArimaModel = arimaModel.fit(method="innovations_mle")
    forecast = trainedArimaModel.get_forecast(n_pred)

    # Window of known history shown left of the forecast origin.
    pastTime = trainingTime[-n_hist:]
    pastData = trainingData[-n_hist:]
    # The forecast starts where the test set starts, so they share a time axis.
    futureTime = testTime[0:n_pred]
    futureData = testData[0:n_pred]

    plt.figure()
    plt.title(title)
    plt.plot(pastTime, pastData, label="Known past")
    plt.plot(futureTime, forecast.predicted_mean, label="Nominal forecast")
    plt.plot(futureTime, futureData, label="Unknown future")
    plt.legend()
    plt.xlabel("Time [hours]")
    plt.ylabel(ylabel)

    print(trainedArimaModel.summary())

    return trainedArimaModel

# Forecasting

## Persistence forecast

In [None]:
# ARIMA(0, 1, 0) without trend: the model specification of this section.
arima010 = ARIMA(endog=trainingData, order=(0, 1, 0), trend="n")
trainedArima010 = fitAndPlotForecast(
    trainingData=trainingData,
    trainingTime=trainingTime,
    testData=testData,
    testTime=testTime,
    arimaModel=arima010,
    title="Persistence forecast",
)

In [None]:
# Residual autocorrelation of the fitted persistence model.
plot_acf(
    x=trainedArima010.resid,
    lags=np.arange(acf_size),
    title="Persistence model",
)

## ARIMA(0, 0, 0)(0, 1, 0)<sub>24</sub> without trend (seasonal persistence forecast)

In [None]:
# Seasonal period, in samples, shared by all seasonal models below.
season_periods = 24
# Seasonal differencing only, no trend term.
arima000010s48 = ARIMA(
    endog=trainingData,
    order=(0, 0, 0),
    seasonal_order=(0, 1, 0, season_periods),
    trend="n",
)
trainedArima000010s48 = fitAndPlotForecast(
    trainingData=trainingData,
    trainingTime=trainingTime,
    testData=testData,
    testTime=testTime,
    arimaModel=arima000010s48,
    title="ARIMA$(0, 0, 0)(0, 1, 0)_{s}$",
)

In [None]:
# Residual autocorrelation of the seasonal persistence model.
plot_acf(x=trainedArima000010s48.resid, lags=np.arange(acf_size))

## ARIMA(0, 0, 0)(2, 0, 0)<sub>24</sub>

In [None]:
# Seasonal AR(2) model with a linear time trend, as named in the plot title.
# The series is a NumPy array, which has no ``shift`` method, so lagged copies
# of it cannot be built that way as exogenous regressors; a model with exog
# could also not be forecast or extended without future exog values.
# TODO: ideas noted for later exploration - seasonal periods 24 and 168,
# orders p, q in [0, 5] and d in [0, 1].
arima000200s48 = ARIMA(
    endog=trainingData, order=(0, 0, 0), seasonal_order=(2, 0, 0, season_periods), trend="t"
)
# fitAndPlotForecast performs the fit itself, so no separate fit() call is needed.
trainedArima000200s48 = fitAndPlotForecast(
    trainingData,
    trainingTime,
    testData,
    testTime,
    arima000200s48,
    title="ARIMA$(0, 0, 0)(2, 0, 0)_{s}$",
)

In [None]:
# Residual autocorrelation of the fitted model from the previous cell.
plot_acf(x=trainedArima000200s48.resid, lags=np.arange(acf_size))

## ARIMA(1, 0, 0)(2, 0, 0)<sub>24</sub>

In [None]:
# AR(1) plus seasonal AR(2), with trend="t".
arima100200s48 = ARIMA(
    endog=trainingData,
    order=(1, 0, 0),
    seasonal_order=(2, 0, 0, season_periods),
    trend="t",
)
trainedArima100200s48 = fitAndPlotForecast(
    trainingData=trainingData,
    trainingTime=trainingTime,
    testData=testData,
    testTime=testTime,
    arimaModel=arima100200s48,
    title="ARIMA$(1, 0, 0)(2, 0, 0)_{s}$",
)

In [None]:
# Residual autocorrelation of the fitted model from the previous cell.
plot_acf(x=trainedArima100200s48.resid, lags=np.arange(acf_size))

## ARIMA(0, 0, 0)(0, 0, 1)<sub>24</sub>

In [None]:
# Seasonal MA(1) only, with trend="t".
arima000001s48 = ARIMA(
    endog=trainingData,
    order=(0, 0, 0),
    seasonal_order=(0, 0, 1, season_periods),
    trend="t",
)
trainedArima000001s48 = fitAndPlotForecast(
    trainingData=trainingData,
    trainingTime=trainingTime,
    testData=testData,
    testTime=testTime,
    arimaModel=arima000001s48,
    title="ARIMA$(0, 0, 0)(0, 0, 1)_{s}$",
)

In [None]:
# Residual autocorrelation of the fitted model from the previous cell.
plot_acf(x=trainedArima000001s48.resid, lags=np.arange(acf_size))

# Combination into ARIMA model

In [None]:
# Combined specification: AR(1) with seasonal AR(1), seasonal differencing
# and seasonal MA(1), with trend="t".
arima100111s48 = ARIMA(
    endog=trainingData,
    order=(1, 0, 0),
    seasonal_order=(1, 1, 1, season_periods),
    trend="t",
)
trainedArima100111s48 = fitAndPlotForecast(
    trainingData=trainingData,
    trainingTime=trainingTime,
    testData=testData,
    testTime=testTime,
    arimaModel=arima100111s48,
    title="ARIMA$(1, 0, 0)(1, 1, 1)_{s}$",
)

In [None]:
# Residual autocorrelation of the combined model.
plot_acf(x=trainedArima100111s48.resid, lags=np.arange(acf_size))

# Out-of-sample analysis

## Prediction-Root-Mean-Square-Error (PRMSE)

In [None]:
def calculatePrmse(
    trainedModel, testData, testTime, nTestSamples, forecastHorizon
):
    """Compute the rolling prediction RMSE of a fitted model on held-out data.

    For each origin ``k`` in ``range(nTestSamples)`` the model forecasts
    ``forecastHorizon`` steps, the forecast is compared with
    ``testData[k : k + forecastHorizon]``, and the model is then extended by
    the single observation ``testData[k]`` before moving to the next origin.

    Args:
        trainedModel: Fitted results object providing ``forecast(steps)`` and
            ``extend(endog)``; ``extend`` must return the updated results.
        testData: One-dimensional held-out series.
        testTime: Time axis of ``testData``. Unused; kept for interface
            compatibility with existing callers.
        nTestSamples: Number of forecast origins to evaluate.
        forecastHorizon: Number of steps per forecast (at least 1).

    Returns:
        Array of length ``nTestSamples`` holding the RMSE of each forecast window.

    Raises:
        ValueError: If ``forecastHorizon`` is smaller than 1, or if the last
            forecast window would run past the end of ``testData``.
    """
    if forecastHorizon < 1:
        raise ValueError("forecastHorizon must be at least 1")

    testData = np.asarray(testData)
    # A truncated final window could silently broadcast against the forecast
    # and yield a wrong error value, so reject it up front.
    if nTestSamples > 0 and nTestSamples + forecastHorizon - 1 > testData.shape[0]:
        raise ValueError(
            "testData is too short for nTestSamples forecast windows of "
            "length forecastHorizon"
        )

    prmse = np.zeros(nTestSamples)
    for k in range(nTestSamples):
        forecast = np.asarray(trainedModel.forecast(forecastHorizon))
        actual = testData[k : k + forecastHorizon]
        prmse[k] = np.sqrt(np.mean((actual - forecast) ** 2))
        # Reveal exactly one new observation to the model before the next origin.
        trainedModel = trainedModel.extend(testData[k : k + 1])
    return prmse

In [None]:
# Stop PREDICTION_HORIZON samples before the end of the test set so that
# every forecast window lies completely inside it.
nTest = nTestSamples - PREDICTION_HORIZON
# Evaluate all fitted models in the same order as they were defined above.
(
    prmseArima010,
    prmseArima000010s48,
    prmseArima000200s48,
    prmseArima100200s48,
    prmseArima000001s48,
    prmseArima100111s48,
) = (
    calculatePrmse(fitted, testData, testTime, nTest, PREDICTION_HORIZON)
    for fitted in (
        trainedArima010,
        trainedArima000010s48,
        trainedArima000200s48,
        trainedArima100200s48,
        trainedArima000001s48,
        trainedArima100111s48,
    )
)

In [None]:
# Per-window PRMSE series of all models, in the order used for the y ticks.
prmse = [
    prmseArima010,
    prmseArima000010s48,
    prmseArima000200s48,
    prmseArima100200s48,
    prmseArima000001s48,
    prmseArima100111s48,
]
fig, ax = plt.subplots()
bp = ax.boxplot(prmse, vert=False, medianprops=dict(color="firebrick"))
# Box positions start at 1; label each box with its model order.
plt.yticks(
    [1, 2, 3, 4, 5, 6],
    [
        "$(010)$",
        "$(000)(010)_{s}$",
        "$(000)(200)_{s}$",
        "$(100)(200)_{s}$",
        "$(000)(001)_{s}$",
        "$(100)(111)_{s}$",
    ],
    rotation=60,
)
# The PRMSE has the unit of the price series, which is never normalised.
plt.xlabel("PRMSE [ct/kWh]")
mean = np.mean(prmse, axis=1)
# Annotate every box with the mean PRMSE, placed next to its median line.
for line, model_mean in zip(bp["medians"], mean):
    x, y = line.get_xydata()[1]
    ax.annotate(f"μ={model_mean:.4f}", xy=(x - 0.02, y + 0.07), color="firebrick")