In [None]:
# Here I aim to select the values of p and q, i.e. the autoregressive (AR) and moving-average (MA) orders of the ARIMA model.

# Partial Autocorrelation Function (PACF): helps identify the AR (p) order.
# Spikes outside the confidence bounds in the PACF plot suggest the lag order for the AR term.

# Autocorrelation Function (ACF): helps identify the MA (q) order.
# Spikes outside the confidence bounds in the ACF plot suggest the lag order for the MA term.

In [None]:
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
import matplotlib.pyplot as plt
import pmdarima as pm
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

In [None]:
# Load the price data and index it by timestamp so it behaves as a time series.
# Per the original author's note, both isem_original and isem_diff_1 meet the
# stationarity assumptions, so either file can be used here.
df = (
    pd.read_csv('data/arima/isem_original.csv')
    # parse the timestamp column in place (column position is unchanged)
    .assign(StartDateTime=lambda raw: pd.to_datetime(raw['StartDateTime']))
    .set_index('StartDateTime')
)

# Uncomment to sanity-check the load:
# print(df.head())


In [None]:
# Visualise the ACF and PACF of the price series to eyeball plausible p and q.
#
# The orders are chosen automatically later on (pmdarima's auto_arima searches
# over candidate combinations and keeps the best one), so this cell is a
# nice-to-have: it illustrates WHY the search lands where it does -- the author
# reports ARIMA(2, 0, 1), i.e. p = 2, d = 0, q = 1.

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))
acf_ax, pacf_ax = ax
price_series = df['ISEM DA Price']

# Left panel: autocorrelation (guides the MA order, q)
plot_acf(price_series, lags=40, ax=acf_ax)
acf_ax.set_title('Autocorrelation Function (ACF)')

# Right panel: partial autocorrelation (guides the AR order, p)
plot_pacf(price_series, lags=40, method='ywm', ax=pacf_ax)
pacf_ax.set_title('Partial Autocorrelation Function (PACF)')

plt.tight_layout()
plt.show()

In [None]:
# Evaluate auto_arima on a chronological hold-out.
#
# The order search and the fit see ONLY the training window, so the forecast
# over the test window is a genuine out-of-sample forecast. Fitting on the full
# series would leak the test data into the model, and predict() would then
# forecast the periods *after* the end of the data rather than the test window.
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error

TRAIN_FRACTION = 0.9  # first 90% of rows for training, last 10% held out

# 1. Split data into train and test sets (90-10 split, in time order, no shuffling)
train_size = int(len(df) * TRAIN_FRACTION)
train = df.iloc[:train_size]
test = df.iloc[train_size:]
assert len(train) + len(test) == len(df)
assert len(train) > 0 and len(test) > 0, "need at least one row in both train and test"

# 2. Fit auto_arima model on the training data only
model = pm.auto_arima(train['ISEM DA Price'],
                     start_p=0, max_p=5,    # range of p values to try
                     start_q=0, max_q=5,    # range of q values to try
                     d=0,                   # no differencing: the loaded series is assumed stationary
                     seasonal=False,         # no seasonal component
                     trace=True,            # print model AIC results
                     error_action='ignore',  # skip candidate orders that fail to fit
                     suppress_warnings=True,
                     stepwise=True)         # use stepwise search

# Print the best model's order
print("\nBest ARIMA order:", model.order)

# Get model summary
print(model.summary())

# 3. Forecast exactly the held-out window: the first forecast step is the
# period immediately after the last training observation, i.e. test row 0.
predictions = model.predict(n_periods=len(test))

# 4. Plot results
forecast_fig, forecast_ax = plt.subplots(figsize=(12, 6))
forecast_ax.plot(train.index, train['ISEM DA Price'], label='Training Data')
forecast_ax.plot(test.index, test['ISEM DA Price'], label='Actual Test Data')
forecast_ax.plot(test.index, predictions, label='Predictions', color='red')
forecast_ax.set_title('Auto ARIMA Model: Actual vs Predicted')
forecast_ax.set_xlabel('Date')
forecast_ax.set_ylabel('Price')
forecast_ax.legend()
forecast_ax.grid(True)
plt.show()

# 5. Calculate error metrics on the held-out window
rmse = np.sqrt(mean_squared_error(test['ISEM DA Price'], predictions))
mae = mean_absolute_error(test['ISEM DA Price'], predictions)
print(f'Root Mean Squared Error (RMSE): {rmse}')
print(f'Mean Absolute Error (MAE): {mae}')
