In [None]:
# To create an ARIMA model, the data must be stationary.
# Here I use differencing to remove trends and ensure we meet this stationarity assumption.
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller


# Load the raw CSV with 'StartDateTime' parsed as datetimes and used as the
# index, then conform that index to a daily frequency.
# NOTE(review): asfreq('D') reindexes onto a daily grid; if the raw data is
# sub-daily, only rows whose timestamp falls on that grid are kept, and missing
# days become NaN rows — confirm the source data really is daily.
df = pd.read_csv(
    'data/data_raw.csv',
    index_col='StartDateTime',
    parse_dates=['StartDateTime'],
).asfreq('D')

# Price series that the stationarity checks below operate on
time_series = df['ISEM DA Price']

In [None]:
# Plot the undifferenced price series for a visual check before testing
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(time_series)
ax.set_title('Original ISEM DA Price Time Series')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
plt.show()

# Augmented Dickey-Fuller test on the original series (NaNs removed first,
# since adfuller cannot handle missing values).
result = adfuller(time_series.dropna())
adf_stat, p_value, critical_values = result[0], result[1], result[4]
print(f'ADF Statistic: {adf_stat:.6f}')
print(f'p-value: {p_value:.6f}')
print('Critical Values:')
for level, threshold in critical_values.items():
    print(f'   {level}: {threshold:.3f}')

In [None]:
# Differencing iterations: for each order d = 1..max_diff_order, plot the
# d-th order difference of the price series and run an ADF test on it.

max_diff_order = 2  # Set the maximum differencing order you want to test

# Running d-th order difference. NaNs are kept on this series and only dropped
# on the copy that is plotted/tested, so every further difference is still
# taken between consecutive timestamps rather than across removed gaps.
differenced = time_series

for d in range(1, max_diff_order + 1):
    print(f'\n### Differencing Order: {d} ###')

    # Apply one more first difference. After d passes this is the d-th order
    # difference, which is what the ARIMA `d` parameter means.
    # (time_series.diff(d) would instead be a single lag-d difference,
    # x[t] - x[t-d], which is not the same series for d > 1.)
    differenced = differenced.diff()

    # Drop NaN values introduced by differencing (and by any gaps in the data)
    diff_series = differenced.dropna()

    # Plot the differenced series
    plt.figure(figsize=(12,6))
    plt.plot(diff_series)
    plt.title(f'ISEM DA Price Time Series Differenced by Order {d}')
    plt.xlabel('Date')
    plt.ylabel('Differenced Price')
    plt.show()

    # Perform the ADF test on the differenced series
    result_diff = adfuller(diff_series)
    print('ADF Statistic: {:.6f}'.format(result_diff[0]))
    print('p-value: {:.6f}'.format(result_diff[1]))
    print('Critical Values:')
    for key, value in result_diff[4].items():
        print('   {}: {:.3f}'.format(key, value))
