In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error
from statsmodels.tsa.stattools import adfuller
from pmdarima import auto_arima
from sklearn.model_selection import KFold

df = pd.read_excel('../City-Specific Data/Chicago.xlsx')
df = df.dropna()

energy_demand = df['EnergyDemand']

def test_stationarity(timeseries):
    result = adfuller(timeseries, autolag='AIC')
    print('ADF Statistic: %f' % result[0])
    print('p-value: %f' % result[1])
    print('Critical Values:')
    for key, value in result[4].items():
        print('\t%s: %.3f' % (key, value))

test_stationarity(energy_demand)

energy_demand_diff = energy_demand.diff().dropna()
test_stationarity(energy_demand_diff)

kf = KFold(n_splits=5, shuffle=True, random_state=42)

mse_scores = []
mae_scores = []
mape_scores = []

def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    non_zero_indices = y_true != 0
    return np.mean(np.abs((y_true[non_zero_indices] - y_pred[non_zero_indices]) / y_true[non_zero_indices])) * 100

for train_index, test_index in kf.split(energy_demand_diff):
    train, test = energy_demand_diff.iloc[train_index], energy_demand_diff.iloc[test_index]

    model = auto_arima(train, start_p=0, start_q=0, max_p=5, max_q=5, m=12,
                       start_P=0, seasonal=True, d=1, D=1, trace=True,
                       error_action='ignore', suppress_warnings=True, stepwise=True)

    forecast = model.predict(n_periods=len(test))
    forecast = pd.Series(forecast, index=test.index)

    if forecast.isnull().any() or test.isnull().any():
        print("NaN values found in forecast or test data.")
        continue

    mse = mean_squared_error(test, forecast)
    mae = mean_absolute_error(test, forecast)
    mape = mean_absolute_percentage_error(test, forecast)
    mse_scores.append(mse)
    mae_scores.append(mae)
    mape_scores.append(mape)

average_mse = np.mean(mse_scores)
average_mae = np.mean(mae_scores)
average_mape = np.mean(mape_scores)

print(f"Average Mean Squared Error: {average_mse}")
print(f"Average Mean Absolute Error: {average_mae}")
print(f"Average Mean Absolute Percentage Error: {average_mape}")

plt.figure(figsize=(12, 6))
plt.plot(test, label='Actual')
plt.plot(forecast, label='Forecast')
plt.title('Forecast vs Actual Energy Demand')
plt.xlabel('Time')
plt.ylabel('Energy Demand')
plt.legend()
plt.show()

ADF Statistic: -13.876101
p-value: 0.000000
Critical Values:
	1%: -3.430
	5%: -2.862
	10%: -2.567
ADF Statistic: -35.930386
p-value: 0.000000
Critical Values:
	1%: -3.430
	5%: -2.862
	10%: -2.567
Performing stepwise search to minimize aic
 ARIMA(0,1,0)(0,1,1)[12]             : AIC=inf, Time=9.60 sec
 ARIMA(0,1,0)(0,1,0)[12]             : AIC=122644.753, Time=0.29 sec
 ARIMA(1,1,0)(1,1,0)[12]             : AIC=117680.151, Time=6.01 sec
 ARIMA(0,1,1)(0,1,1)[12]             : AIC=inf, Time=10.12 sec
 ARIMA(1,1,0)(0,1,0)[12]             : AIC=122634.585, Time=0.29 sec
 ARIMA(1,1,0)(2,1,0)[12]             : AIC=117473.955, Time=21.91 sec
 ARIMA(1,1,0)(2,1,1)[12]             : AIC=inf, Time=64.58 sec
 ARIMA(1,1,0)(1,1,1)[12]             : AIC=inf, Time=20.45 sec
 ARIMA(0,1,0)(2,1,0)[12]             : AIC=117544.169, Time=4.58 sec
 ARIMA(2,1,0)(2,1,0)[12]             : AIC=117394.604, Time=15.71 sec
 ARIMA(2,1,0)(1,1,0)[12]             : AIC=117567.586, Time=8.71 sec
 ARIMA(2,1,0)(2,1,1)[12] 

  return get_prediction_index(
  return get_prediction_index(


NaN values found in forecast or test data.
Performing stepwise search to minimize aic
 ARIMA(0,1,0)(0,1,1)[12]             : AIC=inf, Time=16.19 sec
 ARIMA(0,1,0)(0,1,0)[12]             : AIC=316711.403, Time=0.45 sec
 ARIMA(1,1,0)(1,1,0)[12]             : AIC=302520.861, Time=29.62 sec
 ARIMA(0,1,1)(0,1,1)[12]             : AIC=inf, Time=49.05 sec
 ARIMA(1,1,0)(0,1,0)[12]             : AIC=307392.981, Time=0.94 sec
 ARIMA(1,1,0)(2,1,0)[12]             : AIC=300663.717, Time=105.32 sec
 ARIMA(1,1,0)(2,1,1)[12]             : AIC=inf, Time=164.48 sec
 ARIMA(1,1,0)(1,1,1)[12]             : AIC=inf, Time=45.47 sec
 ARIMA(0,1,0)(2,1,0)[12]             : AIC=310040.475, Time=19.76 sec
 ARIMA(2,1,0)(2,1,0)[12]             : AIC=296416.275, Time=111.82 sec
 ARIMA(2,1,0)(1,1,0)[12]             : AIC=298153.828, Time=39.00 sec
 ARIMA(2,1,0)(2,1,1)[12]             : AIC=inf, Time=172.95 sec
 ARIMA(2,1,0)(1,1,1)[12]             : AIC=inf, Time=61.47 sec
 ARIMA(3,1,0)(2,1,0)[12]             : AIC=2

: 