### Libraries, paths, and set-up

In [1]:
import pandas as pd
import numpy as np
import datetime as dt
from math import sqrt
import os
import requests 
import pickle
os.chdir('..')
import warnings
warnings.filterwarnings('ignore')
import plotly.express as px
import plotly.io as pio
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import pmdarima as pm
from statsmodels.tsa.arima.model import ARIMA
from src.models.metrics import *
from src.utils.utils import *

storing_path = 'data/processed/'
if not os.path.exists('models'):
    os.mkdir('models')

fulldata = pd.read_csv(storing_path + 'fulldata.csv')
fulldata = datetimer(fulldata)

### AR(1) - Baseline
An autogressive approach using only the previous observation: $y_{t} = \phi \cdot y_{t-1} + \epsilon_{t}$
- We define these by grouping plants and their timestamp ranges

In [5]:
AR1_df = fulldata.set_index(['plant', 'datetime'])
AR1_df = np.log(AR1_df[['daily_ask']]).diff().dropna()

In [6]:
# Initializing prediction and actual lists
y_pred_1 = []
y_actual_1 = []

# A dictionary to pickle the models
models_dict = {}

# Fitting the AR(1) model and making predictions
for plant, group in AR1_df.groupby('plant'):
    if len(group) > 1:  # Ensuring we are regressing for plants with more than 1 observation in the timeframe
        model = ARIMA(group['daily_ask'], order=(1, 0, 0))
        model_fit = model.fit(low_memory=False)
        
        # Save each model in the dictionary
        models_dict[plant] = model_fit
        
        # Makes predictions and append to list
        pred = model_fit.predict()
        y_pred_1.append(pred)
        
        # Appends actual values to list
        y_actual_1.append(group['daily_ask'])
        
# Converting lists to pandas series
y_pred_1 = pd.concat(y_pred_1)
y_actual_1 = pd.concat(y_actual_1)

# Ensures predictions and actual values have the same index (proper alignment)
y_pred_1.index = y_actual_1.index

# Pickle the AR(1) models dictionary and save it to a file
# with open('/Users/manotas/Desktop/models/ar1_models.pkl', 'wb') as f:
#    pickle.dump(models_dict, f)

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._

Using a custom function to return metrics we use across architectures for direct comparison

In [7]:
# Calculate and print performance metrics for training data
print("Naïve model - Test metrics:")
calculate_metrics(y_actual_test, predictions_test)

Naïve model - Test metrics:
RMSE: 0.21751556000396013
MAE: 0.06192927721710418
sMAPE(0-200): 29.274042622206597%
R-squared: 0.9118171819216949


### ARIMA - Improvement over baseline

In [2]:
ARIMA_df = fulldata.set_index(['plant', 'datetime'])
ARIMA_df = np.log(ARIMA_df[['daily_ask']]).diff().dropna()

In [3]:
# Initialize prediction and actual lists
y_pred_arima = []
y_actual_arima = []

# ARIMA model dictionary
arima_models_dict = {}

# Fitting the ARIMA model and making predictions
for plant, group in ARIMA_df.groupby('plant'):
    if len(group) > 1: 
#        Using auto_arima to find the optimal parameters
#        auto = pm.auto_arima(group['daily_ask'], seasonal=False, m=365, start_p=2, max_p=5 
#                            d=1, max_d=1, 
#                             max_q=3,
#                             trace=True,error_action="ignore", suppress_warnings=True, max_iter=20)
#        p, d, q = auto.order

        # Fitting the ARIMA model - autoARIMA optimization as of Hyndman and Kandakar (2008) - Similar to Gao (2017) best model
        model = ARIMA(group['daily_ask'], order=(5, 1, 3))
        model_fit = model.fit(low_memory=False)
        
        # Saving each model in the dictionary
        arima_models_dict[plant] = model_fit
        
        # Making predictions and append to list
        pred = model_fit.predict()
        y_pred_arima.append(pred)
        
        # Append actual values to list
        y_actual_arima.append(group['daily_ask'])

# Convert lists to pandas series
predictions_arima = pd.concat(y_pred_arima)
y_actual_arima = pd.concat(y_actual_arima)

# Ensure predictions and actual values have the same index for proper alignment
predictions_arima.index = y_actual_arima.index

# Pickle the ARIMA models dictionary and save it to a file
with open('/Users/manotas/Desktop/models/arima_models.pkl', 'wb') as f:
    pickle.dump(arima_models_dict, f)

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._

In [4]:
# Calculate metrics
print('ARIMA(5,1,3) model - Test metrics:')
calculate_metrics(y_actual_arima, predictions_arima)

ARIMA(5,1,3) model - Test metrics:
RMSE: 0.2064922142403573
MAE: 0.058952391031131826
sMAPE(0-200): 19.65463038611697%
R-squared: 0.9205286342337993
