# Forecasting with SARIMA

### Imports

In [35]:
import numpy as np
import pandas as pd

import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib import dates
matplotlib.use('TkAgg')

import joblib
import statsmodels.api as sm
import pmdarima as pmd
import plotly.graph_objects as go

from tkinter import *
import dtale as dt

from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta

from sklearn.preprocessing import PowerTransformer
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.tsa.stattools import adfuller

# Show floats in DataFrame output with thousands separators and 6 significant digits.
pd.options.display.float_format = '{:,.6}'.format
# pd.set_option('max_columns', 100)


### 1.] Preparing the Data

In [2]:

# Load the previously saved energy and system trade objects (used below as DataFrames).
# NOTE(review): joblib.load unpickles the file, so only load artifacts from a trusted source.
df_enr = joblib.load("./data/GTMA Trades_df_enr")
df_sys = joblib.load("./data/GTMA Trades_df_sys")

# df_sys.head(4)

In [14]:
# Open the monthly system-trades frame in D-Tale (launches a browser tab).
# NOTE(review): df_sysM is only assigned further down, in the cell that reloads
# "./data/GTMA Trades_df_sysM"; on a fresh top-to-bottom run this cell raises NameError.
# Run it after that cell, or move it below it.
dt.show(df_sysM, ignore_index=True, open_browser=True)



In [3]:

#* System Trades -- resample to month-end frequency
# Volume and Cost are summed per month; Price and Trade Hours Duration are averaged per month.
df_sys_M1 = (
    df_sys.set_index(['End Time_D&T'])
    .loc[:, ['Volume', 'Cost']]
    .resample('M')
    .sum()
)
df_sys_M2 = (
    df_sys.set_index(['End Time_D&T'])
    .loc[:, ['Price', 'Trade Hours Duration']]
    .resample('M')
    .mean()
)
# Left join on the shared monthly index: every month of the totals frame is kept.
df_sys_M = df_sys_M1.join(df_sys_M2, how='left')

# df_sys_D["Cost"].head(6)

In [4]:

#* Energy Trades -- resample to month-end frequency
# Volume and Cost are summed per month; Price and Trade Hours Duration are averaged per month.
df_enr_M1 = (
    df_enr.set_index(['End Time_D&T'])
    .loc[:, ['Volume', 'Cost']]
    .resample('M')
    .sum()
)
df_enr_M2 = (
    df_enr.set_index(['End Time_D&T'])
    .loc[:, ['Price', 'Trade Hours Duration']]
    .resample('M')
    .mean()
)
# Left join on the shared monthly index: every month of the totals frame is kept.
df_enr_M = df_enr_M1.join(df_enr_M2, how='left')

# Show the six most recent months as the cell output.
df_enr_M.tail(6)

Unnamed: 0_level_0,Volume,Cost,Price,Trade Hours Duration
End Time_D&T,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-01-31,13450.0,1388880.0,109.423,1.0
2021-02-28,-883.0,1787770.0,41.0662,1.00992
2021-03-31,-19128.0,379342.0,27.3795,1.0
2021-04-30,-9765.0,1810570.0,55.6336,1.01045
2021-05-31,-58230.0,1460120.0,-18.7634,1.01138
2021-06-30,-1200.0,-59438.5,49.743,1.0


In [5]:
# Remove the last monthly row from both frames (for the energy frame that is 2021-06-30,
# per the tail printed above) -- presumably because that month is incomplete; TODO confirm.
# NOTE(review): this mutates the frames in place and is not idempotent -- each re-run of
# the cell drops one more month.
df_sys_M.drop(labels=df_sys_M.index[-1], inplace=True)
df_enr_M.drop(labels=df_enr_M.index[-1], inplace=True)

In [6]:
# Persist the monthly system and energy frames so later sessions can start from them.
joblib.dump(df_sys_M,"./data/GTMA Trades_df_sysM")
joblib.dump(df_enr_M,"./data/GTMA Trades_df_enrM")

['./data/GTMA Trades_df_enrM']

In [44]:
# Reload the monthly frames written by the dump cell; the rest of the notebook works on
# df_sysM / df_enrM (no underscore before the M), not on df_sys_M / df_enr_M.
df_sysM = joblib.load("./data/GTMA Trades_df_sysM")
df_enrM = joblib.load("./data/GTMA Trades_df_enrM")

> Power Transform

In [45]:
# Saved transformer object -- presumably a scikit-learn PowerTransformer (imported above); TODO confirm.
transformer_pwr = joblib.load('./transformer_pwr')

# Add a power-transformed "Cost_pt" column to each monthly frame (system first, then energy).
# fit_transform re-fits the same transformer on every frame, so after this cell the
# transformer holds the fit obtained from df_enrM.
for frame in (df_sysM, df_enrM):
    frame["Cost_pt"] = transformer_pwr.fit_transform(frame[["Cost"]]).ravel()

### Auxiliary Functions

In [50]:

#? Ho: It is non stationary
#? H1: It is stationary

def adfuller_test(series):
    """Run the Augmented Dickey-Fuller test on ``series`` and print the outcome.

    Prints the first four entries returned by ``adfuller`` (test statistic,
    p-value, lags used, number of observations) followed by a verdict: the
    null hypothesis of a unit root is rejected when the p-value is <= 0.05.

    Parameters
    ----------
    series : data passed unchanged to ``adfuller``.

    Returns
    -------
    None. The results are only printed.
    """
    outcome = adfuller(series)
    headings = ('ADF Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used')
    for position, heading in enumerate(headings):
        print(heading + ' : ' + str(outcome[position]))

    # outcome[1] is the p-value, tested against the 5% significance level.
    rejects_unit_root = outcome[1] <= 0.05
    verdict = (
        "Strong evidence against Null hypothesis(Ho), Reject the null hypothesis. Data has NO unit root and IS Stationary"
        if rejects_unit_root
        else "Weak evidence against Null hypothesis(Ho), Time series has a Unit root, indicating it IS Non-stationary"
    )
    print(verdict)
    

In [46]:
def create_corr_plot(series, plot_pacf=False):
    """Show the ACF (default) or PACF of ``series`` as a Plotly stem plot.

    Each lag is drawn as a vertical line from zero plus a marker. The shaded
    band is the 95% confidence interval (alpha=0.05) with the coefficient
    subtracted, so it is centred on zero.

    Parameters
    ----------
    series : pandas Series; NaN values are dropped before the computation.
    plot_pacf : bool, default False. When True the partial autocorrelation
        is plotted instead of the autocorrelation.

    Returns
    -------
    None. The figure is displayed with ``fig.show()``.
    """
    clean = series.dropna()
    n_obs = len(clean)

    # Size the lag count from the NaN-free data that is actually analysed.
    # The PACF is limited to fewer than half the observations, so counting
    # dropped NaN rows could request more lags than pacf accepts.
    if plot_pacf:
        corr_array = pacf(clean, nlags=(n_obs // 2 - 1), alpha=0.05)
    else:
        corr_array = acf(clean, nlags=n_obs, alpha=0.05)

    coeffs, conf_bounds = corr_array[0], corr_array[1]
    lower_y = conf_bounds[:, 0] - coeffs
    upper_y = conf_bounds[:, 1] - coeffs
    lags = np.arange(len(coeffs))

    fig = go.Figure()
    # One vertical stem per lag.
    for lag in range(len(coeffs)):
        fig.add_scatter(x=(lag, lag), y=(0, coeffs[lag]), mode='lines', line_color='#3f3f3f')
    fig.add_scatter(x=lags, y=coeffs, mode='markers', marker_color='#1f77b4', marker_size=12)
    # Invisible upper bound, then the lower bound filled up to it ('tonexty').
    fig.add_scatter(x=lags, y=upper_y, mode='lines', line_color='rgba(255,255,255,0)')
    fig.add_scatter(x=lags, y=lower_y, mode='lines', fillcolor='rgba(32, 146, 230,0.3)',
                    fill='tonexty', line_color='rgba(255,255,255,0)')
    fig.update_traces(showlegend=False)
    # The x-axis window is fixed; lags beyond 42 are computed but fall outside the initial view.
    fig.update_xaxes(range=[-1,42])
    fig.update_yaxes(zerolinecolor='#000000')

    title = 'Partial Autocorrelation (PACF)' if plot_pacf else 'Autocorrelation (ACF)'
    fig.update_layout(title=title)
    fig.show()

In [47]:

# Row offsets at which the training window may start, keyed by LABEL.
# "_S" keys start at a whole number of years, "_M" keys five months later.
# Assumes row 0 of the monthly frames is the first month of 2016 -- TODO confirm.
dict_train = dict(
    Y2017_S=12,
    Y2017_M=17,   #? Midyear of 2017
    Y2018_S=24,   #? Start of 2018
    Y2018_M=29,
    Y2019_S=36,   #? Start of 2019
    Y2019_M=41,
    Y2020_S=48,
    Y2020_M=53,   #? 2020 midyear, Start of July
)

#? xaxis = go.layout.XAxis(tickangle=-45) | Optional for: xaxis=xaxis
# Layout shared by the figures built in go_plotter: tight margins and fixed axis titles.
layout = go.Layout(
    margin=dict(
        l=20,  # left margin
        r=15,  # right margin
        b=15,  # bottom margin
        t=35,  # top margin
    ),
    xaxis_title="Date",
    yaxis_title="Cost_pt",
)

In [48]:
def go_plotter(df_train, df_test, df_forecast, df_confint, FC, LABEL, CAT):
    """Plot train, test and forecast series together with the confidence band.

    Parameters
    ----------
    df_train, df_test, df_forecast : DataFrames with a datetime index and a
        "Cost_pt" column.
    df_confint : DataFrame indexed like the forecast, with the lower bound in
        column 0 and the upper bound in column 1.
    FC : forecast horizon in months (shown in the title).
    LABEL : name of the training window (shown in the subtitle).
    CAT : trade category, e.g. "System" or "Energy" (shown in the title).

    Returns
    -------
    None. The figure is displayed with ``fig.show()``.
    """
    fig = go.Figure(layout=layout)
    fig.update_layout(title=f"{CAT!s} Trades {FC!s}-Month Forecast<br><sup>Train -- {LABEL}</sup>")
    fig.update_xaxes(type="date")
    # fig.update_xaxes(dtick="M3")

    #? Note: Index of incoming dataframes are Datetime
    # (trace name, frame, trace mode, dash style): only the forecast gets markers,
    # only the training series is dotted.
    series_specs = (
        ("Train", df_train, 'lines', 'dot'),
        ("Test", df_test, 'lines', 'solid'),
        ("Forecast", df_forecast, 'lines+markers', 'solid'),
    )
    for trace_name, frame, trace_mode, dash_style in series_specs:
        fig.add_trace(go.Scatter(
            x=frame.index,
            y=frame["Cost_pt"].values,
            name=trace_name,
            mode=trace_mode,
            line_dash=dash_style,
            hovertemplate="Date = %{x}<br>Cost = %{y}",
        ))

    #? Plotting Confidence interval
    # The lower bound is drawn first; the upper bound is then filled down to it ('tonexty').
    band_specs = (
        (0, 'Conf_lower', {}),
        (1, 'Conf_higher', {'fill': 'tonexty'}),
    )
    for column, band_name, extra in band_specs:
        fig.add_trace(go.Scatter(
            x=df_confint.index,
            y=df_confint[column],
            mode='lines',
            opacity=0.10,
            line_dash='dot',
            name=band_name,
            **extra,
        ))
    fig.show()

In [49]:
EPSILON = 1e-10
def maape(actual, predicted):
    """Mean Arctangent Absolute Percentage Error (MAAPE).

    Computes ``mean(arctan(|(actual - predicted) / (actual + EPSILON)|))``.
    EPSILON is added to the denominator so an actual value of exactly zero
    does not divide by zero.

    Parameters
    ----------
    actual, predicted : array-like of numbers (lists, tuples, NumPy arrays,
        ``DataFrame.values`` ...) with matching or broadcastable shapes.
        Values are compared by position.

    Returns
    -------
    float in radians, between 0 and pi/2.
    Note: the result is NOT multiplied by 100.
    """
    # Coerce to float arrays so plain Python sequences work as well as NumPy input.
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)
    return np.mean(np.arctan(np.abs((actual - predicted) / (actual + EPSILON))))

### 2.] System Trades

In [9]:
# ADF stationarity check on the untransformed monthly system "Cost" column.
# NOTE(review): the models below are fitted on "Cost_pt", and the energy section runs this
# test on "Cost_pt" -- confirm which column is intended here.
adfuller_test(df_sysM["Cost"])

ADF Test Statistic : -3.867829107134802
p-value : 0.0022847863235395506
#Lags Used : 0
Number of Observations Used : 64
Strong evidence against Null hypothesis(Ho), Reject the null hypothesis. Data has NO unit root and IS Stationary


#### ACF & PACF

Set `LABEL` to one of the keys of the `dict_train` dictionary. Best results are obtained with training data that **starts** at the beginning of a year (the `_S` keys). <br>The "Indexing" notes in the code explain how to choose the last entries of the train & test data.

In [51]:
LABEL = 'Y2018_S'

#? Train Data Indexing: positional rows from the LABEL offset up to (not including) row 48.
#? 48 == end of 2019; use 60 for end of 2020.
df_train = df_sysM.iloc[dict_train.get(LABEL):48][['Cost_pt']]

#? Test Data Indexing (label-based, both ends inclusive)
# Start: the month after the last training row.
# End:   df_sysM.index[-1] + relativedelta(months=-5) == Sets 2020 as the Forecast period.
#        df_train.index[-1] + relativedelta(months=+5) == Sets the five months of 2021 as the Forecast period.
#? ==================
df_test = df_sysM.loc[
    df_train.index[-1] + relativedelta(months=+1)
    : df_sysM.index[-1] + relativedelta(months=-5),
    ["Cost_pt"],
]

# Forecast horizon == number of test months.
FC = len(df_test)

In [162]:
# Preview the first test rows to check where the forecast window starts.
df_test.head()

Unnamed: 0_level_0,Cost_pt
End Time_D&T,Unnamed: 1_level_1
2020-01-31,-0.528704
2020-02-29,-0.605104
2020-03-31,-0.689709
2020-04-30,0.10242
2020-05-31,0.388106


In [52]:
# ACF first, then PACF, of the system training series.
create_corr_plot(df_train['Cost_pt'])
create_corr_plot(df_train['Cost_pt'], plot_pacf=True)


#### ARIMA 

In [39]:
# Non-seasonal auto-ARIMA search on the system training series, ranked by AICc.
model_arima_sysM = pmd.auto_arima(
    df_train['Cost_pt'],
    # AR order search range
    start_p=0,
    max_p=11,
    # differencing order
    d=0,
    max_d=1,
    # MA order search range
    start_q=0,
    max_q=1,
    seasonal=False,
    information_criterion="aicc",
    trace=True,
    error_action='ignore',
    suppress_warnings=True,
)
#? Optional: stepwise=False, test='adf', maxiter=75

# Blank lines separate the search trace from the summary table.
print("\n\n\n")
model_arima_sysM.summary()

Performing stepwise search to minimize aicc
 ARIMA(0,0,0)(0,0,0)[0]             : AICC=85.208, Time=0.01 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AICC=71.532, Time=0.01 sec
 ARIMA(0,0,1)(0,0,0)[0]             : AICC=75.741, Time=0.02 sec
 ARIMA(2,0,0)(0,0,0)[0]             : AICC=73.911, Time=0.02 sec
 ARIMA(1,0,1)(0,0,0)[0]             : AICC=73.913, Time=0.02 sec
 ARIMA(2,0,1)(0,0,0)[0]             : AICC=76.457, Time=0.06 sec
 ARIMA(1,0,0)(0,0,0)[0] intercept   : AICC=73.918, Time=0.03 sec

Best model:  ARIMA(1,0,0)(0,0,0)[0]          
Total fit time: 0.185 seconds






0,1,2,3
Dep. Variable:,y,No. Observations:,36.0
Model:,"SARIMAX(1, 0, 0)",Log Likelihood,-33.584
Date:,"Tue, 26 Jul 2022",AIC,71.168
Time:,14:19:53,BIC,74.335
Sample:,0,HQIC,72.273
,- 36,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ar.L1,0.6224,0.117,5.338,0.000,0.394,0.851
sigma2,0.3732,0.076,4.916,0.000,0.224,0.522

0,1,2,3
Ljung-Box (L1) (Q):,0.12,Jarque-Bera (JB):,1.54
Prob(Q):,0.73,Prob(JB):,0.46
Heteroskedasticity (H):,0.79,Skew:,0.27
Prob(H) (two-sided):,0.69,Kurtosis:,3.86


In [20]:

#? conf_int == Confidence Interval
# Forecast FC months ahead with the system ARIMA model fitted above. The model is bound
# to `model_arima_sysM`; the old name `arima_model_sys` is never assigned in this notebook.
prediction, conf_int = model_arima_sysM.predict(n_periods=FC, return_conf_int=True)
# Index the interval bounds and the point forecast by the test dates so they align with the actuals.
df_cf= pd.DataFrame(conf_int, index=df_test.index)

df_forecast = pd.DataFrame(prediction,
                        index=df_test.index, columns=['Cost_pt'])

#? FC, LABEL, CAT
go_plotter(df_train, df_test, df_forecast, df_cf, FC, LABEL, "System")
# Last expression: MAAPE of the forecast against the test data, shown as the cell output.
maape(df_test.values, df_forecast.values)

0.7785387105800013

In [40]:
# Parameters of the selected system ARIMA model (bound to `model_arima_sysM`, not `arima_model_sys`).
model_arima_sysM.get_params()

{'maxiter': 50,
 'method': 'lbfgs',
 'order': (1, 0, 0),
 'out_of_sample_size': 0,
 'scoring': 'mse',
 'scoring_args': {},
 'seasonal_order': (0, 0, 0, 0),
 'start_params': None,
 'trend': None,
 'with_intercept': False}

In [None]:
# Persist the system ARIMA model under ./model/.
joblib.dump(model_arima_sysM,"./model/model_arima_sysM_Y2018S_100")

#### Grid-search SARIMA

In [25]:
# Seasonal (m=12) auto-ARIMA search on the system training series, ranked by AICc.
model_sarima_sysM = pmd.auto_arima(
    df_train["Cost_pt"],
    test='adf',
    # non-seasonal orders
    start_p=1,
    max_p=11,
    d=0,
    max_d=1,
    start_q=0,
    max_q=1,
    # seasonal orders, 12-month season
    seasonal=True,
    m=12,
    start_P=0,
    max_P=2,
    D=0,
    max_D=1,
    start_Q=0,
    max_Q=2,
    # search control
    max_order=None,
    maxiter=75,
    information_criterion="aicc",
    trace=True,
    error_action='ignore',
    suppress_warnings=True,
)
#? Optional: stepwise=False

# Blank lines separate the search trace from the summary table.
print("\n\n\n")
model_sarima_sysM.summary()

Performing stepwise search to minimize aicc
 ARIMA(1,0,0)(0,0,0)[12] intercept   : AICC=61.536, Time=0.02 sec
 ARIMA(0,0,0)(0,0,0)[12] intercept   : AICC=66.972, Time=0.02 sec
 ARIMA(1,0,0)(1,0,0)[12] intercept   : AICC=61.223, Time=0.19 sec
 ARIMA(0,0,1)(0,0,1)[12] intercept   : AICC=inf, Time=0.47 sec
 ARIMA(0,0,0)(0,0,0)[12]             : AICC=64.629, Time=0.01 sec
 ARIMA(1,0,0)(2,0,0)[12] intercept   : AICC=inf, Time=1.10 sec
 ARIMA(1,0,0)(1,0,1)[12] intercept   : AICC=inf, Time=0.64 sec
 ARIMA(1,0,0)(0,0,1)[12] intercept   : AICC=inf, Time=0.37 sec
 ARIMA(1,0,0)(2,0,1)[12] intercept   : AICC=67.827, Time=0.88 sec
 ARIMA(0,0,0)(1,0,0)[12] intercept   : AICC=64.615, Time=0.06 sec
 ARIMA(2,0,0)(1,0,0)[12] intercept   : AICC=62.915, Time=0.09 sec
 ARIMA(1,0,1)(1,0,0)[12] intercept   : AICC=inf, Time=0.52 sec
 ARIMA(0,0,1)(1,0,0)[12] intercept   : AICC=inf, Time=0.22 sec
 ARIMA(2,0,1)(1,0,0)[12] intercept   : AICC=inf, Time=0.86 sec
 ARIMA(1,0,0)(1,0,0)[12]             : AICC=58.318, T

0,1,2,3
Dep. Variable:,y,No. Observations:,24.0
Model:,"SARIMAX(2, 0, 1)x(1, 0, [], 12)",Log Likelihood,-22.085
Date:,"Tue, 26 Jul 2022",AIC,54.17
Time:,13:45:16,BIC,60.06
Sample:,0,HQIC,55.733
,- 24,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ar.L1,1.3437,0.307,4.380,0.000,0.742,1.945
ar.L2,-0.6516,0.244,-2.672,0.008,-1.130,-0.174
ma.L1,-0.9576,0.434,-2.209,0.027,-1.807,-0.108
ar.S.L12,0.5994,0.265,2.265,0.024,0.081,1.118
sigma2,0.2763,0.113,2.440,0.015,0.054,0.498

0,1,2,3
Ljung-Box (L1) (Q):,0.0,Jarque-Bera (JB):,0.47
Prob(Q):,0.98,Prob(JB):,0.79
Heteroskedasticity (H):,0.8,Skew:,0.23
Prob(H) (two-sided):,0.76,Kurtosis:,2.49


In [26]:
# Forecast FC months ahead with the system SARIMA model and request the interval bounds.
prediction, conf_int = model_sarima_sysM.predict(return_conf_int=True, n_periods=FC)

# Index the point forecast and the bounds by the test dates so they align with the actuals.
df_forecast = pd.DataFrame(data=prediction, columns=['Cost_pt'], index=df_test.index)
df_cf = pd.DataFrame(data=conf_int, index=df_test.index)

#? FC, LABEL, CAT == "Category"
go_plotter(df_train, df_test, df_forecast, df_cf, FC, LABEL, "System")
# Last expression: MAAPE of the forecast against the test data, shown as the cell output.
maape(df_test.to_numpy(), df_forecast.to_numpy())

0.855243051566765

In [23]:
# Parameters of the selected system SARIMA model.
model_sarima_sysM.get_params()

{'maxiter': 75,
 'method': 'lbfgs',
 'order': (2, 0, 1),
 'out_of_sample_size': 0,
 'scoring': 'mse',
 'scoring_args': {},
 'seasonal_order': (1, 0, 0, 12),
 'start_params': None,
 'trend': None,
 'with_intercept': False}

In [None]:
# Persist the system SARIMA model.
# NOTE(review): this writes to the working directory, while the ARIMA models are saved
# under ./model/ -- confirm the intended folder.
joblib.dump(model_sarima_sysM,"./model_sarima_sysM_Y2018S_201x100")

### 3.] Energy Trades

In [249]:
# ADF stationarity check on the power-transformed monthly energy cost (passed as a one-column DataFrame).
adfuller_test(df_enrM[["Cost_pt"]])

ADF Test Statistic : -0.57054397351737
p-value : 0.87749994645861
#Lags Used : 10
Number of Observations Used : 54
Weak evidence against Null hypothesis(Ho), Time series has a Unit root, indicating it IS Non-stationary


#### ACF & PACF 

In [53]:
LABEL = 'Y2017_S'

#? Train Data Indexing: positional rows from the LABEL offset up to (not including) row 48.
#? 48 == end of 2019; use 60 for end of 2020.
df_train = df_enrM.iloc[dict_train.get(LABEL):48][['Cost_pt']]

#? Test Data Indexing (label-based, both ends inclusive)
# Start: the month after the last training row.
# End:   df_enrM.index[-1] + relativedelta(months=-5) == Sets 2020 as the Forecast period.
#        df_train.index[-1] + relativedelta(months=+5) == Sets the five months of 2021 as the Forecast period.
#? ==================
df_test = df_enrM.loc[
    df_train.index[-1] + relativedelta(months=+1)
    : df_enrM.index[-1] + relativedelta(months=-5),
    ["Cost_pt"],
]

# Forecast horizon == number of test months.
FC = len(df_test)

In [54]:
# ACF first, then PACF, of the energy training series.
create_corr_plot(df_train['Cost_pt'])
create_corr_plot(df_train['Cost_pt'], plot_pacf=True)

#### ARIMA

In [41]:
# Non-seasonal auto-ARIMA search on the energy training series, ranked by AICc.
# pmdarima is imported as `pmd` in this notebook; the previous `pm.` prefix is undefined.
model_arima_enrM = pmd.auto_arima(df_train['Cost_pt'], start_p=0, max_p=9,
                    d=1, max_d=1, start_q=0, max_q=1,
                    seasonal=False, error_action='ignore',
                    suppress_warnings=True, trace=True, information_criterion="aicc", stepwise=False)
#* Reliable model: (0,1,0) at default stepwise=True
#? Optional arguments: stepwise=False, test='adf', maxiter=75

# Blank lines separate the search trace from the summary table.
print("\n\n")
model_arima_enrM.summary()

 ARIMA(0,1,0)(0,0,0)[0] intercept   : AICC=75.878, Time=0.01 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AICC=77.495, Time=0.03 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AICC=77.459, Time=0.03 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AICC=79.999, Time=0.05 sec
 ARIMA(2,1,0)(0,0,0)[0] intercept   : AICC=79.999, Time=0.03 sec
 ARIMA(2,1,1)(0,0,0)[0] intercept   : AICC=82.703, Time=0.10 sec
 ARIMA(3,1,0)(0,0,0)[0] intercept   : AICC=82.435, Time=0.05 sec
 ARIMA(3,1,1)(0,0,0)[0] intercept   : AICC=inf, Time=0.27 sec
 ARIMA(4,1,0)(0,0,0)[0] intercept   : AICC=84.753, Time=0.08 sec
 ARIMA(4,1,1)(0,0,0)[0] intercept   : AICC=inf, Time=0.85 sec
 ARIMA(5,1,0)(0,0,0)[0] intercept   : AICC=87.851, Time=0.11 sec

Best model:  ARIMA(0,1,0)(0,0,0)[0] intercept
Total fit time: 1.623 seconds





0,1,2,3
Dep. Variable:,y,No. Observations:,36.0
Model:,"SARIMAX(0, 1, 0)",Log Likelihood,-35.757
Date:,"Tue, 26 Jul 2022",AIC,75.514
Time:,14:21:42,BIC,78.625
Sample:,0,HQIC,76.588
,- 36,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,-0.0232,0.116,-0.199,0.842,-0.251,0.205
sigma2,0.4518,0.095,4.733,0.000,0.265,0.639

0,1,2,3
Ljung-Box (L1) (Q):,0.77,Jarque-Bera (JB):,1.37
Prob(Q):,0.38,Prob(JB):,0.5
Heteroskedasticity (H):,1.06,Skew:,-0.35
Prob(H) (two-sided):,0.92,Kurtosis:,3.68


In [42]:

#? conf_int == Confidence Interval
# Forecast FC months ahead with the energy ARIMA model and request the interval bounds.
prediction, conf_int = model_arima_enrM.predict(return_conf_int=True, n_periods=FC)

# Index the point forecast and the bounds by the test dates so they align with the actuals.
df_forecast = pd.DataFrame(data=prediction, columns=['Cost_pt'], index=df_test.index)
df_cf = pd.DataFrame(data=conf_int, index=df_test.index)

#? FC, LABEL, CAT
go_plotter(df_train, df_test, df_forecast, df_cf, FC, LABEL, "Energy")
# Last expression: MAAPE of the forecast against the test data, shown as the cell output.
maape(df_test.to_numpy(), df_forecast.to_numpy())

0.8915583432285207

In [43]:
# Parameters of the selected energy ARIMA model.
model_arima_enrM.get_params()

{'maxiter': 50,
 'method': 'lbfgs',
 'order': (0, 1, 0),
 'out_of_sample_size': 0,
 'scoring': 'mse',
 'scoring_args': {},
 'seasonal_order': (0, 0, 0, 0),
 'start_params': None,
 'trend': None,
 'with_intercept': True}

In [None]:
# Persist the energy ARIMA model under ./model/.
joblib.dump(model_arima_enrM,"./model/model_arima_enrM_Y2017S_010")

#### Grid-search SARIMA

In [286]:

# Seasonal (m=12) auto-ARIMA search on the energy training series, ranked by AICc.
# stepwise=False is passed, so the stepwise search is switched off.
model_sarima_enrM = pmd.auto_arima(
    df_train["Cost_pt"],
    test='adf',
    # non-seasonal orders
    start_p=1,
    max_p=11,
    d=1,
    max_d=1,
    start_q=1,
    max_q=1,
    # seasonal orders, 12-month season
    seasonal=True,
    m=12,
    start_P=0,
    max_P=1,
    D=1,
    max_D=1,
    start_Q=0,
    max_Q=1,
    # search control
    max_order=None,
    stepwise=False,
    information_criterion="aicc",
    trace=True,
    error_action='ignore',
    suppress_warnings=True,
)

# Blank lines separate the search trace from the summary table.
print("\n\n\n")
model_sarima_enrM.summary()

 ARIMA(0,1,0)(0,1,0)[12]             : AICC=29.129, Time=0.02 sec
 ARIMA(0,1,0)(0,1,1)[12]             : AICC=31.519, Time=0.06 sec
 ARIMA(0,1,0)(1,1,0)[12]             : AICC=31.519, Time=0.05 sec
 ARIMA(0,1,0)(1,1,1)[12]             : AICC=34.147, Time=0.09 sec
 ARIMA(0,1,1)(0,1,0)[12]             : AICC=29.410, Time=0.04 sec
 ARIMA(0,1,1)(0,1,1)[12]             : AICC=32.038, Time=0.32 sec
 ARIMA(0,1,1)(1,1,0)[12]             : AICC=32.038, Time=0.16 sec
 ARIMA(0,1,1)(1,1,1)[12]             : AICC=34.944, Time=0.16 sec
 ARIMA(1,1,0)(0,1,0)[12]             : AICC=31.383, Time=0.03 sec
 ARIMA(1,1,0)(0,1,1)[12]             : AICC=inf, Time=0.31 sec
 ARIMA(1,1,0)(1,1,0)[12]             : AICC=inf, Time=0.40 sec
 ARIMA(1,1,0)(1,1,1)[12]             : AICC=36.545, Time=0.47 sec
 ARIMA(1,1,1)(0,1,0)[12]             : AICC=32.005, Time=0.11 sec
 ARIMA(1,1,1)(0,1,1)[12]             : AICC=34.890, Time=0.31 sec
 ARIMA(1,1,1)(1,1,0)[12]             : AICC=34.880, Time=0.69 sec
 ARIMA(1,1,1)(1,

0,1,2,3
Dep. Variable:,y,No. Observations:,24.0
Model:,"SARIMAX(0, 1, 0)x(0, 1, 0, 12)",Log Likelihood,-13.474
Date:,"Mon, 25 Jul 2022",AIC,28.947
Time:,21:34:21,BIC,29.345
Sample:,0,HQIC,28.696
,- 24,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
sigma2,0.6783,0.252,2.689,0.007,0.184,1.173

0,1,2,3
Ljung-Box (L1) (Q):,0.12,Jarque-Bera (JB):,1.03
Prob(Q):,0.73,Prob(JB):,0.6
Heteroskedasticity (H):,0.38,Skew:,-0.75
Prob(H) (two-sided):,0.38,Kurtosis:,3.12


In [287]:
# Forecast FC months ahead with the energy SARIMA model and request the interval bounds.
prediction, conf_int = model_sarima_enrM.predict(n_periods=FC, return_conf_int=True)
# Index the interval bounds and the point forecast by the test dates so they align with the actuals.
df_cf= pd.DataFrame(conf_int, index=df_test.index)

df_forecast = pd.DataFrame(prediction, index=df_test.index, columns=['Cost_pt'])

#? FC, LABEL, CAT
# Pass the interval frame built above (`df_cf`); the name `cf` is never defined in this notebook.
go_plotter(df_train, df_test, df_forecast, df_cf, FC, LABEL, "Energy")
# Last expression: MAAPE of the forecast against the test data, shown as the cell output.
maape(df_test.values, df_forecast.values)

0.9415048205138338

In [288]:
# Parameters of the selected energy SARIMA model.
model_sarima_enrM.get_params()

{'maxiter': 50,
 'method': 'lbfgs',
 'order': (0, 1, 0),
 'out_of_sample_size': 0,
 'scoring': 'mse',
 'scoring_args': {},
 'seasonal_order': (0, 1, 0, 12),
 'start_params': None,
 'trend': None,
 'with_intercept': False}

In [None]:
# Persist the energy SARIMA model. The cell previously dumped `model_sarima_sysM`
# (the system model) into this energy-model file.
# NOTE(review): the "Y2018S" tag in the filename does not match LABEL = 'Y2017_S' set for
# the energy section, and the file is written outside ./model/ -- confirm both.
joblib.dump(model_sarima_enrM,"./model_sarima_enrM_Y2018S_010x010")

## References

1.) Krish Naik, github -- https://github.com/krishnaik06/ARIMA-And-Seasonal-ARIMA/blob/master/Untitled.ipynb

2.) Auto-ARIMA -- https://www.alldatascience.com/time-series/forecasting-time-series-with-auto-arima/

3.) Seasonal Random Trend, Identifying ARIMA orders -- https://people.duke.edu/~rnau/411seart.htm