In [None]:
%%capture
%run 02_FeatEng.ipynb

In [None]:
from statsmodels.tsa.statespace.sarimax import SARIMAX
import pmdarima as pm
from  sklearn.metrics import mean_squared_error  as skl_mse

In [None]:
import mle.tsa as mle_tsa

In [None]:
# Destination folder and workbook name for the exported train/test results table.
# In this notebook they are only referenced by the commented-out .to_excel call
# in the final cell.
OUTPATH = 'data/out/'
OUTFILE = 'cons_gna95_sarimax.xlsx'

# Split Data

In [None]:
# Training target: the 'cons_GNA95' column divided by 1e3 (predictions are
# multiplied back by 1e3 when they are reported further down).
endog_GNA95_train = data_train['cons_GNA95'].div(1e3).copy()

# Training regressor: the 'price_GNA95_pct_change1_lag1' column, kept as a
# one-column DataFrame.
exog_GNA95_train = data_train.loc[:, ['price_GNA95_pct_change1_lag1']].copy()

In [None]:
# Hold-out target, rescaled by 1e3 exactly like the training target.
endog_GNA95_test = data_test['cons_GNA95'].div(1e3).copy()

# Hold-out regressor: same single column as in training, as a one-column DataFrame.
exog_GNA95_test = data_test.loc[:, ['price_GNA95_pct_change1_lag1']].copy()

# Model Building

## Exogenous Autosarima

In [None]:
# Stepwise search over seasonal ARIMA orders (seasonal period m=12) with the
# differencing fixed at d=1 and D=1, no trend and no intercept; candidates are
# compared by BIC. The target series was already divided by 1e3 when the
# training split was built, so no rescaling happens here.
# NOTE(review): no exogenous regressor is passed to this search even though the
# section heading calls it exogenous - confirm that this is intended.
autosarima_gna95 = pm.auto_arima(
    y=endog_GNA95_train,
    # non-seasonal orders
    start_p=0, max_p=3,
    start_q=0, max_q=3,
    d=1,
    # seasonal orders
    seasonal=True, m=12, D=1,
    start_P=0, max_P=3,
    start_Q=0, max_Q=3,
    # deterministic terms
    trend='n', with_intercept=False,
    # search behaviour and logging
    information_criterion='bic',
    stepwise=True,
    trace=True,
    error_action='ignore',
    suppress_warnings=True,
)

In [None]:
# Display the summary of the model returned by the automatic order search.
autosarima_gna95.summary()

## Exogenous SARIMAX

In [None]:
# Fit a SARIMAX model with the price regressor as exogenous input.
# Orders are hard-coded: (2,1,0) non-seasonal, (0,1,1) seasonal with period 12,
# and no trend term. The name is bound directly to the fitted results object.
sarima_gna95_exo = SARIMAX(
    endog=endog_GNA95_train,
    exog=exog_GNA95_train,
    order=(2, 1, 0),
    seasonal_order=(0, 1, 1, 12),
    trend='n',
).fit()

# Last expression of the cell: show the fitted-model summary.
sarima_gna95_exo.summary()

## Selected Model Diagnostics

In [None]:
# Draw the fitted model's built-in diagnostics figure on a 16x16 canvas.
sarima_gna95_exo.plot_diagnostics(figsize=(16,16))
plt.show()

## Prediction

In [None]:
# In-sample predictions, multiplied by 1e3 to undo the rescaling applied to the
# training target. The first row is dropped.
p_gna95_train = sarima_gna95_exo.predict().to_frame('p_cons_GNA95').iloc[1:] * 1e3

# Interval bounds, also scaled back by 1e3. They start 13 rows in, whereas the
# point predictions start 1 row in; the column assignment aligns on the index,
# so the first 12 rows of the table keep a prediction but get NaN bounds.
# NOTE(review): confirm the differing offsets (1 vs 13) are intentional.
p_gna95_train[['lower p_cons_GNA95', 'upper p_cons_GNA95']] = (
    sarima_gna95_exo.get_prediction().conf_int().iloc[13:] * 1e3
)

# Observed values (scaled back by 1e3) plus the labels used to tell rows apart
# in the combined results table.
p_gna95_train = p_gna95_train.assign(
    cons_GNA95=endog_GNA95_train * 1e3,
    model='sarimax',
    split='train',
)

# Replace the index with its .date values.
p_gna95_train.index = p_gna95_train.index.date

# Forecast

In [None]:
# Produce predictions for the test window with the project helper
# mle_tsa.exog_forecast, handing it both data splits and the fitted training
# results. The call returns a pair: the first element is kept as
# sarima_gna95_exo_upd, the second as p_gna95_test, which later cells treat as a
# DataFrame with 'lower cons_GNA95' / 'upper cons_GNA95' columns.
# NOTE(review): judging by the names, the helper presumably updates the fitted
# model with test observations as it forecasts - confirm in mle.tsa.
sarima_gna95_exo_upd, p_gna95_test = mle_tsa.exog_forecast(
    train_endog=endog_GNA95_train,
    train_exog=exog_GNA95_train,
    test_endog=endog_GNA95_test,
    test_exog=exog_GNA95_test,
    res=sarima_gna95_exo)

In [None]:
# Give the interval columns the same names as in the training table.
bound_names = {
    'lower cons_GNA95': 'lower p_cons_GNA95',
    'upper cons_GNA95': 'upper p_cons_GNA95',
}
p_gna95_test = p_gna95_test.rename(columns=bound_names)

# Convert the index to datetimes before attaching the observed test values,
# which are matched to the forecast rows by index.
p_gna95_test.index = pd.to_datetime(p_gna95_test.index)

# Attach the observed values, then multiply every column by 1e3 to undo the
# rescaling applied to the target.
# CAUTION: this cell overwrites its own input, so re-running it on its own
# multiplies the table by 1e3 a second time; re-run the forecast cell first.
p_gna95_test = p_gna95_test.assign(cons_GNA95=endog_GNA95_test).mul(1e3)

In [None]:
# Two stacked panels sharing the x axis: forecast vs. observed consumption on
# top, the exogenous regressor underneath.
fig, axs = plt.subplots(nrows=2, ncols=1, sharex=True)
fig.suptitle("Forecasting Consumo GNA95")
ax_cons, ax_price = axs

# Top panel: forecast as a line, observed values as markers, and the region
# between the lower and upper bound columns as a light grey band.
p_gna95_test.plot(y='p_cons_GNA95', label='forecast', ax=ax_cons)
p_gna95_test.plot(y='cons_GNA95', label='actual', style='o', ax=ax_cons)
ax_cons.fill_between(
    p_gna95_test.index,
    p_gna95_test['lower p_cons_GNA95'],
    p_gna95_test['upper p_cons_GNA95'],
    color='k',
    alpha=0.1,
)
ax_cons.set_ylabel("(t)")

# Bottom panel: the regressor fed to the model over the test window.
# NOTE(review): the column name suggests a lagged percentage change, so the
# "(€/l)" unit label may not match what is plotted - confirm.
exog_GNA95_test.plot(y='price_GNA95_pct_change1_lag1', ax=ax_price)
ax_price.set_ylabel("(€/l)")

plt.legend()
plt.show()

# Model Assessment

In [None]:
# Label the forecast rows so they can be told apart from the training rows in
# the combined results table.
p_gna95_test = p_gna95_test.assign(split='test', model='sarimax')

# Replace the index with its .date values, as is done for the training table.
p_gna95_test.index = p_gna95_test.index.date

In [None]:
# Root mean squared error of the test forecast against observed consumption.
# p_gna95_test was already multiplied by 1e3 in the forecast post-processing
# cell, so its 'p_cons_GNA95' column is in the same units as
# data_test['cons_GNA95']. Scaling it by 1e3 again here (as this cell used to)
# compared a 1000x too large forecast with the actuals and inflated the RMSE.
# Arguments follow the (y_true, y_pred) order; values are compared by position.
skl_mse(data_test['cons_GNA95'], p_gna95_test['p_cons_GNA95'])**0.5

In [None]:
# Mean of the values returned by the project helper mle_tsa.compute_ape for the
# test forecast and the observed test consumption.
# NOTE(review): the helper's argument order (prediction first, actual second
# here) is not visible from this notebook - confirm it matches its signature.
# NOTE(review): p_gna95_test is indexed by date objects at this point while
# data_test keeps its own index; confirm compute_ape does not depend on index
# alignment between the two Series.
mle_tsa.compute_ape(p_gna95_test['p_cons_GNA95'], data_test['cons_GNA95']).mean()

In [None]:
# Column layout of the combined results table.
select_cols = [
    'model', 'split',
    'cons_GNA95', 'p_cons_GNA95',
    'lower p_cons_GNA95', 'upper p_cons_GNA95',
]

# Stack the training and test rows and display them in that layout.
# The Excel export is currently disabled; to write the file, append
# .to_excel(OUTPATH + OUTFILE, index_label='Date') to the expression below.
pd.concat([p_gna95_train, p_gna95_test]).loc[:, select_cols]