In [None]:
%%capture
# Execute the feature-engineering notebook in this kernel, silencing its output.
# NOTE(review): the cells below use `data`, `data_train`, `data_test`, `pd`, `np`
# and `plt` without defining them here, so they presumably come from this run --
# confirm, and consider importing the libraries explicitly in this notebook.
%run 02_FeatEng.ipynb

In [None]:
from statsmodels.tsa.statespace.varmax import VARMAX
from statsmodels.tsa.vector_ar.var_model import VAR
from  sklearn.metrics import mean_squared_error  as skl_mse

In [None]:
import mle.tsa as mle_tsa

In [None]:
# Destination of the exported forecasts. The final cell builds the target path
# with plain string concatenation (OUTPATH + OUTFILE), so OUTPATH must keep its
# trailing slash.
OUTPATH = 'data/out/'
OUTFILE = 'cons_gna95_var.xlsx'

# Split Data

In [None]:
# Training inputs for the "seasonal model + VAR" variant: percentage changes of
# the seasonally adjusted consumption and price columns.
endog_seas_GNA95_train = data_train.loc[
    :, ['cons_GNA95_seas_adj_pct_change1', 'price_GNA95_seas_adj_pct_change1']
].copy()

# Training inputs for the plain VAR variant: the `diff1_sdiff12` columns, with
# the consumption column divided by 1e3 (it is multiplied back after forecasting).
endog_GNA95_train = data_train.loc[
    :, ['cons_GNA95_diff1_sdiff12', 'price_GNA95_diff1_sdiff12']
].assign(
    cons_GNA95_diff1_sdiff12=lambda frame: frame['cons_GNA95_diff1_sdiff12'] / 1e3
)

# Candidate exogenous regressor; no cell visible in this notebook consumes it.
exog_GNA95_train = data_train.loc[:, ['spot_GNA_pct_change1']].copy()

In [None]:
# Hold-out counterpart of the seasonal-variant training frame.
endog_seas_GNA95_test = data_test.loc[
    :, ['cons_GNA95_seas_adj_pct_change1', 'price_GNA95_seas_adj_pct_change1']
].copy()

# Hold-out counterpart of the plain VAR frame, with the same 1e3 scaling of the
# consumption column as applied to the training data.
endog_GNA95_test = data_test.loc[
    :, ['cons_GNA95_diff1_sdiff12', 'price_GNA95_diff1_sdiff12']
].assign(
    cons_GNA95_diff1_sdiff12=lambda frame: frame['cons_GNA95_diff1_sdiff12'] / 1e3
)

# Candidate exogenous regressor; no cell visible in this notebook consumes it.
exog_GNA95_test = data_test.loc[:, ['spot_GNA_pct_change1']].copy()

# Model Building

## Seasonal Model + VAR

In [None]:
# Lag-order selection on the seasonally adjusted training series; the summary
# table is the cell output.
lag_search_seas = VAR(endog=endog_seas_GNA95_train)
res = lag_search_seas.select_order()
res.summary()

In [None]:
# Fit a VARMAX with order=(2, 0) and no trend term on the seasonally adjusted
# percentage changes, allowing the optimiser up to 10000 iterations.
mod_seas_GNA95 = VARMAX(
    endog_seas_GNA95_train,
    order=(2, 0),
    trend='n',
)
res_seas_GNA95 = mod_seas_GNA95.fit(maxiter=10000)
res_seas_GNA95.summary()

In [None]:
# Forecast over the hold-out period with the project helper. Judging by how the
# result is used below, it returns an updated results object and a frame holding
# point forecasts plus 'lower <col>' / 'upper <col>' bounds.
seas_endog_cols = ['cons_GNA95_seas_adj_pct_change1', 'price_GNA95_seas_adj_pct_change1']
res_seas_GNA95_upd, p_seas_gna95_test = mle_tsa.endog_forecast(
    res=res_seas_GNA95,
    train_endog=endog_seas_GNA95_train[seas_endog_cols],
    test_endog=endog_seas_GNA95_test[seas_endog_cols],
)

In [None]:
# Forecast vs. actual percentage change of seasonally adjusted consumption,
# with the forecast interval shaded.
seas_target = 'cons_GNA95_seas_adj_pct_change1'
ax = p_seas_gna95_test.plot(y=seas_target, label='forecast')
data_test[seas_target].plot(ax=ax, style='o', label='actual')
ax.fill_between(
    p_seas_gna95_test.index,
    p_seas_gna95_test['lower ' + seas_target],
    p_seas_gna95_test['upper ' + seas_target],
    color='k',
    alpha=0.1,
)
plt.legend()
plt.show()

In [None]:
# Rebuild consumption levels from the forecast percentage changes.
# For every test date: level = base * (1 + pct_change) + seasonal component,
# where `base` is the most recent observed consumption (the last training value
# first, then each test actual). Values are handled divided by 1e3 and scaled
# back by the next cell.
#
# The interval bounds use the same base as the point forecast. Previously they
# were anchored on the previous *forecast* while the point forecast used the
# previous *actual*, so the band was inconsistent with the value it should bracket.
#
# NOTE(review): the base is the raw 'cons_GNA95' level although the percentage
# change was modelled on the seasonally adjusted series -- confirm this is intended.
p1_lst = []
p1_up_ci_lst = []
p1_lo_ci_lst = []
p0 = data_train['cons_GNA95'].iloc[-1] / 1e3

for idx, row in p_seas_gna95_test.iterrows():
    seas = data_test.loc[idx, 'cons_GNA95_seas_comp'] / 1e3
    p1_lst.append(row['cons_GNA95_seas_adj_pct_change1'] * p0 + p0 + seas)
    p1_up_ci_lst.append(row['upper cons_GNA95_seas_adj_pct_change1'] * p0 + p0 + seas)
    p1_lo_ci_lst.append(row['lower cons_GNA95_seas_adj_pct_change1'] * p0 + p0 + seas)
    # The next step starts from this date's observed consumption.
    p0 = data_test.loc[idx, 'cons_GNA95'] / 1e3
    

In [None]:
# Store the rebuilt levels on the forecast frame, undoing the 1e3 scaling used
# while reconstructing them.
for level_col, level_values in (
    ('p_cons_GNA95', p1_lst),
    ('upper p_cons_GNA95', p1_up_ci_lst),
    ('lower p_cons_GNA95', p1_lo_ci_lst),
):
    p_seas_gna95_test[level_col] = np.array(level_values) * 1e3

In [None]:
# Rebuilt consumption forecast (line) against observed test consumption (dots).
ax = p_seas_gna95_test.plot(y='p_cons_GNA95')
data_test.plot(ax=ax, y='cons_GNA95', style='o')
plt.show()

## VAR Only

In [None]:
# Lag-order selection on the differenced training series; the summary table is
# the cell output.
lag_search_diff = VAR(endog=endog_GNA95_train)
res = lag_search_diff.select_order()
res.summary()

In [None]:
# First candidate: VARMAX with order=(2, 0) and no trend term. Both names are
# reassigned by the order=(1, 0) fit in the following cell, so only this cell's
# summary output survives.
mod_GNA95 = VARMAX(
    endog_GNA95_train,
    order=(2, 0),
    trend='n',
)
res_GNA95 = mod_GNA95.fit(maxiter=10000)
res_GNA95.summary()

In [None]:
# Second candidate: VARMAX with order=(1, 0) and no trend term. This is the fit
# that the diagnostics, prediction and forecast cells below operate on.
mod_GNA95 = VARMAX(
    endog_GNA95_train,
    order=(1, 0),
    trend='n',
)
res_GNA95 = mod_GNA95.fit(maxiter=10000)
res_GNA95.summary()

## Model Diagnostics

In [None]:
# Residual diagnostics for the fitted model, drawn on a 16x16 figure.
diagnostics_figsize = (16, 16)
res_GNA95.plot_diagnostics(figsize=diagnostics_figsize)
plt.show()

## Prediction

In [None]:
# In-sample predictions of the differenced consumption series together with the
# matching confidence bounds.
train_target = 'cons_GNA95_diff1_sdiff12'
train_bound_cols = ['lower ' + train_target, 'upper ' + train_target]

p_gna95_train = res_GNA95.predict()[[train_target]]
train_conf_int = res_GNA95.get_prediction().conf_int()
p_gna95_train[train_bound_cols] = train_conf_int[train_bound_cols]

In [None]:
# Undo the differencing of the in-sample predictions:
#     level_t = predicted diff1_sdiff12_t + level_{t-1} + diff1_{t-12}
# with the looked-up values divided by 1e3 to match the scaling of the model target.
#
# The values are collected in lists and turned into float Series once. This
# replaces `pd.Series(dtype=np.number)` (an abstract NumPy scalar type, whose
# conversion to a dtype is deprecated) and the element-by-element growth of
# initially empty Series.
#
# The first 13 rows are skipped -- presumably because the 1- and 12-month lags
# are not available for them; confirm against the feature-engineering notebook.
train_rows = p_gna95_train[13:]
forecast_vals = []
up_ci_vals = []
lo_ci_vals = []

for idx, row in train_rows.iterrows():
    p1 = data.loc[idx - pd.DateOffset(months=1), 'cons_GNA95'] / 1e3
    p12 = data.loc[idx - pd.DateOffset(months=12), 'cons_GNA95_diff1'] / 1e3
    forecast_vals.append(row['cons_GNA95_diff1_sdiff12'] + p1 + p12)
    up_ci_vals.append(row['upper cons_GNA95_diff1_sdiff12'] + p1 + p12)
    lo_ci_vals.append(row['lower cons_GNA95_diff1_sdiff12'] + p1 + p12)

# Indexed like the rows they were derived from so the later column assignment aligns.
forecast_s = pd.Series(forecast_vals, index=train_rows.index, dtype=float)
up_ci_s = pd.Series(up_ci_vals, index=train_rows.index, dtype=float)
lo_ci_s = pd.Series(lo_ci_vals, index=train_rows.index, dtype=float)

In [None]:
# Assemble the exported training frame: rebuilt forecast and bounds scaled back
# by 1e3, the observed consumption, and the model/split tags.
# Fix: the lower column is filled from the lower-bound series and the upper
# column from the upper-bound series (they were assigned the other way round).
p_gna95_train['p_cons_GNA95'] = forecast_s
p_gna95_train['lower p_cons_GNA95'] = lo_ci_s
p_gna95_train['upper p_cons_GNA95'] = up_ci_s
p_gna95_train = p_gna95_train[['p_cons_GNA95', 'lower p_cons_GNA95', 'upper p_cons_GNA95']]*1e3
# Attach the actuals while the index still matches data_train's index, then
# reduce the index to plain dates.
p_gna95_train['cons_GNA95'] = data_train['cons_GNA95']
p_gna95_train.index = p_gna95_train.index.date
p_gna95_train['model'] = 'var'
p_gna95_train['split'] = 'train'

# Forecast

In [None]:
# Forecast the differenced series over the hold-out period with the project
# helper. Judging by how the result is used below, it returns an updated results
# object and a frame with point forecasts plus 'lower <col>' / 'upper <col>' bounds.
diff_endog_cols = ['cons_GNA95_diff1_sdiff12', 'price_GNA95_diff1_sdiff12']
res_GNA95_upd, p_gna95_test = mle_tsa.endog_forecast(
    res=res_GNA95,
    train_endog=endog_GNA95_train[diff_endog_cols],
    test_endog=endog_GNA95_test[diff_endog_cols],
)

In [None]:
# One panel per modelled series (consumption on top, price below): forecast line,
# shaded forecast interval, and observed values as dots.
fig, axs = plt.subplots(2, 1, sharex=True)

panels = (
    ('cons_GNA95_diff1_sdiff12', 'cons diff forecast', 'cons diff actual'),
    ('price_GNA95_diff1_sdiff12', 'price diff forecast', 'price diff actual'),
)
for ax, (panel_col, forecast_label, actual_label) in zip(axs, panels):
    p_gna95_test.plot(y=panel_col, label=forecast_label, ax=ax)
    ax.fill_between(
        p_gna95_test.index,
        p_gna95_test['lower ' + panel_col],
        p_gna95_test['upper ' + panel_col],
        color='k',
        alpha=0.1,
    )
    endog_GNA95_test.plot(y=panel_col, style='o', label=actual_label, ax=ax)
plt.show()

In [None]:
# Undo the differencing of the hold-out forecasts:
#     level_t = forecast diff1_sdiff12_t + level_{t-1} + diff1_{t-12}
# with the looked-up values divided by 1e3 to match the scaling of the model target.
test_target = 'cons_GNA95_diff1_sdiff12'
forecast_lst, up_ci_lst, lo_ci_lst = [], [], []

for stamp, pred in p_gna95_test.iterrows():
    prev_level = data.loc[stamp - pd.DateOffset(months=1), 'cons_GNA95'] / 1e3
    prev_year_step = data.loc[stamp - pd.DateOffset(months=12), 'cons_GNA95_diff1'] / 1e3
    forecast_lst.append(pred[test_target] + prev_level + prev_year_step)
    up_ci_lst.append(pred['upper ' + test_target] + prev_level + prev_year_step)
    lo_ci_lst.append(pred['lower ' + test_target] + prev_level + prev_year_step)

In [None]:
# Store the rebuilt levels on the forecast frame, undoing the 1e3 scaling.
# Fix: the lower column takes the lower-bound list and the upper column the
# upper-bound list (they were assigned the other way round).
p_gna95_test['p_cons_GNA95'] = np.array(forecast_lst)*1e3
p_gna95_test['lower p_cons_GNA95'] = np.array(lo_ci_lst)*1e3
p_gna95_test['upper p_cons_GNA95'] = np.array(up_ci_lst)*1e3

In [None]:
# Rebuilt consumption forecast with its interval against the observed test values.
ax = p_gna95_test.plot(y='p_cons_GNA95', label='forecast')
data_test.plot(ax=ax, y='cons_GNA95', style='o', label='actual')
ax.fill_between(
    p_gna95_test.index,
    p_gna95_test['lower p_cons_GNA95'],
    p_gna95_test['upper p_cons_GNA95'],
    color='k',
    alpha=0.1,
)
plt.suptitle("Forecasting Consumo GNA95")
plt.ylabel('t')
plt.show()

# Model Assessment

In [None]:
# Tag the hold-out forecasts and attach the observed consumption for export.
# The actuals are assigned while the index still matches data_test's index and
# only then is the index reduced to plain dates -- the same order the training
# frame uses -- so the assignment does not depend on pandas aligning
# datetime.date labels with timestamps.
p_gna95_test['model'] = 'var'
p_gna95_test['split'] = 'test'
p_gna95_test['cons_GNA95'] = data_test['cons_GNA95']
p_gna95_test.index = p_gna95_test.index.date

In [None]:
# Root of the mean squared error between forecast and observed seasonally
# adjusted percentage change on the test split.
mse_seas = skl_mse(
    p_seas_gna95_test['cons_GNA95_seas_adj_pct_change1'],
    endog_seas_GNA95_test['cons_GNA95_seas_adj_pct_change1'],
)
mse_seas ** 0.5

In [None]:
# Root of the mean squared error between forecast and observed differenced
# consumption (both in the 1e3-scaled units) on the test split.
mse_diff = skl_mse(
    p_gna95_test['cons_GNA95_diff1_sdiff12'],
    endog_GNA95_test['cons_GNA95_diff1_sdiff12'],
)
mse_diff ** 0.5

In [None]:
# Mean of the project helper's per-observation error (presumably absolute
# percentage error, going by its name) for the seasonal + VAR level forecast.
ape_seas = mle_tsa.compute_ape(data_test['cons_GNA95'], p_seas_gna95_test['p_cons_GNA95'])
ape_seas.mean()

In [None]:
# Mean of the project helper's per-observation error (presumably absolute
# percentage error, going by its name) for the plain VAR level forecast.
# NOTE(review): by this point p_gna95_test's index has been converted to plain
# dates while data_test keeps its original index -- confirm compute_ape is not
# sensitive to that difference.
ape_diff = mle_tsa.compute_ape(data_test['cons_GNA95'], p_gna95_test['p_cons_GNA95'])
ape_diff.mean()

In [None]:
# Stack the train and test frames and write the selected columns to the Excel
# file at OUTPATH + OUTFILE, labelling the index column 'Date'.
select_cols = [
    'model',
    'split',
    'cons_GNA95',
    'p_cons_GNA95',
    'lower p_cons_GNA95',
    'upper p_cons_GNA95',
]

var_results = pd.concat([p_gna95_train, p_gna95_test], axis=0)
var_results[select_cols].to_excel(OUTPATH + OUTFILE, index_label='Date')