In [None]:
# Run in Colab GPU instance (e.g. T4) as it is needed to load models fitted with a GPU (NP bug?)
!pip uninstall -y torch notebook notebook_shim tensorflow tensorflow-datasets prophet torchaudio torchdata torchtext torchvision
!pip install git+https://github.com/ourownstory/neural_prophet.git


In [None]:
import logging
import warnings
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib as mpl
from matplotlib import gridspec
import pickle
import pandas as pd
import pickle
import os
import datetime
from tqdm.auto import tqdm

from sklearn.metrics import mean_squared_error, mean_absolute_error

# from prophet import Prophet
from neuralprophet import NeuralProphet, set_log_level

# Keep the notebook output compact: set NeuralProphet's log level and the "prophet" logger to ERROR
set_log_level("ERROR")
logging.getLogger("prophet").setLevel(logging.ERROR)
# NOTE: this silences ALL Python warnings for the rest of the session, not only library deprecation noise
warnings.filterwarnings("ignore")

# MASE:  Mean Absolute Scaled Error compares the mean absolute error of the forecast with
# the mean absolute error of a naive forecast, i.e. the forecast that simply repeats the last observed value.

def mase(y_test,y_pred,y_train):
    """Return the Mean Absolute Scaled Error of a forecast.

    The mean absolute forecast error is divided by the mean absolute error of the
    naive "repeat the last observed value" forecast on ``y_train``.

    Parameters
    ----------
    y_test : pd.Series
        Observed values in the evaluation period.
    y_pred : pd.Series
        Forecast values, in the same order as ``y_test``.
    y_train : pd.Series
        Series used to compute the naive-forecast scale (mean absolute first difference).

    Returns
    -------
    float
        MASE; values below 1 mean the forecast beats the naive forecast on ``y_train``.
    """
    # Align observations and forecasts by position, not by index label: the forecast is
    # typically indexed by date while the observations carry an unrelated index, and
    # label alignment would then silently produce NaN.
    abs_err=(y_test.reset_index(drop=True)-y_pred.reset_index(drop=True)).abs()
    naive_mae=y_train.diff().dropna().abs().mean()
    return abs_err.mean()/naive_mae


In [None]:
# Mount Google Drive at /content/drive so that the project files are reachable from the running Colab instance

from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Project root on the mounted Google Drive; has to be set manually.
fpath='/content/drive/MyDrive/Colab Notebooks/Capstone'

print(f"Project filepath, fpath: {fpath}")

# Cleaned and reformatted sales data: a single pickle holding the train and the test frame
with open(fpath+'/data/df_v_m2.pkl', 'rb') as f:
    ddict=pickle.load(f)
df_v_m=ddict['df_v_m']
df_v_m_test=ddict['df_v_m_test']

# Price data for the training and the test period
df_p_m=pd.read_pickle(fpath+'/data/df_flat_pvp.pkl')
df_p_m_test=pd.read_pickle(fpath+'/data/df_flat_pvp_test.pkl')

# Row-wise median test-period price per fuel type, taken over every column whose name contains the fuel tag
goa_cols=[col for col in df_p_m_test.columns if 'GOA' in col]
df_p_m_test["MED_GOA"]=df_p_m_test[goa_cols].median(axis=1)
cols_95=[col for col in df_p_m_test.columns if '95' in col]
df_p_m_test["MED_95"]=df_p_m_test[cols_95].median(axis=1)

# Nearest-station info for each station (3 & 8 have the same prices, so the 2nd nearest is used for them)
stn_near=pd.read_pickle(fpath+'/data/stn_near.pkl')

# Holidays defined for the data and test period
df_hol_base=pd.read_pickle(fpath+'/data/df_hol_base.pkl')

# Extra holidays/events that are added to the base holidays per station.
# NeuralProphet only knows 'event' (Prophet distinguishes 'holiday' and 'event').
# Days that repeatedly show up as low-sales days without being official holidays are
# treated as foreseeable events that can be used to improve the forecast.
with open(fpath+'/data/df_holi_extra.pkl', 'rb') as f:
    dict_hol_extra=pickle.load(f)

# Model performance statistics, filled by the model cells and kept for later comparison
dict_MASE={}
dict_MAE={}


# 1. Seasonalities only

In [None]:
# Model 1: yearly + weekly seasonality and events only (no auto-regression).
# For every station/fuel series the model is fitted (or loaded from disk), the in-sample
# forecast and residuals are plotted, the test period is forecast and the error metrics are stored.
dict_M = {}
valid = False
modelnr=1
fit_model=False  # True to fit model, False to load fitted model from disk
t_start_test='2019-01-01'  # first day of the test period

# Output folder for this model's forecasts, figures and metrics.
# makedirs(exist_ok=True) also creates missing parent folders and is safe on re-runs.
fpathm = fpath+f"/m_{modelnr}"
os.makedirs(fpathm, exist_ok=True)

for stn in tqdm([f"ES{nr}_{pr}" for nr in range(1,13) for pr in ["95","GOA"]]):
    # Extra (unofficial) events are only used for the GOA series; 95 series use the base holidays.
    if stn[:2]=='ES' and stn[-3:]=="GOA":
        df_hol_extra=dict_hol_extra[f"df_holi_extra_{stn}"]
        if df_hol_extra.loc[df_hol_extra.ds==datetime.datetime(2018,1,5)].shape[0]==1:
            # 2018-01-05 is an extra event for this station: add 2019-01-04 so the test period gets it as well
            df_hol_extra=pd.concat([df_hol_extra,pd.DataFrame({'ds':[datetime.datetime(2019,1,4)],'event':['Ev01']},index=[0])],ignore_index=True)
        df_hol=pd.concat([df_hol_base,df_hol_extra],ignore_index=True)
        print(f"Added {df_hol_extra.shape[0]} extra holidays for",stn)
    else:
        df_hol=df_hol_base

    # Sales series of this station in NeuralProphet's ds/y layout
    df=df_v_m.loc[:,[stn]].reset_index().rename(columns={'sale_date':'ds',stn:'y'})
    df_test=df_v_m_test.loc[:,[stn]].reset_index().rename(columns={'sale_date':'ds',stn:'y'})

    m=NeuralProphet(
        yearly_seasonality=True,
        weekly_seasonality=True,
        daily_seasonality=False,
        n_forecasts=1,
        quantiles=[0.05,0.95],
        epochs=60,
    )

    for e in df_hol.event.unique():
        m.add_events(e,regularization=None,mode='additive')

    df_new=m.create_df_with_events(df, df_hol)

    model_file=fpath+f'/models_1d/{stn}_model_{modelnr}_model.pkl'
    if fit_model:
        if valid:
            df_train, df_val = m.split_df(df_new, freq="D", valid_p=0.1)
            metrics=m.fit(df_train,freq='D',validation_df=df_val, progress='bar')
        else:
            df_train=df_new
            metrics=m.fit(df_train,freq='D', progress='bar')
        print(metrics.tail(1))

        # save fitted model to disk (create the models folder on first use)
        os.makedirs(os.path.dirname(model_file), exist_ok=True)
        with open(model_file, "wb") as f:
            pickle.dump(m, f, pickle.HIGHEST_PROTOCOL)
    else:
        print("Loading model from file: "+model_file)
        with open(model_file, "rb") as f:
            m=pickle.load(f)

    m.set_plotting_backend("matplotlib")

    # In-sample forecast. It is stored under its own name; it used to be written to the
    # *_forecast_test.pkl file, where the test forecast below overwrote it.
    forecast_train = m.predict(df_new)
    forecast_train.to_pickle(fpathm+f"/{stn}_forecast_train.pkl")

    # plot forecast in training period
    fig, ax = plt.subplots(figsize=(18, 6))
    m.highlight_nth_step_ahead_of_each_forecast(1).plot(forecast_train, xlabel="Date", ylabel=stn, ax=ax)
    ax.set_title(f"Model {modelnr}: {stn} Train Forecast")
    fig.savefig(fpathm+f"/{stn}_model_{modelnr}_train_forecast.png",bbox_inches='tight')
    plt.close(fig)

    # plot residuals: time series on the left, histogram on the right
    df_residuals = pd.DataFrame({"ds": forecast_train.ds, "residuals": forecast_train.y - forecast_train.yhat1})
    fig=plt.figure(figsize=(18, 5))
    spec = gridspec.GridSpec(ncols=2, nrows=1, width_ratios=[3, 1], wspace=0.08)

    ax0=fig.add_subplot(spec[0])
    df_residuals.plot(x="ds", y="residuals",ax=ax0)
    ax0.set_title("Residuals")
    ax0.grid()

    ax1=fig.add_subplot(spec[1])
    ax1.set_title("Residuals histogram")
    df_residuals.residuals.hist(bins=50,ax=ax1,orientation='horizontal')
    ax1.set_yticklabels([])

    fig.suptitle(f"Model {modelnr}: {stn} Residuals")
    fig.savefig(fpathm+f"/{stn}_model_{modelnr}_residuals.png",bbox_inches='tight')
    plt.close(fig)

    # Forecast test period: predict over the last 4*31 days of train+test.
    # dropna() removes the rows without an observed y or without a forecast.
    future = m.make_future_dataframe(m.create_df_with_events(pd.concat([df,df_test]),df_hol),
                                     periods=1, n_historic_predictions=31*4)
    forecast_test = m.predict(future).dropna()

    fig,ax=plt.subplots(1,1,figsize=(18,5))
    m.highlight_nth_step_ahead_of_each_forecast(1).plot(forecast_test,ax=ax)
    ax.set_xlim(pd.Timestamp('2018-10-01'),pd.Timestamp('2019-01-31'))

    forecast_test['residuals']=forecast_test.y-forecast_test.yhat1
    forecast_test=forecast_test.set_index('ds')
    forecast_test.to_pickle(fpathm+f"/{stn}_forecast_test.pkl")

    ax.plot(df_test.ds,df_test.y,'rx',label='test')
    ax.legend(['yhat','yhat','test','test'])

    # Error metrics on the test period
    in_test=forecast_test.index>=t_start_test
    yhat_test=forecast_test.loc[in_test,'yhat1']

    # MASE scaled by the naive error before the test period / MASE2 scaled by the naive error inside it
    mase_test =mase(df_test.y,yhat_test,forecast_test.loc[forecast_test.index<t_start_test,'y'])
    mase_test2=mase(df_test.y,yhat_test,forecast_test.loc[in_test,'y'])

    rmse_test=np.sqrt(mean_squared_error(df_test.y,yhat_test))

    mae_test=forecast_test.loc[in_test,'residuals'].abs().mean()
    mae_test_rel=mae_test/forecast_test.loc[in_test,'y'].mean()
    mae_train=df_residuals.residuals.abs().mean()

    print(f"Model {modelnr}: {stn} MASE={mase_test*100:.1f}%,MASE2={mase_test2*100:.1f}%, MAE={mae_test:.4f}, MAE_rel={mae_test_rel*100:.1f}%")

    dict_MASE[(stn,modelnr)]=mase_test
    dict_MAE[(stn,modelnr)]=mae_test_rel
    dict_M[(stn,modelnr)]={'mase_test':mase_test,'mase_test2':mase_test2,'mae_test':mae_test,'rmse_test':rmse_test,'mae_test_rel':mae_test_rel,'mae_train':mae_train}

    ax.set_title(f"Model {modelnr}: {stn} MASE={mase_test*100:.1f}%, MAE_rel={mae_test_rel*100:.1f}%")
    fig.savefig(fpathm+f"/{stn}_model_{modelnr}_forecast.png",bbox_inches='tight')

    # Written after every station so that partial results survive an interrupted run
    with open(fpathm+f'/dict_MASE_MAE_{modelnr}.pkl', 'wb') as f:
        pickle.dump({'dict_MASE':dict_MASE,'dict_MAE':dict_MAE,'dict_M':dict_M}, f, pickle.HIGHEST_PROTOCOL)

    plt.close(fig)



# 2. One (1) day ahead forecast with Auto-Regression

In [None]:
# Model 2: seasonality + events + linear auto-regression on the last 7 days (1 day ahead).
# For every station/fuel series the model is fitted (or loaded from disk), the in-sample
# forecast and residuals are plotted, the test period is forecast and the error metrics are stored.
valid=False
dict_M = {}
modelnr=2
fit_model=False  # True to fit model, False to load fitted model from disk
t_start_test='2019-01-01'  # first day of the test period

# Output folder for this model's forecasts, figures and metrics.
# makedirs(exist_ok=True) also creates missing parent folders and is safe on re-runs.
fpathm = fpath+f"/m_{modelnr}"
os.makedirs(fpathm, exist_ok=True)

for stn in tqdm([f"ES{nr}_{pr}" for nr in range(1,13) for pr in ["95","GOA"]]):
    # Extra (unofficial) events are only used for the GOA series; 95 series use the base holidays.
    if stn[:2]=='ES' and stn[-3:]=="GOA":
        df_hol_extra=dict_hol_extra[f"df_holi_extra_{stn}"]
        if df_hol_extra.loc[df_hol_extra.ds==datetime.datetime(2018,1,5)].shape[0]==1:
            # 2018-01-05 is an extra event for this station: add 2019-01-04 so the test period gets it as well
            df_hol_extra=pd.concat([df_hol_extra,pd.DataFrame({'ds':[datetime.datetime(2019,1,4)],'event':['Ev01']},index=[0])],ignore_index=True)
        df_hol=pd.concat([df_hol_base,df_hol_extra],ignore_index=True)
        print(f"Added {df_hol_extra.shape[0]} extra holidays for",stn)
    else:
        df_hol=df_hol_base

    # Sales series of this station in NeuralProphet's ds/y layout
    df=df_v_m.loc[:,[stn]].reset_index().rename(columns={'sale_date':'ds',stn:'y'})
    df_test=df_v_m_test.loc[:,[stn]].reset_index().rename(columns={'sale_date':'ds',stn:'y'})

    m=NeuralProphet(
        yearly_seasonality=True,
        weekly_seasonality=True,
        daily_seasonality=False,
        seasonality_mode="additive",
        n_lags=7,
        ar_reg=.1,
        n_forecasts=1,
        quantiles=[0.05,0.95],
    )

    for e in df_hol.event.unique():
        m.add_events(e,regularization=None,mode='additive')

    df_new=m.create_df_with_events(df, df_hol)

    model_file=fpath+f'/models_1d/{stn}_model_{modelnr}_model.pkl'
    if fit_model:
        if valid:
            df_train, df_val = m.split_df(df_new, freq="D", valid_p=0.1)
            metrics=m.fit(df_train,freq='D',validation_df=df_val, progress='bar')
        else:
            df_train=df_new
            metrics=m.fit(df_train,freq='D', progress='bar')
        print(metrics.tail(1))

        # save fitted model to disk (create the models folder on first use)
        os.makedirs(os.path.dirname(model_file), exist_ok=True)
        with open(model_file, "wb") as f:
            pickle.dump(m, f, pickle.HIGHEST_PROTOCOL)
    else:
        print("Loading model from file: "+model_file)
        with open(model_file, "rb") as f:
            m=pickle.load(f)

    # The plots below are drawn onto matplotlib axes and saved with fig.savefig, so the
    # matplotlib backend has to be selected explicitly, as in the other model cells.
    m.set_plotting_backend("matplotlib")

    # In-sample forecast. It is stored under its own name; it used to be written to the
    # *_forecast_test.pkl file, where the test forecast below overwrote it.
    forecast_train = m.predict(df_new)
    forecast_train.to_pickle(fpathm+f"/{stn}_forecast_train.pkl")

    # plot forecast in training period
    fig, ax = plt.subplots(figsize=(18, 6))
    m.highlight_nth_step_ahead_of_each_forecast(1).plot(forecast_train, xlabel="Date", ylabel=stn, ax=ax)
    ax.set_title(f"Model {modelnr}: {stn} Train Forecast")
    fig.savefig(fpathm+f"/{stn}_model_{modelnr}_train_forecast.png",bbox_inches='tight')
    plt.close(fig)

    # plot residuals: time series on the left, histogram on the right
    df_residuals = pd.DataFrame({"ds": forecast_train.ds, "residuals": forecast_train.y - forecast_train.yhat1})
    fig=plt.figure(figsize=(18, 5))
    spec = gridspec.GridSpec(ncols=2, nrows=1, width_ratios=[3, 1], wspace=0.08)

    ax0=fig.add_subplot(spec[0])
    df_residuals.plot(x="ds", y="residuals",ax=ax0)
    ax0.set_title("Residuals")
    ax0.grid()

    ax1=fig.add_subplot(spec[1])
    ax1.set_title("Residuals histogram")
    df_residuals.residuals.hist(bins=50,ax=ax1,orientation='horizontal')
    ax1.set_yticklabels([])

    fig.suptitle(f"Model {modelnr}: {stn} Residuals")
    fig.savefig(fpathm+f"/{stn}_model_{modelnr}_residuals.png",bbox_inches='tight')
    plt.close(fig)

    # Forecast test period: predict over the last 4*31 days of train+test.
    # dropna() removes the rows without an observed y or without a forecast.
    future = m.make_future_dataframe(m.create_df_with_events(pd.concat([df,df_test]),df_hol), events_df=df_hol,
                                     periods=1, n_historic_predictions=31*4)
    forecast_test = m.predict(future).dropna()

    fig,ax=plt.subplots(1,1,figsize=(18,5))
    m.highlight_nth_step_ahead_of_each_forecast(1).plot(forecast_test,ax=ax)
    ax.set_xlim(pd.Timestamp('2018-10-01'),pd.Timestamp('2019-01-31'))

    forecast_test['residuals']=forecast_test.y-forecast_test.yhat1
    forecast_test=forecast_test.set_index('ds')
    forecast_test.to_pickle(fpathm+f"/{stn}_forecast_test.pkl")

    ax.plot(df_test.ds,df_test.y,'rx',label='test')
    ax.legend(['yhat','yhat','test','test'])

    # Error metrics on the test period
    in_test=forecast_test.index>=t_start_test
    yhat_test=forecast_test.loc[in_test,'yhat1']

    # MASE scaled by the naive error before the test period / MASE2 scaled by the naive error inside it
    mase_test =mase(df_test.y,yhat_test,forecast_test.loc[forecast_test.index<t_start_test,'y'])
    mase_test2=mase(df_test.y,yhat_test,forecast_test.loc[in_test,'y'])

    rmse_test=np.sqrt(mean_squared_error(df_test.y,yhat_test))

    mae_test=forecast_test.loc[in_test,'residuals'].abs().mean()
    mae_test_rel=mae_test/forecast_test.loc[in_test,'y'].mean()
    mae_train=df_residuals.residuals.abs().mean()

    print(f"Model {modelnr}: {stn} MASE={mase_test*100:.1f}%,MASE2={mase_test2*100:.1f}%, MAE={mae_test:.4f}, MAE_rel={mae_test_rel*100:.1f}%")

    dict_MASE[(stn,modelnr)]=mase_test
    dict_MAE[(stn,modelnr)]=mae_test_rel
    dict_M[(stn,modelnr)]={'mase_test':mase_test,'mase_test2':mase_test2,'mae_test':mae_test,'rmse_test':rmse_test,'mae_test_rel':mae_test_rel,'mae_train':mae_train}

    ax.set_title(f"Model {modelnr}: {stn} MASE={mase_test*100:.1f}%, MAE_rel={mae_test_rel*100:.1f}%")
    fig.savefig(fpathm+f"/{stn}_model_{modelnr}_forecast.png",bbox_inches='tight')

    # Written after every station so that partial results survive an interrupted run
    with open(fpathm+f'/dict_MASE_MAE_{modelnr}.pkl', 'wb') as f:
        pickle.dump({'dict_MASE':dict_MASE,'dict_MAE':dict_MAE,'dict_M':dict_M}, f, pickle.HIGHEST_PROTOCOL)

    plt.close(fig)

# 3. One (1) day ahead forecast modeling AR with a Neural Network (AR-Net)

In [None]:
# Model 3: seasonality + events + auto-regression modelled by a neural network (AR-Net, 1 day ahead).
# For every station/fuel series the model is fitted (or loaded from disk), the in-sample
# forecast and residuals are plotted, the test period is forecast and the error metrics are stored.
valid=False
dict_M = {}
modelnr=3
fit_model=False  # True to fit model, False to load fitted model from disk
t_start_test='2019-01-01'  # first day of the test period
nl=8  # width of each of the four hidden AR-Net layers

# Output folder for this model's forecasts, figures and metrics.
# makedirs(exist_ok=True) also creates missing parent folders and is safe on re-runs.
fpathm = fpath+f"/m_{modelnr}"
os.makedirs(fpathm, exist_ok=True)

for stn in tqdm([f"ES{nr}_{pr}" for nr in range(1,13) for pr in ["95","GOA"]]):
    # Extra (unofficial) events are only used for the GOA series; 95 series use the base holidays.
    if stn[:2]=='ES' and stn[-3:]=="GOA":
        df_hol_extra=dict_hol_extra[f"df_holi_extra_{stn}"]
        if df_hol_extra.loc[df_hol_extra.ds==datetime.datetime(2018,1,5)].shape[0]==1:
            # 2018-01-05 is an extra event for this station: add 2019-01-04 so the test period gets it as well.
            # This step was missing here although models 1, 2, 4 and 5 apply it; without it the
            # test forecasts of this model are not comparable with the other models.
            df_hol_extra=pd.concat([df_hol_extra,pd.DataFrame({'ds':[datetime.datetime(2019,1,4)],'event':['Ev01']},index=[0])],ignore_index=True)
        df_hol=pd.concat([df_hol_base,df_hol_extra],ignore_index=True)
        print(f"Added {df_hol_extra.shape[0]} extra holidays for",stn)
    else:
        df_hol=df_hol_base

    # Sales series of this station in NeuralProphet's ds/y layout
    df=df_v_m.loc[:,[stn]].reset_index().rename(columns={'sale_date':'ds',stn:'y'})
    df_test=df_v_m_test.loc[:,[stn]].reset_index().rename(columns={'sale_date':'ds',stn:'y'})

    m=NeuralProphet(
        yearly_seasonality=True,
        weekly_seasonality=True,
        daily_seasonality=False,
        seasonality_mode="additive",
        n_lags=7,
        n_forecasts=1,
        quantiles=[0.05,0.95],
        ar_layers=[nl, nl, nl, nl],
        ar_reg=10,
    )

    for e in df_hol.event.unique():
        m.add_events(e,regularization=None,mode='additive')

    df_new=m.create_df_with_events(df, df_hol)

    model_file=fpath+f'/models_1d/{stn}_model_{modelnr}_model.pkl'
    if fit_model:
        if valid:
            df_train, df_val = m.split_df(df_new, freq="D", valid_p=0.1)
            metrics=m.fit(df_train,freq='D',validation_df=df_val, progress='bar')
        else:
            df_train=df_new
            metrics=m.fit(df_train,freq='D', progress='bar')
        print(metrics.tail(1))

        # save fitted model to disk (create the models folder on first use)
        os.makedirs(os.path.dirname(model_file), exist_ok=True)
        with open(model_file, "wb") as f:
            pickle.dump(m, f, pickle.HIGHEST_PROTOCOL)
    else:
        print("Loading model from file: "+model_file)
        with open(model_file, "rb") as f:
            m=pickle.load(f)

    m.set_plotting_backend("matplotlib")

    # In-sample forecast. It is stored under its own name; it used to be written to the
    # *_forecast_test.pkl file, where the test forecast below overwrote it.
    forecast_train = m.predict(df_new)
    forecast_train.to_pickle(fpathm+f"/{stn}_forecast_train.pkl")

    # plot forecast in training period
    fig, ax = plt.subplots(figsize=(18, 6))
    m.highlight_nth_step_ahead_of_each_forecast(1).plot(forecast_train, xlabel="Date", ylabel=stn, ax=ax)
    ax.set_title(f"Model {modelnr}: {stn} Train Forecast")
    fig.savefig(fpathm+f"/{stn}_model_{modelnr}_train_forecast.png",bbox_inches='tight')
    plt.close(fig)

    # plot residuals: time series on the left, histogram on the right
    df_residuals = pd.DataFrame({"ds": forecast_train.ds, "residuals": forecast_train.y - forecast_train.yhat1})
    fig=plt.figure(figsize=(18, 5))
    spec = gridspec.GridSpec(ncols=2, nrows=1, width_ratios=[3, 1], wspace=0.08)

    ax0=fig.add_subplot(spec[0])
    df_residuals.plot(x="ds", y="residuals",ax=ax0)
    ax0.set_title("Residuals")
    ax0.grid()

    ax1=fig.add_subplot(spec[1])
    ax1.set_title("Residuals histogram")
    df_residuals.residuals.hist(bins=50,ax=ax1,orientation='horizontal')
    ax1.set_yticklabels([])

    fig.suptitle(f"Model {modelnr}: {stn} Residuals")
    fig.savefig(fpathm+f"/{stn}_model_{modelnr}_residuals.png",bbox_inches='tight')
    plt.close(fig)

    # Forecast test period: predict over the last 4*31 days of train+test.
    # dropna() removes the rows without an observed y or without a forecast.
    future = m.make_future_dataframe(m.create_df_with_events(pd.concat([df,df_test]),df_hol), events_df=df_hol,
                                     periods=1, n_historic_predictions=31*4)
    forecast_test = m.predict(future).dropna()

    fig,ax=plt.subplots(1,1,figsize=(18,5))
    m.highlight_nth_step_ahead_of_each_forecast(1).plot(forecast_test,ax=ax)
    ax.set_xlim(pd.Timestamp('2018-10-01'),pd.Timestamp('2019-01-31'))

    forecast_test['residuals']=forecast_test.y-forecast_test.yhat1
    forecast_test=forecast_test.set_index('ds')
    forecast_test.to_pickle(fpathm+f"/{stn}_forecast_test.pkl")

    ax.plot(df_test.ds,df_test.y,'rx',label='test')
    ax.legend(['yhat','yhat','test','test'])

    # Error metrics on the test period
    in_test=forecast_test.index>=t_start_test
    yhat_test=forecast_test.loc[in_test,'yhat1']

    # MASE scaled by the naive error before the test period / MASE2 scaled by the naive error inside it
    mase_test =mase(df_test.y,yhat_test,forecast_test.loc[forecast_test.index<t_start_test,'y'])
    mase_test2=mase(df_test.y,yhat_test,forecast_test.loc[in_test,'y'])

    rmse_test=np.sqrt(mean_squared_error(df_test.y,yhat_test))

    mae_test=forecast_test.loc[in_test,'residuals'].abs().mean()
    mae_test_rel=mae_test/forecast_test.loc[in_test,'y'].mean()
    mae_train=df_residuals.residuals.abs().mean()

    print(f"Model {modelnr}: {stn} MASE={mase_test*100:.1f}%,MASE2={mase_test2*100:.1f}%, MAE={mae_test:.4f}, MAE_rel={mae_test_rel*100:.1f}%")

    dict_MASE[(stn,modelnr)]=mase_test
    dict_MAE[(stn,modelnr)]=mae_test_rel
    dict_M[(stn,modelnr)]={'mase_test':mase_test,'mase_test2':mase_test2,'mae_test':mae_test,'rmse_test':rmse_test,'mae_test_rel':mae_test_rel,'mae_train':mae_train}

    ax.set_title(f"Model {modelnr}: {stn} MASE={mase_test*100:.1f}%, MAE_rel={mae_test_rel*100:.1f}%")
    fig.savefig(fpathm+f"/{stn}_model_{modelnr}_forecast.png",bbox_inches='tight')

    # Written after every station so that partial results survive an interrupted run
    with open(fpathm+f'/dict_MASE_MAE_{modelnr}.pkl', 'wb') as f:
        pickle.dump({'dict_MASE':dict_MASE,'dict_MAE':dict_MAE,'dict_M':dict_M}, f, pickle.HIGHEST_PROTOCOL)

    plt.close(fig)

# 4 AR-Net + meteo

In [None]:
# Model 4: AR-Net (as model 3) plus weather variables as future regressors (1 day ahead).
# For every station/fuel series the model is fitted (or loaded from disk), the in-sample
# forecast and residuals are plotted, the test period is forecast and the error metrics are stored.
dict_M={}
valid=False
modelnr=4
fit_model=False  # True to fit model, False to load fitted model from disk
t_start_test='2019-01-01'  # first day of the test period
nl=8  # width of each of the four hidden AR-Net layers
# Weather columns of df_v_m / df_v_m_test that are used as future regressors
regressors_list=['tmed','prec','velmedia','sol','racha',]

# Output folder for this model's forecasts, figures and metrics.
# makedirs(exist_ok=True) also creates missing parent folders and is safe on re-runs.
fpathm = fpath+f"/m_{modelnr}"
os.makedirs(fpathm, exist_ok=True)

for stn in tqdm([f"ES{nr}_{pr}" for nr in range(1,13) for pr in ["95","GOA"]]):
    # Extra (unofficial) events are only used for the GOA series; 95 series use the base holidays.
    if stn[:2]=='ES' and stn[-3:]=="GOA":
        df_hol_extra=dict_hol_extra[f"df_holi_extra_{stn}"]
        if df_hol_extra.loc[df_hol_extra.ds==datetime.datetime(2018,1,5)].shape[0]==1:
            # 2018-01-05 is an extra event for this station: add 2019-01-04 so the test period gets it as well
            df_hol_extra=pd.concat([df_hol_extra,pd.DataFrame({'ds':[datetime.datetime(2019,1,4)],'event':['Ev01']},index=[0])],ignore_index=True)
        df_hol=pd.concat([df_hol_base,df_hol_extra],ignore_index=True)
        print(f"Added {df_hol_extra.shape[0]} extra holidays for",stn)
    else:
        df_hol=df_hol_base

    # Sales series of this station in NeuralProphet's ds/y layout
    df=df_v_m.loc[:,[stn]].reset_index().rename(columns={'sale_date':'ds',stn:'y'})
    df_test=df_v_m_test.loc[:,[stn]].reset_index().rename(columns={'sale_date':'ds',stn:'y'})

    # add weather data (copied by position, so df/df_test must have the same row order as df_v_m/df_v_m_test)
    for reg in regressors_list:
        df[reg] = df_v_m[reg].values
        df_test[reg] = df_v_m_test[reg].values

    m=NeuralProphet(
        yearly_seasonality=True,
        weekly_seasonality=True,
        daily_seasonality=False,
        seasonality_mode="additive",
        n_lags=7,
        n_forecasts=1,
        quantiles=[0.05,0.95],
        ar_layers=[nl, nl, nl, nl],
        ar_reg=10,
    )

    for e in df_hol.event.unique():
        m.add_events(e,regularization=None,mode='additive')

    # add weather data as future regressors and assume reliable weather forecast is available for 1 day ahead
    for reg in regressors_list:
        m.add_future_regressor(reg,normalize="standardize")

    df_new=m.create_df_with_events(df, df_hol)

    model_file=fpath+f'/models_1d/{stn}_model_{modelnr}_model.pkl'
    if fit_model:
        if valid:
            df_train, df_val = m.split_df(df_new, freq="D", valid_p=0.1)
            metrics=m.fit(df_train,freq='D',validation_df=df_val, progress='bar')
        else:
            df_train=df_new
            metrics=m.fit(df_train,freq='D', progress='bar')
        print(metrics.tail(1))

        # save fitted model to disk (create the models folder on first use)
        os.makedirs(os.path.dirname(model_file), exist_ok=True)
        with open(model_file, "wb") as f:
            pickle.dump(m, f, pickle.HIGHEST_PROTOCOL)
    else:
        print("Loading model from file: "+model_file)
        with open(model_file, "rb") as f:
            m=pickle.load(f)

    m.set_plotting_backend("matplotlib")

    # In-sample forecast, computed once. It is stored under its own name; it used to be written
    # to the *_forecast_test.pkl file, where the test forecast below overwrote it.
    forecast_train = m.predict(df_new)
    forecast_train.to_pickle(fpathm+f"/{stn}_forecast_train.pkl")

    # plot forecast in training period
    fig, ax = plt.subplots(figsize=(18, 6))
    m.highlight_nth_step_ahead_of_each_forecast(1).plot(forecast_train, xlabel="Date", ylabel=stn, ax=ax)
    ax.set_title(f"Model {modelnr}: {stn} Train Forecast")
    fig.savefig(fpathm+f"/{stn}_model_{modelnr}_train_forecast.png",bbox_inches='tight')
    plt.close(fig)

    # plot residuals: time series on the left, histogram on the right
    df_residuals = pd.DataFrame({"ds": forecast_train.ds, "residuals": forecast_train.y - forecast_train.yhat1})
    fig=plt.figure(figsize=(18, 5))
    spec = gridspec.GridSpec(ncols=2, nrows=1, width_ratios=[3, 1], wspace=0.08)

    ax0=fig.add_subplot(spec[0])
    df_residuals.plot(x="ds", y="residuals",ax=ax0)
    ax0.set_title("Residuals")
    ax0.grid()

    ax1=fig.add_subplot(spec[1])
    ax1.set_title("Residuals histogram")
    df_residuals.residuals.hist(bins=50,ax=ax1,orientation='horizontal')
    ax1.set_yticklabels([])

    fig.suptitle(f"Model {modelnr}: {stn} Residuals")
    fig.savefig(fpathm+f"/{stn}_model_{modelnr}_residuals.png",bbox_inches='tight')
    plt.close(fig)

    # Forecast test period. The train+test frame with event columns is built once and
    # also provides the regressor values.
    # NOTE(review): this cell uses 31*14 historic predictions while the other model cells use 31*4.
    # The pre-test part of this window is the scale of mase_test, so the MASE of this model is
    # scaled over a longer history than the others - confirm whether this is intended.
    df_full=m.create_df_with_events(pd.concat([df,df_test]),df_hol)
    future = m.make_future_dataframe(df_full,
                                     regressors_df=df_full.loc[:,regressors_list],
                                     periods=1, n_historic_predictions=31*14)
    # dropna() removes the rows without an observed y or without a forecast
    forecast_test = m.predict(future).dropna()

    fig,ax=plt.subplots(1,1,figsize=(18,5))
    m.highlight_nth_step_ahead_of_each_forecast(1).plot(forecast_test,ax=ax)
    ax.set_xlim(pd.Timestamp('2018-10-01'),pd.Timestamp('2019-01-31'))

    forecast_test['residuals']=forecast_test.y-forecast_test.yhat1
    forecast_test=forecast_test.set_index('ds')
    forecast_test.to_pickle(fpathm+f"/{stn}_forecast_test.pkl")

    ax.plot(df_test.ds,df_test.y,'rx',label='test')
    ax.legend(['yhat','yhat','test','test'])

    # Error metrics on the test period
    in_test=forecast_test.index>=t_start_test
    yhat_test=forecast_test.loc[in_test,'yhat1']

    # MASE scaled by the naive error before the test period / MASE2 scaled by the naive error inside it
    mase_test =mase(df_test.y,yhat_test,forecast_test.loc[forecast_test.index<t_start_test,'y'])
    mase_test2=mase(df_test.y,yhat_test,forecast_test.loc[in_test,'y'])

    rmse_test=np.sqrt(mean_squared_error(df_test.y,yhat_test))

    mae_test=forecast_test.loc[in_test,'residuals'].abs().mean()
    mae_test_rel=mae_test/forecast_test.loc[in_test,'y'].mean()
    mae_train=df_residuals.residuals.abs().mean()

    print(f"Model {modelnr}: {stn} MASE={mase_test*100:.1f}%,MASE2={mase_test2*100:.1f}%, MAE={mae_test:.4f}, MAE_rel={mae_test_rel*100:.1f}%")

    dict_MASE[(stn,modelnr)]=mase_test
    dict_MAE[(stn,modelnr)]=mae_test_rel
    dict_M[(stn,modelnr)]={'mase_test':mase_test,'mase_test2':mase_test2,'mae_test':mae_test,'rmse_test':rmse_test,'mae_test_rel':mae_test_rel,'mae_train':mae_train}

    ax.set_title(f"Model {modelnr}: {stn} MASE={mase_test*100:.1f}%, MAE_rel={mae_test_rel*100:.1f}%")
    fig.savefig(fpathm+f"/{stn}_model_{modelnr}_forecast.png",bbox_inches='tight')

    # Written after every station so that partial results survive an interrupted run
    with open(fpathm+f'/dict_MASE_MAE_{modelnr}.pkl', 'wb') as f:
        pickle.dump({'dict_MASE':dict_MASE,'dict_MAE':dict_MAE,'dict_M':dict_M}, f, pickle.HIGHEST_PROTOCOL)

    plt.close(fig)

# 5 One (1) step ahead forecast using Auto-Regression $+\Delta y$ lagged regressor

In [None]:
# Model 5: seasonality + events + linear auto-regression + day-to-day sales change (delta y)
# as a lagged regressor (1 day ahead).
# For every station/fuel series the model is fitted (or loaded from disk), the in-sample
# forecast and residuals are plotted, the test period is forecast and the error metrics are stored.
valid=False
dict_M={}
modelnr=5
fit_model=False  # True to fit model, False to load fitted model from disk
t_start_test='2019-01-01'  # first day of the test period

# Output folder for this model's forecasts, figures and metrics.
# makedirs(exist_ok=True) also creates missing parent folders and is safe on re-runs.
fpathm = fpath+f"/m_{modelnr}"
os.makedirs(fpathm, exist_ok=True)

for stn in tqdm([f"ES{nr}_{pr}" for nr in range(1,13) for pr in ["95","GOA"]]):
    # Extra (unofficial) events are only used for the GOA series; 95 series use the base holidays.
    if stn[:2]=='ES' and stn[-3:]=="GOA":
        df_hol_extra=dict_hol_extra[f"df_holi_extra_{stn}"]
        if df_hol_extra.loc[df_hol_extra.ds==datetime.datetime(2018,1,5)].shape[0]==1:
            # 2018-01-05 is an extra event for this station: add 2019-01-04 so the test period gets it as well
            df_hol_extra=pd.concat([df_hol_extra,pd.DataFrame({'ds':[datetime.datetime(2019,1,4)],'event':['Ev01']},index=[0])],ignore_index=True)
        df_hol=pd.concat([df_hol_base,df_hol_extra],ignore_index=True)
        print(f"Added {df_hol_extra.shape[0]} extra holidays for",stn)
    else:
        df_hol=df_hol_base

    # Sales series of this station in NeuralProphet's ds/y layout
    df=df_v_m.loc[:,[stn]].reset_index().rename(columns={'sale_date':'ds',stn:'y'})
    df_test=df_v_m_test.loc[:,[stn]].reset_index().rename(columns={'sale_date':'ds',stn:'y'})

    # Day-to-day change of y over train+test (0 for the very first day), split back into both frames.
    # It is computed on the concatenated series so that the first test day gets a real difference.
    lagg_I=pd.concat([df,df_test],ignore_index=True).y.diff().fillna(0)
    df["I"] = lagg_I.iloc[:len(df)].values
    df_test["I"]=lagg_I.iloc[len(df):].values

    m=NeuralProphet(
        yearly_seasonality=True,
        weekly_seasonality=True,
        daily_seasonality=False,
        seasonality_mode="additive",
        n_lags=7,
        ar_reg=1,
        n_forecasts=1,
        quantiles=[0.05,0.95],
    )

    m.add_lagged_regressor("I",normalize="standardize",n_lags=7)

    for e in df_hol.event.unique():
        m.add_events(e,regularization=None,mode='additive')

    df_new=m.create_df_with_events(df, df_hol)

    model_file=fpath+f'/models_1d/{stn}_model_{modelnr}_model.pkl'
    if fit_model:
        if valid:
            df_train, df_val = m.split_df(df_new, freq="D", valid_p=0.1)
            metrics=m.fit(df_train,freq='D',validation_df=df_val, progress='bar')
        else:
            df_train=df_new
            metrics=m.fit(df_train,freq='D', progress='bar')
        print(metrics.tail(1))

        # save fitted model to disk (create the models folder on first use)
        os.makedirs(os.path.dirname(model_file), exist_ok=True)
        with open(model_file, "wb") as f:
            pickle.dump(m, f, pickle.HIGHEST_PROTOCOL)
    else:
        print("Loading model from file: "+model_file)
        with open(model_file, "rb") as f:
            m=pickle.load(f)

    m.set_plotting_backend("matplotlib")

    # In-sample forecast. It is stored under its own name; it used to be written to the
    # *_forecast_test.pkl file, where the test forecast below overwrote it.
    forecast_train = m.predict(df_new)
    forecast_train.to_pickle(fpathm+f"/{stn}_forecast_train.pkl")

    # plot forecast in training period
    fig, ax = plt.subplots(figsize=(18, 6))
    m.highlight_nth_step_ahead_of_each_forecast(1).plot(forecast_train, xlabel="Date", ylabel=stn, ax=ax)
    ax.set_title(f"Model {modelnr}: {stn} Train Forecast")
    fig.savefig(fpathm+f"/{stn}_model_{modelnr}_train_forecast.png",bbox_inches='tight')
    plt.close(fig)

    # plot residuals: time series on the left, histogram on the right
    df_residuals = pd.DataFrame({"ds": forecast_train.ds, "residuals": forecast_train.y - forecast_train.yhat1})
    fig=plt.figure(figsize=(18, 5))
    spec = gridspec.GridSpec(ncols=2, nrows=1, width_ratios=[3, 1], wspace=0.08)

    ax0=fig.add_subplot(spec[0])
    df_residuals.plot(x="ds", y="residuals",ax=ax0)
    ax0.set_title("Residuals")
    ax0.grid()

    ax1=fig.add_subplot(spec[1])
    ax1.set_title("Residuals histogram")
    df_residuals.residuals.hist(bins=50,ax=ax1,orientation='horizontal')
    ax1.set_yticklabels([])

    fig.suptitle(f"Model {modelnr}: {stn} Residuals")
    fig.savefig(fpathm+f"/{stn}_model_{modelnr}_residuals.png",bbox_inches='tight')
    plt.close(fig)

    # Forecast test period: predict over the last 4*31 days of train+test.
    # dropna() removes the rows without an observed y or without a forecast.
    future = m.make_future_dataframe(m.create_df_with_events(pd.concat([df,df_test]),df_hol), events_df=df_hol,
                        regressors_df=pd.DataFrame({'ds':np.append(df_new.ds.values,df_test.ds.values),'I':lagg_I.values}).reset_index(drop=True),
                                     periods=1, n_historic_predictions=31*4)
    forecast_test = m.predict(future).dropna()

    fig,ax=plt.subplots(1,1,figsize=(18,5))
    m.highlight_nth_step_ahead_of_each_forecast(1).plot(forecast_test,ax=ax)
    ax.set_xlim(pd.Timestamp('2018-10-01'),pd.Timestamp('2019-01-31'))

    forecast_test['residuals']=forecast_test.y-forecast_test.yhat1
    forecast_test=forecast_test.set_index('ds')
    forecast_test.to_pickle(fpathm+f"/{stn}_forecast_test.pkl")

    ax.plot(df_test.ds,df_test.y,'rx',label='test')
    ax.legend(['yhat','yhat','test','test'])

    # Error metrics on the test period
    in_test=forecast_test.index>=t_start_test
    yhat_test=forecast_test.loc[in_test,'yhat1']

    # MASE scaled by the naive error before the test period / MASE2 scaled by the naive error inside it
    mase_test =mase(df_test.y,yhat_test,forecast_test.loc[forecast_test.index<t_start_test,'y'])
    mase_test2=mase(df_test.y,yhat_test,forecast_test.loc[in_test,'y'])

    rmse_test=np.sqrt(mean_squared_error(df_test.y,yhat_test))

    mae_test=forecast_test.loc[in_test,'residuals'].abs().mean()
    mae_test_rel=mae_test/forecast_test.loc[in_test,'y'].mean()
    mae_train=df_residuals.residuals.abs().mean()

    print(f"Model {modelnr}: {stn} MASE={mase_test*100:.1f}%,MASE2={mase_test2*100:.1f}%, MAE={mae_test:.4f}, MAE_rel={mae_test_rel*100:.1f}%")

    dict_MASE[(stn,modelnr)]=mase_test
    dict_MAE[(stn,modelnr)]=mae_test_rel
    dict_M[(stn,modelnr)]={'mase_test':mase_test,'mase_test2':mase_test2,'mae_test':mae_test,'rmse_test':rmse_test,'mae_test_rel':mae_test_rel,'mae_train':mae_train}

    ax.set_title(f"Model {modelnr}: {stn} MASE={mase_test*100:.1f}%, MAE_rel={mae_test_rel*100:.1f}%")
    fig.savefig(fpathm+f"/{stn}_model_{modelnr}_forecast.png",bbox_inches='tight')

    # Written after every station so that partial results survive an interrupted run
    with open(fpathm+f'/dict_MASE_MAE_{modelnr}.pkl', 'wb') as f:
        pickle.dump({'dict_MASE':dict_MASE,'dict_MAE':dict_MAE,'dict_M':dict_M}, f, pickle.HIGHEST_PROTOCOL)

    plt.close(fig)

## 6. One (1) step ahead forecast using AR, $\Delta y$ lagged regressor and meteo

In [None]:
# Model 6: one-step-ahead NeuralProphet forecast for every station/product series,
# using auto-regression, the lagged first difference of sales ("I") and weather
# data as future regressors.
# Relies on names created by earlier cells: fpath, df_v_m, df_v_m_test, df_hol_base,
# dict_hol_extra, dict_MASE, dict_MAE and mase().
dict_M={}
valid=False      # True: split off 10% of the training frame as validation data while fitting
modelnr=6
fit_model=False  # True to fit model, False to load fitted model from disk

# output folder for this model's forecasts, figures and metrics
fpathm = fpath+f"/m_{modelnr}"
os.makedirs(fpathm, exist_ok=True)

# weather columns copied from the sales frames and registered as future regressors
weather_cols=['tmed','prec','velmedia','sol','racha']
# first day of the test period; earlier rows of forecast_test are historic predictions
t_start_test='2019-01-01'

for stn in tqdm([f"ES{nr}_{pr}" for nr in range(1,13) for pr in ["95","GOA"]]):
# for stn in ['ES10_95']:
    # GOA series get station-specific extra events (plus one on 2019-01-04) on top of the base holidays
    if stn[:2]=='ES' and stn[-3:]=="GOA":
        df_hol_extra=dict_hol_extra[f"df_holi_extra_{stn}"]
        df_hol_extra=pd.concat([df_hol_extra,pd.DataFrame({'ds':[datetime.datetime(2019,1,4)],'event':['Ev01']},index=[0])],ignore_index=True)
        df_hol=pd.concat([df_hol_base,df_hol_extra],ignore_index=True)
        print(f"Added {df_hol_extra.shape[0]} extra holidays for",stn)
        print(df_hol_extra)
    else:
        df_hol=df_hol_base

    # train / test frames in NeuralProphet layout (ds, y)
    df=df_v_m.loc[:,[stn]].reset_index().rename(columns={'sale_date':'ds',stn:'y'})
    df_test=df_v_m_test.loc[:,[stn]].reset_index().rename(columns={'sale_date':'ds',stn:'y'})

    # add lagged regressor with difference of y
    # do the difference on concatenated df and df_test for continuity
    lagg_I=pd.concat([df,df_test],ignore_index=True).y.diff().fillna(0)
    df["I"] = lagg_I.iloc[:len(df)].values
    df_test["I"]=lagg_I.iloc[len(df):].values

    # add weather data (temperature, precipitation, wind, sun) to both frames.
    # FIX: the test frame previously took "tmed" from the tmax column, so the
    # regressor fed to the model at test time differed from the one it was trained on.
    for col in weather_cols:
        df[col] = df_v_m[col].values
        df_test[col] = df_v_m_test[col].values

    m=NeuralProphet(
                    yearly_seasonality=True,
                    weekly_seasonality=True,
                    daily_seasonality=False,
                    seasonality_mode="additive",
                    n_lags=7,
                    ar_reg=0.1,
                    n_forecasts=1,
                    quantiles=[0.05,0.95],
                    )

    # add holidays
    for e in df_hol.event.unique():
        m.add_events(e,regularization=None,mode='additive')

    # add lagged regressor
    m.add_lagged_regressor("I",normalize="standardize",n_lags=7,regularization=0.1)

    # add weather data as future regressors and assume reliable weather forecast is available for 1 day ahead
    for col in weather_cols:
        m.add_future_regressor(col,normalize="standardize")

    # the freshly configured model is needed here even when a fitted one is loaded below
    df_new=m.create_df_with_events(df, df_hol)

    model_file=fpath+f'/models_1d/{stn}_model_{modelnr}_model.pkl'
    if fit_model:
        if valid:
            df_train, df_val = m.split_df(df_new, freq="D", valid_p=0.1)
            metrics=m.fit(df_train,freq='D',validation_df=df_val, progress='bar')
        else:
            df_train=df_new
            metrics=m.fit(df_train,freq='D', progress='bar')
        print(metrics.tail(1))

        # save fitted model to disk (create the target folder on first use)
        os.makedirs(os.path.dirname(model_file), exist_ok=True)
        with open(model_file, "wb") as f:
            pickle.dump(m, f, pickle.HIGHEST_PROTOCOL)
    else:
        print("Loading model from file: "+model_file)
        # NOTE: pickle.load executes arbitrary code; only load model files you created yourself
        with open(model_file, "rb") as f:
            m=pickle.load(f)

    m.set_plotting_backend("matplotlib")

    # in-sample forecast.
    # FIX: was written to "<stn>_forecast_test.pkl" and then overwritten by the test forecast below
    forecast_train = m.predict(df_new)
    forecast_train.to_pickle(fpathm+f"/{stn}_forecast_train.pkl")

    # plot forecast in training period
    fig, ax = plt.subplots(figsize=(18, 6))
    m.highlight_nth_step_ahead_of_each_forecast(1).plot(forecast_train, xlabel="Date", ylabel=stn, ax=ax)
    ax.set_title(f"Model {modelnr}: {stn} Train Forecast")
    fig.savefig(fpathm+f"/{stn}_model_{modelnr}_train_forecast.png",bbox_inches='tight')
    plt.close(fig)

    # plot residuals (time series on the left, horizontal histogram on the right)
    df_residuals = pd.DataFrame({"ds": forecast_train.ds, "residuals": forecast_train.y - forecast_train.yhat1})

    fig=plt.figure(figsize=(18, 5))
    spec = gridspec.GridSpec(ncols=2, nrows=1,
                            width_ratios=[3, 1], wspace=0.08)

    ax0=fig.add_subplot(spec[0])
    df_residuals.plot(x="ds", y="residuals",ax=ax0)
    ax0.set_title("Residuals")
    ax0.grid()

    ax1=fig.add_subplot(spec[1])
    ax1.set_title("Residuals histogram")
    df_residuals.residuals.hist(bins=50,ax=ax1,orientation='horizontal')
    ax1.set_yticklabels([])

    fig.suptitle(f"Model {modelnr}: {stn} Residuals")
    fig.savefig(fpathm+f"/{stn}_model_{modelnr}_residuals.png",bbox_inches='tight')
    plt.close(fig)

    # Forecast test period
    regressors_list=['I']+weather_cols
    df_all=pd.concat([df,df_test])

    future = m.make_future_dataframe(m.create_df_with_events(df_all,df_hol), events_df=df_hol,
                                regressors_df=df_all.loc[:,regressors_list],
                                periods=1, n_historic_predictions=31*4)

    forecast_test = m.predict(future).dropna()

    fig,ax=plt.subplots(1,1,figsize=(18,5))

    m.highlight_nth_step_ahead_of_each_forecast(1).plot(forecast_test,ax=ax)
    ax.set_xlim(pd.Timestamp('2018-10-01'),pd.Timestamp('2019-01-31'))

    forecast_test['residuals']=forecast_test.y-forecast_test.yhat1
    forecast_test.set_index('ds',inplace=True)
    forecast_test.to_pickle(fpathm+f"/{stn}_forecast_test.pkl")

    ax.plot(df_test.ds,df_test.y,'rx',label='test')
    ax.legend(['yhat','yhat','test','test'])

    # split the forecast into the historic part and the test period
    in_test=forecast_test.index>=t_start_test
    before_test=forecast_test.index<t_start_test
    yhat_test=forecast_test.loc[in_test,'yhat1']

    # MASE scaled by the naive one-step error of the historic part; MASE2 by that of the test period
    mase_test =mase(df_test.y,yhat_test,forecast_test.loc[before_test,'y'])
    mase_test2=mase(df_test.y,yhat_test,forecast_test.loc[in_test,'y'])

    rmse_test=np.sqrt(mean_squared_error(df_test.y,yhat_test))

    mae_test=forecast_test.loc[in_test,'residuals'].abs().mean()
    mae_test_rel=mae_test/forecast_test.loc[in_test,'y'].mean()
    mae_train=df_residuals.residuals.abs().mean()

    print(f"Model {modelnr}: {stn} MASE={mase_test*100:.1f}%,MASE2={mase_test2*100:.1f}%, MAE={mae_test:.4f}, MAE_rel={mae_test_rel*100:.1f}%")

    dict_MASE[(stn,modelnr)]=mase_test
    dict_MAE[(stn,modelnr)]=mae_test_rel
    dict_M[(stn,modelnr)]={'mase_test':mase_test,'mase_test2':mase_test2,'mae_test':mae_test,'rmse_test':rmse_test,'mae_test_rel':mae_test_rel,'mae_train':mae_train}

    ax.set_title(f"Model {modelnr}: {stn} MASE={mase_test*100:.1f}%, MAE_rel={mae_test_rel*100:.1f}%")
    fig.savefig(fpathm+f"/{stn}_model_{modelnr}_forecast.png",bbox_inches='tight')

    # rewrite the metrics file after every series so a partial run still leaves results on disk
    with open(fpathm+f'/dict_MASE_MAE_{modelnr}.pkl', 'wb') as f:
        pickle.dump({'dict_MASE':dict_MASE,'dict_MAE':dict_MAE,'dict_M':dict_M}, f, pickle.HIGHEST_PROTOCOL)

    plt.close(fig)

## 7. Model 6 with added PVP deviation from median as lagged regressor

In [None]:
# Model 7: model 6 (AR, lagged sales difference "I", weather future regressors)
# plus the station's price deviation from the median price ("pvpdev") as an
# additional lagged regressor.
# Relies on names created by earlier cells: fpath, df_v_m, df_v_m_test, df_p_m,
# df_p_m_test, df_hol_base, dict_hol_extra, dict_MASE, dict_MAE and mase().
dict_M={}
valid=False      # True: split off 10% of the training frame as validation data while fitting
modelnr=7
fit_model=False  # True to fit model, False to load fitted model from disk

# output folder for this model's forecasts, figures and metrics
fpathm = fpath+f"/m_{modelnr}"
os.makedirs(fpathm, exist_ok=True)

# weather columns copied from the sales frames and registered as future regressors
weather_cols=['tmed','prec','velmedia','sol','racha']
# first day of the test period; earlier rows of forecast_test are historic predictions
t_start_test='2019-01-01'

for stn in tqdm([f"ES{nr}_{pr}" for nr in range(1,13) for pr in ["95","GOA"]]):
# for stn in ['ES5_95']:
    # GOA series get station-specific extra events (plus one on 2019-01-04) on top of the base holidays
    if stn[:2]=='ES' and stn[-3:]=="GOA":
        df_hol_extra=dict_hol_extra[f"df_holi_extra_{stn}"]
        df_hol_extra=pd.concat([df_hol_extra,pd.DataFrame({'ds':[datetime.datetime(2019,1,4)],'event':['Ev01']},index=[0])],ignore_index=True)
        df_hol=pd.concat([df_hol_base,df_hol_extra],ignore_index=True)
        print(f"Added {df_hol_extra.shape[0]} extra holidays for",stn)
        print(df_hol_extra)
    else:
        df_hol=df_hol_base

    # train / test frames in NeuralProphet layout (ds, y)
    df=df_v_m.loc[:,[stn]].reset_index().rename(columns={'sale_date':'ds',stn:'y'})
    df_test=df_v_m_test.loc[:,[stn]].reset_index().rename(columns={'sale_date':'ds',stn:'y'})

    # add lagged regressor with difference of y
    # do the difference on concatenated df and df_test for continuity
    lagg_I=pd.concat([df,df_test],ignore_index=True).y.diff().fillna(0)
    df["I"] = lagg_I.iloc[:len(df)].values
    df_test["I"]=lagg_I.iloc[len(df):].values

    # add weather data (temperature, precipitation, wind, sun) to the training frame
    for col in weather_cols:
        df[col] = df_v_m[col].values

    # add price deviation from median for this station
    med_col="MED_"+stn.split('_')[1]
    df["pvpdev"]=(df_p_m[stn]-df_p_m[med_col]).values

    # same regressors for the test frame.
    # FIX: "tmed" was previously taken from the tmax column, so the regressor
    # fed to the model at test time differed from the one it was trained on.
    for col in weather_cols:
        df_test[col] = df_v_m_test[col].values
    df_test["pvpdev"]=(df_p_m_test[stn]-df_p_m_test[med_col]).values

    m=NeuralProphet(
                    yearly_seasonality=True,
                    weekly_seasonality=True,
                    daily_seasonality=False,
                    seasonality_mode="additive",
                    n_lags=7,
                    ar_reg=0.1,
                    n_forecasts=1,
                    quantiles=[0.05,0.95],
                    )

    # add holidays
    for e in df_hol.event.unique():
        m.add_events(e,regularization=None,mode='additive')

    # add lagged regressors
    m.add_lagged_regressor("I",normalize="standardize",n_lags=7,regularization=0.1)
    m.add_lagged_regressor("pvpdev",normalize="standardize") #can add lags later

    # add weather data as future regressors and assume reliable weather forecast is available for 1 day ahead
    for col in weather_cols:
        m.add_future_regressor(col,normalize="standardize")

    # the freshly configured model is needed here even when a fitted one is loaded below
    df_new=m.create_df_with_events(df, df_hol)

    model_file=fpath+f'/models_1d/{stn}_model_{modelnr}_model.pkl'
    if fit_model:
        if valid:
            df_train, df_val = m.split_df(df_new, freq="D", valid_p=0.1)
            metrics=m.fit(df_train,freq='D',validation_df=df_val, progress='bar')
        else:
            df_train=df_new
            metrics=m.fit(df_train,freq='D', progress='bar')
        print(metrics.tail(1))

        # save fitted model to disk (create the target folder on first use)
        os.makedirs(os.path.dirname(model_file), exist_ok=True)
        with open(model_file, "wb") as f:
            pickle.dump(m, f, pickle.HIGHEST_PROTOCOL)
    else:
        print("Loading model from file: "+model_file)
        # NOTE: pickle.load executes arbitrary code; only load model files you created yourself
        with open(model_file, "rb") as f:
            m=pickle.load(f)

    m.set_plotting_backend("matplotlib")

    # in-sample forecast.
    # FIX: was written to "<stn>_forecast_test.pkl" and then overwritten by the test forecast below
    forecast_train = m.predict(df_new)
    forecast_train.to_pickle(fpathm+f"/{stn}_forecast_train.pkl")

    # plot forecast in training period
    fig, ax = plt.subplots(figsize=(18, 6))
    m.highlight_nth_step_ahead_of_each_forecast(1).plot(forecast_train, xlabel="Date", ylabel=stn, ax=ax)
    ax.set_title(f"Model {modelnr}: {stn} Train Forecast")
    fig.savefig(fpathm+f"/{stn}_model_{modelnr}_train_forecast.png",bbox_inches='tight')
    plt.close(fig)

    # plot residuals (time series on the left, horizontal histogram on the right)
    df_residuals = pd.DataFrame({"ds": forecast_train.ds, "residuals": forecast_train.y - forecast_train.yhat1})

    fig=plt.figure(figsize=(18, 5))
    spec = gridspec.GridSpec(ncols=2, nrows=1,
                            width_ratios=[3, 1], wspace=0.08)

    ax0=fig.add_subplot(spec[0])
    df_residuals.plot(x="ds", y="residuals",ax=ax0)
    ax0.set_title("Residuals")
    ax0.grid()

    ax1=fig.add_subplot(spec[1])
    ax1.set_title("Residuals histogram")
    df_residuals.residuals.hist(bins=50,ax=ax1,orientation='horizontal')
    ax1.set_yticklabels([])

    fig.suptitle(f"Model {modelnr}: {stn} Residuals")
    fig.savefig(fpathm+f"/{stn}_model_{modelnr}_residuals.png",bbox_inches='tight')
    plt.close(fig)

    # Forecast test period
    regressors_list=['I','pvpdev']+weather_cols
    df_all=pd.concat([df,df_test])

    future = m.make_future_dataframe(m.create_df_with_events(df_all,df_hol), events_df=df_hol,
                                regressors_df=df_all.loc[:,regressors_list],
                                periods=1, n_historic_predictions=31*4)

    forecast_test = m.predict(future).dropna()

    fig,ax=plt.subplots(1,1,figsize=(18,5))

    m.highlight_nth_step_ahead_of_each_forecast(1).plot(forecast_test,ax=ax)
    ax.set_xlim(pd.Timestamp('2018-10-01'),pd.Timestamp('2019-01-31'))

    forecast_test['residuals']=forecast_test.y-forecast_test.yhat1
    forecast_test.set_index('ds',inplace=True)
    forecast_test.to_pickle(fpathm+f"/{stn}_forecast_test.pkl")

    ax.plot(df_test.ds,df_test.y,'rx',label='test')
    ax.legend(['yhat','yhat','test','test'])

    # split the forecast into the historic part and the test period
    in_test=forecast_test.index>=t_start_test
    before_test=forecast_test.index<t_start_test
    yhat_test=forecast_test.loc[in_test,'yhat1']

    # MASE scaled by the naive one-step error of the historic part; MASE2 by that of the test period
    mase_test =mase(df_test.y,yhat_test,forecast_test.loc[before_test,'y'])
    mase_test2=mase(df_test.y,yhat_test,forecast_test.loc[in_test,'y'])

    rmse_test=np.sqrt(mean_squared_error(df_test.y,yhat_test))

    mae_test=forecast_test.loc[in_test,'residuals'].abs().mean()
    mae_test_rel=mae_test/forecast_test.loc[in_test,'y'].mean()
    mae_train=df_residuals.residuals.abs().mean()

    print(f"Model {modelnr}: {stn} MASE={mase_test*100:.1f}%,MASE2={mase_test2*100:.1f}%, MAE={mae_test:.4f}, MAE_rel={mae_test_rel*100:.1f}%")

    dict_MASE[(stn,modelnr)]=mase_test
    dict_MAE[(stn,modelnr)]=mae_test_rel
    dict_M[(stn,modelnr)]={'mase_test':mase_test,'mase_test2':mase_test2,'mae_test':mae_test,'rmse_test':rmse_test,'mae_test_rel':mae_test_rel,'mae_train':mae_train}

    ax.set_title(f"Model {modelnr}: {stn} MASE={mase_test*100:.1f}%, MAE_rel={mae_test_rel*100:.1f}%")
    fig.savefig(fpathm+f"/{stn}_model_{modelnr}_forecast.png",bbox_inches='tight')

    # rewrite the metrics file after every series so a partial run still leaves results on disk
    with open(fpathm+f'/dict_MASE_MAE_{modelnr}.pkl', 'wb') as f:
        pickle.dump({'dict_MASE':dict_MASE,'dict_MAE':dict_MAE,'dict_M':dict_M}, f, pickle.HIGHEST_PROTOCOL)

    plt.close(fig)

## 8. Model 7 with added PVP deviation of nearest station

In [None]:
# Model 8: model 7 (AR, lagged sales difference "I", own price deviation "pvpdev",
# weather future regressors) plus the price deviation of the nearest station
# ("pvpdev_n") as an additional lagged regressor.
# Relies on names created by earlier cells: fpath, df_v_m, df_v_m_test, df_p_m,
# df_p_m_test, stn_near, df_hol_base, dict_hol_extra, dict_MASE, dict_MAE and mase().
dict_M={}
valid=False      # True: split off 10% of the training frame as validation data while fitting
modelnr=8
fit_model=False  # True to fit model, False to load fitted model from disk

# output folder for this model's forecasts, figures and metrics
fpathm = fpath+f"/m_{modelnr}"
os.makedirs(fpathm, exist_ok=True)

# weather columns copied from the sales frames and registered as future regressors
weather_cols=['tmed','prec','velmedia','sol','racha']
# first day of the test period; earlier rows of forecast_test are historic predictions
t_start_test='2019-01-01'

for stn in tqdm([f"ES{nr}_{pr}" for nr in range(1,13) for pr in ["95","GOA"]]):
#for stn in ['ES5_95']:
    # GOA series get station-specific extra events (plus one on 2019-01-04) on top of the base holidays
    if stn[:2]=='ES' and stn[-3:]=="GOA":
        df_hol_extra=dict_hol_extra[f"df_holi_extra_{stn}"]
        df_hol_extra=pd.concat([df_hol_extra,pd.DataFrame({'ds':[datetime.datetime(2019,1,4)],'event':['Ev01']},index=[0])],ignore_index=True)
        df_hol=pd.concat([df_hol_base,df_hol_extra],ignore_index=True)
        print(f"Added {df_hol_extra.shape[0]} extra holidays for",stn)
        print(df_hol_extra)
    else:
        df_hol=df_hol_base

    # train / test frames in NeuralProphet layout (ds, y)
    df=df_v_m.loc[:,[stn]].reset_index().rename(columns={'sale_date':'ds',stn:'y'})
    df_test=df_v_m_test.loc[:,[stn]].reset_index().rename(columns={'sale_date':'ds',stn:'y'})

    # add lagged regressor with difference of y
    # do the difference on concatenated df and df_test for continuity
    lagg_I=pd.concat([df,df_test],ignore_index=True).y.diff().fillna(0)
    df["I"] = lagg_I.iloc[:len(df)].values
    df_test["I"]=lagg_I.iloc[len(df):].values

    # add weather data (temperature, precipitation, wind, sun) to the training frame
    for col in weather_cols:
        df[col] = df_v_m[col].values

    # price columns: median for this product, and the nearest station's price for the same product
    med_col="MED_"+stn.split('_')[1]
    near_col=stn_near.loc[stn.split('_')[0]].Nearest+'_'+stn.split('_')[1]

    # add price deviation from median for this station and for its nearest station
    df["pvpdev"]=(df_p_m[stn]-df_p_m[med_col]).values
    df["pvpdev_n"]=(df_p_m[near_col]-df_p_m[med_col]).values

    # same regressors for the test frame.
    # FIX: "tmed" was previously taken from the tmax column, so the regressor
    # fed to the model at test time differed from the one it was trained on.
    for col in weather_cols:
        df_test[col] = df_v_m_test[col].values
    df_test["pvpdev"]=(df_p_m_test[stn]-df_p_m_test[med_col]).values
    df_test["pvpdev_n"]=(df_p_m_test[near_col]-df_p_m_test[med_col]).values

    m=NeuralProphet(
                    yearly_seasonality=True,
                    weekly_seasonality=True,
                    daily_seasonality=False,
                    seasonality_mode="additive",
                    n_lags=7,
                    ar_reg=0.1,
                    n_forecasts=1,
                    quantiles=[0.05,0.95],
                    )

    # add holidays
    for e in df_hol.event.unique():
        m.add_events(e,regularization=None,mode='additive')

    # add lagged regressors
    m.add_lagged_regressor("I",normalize="standardize",n_lags=7,regularization=0.1)
    m.add_lagged_regressor("pvpdev",normalize="standardize") #can add lags later
    m.add_lagged_regressor("pvpdev_n",normalize="standardize") #can add lags later

    # add weather data as future regressors and assume reliable weather forecast is available for 1 day ahead
    for col in weather_cols:
        m.add_future_regressor(col,normalize="standardize")

    # the freshly configured model is needed here even when a fitted one is loaded below
    df_new=m.create_df_with_events(df, df_hol)

    model_file=fpath+f'/models_1d/{stn}_model_{modelnr}_model.pkl'
    if fit_model:
        if valid:
            df_train, df_val = m.split_df(df_new, freq="D", valid_p=0.1)
            metrics=m.fit(df_train,freq='D',validation_df=df_val, progress='bar')
        else:
            df_train=df_new
            metrics=m.fit(df_train,freq='D', progress='bar')
        print(metrics.tail(1))

        # save fitted model to disk (create the target folder on first use)
        os.makedirs(os.path.dirname(model_file), exist_ok=True)
        with open(model_file, "wb") as f:
            pickle.dump(m, f, pickle.HIGHEST_PROTOCOL)
    else:
        print("Loading model from file: "+model_file)
        # NOTE: pickle.load executes arbitrary code; only load model files you created yourself
        with open(model_file, "rb") as f:
            m=pickle.load(f)

    m.set_plotting_backend("matplotlib")

    # in-sample forecast.
    # FIX: was written to "<stn>_forecast_test.pkl" and then overwritten by the test forecast below
    forecast_train = m.predict(df_new)
    forecast_train.to_pickle(fpathm+f"/{stn}_forecast_train.pkl")

    # plot forecast in training period
    fig, ax = plt.subplots(figsize=(18, 6))
    m.highlight_nth_step_ahead_of_each_forecast(1).plot(forecast_train, xlabel="Date", ylabel=stn, ax=ax)
    ax.set_title(f"Model {modelnr}: {stn} Train Forecast")
    fig.savefig(fpathm+f"/{stn}_model_{modelnr}_train_forecast.png",bbox_inches='tight')
    plt.close(fig)

    # plot residuals (time series on the left, horizontal histogram on the right)
    df_residuals = pd.DataFrame({"ds": forecast_train.ds, "residuals": forecast_train.y - forecast_train.yhat1})

    fig=plt.figure(figsize=(18, 5))
    spec = gridspec.GridSpec(ncols=2, nrows=1,
                            width_ratios=[3, 1], wspace=0.08)

    ax0=fig.add_subplot(spec[0])
    df_residuals.plot(x="ds", y="residuals",ax=ax0)
    ax0.set_title("Residuals")
    ax0.grid()

    ax1=fig.add_subplot(spec[1])
    ax1.set_title("Residuals histogram")
    df_residuals.residuals.hist(bins=50,ax=ax1,orientation='horizontal')
    ax1.set_yticklabels([])

    fig.suptitle(f"Model {modelnr}: {stn} Residuals")
    fig.savefig(fpathm+f"/{stn}_model_{modelnr}_residuals.png",bbox_inches='tight')
    plt.close(fig)

    # Forecast test period
    regressors_list=['I','pvpdev','pvpdev_n']+weather_cols
    df_all=pd.concat([df,df_test])

    future = m.make_future_dataframe(m.create_df_with_events(df_all,df_hol), events_df=df_hol,
                                regressors_df=df_all.loc[:,regressors_list],
                                periods=1, n_historic_predictions=31*4)

    forecast_test = m.predict(future).dropna()

    fig,ax=plt.subplots(1,1,figsize=(18,5))

    m.highlight_nth_step_ahead_of_each_forecast(1).plot(forecast_test,ax=ax)
    ax.set_xlim(pd.Timestamp('2018-10-01'),pd.Timestamp('2019-01-31'))

    forecast_test['residuals']=forecast_test.y-forecast_test.yhat1
    forecast_test.set_index('ds',inplace=True)
    forecast_test.to_pickle(fpathm+f"/{stn}_forecast_test.pkl")

    ax.plot(df_test.ds,df_test.y,'rx',label='test')
    ax.legend(['yhat','yhat','test','test'])

    # split the forecast into the historic part and the test period
    in_test=forecast_test.index>=t_start_test
    before_test=forecast_test.index<t_start_test
    yhat_test=forecast_test.loc[in_test,'yhat1']

    # MASE scaled by the naive one-step error of the historic part; MASE2 by that of the test period
    mase_test =mase(df_test.y,yhat_test,forecast_test.loc[before_test,'y'])
    mase_test2=mase(df_test.y,yhat_test,forecast_test.loc[in_test,'y'])

    rmse_test=np.sqrt(mean_squared_error(df_test.y,yhat_test))

    mae_test=forecast_test.loc[in_test,'residuals'].abs().mean()
    mae_test_rel=mae_test/forecast_test.loc[in_test,'y'].mean()
    mae_train=df_residuals.residuals.abs().mean()

    print(f"Model {modelnr}: {stn} MASE={mase_test*100:.1f}%,MASE2={mase_test2*100:.1f}%, MAE={mae_test:.4f}, MAE_rel={mae_test_rel*100:.1f}%")

    dict_MASE[(stn,modelnr)]=mase_test
    dict_MAE[(stn,modelnr)]=mae_test_rel
    dict_M[(stn,modelnr)]={'mase_test':mase_test,'mase_test2':mase_test2,'mae_test':mae_test,'rmse_test':rmse_test,'mae_test_rel':mae_test_rel,'mae_train':mae_train}

    ax.set_title(f"Model {modelnr}: {stn} MASE={mase_test*100:.1f}%, MAE_rel={mae_test_rel*100:.1f}%")
    fig.savefig(fpathm+f"/{stn}_model_{modelnr}_forecast.png",bbox_inches='tight')

    # rewrite the metrics file after every series so a partial run still leaves results on disk
    with open(fpathm+f'/dict_MASE_MAE_{modelnr}.pkl', 'wb') as f:
        pickle.dump({'dict_MASE':dict_MASE,'dict_MAE':dict_MAE,'dict_M':dict_M}, f, pickle.HIGHEST_PROTOCOL)

    plt.close(fig)

# Appendix: plotting components of the model

In [None]:

# Component-wise (decomposed) prediction on the training frame, using the model `m`
# and frame `df_new` left in the kernel by the most recently executed model cell.
fc = m.predict(df_new, decompose=True)


In [None]:
# Figure with three stacked panels built from the decomposed forecast `fc`:
# trend, weekly seasonality per weekday, and yearly seasonality per day of year.
plt.rcParams.update({
    'axes.titlesize': 'large',
    'axes.labelsize': 'x-large',
    'xtick.labelsize': 'x-large',
    'ytick.labelsize': 'x-large',
})

fig, ax = plt.subplots(3, 1, figsize=(16, 16))
fig.tight_layout(h_pad=10)

# --- panel 0: trend ---
fc.set_index('ds')['trend'].plot(ax=ax[0], grid=True, lw=2, xlabel='', ylabel='Sales')
ax[0].set_title('Trend', fontsize=16)

# --- panel 1: weekly seasonality (bars with a step line on top) ---
weekdays = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
wkday = dict(enumerate(weekdays))  # weekday number (0 = Monday) -> name

wk = fc.groupby(fc.ds.dt.weekday).season_weekly.max().reset_index()
wk['weekday'] = wk.ds.map(wkday)
wk = wk.set_index('weekday').drop('ds', axis=1)

weekly_opts = dict(ax=ax[1], grid=True, ylim=(-0.4, 0.4), legend=False, xlabel='')
wk.plot(kind='bar', alpha=0.5, width=1.0, **weekly_opts)
wk.plot(kind='line', lw=2, drawstyle='steps-mid', **weekly_opts)
for tick_label in ax[1].get_xticklabels():
    tick_label.set_ha("right")
    tick_label.set_rotation(45)
ax[1].set_title('Seasonality by weekday', fontsize=16)

# --- panel 2: yearly seasonality, averaged per day of year and laid out on the dates of 2020 ---
an = (
    fc.groupby(fc.ds.dt.dayofyear).season_yearly.mean()
    .to_frame()
    .set_index(pd.date_range(start='2020-01-01', end='2020-12-31', freq='d'))
)
an.plot.area(ax=ax[2], stacked=False, grid=True, ylim=(-0.4, 0.4), legend=False)
ax[2].set_title('Seasonality by day of year', fontsize=16)
month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'Jan']
ax[2].set_xticklabels(month_labels);


In [None]:

# Use medium label fonts for the parameter plots.
plt.rcParams.update({
    'axes.titlesize': 'large',
    'axes.labelsize': 'medium',
    'xtick.labelsize': 'medium',
    'ytick.labelsize': 'medium',
})

# Plot the parameters of the most recently used model `m` for the selected components.
m.plot_parameters(
    components=['events', 'autoregression', 'lagged_regressors', 'future_regressors'],
    plotting_backend='matplotlib',
)


In [None]:
# Five stacked panels showing how the training forecast builds up when the
# decomposed components in `fc` are added one after another. Each panel shows the
# partial sum (C1), the observed sales y (C0, faint) and the full forecast yhat1
# (C2, dashed), with the partial sum's MAE relative to mean sales in the title.
plt.rcParams.update({'axes.titlesize': 'x-large',
                     'axes.labelsize':'x-large',
                     'xtick.labelsize':'x-large',
                     'ytick.labelsize':'x-large'})

from sklearn import linear_model
fig,ax=plt.subplots(5,1,figsize=(16,16),layout='tight')

fx=fc.copy(deep=True)

# Fit a straight line (over the row index) to the AR component and move it from the
# AR term into the trend. Leading NaNs in ar1 are back-filled for the fit only.
# FIX: Series.interpolate(method='bfill') is deprecated in pandas 2.x; .bfill() is the replacement.
row_idx=fx.index.values.reshape(-1, 1)
lm=linear_model.LinearRegression().fit(row_idx,fx.ar1.bfill().values.reshape(-1, 1))
ar_linear=lm.predict(row_idx).reshape(-1)

fx['trend_2']=fx.trend+ar_linear
fx['ar1_2']=fx.ar1-ar_linear

# cumulative sums of components
# NOTE(review): only the "I" lagged regressor is added; other lagged regressors of
# models 7/8 are presumably exposed as separate columns and are not included - confirm.
fx['s2']=fx.trend_2+fx.season_yearly
fx['s3']=fx.s2+fx.season_weekly+fx.ar1_2+fx.lagged_regressor_I1
fx['s4']=fx.s3+fx.events_additive
fx['s5']=fx.s4+fx.future_regressors_additive

xlims=(pd.Timestamp('2018-01-01'),pd.Timestamp('2018-12-31'))
ylims=(0,1.0)

# (column with the partial sum, panel title without the MAE suffix)
panels=[('trend_2','Trend + Autoregression'),
        ('s2','+ Seasonality yearly'),
        ('s3','+ Seasonality weekly'),
        ('s4','+ Events'),
        ('s5','+ Regressors (future+lagged+auto) = Total Forecast')]

fx_ds=fx.set_index('ds')
for axi,(col,label) in zip(ax,panels):
    # MAE of the partial sum relative to the mean of y
    mae_rel=(fx[col]-fx.y).abs().mean()/fx.y.mean()

    fx_ds[col].plot(ax=axi,grid=True,lw=2,xlabel='',ylabel='Sales',ylim=ylims,c='C1')
    fx_ds.y.plot(ax=axi,alpha=0.25,color='C0',grid=True,xlabel='')
    fx_ds.yhat1.plot(ax=axi,grid=True,lw=1,ls='--'
                     ,xlabel='',ylabel='Sales',ylim=ylims,c='C2')
    axi.set_xlim(xlims)
    axi.set_title(f'{label} (MAE={mae_rel*100:.1f}%)',fontsize=16)

# FIX: the station was hard-coded as ES12_GOA; use the station of the most recent model run instead
fig.suptitle(f'Components of forecast for {stn} (Training)',fontsize=16,y=1.0);