In [1]:
# imports
from prophet.diagnostics import cross_validation
from prophet.diagnostics import performance_metrics
import itertools
from prophet import Prophet
from sktime.performance_metrics.forecasting import mean_absolute_scaled_error, mean_absolute_error, mean_absolute_percentage_error
import pandas as pd
import numpy as np
import plotly.express as px
from pmdarima.preprocessing import FourierFeaturizer
from pmdarima import auto_arima, ARIMA
import matplotlib.pyplot as plt
import holidays
import time
import datetime
pd.options.plotting.backend = 'plotly'

  VALID_INDEX_TYPES = (pd.Int64Index, pd.RangeIndex, pd.PeriodIndex, pd.DatetimeIndex)
  from pandas import (to_datetime, Int64Index, DatetimeIndex, Period,
  from pandas import (to_datetime, Int64Index, DatetimeIndex, Period,


In [2]:
def preprocessing_ARIMA(ts, ts_holiday, split=True, yearly_seasonality=True, steps_ahead=30):
    '''
    split: to split into train and test set
    '''
    print(f'Preprocessing timeseries data with {steps_ahead} steps ahead')
    ts = ts['sales']/1e6
    ts.index.freq = 'D'
    # yearly seasonality
    if yearly_seasonality:
        fouri_terms = FourierFeaturizer(365.25, 2)
        y_prime, df_fouri = fouri_terms.fit_transform(ts)
        df_fouri.index = y_prime.index
    # combine exog. variables
    df_exog = pd.concat([df_fouri, ts_holiday], axis=1)
    df_exog['holiday'] = df_exog['holiday'].fillna(False).astype('int')
    df_exog.dropna(inplace=True)
    if split:
        # split
        y_train = y_prime.iloc[:-steps_ahead]
        y_test = y_prime.iloc[-steps_ahead:]
        exog_train = df_exog.iloc[:-steps_ahead]
        exog_test = df_exog.iloc[-steps_ahead:]
        return {'y_train':y_train,
                'y_test':y_test,
                'exog_train':exog_train,
                'exog_test':exog_test}
    else:
        return {'y':y_prime, 'exog': df_exog}

def preprocessing_prophet(ts, ts_holiday, steps_ahead=30, split = True):
    fb_df = ts[['sales']]
    fb_df['sales'] = ts['sales']/1e6
    #fb_df['holiday'] = fb_df['holiday'].fillna(False).astype('bool')
    fb_df = fb_df.reset_index().rename({'date': 'ds', 'sales': 'y'}, axis=1)
    if split:
        fb_train = fb_df.iloc[:-steps_ahead]
        fb_test = fb_df.iloc[-steps_ahead:]
        return {'y_train':fb_train,
                'y_test':fb_test}
    else:
        return fb_df

def holidays_ts_prophet(promotion = True, holiday_neg=False):
    holiday = pd.DataFrame(holidays.Vietnam(years=[2018, 2019, 2020, 2021]).items()).rename({0:'date', 1:'holiday_neg'}, axis=1)
    # Add Tet promotion
    promo = pd.DataFrame(holiday[holiday.holiday_neg == 'Vietnamese New Year'].date - datetime.timedelta(days=1))
    promo['promotion'] = "1 days before Tet Holiday"
    # Add Black Friday
    promo = promo.append(
                pd.DataFrame({'date':[datetime.date(2020,11,27), 
                                      datetime.date(2019,11,29),
                                      datetime.date(2018,11,23)],
                              'promotion':["Black Friday","Black Friday","Black Friday"]}))
    # set to 1 if holiday affect sales negatively
    holiday_off = holiday.replace({
            'Vietnamese New Year.*': 1, 
             '.*day of Tet Holiday': 1,
             'International Labor Day': 1,
             '\D': np.NaN}, 
            regex=True).dropna()
    if promotion:
        promotions = pd.DataFrame({
                'holiday':'big_promotion',
                'ds':promo.date,
                'lower_window': -14, # 2 weeks before
                'upper_window': 0})
    else:
        promotions = None
    if holiday_neg:
        holiday_negative = pd.DataFrame({
                'holiday':'holiday_off',
                'ds':holiday_off.date,
                'lower_window': 0,
                'upper_window': 0})
    else:
        holiday_negative = None
    return pd.concat((promotions, holiday_negative))

def auto_arima_model(y_train, exog_train, diff_num):
    time_start = time.time()
    print('start auto arima...')
    # Fit model to the level to find common order
    arima_model = auto_arima(
        y=y_train,
        exogenous=exog_train,
        D=diff_num, 
        seasonal=True, m=7 # Weekly seasonality
    )
    time_stop = time.time()
    print(f'finished auto arima, model: {arima_model}, total time: {round(time_stop-time_start)} sec')
    return arima_model

def cross_validation_prophet(prophet_data, param_grid, steps_ahead = 30):
    cv_prophet_result = pd.DataFrame(columns=['params','mape','rmse'])
    all_params = [dict(zip(param_grid.keys(), v)) for v in itertools.product(*param_grid.values())]
    for params in all_params:
        # set up model
        prophet_model = Prophet(**params
                                ).add_seasonality(name='weekly', period=7, fourier_order=5, prior_scale=10
                                ).add_seasonality(name='yearly', period=365.25, fourier_order=5, prior_scale=1)
        #m.add_regressor('holiday')
        prophet_model.fit(prophet_data['y_train'])
        # set up CV
        df_cv = cross_validation(prophet_model, initial=(str(prophet_data['y_train'].shape[0]-steps_ahead*2)+' days'), period='7 days', horizon='30 days', parallel="processes")
        # evaluate
        df_p = performance_metrics(df_cv)
        cv_prophet_result = cv_prophet_result.append({'params':params, 
                                            'mape'  :df_p['mape'].values[-1],
                                            'rmse'  :df_p['rmse'].values[-1]}, ignore_index=True)
    return cv_prophet_result

def cross_validation_result(data, model_name, model, rolls=4, horizon=30, prophet_params=None):
    '''
    '''
    cv_score = []
    for i in range(rolls):
        if model_name=='arima':
            model.fit(y=data['y'].iloc[:-(rolls-i)*horizon], 
                      X=data['exog'].iloc[:-(rolls-i)*horizon])
            y_hat = model.predict(n_periods=horizon, 
                                  exogenous=data['exog'].iloc[np.r_[-(rolls-i)*horizon:-(rolls-i-1)*horizon]])
            y_test = data['y'].iloc[np.r_[-(rolls-i)*horizon:-(rolls-i-1)*horizon]]
            mape_OOS = round(mean_absolute_percentage_error(y_test, y_hat), 3)
        elif model_name=='prophet':
            model = Prophet(**prophet_params
                           ).add_seasonality(name='weekly', period=7, fourier_order=5, prior_scale=10
                           ).add_seasonality(name='yearly', period=365.25, fourier_order=5, prior_scale=1)
            model.fit(data.iloc[:-(rolls-i)*horizon,:])
            y_hat=model.predict(data.iloc[np.r_[-(rolls-i)*horizon:-(rolls-i-1)*horizon],:])['yhat']
            y_test = data['y'].iloc[np.r_[-(rolls-i)*horizon:-(rolls-i-1)*horizon]]
            mape_OOS = round(mean_absolute_percentage_error(y_test, y_hat), 3)
        cv_score.append(mape_OOS)
    return cv_score

In [3]:
def manual_holidays_gen(df_company):
    df_exog = pd.DataFrame(index=df_company.index)
    off_day_idx = [
        pd.date_range("2018-02-15", "2018-02-20"),  # Lunar New Year
        pd.date_range("2019-02-04", "2019-02-09"),  # Lunar New Year
        pd.date_range("2020-01-24", "2020-01-29"),  # Lunar New Year
        pd.date_range("2018-05-01", "2018-05-01"),  # Labor Day
        pd.date_range("2019-05-01", "2019-05-01"),  # Labor Day
        pd.date_range("2020-05-01", "2020-05-01"),  # Labor Day
    ]
    off_day_idx = pd.DatetimeIndex(np.unique(np.hstack(off_day_idx)))
    df_exog["off_day"] = False
    df_exog.loc[off_day_idx, "off_day"] = True

    promo_day_idx = [
        # Black Friday
        pd.date_range(end="2017-11-24", periods=14),
        pd.date_range(end="2018-11-23", periods=14),
        pd.date_range(end="2019-11-29", periods=14),
        pd.date_range(end="2020-11-27", periods=14),
        # End of year
        pd.date_range(end="2018-02-15", periods=15),
        pd.date_range(end="2019-02-04", periods=15),
        pd.date_range(end="2020-01-24", periods=15),
        pd.date_range(end="2021-01-31", periods=3),  # 2021 Tet is on 2021-02-12
        # Valentine
        pd.date_range(end="2018-02-14", periods=7),
        pd.date_range(end="2019-02-14", periods=7),
        pd.date_range(end="2020-02-14", periods=7),
        # International Women’s day
        pd.date_range(end="2018-03-08", periods=7),
        pd.date_range(end="2019-03-08", periods=7),
        pd.date_range(end="2020-03-08", periods=7),
        # Vietnam Reunification day
        pd.date_range(end="2018-04-30", periods=10),
        pd.date_range(end="2019-04-30", periods=10),
        pd.date_range(end="2020-04-30", periods=10),
        # End of school year
        pd.date_range(end="2018-05-27", periods=4),
        pd.date_range(end="2019-05-27", periods=4),
        pd.date_range(end="2020-05-27", periods=4),
        # New school year
        pd.date_range(end="2018-08-26", periods=3),
        pd.date_range(end="2019-08-26", periods=3),
        pd.date_range(end="2020-08-26", periods=3),
        # Vietnamese Women’s day
        pd.date_range(end="2018-10-20", periods=1),
        pd.date_range(end="2019-10-20", periods=1),
        pd.date_range(end="2020-10-20", periods=1),
    ]
    promo_day_idx = pd.DatetimeIndex(np.unique(np.hstack(promo_day_idx)))
    df_exog["promo_day"] = False
    df_exog.loc[promo_day_idx, "promo_day"] = True
    df_exog = df_exog.astype("int")
    return df_exog

In [4]:
df_store = pd.read_pickle("data/df_daily.pkl")
ts_holiday = pd.read_pickle('data/holiday.pkl')
df_company = df_store.groupby("date").sum()[["sales"]]
df_exog = manual_holidays_gen(df_company)

In [5]:
models_to_run = ['arima','prophet']
steps_ahead=7
prophet_holidays=holidays_ts_prophet(promotion=True, holiday_neg=True)
param_grid_prophet = {
    'changepoint_prior_scale': [0.01, 0.1, 1, 10],
    'changepoint_range': [0.8, 0.9],
    'holidays_prior_scale':[0.1, 1],
    'seasonality_mode': ['additive'],
    'holidays':[prophet_holidays],
    'daily_seasonality': [False],
    'weekly_seasonality': [False],
    'yearly_seasonality': [False],
}

  promo = promo.append(


In [7]:
cv_pipe_result = {}
best_model = {}
for model_name in models_to_run:
    if model_name =='arima':
        print(f'Start cross-validation for {model_name} models')
        time_start = time.time()
        data = preprocessing_ARIMA(ts=df_company, ts_holiday=ts_holiday, split=True, yearly_seasonality=True, steps_ahead=steps_ahead)
        arima_model = auto_arima_model(y_train=data['y_train'], exog_train=data['exog_train'], diff_num=1)
        # now get the data for cross-validatio and start the process
        data = preprocessing_ARIMA(ts=df_company, ts_holiday=ts_holiday, split=False, yearly_seasonality=True, steps_ahead=steps_ahead) 
        cv_score = cross_validation_result(data=data, model_name=model_name, model=arima_model, rolls=4, horizon=steps_ahead)
        # save result
        cv_pipe_result[model_name] = cv_score
        best_model[model_name] = arima_model
        time_stop = time.time()
        print(f'Finished cross-validation, total time: {round(time_stop-time_start)} sec')
    if model_name =='prophet':
        print(f'Start cross-validation for {model_name} models')
        time_start = time.time()
        data = preprocessing_prophet(ts=df_company, ts_holiday=prophet_holidays, steps_ahead=steps_ahead,split=True)
        cv_prophet_result = cross_validation_prophet(prophet_data=data, 
                                                     param_grid=param_grid_prophet,                                                       
                                                     steps_ahead = 30) # using cross validation for 30 days
        prophet_params = cv_prophet_result.sort_values('mape').iloc[0,0]
        # now get the data for cross-validatio and start the process
        data = preprocessing_prophet(ts=df_company, ts_holiday=prophet_holidays, steps_ahead=steps_ahead,split=False)
        cv_score = cross_validation_result(data=data, model_name=model_name, model=None, rolls=4, horizon=steps_ahead, prophet_params=prophet_params)
        # save result
        cv_pipe_result[model_name] = cv_score
        best_model[model_name] = prophet_params
        time_stop = time.time()
        print(f'Finished cross-validation, total time: {round(time_stop-time_start)} sec')

Start cross-validation for arima models
Preprocessing timeseries data with 7 steps ahead
start auto arima...
finished auto arima, model:  ARIMA(4,0,0)(2,1,0)[7]          , total time: 135 sec
Preprocessing timeseries data with 7 steps ahead


  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1


Finished cross-validation, total time: 149 sec
Start cross-validation for prophet models


INFO:cmdstanpy:finish chain 1
INFO:prophet:Making 5 forecasts with cutoffs between 2020-11-27 00:00:00 and 2020-12-25 00:00:00
INFO:prophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x00000173855E1700>
  cv_prophet_result = cv_prophet_result.append({'params':params,
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
INFO:prophet:Making 5 forecasts with cutoffs between 2020-11-27 00:00:00 and 2020-12-25 00:00:00
INFO:prophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x00000173855E16A0>
  cv_prophet_result = cv_prophet_result.append({'params':params,
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
INFO:prophet:Making 5 forecasts with cutoffs between 2020-11-27 00:00:00 and 2020-12-25 00:00:00
INFO:prophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x0000017383E3

Finished cross-validation, total time: 281 sec


In [8]:
cv_pipe_result

{'arima': [0.223, 0.138, 0.16, 0.087], 'prophet': [0.212, 0.213, 0.16, 0.12]}

In [16]:
models_to_run = ['arima','prophet']
result_store_CV = {}
for model_name in models_to_run:
    if model_name =='arima':
        all_stores_result_CV = []
        for store in df_store["store_id"].unique():
            time_start = time.time()
            print(f"\nprocessing stores {store}...")
            store_name = "store_" + str(store)

            # data
            df_data = df_store[df_store["store_id"] == store].set_index("date")[["sales"]]
            arima_data = preprocessing_ARIMA(ts=df_data, 
                                        ts_holiday=ts_holiday, 
                                        split=False, 
                                        yearly_seasonality=True, 
                                        steps_ahead=steps_ahead) 

            # fit model to each store
            cv_score = cross_validation_result(
                data=arima_data, model_name=model_name, model=best_model[model_name], rolls=4, horizon=steps_ahead)
            # result
            all_stores_result_CV.append(np.mean(cv_score))
            # timing
            time_stop = time.time()
            print(f"finished stores {store}, total time: {time_stop-time_start}")
        all_stores_result_CV = pd.DataFrame({'store':df_store["store_id"].unique(),'mape':all_stores_result_CV})
        result_store_CV[model_name]=all_stores_result_CV
    if model_name =='prophet':
        all_stores_result_CV = []
        for store in df_store["store_id"].unique():  # [:2]:
            print(f"processing stores {store}...\n")
            store_name = "store_" + str(store)

            # data
            df_data = df_store[df_store["store_id"] == store].set_index("date")[["sales"]]
            prophet_store_data = preprocessing_prophet(
                ts=df_data, ts_holiday=prophet_holidays,
                steps_ahead=steps_ahead, split=False
            )

            # fit model to each store
            cv_score = cross_validation_result(data=prophet_store_data, 
                                            model_name=model_name, model=None, 
                                            rolls=4, horizon=steps_ahead, 
                                            prophet_params=best_model[model_name])
            # result
            all_stores_result_CV.append(np.mean(cv_score))
        all_stores_result_CV = pd.DataFrame({'store':df_store["store_id"].unique(),'mape':all_stores_result_CV})
        result_store_CV[model_name]=all_stores_result_CV


processing stores 307222...
Preprocessing timeseries data with 7 steps ahead
finished stores 307222, total time: 17.151570558547974

processing stores 307244...
Preprocessing timeseries data with 7 steps ahead
finished stores 307244, total time: 17.922162532806396

processing stores 307248...
Preprocessing timeseries data with 7 steps ahead
finished stores 307248, total time: 18.518723487854004

processing stores 320264...
Preprocessing timeseries data with 7 steps ahead
finished stores 320264, total time: 15.516824722290039

processing stores 328165...
Preprocessing timeseries data with 7 steps ahead
finished stores 328165, total time: 20.93858051300049

processing stores 349920...
Preprocessing timeseries data with 7 steps ahead
finished stores 349920, total time: 19.40898299217224

processing stores 349924...
Preprocessing timeseries data with 7 steps ahead
finished stores 349924, total time: 18.988917350769043

processing stores 349952...
Preprocessing timeseries data with 7 steps

  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1


finished stores 566792, total time: 10.929089069366455
processing stores 307222...



INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1


processing stores 307244...



INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1


processing stores 307248...



INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1


processing stores 320264...



INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1


processing stores 328165...



INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1


processing stores 349920...



INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1


processing stores 349924...



INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1


processing stores 349952...



INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)


processing stores 349958...



  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1


processing stores 349962...



INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1


processing stores 349972...



INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1


processing stores 349978...



INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)


processing stores 349980...



  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)


processing stores 349998...



  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1


processing stores 350016...



INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1


processing stores 350018...



INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1


processing stores 350026...



INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)


processing stores 350028...



  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1


processing stores 350040...



INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1


processing stores 350046...



INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1


processing stores 350054...



INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1


processing stores 350056...



INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1


processing stores 350060...



INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)


processing stores 354468...



  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1


processing stores 387240...



INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)


processing stores 412585...



  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)


processing stores 441997...



  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1


processing stores 452387...



INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1


processing stores 461349...



INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1


processing stores 464495...



INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1


processing stores 471477...



INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1


processing stores 476061...



INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1


processing stores 480733...



INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1


processing stores 528854...



INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1


processing stores 536898...



INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1


processing stores 536902...



INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1


processing stores 566790...



INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1


processing stores 566792...



INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
  components = components.append(new_comp)
  components = components.append(new_comp)


In [18]:
print('arima: ',result_store_CV['arima'].mape.mean())
print('prophet: ',result_store_CV['prophet'].mape.mean())


arima:  0.3623815789473685
prophet:  0.3205197368421053


## ARIMA

In [24]:
model_name = 'arima'
best_model_group ={}

In [32]:
all_stores_result_CV = pd.DataFrame(columns=['store_level','store','mape'])
for store_level in df_store.store_level.unique():
    # get aggregated data for each store_level
    df_store_level = df_store[df_store.store_level == store_level].groupby('date').sum()
    # Cross-validation
    data = preprocessing_ARIMA(ts=df_store_level, ts_holiday=ts_holiday, split=True, yearly_seasonality=True, steps_ahead=steps_ahead)
    arima_model = auto_arima_model(y_train=data['y_train'], exog_train=data['exog_train'], diff_num=1)
    # now get the data for cross-validatio and start the process
    data = preprocessing_ARIMA(ts=df_store_level, ts_holiday=ts_holiday, split=False, yearly_seasonality=True, steps_ahead=steps_ahead) 
    cv_score = cross_validation_result(data=data, model_name=model_name, model=arima_model, rolls=4, horizon=steps_ahead)
    # save result
    cv_pipe_result[store_level] = cv_score
    best_model_group[store_level] = arima_model
    stores_result_CV=[]
    store_list=df_store[df_store['store_level']==store_level]['store_id'].unique()
    for store in store_list:  # print(store)
        print(f'processing stores {store}...')
        store_name = "store_" + str(store)
        # data
        df_data = df_store[df_store["store_id"] == store].set_index("date")[["sales"]]
        arima_data = preprocessing_ARIMA(ts=df_data, 
                                    ts_holiday=ts_holiday, 
                                    split=False, 
                                    yearly_seasonality=True, 
                                    steps_ahead=steps_ahead) 
        # fit model to each store
        cv_score = cross_validation_result(
            data=arima_data, model_name=model_name, model=best_model_group[store_level], rolls=4, horizon=steps_ahead)
        # result
        stores_result_CV.append(np.mean(cv_score))
    all_stores_result_CV = pd.concat(
                                [all_stores_result_CV,
                                pd.DataFrame({'store_level':store_level,'store':store_list,'mape':stores_result_CV})]
                                
    )



Preprocessing timeseries data with 7 steps ahead
start auto arima...
finished auto arima, model:  ARIMA(4,0,0)(2,1,0)[7]          , total time: 178 sec
Preprocessing timeseries data with 7 steps ahead
processing stores 307222...
Preprocessing timeseries data with 7 steps ahead
processing stores 307244...
Preprocessing timeseries data with 7 steps ahead
processing stores 328165...
Preprocessing timeseries data with 7 steps ahead
processing stores 349920...
Preprocessing timeseries data with 7 steps ahead
processing stores 349924...
Preprocessing timeseries data with 7 steps ahead
processing stores 349958...
Preprocessing timeseries data with 7 steps ahead
processing stores 349980...
Preprocessing timeseries data with 7 steps ahead
processing stores 350018...
Preprocessing timeseries data with 7 steps ahead
processing stores 350040...
Preprocessing timeseries data with 7 steps ahead
processing stores 350060...
Preprocessing timeseries data with 7 steps ahead
processing stores 528854...
P

In [33]:
result_group_arima = all_stores_result_CV
print(best_model_group)
print(cv_pipe_result)

{'A': ARIMA(order=(4, 0, 0), scoring_args={}, seasonal_order=(2, 1, 0, 7),
{'arima': [0.283, 0.169, 0.181, 0.096], 'prophet': [0.212, 0.213, 0.16, 0.12], 'A': [0.283, 0.169, 0.181, 0.096], 'B': [0.193, 0.138, 0.163, 0.162], 'C': [0.223, 0.099, 0.12, 0.086]}


In [35]:
result_group_arima.mape.mean()

0.36030263157894743

## Prophet

In [37]:
model_name = 'prophet'
best_model_group ={}

In [40]:
all_stores_result_CV = pd.DataFrame(columns=['store_level','store','mape'])
for store_level in df_store.store_level.unique():
    # get aggregated data for each store_level
    df_store_level = df_store[df_store.store_level == store_level].groupby('date').sum()

    data = preprocessing_prophet(ts=df_store_level, ts_holiday=prophet_holidays, steps_ahead=steps_ahead,split=True)
    cv_prophet_result = cross_validation_prophet(prophet_data=data, 
                                                    param_grid=param_grid_prophet,                                                       
                                                    steps_ahead = 30) # using cross validation for 30 days
    prophet_params = cv_prophet_result.sort_values('mape').iloc[0,0]
    # now get the data for cross-validatio and start the process
    data = preprocessing_prophet(ts=df_store_level, ts_holiday=prophet_holidays, steps_ahead=steps_ahead,split=False)
    cv_score = cross_validation_result(data=data, model_name=model_name, model=None, rolls=4, horizon=steps_ahead, prophet_params=prophet_params)
    # save result
    cv_pipe_result[store_level] = cv_score
    best_model_group[store_level] = arima_model
    stores_result_CV=[]
    store_list=df_store[df_store['store_level']==store_level]['store_id'].unique()
    for store in store_list:  # print(store)
        print(f'processing stores {store}...')
        store_name = "store_" + str(store)
        # data
        df_data= df_store[df_store["store_id"] == store].set_index("date")[["sales"]]
        prophet_store_data = preprocessing_prophet(
            ts=df_data, ts_holiday=prophet_holidays,
            steps_ahead=steps_ahead, split=False
        )

        # fit model to each store
        cv_score = cross_validation_result(data=prophet_store_data, 
                                        model_name=model_name, model=None, 
                                        rolls=4, horizon=steps_ahead, 
                                        prophet_params=best_model_group[store_level])
        # result
        stores_result_CV.append(np.mean(cv_score))
    all_stores_result_CV = pd.concat(
                                [all_stores_result_CV,
                                pd.DataFrame({'store_level':store_level,'store':store_list,'mape':stores_result_CV})]
    )

  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
INFO:prophet:Making 5 forecasts with cutoffs between 2020-11-27 00:00:00 and 2020-12-25 00:00:00
INFO:prophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x000001739144DB20>
  cv_prophet_result = cv_prophet_result.append({'params':params,
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
INFO:prophet:Making 5 forecasts with cutoffs between 2020-11-27 00:00:00 and 2020-12-25 00:00:00
INFO:prophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x00000173946E3E20>
  cv_prophet_result = cv_prophet_result.append({'params':params,
  components = components.append(new_comp)
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1
INFO:prophet:Making 5 forecasts with cutoffs between 2020-11-27 00:00:00 and 2020-12-25 00:00:00
INFO:prophet:Applying in parallel with 

KeyboardInterrupt: 

In [None]:
result_group_prophet = all_stores_result_CV
print(best_model_group)
print(cv_pipe_result)

{'A': ARIMA(order=(2, 0, 1), scoring_args={}, seasonal_order=(2, 1, 0, 7),
{'arima': [0.283, 0.169, 0.181, 0.096], 'prophet': [0.212, 0.213, 0.16, 0.12], 'A': [0.212, 0.213, 0.16, 0.12], 'B': [0.212, 0.213, 0.16, 0.12], 'C': [0.212, 0.213, 0.16, 0.12]}


In [None]:
result_group_arima.to_pickle("results/grouping/result_group_arima.pkl")
result_group_prophet.to_pickle("results/grouping/result_group_prophet.pkl")