In [1]:
# %%
# imports
from prophet.diagnostics import cross_validation
from prophet.diagnostics import performance_metrics
import itertools
from prophet import Prophet
from sktime.performance_metrics.forecasting import mean_absolute_scaled_error, mean_absolute_error, mean_absolute_percentage_error
import pandas as pd
import numpy as np
import plotly.express as px
from pmdarima.preprocessing import FourierFeaturizer
from pmdarima import auto_arima, ARIMA
import matplotlib.pyplot as plt
import holidays
pd.options.plotting.backend = 'plotly'

In [2]:
df_store = pd.read_pickle('data/df_daily.pkl')
df_company = df_store.groupby('date').sum()
train_data = df_company['sales']/1e6
train_data.index.freq = 'D'

In [8]:
train_data.plot(title='Sales TS in Million VND (aggregated data)')

In [3]:
# yearly seasonality
fouri_terms = FourierFeaturizer(365.25, 2)
y_prime, df_fouri = fouri_terms.fit_transform(train_data)
df_fouri.index = y_prime.index


In [9]:
df_fouri.head()

Unnamed: 0_level_0,FOURIER_S365-0,FOURIER_C365-0,FOURIER_S365-1,FOURIER_C365-1
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-08-07,0.017202,0.999852,0.034398,0.999408
2017-08-08,0.034398,0.999408,0.068755,0.997634
2017-08-09,0.051584,0.998669,0.103031,0.994678
2017-08-10,0.068755,0.997634,0.137185,0.990545
2017-08-11,0.085906,0.996303,0.171177,0.98524


In [12]:
# holiday
ts_holiday = pd.read_pickle('data/holiday.pkl')
print(ts_holiday.index)

DatetimeIndex(['2018-02-15', '2018-02-16', '2018-02-17', '2018-02-18',
               '2018-02-19', '2018-02-20', '2018-05-01', '2019-02-04',
               '2019-02-05', '2019-02-06', '2019-02-07', '2019-02-08',
               '2019-02-09', '2019-05-01', '2020-01-24', '2020-01-25',
               '2020-01-26', '2020-01-27', '2020-01-28', '2020-01-29',
               '2020-05-01'],
              dtype='datetime64[ns]', name='date', freq=None)


In [None]:
# promo => causing DATA LEAK, will not be used
#ts_promo = df_company['promo_count']

## ARIMA model

In [14]:
# combine exog. variables
df_exog = pd.concat([df_fouri, ts_holiday], axis=1)
df_exog['holiday'] = df_exog['holiday'].fillna(False).astype('int')

In [16]:
df_exog.head()

Unnamed: 0_level_0,FOURIER_S365-0,FOURIER_C365-0,FOURIER_S365-1,FOURIER_C365-1,holiday
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-08-07,0.017202,0.999852,0.034398,0.999408,0
2017-08-08,0.034398,0.999408,0.068755,0.997634,0
2017-08-09,0.051584,0.998669,0.103031,0.994678,0
2017-08-10,0.068755,0.997634,0.137185,0.990545,0
2017-08-11,0.085906,0.996303,0.171177,0.98524,0


In [15]:
# Split the time series as well as exogenous features data into train and test splits
steps_ahead = 14 # 2 weeks
y_to_train = y_prime.iloc[:-steps_ahead]
y_to_test = y_prime.iloc[-steps_ahead:]

exog_to_train = df_exog.iloc[:-steps_ahead]
exog_to_test = df_exog.iloc[-steps_ahead:]

In [17]:
# Fit model to the level to find common order
arima_model = auto_arima(
    y=y_to_train,
    exogenous=exog_to_train,
    D=1, 
    seasonal=True, m=7 # Weekly seasonality
    )
arima_model

ARIMA(order=(4, 0, 0), scoring_args={}, seasonal_order=(2, 1, 0, 7),

In [18]:
# Forecast
arima_y_fitted = arima_model.predict_in_sample(
    X=exog_to_train)
arima_y_forecast = arima_model.predict(
    n_periods=len(y_to_test), exogenous=exog_to_test)
arima_y_forecast = pd.Series(arima_y_forecast,
                             name='forecast',
                             index=y_to_test.index)

In [19]:
# metrics
# in-sample
mae_IS = round(mean_absolute_error(y_to_train, arima_y_fitted))
mape_IS = round(mean_absolute_percentage_error(
    y_to_train, arima_y_fitted), 3)

# out-sample
mae_OOS = round(mean_absolute_error(y_to_test, arima_y_forecast))
mape_OOS = round(mean_absolute_percentage_error(
    y_to_test, arima_y_forecast), 3)

print(f'Company:')
print('Out of sample:',
      f'MAPE: {mape_OOS}',
      f'MAE: {mae_OOS}\n',
      sep='\n')

Company:
Out of sample:
MAPE: 0.155
MAE: 140



In [20]:
print(arima_model)
res_whole_holiday_promo = pd.DataFrame()

 ARIMA(4,0,0)(2,1,0)[7]          


In [46]:
# LOOP all stores
for store in df_store['store_id'].unique():  # print(store)
    df_data = df_store[df_store['store_id'] == store].set_index('date')[
        ['sales', 'promo_count']]
    df_data.index.freq = 'D'
    train_data = df_data['sales']/1e6

    # Prepare the fourier terms to model annual seasonality; add as exogenous features to auto_arima
    fouri_terms = FourierFeaturizer(365.25, 2)
    y_prime, df_fouri = fouri_terms.fit_transform(train_data)
    # is exactly the same as manual calculation in the above cells
    df_fouri.index = y_prime.index

    # holiday
    # ts_holiday = pd.read_pickle('data/holiday.pkl')

    # promo
    #ts_promo = df_data['promo_count']

    # combine exog. variables
    df_exog = pd.concat([df_fouri, ts_holiday], axis=1)
    df_exog['holiday'] = df_exog['holiday'].fillna(False).astype('int')
    df_exog.dropna(inplace=True)

    # Split the time series as well as exogenous features data into train and test splits
    steps_ahead = 14
    y_to_train = y_prime.iloc[:-steps_ahead]
    y_to_test = y_prime.iloc[-steps_ahead:]

    exog_to_train = df_exog.iloc[:-steps_ahead]
    exog_to_test = df_exog.iloc[-steps_ahead:]

    # Fit model to each store in the level
    arima_model_store = ARIMA(
        order=arima_model.order,
        seasonal_order=arima_model.seasonal_order
    ).fit(y_to_train,
          X=exog_to_train
          )

    # Forecast
    arima_y_fitted = arima_model_store.predict_in_sample(
        X=exog_to_train)
    arima_y_forecast = arima_model_store.predict(
        n_periods=len(y_to_test), exogenous=exog_to_test)
    arima_y_forecast = pd.Series(arima_y_forecast,
                                 name='forecast',
                                 index=y_to_test.index)
    # Random walk
    RW_y_forecast = pd.Series(
        y_to_train[-1], name='RW_forecast', index=y_to_test.index)

    # metrics
    # in-sample
    mae_IS = round(mean_absolute_error(y_to_train, arima_y_fitted))
    mape_IS = round(mean_absolute_percentage_error(
        y_to_train, arima_y_fitted), 3)

    # out-sample
    mae_OOS = round(mean_absolute_error(y_to_test, arima_y_forecast))
    mape_OOS = round(mean_absolute_percentage_error(
        y_to_test, arima_y_forecast), 3)
    RW_mae_OOS = round(mean_absolute_error(y_to_test, RW_y_forecast))
    RW_mape_OOS = round(mean_absolute_percentage_error(
        y_to_test, RW_y_forecast), 3)

    print(f'Store {store}')
    print(f'MAPE: {mape_OOS}',
          f'MAE: {mae_OOS}\n',
          sep='\n')

    res_whole_holiday_promo = res_whole_holiday_promo.append({
        'store_id': store,
        'fc_IS': arima_y_fitted,
        'fc_OOS': arima_y_forecast,
        'fc_RW': RW_y_forecast,
        'arima_mape_OOS': mape_OOS,
        'arima_mape_IS': mape_IS,
        'arima_mae_OOS': mae_OOS,
        'arima_mae_IS': mae_IS,
        'RW_mape_OOS': RW_mape_OOS,
        'RW_mae_OOS': RW_mae_OOS,
    }, ignore_index=True)

Store 307222
MAPE: 0.537
MAE: 15

Store 307244
MAPE: 0.319
MAE: 6

Store 307248
MAPE: 0.298
MAE: 7

Store 320264
MAPE: 0.566
MAE: 11

Store 328165
MAPE: 0.253
MAE: 16

Store 349920
MAPE: 0.256
MAE: 10

Store 349924
MAPE: 0.327
MAE: 8

Store 349952
MAPE: 0.288
MAE: 8

Store 349958
MAPE: 0.351
MAE: 11

Store 349962
MAPE: 0.348
MAE: 5

Store 349972
MAPE: 0.31
MAE: 10

Store 349978
MAPE: 0.388
MAE: 4

Store 349980
MAPE: 0.389
MAE: 13

Store 349998
MAPE: 0.316
MAE: 6

Store 350016
MAPE: 0.268
MAE: 7




Maximum Likelihood optimization failed to converge. Check mle_retvals



Store 350018
MAPE: 0.19
MAE: 12

Store 350026
MAPE: 0.52
MAE: 6

Store 350028
MAPE: 0.359
MAE: 4

Store 350040
MAPE: 0.491
MAE: 8

Store 350046
MAPE: 0.222
MAE: 3

Store 350054
MAPE: 0.279
MAE: 8

Store 350056
MAPE: 0.382
MAE: 4

Store 350060
MAPE: 0.201
MAE: 6

Store 354468
MAPE: 0.521
MAE: 4

Store 387240
MAPE: 0.399
MAE: 4

Store 412585
MAPE: 0.374
MAE: 4




No frequency information was provided, so inferred frequency D will be used.


No frequency information was provided, so inferred frequency D will be used.



Store 441997
MAPE: 0.329
MAE: 4




No frequency information was provided, so inferred frequency D will be used.


No frequency information was provided, so inferred frequency D will be used.



Store 452387
MAPE: 0.493
MAE: 13




No frequency information was provided, so inferred frequency D will be used.


No frequency information was provided, so inferred frequency D will be used.



Store 461349
MAPE: 0.402
MAE: 6




No frequency information was provided, so inferred frequency D will be used.


No frequency information was provided, so inferred frequency D will be used.



Store 464495
MAPE: 0.469
MAE: 4




No frequency information was provided, so inferred frequency D will be used.


No frequency information was provided, so inferred frequency D will be used.



Store 471477
MAPE: 0.369
MAE: 6




No frequency information was provided, so inferred frequency D will be used.


No frequency information was provided, so inferred frequency D will be used.


Maximum Likelihood optimization failed to converge. Check mle_retvals



Store 476061
MAPE: 0.735
MAE: 3




No frequency information was provided, so inferred frequency D will be used.


No frequency information was provided, so inferred frequency D will be used.



Store 480733
MAPE: 0.364
MAE: 10




No frequency information was provided, so inferred frequency D will be used.


No frequency information was provided, so inferred frequency D will be used.


Maximum Likelihood optimization failed to converge. Check mle_retvals



Store 528854
MAPE: 0.275
MAE: 12




No frequency information was provided, so inferred frequency D will be used.


No frequency information was provided, so inferred frequency D will be used.


Maximum Likelihood optimization failed to converge. Check mle_retvals



Store 536898
MAPE: 0.229
MAE: 2




No frequency information was provided, so inferred frequency D will be used.


No frequency information was provided, so inferred frequency D will be used.


Maximum Likelihood optimization failed to converge. Check mle_retvals



Store 536902
MAPE: 0.34
MAE: 4




No frequency information was provided, so inferred frequency D will be used.


No frequency information was provided, so inferred frequency D will be used.



Store 566790
MAPE: 0.428
MAE: 12




No frequency information was provided, so inferred frequency D will be used.


No frequency information was provided, so inferred frequency D will be used.



Store 566792
MAPE: 0.563
MAE: 4



In [47]:
res_whole_holiday_promo.to_csv('results/res_RW_ARIMA_whole_holiday_promo.csv')
res_whole_holiday_promo = pd.read_csv('results/res_RW_ARIMA_whole_holiday_promo.csv', index_col=0)

In [48]:
res_whole_holiday_promo.head()

Unnamed: 0,RW_mae_OOS,RW_mape_OOS,arima_mae_IS,arima_mae_OOS,arima_mape_IS,arima_mape_OOS,fc_IS,fc_OOS,fc_RW,store_id
0,11.0,0.322,7.0,8.0,0.426,0.348,date\n2017-08-07 -3.362041\n2017-08-08 -...,date\n2021-01-18 3.441731\n2021-01-19 1...,date\n2021-01-18 26.073581\n2021-01-19 2...,307222.0
1,13.0,0.555,7.0,5.0,0.447,0.261,date\n2017-08-10 -2.811091\n2017-08-11 -...,date\n2021-01-18 7.969913\n2021-01-19 1...,date\n2021-01-18 30.4334\n2021-01-19 30....,307244.0
2,9.0,0.334,5.0,3.0,0.427,0.16,date\n2017-08-17 3.336353\n2017-08-18 ...,date\n2021-01-18 9.243291\n2021-01-19 2...,date\n2021-01-18 29.9636\n2021-01-19 29....,307248.0
3,11.0,0.497,5.0,6.0,0.479,0.381,date\n2017-08-25 6.363432\n2017-08-26 -...,date\n2021-01-18 6.946864\n2021-01-19 ...,date\n2021-01-18 21.2306\n2021-01-19 21....,320264.0
4,104.0,0.9,14.0,19.0,0.504,0.252,date\n2017-08-09 0.849118\n2017-08-10 ...,date\n2021-01-18 59.399858\n2021-01-19 ...,date\n2021-01-18 177.126951\n2021-01-19 ...,328165.0


## FB Prophet

In [34]:
fb_df = pd.concat([df_company[['sales']], ts_holiday], axis=1)
fb_df['sales'] = fb_df['sales']/1e6
fb_df['holiday'] = fb_df['holiday'].fillna(False).astype('bool')
fb_df = fb_df.reset_index().rename({'date': 'ds', 'sales': 'y'}, axis=1)
fb_train = fb_df.iloc[:-steps_ahead]
fb_test = fb_df.iloc[-steps_ahead:]

In [35]:
# Grid search set up
param_grid = {
    'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.5],
    'seasonality_prior_scale': [0.01, 0.1, 1.0, 10.0],
}

all_params = [dict(zip(param_grid.keys(), v))
              for v in itertools.product(*param_grid.values())]

In [38]:
mape_PROPHET = []
for params in all_params:
    # set up model
    m = Prophet(**params)
    m.add_regressor('holiday')
    m.fit(fb_train)
    # set up CV
    df_cv = cross_validation(
        m,
        initial=(str(fb_train.shape[0]-steps_ahead*4)+' days'),
        period='7 days', horizon='14 days')
    # evaluate
    df_p = performance_metrics(df_cv)
    mape_PROPHET.append(df_p['mape'].values[-1])

# Find the best parameters
tuning_results = pd.DataFrame(all_params)
tuning_results['mape'] = mape_PROPHET
tuning_results.sort_values('mape', inplace=True)

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Making 6 forecasts with cutoffs between 2020-11-29 00:00:00 and 2021-01-03 00:00:00
100%|██████████| 6/6 [00:08<00:00,  1.35s/it]
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Making 6 forecasts with cutoffs between 2020-11-29 00:00:00 and 2021-01-03 00:00:00
100%|██████████| 6/6 [00:08<00:00,  1.35s/it]
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Making 6 forecasts with cutoffs between 2020-11-29 00:00:00 and 2021-01-03 00:00:00
100%|██████████| 6/6 [00:08<00:00,  1.37s/it]
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Making 6 forecasts with cutoffs between 2020-11-29 00:00:00 and 2021-01-03 00:00:00
100%|██████████| 6/6 [00:07<00:00,  1.30s/it]
INFO:prophet:Disabling daily

In [40]:
# %% final PROPHET
fb_model = Prophet(
    changepoint_prior_scale=tuning_results.loc[0, 'changepoint_prior_scale'],
    seasonality_prior_scale=tuning_results.loc[0, 'seasonality_prior_scale'],
)

fb_model.fit(fb_df)
fb_fc = fb_model.predict(fb_test)
mape_PROPHET = round(mean_absolute_percentage_error(fb_test['y'], fb_fc['yhat']), 3)
mape_PROPHET

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


0.239

In [49]:
res_fb = pd.DataFrame()
# LOOP all stores
for store in df_store['store_id'].unique():  # print(store)
    df_data = df_store[df_store['store_id'] == store].set_index('date')[
        ['sales']]
    fb_df = pd.concat([df_data, ts_holiday], axis=1)
    fb_df['sales'] = fb_df['sales']/1e6
    fb_df['holiday'] = fb_df['holiday'].fillna(False).astype('bool')
    fb_df = fb_df.dropna().reset_index().rename(
        {'date': 'ds', 'sales': 'y'}, axis=1)
    fb_train = fb_df.iloc[:-steps_ahead]
    fb_test = fb_df.iloc[-steps_ahead:]

    fb_model = Prophet(
        changepoint_prior_scale=tuning_results.loc[0,'changepoint_prior_scale'],
        seasonality_prior_scale=tuning_results.loc[0,'seasonality_prior_scale'],
    )
    fb_model.add_regressor('holiday')
    #fb_model.add_regressor('promo_count')
    fb_model.fit(fb_train)
    fb_fc = fb_model.predict(fb_test)

    # metrics
    fb_mae_OOS = round(mean_absolute_error(fb_test['y'], fb_fc['yhat']))
    fb_mape_OOS = round(mean_absolute_percentage_error(
        fb_test['y'], fb_fc['yhat']), 3)

    print(f'Store {store}')
    print(f'MAPE: {fb_mape_OOS}',
          f'MAE: {fb_mae_OOS}\n',
          sep='\n')

    res_fb = res_fb.append({
        'store_id': store,
        'fb_fc': fb_fc['yhat'],
        'fb_mape_OOS': fb_mape_OOS,
        'fb_mae_OOS': fb_mae_OOS,
    }, ignore_index=True)

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 307222
MAPE: 0.291
MAE: 10



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 307244
MAPE: 0.515
MAE: 12



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 307248
MAPE: 0.192
MAE: 5



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 320264
MAPE: 0.366
MAE: 8



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 328165
MAPE: 0.315
MAE: 25



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 349920
MAPE: 0.237
MAE: 12



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 349924
MAPE: 0.303
MAE: 7



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 349952
MAPE: 0.223
MAE: 6



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 349958
MAPE: 0.19
MAE: 7



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 349962
MAPE: 0.221
MAE: 4



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 349972
MAPE: 0.271
MAE: 8



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 349978
MAPE: 0.316
MAE: 4



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 349980
MAPE: 0.218
MAE: 8



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 349998
MAPE: 0.417
MAE: 7



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 350016
MAPE: 0.236
MAE: 6



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 350018
MAPE: 0.353
MAE: 24



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 350026
MAPE: 0.207
MAE: 3



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 350028
MAPE: 0.459
MAE: 5



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 350040
MAPE: 0.302
MAE: 4



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 350046
MAPE: 0.283
MAE: 3



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 350054
MAPE: 0.521
MAE: 15



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 350056
MAPE: 0.324
MAE: 3



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 350060
MAPE: 0.299
MAE: 9



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 354468
MAPE: 0.529
MAE: 5



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 387240
MAPE: 0.525
MAE: 6



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 412585
MAPE: 0.396
MAE: 4



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 441997
MAPE: 0.234
MAE: 3



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 452387
MAPE: 0.318
MAE: 10



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 461349
MAPE: 0.521
MAE: 7



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 464495
MAPE: 0.48
MAE: 4



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 471477
MAPE: 0.241
MAE: 4



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 476061
MAPE: 0.693
MAE: 3



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 480733
MAPE: 0.31
MAE: 8



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 528854
MAPE: 0.31
MAE: 15



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 536898
MAPE: 0.307
MAE: 3



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 536902
MAPE: 0.255
MAE: 3



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 566790
MAPE: 0.449
MAE: 18

Store 566792
MAPE: 0.845
MAE: 7



In [50]:
# Save results to csv
res_fb.to_csv('results/res_fb_noPromo.csv')
res_fb = pd.read_csv('results/res_fb_noPromo.csv')

In [60]:
res = pd.merge(res_fb.set_index('store_id')['fb_mape_OOS'],
               res_whole_holiday_promo.set_index(
                   'store_id')[['RW_mape_OOS', 'arima_mape_OOS']],
               'inner', left_index=True, right_index=True).reset_index()

# %%
fig = res[['fb_mape_OOS',
        #    'RW_mape_OOS',
           'arima_mape_OOS'
           ]].plot()

# RW_mean = round(res['RW_mape_OOS'].mean(), 3)
# fig.add_hline(y=RW_mean, line_dash="dot", line_color='red',
#               annotation_text=str(RW_mean))

arima_mean = round(res['arima_mape_OOS'].mean(), 3)
fig.add_hline(y=arima_mean, line_dash="dot", line_color='red',
              annotation_text=str(arima_mean)
        )

fb_mean = round(res['fb_mape_OOS'].mean(), 3)
fig.add_hline(y=fb_mean, line_dash="dot", line_color='blue',
              annotation_text=str(fb_mean),
              annotation_position="bottom right")

## FB Prophet with sales

In [86]:
fb_df = pd.concat([df_company[['sales']], ts_holiday], axis=1)
fb_df['sales'] = fb_df['sales']/1e6
#fb_df['holiday'] = fb_df['holiday'].fillna(False).astype('bool')
fb_df = fb_df.reset_index().rename({'date': 'ds', 'sales': 'y'}, axis=1)
fb_train = fb_df.iloc[:-steps_ahead]
fb_test = fb_df.iloc[-steps_ahead:]

In [87]:
import datetime

holiday = pd.DataFrame(holidays.Vietnam(years=[2018, 2019, 2020, 2021]).items()).rename({0:'date', 1:'holiday_neg'}, axis=1)
# Add Tet promotion
promo = pd.DataFrame(holiday[holiday.holiday_neg == 'Vietnamese New Year'].date - datetime.timedelta(days=1))
promo['promotion'] = "1 days before Tet Holiday"
# Add Black Friday
promo = promo.append(
            pd.DataFrame({'date':[datetime.date(2020,11,27), datetime.date(2019,11,29),datetime.date(2018,11,23)],
                          'promotion':["Black Friday","Black Friday","Black Friday"]})
        )

#holiday['promotion'] = np.NaN
# All together
#holiday = holiday.append(promo).sort_values('date')

# set to 1 if holiday affect sales negatively
holiday_off = holiday.replace({'Vietnamese New Year.*': 1, 
                 '.*day of Tet Holiday': 1,
                 'International Labor Day': 1,
                 '\D': np.NaN}, 
                regex=True).dropna()

promotions = pd.DataFrame({
        'holiday':'big_promotion',
        'ds':promo.date,
        'lower_window': -14, # 2 weeks before
        'upper_window': 0,
})

holiday_negative = pd.DataFrame({
        'holiday':'big_promotion',
        'ds':holiday_off.date,
        'lower_window': 0,
        'upper_window': 0,
})

holidays_fb = pd.concat((promotions, holiday_negative))

In [88]:
# Grid search set up
param_grid = {
    'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.5],
    'seasonality_prior_scale': [0.01, 0.1, 1.0, 10.0],
}

all_params = [dict(zip(param_grid.keys(), v))
              for v in itertools.product(*param_grid.values())]

In [89]:
holidays_fb.head()

Unnamed: 0,holiday,ds,lower_window,upper_window
2,big_promotion,2018-02-15,-14,0
13,big_promotion,2019-02-04,-14,0
24,big_promotion,2020-01-24,-14,0
35,big_promotion,2021-02-11,-14,0
0,big_promotion,2020-11-27,-14,0


In [90]:
mape_PROPHET = []
for params in all_params:
    params['weekly_seasonality']=True
    params['holidays'] = holidays_fb
    # set up model
    m = Prophet(**params)
    #m.add_regressor('holiday')
    m.fit(fb_train)
    # set up CV
    df_cv = cross_validation(
        m,
        initial=(str(fb_train.shape[0]-steps_ahead*4)+' days'),
        period='7 days', horizon='14 days')
    # evaluate
    df_p = performance_metrics(df_cv)
    mape_PROPHET.append(df_p['mape'].values[-1])

# Find the best parameters
tuning_results = pd.DataFrame(all_params)
tuning_results['mape'] = mape_PROPHET
tuning_results.sort_values('mape', inplace=True)

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Making 6 forecasts with cutoffs between 2020-11-29 00:00:00 and 2021-01-03 00:00:00
100%|██████████| 6/6 [00:07<00:00,  1.29s/it]
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Making 6 forecasts with cutoffs between 2020-11-29 00:00:00 and 2021-01-03 00:00:00
100%|██████████| 6/6 [00:07<00:00,  1.26s/it]
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Making 6 forecasts with cutoffs between 2020-11-29 00:00:00 and 2021-01-03 00:00:00
100%|██████████| 6/6 [00:07<00:00,  1.30s/it]
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Making 6 forecasts with cutoffs between 2020-11-29 00:00:00 and 2021-01-03 00:00:00
100%|██████████| 6/6 [00:08<00:00,  1.41s/it]
INFO:prophet:Disabling daily

In [92]:
# %% final PROPHET
params = {
    'changepoint_prior_scale':tuning_results.loc[0, 'changepoint_prior_scale'],
    'seasonality_prior_scale':tuning_results.loc[0, 'seasonality_prior_scale'],
    'weekly_seasonality':True,
    'holidays':holidays_fb
}

fb_model = Prophet(**params)

fb_model.fit(fb_df)
fb_fc = fb_model.predict(fb_test)
mape_PROPHET = round(mean_absolute_percentage_error(fb_test['y'], fb_fc['yhat']), 3)
mape_PROPHET

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


0.198

In [94]:
res_fb = pd.DataFrame()
# LOOP all stores
for store in df_store['store_id'].unique():  # print(store)
    df_data = df_store[df_store['store_id'] == store].set_index('date')[
        ['sales']]
    fb_df = pd.concat([df_data], axis=1)
    fb_df['sales'] = fb_df['sales']/1e6
    fb_df = fb_df.dropna().reset_index().rename(
        {'date': 'ds', 'sales': 'y'}, axis=1)
    fb_train = fb_df.iloc[:-steps_ahead]
    fb_test = fb_df.iloc[-steps_ahead:]

    params = {
    'changepoint_prior_scale':tuning_results.loc[0, 'changepoint_prior_scale'],
    'seasonality_prior_scale':tuning_results.loc[0, 'seasonality_prior_scale'],
    'weekly_seasonality':True,
    'holidays':holidays_fb
    }

    fb_model = Prophet(**params)
    #fb_model.add_regressor('promo_count')
    fb_model.fit(fb_train)
    fb_fc = fb_model.predict(fb_test)

    # metrics
    fb_mae_OOS = round(mean_absolute_error(fb_test['y'], fb_fc['yhat']))
    fb_mape_OOS = round(mean_absolute_percentage_error(
        fb_test['y'], fb_fc['yhat']), 3)

    print(f'Store {store}')
    print(f'MAPE: {fb_mape_OOS}',
          f'MAE: {fb_mae_OOS}\n',
          sep='\n')

    res_fb = res_fb.append({
        'store_id': store,
        'fb_fc': fb_fc['yhat'],
        'fb_mape_OOS': fb_mape_OOS,
        'fb_mae_OOS': fb_mae_OOS,
    }, ignore_index=True)

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 307222
MAPE: 0.237
MAE: 8



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 307244
MAPE: 0.419
MAE: 9



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 307248
MAPE: 0.167
MAE: 5



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 320264
MAPE: 0.343
MAE: 7



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 328165
MAPE: 0.27
MAE: 23



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 349920
MAPE: 0.214
MAE: 12



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 349924
MAPE: 0.249
MAE: 6



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 349952
MAPE: 0.231
MAE: 6



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 349958
MAPE: 0.223
MAE: 8



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 349962
MAPE: 0.202
MAE: 3



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 349972
MAPE: 0.266
MAE: 8



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 349978
MAPE: 0.32
MAE: 4



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 349980
MAPE: 0.219
MAE: 8



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 349998
MAPE: 0.408
MAE: 7



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 350016
MAPE: 0.288
MAE: 7



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 350018
MAPE: 0.653
MAE: 34



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 350026
MAPE: 0.225
MAE: 3



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 350028
MAPE: 0.372
MAE: 4



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 350040
MAPE: 0.285
MAE: 4



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 350046
MAPE: 0.272
MAE: 3



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 350054
MAPE: 0.46
MAE: 13



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 350056
MAPE: 0.307
MAE: 3



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 350060
MAPE: 0.273
MAE: 8



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 354468
MAPE: 0.511
MAE: 4



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 387240
MAPE: 0.446
MAE: 5



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 412585
MAPE: 0.409
MAE: 4



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 441997
MAPE: 0.244
MAE: 3



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 452387
MAPE: 0.4
MAE: 12



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 461349
MAPE: 0.473
MAE: 6



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 464495
MAPE: 0.474
MAE: 4



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 471477
MAPE: 0.264
MAE: 5



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 476061
MAPE: 0.674
MAE: 3



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 480733
MAPE: 0.286
MAE: 8



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 528854
MAPE: 0.399
MAE: 18



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 536898
MAPE: 0.37
MAE: 4



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 536902
MAPE: 0.424
MAE: 5



INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Store 566790
MAPE: 0.554
MAE: 19

Store 566792
MAPE: 0.854
MAE: 8



In [95]:
# Save results to csv
res_fb.to_csv('results/res_fb_withPromo.csv')


## Results

In [99]:

res_ARIMA = pd.read_csv('results/res_RW_ARIMA_whole_holiday_promo.csv',index_col=0)
res_fb_noPromo = pd.read_csv('results/res_fb_noPromo.csv',index_col=0)
res_fb_withPromo = pd.read_csv('results/res_fb_withPromo.csv',index_col=0)

In [110]:
res = pd.merge(
    res_fb_noPromo.set_index('store_id')['fb_mape_OOS'],
    res_ARIMA.set_index('store_id')[['RW_mape_OOS', 'arima_mape_OOS']],
    'inner', left_index=True, right_index=True).reset_index()
res.head()

Unnamed: 0,store_id,fb_mape_OOS,RW_mape_OOS,arima_mape_OOS
0,307222.0,0.291,0.322,0.348
1,307222.0,0.291,0.322,0.537
2,307244.0,0.515,0.555,0.261
3,307244.0,0.515,0.555,0.319
4,307248.0,0.192,0.334,0.16


In [119]:

res = pd.merge(
    res_fb_noPromo.set_index('store_id')['fb_mape_OOS'],
    res_ARIMA.set_index('store_id')[['RW_mape_OOS', 'arima_mape_OOS']],
    'inner', left_index=True, right_index=True)
res = pd.merge(
    res,
    res_fb_withPromo.set_index('store_id').rename({'fb_mape_OOS':'fb_withPromo_mape_OOS'},axis = 1)['fb_withPromo_mape_OOS'],
    'inner', left_index=True, right_index=True).reset_index()


# %%
fig = res[['fb_mape_OOS',
            'fb_withPromo_mape_OOS',
            'RW_mape_OOS',
           'arima_mape_OOS'
           ]].plot()

RW_mean = round(res['RW_mape_OOS'].mean(), 3)
fig.add_hline(y=RW_mean, line_dash="dot", line_color='red',
               annotation_text=str(RW_mean),
               annotation_position="top right"
            )

arima_mean = round(res['arima_mape_OOS'].mean(), 3)
fig.add_hline(y=arima_mean, line_dash="dot", line_color='red',
              annotation_text=str(arima_mean),
              annotation_position="bottom right"
            )

fb_mean = round(res['fb_mape_OOS'].mean(), 3)
fig.add_hline(y=fb_mean, line_dash="dot", line_color='blue',
              annotation_text=str(fb_mean),
              annotation_position="top left")

fb_withPromo_mean = round(res['fb_withPromo_mape_OOS'].mean(), 3)
fig.add_hline(y=fb_withPromo_mean, line_dash="dot", line_color='blue',
              annotation_text=str(fb_withPromo_mean),
              annotation_position="top right")

In [121]:
# %%
fig = res[['fb_mape_OOS',
            'fb_withPromo_mape_OOS',
            'RW_mape_OOS',
           'arima_mape_OOS'
           ]].plot()