In [None]:
# %%
# imports
import itertools

import holidays
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
from pmdarima import auto_arima, ARIMA
from pmdarima.preprocessing import BoxCoxEndogTransformer
from pmdarima.preprocessing import FourierFeaturizer
from prophet import Prophet
from prophet.diagnostics import cross_validation
from prophet.diagnostics import performance_metrics
from sktime.performance_metrics.forecasting import mean_absolute_scaled_error, mean_absolute_error, mean_absolute_percentage_error
# Route pandas .plot()/.hist() calls through plotly, so they return plotly figures.
pd.options.plotting.backend = 'plotly'


In [48]:
# --- Target series -----------------------------------------------------------
# Sum the daily frame over each date and scale the `sales` column by 1e-6.
df_store = pd.read_pickle('data/df_daily.pkl')
df_company = df_store.groupby('date').sum()
train_data = df_company['sales'].div(1e6)
train_data.index.freq = 'D'  # tag the index as daily

# --- Yearly seasonality ------------------------------------------------------
# Fourier terms with period 365.25 and 2 sine/cosine pairs.
fouri_terms = FourierFeaturizer(m=365.25, k=2)
y_prime, df_fouri = fouri_terms.fit_transform(train_data)
df_fouri.index = y_prime.index  # give the Fourier terms the same index as the series

# --- Holiday flag and promo counts -------------------------------------------
ts_holiday = pd.read_pickle('data/holiday.pkl')
ts_promo = df_company['promo_count']

# --- Exogenous regressors ----------------------------------------------------
# Column-wise join; rows without a holiday entry become 0 in the integer flag.
df_exog = (
    pd.concat([df_fouri, ts_holiday, ts_promo], axis=1)
      .assign(holiday=lambda frame: frame['holiday'].fillna(False).astype('int'))
)


# %%
# Hold out the last `steps_ahead` rows of both the series and the regressors
# as the test window; everything before them is the training window.
steps_ahead = 92
y_to_train, y_to_test = y_prime.iloc[:-steps_ahead], y_prime.iloc[-steps_ahead:]
exog_to_train, exog_to_test = df_exog.iloc[:-steps_ahead], df_exog.iloc[-steps_ahead:]


# %%
# Fit a seasonal ARIMA on the training window, letting auto_arima search the
# orders. The regressors go in as `X`, the same keyword this notebook already
# uses for `predict_in_sample`; `exogenous` is the deprecated spelling.
arima_model = auto_arima(
    y=y_to_train,
    X=exog_to_train,
    D=1,            # one round of seasonal differencing
    seasonal=True,
    m=7,            # weekly seasonality on the daily series
)
arima_model

# %%
# Forecast
# In-sample fit on the training window and out-of-sample forecast over the
# holdout. Both calls pass the regressors as `X`, so the keyword is consistent.
arima_y_fitted = arima_model.predict_in_sample(
    X=exog_to_train)
arima_y_forecast = arima_model.predict(
    n_periods=len(y_to_test), X=exog_to_test)
# np.asarray makes the assignment positional: if predict() returns a Series
# whose index differs from the test index, pd.Series(..., index=...) would
# reindex it and could silently produce NaNs.
arima_y_forecast = pd.Series(np.asarray(arima_y_forecast),
                             name='forecast',
                             index=y_to_test.index)

# Forecast and actuals side by side for the holdout window.
res_raw = pd.DataFrame(arima_y_forecast)
res_raw['test'] = y_to_test


# metrics
mape_OOS_raw = round(mean_absolute_percentage_error(res_raw['test'], res_raw['forecast']), 3)

print(f'MAPE: {mape_OOS_raw}')


MAPE: 0.38


In [61]:
# BOXCOX
# Box-Cox needs strictly positive input, so zeros are replaced by the series
# mean before fitting the transformer.
# NOTE(review): both the replacement mean and the fitted transform are
# estimated on the full series, including the rows that are later held out
# for testing. Consider fitting on the training window only.
transformer = BoxCoxEndogTransformer()
tran_data = transformer.fit_transform(train_data.replace(0, train_data.mean()))[0]
tran_data = pd.Series(tran_data, index=train_data.index)
# With the plotly backend hist() only returns a figure; show it explicitly,
# since it is not the last expression of the cell.
tran_data.hist().show()
# DATA PREP (Box-Cox scale)
# Same regressors as for the raw model, rebuilt around the transformed series.

# Yearly seasonality: Fourier terms with period 365.25 and 2 sine/cosine pairs.
fouri_terms = FourierFeaturizer(m=365.25, k=2)
y_prime, df_fouri = fouri_terms.fit_transform(tran_data)
df_fouri.index = y_prime.index  # give the Fourier terms the same index as the series

# Holiday flag and promo counts.
ts_holiday = pd.read_pickle('data/holiday.pkl')
ts_promo = df_company['promo_count']

# Column-wise join; rows without a holiday entry become 0 in the integer flag.
df_exog = (
    pd.concat([df_fouri, ts_holiday, ts_promo], axis=1)
      .assign(holiday=lambda frame: frame['holiday'].fillna(False).astype('int'))
)

# %%
# Train/test split on the Box-Cox scale: the final `steps_ahead` rows of the
# series and of the regressors form the test window.
steps_ahead = 92
y_to_train = y_prime.head(-steps_ahead)
y_to_test = y_prime.tail(steps_ahead)

exog_to_train = df_exog.head(-steps_ahead)
exog_to_test = df_exog.tail(steps_ahead)

# Fit a seasonal ARIMA to the Box-Cox-transformed training window, letting
# auto_arima search the orders. The regressors go in as `X`, the same keyword
# this notebook uses for `predict_in_sample`; `exogenous` is the deprecated
# spelling.
arima_model_BC = auto_arima(
    y=y_to_train,
    X=exog_to_train,
    D=1,            # one round of seasonal differencing
    seasonal=True,
    m=7,            # weekly seasonality on the daily series
)

# Forecast on the Box-Cox scale, then map back to the original sales scale.
arima_y_forecast = arima_model_BC.predict(n_periods=len(y_to_test), X=exog_to_test)
arima_y_forecast = transformer.inverse_transform(arima_y_forecast)[0]
# np.asarray makes the assignment positional, so a mismatching index on the
# returned forecast cannot turn the values into NaNs through reindexing.
arima_y_forecast = pd.Series(np.asarray(arima_y_forecast),
                             name='fc_BC',
                             index=y_to_test.index)

# metrics
# Holdout MAPE on the original scale, mirroring the raw-model metric. The
# summary cell reads `mape_BC`, so it has to be defined here.
y_test_orig_scale = pd.Series(
    np.asarray(transformer.inverse_transform(y_to_test)[0]),
    index=y_to_test.index)
mape_BC = round(mean_absolute_percentage_error(y_test_orig_scale, arima_y_forecast), 3)

print(f'MAPE: {mape_BC}')


MAPE: 0.233


In [None]:
# Holdout comparison frame on the original sales scale: Box-Cox forecast,
# actuals (transformed test values mapped back), and the raw-model forecast.
res = pd.DataFrame(arima_y_forecast)
res['test'] = transformer.inverse_transform(y_to_test)[0]
# Later cells read `res.fc_raw`; attach it here so the frame is complete on a
# fresh top-to-bottom run. Values are aligned on the shared date index.
res['fc_raw'] = res_raw['forecast']


In [66]:
# Holdout MAPE of the raw and the Box-Cox model, followed by the comparison frame.
print('\n'.join((f'Raw: {mape_OOS_raw}', f'BC: {mape_BC}', str(res))))


Raw: 0.38
BC: 0.233
                  fc_BC         test      fc_raw
date                                            
2020-11-01   559.755062   619.011000  468.171094
2020-11-02   323.684464   264.581400  235.293033
2020-11-03   344.293495   307.434750  187.300946
2020-11-04   384.388759   332.188850  337.931445
2020-11-05   372.810455   335.843400  321.597038
...                 ...          ...         ...
2021-01-27   540.647447   669.639716  535.186338
2021-01-28   548.016289   720.317779  585.404843
2021-01-29   637.832347   882.390069  554.882402
2021-01-30   831.414581  1219.184443  711.412299
2021-01-31  1117.788066  1726.496562  985.700204

[92 rows x 3 columns]


In [62]:
# The horizon comparisons below read `res.fc_raw`. Attach the raw-model
# forecast if the frame does not carry it yet; the guard keeps this cell
# idempotent when it is re-run.
if 'fc_raw' not in res.columns:
    res['fc_raw'] = res_raw['forecast']
# Plot actuals and both forecasts over the holdout window.
res[['test', 'fc_raw', 'fc_BC']].plot(title='Holdout: actuals vs. forecasts')

In [92]:
# Raw-model MAPE over growing horizons: the first 7/14/21/28 days, then the
# first 61 and 92 days (labelled 2M and 3M) of the holdout window.
pd.Series({
    label: round(
        mean_absolute_percentage_error(res.test.head(days), res.fc_raw.head(days)), 3)
    for label, days in (('1w', 7), ('2w', 14), ('3w', 21),
                        ('4w', 28), ('2M', 61), ('3M', 92))
}).plot()

In [108]:
# MAPE of both models over the first 1..13 weeks of the holdout window,
# one row per horizon (in weeks).
hor_raw = [
    round(mean_absolute_percentage_error(res.test.head(w * 7), res.fc_raw.head(w * 7)), 3)
    for w in range(1, 14)
]
hor_BC = [
    round(mean_absolute_percentage_error(res.test.head(w * 7), res.fc_BC.head(w * 7)), 3)
    for w in range(1, 14)
]
hor = pd.DataFrame({'raw': hor_raw, 'BC': hor_BC}, index=np.arange(1, 14))
hor

Unnamed: 0,raw,BC
1,0.22,0.119
2,0.285,0.149
3,0.262,0.156
4,0.343,0.184
5,0.337,0.221
6,0.321,0.225
7,0.311,0.222
8,0.3,0.202
9,0.31,0.206
10,0.327,0.199


In [111]:
# Line chart of MAPE against horizon length for both models.
fig = (
    hor.plot(title='MAPE over various horizons')
       .update_xaxes(title='weeks')
       .update_yaxes(title='MAPE')
)
fig
