In [2]:
# %%
# imports
import itertools

import holidays
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
from pmdarima import auto_arima, ARIMA
from pmdarima.preprocessing import BoxCoxEndogTransformer
from pmdarima.preprocessing import FourierFeaturizer
from prophet import Prophet
from prophet.diagnostics import cross_validation
from prophet.diagnostics import performance_metrics
from sktime.performance_metrics.forecasting import mean_absolute_scaled_error, mean_absolute_error, mean_absolute_percentage_error
# Route every pandas `.plot()` / `.hist()` call in this notebook through plotly.
pd.set_option('plotting.backend', 'plotly')


In [3]:
# raw data
# The daily pickle is grouped by 'date' and summed, giving one row per date.
df_store = pd.read_pickle('data/df_daily.pkl')
df_company = df_store.groupby('date').sum()
train_data = df_company['sales']/1e6  # sales divided by 1e6
# Declares a daily frequency on the index; pandas raises here if the dates
# do not actually conform to it.
train_data.index.freq = 'D'

# yearly seasonality
# Fourier terms with period 365.25 and k=2; weekly seasonality is left to the
# seasonal part of the ARIMA model fitted in a later cell.
fouri_terms = FourierFeaturizer(365.25, 2)
y_prime, df_fouri = fouri_terms.fit_transform(train_data)
df_fouri.index = y_prime.index

# holiday
ts_holiday = pd.read_pickle('data/holiday.pkl')

# promo
# Loaded but deliberately not part of the exogenous frame built below.
ts_promo = df_company['promo_count']

# combine exog. variables
# `pd.concat(axis=1)` is an outer join: any holiday date outside the sales
# index would add rows and break the positional train/test split below.
# Reindexing onto `y_prime.index` keeps the exogenous frame row-for-row
# aligned with the target series.
df_exog = pd.concat([df_fouri, ts_holiday], axis=1).reindex(y_prime.index)
# Dates with no holiday entry become 0.
df_exog['holiday'] = df_exog['holiday'].fillna(False).astype('int')

# Split the time series as well as exogenous features data into train and test splits
steps_ahead = 92  # number of trailing observations held out for testing
y_to_train = y_prime.iloc[:-steps_ahead]
y_to_test = y_prime.iloc[-steps_ahead:]

exog_to_train = df_exog.iloc[:-steps_ahead]
exog_to_test = df_exog.iloc[-steps_ahead:]


In [4]:
# Fit model to the level to find common order
# The exogenous regressors are passed as `X` everywhere in this cell;
# `exogenous` is the older keyword for the same argument, and mixing the two
# spellings made the fit and predict calls inconsistent.
arima_model = auto_arima(
    y=y_to_train,
    X=exog_to_train,
    D=1,
    seasonal=True, m=7  # Weekly seasonality
    )

# %%
# Forecast
arima_y_fitted = arima_model.predict_in_sample(
    X=exog_to_train)
arima_y_forecast = arima_model.predict(
    n_periods=len(y_to_test), X=exog_to_test)
# `np.asarray` makes the assignment positional. If `predict` hands back a
# Series, `pd.Series(series, index=...)` would reindex by label instead and
# silently yield NaN wherever the labels differ from the test index.
arima_y_forecast = pd.Series(np.asarray(arima_y_forecast),
                             name='forecast',
                             index=y_to_test.index)

# Forecast and actuals side by side for the held-out window.
res_raw = arima_y_forecast.to_frame()
res_raw['test'] = y_to_test

# metrics
mape_OOS_raw = round(mean_absolute_percentage_error(res_raw['test'], res_raw['forecast']), 3)

print(f'MAPE: {mape_OOS_raw}')


KeyboardInterrupt: 

In [5]:
# BOXCOX
# Same pipeline as the raw-scale model, but fitted on Box-Cox transformed
# sales; forecasts are mapped back to the original scale at the end.
# `BoxCoxEndogTransformer` is imported in the top import cell.
transformer = BoxCoxEndogTransformer()
# Zeros are replaced by the series mean before transforming (presumably
# because Box-Cox needs strictly positive input -- confirm).
# NOTE(review): the transformer is fitted on the full series, test window
# included, so its parameters see the hold-out data. Consider fitting on the
# training slice only.
tran_data = transformer.fit_transform(train_data.replace(0,train_data.mean()))[0]
# Positional wrap so the values keep the original date index.
tran_data = pd.Series(np.asarray(tran_data), train_data.index)
# NOTE(review): not the last expression of the cell, so this figure is
# probably not rendered -- move to its own cell if the histogram is wanted.
tran_data.hist()
# DATA PREP
# yearly seasonality
fouri_terms = FourierFeaturizer(365.25, 2)
y_prime, df_fouri = fouri_terms.fit_transform(tran_data)
df_fouri.index = y_prime.index

# holiday
ts_holiday = pd.read_pickle('data/holiday.pkl')

# promo
# Promo counts are intentionally not used as a regressor here.

# combine exog. variables
# `pd.concat(axis=1)` is an outer join; reindexing onto `y_prime.index` keeps
# the exogenous frame row-for-row aligned with the target, so the positional
# split below cannot drift if the holiday data covers other dates.
df_exog = pd.concat([df_fouri, ts_holiday], axis=1).reindex(y_prime.index)
df_exog['holiday'] = df_exog['holiday'].fillna(False).astype('int')

# %%
# Split the time series as well as exogenous features data into train and test splits
steps_ahead = 92  # number of trailing observations held out for testing
y_to_train = y_prime.iloc[:-steps_ahead]
y_to_test = y_prime.iloc[-steps_ahead:]

exog_to_train = df_exog.iloc[:-steps_ahead]
exog_to_test = df_exog.iloc[-steps_ahead:]

# Fit model to the level to find common order
# Regressors are passed as `X` (the older `exogenous` keyword is avoided).
arima_model_BC = auto_arima(
    y=y_to_train,
    X=exog_to_train,
    D=1,
    seasonal=True, m=7  # Weekly seasonality
    )

# Forecast
arima_y_forecast = arima_model_BC.predict(n_periods=len(y_to_test), X=exog_to_test)
# Back to the original sales scale; element [0] is the transformed endog.
arima_y_forecast = transformer.inverse_transform(arima_y_forecast)[0]
# Positional assignment: avoids label-based reindexing (and silent NaN) if
# the value above comes back as a Series with a different index.
arima_y_forecast = pd.Series(np.asarray(arima_y_forecast),
                             name='fc_BC',
                             index=y_to_test.index)



In [7]:
# Result frame: the Box-Cox forecast ('fc_BC') next to the test values mapped
# back through the inverse transform.
res = arima_y_forecast.to_frame().assign(
    test=transformer.inverse_transform(y_to_test)[0]
)


In [9]:
# Add the raw-scale forecast, fix the column order, and plot all three series.
res = res.assign(fc_raw=res_raw['forecast'])[['test', 'fc_raw', 'fc_BC']]
res.plot()

In [10]:
# MAPE on the first w*7 rows of the test window, for w = 1..13, computed for
# both the raw-scale and the Box-Cox forecast.
hor_raw = [
    round(mean_absolute_percentage_error(res.test.head(w*7), res.fc_raw.head(w*7)), 3)
    for w in range(1, 14)
]
hor_BC = [
    round(mean_absolute_percentage_error(res.test.head(w*7), res.fc_BC.head(w*7)), 3)
    for w in range(1, 14)
]
# One row per horizon, indexed 1..13.
hor = pd.DataFrame({'raw': hor_raw, 'BC': hor_BC}, index=np.arange(1, 14))
hor

Unnamed: 0,raw,BC
1,0.331,0.33
2,0.424,0.417
3,0.492,0.496
4,0.457,0.455
5,0.471,0.459
6,0.469,0.453
7,0.459,0.439
8,0.414,0.402
9,0.401,0.391
10,0.382,0.364


In [11]:
# Line chart of the horizon table with labelled axes; the figure is the
# cell's last expression so it is displayed.
fig = (
    hor.plot(title='MAPE over various horizons')
    .update_xaxes(title='weeks')
    .update_yaxes(title='MAPE')
)
fig
