In [23]:
import pandas as pd
import numpy as np
from prophet import Prophet
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from utils import *



In [33]:
# Cut-off date: rows with ds < split_date are used for training, rows with
# ds >= split_date are treated as the hold-out set.
# NOTE(review): the data previews in this notebook end at 2016-05-22 and the
# split check prints "Test rows: 0", so with this value every row falls in the
# training set and no hold-out period exists — confirm this is intended.
split_date = "2017-01-01"

In [34]:
# Directory holding the sales extracts, and the two tables read from it:
# df_all (one row per day) and df_cat (one row per day and cat_id).
path_m5 = "./"
df_all, df_cat = (
    pd.read_csv(f"{path_m5}{filename}")
    for filename in ("sales_ms_all.csv", "sales_ms_cat.csv")
)

In [35]:
# Preview the first five rows of the overall sales table.
df_all.head(n=5)

Unnamed: 0,day,y,sell_price,full_price,discount_pct,ds,weekday,wday,month
0,d_1,32631.0,4.07,4.402,7.69,2011-01-29,Saturday,1,1
1,d_2,31749.0,4.07,4.402,7.69,2011-01-30,Sunday,2,1
2,d_3,23783.0,4.07,4.402,7.69,2011-01-31,Monday,3,1
3,d_4,25412.0,4.07,4.402,7.69,2011-02-01,Tuesday,4,2
4,d_5,19146.0,4.07,4.402,7.69,2011-02-02,Wednesday,5,2


In [36]:
# Preview the last five rows of the per-category sales table.
df_cat.tail(n=5)

Unnamed: 0,cat_id,day,y,sell_price,full_price,discount_pct,ds,weekday,wday,month
5818,0,d_1940,33599.0,3.344,3.475,3.574,2016-05-21,Saturday,1,5
5819,2,d_1940,12586.0,5.47,5.633,2.195,2016-05-21,Saturday,1,5
5820,2,d_1941,13091.0,5.47,5.633,2.195,2016-05-22,Sunday,2,5
5821,1,d_1941,5280.0,5.535,5.707,3.197,2016-05-22,Sunday,2,5
5822,0,d_1941,35967.0,3.344,3.475,3.574,2016-05-22,Sunday,2,5


In [37]:
# Chronological split of the overall table around split_date:
# strictly earlier rows train the model, the remaining rows are the hold-out.
train_mask = df_all["ds"].lt(split_date)
test_mask = df_all["ds"].ge(split_date)
df_train = df_all.loc[train_mask]
df_test = df_all.loc[test_mask]

In [38]:
# Report how many rows ended up on each side of the split.
n_train, n_test = len(df_train), len(df_test)
print(f"Train rows: {n_train}")
print(f"Test rows: {n_test}")

Train rows: 1941
Test rows: 0


In [39]:
# Overall model: a single Prophet fit on the training slice of df_all.
# NOTE(review): daily_seasonality is enabled although the input appears to have
# one row per day — confirm that component is wanted.
prophet_options = {
    "seasonality_mode": "multiplicative",
    "daily_seasonality": True,
    "weekly_seasonality": True,
    "yearly_seasonality": True,
}
m = Prophet(**prophet_options)
m.fit(df_train)

# Extend the training dates by as many periods as there are hold-out rows.
horizon = len(df_test)
future = m.make_future_dataframe(periods=horizon)
forecast = m.predict(future)

In [40]:
# Attach the overall forecast (point estimate and interval bounds) to df_all.
#
# pd.to_datetime replaces astype(np.datetime64): casting to the unit-less
# datetime64 dtype is rejected by pandas >= 2.0, whereas to_datetime parses the
# ISO date strings on any pandas version and leaves datetime columns unchanged.
forecast_cols = ["ds", "yhat", "yhat_lower", "yhat_upper"]
forecast = forecast[forecast_cols].assign(ds=lambda f: pd.to_datetime(f["ds"]))

df_all = (
    df_all
    .assign(ds=lambda f: pd.to_datetime(f["ds"]))
    # Remove predictions left by an earlier run of this cell; otherwise the
    # merge would emit yhat_x / yhat_y and later df_all["yhat"] lookups fail.
    .drop(columns=forecast_cols[1:], errors="ignore")
    # Left join keeps every sales row even if no forecast exists for its date.
    .merge(forecast, on="ds", how="left")
)

In [41]:
# Put the merged table in chronological order (row labels are kept as-is).
df_all = df_all.sort_values(by="ds")

In [42]:
# Interactive chart: actual sales versus the overall Prophet prediction.
fig = make_subplots(specs=[[{"secondary_y": True}]])

series_specs = (
    # (df_all column, initial mode, legend label, marker colour)
    ("y", "lines", "Sales", "rgb(121,121,121)"),
    ("yhat", "lines+markers", "Prediccion", "rgb(16,52,166)"),
)
for column, mode, label, colour in series_specs:
    fig.add_trace(
        go.Scatter(
            x=df_all["ds"],
            y=df_all[column],
            mode=mode,
            name=label,
            marker_color=colour,
        )
    )

# Applied to every trace, so both series end up drawn as markers+lines.
fig.update_traces(mode="markers+lines")
fig.update_xaxes(rangeslider_visible=True)

# Horizontal legend anchored just above the top-right corner of the plot area.
legend_options = dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1,
)
layout_options = {
    "height": 800,
    "template": "plotly_white",
    "plot_bgcolor": "rgba(0,0,0,0)",
    "paper_bgcolor": "rgba(0,0,0,0)",
    "xaxis_title": "Fecha",
    "yaxis_title": "Unidades",
    "yaxis_tickformat": ",.0f",
    "yaxis.rangemode": "tozero",
    "legend": legend_options,
}
# Last expression of the cell: update_layout returns the figure, which renders it.
fig.update_layout(layout_options)

In [43]:
# Signed residual of the overall model: actual minus predicted.
df_all["error"] = df_all["y"].sub(df_all["yhat"])

In [44]:
# Fit one Prophet model per category and stack the forecasts in df_predictions
# (columns: ds, yhat, yhat_lower, yhat_upper, cat_id).
#
# Changes relative to the previous version of this cell:
# * the forecast horizon is the hold-out length (len(cat_test)), matching the
#   overall model, instead of the training length;
# * forecasts are collected in a list and concatenated once, rather than
#   growing a DataFrame inside the loop;
# * ds / cat_id are set with .assign() on a fresh frame (no write onto a column
#   slice) and ds is parsed with pd.to_datetime, since the unit-less
#   np.datetime64 cast is rejected by pandas >= 2.0;
# * loop-local names are used so the overall df_train / df_test / m / forecast
#   from the earlier cells are not overwritten.
category_forecasts = []
for category in df_cat["cat_id"].unique():
    print(category)
    cat_sales = df_cat[df_cat["cat_id"] == category].reset_index(drop=True)
    cat_train = cat_sales[cat_sales["ds"] < split_date]
    cat_test = cat_sales[cat_sales["ds"] >= split_date]

    cat_model = Prophet(
        seasonality_mode="multiplicative",
        daily_seasonality=True,
        weekly_seasonality=True,
        yearly_seasonality=True,
    )
    cat_model.fit(cat_train)

    cat_future = cat_model.make_future_dataframe(periods=len(cat_test))
    cat_forecast = (
        cat_model.predict(cat_future)[["ds", "yhat", "yhat_lower", "yhat_upper"]]
        .assign(ds=lambda f: pd.to_datetime(f["ds"]), cat_id=category)
    )
    category_forecasts.append(cat_forecast)

# pd.concat raises on an empty list; fall back to an empty frame when df_cat
# has no categories, which is what the loop-and-concat version produced.
df_predictions = pd.concat(category_forecasts) if category_forecasts else pd.DataFrame()

0
2
1


In [45]:
# Give both merge keys a proper datetime dtype before joining on ds.
# pd.to_datetime is used instead of astype(np.datetime64): casting to the
# unit-less datetime64 dtype is rejected by pandas >= 2.0.
df_predictions["ds"] = pd.to_datetime(df_predictions["ds"])
df_cat["ds"] = pd.to_datetime(df_cat["ds"])

In [46]:
# Join each (ds, cat_id) sales row with its forecast and compute the signed
# residual (actual minus predicted).
prediction_cols = ["yhat", "yhat_lower", "yhat_upper", "error"]
df_cat = (
    df_cat
    # Drop columns produced by an earlier run of this cell; without this a
    # re-run yields yhat_x / yhat_y and the residual below raises KeyError.
    .drop(columns=prediction_cols, errors="ignore")
    # Default inner join: rows without a matching forecast are dropped.
    .merge(df_predictions, on=["ds", "cat_id"])
    .assign(error=lambda f: f["y"] - f["yhat"])
)

In [47]:
# Sum actuals and per-category predictions across categories for each date.
df_cat_all = df_cat.groupby("ds")[["y", "yhat"]].sum().reset_index()

In [48]:
# Error metrics of the overall model against the actual daily sales.
# print_metrics is not defined in this notebook (presumably it comes from the
# `utils` star import — verify).
print("Forecast Daily error Prophet:")
y_true, y_pred = df_all["y"].to_numpy(), df_all["yhat"].to_numpy()
print_metrics(y_true, y_pred)

Forecast Daily error Prophet:
MAE 2405.501
MSE 11124696.86
RMSE 3335.371
MAPE 5.515
SMAPE 0.074
MAAPE 0.073
R2 0.798


In [49]:
# Error metrics of the summed per-category predictions against the summed
# actual daily sales.
print("Forecast Daily error Prophet category:")
y_true, y_pred = df_cat_all["y"].to_numpy(), df_cat_all["yhat"].to_numpy()
print_metrics(y_true, y_pred)

Forecast Daily error Prophet category:
MAE 2402.101
MSE 11110337.52
RMSE 3333.217
MAPE 5.519
SMAPE 0.074
MAAPE 0.073
R2 0.798


In [50]:
# Persist both tables, now including predictions and residuals, to the
# working directory without the row index.
exports = (
    (df_all, "sales_ms_all_prophet.csv"),
    (df_cat, "sales_ms_cat_prophet.csv"),
)
for frame, target in exports:
    frame.to_csv(target, index=False)

In [51]:
# Interactive chart: actual sales, the overall prediction, and the summed
# per-category prediction on one time axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])

series_specs = (
    # (source frame, column, initial mode, legend label, marker colour)
    (df_all, "y", "lines", "Sales", "rgb(121,121,121)"),
    (df_all, "yhat", "lines+markers", "Prediction all", "rgb(16,52,166)"),
    (df_cat_all, "yhat", "lines+markers", "Prediction Category", "rgb(70,194,173)"),
)
for frame, column, mode, label, colour in series_specs:
    fig.add_trace(
        go.Scatter(
            x=frame["ds"],
            y=frame[column],
            mode=mode,
            name=label,
            marker_color=colour,
        )
    )

# Applied to every trace, so all three series end up drawn as markers+lines.
fig.update_traces(mode="markers+lines")
fig.update_xaxes(rangeslider_visible=True)

# Horizontal legend anchored just above the top-right corner of the plot area.
legend_options = dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1,
)
layout_options = {
    "height": 800,
    "template": "plotly_white",
    "plot_bgcolor": "rgba(0,0,0,0)",
    "paper_bgcolor": "rgba(0,0,0,0)",
    "xaxis_title": "Fecha",
    "yaxis_title": "Unidades",
    "yaxis_tickformat": ",.0f",
    "yaxis.rangemode": "tozero",
    "legend": legend_options,
}
# Last expression of the cell: update_layout returns the figure, which renders it.
fig.update_layout(layout_options)