## Formatting the data and preparing it for Prophet prediction

In [1]:
import pandas as pd
import plotly.express as px

# Load the raw sales export. 'date_mvt' holds the timestamps and
# 'total_revenues' the amounts that are aggregated and plotted below.
df = pd.read_csv('sales_overtime.csv')

# Drop the last row: it is the lone 1st-of-June record, which would otherwise
# create a final month containing a single day.
# NOTE(review): assumes the CSV is ordered so that this record is last - confirm.
df = df.drop(index=df.index[-1:])

# Parse the timestamp strings (they carry a literal " UTC" suffix).
df['date_mvt'] = pd.to_datetime(df['date_mvt'], format='%Y-%m-%d %H:%M:%S.%f UTC')

# Sum everything per calendar month and bring the month back as a regular column.
# `axis=0` is the Grouper default (and deprecated in recent pandas), so it is omitted.
df2 = (
    df.groupby(pd.Grouper(key='date_mvt', freq='M'))
    .sum()
    .reset_index()
)

# Discard the first two months, which are outliers compared to the rest of the data.
# The labels are taken from df2 itself; the previous code borrowed them from `df`,
# which only worked because both frames happened to share a 0-based RangeIndex.
df2 = df2.drop(index=df2.index[:2])

# Line chart of the total monthly revenue of the company over time.
fig = px.line(df2, x='date_mvt', y='total_revenues')

import matplotlib.pyplot as plt

from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics
from prophet.plot import plot_cross_validation_metric

from statsmodels.tsa.seasonal import seasonal_decompose

# Quiet the notebook output before any model is fitted.
import logging
# Only records at ERROR level or above from the 'prophet' logger are emitted.
# NOTE(review): the recorded cell outputs still contain "cmdstanpy - INFO" lines,
# so the 'cmdstanpy' logger is not affected by this setting.
logging.getLogger('prophet').setLevel(logging.ERROR)
import warnings
# Blanket filter: every Python warning raised after this line is hidden,
# including deprecation warnings from pandas and the modelling libraries.
warnings.filterwarnings("ignore")

# Rename the two columns to the 'ds' (date) / 'y' (value) names used for the
# time-series models fitted later.
df2.columns = ['ds', 'y']

# Chronological hold-out split: the first TRAIN_SIZE months are used for
# fitting, every later month is kept for evaluation.
TRAIN_SIZE = 105
train = df2.iloc[:TRAIN_SIZE]
test = df2.iloc[TRAIN_SIZE:]

train.shape, test.shape

# seasonal_decompose needs the dates as the index. Rebinding (rather than
# inplace=True) leaves the frame that train/test were sliced from untouched.
df2 = df2.set_index("ds")

# The series is monthly, so one seasonal cycle is 12 observations. Passing the
# period explicitly avoids depending on the index frequency being inferred
# after the reset_index / drop / set_index round trip.
MONTHS_PER_YEAR = 12

# Additive Decomposition (y = Trend + Seasonal + Residuals)
result_add = seasonal_decompose(df2['y'], model='additive', period=MONTHS_PER_YEAR)
result_add.plot();

# Multiplicative Decomposition (y = Trend * Seasonal * Residuals)
# NOTE(review): presumably requires strictly positive monthly revenues - confirm.
result_mul = seasonal_decompose(df2['y'], model='multiplicative', period=MONTHS_PER_YEAR)
result_mul.plot();

## Create a multiplicative model (without taking Covid into account)

In [16]:
# Baseline model: multiplicative seasonality, 95% uncertainty interval,
# fitted on the training months only (fit() hands back the fitted model).
model = Prophet(interval_width=0.95, seasonality_mode='multiplicative').fit(train)

# Number of monthly points to forecast, counted from the end of TRAIN.
horizon = 36

# Dates of the training period extended by `horizon` extra months.
future = model.make_future_dataframe(freq='M', periods=horizon)

# Forecast dataframe: predictions of the train-fitted model for every date in `future`.
forecast = model.predict(future)

# Forecast with its trend and lower/upper uncertainty bounds.
ax = model.plot(forecast)

# Forecast components (trend and yearly seasonality).
model.plot_components(forecast);

# Overlay train, test and the forecasted horizon on a single figure.
plt.figure(figsize=(15, 8))
for frame, value_col, legend_label in (
    (train, 'y', "Train"),
    (test, 'y', "Test"),
    (forecast.tail(horizon), 'yhat', "predictions"),
):
    plt.plot(frame['ds'], frame[value_col], label=legend_label)
plt.legend(loc="best")
plt.show()

10:29:53 - cmdstanpy - INFO - Chain [1] start processing
10:29:53 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x177cc31d0>

## Create a multiplicative model (taking Covid into account)

In [23]:
# Describe the COVID lockdowns as one-off "holiday" events so the next model
# can treat those periods as exceptions.
# Each entry is (event name, first day, last day).
lockdown_periods = [
    ('lockdown_1', '2020-03-17', '2020-06-02'),
    ('lockdown_2', '2020-10-30', '2020-12-15'),
    ('lockdown_3', '2021-04-03', '2021-05-19'),
]
lockdowns = pd.DataFrame(lockdown_periods, columns=['holiday', 'ds', 'ds_upper'])

# Every event starts exactly on its 'ds' date (no days before it are affected).
lockdowns.insert(2, 'lower_window', 0)

# Turn both date columns into real timestamps.
lockdowns = lockdowns.assign(
    ds=pd.to_datetime(lockdowns['ds']),
    ds_upper=pd.to_datetime(lockdowns['ds_upper']),
)

# Length of each event in days, counted from its start date.
lockdowns['upper_window'] = (lockdowns['ds_upper'] - lockdowns['ds']).dt.days
lockdowns

from prophet.plot import add_changepoints_to_plot

# Same configuration as the first model, plus the lockdown periods as holidays;
# fitted on the training months only.
model2 = Prophet(
    holidays=lockdowns,
    seasonality_mode='multiplicative',
    interval_width=0.95,
).fit(train)

# Extend the training dates by `horizon` months and predict over all of them.
future2 = model2.make_future_dataframe(freq='M', periods=horizon)
forecast2 = model2.predict(future2)

# Forecast with its trend and lower/upper uncertainty bounds.
ax2 = model2.plot(forecast2)

# Forecast components (trend, yearly seasonality and lockdowns).
model2.plot_components(forecast2);

# Second forecast figure, annotated with the detected breaks of trend.
fig = model2.plot(forecast2)
a = add_changepoints_to_plot(fig.gca(), model2, forecast2)

# Overlay train, test and the forecasted horizon on a single figure.
plt.figure(figsize=(15, 8))
for frame, value_col, legend_label in (
    (train, 'y', "Train"),
    (test, 'y', "Test"),
    (forecast2.tail(horizon), 'yhat', "predictions"),
):
    plt.plot(frame['ds'], frame[value_col], label=legend_label)
plt.legend(loc="best")
plt.show()

10:29:53 - cmdstanpy - INFO - Chain [1] start processing
10:29:53 - cmdstanpy - INFO - Chain [1] done processing
