In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

# F&B Revenue Forecast Model

## Data

In [2]:
#import raw revenue data
df_2016 = pd.read_csv("../raw_data/orders2016.csv", sep=";")
df_2017 = pd.read_csv("../raw_data/orders2017.csv", sep=";")
df_2018 = pd.read_csv("../raw_data/orders2018.csv", sep=";")
df_2019 = pd.read_csv("../raw_data/orders2019.csv", sep=";")
df_2020 = pd.read_csv("../raw_data/orders2020.csv", sep=";")
df_2021 = pd.read_csv("../raw_data/orders2021.csv", sep=";")
df_2022 = pd.read_csv("../raw_data/orders2022.csv", sep=";")

df_list = [df_2016, df_2017, df_2018, df_2019, df_2020, df_2021, df_2022]

#Dropping unnecessary columns, grouping by "date", summing "item_price" to get daily revenues

for i, df in enumerate(df_list):
    df_list[i] = pd.DataFrame(df.groupby(by="date")["item_price"].sum()/100)

#Concat all data in one dataframe, rename the columns for prophet

df = pd.concat(df_list, ignore_index=False)
df = df.rename(columns={"date": "ds", "item_price": "y"})
df["ds"] = df.index
df = df.reset_index(drop=True)
df = df[["ds","y"]]
df

#turning the ds (date) column into datetime

df['ds']=pd.to_datetime(df['ds'])

#Dropping outliers
df = df[df["y"]>=60]
df = df[df["y"]<=2300]
df = df.reset_index(drop=True)

In [3]:
#Loading feature data with temp_encoded
feature_df = pd.read_csv("../feature_data/final_feature_selection.csv")
feature_df["ds"] = pd.to_datetime(feature_df['ds'])

In [4]:
#Loading weather prediction data
weather_forecast = pd.read_csv("../feature_data/finall_pred_weather.csv")
weather_forecast["ds"] = pd.to_datetime(weather_forecast["ds"])
weather_forecast["forecast dt iso"] = pd.to_datetime(weather_forecast["forecast dt iso"])

In [5]:
merged_df = pd.merge(df,feature_df,how="left")

In [108]:
#Setting variables
horizon = 16

#Splitting the data
split_date = "2022-10-14"
index_split = df[df["ds"]==split_date].index[0]
df_train = merged_df.iloc[:index_split]
df_test = merged_df.iloc[index_split:]
y_test = pd.DataFrame(df_test["y"])
weather_index_split = weather_forecast[weather_forecast["forecast dt iso"]==split_date].index[0]
weather_predict = weather_forecast.iloc[weather_index_split:weather_index_split+horizon,:]
weather_predict = weather_predict.drop(columns="forecast dt iso")

In [69]:
df_train.tail()

Unnamed: 0,ds,y,temp,humidity,wind_speed,wind_deg,rain,clouds,Holiday,inflation_rate,Consumption Climate,cov_lock,unemp_Berlin_Mitte
1528,2022-05-29,396.5,14.03,71,3.13,240,0.0,40,1,8.4,-26.2,0,0.089
1529,2022-05-30,310.9,15.3,56,4.02,270,0.0,75,0,8.4,-26.2,0,0.089
1530,2022-05-31,343.5,19.75,43,3.13,180,0.0,0,0,8.4,-26.2,0,0.087
1531,2022-06-01,254.1,19.69,50,7.72,230,0.0,40,0,8.4,-27.7,0,0.087
1532,2022-06-02,251.0,19.07,46,6.26,275,6.86,0,0,8.4,-27.7,0,0.087


In [70]:
df_test.head()

Unnamed: 0,ds,y,temp,humidity,wind_speed,wind_deg,rain,clouds,Holiday,inflation_rate,Consumption Climate,cov_lock,unemp_Berlin_Mitte
1533,2022-06-03,608.6,24.21,37,2.24,135,0.0,0,0,8.4,-27.7,0,0.087
1534,2022-06-04,1007.6,23.51,47,6.26,24,0.0,0,1,8.4,-27.7,0,0.087
1535,2022-06-05,1045.9,26.82,43,4.63,100,0.0,0,1,8.4,-27.7,0,0.087
1536,2022-06-06,737.1,24.09,57,4.12,220,0.0,75,1,8.4,-27.7,0,0.087
1537,2022-06-07,589.3,22.64,50,4.02,245,0.0,0,1,8.4,-27.7,0,0.087


## Model

### Fitting

In [20]:
help(Prophet)

Help on class Prophet in module prophet.forecaster:

class Prophet(builtins.object)
 |  Prophet(growth='linear', changepoints=None, n_changepoints=25, changepoint_range=0.8, yearly_seasonality='auto', weekly_seasonality='auto', daily_seasonality='auto', holidays=None, seasonality_mode='additive', seasonality_prior_scale=10.0, holidays_prior_scale=10.0, changepoint_prior_scale=0.05, mcmc_samples=0, interval_width=0.8, uncertainty_samples=1000, stan_backend=None)
 |  
 |  Prophet forecaster.
 |  
 |  Parameters
 |  ----------
 |  growth: String 'linear', 'logistic' or 'flat' to specify a linear, logistic or
 |      flat trend.
 |  changepoints: List of dates at which to include potential changepoints. If
 |      not specified, potential changepoints are selected automatically.
 |  n_changepoints: Number of potential changepoints to include. Not used
 |      if input `changepoints` is supplied. If `changepoints` is not supplied,
 |      then n_changepoints potential changepoints are selec

In [148]:
from prophet import Prophet

#Instantiate
#m = Prophet(mcmc_samples=300,changepoint_prior_scale = 0.5, seasonality_prior_scale = 0.1)
m = Prophet()

#Adding regressors/features
#m.add_regressor("temp", standardize="minmax")
#m.add_regressor("humidity", standardize="minmax")
#m.add_regressor("wind_speed", standardize="minmax")
#m.add_regressor("wind_deg", standardize="minmax")
#m.add_regressor("rain", standardize="minmax")
#m.add_regressor("clouds", standardize="minmax")
m.add_regressor("Holiday")
#m.add_regressor("inflation_rate", standardize="robust")
#m.add_regressor("Consumption Climate", standardize="minmax")
#m.add_regressor("cov_lock")
#m.add_regressor("unemp_Berlin_Mitte", standardize="minmax")

#Fitting the model
m = m.fit(df_train)

12:37:49 - cmdstanpy - INFO - Chain [1] start processing
12:37:50 - cmdstanpy - INFO - Chain [1] done processing


### Predicting

In [110]:
#Creating future dataframe
future = m.make_future_dataframe(periods=horizon)

In [111]:
#Adding feature values to future dataframe
future = pd.merge(future,feature_df,how="left")
future

Unnamed: 0,ds,y,temp,humidity,wind_speed,wind_deg,rain,clouds,Holiday,inflation_rate,Consumption Climate,cov_lock,unemp_Berlin_Mitte
0,2016-09-01,365.9,25.90,35,6.70,270,0.00,0,1,0.5,10.0,0,0.095
1,2016-09-02,358.6,22.18,46,4.63,280,0.00,75,1,0.5,10.0,0,0.095
2,2016-09-03,487.5,24.94,41,6.70,270,0.00,40,0,0.5,10.0,0,0.095
3,2016-09-04,90.9,19.94,73,6.70,230,0.00,75,0,0.5,10.0,0,0.095
4,2016-09-05,125.0,19.94,73,4.63,320,0.21,40,0,0.5,10.0,0,0.095
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1676,2022-10-25,292.7,16.67,74,5.66,260,0.00,75,1,8.4,-41.9,0,0.090
1677,2022-10-26,346.6,16.98,73,3.09,150,0.00,0,1,8.4,-41.9,0,0.090
1678,2022-10-27,414.5,19.57,73,4.63,200,0.00,0,1,8.4,-41.9,0,0.090
1679,2022-10-28,620.0,21.83,68,4.12,190,0.00,0,1,8.4,-41.9,0,0.090


In [112]:
#Update Future Timeframe with prediction weather data instead of historical weather data to prevent overfitting
cols_to_update = ['temp','humidity', 'clouds', 'wind_speed', 'wind_deg', 'rain']
future.loc[future.index[-(horizon):], cols_to_update] = weather_predict[cols_to_update].values

In [113]:
future.tail(16)

Unnamed: 0,ds,y,temp,humidity,wind_speed,wind_deg,rain,clouds,Holiday,inflation_rate,Consumption Climate,cov_lock,unemp_Berlin_Mitte
1665,2022-10-14,286.5,14.78,69.0,2.61,187.0,0.0,76.0,0,8.4,-41.9,0,0.09
1666,2022-10-15,871.7,16.82,72.0,3.82,216.0,0.0,78.0,0,8.4,-41.9,0,0.09
1667,2022-10-16,140.9,15.06,83.0,2.53,262.0,0.84,89.0,0,8.4,-41.9,0,0.09
1668,2022-10-17,632.8,18.22,70.0,2.99,171.0,0.0,92.0,0,8.4,-41.9,0,0.09
1669,2022-10-18,226.9,13.43,83.0,1.83,202.0,4.84,78.0,0,8.4,-41.9,0,0.09
1670,2022-10-19,217.7,12.13,64.0,2.55,203.0,0.0,0.0,0,8.4,-41.9,0,0.09
1671,2022-10-20,260.2,10.39,58.0,3.13,98.0,0.0,0.0,0,8.4,-41.9,0,0.09
1672,2022-10-21,323.5,11.0,51.0,5.34,126.0,0.0,46.0,0,8.4,-41.9,0,0.09
1673,2022-10-22,743.1,10.31,92.0,3.89,188.0,7.32,100.0,1,8.4,-41.9,0,0.09
1674,2022-10-23,379.4,13.43,84.0,3.99,250.0,0.42,99.0,1,8.4,-41.9,0,0.09


In [149]:
#Predicting
forecast = m.predict(future)
seven_day_forecast = forecast.tail(horizon)
seven_day_forecast_slim = seven_day_forecast[["ds","yhat_lower","yhat","yhat_upper"]]
prediction_forecast = seven_day_forecast_slim
prediction_forecast["y_true"] = y_test.head(horizon)
prediction_forecast["error"]=abs(prediction_forecast["yhat"]-prediction_forecast["y_true"])
prediction_forecast["mae%"]=(((prediction_forecast["yhat"]/prediction_forecast["y_true"])-1)*100).round(2)
prediction_forecast

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prediction_forecast["y_true"] = y_test.head(horizon)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prediction_forecast["error"]=abs(prediction_forecast["yhat"]-prediction_forecast["y_true"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prediction_forecast["mae%"]=(((prediction_forecast["yhat"]/p

Unnamed: 0,ds,yhat_lower,yhat,yhat_upper,y_true,error,mae%
1665,2022-10-14,-36.019865,306.949473,649.804512,286.5,20.449473,7.14
1666,2022-10-15,271.83281,587.406057,926.484377,871.7,284.293943,-32.61
1667,2022-10-16,167.050087,477.546059,834.495907,140.9,336.646059,238.93
1668,2022-10-17,-240.205198,87.978688,425.872495,632.8,544.821312,-86.1
1669,2022-10-18,-244.560248,82.85843,416.365379,226.9,144.04157,-63.48
1670,2022-10-19,-255.313126,80.783467,399.118914,217.7,136.916533,-62.89
1671,2022-10-20,-262.922813,82.855566,415.153608,260.2,177.344434,-68.16
1672,2022-10-21,-78.098498,276.403637,606.203981,323.5,47.096363,-14.56
1673,2022-10-22,252.1005,620.127554,955.072891,743.1,122.972446,-16.55
1674,2022-10-23,166.2184,506.623293,843.813891,379.4,127.223293,33.53


In [150]:
#MAE
from sklearn.metrics import mean_absolute_error

y_true = prediction_forecast["y_true"]
y_pred = prediction_forecast["yhat"]

mae = mean_absolute_error(y_true, y_pred)

mae

221.11846007325994

In [151]:
mae_7 = mean_absolute_error(y_true[:8], y_pred[:8])
mae_7

211.45121081469796

In [152]:
mae_10 = mean_absolute_error(y_true[:11], y_pred[:11])
mae_10

207.16739176882112

In [153]:
mae_14 = mean_absolute_error(y_true[:14], y_pred[:14])
mae_14

216.5555916368015

In [134]:
from prophet.utilities import regressor_coefficients

coefficients = regressor_coefficients(m)
coefficients

Unnamed: 0,regressor,regressor_mode,center,coef_lower,coef,coef_upper
0,temp,additive,17.305988,31.134262,31.134262,31.134262


## Saving the model

saving 3 different models for 3 predictions: 23.05.2022 / 01.08.2022 / 10.10.2022

### Saving prediction as csv

In [154]:
prediction_forecast.to_csv("../output_data/prediction_10.csv")

### Saving the model as json

In [None]:
#saving the model

from prophet.serialize import model_to_json, model_from_json

with open('model_10.json', 'w') as fout:
    fout.write(model_to_json(m))