# Directory, Libraries and Data

In [None]:
#Switch to the project folder so the relative CSV path read below resolves
#NOTE(review): assumes Google Drive is already mounted at /content/drive - confirm
%cd /content/drive/MyDrive/Business Analyst course/Predictive Analytics/Facebook Prophet

In [29]:
#Import libraries
import numpy as np
import pandas as pd
from fbprophet import Prophet
from fbprophet.diagnostics import cross_validation
from fbprophet.utilities import regressor_coefficients

In [None]:
#Load the daily report CSV (file name is relative to the working directory)
df = pd.read_csv(filepath_or_buffer = "DHS_Daily_Report_2020.csv")
#Preview the first five rows
df.head(5)

In [None]:
#plot
#Quick look at the raw target column; the series is drawn against the
#dataframe's row index, not against the dates (Date is not the index)
df['Total Individuals in Shelter'].plot(figsize = (9,6))

# Data preparation

In [None]:
#Date Variable
#Parse the month/day/year strings in the Date column into datetimes
df['Date'] = pd.to_datetime(df['Date'], format = "%m/%d/%Y")
df['Date']

In [None]:
#Renaming Variables
#The date column becomes 'ds' and the target becomes 'y', the names the
#model cells below work with
df = df.rename({'Date': 'ds', 'Total Individuals in Shelter': 'y'},
               axis = 'columns')
#Show one row to confirm the new column names
df.head(1)

# Holidays

In [None]:
#Easter
#Dates of the rows where the Easter flag is set
easter_dates = pd.to_datetime(df.loc[df['Easter'] == 1, 'ds'])
#One row per date; the window spans 5 days before to 2 days after each date
easter = pd.DataFrame(data = dict(holiday = 'easter',
                                  ds = easter_dates,
                                  lower_window = -5,
                                  upper_window = 2))
easter

In [14]:
#Thanksgiving
#Dates of the rows where the Thanksgiving flag is set
thanksgiving_dates = pd.to_datetime(df.loc[df['Thanksgiving'] == 1, 'ds'])
#One row per date; the window spans 3 days before to 5 days after each date
thanksgiving = pd.DataFrame(data = dict(holiday = 'thanksgiving',
                                        ds = thanksgiving_dates,
                                        lower_window = -3,
                                        upper_window = 5))

In [None]:
#Combine events
#Stack the two holiday frames row-wise into the single frame given to Prophet
holidays = pd.concat(objs = [easter, thanksgiving], axis = 0)
holidays

In [None]:
#Remove events from main dataframe
#Easter and Thanksgiving now live in `holidays`, so their flag columns go
df = df.drop(["Easter", "Thanksgiving"], axis = 'columns')
df.head(5)

# Facebook Prophet

In [17]:
#Training and test split
#The last `test_days` rows are held out for testing; all earlier rows train
test_days = 31
training_set, test_set = df.iloc[:-test_days], df.iloc[-test_days:]
#Print the boundary rows: first test row, then last training row
print(test_set.iloc[:1])
print(training_set.iloc[-1:])

             ds      y  Christmas  Temperature
2526 2020-12-01  53686          0        13.47
             ds      y  Christmas  Temperature
2525 2020-11-30  53745          0        16.62


In [18]:
#Facebook Prophet Model
#Linear trend, yearly and weekly multiplicative seasonality, the custom
#holidays frame, and explicit prior scales
m = Prophet(
    growth = 'linear',
    seasonality_mode = "multiplicative",
    yearly_seasonality = True,
    weekly_seasonality = True,
    daily_seasonality = False,
    holidays = holidays,
    changepoint_prior_scale = 0.05,
    seasonality_prior_scale = 10,
    holidays_prior_scale = 10,
)
#Extra regressors have to be registered before fitting
m.add_regressor('Christmas')
m.add_regressor('Temperature')
#Fit on the training rows only
m.fit(training_set)

<fbprophet.forecaster.Prophet at 0x7faf5cfa3450>

In [None]:
#Coefficient Regressor
#Display the coefficients of the regressors added to the fitted model `m`
#(Christmas and Temperature)
regressor_coefficients(m)

# Accuracy Assessment

In [None]:
#Create future dataframe
#Dates of the training history followed by the next `test_days` days
future = m.make_future_dataframe(periods = test_days,
                                 freq = 'D')
#Attach the regressor columns by matching on the date instead of on row
#position: a gap, duplicate or reordering in df then cannot silently shift
#regressor values onto the wrong day. Dates missing from df come back as
#NaN rather than as misaligned values.
future = future.merge(df.drop(columns = 'y'),
                      on = 'ds',
                      how = 'left')
future.head(2)

In [None]:
#Forecasting
#Predict over every date in `future` (history plus the held-out days)
forecast = m.predict(df = future)
forecast.head(5)

In [None]:
#Retrieve predictions
#The final `test_days` rows of the forecast line up with the test set
predictions = forecast['yhat'].iloc[-test_days:]
predictions.iloc[:2]

In [None]:
#MAE RMSE
from sklearn.metrics import mean_absolute_error, mean_squared_error
#Mean absolute error on the held-out days
print(mean_absolute_error(y_true = test_set['y'], y_pred = predictions))
#Root mean squared error on the held-out days
print(np.sqrt(mean_squared_error(y_true = test_set['y'], y_pred = predictions)))

# Visualization

In [None]:
#forecast
#Plot the forecast returned by m.predict; the trailing ';' keeps the
#figure object's repr out of the cell output
m.plot(forecast);

In [None]:
#plot components
#Plot the individual components of the forecast; the trailing ';' keeps
#the figure object's repr out of the cell output
m.plot_components(forecast);

# Parameter Tuning

In [26]:
#Grid
from sklearn.model_selection import ParameterGrid
#Three candidate values per prior scale -> 3 x 3 x 3 = 27 combinations
param_grid = dict(seasonality_prior_scale = [5, 10, 20],
                  holidays_prior_scale = [5, 10, 20],
                  changepoint_prior_scale = [0.01, 0.05, 0.1])
grid = ParameterGrid(param_grid = param_grid)

In [None]:
#hyperparameter
#Cross-validated RMSE of each combination, stored in the same order as `grid`
rmse = []

#Parameter tuning loop
#NOTE: the loop rebinds `m`; re-run the model cell before reusing the main model
for params in grid:
  #model: same specification as the main model fitted earlier (holidays
  #frame and both regressors), so the tuned prior scales apply to that
  #model. Without `holidays`, holidays_prior_scale has nothing to act on.
  m = Prophet(growth = 'linear',
              yearly_seasonality = True,
              weekly_seasonality = True,
              daily_seasonality = False,
              holidays = holidays,
              seasonality_mode = 'multiplicative',
              seasonality_prior_scale = params['seasonality_prior_scale'],
              holidays_prior_scale = params['holidays_prior_scale'],
              changepoint_prior_scale = params['changepoint_prior_scale'])
  m.add_regressor('Christmas')
  m.add_regressor('Temperature')
  m.fit(training_set)

  #cross_validation: 31-day horizon (same length as test_days) after an
  #initial training window of 2300 days
  df_cv = cross_validation(model = m,
                           horizon = '31 days',
                           initial = '2300 days',
                           parallel = "processes")

  #get the error: RMSE across all cross-validation forecasts
  error = np.sqrt(mean_squared_error(df_cv['y'], df_cv['yhat']))

  #store the results
  rmse.append(error)

In [None]:
#Fetch the best parameters
#The position of the smallest RMSE picks the matching combination in `grid`
best_params = grid[int(np.argmin(rmse))]
print(best_params)