# Forecast - LinkedIn Silverkite

## Libraries and Data

In [None]:
# Install Greykite - wrapper for Silverkite, Prophet, and ARIMA
!pip install greykite

In [3]:
# Libraries
import ast

import numpy as np
import pandas as pd
from greykite.framework.templates.autogen.forecast_config import *
from greykite.framework.templates.forecaster import Forecaster
from greykite.framework.templates.model_templates import ModelTemplateEnum
from greykite.common.features.timeseries_features import *
from greykite.common.evaluation import EvaluationMetricEnum
from plotly.offline import iplot
import yaml

In [4]:
# Data: read the historical frame and the future-period frame, in that order
df, future_df = (
    pd.read_csv(csv_path)
    for csv_path in ('../Data/nyc-data.csv', '../Data/future.csv')
)
df

Unnamed: 0,Date,Demand,Easter,Thanksgiving,Christmas,Temperature,Marketing
0,1/1/2015,720.000885,0,0,0,3.68,41.305
1,1/2/2015,581.276773,0,0,0,4.73,131.574
2,1/3/2015,754.117039,0,0,0,7.23,162.700
3,1/4/2015,622.252774,0,0,0,10.96,160.281
4,1/5/2015,785.373319,0,0,0,6.92,51.077
...,...,...,...,...,...,...,...
2187,12/27/2020,685.915026,0,0,0,2.89,38.674
2188,12/28/2020,998.051170,0,0,0,8.83,166.712
2189,12/29/2020,847.123399,0,0,0,3.48,161.865
2190,12/30/2020,857.521043,0,0,0,5.97,179.634


In [5]:
# Preview the future-period rows: Demand is empty there, while the holiday
# flags, Temperature and Marketing columns are filled in.
future_df

Unnamed: 0,Date,Demand,Easter,Thanksgiving,Christmas,Temperature,Marketing
0,1/1/2021,,0,0,0,5.0,154.221
1,1/2/2021,,0,0,0,11.11,264.805
2,1/3/2021,,0,0,0,3.89,115.499
3,1/4/2021,,0,0,0,6.67,124.65
4,1/5/2021,,0,0,0,5.56,77.968
5,1/6/2021,,0,0,0,5.56,234.2
6,1/7/2021,,0,0,0,6.11,142.041
7,1/8/2021,,0,0,0,3.89,252.094
8,1/9/2021,,0,0,0,2.78,100.483
9,1/10/2021,,0,0,0,6.11,71.6


In [6]:
# Stack the future rows under the history; ignore_index renumbers the rows
# 0..n-1 in the same step instead of a separate reset_index call.
df = pd.concat([df, future_df], ignore_index=True)
df

Unnamed: 0,Date,Demand,Easter,Thanksgiving,Christmas,Temperature,Marketing
0,1/1/2015,720.000885,0,0,0,3.68,41.305
1,1/2/2015,581.276773,0,0,0,4.73,131.574
2,1/3/2015,754.117039,0,0,0,7.23,162.700
3,1/4/2015,622.252774,0,0,0,10.96,160.281
4,1/5/2015,785.373319,0,0,0,6.92,51.077
...,...,...,...,...,...,...,...
2218,1/27/2021,,0,0,0,3.33,39.664
2219,1/28/2021,,0,0,0,1.67,195.314
2220,1/29/2021,,0,0,0,-2.78,235.894
2221,1/30/2021,,0,0,0,1.11,152.752


In [7]:
# Rename the target column 'Demand' to 'y' (the value column name used below)
df = df.rename({'Demand': 'y'}, axis='columns')
df.head(1)

Unnamed: 0,Date,y,Easter,Thanksgiving,Christmas,Temperature,Marketing
0,1/1/2015,720.000885,0,0,0,3.68,41.305


## Silverkite Preparations

In [8]:
# Load the previously saved best parameters; the first CSV column holds the
# row labels and becomes the index.
parameters = pd.read_csv('best-params-silverkite.csv', index_col=0)
parameters

Unnamed: 0,"[('estimator__growth_term', 'linear'), ('estimator__fit_algorithm_dict', {'fit_algorithm': 'linear'})]"
mean_test_RMSE,47.8
param_estimator__fit_algorithm_dict,{'fit_algorithm': 'linear'}
param_estimator__growth_term,linear


In [9]:
# Store parameters
# `parameters` has a single, string-labelled data column, so each .loc lookup
# returns a one-element Series. Read that element with .iloc[0]: an integer
# key via [] on a labelled Series relies on a deprecated positional fallback.
growth_term_param = parameters.loc['param_estimator__growth_term'].iloc[0]
fit_algorithm_param = parameters.loc['param_estimator__fit_algorithm_dict'].iloc[0]

In [10]:
# Describe the series: time column, value column, daily frequency, and the
# last date that belongs to the training data.
metadata = MetadataParam(
    time_col='Date',
    value_col='y',
    freq='D',
    train_end_date=pd.to_datetime('2020-12-31'),
)
metadata

MetadataParam(anomaly_info=None, date_format=None, freq='D', time_col='Date', train_end_date=Timestamp('2020-12-31 00:00:00'), value_col='y')

In [11]:
# Growth term: use the value read from the best-parameters file
growth = {'growth_term': growth_term_param}
growth

{'growth_term': 'linear'}

In [12]:
# Seasonalities
# Every term is set to 'auto' (lets the model figure it out); each one can
# also be set to None or True. Add hourly_seasonality if you have hourly data.
seasonality = dict.fromkeys(
    (
        'yearly_seasonality',
        'quarterly_seasonality',
        'monthly_seasonality',
        'weekly_seasonality',
        'daily_seasonality',
    ),
    'auto',
)
seasonality

{'yearly_seasonality': 'auto',
 'quarterly_seasonality': 'auto',
 'monthly_seasonality': 'auto',
 'weekly_seasonality': 'auto',
 'daily_seasonality': 'auto'}

In [13]:
# Specifying events
# Custom event: a two-column frame holding the event dates and their label.
election_days = pd.DataFrame({
    'date': ['2016-11-08', '2020-11-03'],
    'event_name': ['elections', 'elections'],
})

events = {
    # Holidays that get their own terms; more can be added to the list
    'holidays_to_model_separately': ["New Year's Day"],
    'holiday_lookup_countries': ['US'],
    'holiday_pre_num_days': 2,
    'holiday_post_num_days': 2,
    # Per-holiday window override as (days before, days after);
    # include an entry for every separately modelled holiday
    'holiday_pre_post_num_dict': {"New Year's Day": (3, 1)},
    'daily_event_df_dict': {'elections': election_days},
}
events

{'holidays_to_model_separately': ["New Year's Day"],
 'holiday_lookup_countries': ['US'],
 'holiday_pre_num_days': 2,
 'holiday_post_num_days': 2,
 'holiday_pre_post_num_dict': {"New Year's Day": (3, 1)},
 'daily_event_df_dict': {'elections':          date event_name
  0  2016-11-08  elections
  1  2020-11-03  elections}}

In [14]:
# Changepoints -> reflect changes in trend; detection method is set to 'auto'
changepoints = {'changepoints_dict': {'method': 'auto'}}
changepoints

{'changepoints_dict': {'method': 'auto'}}

In [15]:
# Regressors (don't include them unless you know they have an impact)
regressors = {'regressor_cols': ['Easter', 'Temperature', 'Marketing']}
regressors

{'regressor_cols': ['Easter', 'Temperature', 'Marketing']}

In [16]:
# Lagged regressors (depends on forecasting horizon): every column uses 'auto'
lagged_regressors = {
    'lagged_regressor_dict': dict.fromkeys(
        ('Temperature', 'Easter', 'Marketing'), 'auto'
    ),
}
lagged_regressors

{'lagged_regressor_dict': {'Temperature': 'auto',
  'Easter': 'auto',
  'Marketing': 'auto'}}

In [17]:
# Auto-regression (depends on forecasting horizon), left on 'auto'
autoregression = {'autoreg_dict': 'auto'}
autoregression

{'autoreg_dict': 'auto'}

In [19]:
# Inspect the stored value: it is a string containing a dict literal, not a
# dict, so it has to be parsed before use.
fit_algorithm_param

"{'fit_algorithm': 'linear'}"

In [24]:
# Fitting algorithms
# `fit_algorithm_param` is the string form of a Python dict, so parse it as a
# Python literal. A YAML loader only happens to accept the simplest cases:
# for example it reads None as the string 'None' and cannot read tuples.
custom = dict(fit_algorithm_dict=ast.literal_eval(fit_algorithm_param))
custom

{'fit_algorithm_dict': {'fit_algorithm': 'linear'}}

## Silverkite Model

In [25]:
# Build the model: bundle every component dict defined above into one object
model_components = ModelComponentsParam(
    growth=growth,
    seasonality=seasonality,
    events=events,
    changepoints=changepoints,
    regressors=regressors,
    lagged_regressors=lagged_regressors,
    autoregression=autoregression,
    custom=custom,
)
model_components

ModelComponentsParam(autoregression={'autoreg_dict': 'auto'}, changepoints={'changepoints_dict': {'method': 'auto'}}, custom={'fit_algorithm_dict': {'fit_algorithm': 'linear'}}, events={'holidays_to_model_separately': ["New Year's Day"], 'holiday_lookup_countries': ['US'], 'holiday_pre_num_days': 2, 'holiday_post_num_days': 2, 'holiday_pre_post_num_dict': {"New Year's Day": (3, 1)}, 'daily_event_df_dict': {'elections':          date event_name
0  2016-11-08  elections
1  2020-11-03  elections}}, growth={'growth_term': 'linear'}, hyperparameter_override=None, regressors={'regressor_cols': ['Easter', 'Temperature', 'Marketing']}, lagged_regressors={'lagged_regressor_dict': {'Temperature': 'auto', 'Easter': 'auto', 'Marketing': 'auto'}}, seasonality={'yearly_seasonality': 'auto', 'quarterly_seasonality': 'auto', 'monthly_seasonality': 'auto', 'weekly_seasonality': 'auto', 'daily_seasonality': 'auto'}, uncertainty=None)

In [26]:
# Cross-validation
# `df` also contains the appended future rows, whose target `y` is missing.
# Size the minimum training window from the observed rows only, so those
# rows do not inflate it by the length of the forecast horizon.
n_observed = int(df['y'].notna().sum())

# 31 matches the forecast horizon passed to ForecastConfig; 180 is presumably
# the extra span reserved for the CV splits - TODO confirm.
evaluation_period = EvaluationPeriodParam(cv_min_train_periods=n_observed - 180 - 31,
                                          cv_expanding_window=True,
                                          cv_max_splits=50,  # Ideally should be None, but there were problems with that
                                          cv_periods_between_splits=16)

In [27]:
# Evaluation metric: select across CV splits by root mean squared error
rmse_metric_name = EvaluationMetricEnum.RootMeanSquaredError.name
evaluation_metric = EvaluationMetricParam(cv_selection_metric=rmse_metric_name)

In [28]:
# Configuration: Silverkite template, 31-step horizon, plus the metadata,
# model components and evaluation settings built above
config = ForecastConfig(
    model_template=ModelTemplateEnum.SILVERKITE.name,
    forecast_horizon=31,
    metadata_param=metadata,
    model_components_param=model_components,
    evaluation_period_param=evaluation_period,
    evaluation_metric_param=evaluation_metric,
)

In [None]:
# Forecasting: run the configured pipeline on the merged frame
forecaster = Forecaster()
result = forecaster.run_forecast_config(config=config, df=df)

In [30]:
# Look at the model summary
final_estimator = result.model[-1]  # [-1] retrieves the estimator from the pipeline
summary = final_estimator.summary()
print(summary)


Number of observations: 2192,   Number of features: 182
Method: Ordinary least squares
Number of nonzero features: 182

Residuals:
         Min           1Q       Median           3Q          Max
      -165.3       -32.22       -0.821        30.94        235.5

            Pred_col Estimate Std. Err  t value Pr(>|t|) sig. code              95%CI
           Intercept    65.87    3.057    21.55   <2e-16       ***     (59.88, 71.87)
events_New Years Day   -15.59    22.45  -0.6943    0.488              (-59.62, 28.44)
 events_N...rs Day-1   -47.53    22.52    -2.11    0.035         *    (-91.7, -3.358)
 events_N...rs Day-2     -6.5    22.55  -0.2882    0.773              (-50.73, 37.73)
 events_N...rs Day-3   -40.67     22.5   -1.808    0.071         .     (-84.8, 3.454)
 events_N...rs Day+1   -1.408    22.38 -0.06292    0.950               (-45.3, 42.48)
        events_Other    -6.43    6.757  -0.9516    0.341              (-19.68, 6.822)
      events_Other-1   -5.056    6.755  -0.7485  

In [33]:
# Visualization: build the components figure from the forecast result and
# render it with plotly's offline iplot
fig = result.forecast.plot_components()
iplot(fig)

In [38]:
# Getting forecast result: keep the timestamp and forecast columns, label the
# forecast with the model name, then take the last 31 rows
forecast = (
    result.forecast.df[['ts', 'forecast']]
    .rename(columns={'forecast': 'silverkite'})
)
predictions_silverkite = forecast.iloc[-31:]
predictions_silverkite

Unnamed: 0,ts,silverkite
2192,2021-01-01,750.518503
2193,2021-01-02,833.825895
2194,2021-01-03,752.872797
2195,2021-01-04,896.849763
2196,2021-01-05,779.597889
2197,2021-01-06,913.91292
2198,2021-01-07,825.707165
2199,2021-01-08,754.129233
2200,2021-01-09,821.320725
2201,2021-01-10,736.546737


In [39]:
# Exporting: write the predictions to CSV. No `index` argument is passed, so
# the row index is written as the first column.
# NOTE(review): the 'Ensemble' directory is assumed to exist already - confirm.
predictions_silverkite.to_csv('Ensemble/predictions-silverkite.csv')