In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.metrics import mean_squared_error
import warnings
warnings.filterwarnings("ignore")

In [1]:
from timeseries_functions import index_to_datetime, plot_all_df_columns, plot_series,\
plot_series_save_fig, plot_series_and_differences, run_augmented_Dickey_Fuller_test, \
plot_autocorrelation, plot_partial_autocorrelation, plot_decomposition

  from pandas.core import datetools


In [None]:
from timeseries_functions import make_col_vector, make_design_matrix, fit_linear_trend,\
plot_trend_data, plot_linear_trend

In [None]:
from fbprophet import Prophet

In [None]:
from prophet_functions import get_prophet_training_mse, get_prophet_test_mse,\
get_prophet_forecast, plot_prophet_forecast

#### add holiday component

In [None]:
# Calendar dates for the three holidays, three consecutive years each.
christmas_dates = [
    '2015-12-25',
    '2016-12-25',
    '2017-12-25',
]
new_year_dates = [
    '2016-01-01',
    '2017-01-01',
    '2018-01-01',
]
# Unlike the two lists above, these fall on a different day of the month each year.
thanksgiving_dates = [
    '2015-11-26',
    '2016-11-24',
    '2017-11-23',
]

In [None]:
# One row per Christmas date. The label was misspelled 'Christams'; it is corrected
# here because this frame is later passed to Prophet(holidays=...) and the label
# identifies the holiday there.
christmas = pd.DataFrame({'holiday': 'Christmas', 'ds': pd.to_datetime(christmas_dates)})

In [None]:
# One row per Thanksgiving date, every row labelled 'Thanksgiving'.
thanksgiving = pd.DataFrame(
    data={
        'holiday': 'Thanksgiving',
        'ds': pd.to_datetime(thanksgiving_dates),
    }
)

In [None]:
# One row per New Year's Day date, every row labelled 'New Years'.
new_years = pd.DataFrame(
    data={
        'holiday': 'New Years',
        'ds': pd.to_datetime(new_year_dates),
    }
)

In [None]:
# Stack the three per-holiday frames into one table.
# Each frame keeps its own row labels, so the combined index repeats.
holidays = pd.concat(
    [
        christmas,
        thanksgiving,
        new_years,
    ]
)

In [None]:
# Four independent copies of the holiday table; each one receives its own
# lower/upper window columns in the cells that follow.
holidays1, holidays2, holidays1_2, holidays2_1 = (holidays.copy() for _ in range(4))

In [None]:
# Window variant 1: lower_window = -1, upper_window = 1.
holidays1['lower_window'], holidays1['upper_window'] = -1, 1

In [None]:
# Window variant 2: lower_window = -2, upper_window = 2.
holidays2['lower_window'], holidays2['upper_window'] = -2, 2

In [None]:
# Window variant 1_2: lower_window = -1, upper_window = 2.
holidays1_2['lower_window'], holidays1_2['upper_window'] = -1, 2

In [None]:
# Window variant 2_1: lower_window = -2, upper_window = 1.
holidays2_1['lower_window'], holidays2_1['upper_window'] = -2, 1

In [None]:
# All window variants, in the order they are tried by the forecasting loop.
holiday_windows = [
    holidays1,
    holidays2,
    holidays1_2,
    holidays2_1,
]

#### add regressor

In [None]:
# Print the built-in help for Prophet.add_regressor as a reference;
# no regressor is actually added in the cells visible in this notebook.
help(Prophet.add_regressor)

### using weekly data

In [None]:
# Load the three weekly provider-hours files with pandas' default integer index.
dr_df, RNPA_df, ther_df = [
    pd.read_csv(csv_path)
    for csv_path in (
        'doctors_hours_per_provider.csv',
        'RNPA_hours_per_provider.csv',
        'therapists_hours_per_provider.csv',
    )
]

In [None]:
# Load the same three files again, this time using their first column as the index.
# NOTE(review): the index is not parsed as dates here (no parse_dates) - confirm
# whether it is converted elsewhere.
dr_dt_index, RNPA_dt_index, ther_dt_index = [
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        'doctors_hours_per_provider.csv',
        'RNPA_hours_per_provider.csv',
        'therapists_hours_per_provider.csv',
    )
]

In [None]:
# The three index-by-first-column frames gathered in one list.
all_df = [
    dr_dt_index,
    RNPA_dt_index,
    ther_dt_index,
]

In [None]:
# Boundary labels for the train/test split, as (start, end) pairs.
train_start, train_end = '2015-01-12', '2018-02-26'
test_start, test_end = '2018-03-05', '2018-04-30'

In [None]:
# Split the doctors frame by index label. Both slices are closed on both ends:
# the test slice previously had no upper bound, leaving test_end unused and
# letting the test set grow with any rows dated after test_end.
dr_train = dr_dt_index.loc[train_start:train_end]
dr_test = dr_dt_index.loc[test_start:test_end]

In [None]:
# Row counts of the train and test splits, displayed as a (train, test) pair.
(dr_train.shape[0], dr_test.shape[0])

In [None]:
def get_prophet_training_mse(forecast, df_name, periods):
    """Print and return the MSE of 'yhat' against 'y' over the known rows.

    Args:
        forecast: DataFrame with 'y' and 'yhat' columns; its last `periods`
            rows are left out of the calculation.
        df_name: label inserted into the printed message.
        periods: number of trailing rows to exclude.

    Returns:
        The mean squared error. The value is still printed as before; returning
        it as well lets callers compare runs programmatically.
    """
    # Compute the stop position explicitly: `iloc[0:-periods]` selects nothing
    # when periods == 0 because -0 == 0. max() keeps the old "empty selection"
    # behaviour when periods exceeds the number of rows.
    n_known = max(len(forecast) - periods, 0)
    known_rows = forecast.iloc[:n_known]
    mse = mean_squared_error(known_rows['y'], known_rows['yhat'])
    print('MSE for {name} training set is {error}'.format(name=df_name, error=mse))
    return mse

In [None]:
def get_prophet_test_mse(forecast, df_name, periods, window_rows=155):
    """Print and return the MSE of 'yhat' against 'y' over a trailing window of rows.

    The window starts `window_rows` rows before the end of `forecast` and stops
    `periods` rows before the end; rows containing any NaN are dropped before
    the error is computed.

    Args:
        forecast: DataFrame with 'y' and 'yhat' columns.
        df_name: label inserted into the printed message.
        periods: number of trailing rows to exclude from the window.
        window_rows: distance from the end of `forecast` at which the window
            starts. Defaults to 155, the value that used to be hard-coded.

    Returns:
        The mean squared error (also printed, as before).
    """
    n_rows = len(forecast)
    # Explicit positions instead of negative slicing: `iloc[-155:-periods]`
    # is empty when periods == 0 because -0 == 0.
    start = max(n_rows - window_rows, 0)
    stop = max(n_rows - periods, 0)
    # dropna() returns a new frame, so the slice of the caller's frame is never
    # modified in place (the old inplace call could trigger SettingWithCopyWarning).
    window = forecast.iloc[start:stop].dropna(axis=0)
    mse = mean_squared_error(window['y'], window['yhat'])
    print('MSE for {name} test set is {error}'.format(name=df_name, error=mse))
    return mse

In [None]:
def test_prophet_forecast(test_df, df_name, df_cols, date_hours_cols, pred_cols, periods):
    """Fit Prophet on `test_df`, forecast `periods` steps and print both MSE figures.

    The previous body could not run: it read an undefined name `df` instead of
    `test_df` and called `get_training_mse` / `get_test_mse`, which are not
    defined in this notebook. Apart from those names it was a line-for-line
    copy of `get_prophet_forecast`, so it now delegates to that function.

    Args:
        test_df: input frame; its columns are relabelled to `df_cols`.
        df_name: label used in the printed MSE messages.
        df_cols: column labels to assign to `test_df`.
        date_hours_cols: the two columns (date, target) fed to Prophet.
        pred_cols: forecast columns to place next to the input data.
        periods: value passed to `make_future_dataframe(periods=...)`.

    Returns:
        Tuple `(model, forecast, df_pred)` exactly as returned by
        `get_prophet_forecast`.
    """
    return get_prophet_forecast(test_df, df_name, df_cols, date_hours_cols, pred_cols, periods)

In [None]:
def get_prophet_forecast(df, df_name, df_cols, date_hours_cols, pred_cols, periods):
    """Fit a default Prophet model to `df`, forecast, and print training/test MSE.

    Side effect: the columns of the caller's `df` are relabelled to `df_cols`
    in place (kept for compatibility with cells that rely on it).

    Args:
        df: input frame with as many columns as `df_cols` has labels.
        df_name: label used in the printed MSE messages.
        df_cols: column labels to assign to `df`.
        date_hours_cols: the two columns (date, target) that become 'ds' and 'y'.
        pred_cols: forecast columns to place next to 'ds' and 'y'.
        periods: value passed to `make_future_dataframe(periods=...)`.

    Returns:
        Tuple `(model, forecast, df_pred)`: the fitted model, the frame returned
        by `model.predict`, and 'ds'/'y' joined column-wise with `pred_cols`.

    NOTE(review): the model is fitted on every row of `df`, so the "test" MSE
    printed below is measured on rows the model has already seen.
    """
    df.columns = df_cols
    # Fresh 0..n-1 index so the column-wise concat below lines rows up by
    # position even if the caller's frame was filtered or carries another index.
    # Assumes `forecast` also has a 0..n-1 index - TODO confirm against Prophet.
    history = df[date_hours_cols].reset_index(drop=True)
    history.columns = ['ds', 'y']
    model = Prophet()
    model.fit(history)
    future = model.make_future_dataframe(periods=periods)
    forecast = model.predict(future)
    df_pred = pd.concat([history, forecast[pred_cols]], axis=1)
    # The unused `predictions = forecast.iloc[-periods:]` local was removed.
    get_prophet_training_mse(df_pred, df_name, periods)
    get_prophet_test_mse(df_pred, df_name, periods)
    return model, forecast, df_pred

In [None]:
def get_prophet_forecast_w_holidays(df, df_name, df_cols, date_hours_cols,
                                    pred_cols, periods, holidays=holidays):
    """Fit Prophet with a holiday table, forecast, and print training/test MSE.

    Same as `get_prophet_forecast` except that `holidays` is handed to the
    Prophet constructor.

    Side effect: the columns of the caller's `df` are relabelled to `df_cols`
    in place (kept for compatibility with cells that rely on it).

    Args:
        df: input frame with as many columns as `df_cols` has labels.
        df_name: label used in the printed MSE messages.
        df_cols: column labels to assign to `df`.
        date_hours_cols: the two columns (date, target) that become 'ds' and 'y'.
        pred_cols: forecast columns to place next to 'ds' and 'y'.
        periods: value passed to `make_future_dataframe(periods=...)`.
        holidays: holiday frame passed to `Prophet(holidays=...)`. The default
            is whatever the module-level `holidays` was when this cell ran.

    Returns:
        Tuple `(model, forecast, df_pred)`: the fitted model, the frame returned
        by `model.predict`, and 'ds'/'y' joined column-wise with `pred_cols`.
    """
    df.columns = df_cols
    # Fresh 0..n-1 index so the column-wise concat below lines rows up by
    # position even if the caller's frame was filtered or carries another index.
    # Assumes `forecast` also has a 0..n-1 index - TODO confirm against Prophet.
    history = df[date_hours_cols].reset_index(drop=True)
    history.columns = ['ds', 'y']
    model = Prophet(holidays=holidays)
    model.fit(history)
    future = model.make_future_dataframe(periods=periods)
    forecast = model.predict(future)
    df_pred = pd.concat([history, forecast[pred_cols]], axis=1)
    # The unused `predictions = forecast.iloc[-periods:]` local was removed.
    get_prophet_training_mse(df_pred, df_name, periods)
    get_prophet_test_mse(df_pred, df_name, periods)
    return model, forecast, df_pred

In [None]:
def plot_prophet_forecast(model, forecast, df_name):
    """Draw the forecast plot and the component plot for a fitted model.

    Args:
        model: object exposing `plot` and `plot_components` (a fitted Prophet model).
        forecast: forecast frame handed to both plotting calls.
        df_name: name placed in front of ' Hours' in the y-axis label.
    """
    hours_label = f'{df_name} Hours'
    model.plot(forecast, xlabel='Date', ylabel=hours_label)
    model.plot_components(forecast)
# TODO: add an option to save the figures to file

In [None]:
def prophet_forecast_to_csv(prediction_df, file_name):
    """Relabel the five prediction columns and write the frame to CSV.

    The output path is `file_name` followed by '_predictions.csv'. The columns
    of `prediction_df` are renamed on the caller's object, not on a copy.
    """
    output_labels = ['Date', 'True_Hours', 'Predicted_Hours', 'Lower_Limit', 'Upper_Limit']
    prediction_df.columns = output_labels
    destination = '{}_predictions.csv'.format(file_name)
    prediction_df.to_csv(destination)

In [None]:
# Settings shared by every forecasting call below.
periods = 90  # passed to make_future_dataframe(periods=...)
# NOTE(review): make_future_dataframe is called without a freq argument while the
# data are described as weekly - confirm the horizon unit is what is intended.
pred_cols = ['yhat', 'yhat_lower', 'yhat_upper']  # forecast columns kept next to the actuals
date_hours_cols = ['date', 'Hours']  # the (date, target) pair fed to Prophet
df_cols = [
    'date',
    'Number_Providers',
    'Hours',
    'Hours_per_Provider',
]  # labels assigned to the raw CSV columns

In [None]:
# save doctors forecast to csv file
# Fit and forecast the doctors series; the CSV export happens in the next cell.
dr_model, forecast, dr_pred = get_prophet_forecast(
    df=dr_df,
    df_name='Doctors',
    df_cols=df_cols,
    date_hours_cols=date_hours_cols,
    pred_cols=pred_cols,
    periods=periods,
)

In [None]:
# Writes 'doctors_prophet_predictions.csv' and relabels dr_pred's columns.
prophet_forecast_to_csv(prediction_df=dr_pred, file_name='doctors_prophet')

In [None]:
# Read the exported doctors predictions back, using the first CSV column as the index.
dr_forecast = pd.read_csv(
    'doctors_prophet_predictions.csv',
    index_col=0,
)

In [None]:
# Parallel lists: weekly_data[k] is reported under the label df_names[k].
weekly_data = [
    dr_df,
    RNPA_df,
    ther_df,
]
df_names = [
    'Doctors',
    'RN/PAs',
    'therapists',
]

In [None]:
# Get forecast and plot for all 3 categories
# Fit, forecast and plot each provider category in turn.
for i, weekly_df in enumerate(weekly_data):
    category = df_names[i]
    model, forecast, predictions_df = get_prophet_forecast(
        df=weekly_df,
        df_name=category,
        df_cols=df_cols,
        date_hours_cols=date_hours_cols,
        pred_cols=pred_cols,
        periods=periods,
    )
    plot_prophet_forecast(model, forecast, df_name=category)

In [None]:
# get forecast with holidays w/ different windows included
# Re-fit every provider category once per holiday-window variant.
holiday_windows = [
    holidays1,
    holidays2,
    holidays1_2,
    holidays2_1,
]
for i, weekly_df in enumerate(weekly_data):
    category = df_names[i]
    for h in holiday_windows:
        model, forecast, predictions_df = get_prophet_forecast_w_holidays(
            df=weekly_df,
            df_name=category,
            df_cols=df_cols,
            date_hours_cols=date_hours_cols,
            pred_cols=pred_cols,
            periods=periods,
            holidays=h,
        )
        # Plotting is disabled for this sweep; uncomment to draw each fit:
        # plot_prophet_forecast(model, forecast, df_name=df_names[i])