In [None]:
import numpy as np
import pandas as pd
from pandas.tools.plotting import table
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.metrics import mean_squared_error
import warnings
warnings.filterwarnings("ignore")
%load_ext autoreload
%autoreload 2

In [None]:
from functions.timeseries_functions import index_to_datetime, weekly_resample, plot_all_df_columns, plot_series,\
plot_series_save_fig, plot_series_and_differences, run_augmented_Dickey_Fuller_test, \
plot_autocorrelation, plot_partial_autocorrelation, plot_decomposition

In [None]:
from fbprophet import Prophet

In [None]:
from functions.prophet_functions import get_prophet_training_mse, get_prophet_test_mse,\
get_prophet_forecast, plot_prophet_forecast, get_prophet_forecast_w_holidays,\
prophet_forecast_to_csv, get_prophet_forecast_date_index, get_prophet_forecast_holidays_date_index

#### add holiday component

In [None]:
# Holiday dates (ISO strings) used to build the holiday tables below.
thanksgiving_dates = [
    '2015-11-26',
    '2016-11-24',
    '2017-11-23',
]
christmas_dates = [
    '2015-12-25',
    '2016-12-25',
    '2017-12-25',
]
new_year_dates = [
    '2016-01-01',
    '2017-01-01',
    '2018-01-01',
]

In [None]:
# Christmas holiday table: a constant 'holiday' label plus a 'ds' datetime column.
# The label was previously misspelled 'Christams'; the label is data that travels
# with the frame into whatever consumes it, so the spelling is fixed here.
christmas = pd.DataFrame({'holiday': 'Christmas', 'ds': pd.to_datetime(christmas_dates)})

In [None]:
# Thanksgiving holiday table: a constant 'holiday' label plus a 'ds' datetime column.
thanksgiving = pd.DataFrame(
    {
        'holiday': 'Thanksgiving',
        'ds': pd.to_datetime(thanksgiving_dates),
    }
)

In [None]:
# New Year holiday table: a constant 'holiday' label plus a 'ds' datetime column.
new_years = pd.DataFrame(
    {
        'holiday': 'New Years',
        'ds': pd.to_datetime(new_year_dates),
    }
)

In [None]:
# Stack the three per-holiday tables into a single holidays table.
# ignore_index=True gives the result a unique 0..n-1 row index; without it each
# source frame's own 0..2 labels are repeated, leaving duplicate index labels.
holidays = pd.concat([christmas, thanksgiving, new_years], ignore_index=True)

In [None]:
# Four independent copies of the base holidays table, one per window configuration.
holidays1, holidays2, holidays1_2, holidays2_1 = (holidays.copy() for _ in range(4))

In [None]:
# holidays1: window of -1 / +1 around each holiday date.
holidays1['lower_window'], holidays1['upper_window'] = -1, 1

In [None]:
# holidays2: window of -2 / +2 around each holiday date.
holidays2['lower_window'], holidays2['upper_window'] = -2, 2

In [None]:
# holidays1_2: asymmetric window of -1 / +2 around each holiday date.
holidays1_2['lower_window'], holidays1_2['upper_window'] = -1, 2

In [None]:
# holidays2_1: asymmetric window of -2 / +1 around each holiday date.
holidays2_1['lower_window'], holidays2_1['upper_window'] = -2, 1

In [None]:
# All holiday-window variants, in the order they are compared.
holiday_windows = [
    holidays1,
    holidays2,
    holidays1_2,
    holidays2_1,
]

### using weekly data

In [None]:
# Load the hours-per-provider CSV for each provider category
# (doctors, RN/PAs, therapists) into its own DataFrame.
dr_df, RNPA_df, ther_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        './data/doctors_hours_per_provider.csv',
        './data/RNPA_hours_per_provider.csv',
        './data/therapists_hours_per_provider.csv',
    )
)

In [None]:
# Inspect the column names of the doctors data.
dr_df.columns

#### test MSE for different holiday intervals to determine best interval for each category

In [None]:
# Parallel lists: each DataFrame and the display name of its provider category.
df_names = [
    'Doctors',
    'RN/PAs',
    'therapists',
]
weekly_data = [
    dr_df,
    RNPA_df,
    ther_df,
]

In [None]:
# Settings passed to get_prophet_forecast_w_holidays in the comparison loop.
periods = 90
df_cols = [
    'date',
    'Number_Providers',
    'Hours',
    'Hours_per_Provider',
]
date_hours_cols = [
    'date',
    'Hours',
]
pred_cols = [
    'yhat',
    'yhat_lower',
    'yhat_upper',
]

In [None]:
# Run the holiday-aware forecast for every (category, holiday window) pair.
# The returned values are overwritten on each iteration, so only the last pair's
# results remain afterwards.
# NOTE(review): presumably the helper reports the MSE itself - confirm.
holiday_windows = [holidays1, holidays2, holidays1_2, holidays2_1]
for frame, label in zip(weekly_data, df_names):
    for window in holiday_windows:
        model, forecast, predictions_df = get_prophet_forecast_w_holidays(
            df=frame,
            df_name=label,
            df_cols=df_cols,
            date_hours_cols=date_hours_cols,
            pred_cols=pred_cols,
            periods=periods,
            holidays=window,
        )

#### plot models for all categories

In [None]:
# Re-declare the parallel lists of DataFrames and category names for this section.
df_names = [
    'Doctors',
    'RN/PAs',
    'therapists',
]
weekly_data = [
    dr_df,
    RNPA_df,
    ther_df,
]

In [None]:
# Settings passed to the date-indexed forecast helper.
date_col, hours_col = 'date', 'Hours'
periods = 90
pred_cols = [
    'yhat',
    'yhat_lower',
    'yhat_upper',
]

In [None]:
# Forecast each of the three categories without holidays and plot the result.
for frame in weekly_data:
    model, forecast, pred, predictions_df = get_prophet_forecast_date_index(
        df=frame,
        date_col=date_col,
        hours_col=hours_col,
        pred_cols=pred_cols,
        periods=periods,
    )
    plot_prophet_forecast(model, forecast)

#### Add in holidays

In [None]:
# Settings passed to the holiday-aware, date-indexed forecast helper.
date_col, hours_col = 'date', 'Hours'
periods = 90
pred_cols = [
    'yhat',
    'yhat_lower',
    'yhat_upper',
]

In [None]:
# Doctors: forecast with holidays, using the holidays1 table (-1 / +1 window).
dr_model_h, dr_forecast_h, dr_pred_h, dr_h_df = get_prophet_forecast_holidays_date_index(
    df=dr_df,
    date_col=date_col,
    hours_col=hours_col,
    pred_cols=pred_cols,
    periods=periods,
    holidays=holidays1,
)
# Plot the holiday-aware forecast.
plot_prophet_forecast(dr_model_h, dr_forecast_h)

In [None]:
# Mean of the 'Hours_per_Provider' column for doctors; used to convert hours to providers.
avg_dr_hours = dr_df.loc[:, 'Hours_per_Provider'].mean()

In [None]:
# Predicted number of providers: the 'yhat' column divided by the mean hours
# per provider, rounded to one decimal place.
dr_h_df['Predicted_num_Providers'] = (dr_h_df['yhat'] / avg_dr_hours).round(1)

In [None]:
# Draw the doctors forecast with explicit axis labels and save the figure as a PNG.
dr_fig = dr_model_h.plot(
    dr_forecast_h,
    xlabel='Date',
    ylabel='Hours',
)
dr_fig.savefig('./images/dr_prophet_model.png')

In [None]:
# Inspect the columns of the doctors prediction frame.
dr_h_df.columns

In [None]:
# Inspect the index of the doctors prediction frame.
dr_h_df.index

In [None]:
# Hand the doctors prediction frame to the CSV export helper under the name
# 'doctors_prophet_holidays'.
# NOTE(review): later cells read a 'Predicted_Hours' column from dr_h_df;
# presumably this helper renames columns in place - confirm.
prophet_forecast_to_csv(dr_h_df, 'doctors_prophet_holidays')

In [None]:
# May 2018 doctors predictions: weekly (W-MON) means of each column, limited to
# 2018-05-07 .. 2018-05-28 and rounded to two decimals.
# NOTE(review): 'Predicted_Hours' is not one of pred_cols; presumably an earlier
# helper call created it - confirm.
dr_pred = dr_h_df[['Predicted_Hours', 'Predicted_num_Providers']]
dr_may_hours = (
    dr_pred[['Predicted_Hours']]
    .resample('W-MON')
    .mean()
    .loc['2018-05-07':'2018-05-28']
    .round(2)
)
dr_may_providers = (
    dr_pred[['Predicted_num_Providers']]
    .resample('W-MON')
    .mean()
    .loc['2018-05-07':'2018-05-28']
    .round(2)
)

In [None]:
# Put the weekly hours and provider predictions side by side in one frame ...
dr_may18 = pd.concat(
    [dr_may_hours, dr_may_providers],
    axis='columns',
)
# ... and write that frame to CSV.
dr_may18.to_csv('./data/May2018_doctors_Prophet_predictions.csv')

In [None]:
# Display the May 2018 doctors predictions.
dr_may18

#### RN/PAs

In [None]:
# Settings for the RN/PA forecast.
date_col, hours_col = 'date', 'Hours'
periods = 90
pred_cols = [
    'yhat',
    'yhat_lower',
    'yhat_upper',
]

In [None]:
# RN/PAs: forecast with holidays, using the holidays1_2 table (-1 / +2 window).
RNPA_model_h, RNPA_forecast_h, RNPA_pred_h, RNPA_h_df = get_prophet_forecast_holidays_date_index(
    df=RNPA_df,
    date_col=date_col,
    hours_col=hours_col,
    pred_cols=pred_cols,
    periods=periods,
    holidays=holidays1_2,
)
# Plot the holiday-aware forecast.
plot_prophet_forecast(RNPA_model_h, RNPA_forecast_h)

In [None]:
# Draw the RN/PA forecast with explicit axis labels and save the figure as a PNG.
RNPA_fig = RNPA_model_h.plot(
    RNPA_forecast_h,
    xlabel='Date',
    ylabel='Hours',
)
RNPA_fig.savefig('./images/RNPA_prophet_model.png')

In [None]:
# Mean of the 'Hours_per_Provider' column for RN/PAs; used to convert hours to providers.
avg_RNPA_hours = RNPA_df.loc[:, 'Hours_per_Provider'].mean()

In [None]:
# Predicted number of providers: the 'yhat' column divided by the mean hours
# per provider, rounded to one decimal place.
RNPA_h_df['Predicted_num_Providers'] = (RNPA_h_df['yhat'] / avg_RNPA_hours).round(1)

In [None]:
# Hand the RN/PA prediction frame to the CSV export helper under the name
# 'RNPA_prophet_holidays'.
# NOTE(review): later cells read a 'Predicted_Hours' column from RNPA_h_df;
# presumably this helper renames columns in place - confirm.
prophet_forecast_to_csv(RNPA_h_df, 'RNPA_prophet_holidays')

In [None]:
# May 2018 RN/PA predictions: weekly (W-MON) means of each column, limited to
# 2018-05-07 .. 2018-05-28 and rounded to two decimals.
RNPA_pred = RNPA_h_df[['Predicted_Hours', 'Predicted_num_Providers']]
RNPA_may_hours = (
    RNPA_pred[['Predicted_Hours']]
    .resample('W-MON')
    .mean()
    .loc['2018-05-07':'2018-05-28']
    .round(2)
)
RNPA_may_providers = (
    RNPA_pred[['Predicted_num_Providers']]
    .resample('W-MON')
    .mean()
    .loc['2018-05-07':'2018-05-28']
    .round(2)
)

In [None]:
# Display both May 2018 RN/PA frames (shown as a tuple).
RNPA_may_hours, RNPA_may_providers

In [None]:
# Put the weekly hours and provider predictions side by side in one frame ...
RNPA_may18 = pd.concat(
    [RNPA_may_hours, RNPA_may_providers],
    axis='columns',
)
# ... and write that frame to CSV.
RNPA_may18.to_csv('./data/May2018_RNPAs_Prophet_predictions.csv')

In [None]:
# Display the May 2018 RN/PA predictions.
RNPA_may18

#### therapists

In [None]:
# Settings for the therapists forecast.
date_col, hours_col = 'date', 'Hours'
periods = 90
pred_cols = [
    'yhat',
    'yhat_lower',
    'yhat_upper',
]

In [None]:
# Therapists: forecast with holidays, using the holidays1 table (-1 / +1 window).
ther_model_h, ther_forecast_h, ther_pred_h, ther_h_df = get_prophet_forecast_holidays_date_index(
    df=ther_df,
    date_col=date_col,
    hours_col=hours_col,
    pred_cols=pred_cols,
    periods=periods,
    holidays=holidays1,
)
# Plot the holiday-aware forecast.
plot_prophet_forecast(ther_model_h, ther_forecast_h)

In [None]:
# Draw the therapists forecast with explicit axis labels and save the figure as a PNG.
ther_fig = ther_model_h.plot(
    ther_forecast_h,
    xlabel='Date',
    ylabel='Hours',
)
ther_fig.savefig('./images/ther_prophet_model.png')

In [None]:
# Mean of the 'Hours_per_Provider' column for therapists; used to convert hours to providers.
avg_ther_hours = ther_df.loc[:, 'Hours_per_Provider'].mean()

In [None]:
# Predicted number of providers: the 'yhat' column divided by the mean hours
# per provider, rounded to one decimal place.
ther_h_df['Predicted_num_Providers'] = (ther_h_df['yhat'] / avg_ther_hours).round(1)

In [None]:
# Hand the therapists prediction frame to the CSV export helper under the name
# 'therapist_prophet_holidays'.
# NOTE(review): later cells read a 'Predicted_Hours' column from ther_h_df;
# presumably this helper renames columns in place - confirm.
prophet_forecast_to_csv(ther_h_df, 'therapist_prophet_holidays')

In [None]:
# Display the therapists prediction frame for inspection.
# NOTE(review): this shows the whole frame; consider .head() to keep the output short.
ther_h_df

In [None]:
# Keep only the two prediction columns needed for the May 2018 summary.
ther_pred = ther_h_df[
    [
        'Predicted_Hours',
        'Predicted_num_Providers',
    ]
]

In [None]:
# Inspect the index of the therapists prediction columns.
ther_pred.index

In [None]:
# Show the rows labelled 2018-05-07 through 2018-05-28 before any resampling.
ther_pred.loc['2018-05-07':'2018-05-28']

In [None]:
# Check the (rows, columns) size of the therapists prediction columns.
ther_pred.shape

In [None]:
# Weekly (W-MON) mean of predicted hours, limited to 2018-05-07 .. 2018-05-28,
# rounded to two decimals.
ther_may_hours = (
    ther_pred[['Predicted_Hours']]
    .resample('W-MON')
    .mean()
    .loc['2018-05-07':'2018-05-28']
    .round(2)
)

In [None]:
# Display the weekly May 2018 predicted hours for therapists.
ther_may_hours

In [None]:
# Weekly (W-MON) mean of predicted provider counts, limited to
# 2018-05-07 .. 2018-05-28, rounded to two decimals.
ther_may_providers = (
    ther_pred[['Predicted_num_Providers']]
    .resample('W-MON')
    .mean()
    .loc['2018-05-07':'2018-05-28']
    .round(2)
)

In [None]:
# Display the weekly May 2018 predicted provider counts for therapists.
ther_may_providers

In [None]:
# Put the weekly hours and provider predictions side by side in one frame.
ther_may18 = pd.concat(
    [ther_may_hours, ther_may_providers],
    axis='columns',
)

In [None]:
# Write the May 2018 therapists predictions to CSV.
ther_may18.to_csv(
    path_or_buf='./data/May2018_therapists_Prophet_predictions.csv',
)