In [None]:
import numpy as np
import pandas as pd
try:
    # `pandas.plotting` is the supported location of `table` (pandas >= 0.20).
    from pandas.plotting import table
except ImportError:
    # Legacy location, only present on old pandas releases.
    from pandas.tools.plotting import table
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.metrics import mean_squared_error
import warnings
warnings.filterwarnings("ignore")
%load_ext autoreload
%autoreload 2

In [None]:
from timeseries_functions import index_to_datetime, plot_all_df_columns, plot_series,\
plot_series_save_fig, plot_series_and_differences, run_augmented_Dickey_Fuller_test, \
plot_autocorrelation, plot_partial_autocorrelation, plot_decomposition

In [None]:
from timeseries_functions import make_col_vector, make_design_matrix, fit_linear_trend,\
plot_trend_data, plot_linear_trend

In [None]:
from fbprophet import Prophet

In [None]:
from prophet_functions import get_prophet_training_mse, get_prophet_test_mse,\
get_prophet_forecast, plot_prophet_forecast, get_prophet_forecast_w_holidays,\
prophet_forecast_to_csv

#### add holiday component

In [None]:
# Calendar dates (ISO strings) of each holiday, one entry per year.
christmas_dates = [
    '2015-12-25',
    '2016-12-25',
    '2017-12-25',
]
new_year_dates = [
    '2016-01-01',
    '2017-01-01',
    '2018-01-01',
]
thanksgiving_dates = [
    '2015-11-26',
    '2016-11-24',
    '2017-11-23',
]

In [None]:
# Christmas rows: a constant 'holiday' label plus one parsed 'ds' timestamp per year.
# The label was misspelled 'Christams'; it is now spelled correctly.
christmas = pd.DataFrame({'holiday': 'Christmas', 'ds': pd.to_datetime(christmas_dates)})

In [None]:
# Thanksgiving rows: a constant 'holiday' label plus one parsed 'ds' timestamp per year.
thanksgiving = pd.DataFrame(
    {'holiday': 'Thanksgiving', 'ds': pd.to_datetime(thanksgiving_dates)}
)

In [None]:
# New Year rows: a constant 'holiday' label plus one parsed 'ds' timestamp per year.
new_years = pd.DataFrame(
    {'holiday': 'New Years', 'ds': pd.to_datetime(new_year_dates)}
)

In [None]:
# Stack the three per-holiday frames row-wise (index labels are not renumbered).
holidays = pd.concat(objs=[christmas, thanksgiving, new_years], axis=0)

In [None]:
# Four independent copies of the holiday table, one per window setting tried below.
holidays1, holidays2, holidays1_2, holidays2_1 = (holidays.copy() for _ in range(4))

In [None]:
# Window setting 1: lower_window = -1, upper_window = +1.
holidays1 = holidays1.assign(lower_window=-1, upper_window=1)

In [None]:
# Window setting 2: lower_window = -2, upper_window = +2.
holidays2 = holidays2.assign(lower_window=-2, upper_window=2)

In [None]:
# Window setting 3: lower_window = -1, upper_window = +2.
holidays1_2 = holidays1_2.assign(lower_window=-1, upper_window=2)

In [None]:
# Window setting 4: lower_window = -2, upper_window = +1.
holidays2_1 = holidays2_1.assign(lower_window=-2, upper_window=1)

In [None]:
# Candidate holiday-window settings, in the order they are compared.
holiday_windows = [
    holidays1,
    holidays2,
    holidays1_2,
    holidays2_1,
]

### using weekly data

In [None]:
# Load the hours-per-provider CSV for each provider category.
dr_df, RNPA_df, ther_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        './data/doctors_hours_per_provider.csv',
        './data/RNPA_hours_per_provider.csv',
        './data/therapists_hours_per_provider.csv',
    )
)

In [None]:
# Train / test date boundaries (ISO date strings).
train_start, train_end = '2015-01-12', '2018-02-26'
test_start, test_end = '2018-03-05', '2018-04-30'

In [None]:
# Inspect the column names of the doctors frame as loaded from CSV.
dr_df.columns

In [None]:
def get_prophet_forecast(df, df_name, df_cols, date_hours_cols, pred_cols, periods, freq='D'):
    """
    Fit a default Prophet model to one series and forecast `periods` steps ahead.

    NOTE: this definition shadows the `get_prophet_forecast` imported from
    `prophet_functions` earlier in the notebook.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data with exactly ``len(df_cols)`` columns. It is not modified;
        all renaming happens on a copy.
    df_name : str
        Label forwarded to the MSE helpers.
    df_cols : list of str
        Column names to apply to the copy of `df`.
    date_hours_cols : list of str
        Two names from `df_cols`: the date column and the target column, in
        that order. They become Prophet's ``ds`` and ``y``.
    pred_cols : list of str
        Forecast columns to place next to the observed data in `df_pred`.
    periods : int
        Number of future steps to forecast.
    freq : str, optional
        Frequency passed to ``make_future_dataframe``. The default ``'D'`` is
        Prophet's own default, so existing calls behave as before.
        NOTE(review): for weekly input a weekly frequency is probably what is
        wanted - confirm against the data before changing callers.

    Returns
    -------
    model : Prophet
        The fitted model.
    forecast : pandas.DataFrame
        Output of ``model.predict`` on the future frame.
    df_pred : pandas.DataFrame
        The ``ds``/``y`` data joined column-wise (on index) with
        ``forecast[pred_cols]``.
    """
    # Work on a copy so the caller's frame keeps its original column names.
    data = df.copy()
    data.columns = df_cols
    data = data[date_hours_cols].copy()
    data.columns = ['ds', 'y']

    model = Prophet()
    model.fit(data)
    future = model.make_future_dataframe(periods=periods, freq=freq)
    forecast = model.predict(future)

    # Index-aligned join of observed values with the selected forecast columns.
    df_pred = pd.concat([data, forecast[pred_cols]], axis=1)

    # Project helpers; presumably they report the train / test MSE for `df_name`.
    get_prophet_training_mse(df_pred, df_name, periods)
    get_prophet_test_mse(df_pred, df_name, periods)
    return model, forecast, df_pred

In [None]:
# Arguments shared by the forecasting calls in this notebook.
pred_cols = ['yhat', 'yhat_lower', 'yhat_upper']  # forecast columns to keep
periods = 90                                      # number of future steps
df_cols = ['date', 'Number_Providers', 'Hours', 'Hours_per_Provider']
date_hours_cols = ['date', 'Hours']               # (date column, target column)

In [None]:
# Doctors: fit the holiday-free model and collect its predictions.
dr_model, forecast, dr_pred = get_prophet_forecast(
    df=dr_df,
    df_name='Doctors',
    df_cols=df_cols,
    date_hours_cols=date_hours_cols,
    pred_cols=pred_cols,
    periods=periods,
)

In [None]:
# Persist the doctors predictions via the project helper.
# Presumably this writes ./data/doctors_prophet_predictions.csv (the file read
# back in the next cell) - confirm in prophet_functions.
prophet_forecast_to_csv(dr_pred, 'doctors_prophet')

In [None]:
# Reload the saved doctors predictions, using the first CSV column as the index.
dr_forecast = pd.read_csv('./data/doctors_prophet_predictions.csv', index_col=0)

In [None]:
# Parallel lists: weekly_data[k] is the frame for the category named df_names[k].
df_names = ['Doctors', 'RN/PAs', 'therapists']
weekly_data = [dr_df, RNPA_df, ther_df]

In [None]:
# Fit and plot a holiday-free Prophet forecast for each of the 3 categories.
for i, weekly_df in enumerate(weekly_data):
    model, forecast, predictions_df = get_prophet_forecast(
        df=weekly_df,
        df_name=df_names[i],
        df_cols=df_cols,
        date_hours_cols=date_hours_cols,
        pred_cols=pred_cols,
        periods=periods,
    )
    plot_prophet_forecast(model, forecast)

In [None]:
# Compare holiday-window settings: refit every category with each window.
# The returned objects are overwritten on every pass; presumably only the MSE
# reported by the helper is of interest here.
holiday_windows = [holidays1, holidays2, holidays1_2, holidays2_1]
for i, weekly_df in enumerate(weekly_data):
    for h in holiday_windows:
        model, forecast, predictions_df = get_prophet_forecast_w_holidays(
            df=weekly_df,
            df_name=df_names[i],
            df_cols=df_cols,
            date_hours_cols=date_hours_cols,
            pred_cols=pred_cols,
            periods=periods,
            holidays=h,
        )
# NOTE: after the loop, `h` is still bound to the last entry, holidays2_1.

In [None]:
# Doctor's forecast + holidays
# The holiday window is named explicitly (holidays2_1, the last entry of
# holiday_windows) instead of reading a variable left over from an earlier
# loop, so this cell does not depend on that loop having just been run.
dr_model_h, dr_forecast_h, dr_pred_h = get_prophet_forecast_w_holidays(
    dr_df, 'Doctors', df_cols, date_hours_cols, pred_cols, periods,
    holidays=holidays2_1)
# plot forecast & decomposition w holidays
plot_prophet_forecast(dr_model_h, dr_forecast_h)

In [None]:
# Draw the doctors holiday-model forecast with labelled axes and save it as a PNG.
dr_fig = dr_model_h.plot(
    dr_forecast_h,
    ylabel='Hours',
    xlabel='Date',
)
dr_fig.savefig('./images/dr_prophet_model.png')

In [None]:
# Persist the doctors holiday-model predictions via the project helper.
prophet_forecast_to_csv(dr_pred_h, 'doctors_prophet_holidays')

#### RN/PAs

In [None]:
# RN/PA forecast + holidays
# The holiday window is named explicitly (holidays2_1, the last entry of
# holiday_windows) instead of reading a variable left over from an earlier
# loop, so this cell does not depend on that loop having just been run.
RNPA_model_h, RNPA_forecast_h, RNPA_pred_h = get_prophet_forecast_w_holidays(
    RNPA_df, 'RN/PAs', df_cols, date_hours_cols, pred_cols, periods,
    holidays=holidays2_1)
# plot forecast & decomposition w holidays
plot_prophet_forecast(RNPA_model_h, RNPA_forecast_h)

In [None]:
# Draw the RN/PA holiday-model forecast with labelled axes and save it as a PNG.
RNPA_fig = RNPA_model_h.plot(
    RNPA_forecast_h,
    ylabel='Hours',
    xlabel='Date',
)
RNPA_fig.savefig('./images/RNPA_prophet_model.png')

In [None]:
# Persist the RN/PA holiday-model predictions via the project helper.
prophet_forecast_to_csv(RNPA_pred_h, 'RNPA_prophet_holidays')

#### therapists

In [None]:
# Therapists forecast + holidays
# The holiday window is named explicitly (holidays2_1, the last entry of
# holiday_windows) instead of reading a variable left over from an earlier
# loop, so this cell does not depend on that loop having just been run.
ther_model_h, ther_forecast_h, ther_pred_h = get_prophet_forecast_w_holidays(
    ther_df, 'Therapists', df_cols, date_hours_cols, pred_cols, periods,
    holidays=holidays2_1)
# plot forecast & decomposition w holidays
plot_prophet_forecast(ther_model_h, ther_forecast_h)

In [None]:
# Draw the therapists holiday-model forecast with labelled axes and save it as a PNG.
ther_fig = ther_model_h.plot(
    ther_forecast_h,
    ylabel='Hours',
    xlabel='Date',
)
ther_fig.savefig('./images/ther_prophet_model.png')

In [None]:
# Persist the therapists holiday-model predictions via the project helper.
# NOTE(review): unlike the other prophet_forecast_to_csv calls in this notebook,
# the name passed here carries a './data/' prefix - confirm which form the
# helper expects, since the file is later read from
# './data/therapist_prophet_holidays_predictions.csv'.
prophet_forecast_to_csv(ther_pred_h, './data/therapist_prophet_holidays')

#### predict number of providers for the 3-month forecast

In [None]:
# Load the hours-per-provider CSVs again, this time with the first column as the index.
dr_num, RNPA_num, therapists_num = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        './data/doctors_hours_per_provider.csv',
        './data/RNPA_hours_per_provider.csv',
        './data/therapists_hours_per_provider.csv',
    )
)

In [None]:
# Show the last rows of the doctors frame.
dr_num.tail()

In [None]:
# Mean of 'Hours_per_Provider' for each category, rounded to 2 decimals.
providers = [dr_num, RNPA_num, therapists_num]
avg_provider_hours = [
    round(provider_df['Hours_per_Provider'].mean(), 2)
    for provider_df in providers
]

In [None]:
# Inspect the column names of the doctors frame.
dr_num.columns

In [None]:
# Unrounded mean hours per provider for doctors.
dr_avg_hours = dr_num.loc[:, 'Hours_per_Provider'].mean()

In [None]:
# Display the doctors average.
dr_avg_hours

In [None]:
# Display the rounded averages for all three categories.
avg_provider_hours

In [None]:
# Load the saved holiday-model predictions (first CSV column becomes the index).
# Each assignment rebinds an existing *_forecast_h name to the CSV contents.
dr_forecast_h = pd.read_csv(
    './data/doctors_prophet_holidays_predictions.csv', index_col=0)
RNPA_forecast_h = pd.read_csv(
    './data/RNPA_prophet_holidays_predictions.csv', index_col=0)
ther_forecast_h = pd.read_csv(
    './data/therapist_prophet_holidays_predictions.csv', index_col=0)

In [None]:
# Holiday-model predictions for doctors, RN/PAs and therapists, in that order.
forecasts = [
    dr_forecast_h,
    RNPA_forecast_h,
    ther_forecast_h,
]

In [None]:
# Take rows 174..185 (12 rows, by position) of each prediction frame.
# `.iloc` makes the positional slicing explicit, and `.copy()` gives each slice
# its own data, so columns can be added to it later without pandas'
# chained-assignment (SettingWithCopy) problem.
# NOTE(review): 174 and 186 are hard-coded row positions - confirm they still
# match the length of the saved predictions.
dr_3mo_forecast = dr_forecast_h.iloc[174:186].copy()
RNPA_3mo_forecast = RNPA_forecast_h.iloc[174:186].copy()
ther_3mo_forecast = ther_forecast_h.iloc[174:186].copy()

In [None]:
# Inspect the columns available in the sliced doctors forecast.
dr_3mo_forecast.columns

In [None]:
# Estimated provider count = predicted hours / hours per provider, rounded to a whole number.
# NOTE(review): the divisors are presumably the rounded per-category means held
# in avg_provider_hours - confirm they are still current.
dr_3mo_forecast['Est_Num_Providers'] = dr_3mo_forecast['Predicted_Hours'].div(19.62).round()
RNPA_3mo_forecast['Est_Num_Providers'] = RNPA_3mo_forecast['Predicted_Hours'].div(13.21).round()
ther_3mo_forecast['Est_Num_Providers'] = ther_3mo_forecast['Predicted_Hours'].div(11.05).round()

In [None]:
# Replace the index with 0..n-1; the previous index is kept as a regular column.
dr_3mo_forecast = dr_3mo_forecast.reset_index(drop=False)

In [None]:
# Shift the index by one so that it starts at 1 instead of 0.
dr_3mo_forecast.index += 1

In [None]:
# Round the hours column to the nearest whole number.
dr_3mo_forecast['Predicted_Hours'] = dr_3mo_forecast['Predicted_Hours'].round()

In [None]:
# Keep only the two presentation columns and name the index 'Week'.
# rename_axis returns a new frame, so its result must be assigned; otherwise
# the index name would be shown in this cell's output but missing from the
# frame that is saved afterwards.
dr_3mo_forecast = (
    dr_3mo_forecast[['Predicted_Hours', 'Est_Num_Providers']]
    .rename_axis('Week')
)
dr_3mo_forecast

In [None]:
# Save the 12-week doctors table to CSV.
dr_3mo_forecast.to_csv('./data/dr_12_week_prophet_forecast.csv')

In [None]:
# Rows 184..192 (9 rows, by position) of the doctors predictions.
# `.iloc` makes the positional slicing explicit, and `.copy()` gives the slice
# its own data, so columns can be added to it later without pandas'
# chained-assignment (SettingWithCopy) problem.
# NOTE(review): the 12-week table treats row 174 as week 1, which would put
# week 8 at row 181 - confirm that 184 is the intended start of this slice.
dr_8_to_16_wk_prophet = dr_forecast_h.iloc[184:193].copy()

In [None]:
# Estimated provider count = predicted hours / 19.62, rounded to a whole number.
# NOTE(review): 19.62 is presumably the rounded mean doctor hours per provider - confirm.
dr_8_to_16_wk_prophet['Est_Num_Providers'] = dr_8_to_16_wk_prophet['Predicted_Hours'].div(19.62).round()

In [None]:
# Replace the index with 0..n-1; the previous index is kept as a regular column.
dr_8_to_16_wk_prophet = dr_8_to_16_wk_prophet.reset_index(drop=False)

In [None]:
# Shift the index by eight so that it starts at 8 instead of 0.
dr_8_to_16_wk_prophet.index += 8

In [None]:
# Round the hours column to the nearest whole number.
dr_8_to_16_wk_prophet['Predicted_Hours'] = dr_8_to_16_wk_prophet['Predicted_Hours'].round()

In [None]:
# Keep only the two presentation columns and name the index 'Week'.
# rename_axis returns a new frame, so its result must be assigned; otherwise
# the index name would be shown in this cell's output but missing from the
# frame that is saved afterwards.
dr_8_to_16_wk_prophet = (
    dr_8_to_16_wk_prophet[['Predicted_Hours', 'Est_Num_Providers']]
    .rename_axis('Week')
)
dr_8_to_16_wk_prophet

In [None]:
# Save the week 8-16 doctors table to CSV.
dr_8_to_16_wk_prophet.to_csv('./data/dr_8_to_16_wk_prophet.csv')

#### save doctors predictions as images for presentation

In [None]:
# Render the 12-week doctors table as a standalone image.
# `table` comes from the import cell at the top of the notebook; it is not
# re-imported here from the legacy `pandas.tools` path.
fig, ax = plt.subplots(figsize=(10, 10))  # blank 10x10 canvas
ax.xaxis.set_visible(False)  # hide axes
ax.yaxis.set_visible(False)
ax.set_frame_on(False)
tabla = table(ax, dr_3mo_forecast, loc='center', colWidths=[0.2] * len(dr_3mo_forecast.columns))
tabla.auto_set_font_size(False)  # use the explicit font size set below
tabla.set_fontsize(16)
tabla.scale(1.5, 2)
# Save through the figure object rather than pyplot's implicit current figure.
fig.savefig('./images/dr_12wk_Prophet_forecast.png', transparent=True)

In [None]:
# Render the week 8-16 doctors table as a standalone image.
# `table` comes from the import cell at the top of the notebook; it is not
# re-imported here from the legacy `pandas.tools` path.
fig, ax = plt.subplots(figsize=(10, 10))  # blank 10x10 canvas
ax.xaxis.set_visible(False)  # hide axes
ax.yaxis.set_visible(False)
ax.set_frame_on(False)
tabla = table(ax, dr_8_to_16_wk_prophet, loc='center', colWidths=[0.2] * len(dr_8_to_16_wk_prophet.columns))
tabla.auto_set_font_size(False)  # use the explicit font size set below
tabla.set_fontsize(16)
tabla.scale(1.5, 2)
# Save through the figure object rather than pyplot's implicit current figure.
fig.savefig('./images/dr_8_to_16_wk_prophet.png', transparent=True)

#### add a regressor for an additional variable: number of providers

In [None]:
# Potential next steps: add exogenous variable - number of providers to each model
# help(Prophet.add_regressor)