In [None]:
import numpy as np
import pandas as pd
from pandas.tools.plotting import table
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.metrics import mean_squared_error
import warnings
warnings.filterwarnings("ignore")
%load_ext autoreload
%autoreload 2

In [None]:
from timeseries_functions import index_to_datetime, plot_all_df_columns, plot_series,\
plot_series_save_fig, plot_series_and_differences, run_augmented_Dickey_Fuller_test, \
plot_autocorrelation, plot_partial_autocorrelation, plot_decomposition

In [None]:
from timeseries_functions import make_col_vector, make_design_matrix, fit_linear_trend,\
plot_trend_data, plot_linear_trend

In [None]:
from fbprophet import Prophet

In [None]:
from prophet_functions import get_prophet_training_mse, get_prophet_test_mse,\
get_prophet_forecast, plot_prophet_forecast, get_prophet_forecast_w_holidays,\
prophet_forecast_to_csv

#### add holiday component

In [None]:
# Holiday dates as 'YYYY-MM-DD' strings, three occurrences per holiday
# (Christmas/Thanksgiving 2015-2017, New Year's Day 2016-2018).
christmas_dates = ['2015-12-25', '2016-12-25', '2017-12-25']
new_year_dates = ['2016-01-01', '2017-01-01', '2018-01-01']
thanksgiving_dates = ['2015-11-26', '2016-11-24', '2017-11-23']

In [None]:
# One row per Christmas date; the scalar 'holiday' label is broadcast to every row.
# (Label spelling fixed: it was 'Christams', which would carry the typo into
# whatever consumes the 'holiday' column of the frame passed to Prophet.)
christmas = pd.DataFrame({'holiday': 'Christmas', 'ds': pd.to_datetime(christmas_dates)})

In [None]:
# One row per Thanksgiving date; the scalar 'holiday' label is broadcast to every row.
thanksgiving = pd.DataFrame({'holiday':'Thanksgiving', 'ds': pd.to_datetime(thanksgiving_dates)})

In [None]:
# One row per New Year's Day date; the scalar 'holiday' label is broadcast to every row.
new_years = pd.DataFrame({'holiday':'New Years', 'ds': pd.to_datetime(new_year_dates)})

In [None]:
# Stack the three per-holiday frames into one table with 'holiday' and 'ds' columns.
# Each input keeps its own 0..2 row labels, so the combined index has repeated labels.
holidays = pd.concat([christmas, thanksgiving, new_years])

In [None]:
# Four independent copies of the holiday table; each one gets a different
# lower/upper window pair in the cells that follow.
holidays1 = holidays.copy()
holidays2 = holidays.copy()
holidays1_2 = holidays.copy()
holidays2_1 = holidays.copy()

In [None]:
# Symmetric window: lower_window=-1, upper_window=1
# (per Prophet's holiday convention these extend the effect around each date).
holidays1['lower_window'] = -1
holidays1['upper_window'] = 1

In [None]:
# Symmetric window: lower_window=-2, upper_window=2.
holidays2['lower_window'] = -2
holidays2['upper_window'] = 2

In [None]:
# Asymmetric window: lower_window=-1, upper_window=2.
holidays1_2['lower_window'] = -1
holidays1_2['upper_window'] = 2

In [None]:
# Asymmetric window: lower_window=-2, upper_window=1.
holidays2_1['lower_window'] = -2
holidays2_1['upper_window'] = 1

In [None]:
# All four window variants, in the order they are tried in the comparison loop.
holiday_windows = [holidays1, holidays2, holidays1_2, holidays2_1]

### using weekly data

In [None]:
# Load one CSV per provider category (doctors, RN/PAs, therapists) from ./data.
# The columns used later in this notebook are 'date', 'Hours' and 'Hours_per_Provider'.
dr_df = pd.read_csv('./data/doctors_hours_per_provider.csv')
RNPA_df = pd.read_csv('./data/RNPA_hours_per_provider.csv')
ther_df = pd.read_csv('./data/therapists_hours_per_provider.csv')

In [None]:
# Inspect the column names of the doctors table.
dr_df.columns

#### test MSE for different holiday intervals to determine best interval for each category

In [None]:
# Category frames and their display names, kept in matching order
# because the loop below indexes both lists with the same position.
weekly_data = [dr_df, RNPA_df, ther_df]
df_names = ['Doctors', 'RN/PAs', 'therapists']

In [None]:
# Arguments passed to get_prophet_forecast_w_holidays in the comparison loop below.
df_cols = ['date', 'Number_Providers', 'Hours', 'Hours_per_Provider']
date_hours_cols = ['date', 'Hours']
# number of future periods to forecast
periods = 90
# forecast columns of interest: point estimate and its lower/upper bounds
pred_cols = ['yhat', 'yhat_lower', 'yhat_upper']

In [None]:
# Run the holiday-aware forecast for every provider category against every
# holiday-window variant. The returned objects are overwritten on each pass,
# so only what get_prophet_forecast_w_holidays itself reports is kept
# (presumably the MSE for each combination -- confirm in prophet_functions).
holiday_windows = [holidays1, holidays2, holidays1_2, holidays2_1]
for position, category_df in enumerate(weekly_data):
    category_name = df_names[position]
    for window_df in holiday_windows:
        model, forecast, predictions_df = get_prophet_forecast_w_holidays(
            df=category_df,
            df_name=category_name,
            df_cols=df_cols,
            date_hours_cols=date_hours_cols,
            pred_cols=pred_cols,
            periods=periods,
            holidays=window_df,
        )

In [None]:
def get_prophet_forecast_date_index(df, date_col, hours_col, pred_cols, periods, freq='D'):
    """
    Fit a default Prophet model to one hours series and forecast ahead.

    Inputs:
        df: dataframe containing timeseries/dates and weekly hours.
            It is NOT modified; a two-column working copy is used instead.
        date_col: (str) name of the column containing the date
        hours_col: (str) name of the column containing the appointment hours data
        pred_cols: (list of str) forecast columns to join onto the history,
            e.g. ['yhat', 'yhat_lower', 'yhat_upper']
        periods: (int) number of periods to forecast.
        freq: (str) pandas frequency string forwarded to
            Prophet.make_future_dataframe. Defaults to 'D', Prophet's own
            default, so existing calls behave exactly as before.
            NOTE(review): the inputs are described as weekly data; with 'D'
            the future rows are daily steps -- confirm whether 'W' is intended.
    Outputs:
        model: the fitted Prophet model
        forecast: table of all data plus predictions, indexed by 'ds'
        predictions: the last `periods` rows of forecast, indexed by 'ds'
        df_pred: history ('ds', 'y') joined column-wise with
            forecast[pred_cols] on the date index
    """
    # Build the frame Prophet expects ('ds', 'y') on a copy so the caller's
    # dataframe keeps its original index and columns.
    history = df[[date_col, hours_col]].copy()
    history.columns = ['ds', 'y']
    # datetime index (named after the date column) used to align with the forecast
    history.index = pd.to_datetime(history['ds'].values).rename(date_col)
    # create model
    model = Prophet()
    model.fit(history)
    # get predictions
    future = model.make_future_dataframe(periods=periods, freq=freq)
    forecast = model.predict(future)
    # Slice from an explicit start position: iloc[-periods:] would return the
    # whole table when periods == 0.
    predictions = forecast.iloc[len(forecast) - periods:].copy()
    # set index as date (the 'ds' column itself is kept for Prophet's plotting)
    forecast.index = forecast['ds']
    predictions.index = predictions['ds']
    df_pred = pd.concat([history, forecast[pred_cols]], axis=1)
    return model, forecast, predictions, df_pred

In [None]:
def get_prophet_forecast_holidays_date_index(df, date_col, hours_col, pred_cols, periods, holidays, freq='D'):
    """
    Fit a Prophet model with a holiday table to one hours series and forecast ahead.

    Inputs:
        df: dataframe containing timeseries/dates and weekly hours.
            It is NOT modified; a two-column working copy is used instead.
        date_col: (str) name of the column containing the date
        hours_col: (str) name of the column containing the appointment hours data
        pred_cols: (list of str) forecast columns to join onto the history,
            e.g. ['yhat', 'yhat_lower', 'yhat_upper']
        periods: (int) number of periods to forecast
        holidays: (dataframe) of holidays with holiday names, dates (datetime
            format), and optional upper and lower windows (ints)
        freq: (str) pandas frequency string forwarded to
            Prophet.make_future_dataframe. Defaults to 'D', Prophet's own
            default, so existing calls behave exactly as before.
            NOTE(review): the inputs are described as weekly data; with 'D'
            the future rows are daily steps -- confirm whether 'W' is intended.
    Outputs:
        model: the fitted Prophet model
        forecast: table of all data plus predictions, indexed by 'ds'
        predictions: the last `periods` rows of forecast, indexed by 'ds'
        df_pred: history ('ds', 'y') joined column-wise with
            forecast[pred_cols] on the date index
    """
    # Build the frame Prophet expects ('ds', 'y') on a copy so the caller's
    # dataframe keeps its original index and columns.
    history = df[[date_col, hours_col]].copy()
    history.columns = ['ds', 'y']
    # datetime index (named after the date column) used to align with the forecast
    history.index = pd.to_datetime(history['ds'].values).rename(date_col)
    # create model
    model = Prophet(holidays=holidays)
    model.fit(history)
    # get predictions
    future = model.make_future_dataframe(periods=periods, freq=freq)
    forecast = model.predict(future)
    # Slice from an explicit start position: iloc[-periods:] would return the
    # whole table when periods == 0.
    predictions = forecast.iloc[len(forecast) - periods:].copy()
    # set index as date (the 'ds' column itself is kept for Prophet's plotting)
    forecast.index = forecast['ds']
    predictions.index = predictions['ds']
    df_pred = pd.concat([history, forecast[pred_cols]], axis=1)
    return model, forecast, predictions, df_pred

In [None]:
def prophet_forecast_to_csv(prediction_df, file_name):
    """Save prophet predictions in dataframe format to csv file.

    Inputs:
        prediction_df: dataframe with exactly six columns, positionally:
            date, true hours, predicted hours, lower limit, upper limit,
            predicted number of providers. Its columns are renamed IN PLACE
            to the names below; later cells in this notebook read those
            renamed columns (e.g. 'Predicted_Hours') from the same frame.
        file_name: (str) stem of the output file; the frame is written to
            ./data/<file_name>_predictions.csv
    Raises:
        ValueError: if prediction_df does not have exactly six columns
    """
    output_cols = ['Date', 'True_Hours', 'Predicted_Hours', 'Lower_Limit', 'Upper_Limit', 'Predicted_num_Providers']
    # Fail early with an actionable message instead of pandas' generic
    # length-mismatch error (e.g. when the providers column was not added yet).
    if len(prediction_df.columns) != len(output_cols):
        raise ValueError(
            'prediction_df must have {} columns to be renamed to {}, got {}: {}'.format(
                len(output_cols), output_cols, len(prediction_df.columns), list(prediction_df.columns)))
    prediction_df.columns = output_cols
    prediction_df.to_csv('./data/{}_predictions.csv'.format(file_name))

#### plot models for all categories

In [None]:
# Same lists as assigned earlier in the notebook: category frames and their names.
weekly_data = [dr_df, RNPA_df, ther_df]
df_names = ['Doctors', 'RN/PAs', 'therapists']

In [None]:
# Arguments for the *_date_index forecast helpers defined above.
date_col = 'date'
hours_col = 'Hours'
# number of future periods to forecast
periods = 90
pred_cols = ['yhat', 'yhat_lower', 'yhat_upper']

In [None]:
# Baseline without holidays: fit, forecast and plot each of the three
# provider categories in turn (results are overwritten on every pass).
for category_df in weekly_data:
    model, forecast, pred, predictions_df = get_prophet_forecast_date_index(
        df=category_df,
        date_col=date_col,
        hours_col=hours_col,
        pred_cols=pred_cols,
        periods=periods,
    )
    plot_prophet_forecast(model, forecast)

#### Add in holidays

In [None]:
# Same values as in the previous section, repeated for the holiday models.
date_col = 'date'
hours_col = 'Hours'
periods = 90
pred_cols = ['yhat', 'yhat_lower', 'yhat_upper']

In [None]:
# Doctor's forecast + holidays (uses the holidays1 window variant: -1 / +1)
dr_model_h, dr_forecast_h, dr_pred_h, dr_h_df = get_prophet_forecast_holidays_date_index(df=dr_df, date_col=date_col,\
                     hours_col=hours_col, pred_cols=pred_cols, periods=periods, holidays=holidays1)
# plot forecast & decomposition w holidays
plot_prophet_forecast(dr_model_h, dr_forecast_h)

In [None]:
# Mean of the doctors' Hours_per_Provider column; used below to turn
# predicted hours into a predicted provider count.
avg_dr_hours = dr_df['Hours_per_Provider'].mean()

In [None]:
# add predicted number of providers column:
# predicted hours (yhat) divided by the average hours per provider, rounded to 1 decimal
dr_h_df['Predicted_num_Providers'] = round(dr_h_df['yhat'] / avg_dr_hours, 1)

In [None]:
# Plot the doctors forecast with axis labels via the model's own plot method
# and save the figure under ./images.
dr_fig = dr_model_h.plot(dr_forecast_h, xlabel='Date', ylabel='Hours')
dr_fig.savefig('./images/dr_prophet_model_date_index.png')

In [None]:
# Inspect the columns of the combined history + prediction table.
dr_h_df.columns

In [None]:
# Inspect the date index of the combined history + prediction table.
dr_h_df.index

In [None]:
# Writes ./data/doctors_prophet_holidays_date_index_predictions.csv.
# Side effect: prophet_forecast_to_csv renames dr_h_df's columns in place
# (yhat -> Predicted_Hours, etc.); the 12-week cells below rely on those names.
prophet_forecast_to_csv(dr_h_df, 'doctors_prophet_holidays_date_index')

#### RN/PAs

In [None]:
# Same values as in the doctors section, repeated for the RN/PA model.
date_col = 'date'
hours_col = 'Hours'
periods = 90
pred_cols = ['yhat', 'yhat_lower', 'yhat_upper']

In [None]:
# RN/PA forecast + holidays (uses the holidays1_2 window variant: -1 / +2)
RNPA_model_h, RNPA_forecast_h, RNPA_pred_h, RNPA_h_df = get_prophet_forecast_holidays_date_index(df=RNPA_df, date_col=date_col,\
        hours_col=hours_col, pred_cols=pred_cols, periods=periods, holidays=holidays1_2)
# plot forecast & decomposition w holidays
plot_prophet_forecast(RNPA_model_h, RNPA_forecast_h)

In [None]:
# Plot the RN/PA forecast with axis labels via the model's own plot method
# and save the figure under ./images.
RNPA_fig = RNPA_model_h.plot(RNPA_forecast_h, xlabel='Date', ylabel='Hours')
RNPA_fig.savefig('./images/RNPA_prophet_model_date_index.png')

In [None]:
# Mean of the RN/PA Hours_per_Provider column; used below to turn
# predicted hours into a predicted provider count.
avg_RNPA_hours = RNPA_df['Hours_per_Provider'].mean()

In [None]:
# add predicted number of providers column:
# predicted hours (yhat) divided by the average hours per provider, rounded to 1 decimal
RNPA_h_df['Predicted_num_Providers'] = round(RNPA_h_df['yhat'] / avg_RNPA_hours, 1)

In [None]:
# Writes ./data/RNPA_prophet_holidays_date_index_predictions.csv and, as a
# side effect, renames RNPA_h_df's columns in place.
prophet_forecast_to_csv(RNPA_h_df, 'RNPA_prophet_holidays_date_index')

#### therapists

In [None]:
# Same values as in the previous sections, repeated for the therapists model.
date_col = 'date'
hours_col = 'Hours'
periods = 90
pred_cols = ['yhat', 'yhat_lower', 'yhat_upper']

In [None]:
# Therapists forecast + holidays (uses the holidays1 window variant: -1 / +1)
ther_model_h, ther_forecast_h, ther_pred_h, ther_h_df = get_prophet_forecast_holidays_date_index(df=ther_df, date_col=date_col,\
        hours_col=hours_col, pred_cols=pred_cols, periods=periods, holidays=holidays1)
# plot forecast & decomposition w holidays
plot_prophet_forecast(ther_model_h, ther_forecast_h)

In [None]:
# Plot the therapists forecast with axis labels via the model's own plot method
# and save the figure under ./images.
ther_fig = ther_model_h.plot(ther_forecast_h, xlabel='Date', ylabel='Hours')
ther_fig.savefig('./images/ther_prophet_model_date_index.png')

In [None]:
# Mean of the therapists' Hours_per_Provider column; used below to turn
# predicted hours into a predicted provider count.
avg_ther_hours = ther_df['Hours_per_Provider'].mean()

In [None]:
# add predicted number of providers column:
# predicted hours (yhat) divided by the average hours per provider, rounded to 1 decimal
ther_h_df['Predicted_num_Providers'] = round(ther_h_df['yhat'] / avg_ther_hours, 1)

In [None]:
# Writes ./data/therapist_prophet_holidays_date_index_predictions.csv and, as a
# side effect, renames ther_h_df's columns in place.
prophet_forecast_to_csv(ther_h_df, 'therapist_prophet_holidays_date_index')

In [None]:
# Display the full therapists history + prediction table.
# NOTE(review): this renders every row; .head()/.tail() would keep the output short.
ther_h_df

#### get 12 week forecasts

In [None]:
# The three combined history + prediction tables in one list.
# NOTE(review): not referenced again in the cells visible here.
forecasts = [dr_h_df, RNPA_h_df, ther_h_df]

In [None]:
# Display the forecast-only rows (the last `periods` rows) for doctors.
dr_pred_h

In [None]:
# Take 12 rows by position from each combined table.
# NOTE(review): 174:186 are hard-coded positions -- presumably the first rows
# after the end of the history; confirm against the length of the input data
# and the forecast frequency.
# .copy() makes these independent frames, so the column assignment in the
# rounding cell below acts on a real frame rather than on a slice of dr_h_df.
dr_3mo_forecast = dr_h_df[174:186].copy()
RNPA_3mo_forecast = RNPA_h_df[174:186].copy()
ther_3mo_forecast = ther_h_df[174:186].copy()

In [None]:
# Inspect the column names (already renamed in place by prophet_forecast_to_csv).
dr_3mo_forecast.columns

In [None]:
# round predicted hours column to whole hours
dr_3mo_forecast['Predicted_Hours'] = round(dr_3mo_forecast['Predicted_Hours'])

In [None]:
# Display the 12-row doctors forecast after rounding.
dr_3mo_forecast

In [None]:
# Keep only the two presentation columns and name the index axis 'Week'.
# rename_axis returns a new frame, so its result is assigned; previously it was
# discarded and the saved CSV / table image never carried the axis name.
dr_3mo_forecast = dr_3mo_forecast[['Predicted_Hours', 'Predicted_num_Providers']].rename_axis('Week')
dr_3mo_forecast

In [None]:
# save df to csv
dr_3mo_forecast.to_csv('./data/dr_12_week_prophet_forecast_date_index.csv')

#### get doctors 8-16 week forecast

In [None]:
# Take 9 rows by position (184 through 192) from the doctors table.
# NOTE(review): hard-coded positions -- presumably forecast steps 8-16;
# confirm against the length of the input data and the forecast frequency.
dr_8_to_16_wk_prophet = dr_h_df[184:193]

In [None]:
# Display the selected rows.
dr_8_to_16_wk_prophet

In [None]:
# replace the date index with a 0-based integer index
# (the old index is kept as a regular column)
dr_8_to_16_wk_prophet = dr_8_to_16_wk_prophet.reset_index()

In [None]:
# shift the 0-based labels so the first row is labelled 8 (rows become 8..16)
dr_8_to_16_wk_prophet.index = dr_8_to_16_wk_prophet.index+8

In [None]:
# round hours column to 1 decimal place (standard rounding, not rounding up)
dr_8_to_16_wk_prophet['Predicted_Hours'] = round(dr_8_to_16_wk_prophet['Predicted_Hours'],1)

In [None]:
# Keep only the two presentation columns and name the index axis 'Week'.
# rename_axis returns a new frame, so its result is assigned; previously it was
# discarded and the saved CSV / table image never carried the axis name.
dr_8_to_16_wk_prophet = dr_8_to_16_wk_prophet[['Predicted_Hours', 'Predicted_num_Providers']].rename_axis('Week')
dr_8_to_16_wk_prophet

In [None]:
# save df to csv
dr_8_to_16_wk_prophet.to_csv('./data/dr_8_to_16_wk_prophet_date_index.csv')

#### save doctors predictions as images for presentation

In [None]:
# Render the 12-row doctors forecast as a standalone table image for slides.
try:
    # current pandas exposes the table-drawing helper under pandas.plotting
    from pandas.plotting import table
except ImportError:
    # legacy location, kept as a fallback for very old pandas installs
    from pandas.tools.plotting import table
fig, ax = plt.subplots(figsize=(10,10)) # set size frame
ax.xaxis.set_visible(False)  # hide axes
ax.yaxis.set_visible(False)
ax.set_frame_on(False)
tabla = table(ax, dr_3mo_forecast, loc='center', colWidths=[0.3]*len(dr_3mo_forecast.columns))
# fixed font size, then stretch the cells so the text fits
tabla.auto_set_font_size(False)
tabla.set_fontsize(16)
tabla.scale(1.5, 2)
# save through the figure created above rather than pyplot's implicit current figure
fig.savefig('./images/dr_12wk_Prophet_forecast_date_index.png', transparent=True)

In [None]:
# Render the 8-16 doctors forecast rows as a standalone table image for slides.
try:
    # current pandas exposes the table-drawing helper under pandas.plotting
    from pandas.plotting import table
except ImportError:
    # legacy location, kept as a fallback for very old pandas installs
    from pandas.tools.plotting import table
fig, ax = plt.subplots(figsize=(10,10)) # set size frame
ax.xaxis.set_visible(False)  # hide axes
ax.yaxis.set_visible(False)
ax.set_frame_on(False)
tabla = table(ax, dr_8_to_16_wk_prophet, loc='center', colWidths=[0.3]*len(dr_8_to_16_wk_prophet.columns))
# fixed font size, then stretch the cells so the text fits
tabla.auto_set_font_size(False)
tabla.set_fontsize(16)
tabla.scale(1.5, 2)
# save through the figure created above rather than pyplot's implicit current figure
fig.savefig('./images/dr_8_to_16_wk_prophet_date_index.png', transparent=True)

#### Next Steps:
##### Add the number of providers to each model as an additional regressor

In [None]:
# Potential next steps: add exogenous variable - number of providers to each model
# help(Prophet.add_regressor)