In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.metrics import mean_squared_error

In [None]:
from timeseries_functions import index_to_datetime, downsample_data_week, plot_series,\
plot_series_save_fig, plot_series_and_differences, run_augmented_Dickey_Fuller_test, \
plot_autocorrelation, plot_partial_autocorrelation, plot_decomposition,\
get_seasonal_decomposition, plot_2_series_double_yaxis

In [None]:
# Prophet uses an additive regression model
from fbprophet import Prophet

In [None]:
from prophet_functions import get_prophet_training_mse, get_prophet_test_mse,\
get_prophet_forecast, plot_prophet_forecast

### using weekly data

In [None]:
# Load the three hours-per-provider tables (doctors, RN/PAs, therapists).
dr_df, RNPA_df, ther_df = map(pd.read_csv, (
    'doctors_hours_per_provider.csv',
    'RNPA_hours_per_provider.csv',
    'therapists_hours_per_provider.csv',
))

In [None]:
# Inclusive date bounds (ISO strings) of the training and test windows.
train_start, train_end = '2015-01-18', '2018-02-25'
test_start, test_end = '2018-03-04', '2018-04-29'

In [None]:
# Preview the rows that fall inside the test window.
# `dr_df` is read without `index_col`, so it has a plain RangeIndex and the date
# strings cannot be used as `.loc` slice labels. Filter on the parsed dates
# instead. Assumes the first column holds the dates (it is the column that is
# renamed to 'date' further down) -- TODO confirm against the CSV.
dr_df[pd.to_datetime(dr_df.iloc[:, 0]).between(pd.Timestamp(test_start), pd.Timestamp(test_end))]

In [None]:
# Split the doctors frame into train / test windows by date.
# `dr_df` has a RangeIndex (no `index_col` on read), so slicing `.loc` with date
# strings raises; build boolean masks from the parsed dates instead. Assumes the
# first column holds the dates (the one renamed to 'date' further down) --
# TODO confirm against the CSV.
_dr_dates = pd.to_datetime(dr_df.iloc[:, 0])
dr_train = dr_df[_dr_dates.between(pd.Timestamp(train_start), pd.Timestamp(train_end))]
# Bounded by `test_end` as well, so this matches the test-window preview.
dr_test = dr_df[_dr_dates.between(pd.Timestamp(test_start), pd.Timestamp(test_end))]

In [None]:
# Number of rows in the test split.
dr_test.shape[0]

In [None]:
def get_prophet_training_mse(forecast, df_name, periods):
    """Print and return the MSE of `yhat` against `y` over the known dates.

    Parameters
    ----------
    forecast : pandas.DataFrame
        Frame with `y` and `yhat` columns. Its last `periods` rows are excluded
        from the score (in `get_prophet_forecast` those are the future rows,
        which have no observed `y`).
    df_name : str
        Label inserted into the printed message.
    periods : int
        Number of trailing rows to leave out.

    Returns
    -------
    float
        Mean squared error over the retained rows.

    Raises
    ------
    ValueError
        If no rows remain after excluding the trailing `periods` rows.
    """
    # Use an explicit stop position: `iloc[0:-periods]` is empty when
    # periods == 0 because `-0` is just `0`.
    n_known = max(len(forecast) - periods, 0)
    predictions = forecast.iloc[:n_known]
    if predictions.empty:
        raise ValueError('no rows left to score after excluding the last '
                         '{p} rows'.format(p=periods))
    mse = mean_squared_error(predictions['y'], predictions['yhat'])
    print('MSE for {name} training set is {error}'.format(name=df_name, error=mse))
    return mse

In [None]:
def get_prophet_test_mse(forecast, df_name, periods, lookback=152):
    """Print and return the MSE of `yhat` against `y` over a trailing window.

    The window starts `lookback` rows before the end of `forecast` and stops
    before the final `periods` rows. Rows containing any NaN are dropped before
    scoring.

    Parameters
    ----------
    forecast : pandas.DataFrame
        Frame with `y` and `yhat` columns.
    df_name : str
        Label inserted into the printed message.
    periods : int
        Number of trailing rows to leave out of the window.
    lookback : int, optional
        Distance, in rows from the end of `forecast`, at which the window
        starts. Defaults to 152, the value that was previously hard-coded.
        NOTE(review): the reason for 152 is not visible here -- confirm.

    Returns
    -------
    float
        Mean squared error over the retained rows.

    Raises
    ------
    ValueError
        If the window is empty once NaN rows are removed.
    """
    n_rows = len(forecast)
    # Explicit positions: a negative stop of `-periods` selects nothing when
    # periods == 0, and clamping keeps short frames from wrapping around.
    start = max(n_rows - lookback, 0)
    stop = max(n_rows - periods, 0)
    # dropna() returns a new frame; the previous in-place drop operated on an
    # iloc slice of the caller's frame and triggered SettingWithCopyWarning.
    predictions = forecast.iloc[start:stop].dropna(axis=0)
    if predictions.empty:
        raise ValueError('no rows left to score in the test window')
    mse = mean_squared_error(predictions['y'], predictions['yhat'])
    print('MSE for {name} test set is {error}'.format(name=df_name, error=mse))
    return mse

In [None]:
def test_prophet_forecast(test_df, df_name, df_cols, date_hours_cols, pred_cols, periods, freq='D'):
    """Fit Prophet on `test_df`, forecast ahead, and report training/test MSE.

    Parameters
    ----------
    test_df : pandas.DataFrame
        Input frame. Its columns are renamed to `df_cols` in place.
    df_name : str
        Label used in the printed MSE messages.
    df_cols : list of str
        Names assigned positionally to the columns of `test_df`.
    date_hours_cols : list of str
        The two columns (date, value) passed to Prophet as `ds` and `y`.
    pred_cols : list of str
        Forecast columns to place next to the observed values.
    periods : int
        Number of future steps to forecast.
    freq : str, optional
        Step size handed to `make_future_dataframe`. Defaults to 'D', Prophet's
        own default and the previous behaviour; pass e.g. 'W' for weekly data.

    Returns
    -------
    tuple
        `(model, forecast, df_pred)`: the fitted model, Prophet's forecast
        frame, and the history joined column-wise with `forecast[pred_cols]`.
    """
    test_df.columns = df_cols
    # Previously read from an undefined name `df`; the input is `test_df`.
    # reset_index makes the column-wise concat below line up by position.
    history = test_df[date_hours_cols].reset_index(drop=True)
    history.columns = ['ds', 'y']
    model = Prophet()
    model.fit(history)
    future = model.make_future_dataframe(periods=periods, freq=freq)
    forecast = model.predict(future)
    df_pred = pd.concat([history, forecast[pred_cols]], axis=1)
    # Previously called get_training_mse / get_test_mse, which do not exist.
    get_prophet_training_mse(df_pred, df_name, periods)
    get_prophet_test_mse(df_pred, df_name, periods)
    return model, forecast, df_pred

In [None]:
def get_prophet_forecast(df, df_name, df_cols, date_hours_cols, pred_cols, periods, freq='D'):
    """Fit Prophet on `df`, forecast ahead, and report training/test MSE.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. Its columns are renamed to `df_cols` in place (kept from
        the original implementation, so the caller's frame is modified).
    df_name : str
        Label used in the printed MSE messages.
    df_cols : list of str
        Names assigned positionally to the columns of `df`.
    date_hours_cols : list of str
        The two columns (date, value) passed to Prophet as `ds` and `y`.
    pred_cols : list of str
        Forecast columns to place next to the observed values.
    periods : int
        Number of future steps to forecast.
    freq : str, optional
        Step size handed to `make_future_dataframe`. Defaults to 'D', Prophet's
        own default and the previous behaviour; pass e.g. 'W' for weekly data
        so the future rows continue at the same spacing as the history.

    Returns
    -------
    tuple
        `(model, forecast, df_pred)`: the fitted model, Prophet's forecast
        frame, and the history joined column-wise with `forecast[pred_cols]`.
    """
    df.columns = df_cols
    # reset_index makes the column-wise concat below line up by position,
    # whatever index the caller's frame carries.
    # NOTE(review): this still assumes the rows are already in date order.
    history = df[date_hours_cols].reset_index(drop=True)
    history.columns = ['ds', 'y']
    model = Prophet()
    model.fit(history)
    future = model.make_future_dataframe(periods=periods, freq=freq)
    forecast = model.predict(future)
    df_pred = pd.concat([history, forecast[pred_cols]], axis=1)
    get_prophet_training_mse(df_pred, df_name, periods)
    get_prophet_test_mse(df_pred, df_name, periods)
    return model, forecast, df_pred

In [None]:
def plot_prophet_forecast(model, forecast, df_name):
    """Draw the forecast plot and the component plots for `model`.

    `df_name` is used to build the y-axis label of the forecast plot.
    """
    hours_label = '{n} Hours'.format(n=df_name)
    model.plot(forecast, xlabel='Date', ylabel=hours_label)
    model.plot_components(forecast)

In [None]:
# Number of future steps requested from Prophet beyond the history.
periods = 90
# Forecast columns placed next to the observed values.
pred_cols = ['yhat', 'yhat_lower', 'yhat_upper']
# Names assigned positionally to the columns of each input frame.
df_cols = [
    'date',
    'Number_Providers',
    'Hours',
    'Hours_per_Provider',
]
# The two columns handed to Prophet as `ds` and `y`.
date_hours_cols = ['date', 'Hours']

In [None]:
# Pair each display name with its frame, then split into the two parallel lists.
df_names, weekly_data = map(list, zip(
    ('Doctors', dr_df),
    ('RN/PAs', RNPA_df),
    ('therapists', ther_df),
))

In [None]:
# Fit, score and plot a Prophet forecast for each of the three categories.
# model / forecast / predictions_df are rebound on every pass, so after the
# loop they hold the results for the last category only.
for i, category_df in enumerate(weekly_data):
    category_name = df_names[i]
    model, forecast, predictions_df = get_prophet_forecast(
        df=category_df,
        df_name=category_name,
        df_cols=df_cols,
        date_hours_cols=date_hours_cols,
        pred_cols=pred_cols,
        periods=periods,
    )
    plot_prophet_forecast(model, forecast, df_name=category_name)

In [None]:
# predictions_df

#### import daily data for each category

In [None]:
# appointments = pd.read_csv('appointments_through_04-2018.csv', index_col='AppointmentDate')

In [None]:
# # group by specialty
# doctors = appointments[appointments['Specialty'] == 'doctor']
# RN_PA = appointments[appointments['Specialty'] == 'RN/PA']
# therapists = appointments[appointments['Specialty'] == 'therapist']

In [None]:
# specialties = [doctors, RN_PA, therapists]

In [None]:
# for s in specialties:
#     s.index = pd.to_datetime(s.index)

In [None]:
# doc_hours = doctors.groupby(doctors.index.date)['AppointmentDuration'].sum()/60
# RN_PA_hours = RN_PA.groupby(RN_PA.index.date)['AppointmentDuration'].sum()/60
# therapist_hours = therapists.groupby(therapists.index.date)['AppointmentDuration'].sum()/60

In [None]:
# doc_hours = doc_hours.to_frame()
# doc_hours['DS'] = doc_hours.index