In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from statsmodels.graphics import tsaplots
import statsmodels.api as sm
from statsmodels.tsa.arima_model import ARIMA, ARIMAResults, ARMA
from statsmodels.tsa.arima_process import ArmaProcess
from sklearn.metrics import mean_squared_error
%load_ext autoreload
%autoreload 2
import pyflux as pf
import math

In [None]:
from timeseries_functions import index_to_datetime, downsample_data_week, plot_series,\
plot_series_save_fig, plot_series_and_differences, run_augmented_Dickey_Fuller_test, \
plot_autocorrelation, plot_partial_autocorrelation, plot_decomposition

In [None]:
from timeseries_functions import make_col_vector, make_design_matrix, fit_linear_trend,\
plot_trend_data, plot_linear_trend

In [None]:
from AR_MA_functions import get_AR_model, plot_AR_model, get_AR_model_order_BIC,\
plot_BIC_AR_model, get_MA_model, plot_MA_model

In [None]:
from ARIMA_functions import get_ARIMA_model, plot_ARIMA_model, plot_ARIMA_resids,\
get_ARIMA_forecast, plot_ARIMA_forecast_and_CI, plot_data_plus_ARIMA_predictions, \
test_rolling_ARIMA_forecast,get_predictions_df_and_plot_rolling_ARIMA_forecast

In [None]:
# Load the raw appointment records; the first CSV column becomes the index.
appointments = pd.read_csv(
    filepath_or_buffer='appointments_through_04-2018.csv',
    index_col=0,
)

In [None]:
# Parse the index labels read from the CSV into pandas datetimes,
# assigning the result back onto the same frame.
appointments.index = pd.to_datetime(arg=appointments.index)

In [None]:
# Derive an hours column by dividing the recorded duration by 60
# (presumably AppointmentDuration is in minutes - confirm against the data source).
appointments['Hours_Spent'] = appointments['AppointmentDuration'].div(60)

In [None]:
# Display the frame's column labels and its index as the cell output.
appointments.columns, appointments.index

In [None]:
# Split the appointments into one frame per provider specialty.
doctor = appointments.loc[appointments['Specialty'].eq('doctor')]
RN_PA = appointments.loc[appointments['Specialty'].eq('RN/PA')]
therapist = appointments.loc[appointments['Specialty'].eq('therapist')]

In [None]:
# Total Hours_Spent per calendar date for each specialty.
# Grouping on index.date keys each row by the date part of its timestamp.
dr_hours = doctor['Hours_Spent'].groupby(doctor.index.date).sum()
RNPA_hours = RN_PA['Hours_Spent'].groupby(RN_PA.index.date).sum()
therapist_hours = therapist['Hours_Spent'].groupby(therapist.index.date).sum()

In [None]:
# Display the index produced by grouping the doctor rows on calendar date.
dr_hours.index

In [None]:
# Count the distinct providers seen on each calendar date, per specialty.
num_dr = doctor['Provider'].groupby([doctor.index.date]).nunique()
num_RNPA = RN_PA['Provider'].groupby([RN_PA.index.date]).nunique()
num_therapists = therapist['Provider'].groupby([therapist.index.date]).nunique()

In [None]:
# Collect the per-specialty series so later cells can loop over them.
providers = [
    num_dr,
    num_RNPA,
    num_therapists,
]
hours = [
    dr_hours,
    RNPA_hours,
    therapist_hours,
]

In [None]:
# Display the Python type of the grouped provider-count object.
type(num_dr)

#### test for stationarity in all time series

In [None]:
# Run the project's augmented Dickey-Fuller helper on every provider-count series.
# NOTE(review): num_diffs=2 presumably also tests differenced versions of the
# series - confirm in timeseries_functions.
for provider_counts in providers:
    run_augmented_Dickey_Fuller_test(series=provider_counts, num_diffs=2)

In [None]:
# Run the project's augmented Dickey-Fuller helper on every daily-hours series.
# NOTE(review): num_diffs=2 presumably also tests differenced versions of the
# series - confirm in timeseries_functions.
for hours_series in hours:
    run_augmented_Dickey_Fuller_test(series=hours_series, num_diffs=2)

#### determine number of AR and MA terms to add for each time series

In [None]:
# Plot the PACF and ACF (30 lags, alpha=0.05) for every provider-count series.
for provider_counts in providers:
    # Plot settings handed to the project helpers; rebuilt on each pass so
    # every call receives a fresh dict.
    params = {
        'figure.figsize': [4, 4],
        'axes.grid.axis': 'both',
        'axes.grid': True,
        'axes.labelsize': 'Medium',
        'font.size': 12.0,
        'lines.linewidth': 2,
    }
    plot_partial_autocorrelation(
        series=provider_counts, params=params, lags=30, alpha=0.05, title='PACF'
    )
    plot_autocorrelation(
        series=provider_counts, params=params, lags=30, alpha=0.05, title='ACF'
    )

In [None]:
# Plot the PACF and ACF (30 lags, alpha=0.05) for every daily-hours series.
for hours_series in hours:
    # Plot settings handed to the project helpers; rebuilt on each pass so
    # every call receives a fresh dict.
    params = {
        'figure.figsize': [4, 4],
        'axes.grid.axis': 'both',
        'axes.grid': True,
        'axes.labelsize': 'Medium',
        'font.size': 12.0,
        'lines.linewidth': 2,
    }
    plot_partial_autocorrelation(
        series=hours_series, params=params, lags=30, alpha=0.05, title='PACF'
    )
    plot_autocorrelation(
        series=hours_series, params=params, lags=30, alpha=0.05, title='ACF'
    )

#### combine dataframes by specialty and downsample to weekly

In [None]:
# Turn each provider-count Series into a one-column DataFrame so it can be merged.
num_dr, num_RNPA, num_therapists = (
    counts.to_frame() for counts in (num_dr, num_RNPA, num_therapists)
)

In [None]:
# Turn each daily-hours Series into a one-column DataFrame so it can be merged.
dr_hours, RNPA_hours, therapist_hours = (
    totals.to_frame() for totals in (dr_hours, RNPA_hours, therapist_hours)
)

In [None]:
# Join doctor counts and doctor hours on their shared date index,
# keeping dates that appear in either frame.
doctors = num_dr.merge(
    right=dr_hours,
    how='outer',
    left_index=True,
    right_index=True,
)

In [None]:
# Display the merged doctors frame's index as the cell output.
doctors.index

In [None]:
# Join RN/PA counts and RN/PA hours on their shared date index,
# keeping dates that appear in either frame.
RNPA = num_RNPA.merge(
    right=RNPA_hours,
    how='outer',
    left_index=True,
    right_index=True,
)

In [None]:
# Join therapist counts and therapist hours on their shared date index.
# how='outer' is given explicitly so this merge matches the doctors and RNPA
# merges; without it pd.merge defaults to an inner join, which would drop any
# date present in only one of the two frames.
therapists = pd.merge(
    left=num_therapists,
    right=therapist_hours,
    how='outer',
    left_index=True,
    right_index=True,
)

In [None]:
# Gather the merged per-specialty frames so they can be processed in a loop.
combined_df = [
    doctors,
    RNPA,
    therapists,
]

In [None]:
# Convert each merged frame's index to pandas datetimes. The assignment
# mutates the frame objects held in the list, so doctors, RNPA and therapists
# are all updated.
for specialty_frame in combined_df:
    specialty_frame.index = pd.to_datetime(arg=specialty_frame.index)

In [None]:
# Rebind doctors to the output of the project's downsample_data_week helper.
# NOTE(review): presumably aggregates the daily rows to weekly frequency -
# confirm the aggregation used in timeseries_functions.
doctors = downsample_data_week(doctors)

In [None]:
# Rebind RNPA to the output of the project's downsample_data_week helper.
# NOTE(review): presumably aggregates the daily rows to weekly frequency -
# confirm the aggregation used in timeseries_functions.
RNPA = downsample_data_week(RNPA)

In [None]:
# Rebind therapists to the output of the project's downsample_data_week helper.
# NOTE(review): presumably aggregates the daily rows to weekly frequency -
# confirm the aggregation used in timeseries_functions.
therapists = downsample_data_week(therapists)

In [None]:
# therapists.index

#### run naive regression on undifferenced time series

In [None]:
# Display the columns of the downsampled doctors frame as the cell output.
doctors.columns

In [None]:
# Naive OLS on the (undifferenced) doctors frame: regress Hours_Spent on the
# provider count plus an intercept.
# statsmodels' OLS does not add an intercept by itself, so a constant column
# of ones is appended to the frame first.
doctors['const'] = 1

# Selecting several columns needs a list of labels. The previous
# doctors['Provider','const'] passed a single tuple key, which pandas looks up
# as one column label and raises KeyError.
model1 = sm.OLS(
    endog=doctors['Hours_Spent'],
    exog=doctors[['Provider', 'const']],
)
results1 = model1.fit()
print(results1.summary())