In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from statsmodels.graphics import tsaplots
import statsmodels.api as sm
from statsmodels.tsa.arima_model import ARIMA, ARIMAResults, ARMA
from statsmodels.tsa.arima_process import ArmaProcess
from sklearn.metrics import mean_squared_error

import pyflux as pf
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
np.random.seed(42)
%load_ext autoreload
%autoreload 2

In [None]:
from timeseries_functions import index_to_datetime, downsample_data_week, plot_series,\
plot_series_save_fig, plot_series_and_differences, run_augmented_Dickey_Fuller_test, \
plot_autocorrelation, plot_partial_autocorrelation, plot_decomposition,\
get_seasonal_decomposition, plot_2_series_double_yaxis

In [None]:
from timeseries_functions import make_col_vector, make_design_matrix, fit_linear_trend,\
plot_trend_data, plot_linear_trend

In [None]:
# plt.rcParams.keys()

### Next, look at the number of providers in each category over time

### Load data

In [None]:
# Load the full appointments export from the working directory.
appointments_full = pd.read_csv('appointments_full.csv')

In [None]:
# Display the (rows, columns) size of the raw export as a sanity check.
appointments_full.shape

In [None]:
# Keep only the four columns selected here; the rest of the export is dropped.
appointments = appointments_full[['AppointmentDate', 'Provider', 'Specialty', 'AppointmentDuration']]

In [None]:
# Use the appointment date as the row index.
appointments = appointments.set_index('AppointmentDate')

In [None]:
# Parse the index into datetimes so date-based slicing and resampling work.
appointments.index = pd.to_datetime(appointments.index)

In [None]:
# keep data through April 2018
# The slice needs an upper bound: `['2018-04-30':]` would keep everything
# *from* April 30 onward instead. Sorting first makes the label-based slice
# on the DatetimeIndex well defined even if the rows are not in date order.
appointments = appointments.sort_index().loc[:'2018-04-30']

In [None]:
# save appointments through April as separate csv
# (written to the working directory; the date index is saved as the first column)
appointments.to_csv('appointments_through_04-2018.csv')

In [None]:
# Split the appointments into one frame per provider specialty.
doctors = appointments.loc[appointments['Specialty'].eq('doctor')]
RN_PA = appointments.loc[appointments['Specialty'].eq('RN/PA')]
therapists = appointments.loc[appointments['Specialty'].eq('therapist')]

In [None]:
# Replace each per-specialty frame with a Series holding, for every calendar
# date in its index, the number of distinct providers seen on that date.
doctors, RN_PA, therapists = (
    frame.groupby([frame.index.date])['Provider'].nunique()
    for frame in (doctors, RN_PA, therapists)
)

In [None]:
# Collect the three per-date provider-count series so they can be handled together.
provider = [doctors, RN_PA, therapists]

In [None]:
# NOTE(review): index_to_datetime is imported from timeseries_functions and its
# return value is discarded here, so it presumably converts each series' index
# to datetimes in place -- confirm against the helper.
for p in provider:
    index_to_datetime(p)

In [None]:
def downsample_data_week(data, fill_method='bfill'):
    """Downsample datetime-indexed data to weekly bins of distinct-value counts.

    Each weekly bin (pandas rule ``'W'``) holds the number of distinct values
    that occur in ``data`` during that week; missing entries in the result
    are then filled in the requested direction.

    Note: this definition shadows the function of the same name imported
    from ``timeseries_functions`` earlier in the notebook.

    Parameters
    ----------
    data : pandas.Series or pandas.DataFrame
        Object with a DatetimeIndex (required by ``resample``).
    fill_method : {'bfill', 'backfill', 'ffill', 'pad'}, default 'bfill'
        Direction used to fill missing values in the weekly result.

    Returns
    -------
    pandas.Series or pandas.DataFrame
        A new weekly-resampled object; ``data`` itself is not modified.

    Raises
    ------
    ValueError
        If ``fill_method`` is not one of the supported names.
    """
    # fillna(method=...) is deprecated in pandas; dispatch to the dedicated
    # bfill()/ffill() methods instead, accepting the same method names.
    if fill_method in ('bfill', 'backfill'):
        fill_backward = True
    elif fill_method in ('ffill', 'pad'):
        fill_backward = False
    else:
        raise ValueError(
            "fill_method must be 'bfill', 'backfill', 'ffill' or 'pad'; "
            "got {!r}".format(fill_method)
        )
    weekly = data.resample(rule='W').nunique()
    return weekly.bfill() if fill_backward else weekly.ffill()

In [None]:
# Downsample every series in the list to weekly frequency. The results are
# collected into a new list: assigning to the loop variable inside a `for`
# loop only rebinds that name and leaves the list (and the series) untouched.
provider = [downsample_data_week(p) for p in provider]

In [None]:
# Rebind each specialty's series to its weekly-downsampled version.
# NOTE(review): the inputs are per-date provider counts and the downsampler
# applies nunique(), so each weekly value is the number of distinct daily
# counts in that week -- confirm this is the intended measure.
# Re-running this cell downsamples the already-weekly series again.
doctors, RN_PA, therapists = (
    downsample_data_week(daily) for daily in (doctors, RN_PA, therapists)
)

In [None]:
# Plot the weekly doctors series (plot_series is imported from timeseries_functions).
plot_series(doctors, figsize=(8,4), plot_name='Number of Doctors')

In [None]:
# Plot the weekly RN/PA series (plot_series is imported from timeseries_functions).
plot_series(RN_PA, figsize=(8,4), plot_name='Number of RN/PAs')

In [None]:
# Plot the weekly therapists series (plot_series is imported from timeseries_functions).
plot_series(therapists, figsize=(8,4), plot_name='Number of therapists')

In [None]:
# Frequency of each weekly value in the doctors series.
# The top-level pd.value_counts() is deprecated; the Series method is the
# supported equivalent.
doctors.value_counts()

In [None]:
# Rebuild the list so it refers to the current (weekly) series objects.
provider = [doctors, RN_PA, therapists]

In [None]:
def plot_series_and_differences(series, ax, num_diff, params, title=''):
    """Plot a series and its successive differences on a stack of axes.

    ``ax[0]`` shows the raw series and ``ax[k]`` shows the k-th order
    difference (the series differenced k times) for ``k = 1..num_diff``.

    Note: this definition shadows the function of the same name imported
    from ``timeseries_functions`` earlier in the notebook.

    Parameters
    ----------
    series : pandas.Series
        Data to plot; its index supplies the x values of every panel.
    ax : sequence of matplotlib Axes
        Must hold at least ``num_diff + 1`` axes; indexing past the end
        raises IndexError.
    num_diff : int
        Highest differencing order to plot.
    params : dict
        Passed to ``plt.rcParams.update``; this changes the global rcParams,
        not just this figure.
    title : str, optional
        Appended to the title of the raw-series panel.

    Returns
    -------
    None
        The function only draws onto the supplied axes.
    """
    plt.rcParams.update(params)

    panels = [('Raw series: {}'.format(title), series)]
    differenced = series
    for order in range(1, num_diff + 1):
        # Difference the previous result again to obtain the next order.
        # series.diff(order) would instead give the lag-`order` change
        # x[t] - x[t-order], which is not the order-th difference.
        differenced = differenced.diff()
        panels.append(('Difference # {}'.format(order), differenced))

    for position, (panel_title, values) in enumerate(panels):
        axis = ax[position]
        axis.plot(series.index, values)
        axis.set_title(panel_title)
        # Rotate the labels the date locator generates. Passing the index
        # dates to set_xticklabels() would label the existing ticks with the
        # first few dates of the series, regardless of tick position.
        axis.tick_params(axis='x', labelrotation=30)


In [None]:
# rcParams overrides that plot_series_and_differences applies via
# plt.rcParams.update before drawing.
params = {
    'figure.figsize': [8, 8],
    'axes.grid.axis': 'both',
    'axes.grid': True,
    'axes.labelsize': 'Medium',
    'font.size': 12.0,
    'lines.linewidth': 2,
}
fig, axes = plt.subplots(3, figsize=(10,12))
# The helper draws onto `axes` and returns nothing, so its result is not
# assigned: doing so would replace the figure handle in `fig` with None.
plot_series_and_differences(series=doctors, ax=axes, num_diff=2, params=params,
                            title='Number of Doctors')
plt.tight_layout()

In [None]:
# rcParams overrides that plot_series_and_differences applies via
# plt.rcParams.update before drawing.
params = {
    'figure.figsize': [8, 8],
    'axes.grid.axis': 'both',
    'axes.grid': True,
    'axes.labelsize': 'Medium',
    'font.size': 12.0,
    'lines.linewidth': 2,
}
fig, axes = plt.subplots(3, figsize=(10,12))
# The helper draws onto `axes` and returns nothing, so its result is not
# assigned: doing so would replace the figure handle in `fig` with None.
plot_series_and_differences(series=RN_PA, ax=axes, num_diff=2, params=params,
                            title='Number of RN/PAs')
plt.tight_layout()

In [None]:
# rcParams overrides that plot_series_and_differences applies via
# plt.rcParams.update before drawing.
params = {
    'figure.figsize': [8, 8],
    'axes.grid.axis': 'both',
    'axes.grid': True,
    'axes.labelsize': 'Medium',
    'font.size': 12.0,
    'lines.linewidth': 2,
}
fig, axes = plt.subplots(3, figsize=(10,12))
# The helper draws onto `axes` and returns nothing, so its result is not
# assigned: doing so would replace the figure handle in `fig` with None.
plot_series_and_differences(series=therapists, ax=axes, num_diff=2, params=params,
                            title='Number of Therapists')
plt.tight_layout()

In [None]:
# Plot settings handed to the imported plot_decomposition helper together
# with the weekly doctors series.
params = {
    'figure.figsize': [8, 8],
    'axes.grid.axis': 'both',
    'axes.grid': True,
    'axes.labelsize': 'Medium',
    'font.size': 12.0,
    'lines.linewidth': 2,
}
plot_decomposition(doctors, params)

In [None]:
# Plot settings handed to the imported plot_decomposition helper together
# with the weekly RN/PA series.
params = {
    'figure.figsize': [8, 8],
    'axes.grid.axis': 'both',
    'axes.grid': True,
    'axes.labelsize': 'Medium',
    'font.size': 12.0,
    'lines.linewidth': 2,
}
plot_decomposition(RN_PA, params)

In [None]:
# Plot settings handed to the imported plot_decomposition helper together
# with the weekly therapists series.
params = {
    'figure.figsize': [8, 8],
    'axes.grid.axis': 'both',
    'axes.grid': True,
    'axes.labelsize': 'Medium',
    'font.size': 12.0,
    'lines.linewidth': 2,
}
plot_decomposition(therapists, params)

### Determine the ratio of demand to number of providers

#### Import hours data

In [None]:
# Load the doctor hours file: it has no header row, and its first column
# becomes the index, which is then parsed into datetimes.
dr_hours = pd.read_csv('all_dr_hours.csv', index_col=0, header=None)
dr_hours.index = pd.to_datetime(dr_hours.index)

In [None]:
# Load the RN/PA hours file: it has no header row, and its first column
# becomes the index, which is then parsed into datetimes.
RNPA_hours = pd.read_csv('all_RN_PA_hours.csv', index_col=0, header=None)
RNPA_hours.index = pd.to_datetime(RNPA_hours.index)

In [None]:
# Load the therapist hours file: it has no header row, and its first column
# becomes the index, which is then parsed into datetimes.
ther_hours = pd.read_csv('all_therapist_hours.csv', index_col=0, header=None)
ther_hours.index = pd.to_datetime(ther_hours.index)

In [None]:
# provider = [doctors, RN_PA, therapists]
# plot hours and num providers on same plot, shared x axis

In [None]:
def plot_2_series_double_yaxis(x, y1, y2, figsize=(10,10), fontsize=12, title='',
                               y1_label='', y2_label='', xlabel=''):
    """Plot two series against a shared x axis with separate y axes.

    ``y1`` is drawn in red against the left y axis and ``y2`` in blue against
    a twin y axis on the right; the figure is then shown with ``plt.show()``.

    Note: this definition shadows the function of the same name imported
    from ``timeseries_functions`` earlier in the notebook.

    Parameters
    ----------
    x : sequence
        Shared x values for both series.
    y1, y2 : sequence
        Values plotted on the left and right y axes respectively.
    figsize : tuple, default (10, 10)
        Figure size passed to ``plt.subplots``.
    fontsize : int, default 12
        Size of the axis labels; the title uses ``fontsize + 4``.
    title, y1_label, y2_label, xlabel : str, optional
        Figure title and axis labels.

    Returns
    -------
    None
    """
    # A single axes has nothing to share its x axis with, so `sharex` is
    # not passed to subplots().
    fig, left_axis = plt.subplots(figsize=figsize)
    right_axis = left_axis.twinx()

    left_axis.set_title(title, fontsize=fontsize + 4)
    left_axis.plot(x, y1, 'r-')
    left_axis.set_ylabel(y1_label, fontsize=fontsize)
    left_axis.set_xlabel(xlabel, fontsize=fontsize)
    # Rotate the automatically generated tick labels. Passing `x` to
    # set_xticklabels() would label the existing ticks with the first few
    # x values, regardless of where the ticks actually sit.
    left_axis.tick_params(axis='x', labelrotation=45)

    right_axis.plot(x, y2, 'b-')
    right_axis.set_ylabel(y2_label, fontsize=fontsize)
    plt.show()
    

In [None]:
# Doctors: appointment hours (left axis, red) against the weekly doctors
# series (right axis, blue) on a shared date axis.
x = dr_hours.index.date
dr_y1 = dr_hours.values
dr_y2 = doctors.values
# A single axes has nothing to share its x axis with, so `sharex` is omitted.
fig, ax1 = plt.subplots(figsize=(14,8))
ax2 = ax1.twinx()
ax1.set_title('Doctors', fontsize=20)
ax1.plot(x, dr_y1, 'r-')
ax1.set_ylabel('Dr Appointment Hours', fontsize=16)
ax1.set_xlabel('Date', fontsize=16)
# Rotate the automatically generated date labels. Passing every index date to
# set_xticklabels() would label the existing ticks with the first few dates,
# regardless of where the ticks actually sit.
ax1.tick_params(axis='x', labelrotation=45)
ax2.plot(x, dr_y2, 'b-')
ax2.set_ylabel('# of Doctors', fontsize=16)
plt.show()

In [None]:
# Decompose the doctor hours data with the helper imported from
# timeseries_functions.
# NOTE(review): the unpacking assumes the helper returns (seasonal, trend,
# residuals) in that order -- confirm against its definition.
dr_seasonal, dr_trend, dr_resids = get_seasonal_decomposition(dr_hours)

In [None]:
# Doctor appointment-hours trend against the weekly doctors series.
# The title reads 'Doctors' to match the data and axis labels (it previously
# said 'Therapists', carried over from another cell).
plot_2_series_double_yaxis(
    x=dr_hours.index.date, y1=dr_trend, y2=doctors.values,
    figsize=(14,8), fontsize=16, title='Doctors',
    y1_label='Dr Appointment Hours Trend', y2_label='# Doctors', xlabel='Date')

In [None]:
# RN/PA appointment hours against the weekly RN/PA series.
plot_2_series_double_yaxis(
    x=RNPA_hours.index.date,
    y1=RNPA_hours.values,
    y2=RN_PA.values,
    figsize=(14,8),
    fontsize=16,
    title='RN/PAs',
    y1_label='RN/PA Appointment Hours',
    y2_label='# RN/PAs',
    xlabel='Date',
)

In [None]:
# Therapist appointment hours against the weekly therapists series.
# y2 uses the therapists series to match the '# Therapists' axis label (it
# previously plotted RN_PA.values, carried over from the RN/PA cell).
plot_2_series_double_yaxis(
    x=ther_hours.index.date, y1=ther_hours.values, y2=therapists.values,
    figsize=(14,8), fontsize=16, title='Therapists',
    y1_label='Therapist Appointment Hours', y2_label='# Therapists', xlabel='Date')