In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from statsmodels.graphics import tsaplots
import statsmodels.api as sm
from statsmodels.tsa.arima_model import ARIMA, ARIMAResults, ARMA
from statsmodels.tsa.arima_process import ArmaProcess
from statsmodels.stats.diagnostic import acorr_ljungbox
from sklearn.metrics import mean_squared_error

import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
np.random.seed(42)
%load_ext autoreload
%autoreload 2
np.random.seed(42)

In [None]:
from LSTM_functions import split_and_reshape_data, fit_sequential_LSTM, get_LSTM_predictions,\
inverse_transform, calculate_RMSE, prep_predictions_for_plotting, plot_data_LSTM_predictions

In [None]:
from timeseries_functions import index_to_datetime, plot_all_df_columns, weekly_resample, plot_series,\
plot_series_save_fig, plot_series_and_differences, run_augmented_Dickey_Fuller_test, \
plot_autocorrelation, plot_partial_autocorrelation, plot_decomposition

In [None]:
from timeseries_functions import make_col_vector, make_design_matrix, fit_linear_trend,\
plot_trend_data, plot_linear_trend

In [None]:
# plt.rcParams.keys()

### import data

In [None]:
# Read one hours-per-provider CSV per provider group; in each file the first
# column is used as the DataFrame index.
dr_df, RNPA_df, ther_df = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        'doctors_hours_per_provider.csv',
        'RNPA_hours_per_provider.csv',
        'therapists_hours_per_provider.csv',
    )
)

In [None]:
# Group the three provider DataFrames so the same step can be applied to each.
all_df = [dr_df, RNPA_df, ther_df]

In [None]:
# convert index to datetime
# NOTE(review): the helper's return value is discarded, so this loop only has an
# effect if index_to_datetime modifies the DataFrame it receives in place —
# confirm against timeseries_functions.
for df in all_df:
    index_to_datetime(df)

In [None]:
# Date labels that bound the training window and mark where the test window begins.
train_start, train_end, test_start = '2015-01-12', '2018-02-26', '2018-03-05'

In [None]:
# Doctors: label-based slices — training rows from train_start through
# train_end, test rows from test_start to the end of the table.
dr_train, dr_test = dr_df.loc[train_start:train_end], dr_df.loc[test_start:]

In [None]:
# RN/PAs: label-based slices — training rows from train_start through
# train_end, test rows from test_start to the end of the table.
RNPA_train, RNPA_test = RNPA_df.loc[train_start:train_end], RNPA_df.loc[test_start:]

In [None]:
# Therapists: label-based slices — training rows from train_start through
# train_end, test rows from test_start to the end of the table.
ther_train, ther_test = ther_df.loc[train_start:train_end], ther_df.loc[test_start:]

In [None]:
# Train and test splits collected in the same order: doctors, RN/PAs, therapists.
train_df = [dr_train, RNPA_train, ther_train]
test_df = [dr_test, RNPA_test, ther_test]

In [None]:
# Display the doctors test split to eyeball the rows selected from test_start onward.
dr_test

### Long Short-Term Memory network (LSTM)

#### Future predictions

In [None]:
# reshape and scale the data
# A pandas Series cannot be reshaped directly (Series.reshape was deprecated and
# later removed), so take a copy of the column and reshape its underlying NumPy
# array into a 2D column vector of shape (n_samples, 1).
data = dr_df['Hours'].copy().values.reshape(-1, 1)
# scale/normalize data into the [0, 1] range; fit_transform returns a new array
scaler = MinMaxScaler(feature_range=(0, 1))
data = scaler.fit_transform(data)

In [None]:
# convert data values into dataset matrix
def create_dataset(data, num_steps=12):
    """Build (window, next value) training pairs from a single-column array.

    Parameters
    ----------
    data : 2D array indexed as ``data[row, 0]``; only column 0 is read.
    num_steps : int
        Number of consecutive rows in each input window.

    Returns
    -------
    tuple of np.ndarray
        ``(X, y)`` where row ``i`` of ``X`` is ``data[i:i + num_steps, 0]`` and
        ``y[i]`` is the value that immediately follows it,
        ``data[i + num_steps, 0]``. Both are empty when ``data`` has no more
        than ``num_steps`` rows.
    """
    dataX, dataY = [], []
    # The last usable window starts at len(data) - num_steps - 1 because its
    # target is the final row; the previous bound (one smaller) silently
    # dropped that last sample.
    for i in range(len(data) - num_steps):
        dataX.append(data[i:i + num_steps, 0])
        dataY.append(data[i + num_steps, 0])
    return np.array(dataX), np.array(dataY)

In [None]:
def create_entire_dataset(data, num_steps=12):
    """Slice a single-column array into every overlapping window of ``num_steps`` rows.

    Parameters
    ----------
    data : 2D array indexed as ``data[row, 0]``; only column 0 is read.
    num_steps : int
        Number of consecutive rows in each window.

    Returns
    -------
    np.ndarray
        Array whose row ``i`` is ``data[i:i + num_steps, 0]``; shape
        ``(len(data) - num_steps + 1, num_steps)``, or empty when ``data`` has
        fewer than ``num_steps`` rows.
    """
    # A full window can start anywhere up to and including len(data) - num_steps;
    # the previous bound (len(data) - num_steps - 1, exclusive) left out the
    # final two windows, so the end of the series never reached the model.
    n_windows = len(data) - num_steps + 1
    return np.array([data[start:start + num_steps, 0] for start in range(n_windows)])

In [None]:
# Check the shape of the scaled series (a single column after reshape(-1, 1)).
data.shape

In [None]:
# Slice the scaled series into overlapping windows of 12 consecutive values,
# one window per row.
dataset = create_entire_dataset(data, num_steps=12)

In [None]:
# Shape of the window matrix before the feature axis is added.
dataset.shape

In [None]:
# Add a trailing axis of size 1 so the array is 3D: (windows, steps, 1).
# The window length is taken from the array itself rather than hard-coding 12,
# so it follows whatever num_steps was used to build `dataset`; re-running this
# cell on the already-3D array leaves the shape unchanged.
dataset = dataset.reshape(dataset.shape[0], dataset.shape[1], 1)

In [None]:
# Confirm the array is now 3D with a trailing axis of size 1.
dataset.shape

In [None]:
# Sequence-to-sequence pairs: X holds every step of each window except the last,
# y holds every step except the first, so y[:, t] is the value that follows X[:, t].
X, y = dataset[:, :-1, :], dataset[:, 1:, :]

In [None]:
# X and y should have identical shapes (each is one step shorter than a window).
X.shape, y.shape

In [None]:
# design network: a 4-unit LSTM that emits an output at every time step
# (return_sequences=True), followed by a Dense(1) layer so each step yields one value.
model1 = Sequential([
    LSTM(4, input_shape=X.shape[1:], return_sequences=True),
    Dense(1),
])
model1.compile(optimizer='adam', loss='mean_squared_error')
# fit network (one sample per batch, per-epoch log lines)
model1.fit(X, y, batch_size=1, epochs=100, verbose=2)

In [None]:
# Stateful version of model1: same layers, but with a fixed batch dimension of
# X.shape[0] and a variable number of time steps (None).
model2 = Sequential()
model2.add(LSTM(4, batch_input_shape=(X.shape[0], None, 1), return_sequences=True, stateful=True))
model2.add(Dense(1))
model2.compile(loss='mean_squared_error', optimizer='adam')
# copy weights from previous model
model2.set_weights(model1.get_weights())
# fit network
# A stateful LSTM only accepts batches of exactly the size declared in
# batch_input_shape, so the batch size must be X.shape[0] (batch_size=1 does not
# match that fixed dimension). shuffle=False keeps sample i in row i of the
# persistent state from one epoch to the next.
model2.fit(X, y, epochs=100, batch_size=X.shape[0], shuffle=False, verbose=2)

In [None]:
# predict on current data
# Clear any state carried over from training, then predict with the batch size
# model2 was built with: predict() otherwise falls back to its default batch
# size of 32, which does not match the stateful model's fixed batch dimension.
model2.reset_states()
predictions = model2.predict(dataset, batch_size=X.shape[0])

#### multivariate LSTM

In [None]:
# Group the three provider DataFrames again for the multivariate section.
all_df = [dr_df, RNPA_df, ther_df]

In [None]:
# Inspect the doctors table's column names and index before plotting by column position.
dr_df.columns, dr_df.index

In [None]:
def plot_all_df_columns(df, col_nums, params=None, title='', xlabel=''):
    """Plot selected DataFrame columns as vertically stacked subplots.

    Note: defining this in the notebook replaces the ``plot_all_df_columns``
    imported from ``timeseries_functions`` for all later cells.

    Parameters
    ----------
    df : DataFrame whose columns are plotted against row position.
    col_nums : sequence of int
        Positional column indices to plot, one subplot per entry.
    params : dict, optional
        Matplotlib rcParams applied only while this figure is drawn. May be
        omitted, in which case the current global rcParams are used.
    title : str
        Title placed on every subplot.
    xlabel : str
        X-axis label placed on every subplot.
    """
    values = df.values
    # Scope the style overrides to this figure instead of ignoring `params`.
    with plt.rc_context(params or {}):
        for position, col in enumerate(col_nums, start=1):
            plt.subplot(len(col_nums), 1, position)
            plt.plot(values[:, col])
            plt.title(title)
            # Label from the DataFrame being plotted, not the global dr_df, so
            # other provider tables get their own column names.
            plt.ylabel(df.columns[col])
            plt.xlabel(xlabel)
        plt.tight_layout()
        plt.show()

In [None]:
# Positional indices of the columns to plot, and the matplotlib rcParams used
# for the provider figures.
col_nums = [0, 1, 2]
params = {
    'figure.figsize': [12, 8],
    'axes.grid.axis': 'both',
    'axes.grid': True,
    'axes.labelsize': 'Large',
    'font.size': 12.0,
    'lines.linewidth': 3,
}

In [None]:
# Apply the shared style settings globally (they stay in effect for later
# figures too), then plot the selected columns of the doctors table.
plt.rcParams.update(params)
plot_all_df_columns(dr_df, col_nums, params, title='Doctors', xlabel='Time in Weeks')

In [None]:
# Pass `params` explicitly, as the doctors call does: it is the function's third
# positional parameter, and leaving it out raises a TypeError.
plot_all_df_columns(RNPA_df, col_nums, params, title='RN/PAs', xlabel='Time in Weeks')

In [None]:
# Pass `params` explicitly, as the doctors call does: it is the function's third
# positional parameter, and leaving it out raises a TypeError.
plot_all_df_columns(ther_df, col_nums, params, title='Therapists', xlabel='Time in Weeks')

In [None]:
# all_df = [dr_df, RNPA_df, ther_df]