In [100]:
import numpy as np
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from collections import defaultdict
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout

### Loading Series

In [101]:
# Call Volume 2018 series: reload the cached pickle, or rewrite it on request.
# Flip redo_pickle to True only when hourly_call_volume_medical_2018 already
# exists in the kernel and the file on disk should be overwritten with it.
# NOTE: pickle.load executes arbitrary code; only load files you created yourself.
redo_pickle = False

if not redo_pickle:
    # Default path: read the previously saved object back from disk.
    with open('hourly_call_volume_medical_2018.pickle', 'rb') as f:
        hourly_call_volume_medical_2018 = pickle.load(f)
else:
    # Refresh path: serialise the in-memory object to disk.
    with open('hourly_call_volume_medical_2018.pickle', 'wb') as f:
        pickle.dump(hourly_call_volume_medical_2018, f)

In [102]:
# All Call Volume series: reload the cached pickle, or rewrite it on request.
# Flip redo_pickle to True only when hourly_call_volume_medical_all already
# exists in the kernel and the file on disk should be overwritten with it.
# NOTE: pickle.load executes arbitrary code; only load files you created yourself.
redo_pickle = False

if not redo_pickle:
    # Default path: read the previously saved object back from disk.
    with open('hourly_call_volume_medical_all.pickle', 'rb') as f:
        hourly_call_volume_medical_all = pickle.load(f)
else:
    # Refresh path: serialise the in-memory object to disk.
    with open('hourly_call_volume_medical_all.pickle', 'wb') as f:
        pickle.dump(hourly_call_volume_medical_all, f)

In [103]:
# Unavailable Units 2018 series: reload the cached pickle, or rewrite it on request.
# Flip redo_pickle to True only when unavailable_units_15_min_int_counts_2018_df
# already exists in the kernel and the file on disk should be overwritten with it.
# NOTE: pickle.load executes arbitrary code; only load files you created yourself.
redo_pickle = False

if not redo_pickle:
    # Default path: read the previously saved object back from disk.
    with open('unavailable_units_15_min_int_counts_2018_df.pickle', 'rb') as f:
        unavailable_units_15_min_int_counts_2018_df = pickle.load(f)
else:
    # Refresh path: serialise the in-memory object to disk.
    with open('unavailable_units_15_min_int_counts_2018_df.pickle', 'wb') as f:
        pickle.dump(unavailable_units_15_min_int_counts_2018_df, f)

### Functions

In [104]:
def split_sequence(sequence, n_steps):
    """Split a sequence into sliding-window samples for supervised learning.

    Each sample pairs ``n_steps`` consecutive observations (the input window)
    with the single observation that immediately follows them (the target).

    Parameters
    ----------
    sequence : array-like
        Ordered observations (list, NumPy array, pandas Series, ...).
    n_steps : int
        Number of consecutive observations in each input window.

    Returns
    -------
    X : np.ndarray
        One row per window; row ``i`` holds positions ``i .. i + n_steps - 1``.
    y : np.ndarray
        ``y[i]`` is the observation at position ``i + n_steps``.
        Both arrays are empty when the sequence has no complete window
        followed by a target.
    """
    # Work on a plain NumPy array so every lookup below is positional.
    # Indexing a pandas Series with a bare integer is a *label* lookup for
    # integer indexes (wrong row / KeyError when the index does not start at 0)
    # and a deprecated positional fallback for other index types.
    values = np.asarray(sequence)

    X, y = list(), list()
    for i in range(len(values)):
        # find the end of this pattern
        end_ix = i + n_steps
        # stop once there is no observation left to serve as the target
        if end_ix > len(values) - 1:
            break
        # gather input and output parts of the pattern
        X.append(values[i:end_ix])
        y.append(values[end_ix])
    return np.array(X), np.array(y)

In [105]:
def train_val_test_split(data):
    """Cut ``data`` into consecutive 60-20-20 train / validation / test slices.

    The order of ``data`` is preserved (no shuffling): the first 80% is set
    aside for training + validation, and the last quarter of that part becomes
    the validation set. The size of each slice is printed.

    Returns
    -------
    tuple
        ``(train, val, test)`` slices of ``data``.
    """
    # Boundary between the train+validation part and the test part.
    test_start = int(len(data) * 0.8)
    train_and_val = data[:test_start]

    # Boundary between train and validation inside the first part.
    val_start = int(len(train_and_val) * 0.75)

    splits = (train_and_val[:val_start], train_and_val[val_start:], data[test_start:])
    for label, part in zip(('Train:', 'Validation:', 'Test:'), splits):
        print(label, len(part))
    return splits

In [106]:
def lstm_predictions(train, val, model, n_steps, n_features):
    """Produce one-step-ahead predictions by sliding a window over ``val``.

    For every ``i`` in ``range(len(val) - n_steps)`` the window
    ``val[i:i + n_steps]`` is reshaped to ``(1, n_steps, n_features)`` and
    passed to ``model.predict``.

    Parameters
    ----------
    train : any
        Unused; kept so existing callers do not break.
    val : array-like
        Observations to slide the window over (pandas Series, NumPy array
        or list).
    model : object
        Fitted model exposing ``predict(x, verbose=0)``; each call is expected
        to return a single value (any shape holding exactly one element).
    n_steps : int
        Window length.
    n_features : int
        Number of features per time step.

    Returns
    -------
    np.ndarray
        1-D array with ``len(val) - n_steps`` predictions (empty when ``val``
        is not longer than ``n_steps``).
    """
    # np.asarray accepts pandas objects as well as lists/arrays, so the input
    # no longer needs a ``.values`` attribute.
    val_values = np.asarray(val)

    predictions = []
    for i in range(len(val_values) - n_steps):
        x_input = val_values[i:i + n_steps].reshape((1, n_steps, n_features))
        yhat = model.predict(x_input, verbose=0)
        predictions.append(yhat)
    # Was ``array(...)``: a NameError, because only ``np`` is imported.
    return np.array(predictions).reshape((len(predictions)))

In [107]:
def lstm_with_grid_search(train, val, n_steps_list=[6,12], batch_size_list=[32], epochs_list=[2]):
    '''Train a stacked-LSTM model for every hyperparameter combination and
    score each one on the validation set.

    Parameters:
        train: univariate series used to fit each model.
        val: univariate series used to score each model.
        n_steps_list: window lengths to try.
        batch_size_list: batch sizes to try.
        epochs_list: epoch counts to try.
        (The list defaults are only iterated, never mutated.)

    Output: {(n_steps, batch_size, epochs, model): mse}
        The fitted model is part of the key so the best one can be retrieved
        with min(results, key=results.get). The predictions array is NOT part
        of the key: NumPy arrays are unhashable, so including it raised
        TypeError. Recompute predictions with lstm_predictions(...) and the
        returned model if they are needed.
    '''
    n_features = 1  # Series is univariate
    lstm_grid_search_results = {}
    for n_steps in n_steps_list:
        for batch_size in batch_size_list:
            for epochs in epochs_list:
                print('Model with:','n_steps:',n_steps,'batch_size:',batch_size,'epochs:',epochs)
                # splitting into n length sequences
                X, y = split_sequence(train, n_steps)

                # reshape from [samples, timesteps] into [samples, timesteps, features]
                X = X.reshape((X.shape[0], X.shape[1], n_features))

                # Model Architecture: two stacked LSTM layers with dropout in between.
                # Only the first layer needs input_shape; later layers infer theirs.
                model = Sequential()
                model.add(LSTM(128, activation='relu', input_shape=(n_steps, n_features), return_sequences=True))
                model.add(Dropout(0.2))
                model.add(LSTM(128, activation='relu', return_sequences=False))
                model.add(Dense(1))
                model.compile(optimizer='adam', loss='mse')

                # fitting model
                model.fit(X, y, batch_size=batch_size, epochs=epochs)

                # Making predictions on validation set
                predictions = lstm_predictions(train, val, model, n_steps, n_features)

                # Calculating MSE: the first n_steps validation points only seed
                # the first window, so they have no prediction to compare with.
                mse = mean_squared_error(val[n_steps:], predictions)
                # NOTE: the label says "Test" but this score is computed on `val`.
                print('Test MSE: %.3f' % mse)

                lstm_grid_search_results[(n_steps, batch_size, epochs, model)] = mse
    return lstm_grid_search_results

### LSTM Grid Search

In [69]:
# Split the 2018 call-volume series loaded above into train / validation / test parts.
train, val, test = train_val_test_split(hourly_call_volume_medical_2018)

Train: 5253
Validation: 1751
Test: 1752


In [88]:
# Run the grid search with the function's default hyperparameter lists.
lstm_grid_search_results = lstm_with_grid_search(train, val)

Model with: n_steps: 6 batch_size: 32 epochs: 2
Epoch 1/2
Epoch 2/2
Test MSE: 18.466
Model with: n_steps: 12 batch_size: 32 epochs: 2
Epoch 1/2
Epoch 2/2
Test MSE: 16.699


In [112]:
# Key (hyperparameter combination) whose stored MSE value is the smallest.
min(lstm_grid_search_results, key=lstm_grid_search_results.get)

(12, 32, 2, <keras.engine.sequential.Sequential at 0x1a3a5ac898>)

In [111]:
# model.predict(np.array([1,2,4,26,34,4,4,5,45,3,34,2]).reshape((1, 12, 1)))

array([[14.301679]], dtype=float32)