In [1]:
from __future__ import print_function
import numpy as np
np.random.seed(1337)  # for reproducibility

from keras.preprocessing import sequence
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.recurrent import LSTM
import matplotlib.pyplot as plt

ImportError: No module named keras.preprocessing

In [None]:
# Prepare the data: X_train, y_train, X_test, y_test
# NOTE(review): np.load needs the path of the .npz archive itself; this value
# looks like a directory and is specific to one machine -- confirm it and
# consider making it configurable.
DATA_PATH = '/Users/Mark/Downloads/MIMIC Data/Processed'

nb_samples = 1000
test_split = 0.2


def _halve(samples, targets):
    """Return (first half of every sample, half of every target).

    Each sample is a (timesteps, variables) matrix; the cut point is taken
    from the sample's own number of timesteps, not from the size of the set.
    """
    half_samples = [samp[:samp.shape[0] // 2, :] for samp in samples]
    half_targets = [target / 2.0 for target in targets]
    return half_samples, half_targets


def _pad_samples(samples, length):
    """Stack (timesteps, variables) samples into one (n, length, variables) array.

    Shorter samples are zero-padded at the front (the same side
    keras' pad_sequences pads by default). `length` must be at least as
    large as the longest sample.
    """
    samples = [np.asarray(samp, dtype='float32') for samp in samples]
    if not samples:
        return np.zeros((0, length, 0), dtype='float32')
    padded = np.zeros((len(samples), length, samples[0].shape[1]), dtype='float32')
    for row, samp in zip(padded, samples):
        if samp.shape[0]:  # row[-0:] would address the whole row
            row[-samp.shape[0]:] = samp
    return padded


# Loading data from .npz file into dictionary form
data = np.load(DATA_PATH)
X_raw = data['X'][0:nb_samples]
y_raw = data['ylos'][0:nb_samples]

# Splitting data for training and testing. The boundary is an integer index
# shared by both slices, so no sample is dropped between the two sets.
nb_test = int(len(X_raw) * test_split)
X_raw_test, X_raw_train = X_raw[:nb_test], X_raw[nb_test:]
y_raw_test, y_raw_train = y_raw[:nb_test], y_raw[nb_test:]

# Cut every sample, a (timesteps, variables) matrix, to the first half of its
# timesteps to create a second set of samples. The LOS of the new samples is
# divided by two, i.e. it is the LOS remaining at the cut point.
X_raw_train_half, y_raw_train_half = _halve(X_raw_train, y_raw_train)
X_raw_test_half, y_raw_test_half = _halve(X_raw_test, y_raw_test)

# Full-length and half-length samples only fit in one 3D array once they are
# padded to a common number of timesteps.
max_len = max([len(samp) for samp in X_raw] or [0])

# Concatenate the original samples (remaining LOS = 0) with the half-length
# copies (remaining LOS = LOS / 2). Each set ends up twice its original size.
# The test set is built the same way so evaluation targets mean the same
# thing as the training targets.
X_train = _pad_samples(list(X_raw_train) + X_raw_train_half, max_len)
y_train = np.concatenate((np.zeros(len(y_raw_train)), y_raw_train_half), axis=0)
X_test = _pad_samples(list(X_raw_test) + X_raw_test_half, max_len)
y_test = np.concatenate((np.zeros(len(y_raw_test)), y_raw_test_half), axis=0)



In [None]:
# Input geometry: variables per timestep and timesteps per sample.
# NOTE(review): an earlier comment said tsteps can be 1 for a stateful RNN,
# but the value is 300 and the LSTM built below is not declared stateful --
# confirm which is intended.
nb_featrues, tsteps = 12, 300

# Training schedule: samples per weight update, and number of passes over
# the training data.
batch_size, epochs = 5, 3

# Number of elements ahead that are used to make the prediction.
lahead = 1


In [None]:
# Create a model with 1 output dimension. The number of input variables must
# be fixed because the layer's weights depend on it; only the sequence length
# (input_length) is left unspecified.
# NOTE(review): nb_featrues is assumed to equal the size of the last axis of
# X_train -- confirm against the data.
# NOTE(review): a single tanh unit can only emit values in (-1, 1). Confirm
# the LOS targets are scaled into that range, or add a linear output layer.
print('Creating Model')
model = Sequential()
model.add(LSTM(output_dim=1,
               return_sequences=False,
               input_dim=nb_featrues,
               dropout_U=0.5,
               activation='tanh',
               inner_activation='hard_sigmoid'))


# Build the model with Mean Squared Error as the loss function and
# RMSProp as the optimizer; could also try 'sgd' or 'adagrad' later.
model.compile(loss='mse', optimizer='rmsprop')

In [None]:
# X_train should be a 3D list or ndarray in the form (nb_sample, time_steps, variables)
# y_train should be a 1D list or ndarray in the form (output,), a vector of LOS values
# batch_size is how many samples are used for a single forward and backward pass
# validation_split is the fraction of the training data held out for validation
# NOTE(review): Keras takes the validation fraction from the end of the arrays,
# and X_train is ordered originals-then-half-copies -- confirm that is acceptable
# or shuffle the training set first.
print('Train...')
model.fit(X_train,
          y_train,
          batch_size=batch_size,
          nb_epoch=epochs,
          validation_split=0.1,
          show_accuracy=True)



In [None]:
# Score the trained model on the held-out data.
# X_test / y_test must be prepared by the data-preparation cell with the same
# layout as X_train / y_train.
print('Evaluation...')
evaluation = model.evaluate(X_test,
                            y_test,
                            show_accuracy=True,
                            batch_size=batch_size)
score, acc = evaluation
