In [None]:
from __future__ import absolute_import
from __future__ import print_function
import numpy as np
import argparse
import os
import imp
import re
from utils import utils
from utils.readers import read_ts
from models.preprocessing import Discretizer, Normalizer
from models.create_normalizer_state import create_normalizer
from models import metrics
from models import keras_utils
from models import common_utils
from keras.callbacks import ModelCheckpoint, CSVLogger

# Replace the process arguments with a single empty entry before argparse
# reads them (presumably so the notebook kernel's own flags are not parsed),
# then drop the temporary `sys` name again.
import sys
sys.argv = ['']
del sys

# Every option defined by common_utils is parsed with its default value.
parser = argparse.ArgumentParser()
common_utils.add_common_arguments(parser)
args = parser.parse_args()

# preprocessed data is imported from s3
args.data = 's3://aws-glue-scripts-271538242229-us-west-2/data/in-hospital-mortality/'
print(args)

In [None]:

# --- Run configuration ------------------------------------------------------
# These assignments override the parsed defaults for this notebook session.
args.network = 'models/rnn.py'
args.period2 = 48.0
# The discretizer and the model-name suffix both read `args.timestep`.
# This value used to be stored on `args.time_step`, which nothing reads,
# so the assignment silently had no effect.
args.timestep = 1.0
#args.depth=1
args.dim = 16
args.rec_dropout = 0.3
args.dropout = 0.0
args.mode = 'train'
args.output_dir = 'models/output'

# Test mode: uncomment the load_state line below.
# Model weights and predictions are written under args.output_dir.
# args.load_state='models/output/k_lstm.n2.d0.3.dep1.bs4096.ts1.0.epoch1.test0.6755971312522888.state'

args.epochs = 1
args.batch_size = 8

# With a small data subset, push the checkpoint interval to a huge value
# (presumably to avoid writing checkpoints during quick experiments).
if args.small_part:
    args.save_every = 2**30

# --- Data readers -----------------------------------------------------------
# The validation list file indexes into the same 'train/' directory as the
# training list file.
args.data = 'data/in-hospital-mortality'
train_reader = read_ts(dataset_dir='data/in-hospital-mortality/train/',
                       listfile='data/in-hospital-mortality/train_listfile.csv',
                       period_length=args.period2)
val_reader = read_ts(dataset_dir='data/in-hospital-mortality/train/',
                     listfile='data/in-hospital-mortality/val_listfile.csv',
                     period_length=args.period2)

'''
Uncomment the following block to run locally.
'''
# args.data='C:/Users/sy/Downloads/cse 6250/project/mimic3/data/in-hospital-mortality'
# train_reader = read_ts(dataset_dir=os.path.join(args.data, 'train'),
#                                      listfile=os.path.join(args.data, 'train_listfile.csv'),
#                                      period_length=args.period2)
# val_reader = read_ts(dataset_dir=os.path.join(args.data, 'train'),
#                                    listfile=os.path.join(args.data, 'val_listfile.csv'),
#                                    period_length=args.period2)

# Create the discretizer: fixed-rate resampling with masks stored, gaps filled
# from the previous value, and time measured from zero.
discretizer = Discretizer(
    timestep=float(args.timestep),
    store_masks=True,
    impute_strategy='previous',
    start_time='zero',
)

# Transform the first training example once; the second element of the result
# is a comma-separated header string, split here into a list of column names.
first_example = train_reader.read_example(0)
header_line = discretizer.transform(first_example["X"])[1]
discretizer_header = header_line.split(',')

# Set the target-replication weight BEFORE snapshotting the arguments, so the
# dictionary handed to the network constructor contains the value in use.
args.target_repl_coef = 0.5
# Target replication is only active while training.
target_repl = (args.target_repl_coef > 0.0 and args.mode == 'train')

# Copy the namespace into a plain dict (vars() is the documented way to view
# an argparse Namespace), then add the model-specific settings on top.
args_dict = dict(vars(args))
args_dict['target_repl'] = target_repl
args_dict['model'] = 'GRU'
args_dict['is_bidirectional'] = True
args_dict['depth'] = 2
args_dict['channel_wise'] = True
args_dict['header'] = discretizer_header  # for cw (presumably the channel-wise model)

'''
Uncomment the create_normalizer call below if you want to use a different normalizer to train a new model.
'''
# '--timestep': rate of the re-sampling used to discretize the time series. default=1.0
# '--impute_strategy': strategy for imputing missing values. choices=['zero', 'next', 'previous', 'normal_value']
# '--start_time': start time of discretization. choices=['zero', 'relative']. 'zero' means the beginning of the ICU stay; 'relative' uses the time of the first ICU event.
# '--n_samples': how many samples to use to estimate means and standard deviations. Set to -1 to use all training samples.
# '--output_dir': default='models'
# '--data': path to the data.

# create_normalizer(timestep=1.0, impute_strategy='previous',start_time='zero', n_samples=10, dataset_dir='data/in-hospital-mortality/train/',
#                                          listfile='data/in-hospital-mortality/train_listfile.csv', output_dir='models')

# Load the normalizer.

# Standardize only the columns whose header name has no "->" marker.
cont_channels = [pos for pos, column in enumerate(discretizer_header)
                 if "->" not in column]
normalizer = Normalizer(fields=cont_channels)

# Saved normalizer statistics; create a new file with create_normalizer
# (see the commented call above) as needed.
args.normalizer_state = 'ts1.00_imputeprevious_startzero_masksTrue_n17903.normalizer'
normalizer_state = args.normalizer_state
if normalizer_state is None:
    # NOTE(review): normalizer_state stays None after this call, so the
    # os.path.join below would fail if this branch ever ran. Confirm which
    # file create_normalizer writes and assign its name here.
    create_normalizer(data=args.data)
normalizer_state = os.path.join('models/', normalizer_state)
normalizer.load_params(normalizer_state)

# Build the model: load the network definition from its file path and
# instantiate it with the collected arguments.
print("==> using model {}".format(args.network))
model_module = imp.load_source(os.path.basename(args.network), args.network)
model = model_module.Network(**args_dict)

# The run name is the model's own name plus a suffix encoding the batch size,
# any positive L1/L2 coefficient, the timestep and any positive
# target-replication coefficient.
suffix_parts = [".bs{}".format(args.batch_size)]
if args.l1 > 0:
    suffix_parts.append(".L1{}".format(args.l1))
if args.l2 > 0:
    suffix_parts.append(".L2{}".format(args.l2))
suffix_parts.append(".ts{}".format(args.timestep))
if args.target_repl_coef > 0:
    suffix_parts.append(".trc{}".format(args.target_repl_coef))
suffix = "".join(suffix_parts)

model.final_name = model.say_name() + suffix
print("==> model.final_name:", model.final_name)

print("==> compiling the model")
optimizer_config = {
    'class_name': args.optimizer,
    'config': {
        'lr': args.lr,
        'beta_1': args.beta_1,
    },
}

# Default: a single binary cross-entropy loss. With target replication the
# loss is listed twice and weighted (1 - coef, coef).
loss = 'binary_crossentropy'
loss_weights = None
if target_repl:
    loss = ['binary_crossentropy'] * 2
    loss_weights = [1 - args.target_repl_coef, args.target_repl_coef]

model.compile(optimizer=optimizer_config,
              loss=loss,
              loss_weights=loss_weights)
model.summary()

# Load model weights
# Number of epochs the loaded checkpoint has already been trained for;
# stays 0 when starting from scratch.
n_trained_chunks = 0
#print (args.load_state)

# Truthiness treats both None and an empty string as "no checkpoint given";
# the previous `!= None` test would have tried to load weights from "".
if args.load_state:
    model.load_weights(args.load_state)
    # Checkpoint names written by this script contain "epoch<N>"; recover N
    # so that training continues the epoch numbering.
    epoch_match = re.match(r".*epoch([0-9]+).*", args.load_state)
    if epoch_match is not None:
        n_trained_chunks = int(epoch_match.group(1))
    else:
        # A renamed weights file is still usable; only the epoch counter
        # restarts instead of crashing on a missing match.
        print("==> no epoch number found in {}; continuing from epoch 0".format(args.load_state))


# Read the training and validation data through the discretizer and normalizer.
train_raw = utils.load_data(train_reader, discretizer, normalizer, args.small_part)
val_raw = utils.load_data(val_reader, discretizer, normalizer, args.small_part)

if target_repl:
    # First dimension of the first training input (presumably the number of
    # timesteps per sample).
    T = train_raw[0][0].shape[0]

    def extend_labels(data):
        """Return `data` as a list whose labels entry is [labels, replicated].

        The second item of `data` is converted to an array, and a copy
        repeated T times along a new axis is stored next to it.
        """
        items = list(data)
        labels = np.array(items[1])  # (B,)
        replicated = np.expand_dims(labels, axis=-1).repeat(T, axis=1)  # (B, T)
        replicated = np.expand_dims(replicated, axis=-1)  # (B, T, 1)
        items[1] = [labels, replicated]
        return items

    train_raw = extend_labels(train_raw)
    val_raw = extend_labels(val_raw)

def _ensure_dir(directory):
    """Create `directory` (including parents) unless the path already exists."""
    if not os.path.exists(directory):
        os.makedirs(directory)


# Reject unknown modes up front; only 'train' and 'test' are handled below.
if args.mode not in ('train', 'test'):
    raise ValueError("Wrong value for args.mode")

if args.mode == 'train':
    # Checkpoint file name template; the {epoch} and {val_loss} placeholders
    # are left for the checkpoint callback to fill in.
    checkpoint_pattern = model.final_name + '.epoch{epoch}.test{val_loss}.state'
    path = os.path.join(args.output_dir, checkpoint_pattern)

    metrics_callback = keras_utils.keras_metrics(
        train_data=train_raw,
        val_data=val_raw,
        target_repl=(args.target_repl_coef > 0),
        batch_size=args.batch_size,
        verbose=args.verbose,
    )

    dirname = os.path.dirname(path)
    _ensure_dir(dirname)
    saver = ModelCheckpoint(path, verbose=1, period=args.save_every)

    # Per-epoch training log, appended to across runs, ';'-separated.
    logs = os.path.join(args.output_dir, 'logs')
    _ensure_dir(logs)
    log_file = os.path.join(logs, model.final_name + '.csv')
    csv_logger = CSVLogger(log_file, append=True, separator=';')

    print("==> training")
    # Continue the epoch numbering after any epochs already trained.
    last_epoch = n_trained_chunks + args.epochs
    model.fit(x=train_raw[0],
              y=train_raw[1],
              validation_data=val_raw,
              batch_size=args.batch_size,
              initial_epoch=n_trained_chunks,
              epochs=last_epoch,
              shuffle=True,
              callbacks=[metrics_callback, saver, csv_logger],
              verbose=args.verbose)

else:
    # Test mode: drop the training and validation data before loading the
    # test set.
    del train_reader
    del val_reader
    del train_raw
    del val_raw

    test_dir = os.path.join(args.data, 'test')
    test_listfile = os.path.join(args.data, 'test_listfile.csv')
    test_reader = read_ts(dataset_dir=test_dir,
                          listfile=test_listfile,
                          period_length=args.period2)
    ret = utils.load_data(test_reader, discretizer, normalizer, args.small_part,
                          return_names=True)

    data = ret["data"][0]
    labels = ret["data"][1]
    names = ret["names"]

    # Keep only the first column of the model output as the prediction.
    predictions = model.predict(data, batch_size=args.batch_size, verbose=1)
    predictions = np.array(predictions)[:, 0]
    metrics.print_metrics_binary(labels, predictions)

    # The predictions file is named after the loaded weights file.
    predictions_name = os.path.basename(args.load_state) + ".csv"
    path = os.path.join(args.output_dir, "test_predictions", predictions_name)
    utils.save_results(names, predictions, labels, path)
