In [22]:
from keras.layers import Input, LSTM, Dense, Dropout
from keras.models import Sequential, load_model, Model
from matplotlib import cm, pyplot as plt
from sklearn import metrics
from os.path import expanduser as eu
from os.path import isfile, join
from os import listdir
import sklearn.metrics as metrics
import numpy as np
import random
import time
import keras
import os

# GPU / session setup. The environment variable is set BEFORE TensorFlow is
# imported so that it is already in place when TensorFlow initializes CUDA.
import os
# Expose only the GPU with index 1 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
# allow_growth=True: request GPU memory on demand instead of reserving it all
# up front; allow_soft_placement=True: let TensorFlow fall back to another
# device when an op cannot be placed on the requested one.
config = tf.ConfigProto(allow_soft_placement=True,gpu_options = tf.GPUOptions(allow_growth=True))
# Register a session with this configuration as the one Keras uses.
set_session(tf.Session(config=config))

def load_min_model_helper(MPATH):
    """Load the checkpoint in ``MPATH`` that has the lowest validation loss.

    Checkpoint file names are expected to embed the validation loss as
    ``val_loss:<float>_...`` (the naming used by ``train_lstm_model`` when it
    saves a model after every iteration). Files without that marker, such as
    ``loss.txt``, are ignored.

    Args:
        MPATH: Directory that contains the saved checkpoints.

    Returns:
        The model returned by ``load_model`` for the best checkpoint.

    Raises:
        ValueError: If ``MPATH`` contains no ``val_loss:`` checkpoint file.
    """
    print("[PROGRESS] Starting load_min_model_helper()")
    print("[DEBUG] MPATH {}".format(MPATH))

    # Filter on the full "val_loss:" marker so the split below cannot fail
    # on a file that merely contains "val_loss" without the colon.
    checkpoints = [f for f in os.listdir(MPATH) if "val_loss:" in f]
    if not checkpoints:
        raise ValueError(
            "No 'val_loss:' checkpoint files found in {}".format(MPATH))

    def _val_loss(fname):
        # "...val_loss:0.123_epoch:4_..." -> 0.123
        return float(fname.split("val_loss:")[1].split("_")[0])

    # min() keeps the first of equal minima, like list.index(min(...)) did.
    best_file = min(checkpoints, key=_val_loss)
    min_mod_name = os.path.join(MPATH, best_file)
    if DEBUG: print("[DEBUG] min_mod_name {}".format(best_file))
    return load_model(min_mod_name)

def split_data(trainvalX,trainvalY):
    """Split features/labels into a 90% training and 10% validation part.

    The split is positional: the first 90% of the rows become the training
    set and the remaining rows the validation set. Each part is then
    shuffled independently (rows and labels together) using Python's
    ``random`` module seeded with 10, so repeated calls give the same result.

    Args:
        trainvalX: Array of samples, indexed by sample along the first axis
            (must have at least two dimensions).
        trainvalY: Array of labels aligned with the rows of ``trainvalX``.

    Returns:
        Tuple ``(X_train, y_train, X_valid, y_valid)``.
    """
    # Local import: ``gc`` is not among the file's top-level imports, so the
    # gc.collect() call below would otherwise fail with a NameError.
    import gc

    train_ratio = 0.9
    n_rows = trainvalX.shape[0]
    split_ind = int(train_ratio * n_rows)
    X_train = trainvalX[0:split_ind,:]
    y_train = trainvalY[0:split_ind]
    X_valid = trainvalX[split_ind:n_rows,:]
    y_valid = trainvalY[split_ind:n_rows]
    # Drop this function's reference to the full array before copies are
    # made below (any reference held by the caller still keeps it alive).
    del trainvalX
    gc.collect()

    # Randomize: one seed, then train and validation are shuffled in turn.
    random.seed(10)
    indices = np.arange(0, X_train.shape[0])
    random.shuffle(indices)
    X_train = X_train[indices,:]
    y_train = y_train[indices]

    indices = np.arange(0, X_valid.shape[0])
    random.shuffle(indices)
    X_valid = X_valid[indices,:]
    y_valid = y_valid[indices]
    return X_train, y_train, X_valid, y_valid

def _lstm_model_name_and_dir(hyper_dict):
    """Return ``(mod_name, MODDIR)`` for the given hyperparameters.

    ``mod_name`` encodes the layer sizes, epoch count, optimizer, learning
    rate, dropout and batch size; ``MODDIR`` is the directory below the
    global ``PATH`` in which checkpoints for that model are stored
    (``models/tune/`` when ``hyper_dict["is_tune"]`` is true, else
    ``models/``).
    """
    mod_name = "lstm" + "_{}n".format(hyper_dict["node_size"]) * hyper_dict["num_layer"]
    mod_name += "_{}ep_{}opt_{}lr".format(
        hyper_dict["epoch_num"], hyper_dict["opt_name"], hyper_dict["lr"])
    mod_name += "_{}drop_{}bs".format(hyper_dict["drop"], hyper_dict["b_size"])
    if hyper_dict["is_tune"]:
        MODDIR = PATH+"models/tune/"+mod_name+"/"
    else:
        MODDIR = PATH+"models/"+mod_name+"/"
    return mod_name, MODDIR


def create_lstm_model(hyper_dict):
    """Build and compile a stacked LSTM binary classifier.

    The network takes inputs of shape ``(60, 1)``, stacks
    ``hyper_dict["num_layer"]`` LSTM layers of ``hyper_dict["node_size"]``
    units each and ends in a single sigmoid unit. It is compiled with
    binary cross-entropy and the requested optimizer. As a side effect the
    checkpoint directory for this configuration is created if missing.

    Args:
        hyper_dict: Dict with the keys ``node_size``, ``epoch_num``,
            ``num_layer``, ``opt_name`` (``"rmsprop"``, ``"sgd"`` or
            ``"adam"``), ``is_tune``, ``b_size``, ``drop`` and ``lr``.

    Returns:
        The compiled Keras ``Sequential`` model.

    Raises:
        AssertionError: If ``num_layer`` is smaller than 1.
        ValueError: If ``opt_name`` is not a supported optimizer.
    """
    print("[PROGRESS] Starting create_lstm_model()")
    loss_func = "binary_crossentropy"; lookback = 60

    # Load hyperparameters
    node_size = hyper_dict["node_size"]
    num_layer = hyper_dict["num_layer"]
    opt_name  = hyper_dict["opt_name"]
    drop      = hyper_dict["drop"]
    lr        = hyper_dict["lr"]
    assert num_layer >= 1, "Need at least one layer"

    # Make sure the directory the checkpoints will be saved to exists
    _, MODDIR = _lstm_model_name_and_dir(hyper_dict)
    if not os.path.exists(MODDIR): os.makedirs(MODDIR)

    # Pick the optimizer by name. Looked up by equality: comparing strings
    # with ``is`` tests object identity and only works by accident.
    optimizers = {
        "rmsprop": keras.optimizers.RMSprop,
        "sgd": keras.optimizers.SGD,
        "adam": keras.optimizers.Adam,
    }
    if opt_name not in optimizers:
        raise ValueError("Unsupported opt_name: {!r}".format(opt_name))
    opt = optimizers[opt_name](lr)

    # Create model
    model = Sequential()
    for i in range(num_layer):
        # Every layer except the last one must return full sequences so the
        # next LSTM layer receives a sequence as input.
        layer_kwargs = {"recurrent_dropout": drop,
                        "return_sequences": i < num_layer - 1}
        if i == 0:
            # First layer declares the input shape; no input dropout here.
            layer_kwargs["input_shape"] = (lookback, 1)
        else:
            layer_kwargs["dropout"] = drop
        model.add(LSTM(node_size, **layer_kwargs))
    model.add(Dense(1, activation='sigmoid'))

    # Compile so the model can actually be trained with fit()
    model.compile(loss=loss_func, optimizer=opt)
    return model


def train_lstm_model(RESDIR,trainvalX,trainvalY,data_type,
                    label_type,hosp_data,hyper_dict):
    """Train an LSTM model and save a checkpoint after every iteration.

    The data is split with ``split_data``; the model comes from
    ``create_lstm_model``. Each of the ``hyper_dict["epoch_num"]``
    iterations fits for one epoch on a random subset (without replacement)
    of at most 300000 training rows, appends the losses to ``loss.txt`` in
    the model directory and saves the model with its validation loss in the
    file name.

    Args:
        RESDIR: Results directory (not used by this function).
        trainvalX: Training+validation samples.
        trainvalY: Labels aligned with ``trainvalX``.
        data_type: Not used by this function.
        label_type: Not used by this function.
        hosp_data: Not used by this function.
        hyper_dict: Hyperparameters, see ``create_lstm_model``.

    Returns:
        The directory the checkpoints and ``loss.txt`` were written to.
    """
    ###### Split data ######
    X_train, y_train, X_valid, y_valid = split_data(trainvalX,trainvalY)

    ###### Form model ######
    model = create_lstm_model(hyper_dict)
    mod_name, MODDIR = _lstm_model_name_and_dir(hyper_dict)
    if not os.path.exists(MODDIR): os.makedirs(MODDIR)
    epoch_num = hyper_dict["epoch_num"]
    b_size = hyper_dict["b_size"]

    # Train and Save
    loss_file = MODDIR+"loss.txt"
    with open(loss_file, "w") as f:
        f.write("{}\t{}\t{}\t{}\n".format("i", "train_loss", "val_loss", "epoch_time"))
    start_time = time.time()
    n_train = X_train.shape[0]
    # Never ask for more rows than exist: sampling without replacement
    # raises if the requested size exceeds the population.
    per_iter_size = min(300000, n_train)
    np.random.seed(10)
    for i in range(epoch_num):
        inds = np.random.choice(n_train, per_iter_size, replace=False)
        history = model.fit(X_train[inds], y_train[inds], epochs=1,
                            batch_size=b_size,
                            validation_data=(X_valid,y_valid))

        # Save details about training (epoch_time is the time elapsed since
        # training started, not the duration of this single iteration)
        train_loss = history.history['loss'][0]
        val_loss = history.history['val_loss'][0]
        epoch_time = time.time() - start_time
        with open(loss_file, "a") as f:
            f.write("{}\t{}\t{}\t{}\n".format(i, train_loss, val_loss, epoch_time))

        # Save model each iteration
        model.save("{}val_loss:{}_epoch:{}_{}.h5".format(MODDIR,val_loss,i,mod_name))
    return MODDIR

def load_model_and_test(RESDIR,MODDIR,X_test,y_test,data_type,hosp_data):
    """Evaluate the best checkpoint in ``MODDIR`` on the test set.

    Loads the lowest-validation-loss model via ``load_min_model_helper``,
    saves its predictions and the labels, then draws 100 bootstrap
    resamples of the test set (seed 231) and computes average precision and
    ROC AUC on each. The mean and twice the standard deviation of both
    metrics are appended to per-hospital summary text files, and the raw
    bootstrap values are saved as ``.npy`` files.

    Args:
        RESDIR: Root results directory (expected to end with a separator).
        MODDIR: Directory holding the model checkpoints.
        X_test: Test samples.
        y_test: Test labels aligned with ``X_test``.
        data_type: Name of the data variant; used in output paths and as
            the row label in the summary files.
        hosp_data: Hospital identifier; used in output paths.

    Returns:
        None. Results are written to disk.
    """
    model = load_min_model_helper(MODDIR)
    save_path = RESDIR+"hosp{}_data/{}/".format(hosp_data,data_type)
    if not os.path.exists(save_path): os.makedirs(save_path)
    # save_path is where results go, not where the model was loaded from
    print("[DEBUG] Saving results to {}".format(save_path))
    ypred = model.predict(X_test)
    np.save(save_path+"ypred.npy",ypred)
    np.save(save_path+"y_test.npy",y_test)

    # Bootstrap: resample the test set with replacement 100 times
    np.random.seed(231)
    n_test = X_test.shape[0]
    auc_lst = []
    roc_auc_lst = []
    for _ in range(100):
        inds = np.random.choice(n_test, n_test, replace=True)
        auc_lst.append(metrics.average_precision_score(y_test[inds], ypred[inds]))
        roc_auc_lst.append(metrics.roc_auc_score(y_test[inds], ypred[inds]))
    auc_lst = np.array(auc_lst)
    roc_auc_lst = np.array(roc_auc_lst)
    print("[DEBUG] auc_lst.mean(): {}".format(auc_lst.mean()))
    print("[DEBUG] roc_auc_lst.mean(): {}".format(roc_auc_lst.mean()))

    # Append "<data_type>, <mean>+-<2*std>" to the per-hospital summaries
    SP = RESDIR+"hosp{}_data/".format(hosp_data)
    with open('{}conf_int_hospdata{}_prauc.txt'.format(SP,hosp_data),'a') as f:
        f.write("{}, {}+-{}\n".format(data_type,auc_lst.mean().round(4),2*np.std(auc_lst).round(4)))
    with open('{}conf_int_hospdata{}_rocauc.txt'.format(SP,hosp_data),'a') as f:
        f.write("{}, {}+-{}\n".format(data_type,roc_auc_lst.mean().round(4),2*np.std(roc_auc_lst).round(4)))
    np.save(save_path+"auc_lst", auc_lst)
    np.save(save_path+"roc_auc_lst", roc_auc_lst)

In [23]:
# Project root directory (with trailing slash). create_lstm_model() builds its
# checkpoint directories as PATH + "models/...".
PATH = "/projects/leelab2/hughchen/RELIC/repr_learning/"

In [29]:
# Hyperparameters for a tuning run: three LSTM layers of 200 units each,
# RMSprop with learning rate 0.001, dropout 0.5, batch size 1000, 50 epochs.
hyper_dict = {
    "is_tune": True,
    "opt_name": "rmsprop",
    "lr": 0.001,
    "drop": 0.5,
    "b_size": 1000,
    "epoch_num": 50,
    "node_size": 200,
    "num_layer": 3,
}
# Build the network for this configuration.
model = create_lstm_model(hyper_dict)

[PROGRESS] Starting create_lstm_model()


In [30]:
# Print the layer-by-layer architecture and parameter counts of the model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_4 (LSTM)                (None, 60, 200)           161600    
_________________________________________________________________
lstm_5 (LSTM)                (None, 60, 200)           320800    
_________________________________________________________________
lstm_6 (LSTM)                (None, 200)               320800    
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 201       
Total params: 803,401
Trainable params: 803,401
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Driver cell: load the training/validation data for every label type and
# hospital. The loaded arrays are not used further in this cell.
import sys
sys.path.append("..")  # make the parent directory importable for xgb_setup
from xgb_setup import *
os.nice(5)  # raise the niceness (lower the scheduling priority) of this process

PATH = "/projects/leelab2/hughchen/RELIC/repr_learning/"
DPATH = "/homes/gws/hughchen/phase/downstream_prediction/"
RESULTPATH = PATH+"/results/"
MODELPATH = PATH+"/models/"
lookback = 60
DEBUG = False

# One entry per prediction task: (label_type, eta, curr_feat)
label_type_eta_currfeat_lst = [
    ("nibpm60", 0.1, "NIBPM"),
]

for label_type, eta, curr_feat in label_type_eta_currfeat_lst:
    print("\n[Progress] label_type: {}, eta: {}, curr_feat {}".format(label_type, "NA", curr_feat))

    # Results for this task go into their own directory
    xgb_type = "mlp_{}_top15".format(label_type)
    RESDIR = '{}{}/'.format(RESULTPATH, xgb_type)
    if not os.path.exists(RESDIR):
        os.makedirs(RESDIR)

    for hosp_data in (0, 1):
        print("\n[Progress] hosp_data {}".format(hosp_data))

        data_type = "raw[top15]+nonsignal"
        print("\n[Progress] data_type {}".format(data_type))

        # load_data comes from xgb_setup; the fourth argument presumably
        # selects the train/validation portion -- TODO confirm.
        trainvalX, trainvalY = load_data(
            DPATH, data_type, label_type, True,
            hosp_data, curr_feat, DEBUG=DEBUG)



[Progress] label_type: nibpm60, eta: NA, curr_feat NIBPM

[Progress] hosp_data 0

[Progress] data_type raw[top15]+nonsignal
[DEBUG] Y.shape: (1837676,)
[DEBUG] Starting load_raw_data
[DEBUG] DPATH /homes/gws/hughchen/phase/downstream_prediction//data/nibpm60/hospital_0/

[Progress] hosp_data 1

[Progress] data_type raw[top15]+nonsignal
[DEBUG] Y.shape: (2332902,)
[DEBUG] Starting load_raw_data
[DEBUG] DPATH /homes/gws/hughchen/phase/downstream_prediction//data/nibpm60/hospital_1/


In [None]:
# Driver cell: for every label type and hospital, load the data, train a
# model and evaluate it on the held-out test data. Training and testing
# are skipped when DEBUG is set.
import sys
sys.path.append("..")  # make the parent directory importable for xgb_setup
from xgb_setup import *
os.nice(5)  # raise the niceness (lower the scheduling priority) of this process

PATH = "/projects/leelab2/hughchen/RELIC/repr_learning/"
DPATH = "/homes/gws/hughchen/phase/downstream_prediction/"
RESULTPATH = PATH+"/results/"
MODELPATH = PATH+"/models/"
lookback = 60
DEBUG = False

# Full task list, currently disabled in favour of the single task below:
# label_type_eta_currfeat_lst = [("desat_bool92_5_nodesat",0.02,"SAO2"),
#                                ("nibpm60",0.1,"NIBPM"),
#                                ("etco235",0.1,"ETCO2")]

# One entry per prediction task: (label_type, eta, curr_feat)
label_type_eta_currfeat_lst = [
    ("nibpm60", 0.1, "NIBPM"),
]

for label_type, _, curr_feat in label_type_eta_currfeat_lst:
    print("\n[Progress] label_type: {}, eta: {}, curr_feat {}".format(label_type, "NA", curr_feat))

    # Results for this task go into their own directory
    xgb_type = "mlp_{}_top15".format(label_type)
    RESDIR = '{}{}/'.format(RESULTPATH, xgb_type)
    if not os.path.exists(RESDIR):
        os.makedirs(RESDIR)

    for hosp_data in (0, 1):
        print("\n[Progress] hosp_data {}".format(hosp_data))

        data_type = "raw[top15]+nonsignal"
        print("\n[Progress] data_type {}".format(data_type))

        # Training/validation data (fourth argument True), then train.
        # load_data / train_mlp_model are not defined in this file;
        # presumably they come from xgb_setup -- TODO confirm.
        trainvalX, trainvalY = load_data(
            DPATH, data_type, label_type, True,
            hosp_data, curr_feat, DEBUG=DEBUG)
        print("[Progress] trainvalX.shape {}".format(trainvalX.shape))
        if not DEBUG:
            MODDIR = train_mlp_model(
                RESDIR, trainvalX, trainvalY,
                data_type, label_type, hosp_data)

        # Test data (fourth argument False), then evaluate the trained model
        test1X, test1Y = load_data(
            DPATH, data_type, label_type, False,
            hosp_data, curr_feat, DEBUG=DEBUG)
        print("[Progress] test1X.shape    {}".format(test1X.shape))
        if not DEBUG:
            load_mlp_model_and_test(
                RESDIR, MODDIR, test1X, test1Y,
                data_type, label_type, hosp_data)


[Progress] label_type: desat_bool92_5_nodesat, eta: NA, curr_feat SAO2

[Progress] hosp_data 0

[Progress] hosp_model 0

[Progress] data_type ema[top15]+nonsignal
[DEBUG] Y.shape: (3920564,)
[Progress] trainvalX.shape (3920564, 117)
[PROGRESS] Starting create_model()
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Train on 3528507 samples, validate on 392057 samples
Epoch 1/1
Train on 3528507 samples, validate on 392057 samples
Epoch 1/1
Train on 3528507 samples, validate on 392057 samples
Epoch 1/1
Train on 3528507 samples, validate on 392057 samples
Epoch 1/1
Train on 3528507 samples, validate on 392057 samples
Epoch 1/1
Train on 3528507 samples, validate on 392057 samples
Epoch 1/1
Train on 3528507 samples, validate on 392057 samples
Epoch 1/1
Train on 3528507 samples, validate on 392057 samples
Epoch 1/1
Train on 3528507 samples, validate on 392057 samples
Epoch 1/1
Train on 3528507 samples, validate on 392057

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Train on 3528507 samples, validate on 392057 samples
Epoch 1/1
Train on 3528507 samples, validate on 392057 samples
Epoch 1/1
Train on 3528507 samples, validate on 392057 samples
Epoch 1/1
Train on 3528507 samples, validate on 392057 samples
Epoch 1/1
Train on 3528507 samples, validate on 392057 samples
Epoch 1/1
Train on 3528507 samples, validate on 392057 samples
Epoch 1/1
Train on 3528507 samples, validate on 392057 samples
Epoch 1/1
Train on 3528507 samples, validate on 392057 samples
Epoch 1/1
Train on 3528507 samples, validate on 392057 samples
Epoch 1/1
Train on 3528507 samples, validate on 392057 samples
Epoch 1/1
Train on 3528507 samples, validate on 392057 samples
Epoch 1/1
Train on 3528507 samples, validate on 392057 samples
Epoch 1/1
Train on 3528507 samples, validate on 392057 samples
Epoch 1/1
Train on 3528507 samples, validate on 392057 samples
Epoch 1/1
Train on 3528507 samples, validate on 392057 samples
Epoch 1/1
Train on 3528507 samples, validate on 392057 samples
Ep