## Train LSTM->XGB on TBI signal data

Code to train an XGBoost model on TBI signal data embedded by the LSTM, in order to predict future hypoxemia (low SAO2).

Note that the data is private and we are unable to make it publicly available in this repo.

### Load data and setup paths

In [None]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "4"

import numpy as np
from tbi_downstream_prediction import split_data

import keras
from keras.utils import multi_gpu_model
from keras.layers import Input, LSTM, Dense, Dropout
from keras.models import Sequential, load_model, Model
from matplotlib import cm, pyplot as plt
from sklearn import metrics
from os.path import expanduser as eu
from os.path import isfile, join
import numpy as np
import random, time

import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
# Configure the TensorFlow session that Keras will use:
#  - allow_growth=True: GPU memory is allocated as needed rather than
#    reserved in full when the session starts.
#  - allow_soft_placement=True: TensorFlow may pick another available device
#    when an op cannot run on the one it was assigned to.
gpu_options = tf.GPUOptions(allow_growth=True)
config = tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)
set_session(tf.Session(config=config))

# Project root on disk and the processed hypoxemia data directory below it.
PATH = "/homes/gws/hughchen/phase/tbi_subset/"
DPATH = PATH + "tbi/processed_data/hypoxemia/"
# Tag naming this data variant (not used again in this cell).
data_type = "raw[top11]"

# Full list of signals. Positions in this list are used to index axis 1 of
# X_tbi below, so it is assumed to match the ordering of that axis.
feat_lst = [
    "ECGRATE", "ETCO2", "ETSEV", "ETSEVO", "FIO2", "NIBPD", "NIBPM",
    "NIBPS", "PEAK", "PEEP", "PIP", "RESPRATE", "SAO2", "TEMP1", "TV",
]

# Exclude these features
weird_feat_lst = ["ETSEV", "PIP", "PEEP", "TV"]
# Names of the signals that are kept, and their positions in feat_lst.
feat_lst2 = [name for name in feat_lst if name not in weird_feat_lst]
feat_inds = np.array([feat_lst.index(name) for name in feat_lst2])

# Labels and the input array (file name suggests imputed + standardised
# signals -- confirm against the preprocessing code).
y_tbi = np.load(DPATH + "tbiy.npy")
X_tbi = np.load(DPATH + "X_tbi_imp_standard.npy")

# Keep only the selected signals along axis 1.
X_tbi2 = X_tbi[:, feat_inds, :]

# split_data comes from tbi_downstream_prediction; it returns the test,
# validation and training arrays in that order.
# NOTE(review): flatten=False presumably keeps a separate time axis per
# signal -- confirm in the helper.
(X_test, y_test,
 X_valid, y_valid,
 X_train, y_train) = split_data(DPATH, X_tbi2, y_tbi, flatten=False)

# Results directory for this LSTM configuration; PATH is already defined
# earlier in this cell with the same value.
RESULTPATH = PATH + "results/"
label_type = "desat_bool92_5_nodesat"
lstm_type = "biglstmdropoutv3_{}".format(label_type)
RESDIR = '{}{}/'.format(RESULTPATH, lstm_type)
if not os.path.exists(RESDIR):
    os.makedirs(RESDIR)

# Number of GPUs exposed to this process: one more than the number of commas
# in the comma-separated CUDA_VISIBLE_DEVICES list.
GPUNUM = os.environ["CUDA_VISIBLE_DEVICES"].count(",") + 1

### Load best LSTM model from "Run LSTM" and create an embedding model

In [None]:
# Hyperparameters of the LSTM run to load; they are encoded in the model's
# directory name below.
opt_name = "rmsprop"
lr = 0.001
drop = 0.5
b_size = 1000
epoch_num = 200

# Fixed hyperparameters
print("[PROGRESS] Starting create_train_model()")
lookback = 60  # number of time steps fed to each LSTM input
h1 = 50        # units in the first LSTM layer
h2 = 50        # units in the second LSTM layer
loss_func = "binary_crossentropy"

# Form the model name (it identifies the directory the model was saved in)
mod_name = "".join([
    "multivariate_biglstmdropoutv3_{}n_{}n_{}ep".format(h1, h2, epoch_num),
    "_{}opt_{}lr".format(opt_name, lr),
    "_{}drop_{}bs".format(drop, b_size),
])
MODDIR = PATH + "models/tune_biglstm/" + mod_name + "/"

# Load the best model (in terms of validation performance)
# NOTE(review): load_min_model_helper is not imported in any cell visible
# here; presumably it lives in tbi_downstream_prediction -- confirm and
# import it before running this cell on a fresh kernel.
min_mod = load_min_model_helper(MODDIR)

########## Form Model/Data #########
# Build one two-layer LSTM encoder per input signal, initialised with the
# weights of the loaded model, so that each signal can be embedded on its own.
min_mod_weights = min_mod.get_weights()

n_feats = len(feat_lst2)
# A Keras LSTM layer (with bias) holds three weight arrays:
# kernel, recurrent kernel and bias.
n_lstm_w = 3
# The slices below assume min_mod stores the weights of all first-layer LSTMs
# (one per signal) followed by all second-layer LSTMs. The start of the
# second group used to be hard-coded as 33 (= 3 * 11 signals); deriving it
# keeps the slices aligned if the feature list changes.
lstm2_offset = n_lstm_w * n_feats
if len(min_mod_weights) < 2 * lstm2_offset:
    raise ValueError(
        "Loaded model has {} weight arrays; expected at least {} for {} "
        "signals with two LSTM layers each".format(
            len(min_mod_weights), 2 * lstm2_offset, n_feats))

# One input array per signal: slice signal i out of the last axis and keep it
# as a length-1 axis so it matches Input(shape=(lookback, 1)).
X_train_lst = [X_train[:, :, i:(i + 1)] for i in range(n_feats)]
X_valid_lst = [X_valid[:, :, i:(i + 1)] for i in range(n_feats)]
X_test_lst = [X_test[:, :, i:(i + 1)] for i in range(n_feats)]

sig_lst = []
encoded_lst = []
for i in range(n_feats):
    lstm1_w = min_mod_weights[(i * n_lstm_w):((i + 1) * n_lstm_w)]
    lstm2_w = min_mod_weights[(lstm2_offset + i * n_lstm_w):
                              (lstm2_offset + (i + 1) * n_lstm_w)]

    sig = Input(shape=(lookback, 1))
    lstm1 = LSTM(h1, recurrent_dropout=drop, return_sequences=True,
                 weights=lstm1_w)
    lstm2 = LSTM(h2, recurrent_dropout=drop, dropout=drop,
                 weights=lstm2_w)

    # The output of the second LSTM is this signal's embedding.
    encoded = lstm2(lstm1(sig))
    sig_lst.append(sig)
    encoded_lst.append(encoded)

model = Model(inputs=sig_lst, outputs=encoded_lst)
# Reuse the hyperparameters set at the top of this cell instead of repeating
# the literals (the old `loss` variable was assigned but never used).
opt = opt_name
loss = loss_func
if GPUNUM > 1:
    model = multi_gpu_model(model, gpus=GPUNUM)
model.compile(optimizer=opt, loss=loss)

# Create embeddings: run every split through the embedding model. The model
# was built with a list of outputs (one per signal), and each result is
# treated below as a list of per-signal arrays.
X_train_embed_lst = model.predict(X_train_lst)
X_valid_embed_lst = model.predict(X_valid_lst)
X_test_embed_lst = model.predict(X_test_lst)

# Join the per-signal embeddings along axis 1 and save one array per split.
# np.save appends ".npy" to these names, which is what the next cell loads.
for _fname, _embeds in (
        ("X_train_embed_lstmbigdropoutv3_50n_arr", X_train_embed_lst),
        ("X_valid_embed_lstmbigdropoutv3_50n_arr", X_valid_embed_lst),
        ("X_test_embed_lstmbigdropoutv3_50n_arr", X_test_embed_lst),
):
    np.save(DPATH + _fname, np.concatenate(_embeds, axis=1))

### Load embedded data and train XGB model

In [None]:
from tbi_downstream_prediction import *
# Paths are redefined here with the same values as in the first cell.
PATH = "/homes/gws/hughchen/phase/tbi_subset/"
DPATH = PATH + "tbi/processed_data/hypoxemia/"
RESULTPATH = PATH + "results/"

# Load data: the embeddings saved by the previous cell, opened as read-only
# memory maps (mmap_mode="r") rather than read fully into memory.
X_train = np.load(DPATH + "X_train_embed_lstmbigdropoutv3_50n_arr.npy",
                  mmap_mode="r")
X_valid = np.load(DPATH + "X_valid_embed_lstmbigdropoutv3_50n_arr.npy",
                  mmap_mode="r")
X_test = np.load(DPATH + "X_test_embed_lstmbigdropoutv3_50n_arr.npy",
                 mmap_mode="r")

# Set important variables
label_type = "desat_bool92_5_nodesat"
eta = 0.02
hosp_data = "tbi"
data_type = "lstm_big_50n[top11]"
mod_type = "xgb_{}_eta{}".format(label_type, eta)

# Set up result directory (one per label type / eta combination)
RESDIR = '{}{}/'.format(RESULTPATH, mod_type)
if not os.path.exists(RESDIR):
    os.makedirs(RESDIR)

# Set parameters to train model.
# NOTE(review): y_train, y_valid and y_test are not loaded in this cell; they
# are the label arrays produced by split_data in the first cell, so that cell
# must have been run in the same kernel.
param = {
    'max_depth': 6,
    'eta': eta,
    'subsample': 0.5,
    'gamma': 1.0,
    'min_child_weight': 10,
    # Mean of the training labels is used as the starting score.
    'base_score': y_train.mean(),
    'objective': 'binary:logistic',
    'eval_metric': ["logloss"],
}

# Train and save xgb model (the helper is presumably provided by the star
# import from tbi_downstream_prediction -- confirm).
train_save_xgb_model(
    X_train, y_train,
    X_valid, y_valid,
    RESDIR, param, hosp_data, data_type,
)

# Test xgb model on the held-out test split
test_xgb_model(X_test, y_test, RESDIR, param, hosp_data, data_type)