### Heterogeneous variable experiment

Embedding models are trained in a particular source hospital.  We then assume that the target hospital has only a subset of the variables available in the source hospital.  In particular, we progressively reduce the number of available variables according to their feature importances.

The aim of the experiment is to test whether PHASE embeddings (next) consistently outperform conventional approaches when only a subset of the features is available.

In [None]:
import os
from phase.prediction import *
from sklearn.preprocessing import StandardScaler

# Root directory of the project; every other path below is derived from it.
PATH  = os.path.expanduser("~/phase/")
DPATH = PATH + "downstream_prediction/"
# PATH already ends with "/", so the sub-directory names must not start with
# another one (the previous form produced "~/phase//results/").
RESULTPATH = PATH + "results/"
MODELPATH  = PATH + "models/"

# NOTE(review): `lookback` is not referenced in this cell -- presumably the
# number of past samples per signal; confirm before relying on it.
lookback = 60
# When True, the experiment loop only loads data and prints shapes; model
# training and testing are skipped.
DEBUG = False

# Per-task signal rankings.  Each list holds the same 15 signal names in a
# task-specific order; the experiment keeps the first `num_feats` entries.
# NOTE(review): the order is presumably descending feature importance, as
# described in the notebook text -- confirm against how it was computed.
hypox_feat_lst = ["SAO2","ETCO2","FIO2","PIP","ECGRATE","TV","ETSEVO","RESPRATE",
                  "NIBPD","ETSEV","PEAK","PEEP","NIBPS","NIBPM","TEMP1"]

hypot_feat_lst = ["NIBPM","NIBPD","NIBPS","ECGRATE","PIP","ETCO2","ETSEVO","ETSEV",
                  "SAO2","PEAK","TV","RESPRATE","FIO2","TEMP1","PEEP"]

hypoc_feat_lst = ["ETCO2","TV","FIO2","PEAK","RESPRATE","ETSEV","PIP","PEEP",
                  "ETSEVO","TEMP1","ECGRATE","NIBPS","NIBPD","SAO2","NIBPM"]

# One entry per prediction task: (label_type, eta, curr_feat, feat_lst).
# `eta` is forwarded to the XGBoost train/test helpers (presumably the
# learning rate -- TODO confirm); `curr_feat` is the signal the label is
# derived from, as passed to `load_data`.
label_type_lst = [("desat_bool92_5_nodesat",0.02,"SAO2",hypox_feat_lst),
                  ("nibpm60",0.1,"NIBPM",hypot_feat_lst),
                  ("etco235",0.1,"ETCO2",hypoc_feat_lst)]

# Heterogeneous-variable sweep.  For every task, hospital and input
# representation, train and evaluate one XGBoost model per feature budget
# (the first `num_feats` signals of the task's ranking).
# NOTE(review): the `[1:]` slice skips the first task
# ("desat_bool92_5_nodesat") -- presumably it was already run; drop the slice
# to run all three tasks.
for label_type, eta, curr_feat, feat_lst in label_type_lst[1:]:
    print("\n\n[PROGRESS] ************ label_type {}".format(label_type))

    # These depend only on the task, so compute them once per task rather
    # than once per hospital.
    data_type_lst = ["raw[top15]+nonsignal", "nextfive_0[top15]+nonsignal", "nextfive_1[top15]+nonsignal"]
    xgb_type = "xgb_{}_top15_eta{}".format(label_type,eta)
    RESDIR = '{}{}/'.format(RESULTPATH, xgb_type)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(RESDIR, exist_ok=True)

    for hosp_data in [0,1]:
        print("\n\n[PROGRESS] ******** hosp_data {}".format(hosp_data))
        for data_type in data_type_lst:
            print("\n\n[PROGRESS] ****** data_type {}".format(data_type))
            for num_feats in [1,3,5,7,9,11,13,15]:
                print("[PROGRESS] *** num_feats {}".format(num_feats))
                # Name under which the model/results for this feature budget
                # are stored, e.g. "raw[top3]+nonsignal".  Loading still uses
                # the "top15" name; the subset is selected via `feat_lst`.
                data_type2 = data_type.replace("top15","top"+str(num_feats))
                print("\n[Progress] data_type {}".format(data_type))

                # Signals available at this feature budget.
                top_feats = feat_lst[:num_feats]

                # Fourth positional argument: True here, False for the test
                # load below (presumably train/val vs. test split -- confirm
                # against `load_data`).
                (trainvalX,trainvalY) = load_data(DPATH,data_type,label_type,True,hosp_data,
                                                  curr_feat,feat_lst=top_feats)
                print("[Progress] trainvalX.shape {}".format(trainvalX.shape))
                if not DEBUG:
                    train_xgb_model(RESDIR,trainvalX,trainvalY,data_type2,
                                    label_type,hosp_data,eta)

                (test1X,test1Y) = load_data(DPATH,data_type,label_type,False,hosp_data,
                                            curr_feat,feat_lst=top_feats)
                print("[Progress] test1X.shape    {}".format(test1X.shape))
                if not DEBUG:
                    load_xgb_model_and_test(RESDIR,test1X,test1Y,data_type2,
                                            label_type,hosp_data,xgb_type,eta)



[PROGRESS] ************ label_type nibpm60


[PROGRESS] ******** hosp_data 0


[PROGRESS] ****** data_type raw[top15]+nonsignal
[PROGRESS] *** num_feats 1

[Progress] data_type raw[top15]+nonsignal
[Progress] trainvalX.shape (1837676, 66)
[Progress] test1X.shape    (234659, 66)
[PROGRESS] *** num_feats 3

[Progress] data_type raw[top15]+nonsignal
[Progress] trainvalX.shape (1837676, 186)
[Progress] test1X.shape    (234659, 186)
[PROGRESS] *** num_feats 5

[Progress] data_type raw[top15]+nonsignal
[Progress] trainvalX.shape (1837676, 306)
[Progress] test1X.shape    (234659, 306)
[PROGRESS] *** num_feats 7

[Progress] data_type raw[top15]+nonsignal
[Progress] trainvalX.shape (1837676, 426)
