In [4]:
import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import Huber
from tensorflow.keras.optimizers.schedules import ExponentialDecay

from tensorflow.keras.callbacks import EarlyStopping

# Allocate GPU memory on demand instead of reserving it all up front.
# TensorFlow requires the memory-growth setting to be identical on every
# visible GPU, so it is applied to each device rather than only the first.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
  try:
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
  except RuntimeError as e:
    # Report the failure and carry on with the default allocation behaviour.
    print(e)

from sklearn import metrics
from sklearn.preprocessing import LabelEncoder

In [5]:
# Load the demographics table from CSV, then show its shape and first rows.
demo_df = pd.read_csv('/home/bmlserver/jk/iPynb/NN/DATA/Demographics.csv')
print(demo_df.shape)

demo_df.head(5)

(1837, 13)


Unnamed: 0,idx,current_age_yrs,Female,Male,Asian,Black,Native American,Other,Pacific Islander,Unknown_race,White,SMOKER_N,SMOKER_Y
0,890,80.0,1,0,0,0,0,0,0,0,1,1,0
1,1879,64.12,0,1,0,0,0,0,0,0,1,0,1
2,1783,93.42,0,1,0,0,0,0,0,0,1,0,1
3,3896,93.82,1,0,0,0,0,0,0,0,1,1,0
4,1193,71.78,1,0,0,0,0,0,0,0,1,1,0


In [6]:
# Standardize the demographics features and key the rows by 'idx'.
# NOTE(review): the mean/std below are computed over all rows, i.e. before the
# train/val/test split is applied, so held-out rows influence the scaling —
# consider fitting the statistics on the training split only.

# Keep only columns with at least two distinct values: constant columns carry
# no signal, and all-NaN columns (nunique == 0) would z-score to all-NaN.
demo_features = demo_df.loc[:, demo_df.nunique() > 1].set_index('idx')

# z-score every remaining feature column (sample standard deviation).
demo_df = (demo_features - demo_features.mean()) / demo_features.std()

# Kept so the name defined by the original cell is still available.
processed_demo_list = [demo_df]

demo_df.head(5)

Unnamed: 0_level_0,current_age_yrs,Female,Male,Asian,Black,Native American,Other,Pacific Islander,Unknown_race,White,SMOKER_N,SMOKER_Y
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
890,0.648272,0.858799,-0.858799,-0.291564,-0.284921,-0.040434,-0.359168,-0.0844,-0.381029,0.822026,0.477302,-0.477302
1879,-0.093297,-1.163783,1.163783,-0.291564,-0.284921,-0.040434,-0.359168,-0.0844,-0.381029,0.822026,-2.09397,2.09397
1783,1.274963,-1.163783,1.163783,-0.291564,-0.284921,-0.040434,-0.359168,-0.0844,-0.381029,0.822026,-2.09397,2.09397
3896,1.293643,0.858799,-0.858799,-0.291564,-0.284921,-0.040434,-0.359168,-0.0844,-0.381029,0.822026,0.477302,-0.477302
1193,0.264412,0.858799,-0.858799,-0.291564,-0.284921,-0.040434,-0.359168,-0.0844,-0.381029,0.822026,0.477302,-0.477302


In [7]:
# Load the outpatient medication table from CSV, then show its shape and first rows.
out_med_df = pd.read_csv('/home/bmlserver/jk/iPynb/NN/DATA/OUT_MED.csv')
print(out_med_df.shape)

out_med_df.head(5)

(1837, 1283)


Unnamed: 0,idx,"Outpatient_LAXATIVES, LOCAL/RECTAL:Binary",Outpatient_PLATELET AGGREGATION INHIBITORS:Binary,Outpatient_nan:Binary,"Outpatient_NOSE PREPARATIONS, VASOCONSTRICTORS(OTC):Binary","Outpatient_ANALGESIC/ANTIPYRETICS,NON-SALICYLATE:Binary",Outpatient_ANTIHYPERLIPIDEMIC - HMG COA REDUCTASE INHIBITORS:Binary,Outpatient_SELECTIVE SEROTONIN REUPTAKE INHIBITOR (SSRIS):Binary,"Outpatient_VASODILATORS,CORONARY:Binary",Outpatient_ANTIEMETIC/ANTIVERTIGO AGENTS:Binary,...,Outpatient_LHRH(GNRH)AGNST PIT.SUP-CENTRAL PRECOCIOUS PUBERTY:Frequeny,Outpatient_THROMBOLYTIC - NUCLEOTIDE TYPE:Frequeny,Outpatient_SELECTIVE SEROTONIN 5-HT2A INVERSE AGONISTS (SSIA):Frequeny,Outpatient_ANTINEOPLASTIC - HEDGEHOG PATHWAY INHIBITOR:Frequeny,Outpatient_ORGAN TRANSPLANTATION PRESERVATION SOLUTIONS:Frequeny,Outpatient_FEEDING DEVICES:Frequeny,"Outpatient_DRUGS TO TX GAUCHER DX-TYPE 1, SUBSTRATE REDUCING:Frequeny","Outpatient_TOPICAL PREPARATIONS,NON-MEDICINAL:Frequeny","Outpatient_ANTI-INFLAMMATORY, INTERLEUKIN-1 BETA BLOCKERS:Frequeny","Outpatient_ACNE AGENTS,SYSTEMIC:Frequeny"
0,84,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2248,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2271,0,0,0,0,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1691,1,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,3286,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [8]:
# Standardize the outpatient medication features and key the rows by 'idx'.
# NOTE(review): the mean/std below are computed over all rows, i.e. before the
# train/val/test split is applied, so held-out rows influence the scaling —
# consider fitting the statistics on the training split only.

# Keep only columns with at least two distinct values: constant columns carry
# no signal, and all-NaN columns (nunique == 0) would z-score to all-NaN.
out_med_features = out_med_df.loc[:, out_med_df.nunique() > 1].set_index('idx')

# z-score every remaining feature column (sample standard deviation).
out_med_df = (out_med_features - out_med_features.mean()) / out_med_features.std()

# Kept so the name defined by the original cell is still available.
processed_out_med_list = [out_med_df]

In [9]:
# Load the inpatient medication table from CSV, then show its shape and first rows.
in_med_df = pd.read_csv('/home/bmlserver/jk/iPynb/NN/DATA/INP_MED.csv')
print(in_med_df.shape)

in_med_df.head(5)

(1837, 1283)


Unnamed: 0,idx,"LAXATIVES, LOCAL/RECTAL:Binary",PLATELET AGGREGATION INHIBITORS:Binary,nan:Binary,"NOSE PREPARATIONS, VASOCONSTRICTORS(OTC):Binary","ANALGESIC/ANTIPYRETICS,NON-SALICYLATE:Binary",ANTIHYPERLIPIDEMIC - HMG COA REDUCTASE INHIBITORS:Binary,SELECTIVE SEROTONIN REUPTAKE INHIBITOR (SSRIS):Binary,"VASODILATORS,CORONARY:Binary",ANTIEMETIC/ANTIVERTIGO AGENTS:Binary,...,LHRH(GNRH)AGNST PIT.SUP-CENTRAL PRECOCIOUS PUBERTY:Frequeny,THROMBOLYTIC - NUCLEOTIDE TYPE:Frequeny,SELECTIVE SEROTONIN 5-HT2A INVERSE AGONISTS (SSIA):Frequeny,ANTINEOPLASTIC - HEDGEHOG PATHWAY INHIBITOR:Frequeny,ORGAN TRANSPLANTATION PRESERVATION SOLUTIONS:Frequeny,FEEDING DEVICES:Frequeny,"DRUGS TO TX GAUCHER DX-TYPE 1, SUBSTRATE REDUCING:Frequeny","TOPICAL PREPARATIONS,NON-MEDICINAL:Frequeny","ANTI-INFLAMMATORY, INTERLEUKIN-1 BETA BLOCKERS:Frequeny","ACNE AGENTS,SYSTEMIC:Frequeny"
0,84,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2248,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2271,0,0,0,0,1,1,0,0,1,...,0,0,0,0,0,0,0,0,0,0
3,1691,1,1,0,0,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
4,3286,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [10]:
# Standardize the inpatient medication features and key the rows by 'idx'.
# NOTE(review): the mean/std below are computed over all rows, i.e. before the
# train/val/test split is applied, so held-out rows influence the scaling —
# consider fitting the statistics on the training split only.

# Keep only columns with at least two distinct values: constant columns carry
# no signal, and all-NaN columns (nunique == 0) would z-score to all-NaN.
in_med_features = in_med_df.loc[:, in_med_df.nunique() > 1].set_index('idx')

# z-score every remaining feature column (sample standard deviation).
in_med_df = (in_med_features - in_med_features.mean()) / in_med_features.std()

# Kept so the name defined by the original cell is still available.
processed_in_med_list = [in_med_df]

In [11]:
# Load the ICD diagnosis-group table from CSV, then show its shape and first rows.
icd_df = pd.read_csv('/home/bmlserver/jk/iPynb/NN/DATA/ICD.csv')
print(icd_df.shape)

icd_df.head(5)

(1837, 283)


Unnamed: 0,idx,MATERNAL CAUSES OF PERINATAL MORBIDITY AND MORTALITY:frequency,OTHER CONDITIONS ORIGINATING IN THE PERINATAL PERIOD:frequency,COMPLICATIONS MAINLY RELATED TO PREGNANCY:frequency,COMPLICATIONS OCCURRING MAINLY IN THE COURSE OF LABOR AND DELIVERY:frequency,COMPLICATIONS OF THE PUERPERIUM:frequency,ECTOPIC AND MOLAR PREGNANCY:frequency,"NORMAL DELIVERY, AND OTHER INDICATIONS FOR CARE IN PREGNANCY, LABOR, AND DELIVERY:frequency",OTHER MATERNAL AND FETAL COMPLICATIONS:frequency,OTHER PREGNANCY WITH ABORTIVE OUTCOME:frequency,...,"Symptoms concerning nutrition, metabolism, and development:presence",Symptoms involving cardiovascular system:presence,Symptoms involving digestive system:presence,Symptoms involving head and neck:presence,Symptoms involving nervous and musculoskeletal systems:presence,Symptoms involving respiratory system and other chest symptoms:presence,Symptoms involving skin and other integumentary tissue:presence,Symptoms involving urinary system:presence,SUPPLEMENTARY CLASSIFICATION OF EXTERNAL CAUSES OF INJURY AND POISONING:presence,SUPPLEMENTARY CLASSIFICATION OF FACTORS INFLUENCING HEALTH STATUS AND CONTACT WITH HEALTH SERVICES:presence
0,84,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1,2248,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
2,2271,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
3,1691,0,0,0,0,0,0,0,0,0,...,0,1,1,0,0,1,0,0,0,0
4,3286,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [12]:
# Standardize the ICD features and key the rows by 'idx'.
# NOTE(review): the mean/std below are computed over all rows, i.e. before the
# train/val/test split is applied, so held-out rows influence the scaling —
# consider fitting the statistics on the training split only.

# Keep only columns with at least two distinct values: constant columns carry
# no signal, and all-NaN columns (nunique == 0) would z-score to all-NaN.
icd_features = icd_df.loc[:, icd_df.nunique() > 1].set_index('idx')

# z-score every remaining feature column (sample standard deviation).
icd_df = (icd_features - icd_features.mean()) / icd_features.std()

# Kept so the name defined by the original cell is still available.
processed_icd_list = [icd_df]

icd_df.shape

(1837, 258)

In [13]:
# Load the lab results table from CSV, then show its shape and first rows.
lab_df = pd.read_csv('/home/bmlserver/jk/iPynb/NN/DATA/LABS.csv')
print(lab_df.shape)

lab_df.head(5)

(1837, 45)


Unnamed: 0,idx,albumin:Binary,alk:Binary,ast:Binary,anion:Binary,bilirubin:Binary,bun:Binary,bun_cre:Binary,calcium:Binary,creatinine:Binary,...,hemoglobin:Value,a1c:Value,hgb:Value,inr:Value,lactate:Value,platelet:Value,potassium:Value,ptt:Value,sodium:Value,wbc:Value
0,84,1,1,1,1,1,1,0,0,1,...,7.9,0.0,0.0,0.0,0.0,116.0,0.0,0.0,0.0,0.3
1,2248,1,1,1,1,1,1,0,0,1,...,12.4,0.0,0.0,0.0,0.0,655.0,0.0,0.0,0.0,13.8
2,2271,1,1,1,1,1,1,0,0,1,...,10.9,0.0,0.0,1.3,0.0,356.0,0.0,15.9,0.0,6.8
3,1691,1,1,1,1,1,1,0,0,1,...,11.7,0.0,0.0,0.0,1.8,426.0,0.0,0.0,0.0,10.0
4,3286,1,1,1,1,1,1,0,0,1,...,12.7,0.0,0.0,2.0,0.0,131.0,0.0,21.7,0.0,4.0


In [14]:
# Standardize the lab features and key the rows by 'idx'.
# NOTE(review): the mean/std below are computed over all rows, i.e. before the
# train/val/test split is applied, so held-out rows influence the scaling —
# consider fitting the statistics on the training split only.

# Keep only columns with at least two distinct values: constant columns carry
# no signal, and all-NaN columns (nunique == 0) would z-score to all-NaN.
lab_features = lab_df.loc[:, lab_df.nunique() > 1].set_index('idx')

# z-score every remaining feature column (sample standard deviation).
lab_df = (lab_features - lab_features.mean()) / lab_features.std()

# Kept so the name defined by the original cell is still available.
processed_lab_list = [lab_df]

In [15]:
# Load the vitals table from CSV, then show its shape and first rows.
vitals_df = pd.read_csv('/home/bmlserver/jk/iPynb/NN/DATA/Vitals.csv')
print(vitals_df.shape)

vitals_df.head(5)

(1837, 10)


Unnamed: 0,idx,SBP,DBP,height_inch,weight_kg,bmi,tempf,respirations,spO2,pulse
0,84,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2248,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2271,0.0,-7.0,-48.5,-5.65,-38.985,44.3,-28.5,5.0,12.0
3,1691,5.0,0.0,-48.0,-1.9,-37.85,43.05,-28.5,4.5,18.0
4,3286,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
# Standardize the vitals features and key the rows by 'idx'.
# NOTE(review): the mean/std below are computed over all rows, i.e. before the
# train/val/test split is applied, so held-out rows influence the scaling —
# consider fitting the statistics on the training split only.

# Keep only columns with at least two distinct values: constant columns carry
# no signal, and all-NaN columns (nunique == 0) would z-score to all-NaN.
vitals_features = vitals_df.loc[:, vitals_df.nunique() > 1].set_index('idx')

# z-score every remaining feature column (sample standard deviation).
vitals_df = (vitals_features - vitals_features.mean()) / vitals_features.std()

# Kept so the name defined by the original cell is still available.
processed_vitals_list = [vitals_df]

In [17]:
# Load the vision table; per the preview it carries the label, the vision
# score ('pred'), the PE type and the train/val/test split for each 'idx'.
vision_df = pd.read_csv('/home/bmlserver/jk/iPynb/NN/DATA/Vision.csv')
print(vision_df.shape)

vision_df.head(5)

(1837, 6)


Unnamed: 0.1,Unnamed: 0,idx,label,pred,pe_type,split
0,0,1436,0,0.405236,,train
1,1,1880,1,0.836337,segmental,train
2,2,2738,0,0.596504,,val
3,3,2883,0,0.108968,,train
4,4,2302,1,0.808755,segmental,train


In [18]:
# Key the vision table by 'idx' so it aligns with the standardized EMR frames.
vision_df = vision_df.set_index('idx')

# Build one frame per modality: vision columns first, modality features after,
# joined column-wise on the shared index.
df_demo, df_outMed, df_inMed, df_icd, df_lab, df_vitals = (
    pd.concat([vision_df, modality_features], axis=1)
    for modality_features in (demo_df, out_med_df, in_med_df, icd_df, lab_df, vitals_df)
)

In [19]:
# Preview the first rows of the combined vision + demographics frame.
df_demo.head(5)

Unnamed: 0_level_0,Unnamed: 0,label,pred,pe_type,split,current_age_yrs,Female,Male,Asian,Black,Native American,Other,Pacific Islander,Unknown_race,White,SMOKER_N,SMOKER_Y
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1436,0,0,0.405236,,train,0.11311,-1.163783,1.163783,-0.291564,-0.284921,-0.040434,2.782697,-0.0844,-0.381029,-1.215844,-2.09397,2.09397
1880,1,1,0.836337,segmental,train,0.384427,-1.163783,1.163783,-0.291564,-0.284921,-0.040434,-0.359168,-0.0844,-0.381029,0.822026,0.477302,-0.477302
2738,2,0,0.596504,,val,0.599239,0.858799,-0.858799,-0.291564,-0.284921,-0.040434,-0.359168,-0.0844,2.623046,-1.215844,0.477302,-0.477302
2883,3,0,0.108968,,train,-0.515449,0.858799,-0.858799,-0.291564,-0.284921,-0.040434,-0.359168,-0.0844,2.623046,-1.215844,0.477302,-0.477302
2302,4,1,0.808755,segmental,train,0.076685,0.858799,-0.858799,-0.291564,-0.284921,-0.040434,-0.359168,-0.0844,2.623046,-1.215844,-2.09397,2.09397


In [20]:
# Preview the first rows of the combined vision + outpatient medication frame.
df_outMed.head(5)

Unnamed: 0_level_0,Unnamed: 0,label,pred,pe_type,split,"Outpatient_LAXATIVES, LOCAL/RECTAL:Binary",Outpatient_PLATELET AGGREGATION INHIBITORS:Binary,"Outpatient_NOSE PREPARATIONS, VASOCONSTRICTORS(OTC):Binary","Outpatient_ANALGESIC/ANTIPYRETICS,NON-SALICYLATE:Binary",Outpatient_ANTIHYPERLIPIDEMIC - HMG COA REDUCTASE INHIBITORS:Binary,...,"Outpatient_TOPICAL HYPERTRICHOTIC AGENTS, EYELASHES:Frequeny","Outpatient_ANTIVIRALS, HIV-SPECIFIC, NUCLEOTIDE ANALOG, RTI:Frequeny","Outpatient_ANTIVIRALS, HIV-SPECIFIC, PROTEASE INHIBITOR COMB:Frequeny","Outpatient_DURABLE MEDICAL EQUIPMENT,MISC(GROUP 1):Frequeny",Outpatient_DIABETIC SUPPLIES:Frequeny,"Outpatient_ANTISEPTICS,GENERAL:Frequeny","Outpatient_RENIN INHIBITOR, DIRECT:Frequeny","Outpatient_ANTIHYPERGLYCEMIC,DPP-4 INHIBITOR-BIGUANIDE COMBS.:Frequeny",Outpatient_BRACES AND RELATED DEVICES:Frequeny,Outpatient_NEEDLES/NEEDLELESS DEVICES:Frequeny
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1436,0,0,0.405236,,train,-0.112571,-0.201918,-0.033005,-0.191473,-0.22955,...,-0.023332,-0.033005,-0.023332,-0.040434,-0.044124,-0.023332,-0.033005,-0.023332,-0.023332,-0.033005
1880,1,1,0.836337,segmental,train,-0.112571,-0.201918,-0.033005,-0.191473,-0.22955,...,-0.023332,-0.033005,-0.023332,-0.040434,-0.044124,-0.023332,-0.033005,-0.023332,-0.023332,-0.033005
2738,2,0,0.596504,,val,-0.112571,-0.201918,-0.033005,-0.191473,-0.22955,...,-0.023332,-0.033005,-0.023332,-0.040434,-0.044124,-0.023332,-0.033005,-0.023332,-0.023332,-0.033005
2883,3,0,0.108968,,train,-0.112571,-0.201918,-0.033005,-0.191473,-0.22955,...,-0.023332,-0.033005,-0.023332,-0.040434,-0.044124,-0.023332,-0.033005,-0.023332,-0.023332,-0.033005
2302,4,1,0.808755,segmental,train,-0.112571,-0.201918,-0.033005,-0.191473,4.353971,...,-0.023332,-0.033005,-0.023332,-0.040434,-0.044124,-0.023332,-0.033005,-0.023332,-0.023332,-0.033005


In [21]:
# All standardized EMR modality frames, in the order they are concatenated.
# (A list literal replaces the former loop, which only copied each frame over.)
processed_emr_dfs = [demo_df, out_med_df, in_med_df, icd_df, lab_df, vitals_df]

# Column-wise union of every EMR modality, aligned on the shared index.
emr_df = pd.concat(processed_emr_dfs, axis=1)

# Vision columns followed by the full EMR feature set.
df_emr = pd.concat([vision_df, emr_df], axis=1)

In [22]:
# Feature column names of each standardized modality frame.
DEMO_EMR_FEATURE_COLS = list(demo_df.columns)
OUTMED_EMR_FEATURE_COLS = list(out_med_df.columns)
INMED_EMR_FEATURE_COLS = list(in_med_df.columns)
ICD_EMR_FEATURE_COLS = list(icd_df.columns)
LAB_EMR_FEATURE_COLS = list(lab_df.columns)
VITALS_EMR_FEATURE_COLS = list(vitals_df.columns)

# Columns supplied by the vision table.
PE_TYPE_COL = 'pe_type'
SPLIT_COL = 'split'
VISION_PRED_COL = 'pred'
LABEL_COL = 'label'

# Names for prediction columns.
# NOTE(review): no lab prediction column name is defined here although a lab
# model is trained later in the notebook — confirm whether one is needed.
EMR_PRED_COL = 'emr_pred'
DEMO_PRED_COL = 'demo_pred'
OUTMED_PRED_COL = 'outMed_pred'
INMED_PRED_COL = 'inMed_pred'
ICD_PRED_COL = 'icd_pred'
VITAL_PRED_COL = 'vital_pred'
FUSION_PRED_COL = 'late_fusion_pred'

In [23]:
# Partition the demographics frame by the value of its split column.
demo_df_train, demo_df_val, demo_df_test = (
    df_demo[df_demo[SPLIT_COL] == split_name]
    for split_name in ('train', 'val', 'test')
)

In [24]:
# Partition the inpatient medication frame by the value of its split column.
inMed_df_train, inMed_df_val, inMed_df_test = (
    df_inMed[df_inMed[SPLIT_COL] == split_name]
    for split_name in ('train', 'val', 'test')
)

In [25]:
# Partition the outpatient medication frame by the value of its split column.
outMed_df_train, outMed_df_val, outMed_df_test = (
    df_outMed[df_outMed[SPLIT_COL] == split_name]
    for split_name in ('train', 'val', 'test')
)

In [26]:
# Partition the ICD frame by the value of its split column.
icd_df_train, icd_df_val, icd_df_test = (
    df_icd[df_icd[SPLIT_COL] == split_name]
    for split_name in ('train', 'val', 'test')
)

In [27]:
# Partition the lab frame by the value of its split column.
lab_df_train, lab_df_val, lab_df_test = (
    df_lab[df_lab[SPLIT_COL] == split_name]
    for split_name in ('train', 'val', 'test')
)

In [28]:
# Partition the vitals frame by the value of its split column.
vital_df_train, vital_df_val, vital_df_test = (
    df_vitals[df_vitals[SPLIT_COL] == split_name]
    for split_name in ('train', 'val', 'test')
)

In [29]:
# Demographics feature matrices (X) and label vectors (y) as NumPy arrays.
demoX_train, demoX_test, demoX_valid = (
    part[DEMO_EMR_FEATURE_COLS].to_numpy()
    for part in (demo_df_train, demo_df_test, demo_df_val)
)
demoy_train, demoy_test, demoy_valid = (
    part[LABEL_COL].to_numpy()
    for part in (demo_df_train, demo_df_test, demo_df_val)
)

In [30]:
# Outpatient medication feature matrices (X) and label vectors (y) as NumPy arrays.
outMedX_train, outMedX_test, outMedX_valid = (
    part[OUTMED_EMR_FEATURE_COLS].to_numpy()
    for part in (outMed_df_train, outMed_df_test, outMed_df_val)
)
outMedy_train, outMedy_test, outMedy_valid = (
    part[LABEL_COL].to_numpy()
    for part in (outMed_df_train, outMed_df_test, outMed_df_val)
)

In [31]:
# Inpatient medication feature matrices (X) and label vectors (y) as NumPy arrays.
inMedX_train, inMedX_test, inMedX_valid = (
    part[INMED_EMR_FEATURE_COLS].to_numpy()
    for part in (inMed_df_train, inMed_df_test, inMed_df_val)
)
inMedy_train, inMedy_test, inMedy_valid = (
    part[LABEL_COL].to_numpy()
    for part in (inMed_df_train, inMed_df_test, inMed_df_val)
)

In [32]:
# ICD feature matrices (X) and label vectors (y) as NumPy arrays.
icdX_train, icdX_test, icdX_valid = (
    part[ICD_EMR_FEATURE_COLS].to_numpy()
    for part in (icd_df_train, icd_df_test, icd_df_val)
)
icdy_train, icdy_test, icdy_valid = (
    part[LABEL_COL].to_numpy()
    for part in (icd_df_train, icd_df_test, icd_df_val)
)

In [33]:
# Lab feature matrices (X) and label vectors (y) as NumPy arrays.
labX_train, labX_test, labX_valid = (
    part[LAB_EMR_FEATURE_COLS].to_numpy()
    for part in (lab_df_train, lab_df_test, lab_df_val)
)
laby_train, laby_test, laby_valid = (
    part[LABEL_COL].to_numpy()
    for part in (lab_df_train, lab_df_test, lab_df_val)
)

In [34]:
# Vitals feature matrices (X) and label vectors (y) as NumPy arrays.
vitalX_train, vitalX_test, vitalX_valid = (
    part[VITALS_EMR_FEATURE_COLS].to_numpy()
    for part in (vital_df_train, vital_df_test, vital_df_val)
)
vitaly_train, vitaly_test, vitaly_valid = (
    part[LABEL_COL].to_numpy()
    for part in (vital_df_train, vital_df_test, vital_df_val)
)

In [35]:
# Early-stopping callback passed to every model's fit() call: training halts
# once validation accuracy has gone 20 epochs without improving by 0.0003.
# NOTE(review): restore_best_weights is not set, so presumably the weights of
# the last epoch (not the best one) are kept — confirm this is intended.
es = EarlyStopping(
    monitor='val_accuracy',
    min_delta=0.0003,
    patience=20,
    verbose=2,
    mode='auto',
)

In [36]:
# demo_model = keras.models.load_model('/home/bmlserver/jk/iPynb/NN/savedModels/demo_model/')

# Binary classifier over the demographics features: two 10-unit ReLU hidden
# layers with L1+L2 kernel regularization, followed by a sigmoid output unit.
# The input width is taken from the training matrix rather than hard-coded, so
# it stays correct if preprocessing keeps a different number of columns.
demo_model = keras.models.Sequential([
  keras.layers.Dense(10, activation=tf.nn.relu, input_shape=(demoX_train.shape[1],),
    kernel_regularizer=keras.regularizers.l1_l2(l1=0.02, l2=0.02)),
  keras.layers.Dense(10, activation=tf.nn.relu,
    kernel_regularizer=keras.regularizers.l1_l2(l1=0.02, l2=0.02)),
  keras.layers.Dense(1, activation=tf.nn.sigmoid)
])

# demo_lr_schedule = ExponentialDecay(initial_learning_rate=0.1, decay_steps=10000, decay_rate=0.9)

# demo_optimizer = Adam(learning_rate=demo_lr_schedule)

demo_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

2022-05-16 16:47:53.400785: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-05-16 16:47:54.323804: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 6562 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 2080 SUPER, pci bus id: 0000:af:00.0, compute capability: 7.5


In [37]:
# outMed_model = keras.models.load_model('/home/bmlserver/jk/iPynb/NN/savedModels/outMed_model/')

# Binary classifier over the outpatient medication features: two 20-unit ReLU
# hidden layers with L1+L2 kernel regularization, followed by a sigmoid output.
# The input width is taken from the training matrix rather than hard-coded, so
# it stays correct if preprocessing keeps a different number of columns.
outMed_model = keras.models.Sequential([
  keras.layers.Dense(20, activation=tf.nn.relu, input_shape=(outMedX_train.shape[1],),
    kernel_regularizer=keras.regularizers.l1_l2(l1=0.02, l2=0.02)),
  keras.layers.Dense(20, activation=tf.nn.relu,
    kernel_regularizer=keras.regularizers.l1_l2(l1=0.02, l2=0.02)),
  keras.layers.Dense(1, activation=tf.nn.sigmoid)
])

outMed_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [38]:
# inMed_model = keras.models.load_model('/home/bmlserver/jk/iPynb/NN/savedModels/inMed_model/')

# Binary classifier over the inpatient medication features: two 20-unit ReLU
# hidden layers with L1+L2 kernel regularization, followed by a sigmoid output.
# The input width is taken from the training matrix rather than hard-coded, so
# it stays correct if preprocessing keeps a different number of columns.
inMed_model = keras.models.Sequential([
  keras.layers.Dense(20, activation=tf.nn.relu, input_shape=(inMedX_train.shape[1],),
    kernel_regularizer=keras.regularizers.l1_l2(l1=0.02, l2=0.02)),
  keras.layers.Dense(20, activation=tf.nn.relu,
    kernel_regularizer=keras.regularizers.l1_l2(l1=0.02, l2=0.02)),
  keras.layers.Dense(1, activation=tf.nn.sigmoid)
])

inMed_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [39]:
# icd_model = keras.models.load_model('/home/bmlserver/jk/iPynb/NN/savedModels/icd_model/')

# Binary classifier over the ICD features: two 20-unit ReLU hidden layers with
# L1+L2 kernel regularization, followed by a sigmoid output unit.
# The input width is taken from the training matrix rather than hard-coded, so
# it stays correct if preprocessing keeps a different number of columns.
icd_model = keras.models.Sequential([
  keras.layers.Dense(20, activation=tf.nn.relu, input_shape=(icdX_train.shape[1],),
    kernel_regularizer=keras.regularizers.l1_l2(l1=0.02, l2=0.02)),
  keras.layers.Dense(20, activation=tf.nn.relu,
    kernel_regularizer=keras.regularizers.l1_l2(l1=0.02, l2=0.02)),
  keras.layers.Dense(1, activation=tf.nn.sigmoid)
])

icd_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [40]:
# lab_model = keras.models.load_model('/home/bmlserver/jk/iPynb/NN/savedModels/lab_model/')

# Binary classifier over the lab features: two 10-unit ReLU hidden layers with
# L1+L2 kernel regularization, followed by a sigmoid output unit.
# The input width is taken from the training matrix rather than hard-coded, so
# it stays correct if preprocessing keeps a different number of columns.
lab_model = keras.models.Sequential([
  keras.layers.Dense(10, activation=tf.nn.relu, input_shape=(labX_train.shape[1],),
    kernel_regularizer=keras.regularizers.l1_l2(l1=0.02, l2=0.02)),
  keras.layers.Dense(10, activation=tf.nn.relu,
    kernel_regularizer=keras.regularizers.l1_l2(l1=0.02, l2=0.02)),
  keras.layers.Dense(1, activation=tf.nn.sigmoid)
])

lab_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [41]:
# vital_model = keras.models.load_model('/home/bmlserver/jk/iPynb/NN/savedModels/vital_model/')

# Binary classifier over the vitals features: two 10-unit ReLU hidden layers
# with L1+L2 kernel regularization, followed by a sigmoid output unit.
# The input width is taken from the training matrix rather than hard-coded, so
# it stays correct if preprocessing keeps a different number of columns.
vital_model = keras.models.Sequential([
  keras.layers.Dense(10, activation=tf.nn.relu, input_shape=(vitalX_train.shape[1],),
      kernel_regularizer=keras.regularizers.l1_l2(l1=0.02, l2=0.02)),
  keras.layers.Dense(10, activation=tf.nn.relu,
      kernel_regularizer=keras.regularizers.l1_l2(l1=0.02, l2=0.02)),
  keras.layers.Dense(1, activation=tf.nn.sigmoid)
])

vital_model.compile(optimizer= "adam", loss='binary_crossentropy', metrics=['accuracy'])

In [42]:
# Inspect the dimensions of the demographics training matrix.
demoX_train.shape

(1454, 12)

In [43]:
# Turn the label vectors into column vectors; -1 lets NumPy infer the row
# count instead of hard-coding the split sizes.
demoy_train = demoy_train.reshape(-1, 1)
demoy_valid = demoy_valid.reshape(-1, 1)

In [44]:
# Train the demographics model, validating on the validation split each epoch
# and stopping early through the shared callback.
demo_model.fit(
    x=demoX_train,
    y=demoy_train,
    validation_data=(demoX_valid, demoy_valid),
    batch_size=100,
    epochs=200,
    callbacks=[es],
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 00021: early stopping


<keras.callbacks.History at 0x7f55419f7490>

In [45]:
# Inspect the dimensions of the inpatient medication training matrix.
inMedX_train.shape

(1454, 618)

In [46]:
# Turn the inpatient medication label vectors into column vectors. The
# validation labels come from this modality's own array (not the demographics
# one), and -1 lets NumPy infer the row count.
inMedy_train = inMedy_train.reshape(-1, 1)
inMedy_valid = inMedy_valid.reshape(-1, 1)

In [47]:
# Train the inpatient medication model, validating on the validation split
# each epoch and stopping early through the shared callback.
inMed_model.fit(
    x=inMedX_train,
    y=inMedy_train,
    validation_data=(inMedX_valid, inMedy_valid),
    batch_size=100,
    epochs=200,
    callbacks=[es],
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 00021: early stopping


<keras.callbacks.History at 0x7f554025c550>

In [48]:
# Inspect the dimensions of the outpatient medication training matrix.
outMedX_train.shape

(1454, 566)

In [49]:
# Turn the outpatient medication label vectors into column vectors. The
# validation labels come from this modality's own array (not the demographics
# one), and -1 lets NumPy infer the row count.
outMedy_train = outMedy_train.reshape(-1, 1)
outMedy_valid = outMedy_valid.reshape(-1, 1)

In [50]:
# Train the outpatient medication model, validating on the validation split
# each epoch and stopping early through the shared callback.
outMed_model.fit(
    x=outMedX_train,
    y=outMedy_train,
    validation_data=(outMedX_valid, outMedy_valid),
    batch_size=100,
    epochs=200,
    callbacks=[es],
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 00024: early stopping


<keras.callbacks.History at 0x7f55400f7520>

In [51]:
# Inspect the dimensions of the ICD training matrix.
icdX_train.shape

(1454, 258)

In [52]:
# Turn the ICD label vectors into column vectors. The validation labels come
# from this modality's own array (not the demographics one), and -1 lets NumPy
# infer the row count.
icdy_train = icdy_train.reshape(-1, 1)
icdy_valid = icdy_valid.reshape(-1, 1)

In [53]:
# Display the reshaped ICD training labels.
icdy_train

array([[0],
       [1],
       [0],
       ...,
       [0],
       [0],
       [0]])

In [54]:
# Train the ICD model, validating on the validation split each epoch and
# stopping early through the shared callback.
icd_model.fit(
    x=icdX_train,
    y=icdy_train,
    validation_data=(icdX_valid, icdy_valid),
    batch_size=100,
    epochs=200,
    callbacks=[es],
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 00038: early stopping


<keras.callbacks.History at 0x7f54ef18fee0>

In [55]:
# Turn the lab label vectors into column vectors. The validation labels come
# from this modality's own array (not the demographics one), and -1 lets NumPy
# infer the row count.
laby_train = laby_train.reshape(-1, 1)
laby_valid = laby_valid.reshape(-1, 1)

In [56]:
# Train the lab model, validating on the validation split each epoch and
# stopping early through the shared callback.
lab_model.fit(
    x=labX_train,
    y=laby_train,
    validation_data=(labX_valid, laby_valid),
    batch_size=100,
    epochs=200,
    callbacks=[es],
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 00024: early stopping


<keras.callbacks.History at 0x7f54eefb23a0>

In [57]:
# Turn the vitals label vectors into column vectors. The validation labels
# come from this modality's own array (not the demographics one), and -1 lets
# NumPy infer the row count.
vitaly_train = vitaly_train.reshape(-1, 1)
vitaly_valid = vitaly_valid.reshape(-1, 1)

In [58]:
# Train the vitals model, validating on the validation split each epoch and
# stopping early through the shared callback.
vital_model.fit(
    x=vitalX_train,
    y=vitaly_train,
    validation_data=(vitalX_valid, vitaly_valid),
    batch_size=100,
    epochs=200,
    callbacks=[es],
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 00021: early stopping


<keras.callbacks.History at 0x7f54ed492550>

In [59]:
# Score the test split with the demographics model.
demo_preds = demo_model.predict(demoX_test)

In [60]:
# Score the test split with the inpatient medication model.
inMed_preds = inMed_model.predict(inMedX_test)

In [61]:
# Score the test split with the outpatient medication model.
outMed_preds = outMed_model.predict(outMedX_test)

In [62]:
# Score the test split with the ICD model.
icd_preds = icd_model.predict(icdX_test)

In [63]:
# Score the test split with the lab model.
lab_preds = lab_model.predict(labX_test)

In [64]:
# Score the test split with the vitals model.
vital_preds = vital_model.predict(vitalX_test)

In [65]:
# Test-split rows of the combined vision + EMR frame, and the vision scores
# stored for those rows.
emr_df_test = df_emr.loc[df_emr[SPLIT_COL].eq('test')]
vision_preds = emr_df_test.loc[:, VISION_PRED_COL].to_numpy()

In [66]:
# EMR-only fusion: unweighted element-wise average of the six modality scores.
emr_modality_preds = np.stack(
    [demo_preds, inMed_preds, outMed_preds, icd_preds, lab_preds, vital_preds]
)
emr_fusion_pred = np.mean(emr_modality_preds, axis=0)

In [67]:
# Inspect the dimensions of the EMR fusion scores.
emr_fusion_pred.shape

(190, 1)

In [68]:
# Late fusion: unweighted element-wise average of the six EMR modality scores
# and the vision score. The vision scores are reshaped into a column vector so
# they line up with the model outputs; -1 lets NumPy infer the number of test
# rows instead of hard-coding it.
late_fusion_pred = np.mean(
    [icd_preds, lab_preds, demo_preds, inMed_preds, outMed_preds, vital_preds,
     vision_preds.reshape(-1, 1)],
    axis=0
)

In [69]:
# Test-split rows of the combined vision + EMR frame.
df_emr_T = df_emr[df_emr[SPLIT_COL] == 'test']

# Attach the late-fusion scores under the shared column-name constant, which
# the metric cells also use; ravel() flattens the scores so a 1-D column is
# assigned.
emr_df = df_emr_T.assign(**{FUSION_PRED_COL: late_fusion_pred.ravel()})

In [70]:
# Attach the EMR-only fusion scores under the shared column-name constant;
# ravel() flattens the scores so a 1-D column is assigned.
emr_df = emr_df.assign(**{EMR_PRED_COL: emr_fusion_pred.ravel()})

In [71]:
# ROC AUC of the late-fusion scores against the labels on the test rows.
metrics.roc_auc_score(emr_df[LABEL_COL], emr_df[FUSION_PRED_COL])

0.9497727272727273

In [72]:
# ROC AUC of the vision scores against the labels on the test rows.
metrics.roc_auc_score(emr_df[LABEL_COL], emr_df[VISION_PRED_COL])

0.7961363636363636

In [73]:
# ROC AUC of the EMR-only fusion scores against the labels on the test rows.
metrics.roc_auc_score(emr_df[LABEL_COL], emr_df[EMR_PRED_COL])

0.9178409090909092

In [74]:
# Accuracy of the vision scores after rounding them to the nearest integer.
metrics.accuracy_score(emr_df[LABEL_COL], emr_df[VISION_PRED_COL].round(), normalize=True)

0.7526315789473684

In [75]:
# Accuracy of the late-fusion scores after rounding them to the nearest integer.
# NOTE(review): the recorded result (~0.44) is far below the recorded vision
# accuracy even though the recorded fusion ROC AUC is higher; the implicit 0.5
# cut-off from round() may not suit the averaged scores — confirm.
metrics.accuracy_score(emr_df[LABEL_COL], emr_df[FUSION_PRED_COL].round(), normalize=True)

0.4421052631578947