In [1]:
import os

import optuna
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

import pandas as pd
import numpy as np

from torch.utils.data import TensorDataset, DataLoader

from sklearn import metrics

In [2]:
# Load data from .csv files
# Directory holding the raw modality tables. The default is the original
# server location; set the PE_DATA_DIR environment variable to run the
# notebook on another machine without editing this cell.
DATA_DIR = os.environ.get('PE_DATA_DIR', '/home/bmlserver/jk/008/data')


def _read_table(file_name):
    """Read one CSV file located in DATA_DIR and return it as a DataFrame."""
    return pd.read_csv(os.path.join(DATA_DIR, file_name))


load_demo_df = _read_table('Demographics.csv')
load_out_med_df = _read_table('OUT_MED.csv')
load_in_med_df = _read_table('INP_MED.csv')
load_icd_df = _read_table('ICD.csv')
load_lab_df = _read_table('LABS.csv')
load_vitals_df = _read_table('Vitals.csv')
load_vision_df = _read_table('Vision.csv')

In [3]:
def _standardize(column):
    """Z-score one column: subtract its mean, divide by its standard deviation."""
    return (column - column.mean()) / (column.std())


def _prepare_emr_frame(raw_frame):
    """Drop single-valued columns, index the frame by 'idx', z-score the rest."""
    # remove zero variance features (columns with exactly one distinct value)
    has_variation = raw_frame.apply(pd.Series.nunique) != 1
    varying = raw_frame.loc[:, has_variation]
    # set index
    indexed = varying.set_index('idx')
    # normalize features
    return indexed.apply(_standardize)


# NOTE(review): the mean/std used above are computed over every row of each
# table, i.e. before the train/val/test split is applied later in the
# notebook -- confirm that this is intended.
processed_emr_dfs = [
    _prepare_emr_frame(raw_frame)
    for raw_frame in (
        load_demo_df,
        load_out_med_df,
        load_in_med_df,
        load_icd_df,
        load_lab_df,
        load_vitals_df,
    )
]

In [4]:
# All processed EMR modalities side by side (columns concatenated on the index).
processed_emr_df = pd.concat(processed_emr_dfs, axis=1)

# Give each processed modality its own name; the order matches the list built
# from (demo, outMed, inMed, icd, lab, vital).
(
    processed_demo_df,
    processed_outMed_df,
    processed_inMed_df,
    processed_icd_df,
    processed_lab_df,
    processed_vital_df,
) = (pd.DataFrame(modality_frame) for modality_frame in processed_emr_dfs)


In [5]:
# Define columns
# Names of the non-feature columns used throughout the notebook.
LABEL_COL = 'label'
SPLIT_COL = 'split'
PE_TYPE_COL = 'pe_type'
VISION_PRED_COL = 'pred'
EMR_PRED_COL = 'emr_pred'
FUSION_PRED_COL = 'late_fusion_pred'

# Feature column names, one list per processed modality plus the combined frame.
DEMO_FEATURE_COLS = processed_demo_df.columns.to_list()
OUTMED_FEATURE_COLS = processed_outMed_df.columns.to_list()
INMED_FEATURE_COLS = processed_inMed_df.columns.to_list()
ICD_FEATURE_COLS = processed_icd_df.columns.to_list()
LAB_FEATURE_COLS = processed_lab_df.columns.to_list()
VITAL_FEATURE_COLS = processed_vital_df.columns.to_list()
EMR_FEATURE_COLS = processed_emr_df.columns.to_list()

In [6]:
# vision df set index
vision_df = load_vision_df.set_index('idx')


def _attach_vision(feature_frame):
    """Place the vision columns next to `feature_frame`, aligned on the index."""
    return pd.concat([vision_df, feature_frame], axis=1)


# Join vision information with each EMR modality and with the combined frame
demo_df = _attach_vision(processed_demo_df)
outMed_df = _attach_vision(processed_outMed_df)
inMed_df = _attach_vision(processed_inMed_df)
icd_df = _attach_vision(processed_icd_df)
lab_df = _attach_vision(processed_lab_df)
vital_df = _attach_vision(processed_vital_df)

emr_df = _attach_vision(processed_emr_df)

In [7]:
# demo_df.to_csv("sample_demo.csv")
# outMed_df.to_csv("sample_outMed.csv")
# inMed_df.to_csv("sample_inMed.csv")
# icd_df.to_csv("sample_icd.csv")
# lab_df.to_csv("sample_lab.csv")
# vital_df.to_csv("sample_vital.csv")

In [8]:
# Create data split TRAIN / TEST / VAL EACH.
def _split_frame(frame):
    """Return the (train, val, test) row subsets of `frame` chosen by SPLIT_COL."""
    return tuple(
        frame[frame[SPLIT_COL] == split_name]
        for split_name in ('train', 'val', 'test')
    )


demo_df_train, demo_df_val, demo_df_test = _split_frame(demo_df)
outMed_df_train, outMed_df_val, outMed_df_test = _split_frame(outMed_df)
inMed_df_train, inMed_df_val, inMed_df_test = _split_frame(inMed_df)
icd_df_train, icd_df_val, icd_df_test = _split_frame(icd_df)
lab_df_train, lab_df_val, lab_df_test = _split_frame(lab_df)
vital_df_train, vital_df_val, vital_df_test = _split_frame(vital_df)

In [9]:
### DF to NP ###
def _features_and_labels(train_df, test_df, val_df, feature_cols):
    """Extract NumPy feature matrices and label vectors from the three splits.

    Returns a 6-tuple ordered as
    (X_train, X_test, X_valid, y_train, y_test, y_valid).
    """
    ordered_splits = (train_df, test_df, val_df)
    features = tuple(part[feature_cols].to_numpy() for part in ordered_splits)
    labels = tuple(part[LABEL_COL].to_numpy() for part in ordered_splits)
    return features + labels


(demoX_train, demoX_test, demoX_valid,
 demoy_train, demoy_test, demoy_valid) = _features_and_labels(
    demo_df_train, demo_df_test, demo_df_val, DEMO_FEATURE_COLS)

(outMedX_train, outMedX_test, outMedX_valid,
 outMedy_train, outMedy_test, outMedy_valid) = _features_and_labels(
    outMed_df_train, outMed_df_test, outMed_df_val, OUTMED_FEATURE_COLS)

(inMedX_train, inMedX_test, inMedX_valid,
 inMedy_train, inMedy_test, inMedy_valid) = _features_and_labels(
    inMed_df_train, inMed_df_test, inMed_df_val, INMED_FEATURE_COLS)

(icdX_train, icdX_test, icdX_valid,
 icdy_train, icdy_test, icdy_valid) = _features_and_labels(
    icd_df_train, icd_df_test, icd_df_val, ICD_FEATURE_COLS)

(labX_train, labX_test, labX_valid,
 laby_train, laby_test, laby_valid) = _features_and_labels(
    lab_df_train, lab_df_test, lab_df_val, LAB_FEATURE_COLS)

(vitalX_train, vitalX_test, vitalX_valid,
 vitaly_train, vitaly_test, vitaly_valid) = _features_and_labels(
    vital_df_train, vital_df_test, vital_df_val, VITAL_FEATURE_COLS)

In [10]:
### NP to torchTensor -> dataset -> dataloader
def _to_float_tensors(X_train, X_test, X_valid, y_train, y_test, y_valid):
    """Convert the NumPy splits of one modality to float tensors.

    Feature arrays keep their shape; label arrays are viewed as a single
    column (-1, 1). The return order mirrors the argument order.
    """
    features = tuple(
        torch.from_numpy(arr).float() for arr in (X_train, X_test, X_valid)
    )
    targets = tuple(
        torch.from_numpy(arr).float().view(-1, 1)
        for arr in (y_train, y_test, y_valid)
    )
    return features + targets


def _to_loaders(X_train, X_test, X_valid, y_train, y_test, y_valid):
    """Wrap the tensor splits of one modality in datasets and data loaders.

    Returns (dataset_train, loader_train, dataset_val, loader_val,
    dataset_test, loader_test).
    """
    ds_train = TensorDataset(X_train, y_train)
    ds_val = TensorDataset(X_valid, y_valid)
    ds_test = TensorDataset(X_test, y_test)
    # NOTE(review): the train loader's batch size is the number of rows in the
    # TEST tensor, while val/test use their own row counts -- possibly a
    # copy-paste slip; confirm before relying on this loader for training.
    loader_train = DataLoader(ds_train, batch_size=X_test.shape[0])
    loader_val = DataLoader(ds_val, batch_size=X_valid.shape[0])
    loader_test = DataLoader(ds_test, batch_size=X_test.shape[0])
    return ds_train, loader_train, ds_val, loader_val, ds_test, loader_test


# --- demographics ---
(demo_TT_X_train, demo_TT_X_test, demo_TT_X_valid,
 demo_TT_y_train, demo_TT_y_test, demo_TT_y_valid) = _to_float_tensors(
    demoX_train, demoX_test, demoX_valid,
    demoy_train, demoy_test, demoy_valid)
(demo_dataset_train, demo_dataloader_train,
 demo_dataset_val, demo_dataloader_val,
 demo_dataset_test, demo_dataloader_test) = _to_loaders(
    demo_TT_X_train, demo_TT_X_test, demo_TT_X_valid,
    demo_TT_y_train, demo_TT_y_test, demo_TT_y_valid)

# --- outpatient medication ---
(outMed_TT_X_train, outMed_TT_X_test, outMed_TT_X_valid,
 outMed_TT_y_train, outMed_TT_y_test, outMed_TT_y_valid) = _to_float_tensors(
    outMedX_train, outMedX_test, outMedX_valid,
    outMedy_train, outMedy_test, outMedy_valid)
(outMed_dataset_train, outMed_dataloader_train,
 outMed_dataset_val, outMed_dataloader_val,
 outMed_dataset_test, outMed_dataloader_test) = _to_loaders(
    outMed_TT_X_train, outMed_TT_X_test, outMed_TT_X_valid,
    outMed_TT_y_train, outMed_TT_y_test, outMed_TT_y_valid)

# --- inpatient medication ---
(inMed_TT_X_train, inMed_TT_X_test, inMed_TT_X_valid,
 inMed_TT_y_train, inMed_TT_y_test, inMed_TT_y_valid) = _to_float_tensors(
    inMedX_train, inMedX_test, inMedX_valid,
    inMedy_train, inMedy_test, inMedy_valid)
(inMed_dataset_train, inMed_dataloader_train,
 inMed_dataset_val, inMed_dataloader_val,
 inMed_dataset_test, inMed_dataloader_test) = _to_loaders(
    inMed_TT_X_train, inMed_TT_X_test, inMed_TT_X_valid,
    inMed_TT_y_train, inMed_TT_y_test, inMed_TT_y_valid)

# --- ICD codes ---
(icd_TT_X_train, icd_TT_X_test, icd_TT_X_valid,
 icd_TT_y_train, icd_TT_y_test, icd_TT_y_valid) = _to_float_tensors(
    icdX_train, icdX_test, icdX_valid,
    icdy_train, icdy_test, icdy_valid)
(icd_dataset_train, icd_dataloader_train,
 icd_dataset_val, icd_dataloader_val,
 icd_dataset_test, icd_dataloader_test) = _to_loaders(
    icd_TT_X_train, icd_TT_X_test, icd_TT_X_valid,
    icd_TT_y_train, icd_TT_y_test, icd_TT_y_valid)

# --- labs ---
(lab_TT_X_train, lab_TT_X_test, lab_TT_X_valid,
 lab_TT_y_train, lab_TT_y_test, lab_TT_y_valid) = _to_float_tensors(
    labX_train, labX_test, labX_valid,
    laby_train, laby_test, laby_valid)
(lab_dataset_train, lab_dataloader_train,
 lab_dataset_val, lab_dataloader_val,
 lab_dataset_test, lab_dataloader_test) = _to_loaders(
    lab_TT_X_train, lab_TT_X_test, lab_TT_X_valid,
    lab_TT_y_train, lab_TT_y_test, lab_TT_y_valid)

# --- vitals ---
(vital_TT_X_train, vital_TT_X_test, vital_TT_X_valid,
 vital_TT_y_train, vital_TT_y_test, vital_TT_y_valid) = _to_float_tensors(
    vitalX_train, vitalX_test, vitalX_valid,
    vitaly_train, vitaly_test, vitaly_valid)
(vital_dataset_train, vital_dataloader_train,
 vital_dataset_val, vital_dataloader_val,
 vital_dataset_test, vital_dataloader_test) = _to_loaders(
    vital_TT_X_train, vital_TT_X_test, vital_TT_X_valid,
    vital_TT_y_train, vital_TT_y_test, vital_TT_y_valid)

In [11]:
# Rows of the combined vision+EMR frame that belong to the test split.
is_test_row = emr_df[SPLIT_COL] == 'test'
emr_df_test = emr_df[is_test_row]

# Vision-model predictions for those test rows, as a NumPy array.
vision_preds = emr_df_test[VISION_PRED_COL].to_numpy()

In [12]:
# Directory holding the saved per-modality prediction files. The default is
# the original server location; set the PE_PRED_DIR environment variable to
# point the notebook somewhere else without editing this cell.
PRED_DIR = os.environ.get('PE_PRED_DIR', '/home/bmlserver/jk/iPynb/mmF_Final')


def _load_predictions(file_name):
    """Load one prediction file from PRED_DIR with np.loadtxt."""
    return np.loadtxt(os.path.join(PRED_DIR, file_name))


demo_pred_np = _load_predictions('demo_pred.csv')
icd_pred_np = _load_predictions('icd_pred.csv')
inMed_pred_np = _load_predictions('inMed_pred.csv')
outMed_pred_np = _load_predictions('outMed_pred.csv')
lab_pred_np = _load_predictions('lab_pred.csv')
vital_pred_np = _load_predictions('vital_pred.csv')

In [13]:
# ndarray.reshape returns a new array and leaves the original untouched, so
# the results must be assigned back for the reshape to have any effect.
# -1 lets NumPy infer the row count instead of hard-coding the test-set size;
# re-running this cell is safe because reshaping an (n, 1) array to (-1, 1)
# is a no-op.
demo_pred_np = demo_pred_np.reshape(-1, 1)
icd_pred_np = icd_pred_np.reshape(-1, 1)
inMed_pred_np = inMed_pred_np.reshape(-1, 1)
outMed_pred_np = outMed_pred_np.reshape(-1, 1)
lab_pred_np = lab_pred_np.reshape(-1, 1)
vital_pred_np = vital_pred_np.reshape(-1, 1)

# Display the vitals predictions as a column vector (cell output).
vital_pred_np

array([[0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.28583071],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.28554749],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.34436452],
       [0.344

In [14]:
# EMR-only fusion: element-wise mean of the six per-modality predictions.
modality_preds = [
    demo_pred_np,
    inMed_pred_np,
    outMed_pred_np,
    icd_pred_np,
    lab_pred_np,
    vital_pred_np,
]
emr_fusion_pred = np.mean(modality_preds, axis=0)

In [15]:
# Make the EMR fusion predictions a single column. -1 lets NumPy infer the
# number of rows, so the cell no longer depends on the test set having
# exactly 190 samples; re-running it is a no-op.
emr_fusion_pred = emr_fusion_pred.reshape(-1, 1)

In [16]:
# Late fusion: element-wise mean of the EMR fusion and the vision predictions.
# The vision predictions are reshaped to one column with an inferred row count
# rather than a hard-coded 190.
vision_pred_column = vision_preds.reshape(-1, 1)

# Fail with a clear message if the two prediction sets do not line up, rather
# than letting np.mean raise (or mis-handle) a ragged input.
if emr_fusion_pred.shape != vision_pred_column.shape:
    raise ValueError(
        "EMR fusion predictions %s and vision predictions %s must have the same shape"
        % (emr_fusion_pred.shape, vision_pred_column.shape)
    )

late_fusion_pred = np.mean(
    [emr_fusion_pred, vision_pred_column],
    axis=0
)

In [17]:
# Display the late-fusion predictions (mean of the EMR fusion and the vision
# predictions) as the cell output.
late_fusion_pred

array([[0.28230461],
       [0.53984383],
       [0.56283205],
       [0.55386897],
       [0.59483109],
       [0.30035479],
       [0.46003823],
       [0.54395564],
       [0.33942139],
       [0.34252252],
       [0.53024097],
       [0.35233334],
       [0.34889964],
       [0.47535667],
       [0.36349055],
       [0.56982065],
       [0.36828963],
       [0.57076681],
       [0.30723634],
       [0.4358804 ],
       [0.56476151],
       [0.51752474],
       [0.37748059],
       [0.57629495],
       [0.36385899],
       [0.42089811],
       [0.36472863],
       [0.32190191],
       [0.49468049],
       [0.35055557],
       [0.4003922 ],
       [0.4508857 ],
       [0.56235998],
       [0.37211938],
       [0.32835429],
       [0.3450657 ],
       [0.27804277],
       [0.6624698 ],
       [0.61943704],
       [0.36315907],
       [0.38443361],
       [0.33962254],
       [0.40460141],
       [0.38015769],
       [0.33615805],
       [0.34070386],
       [0.42211818],
       [0.353

In [18]:
# ROC AUC of the late-fusion (EMR + vision) predictions on the test split.
metrics.roc_auc_score(
    y_true=emr_df_test[LABEL_COL],
    y_score=late_fusion_pred,
)

0.8089772727272727

In [19]:
# Display the ground-truth labels of the test split (indexed by 'idx').
emr_df_test[LABEL_COL]

idx
2506    0
2832    1
1487    1
15      1
1763    1
       ..
2178    1
1047    0
897     1
2311    1
2876    1
Name: label, Length: 190, dtype: int64

In [20]:
# ROC AUC of the EMR-only fusion predictions on the test split.
metrics.roc_auc_score(
    y_true=emr_df_test[LABEL_COL],
    y_score=emr_fusion_pred,
)

0.5635227272727272

In [23]:
# Number of correctly classified test samples for the late-fusion model:
# predictions are rounded to 0/1, and normalize=False makes accuracy_score
# return a count instead of a fraction.
metrics.accuracy_score(
    y_true=emr_df_test[LABEL_COL],
    y_pred=np.round(late_fusion_pred),
    normalize=False,
)

125