In [7]:
#Import packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import copy
import warnings
from sklearn import metrics
from sklearn.metrics import * 

plt.rc('font', size=12)
warnings.filterwarnings('ignore')

In [8]:
#Load data
#ESCAPE
escape = pd.read_csv("Preprocessed Data/EscapeAllData.csv", sep=",", index_col='DEIDNUM').sort_index() #all feature dataset
escapeSV = pd.read_csv("Preprocessed Data/EscapeAllDataSingleValue.csv", sep=",", index_col='DEIDNUM').sort_index() #all feature dataset
escapeLabels  = pd.read_csv("../Data/Original DataFrames/Labels.csv", sep=",", index_col='DEIDNUM').sort_index() #labels for prediction classes 

#BEST
#Only has discharge data + no hemo
best = pd.read_csv("Preprocessed Data/AllDataBEST.csv", sep=",", index_col='ID').sort_index() #all feature dataset
bestLabels  = pd.read_csv("Preprocessed Data/LabelsBEST.csv", sep=",", index_col='ID').sort_index() #labels for prediction classes 

#HF ACTION
#Only has discharge data + no hemo
hfaction = pd.read_csv("Preprocessed Data/AllDataHF-ACTION.csv", sep=",", index_col='ID').sort_index() #all feature dataset
hfactionLabels  = pd.read_csv("Preprocessed Data/LabelsHF-ACTION.csv", sep=",", index_col='ID').sort_index() #labels for prediction classes 

#Guide it
guide = pd.read_csv("Preprocessed Data/GUIDEAllData.csv", sep=",", index_col='ID').sort_index() #all feature dataset
guideSV = pd.read_csv("Preprocessed Data/GUIDEAllDataSingleValue.csv", sep=",", index_col='ID').sort_index() #all feature dataset
guideLabels  = pd.read_csv("Preprocessed Data/LabelsGUIDE-IT.csv", sep=",", index_col='ID').sort_index() #labels for prediction classes 


# #Cardiogenic shock
# cardShockHemo = pd.read_csv("../Data Validation/Cardiogenic Shock/Original DataFrames/HemoCardiogenicShock.csv", sep=",", index_col='ID').sort_index()
# cardShockAllData = pd.read_csv("../Data Validation/Cardiogenic Shock/Original DataFrames/AllDataCardiogenicShock.csv", sep=",", index_col='ID').sort_index()
# cardShockLabels = pd.read_csv("../Data Validation/Cardiogenic Shock/Original DataFrames/LabelsCardiogenicShock.csv", sep=",", index_col='ID').sort_index()
# cardShockHemoLabels = pd.read_csv("../Data Validation/Cardiogenic Shock/Original DataFrames/HemoLabelsCardiogenicShock.csv", sep=",", index_col='ID').sort_index()

# #Serial Cardiac
# serialHemo = pd.read_csv("../Data Validation/Serial Cardiac Caths/Original DataFrames/HemoSerialCardiac.csv", sep=",", index_col='ID').sort_index()
# serialAllData = pd.read_csv("../Data Validation/Serial Cardiac Caths/Original DataFrames/AllDataSerialCardiac.csv", sep=",", index_col='ID').sort_index()
# serialHemoLabels = pd.read_csv("../Data Validation/Serial Cardiac Caths/Original DataFrames/HemoLabelsSerialCardiac.csv", sep=",", index_col='ID').sort_index()
# serialLabels = pd.read_csv("../Data Validation/Serial Cardiac Caths/Original DataFrames/LabelsSerialCardiac.csv", sep=",", index_col='ID').sort_index()

# #HF-ACTION
# hfactionAllData = pd.read_csv("../Data Validation/HF-ACTION/Original DataFrames/AllDataHF-ACTION.csv", sep=",", index_col='ID').sort_index()
# hfactionLabels = pd.read_csv("../Data Validation/HF-ACTION/Original DataFrames/LabelsHF-ACTION.csv", sep=",", index_col='ID').sort_index()

# #BEST
# bestAllData = pd.read_csv("../Data Validation/BEST/Original DataFrames/AllDataBEST.csv", sep=",", index_col='ID').sort_index()
# bestLabels = pd.read_csv("../Data Validation/BEST/Original DataFrames/LabelsBEST.csv", sep=",", index_col='ID').sort_index()

# #Guide it
# guideAllData = pd.read_csv("../Data Validation/GUIDE-IT/Original DataFrames/AllDataGUIDE-IT.csv", sep=",", index_col='ID').sort_index()
# guideLabels = pd.read_csv("../Data Validation/GUIDE-IT/Original DataFrames/LabelsGUIDE-IT.csv", sep=",", index_col='ID').sort_index()



In [136]:
#Calculate ESCAPE Risk score
def calcESCAPEScore(data):
    lst = []
    for idx in data.index:
        pt = data.loc[idx]
        scr = 0
        if pt.iloc[0] > 70: #Age
            scr += 1
        if pt.iloc[1] > 40: #BUN
            scr += 1
        if pt.iloc[1] > 90: #BUN
            scr += 1
        if pt.iloc[2] < 300: #Walk ft
            scr += 1
        if pt.iloc[3] < 130: #SOD
            scr += 1
        if pt.iloc[4] == 1.0 or pt.iloc[5] == 1.0: #CPR or MEVT
            scr += 2
        if pt.iloc[6] > 240: #DIUR Dose
            scr += 1
        if pt.iloc[7] == 0.0: #BETA blockers
            scr += 1
        if pt.iloc[8] > 500: #BNP
            scr += 1
        if pt.iloc[8] > 1300: #BNP
            scr += 3
            
        #associate score with prob of outcome
        prob = 0
        if scr == 0:
            prob = 0.033
        elif scr == 1:
            prob = 0.065
        elif scr == 2:
            prob = 0.123
        elif scr == 3:
            prob = 0.223
        elif scr == 4:
            prob = 0.368
        elif scr == 5:
            prob = 0.543
        elif scr == 6:
            prob = 0.708
        elif scr == 7:
            prob = 0.831
        else: #>= 8 
            prob = 0.909

        lst.append(prob)


    rskDF = pd.DataFrame(lst, columns=['Score'], index=data.index)
    return rskDF

#OPTIMIZE-HF risk score calculation
def calcOPTIMIZEHF(data): 
    origIdx = data.index
    data = data.reset_index()
    ageDict = {20:0, 25:2, 30:3, 35:5, 40:6, 45:8, 50:9, 55:11, 60:13, 65:14, 70:16, 75:17, 80:19, 85:20, 90:22, 95:24}
    hrDict = {65:0, 70:1, 75:1, 80:2, 85:3, 90:4, 95:4, 100:5, 105:6, 110:6}
    sysBPDict = {50:22, 60:20, 70:18, 80:16, 90:14, 100:12, 110:10, 120:8, 130:6, 140:4, 150:2, 160:0}
    sodDict = {110:13, 115:11, 120:9, 125:7, 130:4, 135:2, 140:0, 145:2, 150:4, 155:6, 160:8, 165:10, 170:12}
    crtDict = {0.0:0, 0.5:2, 1.0:5, 1.5:7, 2.0:10, 2.5:12, 3.0:15, 3.5:17}
    #primary cause of admission is hf for all cohorts so score starts as 0
    
    lst = []
    for idx in data.index:
        pt = data.loc[idx]
        scr = 0

        if pt.isnull().values.any():
            prob = np.nan
            
        else:
            age = pt["Age"]
            ageNearest = 5 * round(age/5)
            scr += ageDict[ageNearest]

            hr = pt["HRTRT"]
            if hr >= 110:
                scr += 6
            elif hr <= 65:
                scr += 0
            else:
                hrNearest = 5 * round(hr/5)
                scr += hrDict[hrNearest]

            bp = pt["BPSYS"]
            if bp >= 160:
                scr += 0
            elif bp <= 50:
                scr += 22
            else:
                bpNearest = 10 * round(bp/10)
                scr += sysBPDict[bpNearest]

            sod = pt["SOD"]
            sodNearest = 5 * round(sod/5)
            scr += sodDict[sodNearest]

            crt = pt["CRT"]
            if crt >= 3.5:
                scr += 17
            else:
                crtNearest = 0.5 * round(crt/0.5)
                scr += crtDict[crtNearest]

            if pt['EjF'] < 40.0:
                scr += 1

            scr = 5 * round(scr/5)

            #associate score with prob of outcome
            prob = 0
            if scr == 30:
                prob = 0.02
            elif scr == 35:
                prob = 0.03
            elif scr == 40:
                prob = 0.05
            elif scr == 45:
                prob = 0.06
            elif scr == 50:
                prob = 0.1
            elif scr == 55:
                prob = 0.18
            elif scr == 60:
                prob = 0.25
            elif scr == 65:
                prob = 0.4
            elif scr == 70:
                prob = 0.5
            elif scr == 75:
                prob = 0.7
            elif scr == 80:
                prob = 0.8
            elif scr == 85:
                prob = 0.85
            elif scr == 90:
                prob = 0.91
            elif scr == 95:
                prob = 0.96
            else: #== 100 
                prob = 0.97
    
        lst.append(prob)
        
    rskDF = pd.DataFrame(lst, columns=['Score'], index=origIdx)
    return rskDF

#Calc effect risk score - 30 day mort
def calcEFFECT30Day(data):
    origIdx = data.index
    data = data.reset_index()
    
    lst = []
    for idx in data.index:
        pt = data.loc[idx]
        scr = 0

        scr += pt['Age']

        if pt['Resp'] <= 20:
            scr += 20
        elif pt['Resp'] >= 45:
            scr += 45
        else:
            scr += pt['Resp']

        bp = pt['BPSYS']
        if bp >= 180:
            scr -= 60
        elif bp >= 160 and bp <= 179:
            scr -= 55
        elif bp >= 140 and bp <= 159:
            scr -= 50
        elif bp >= 120 and bp <= 139:
            scr -= 45
        elif bp >= 100 and bp <= 119:
            scr -= 40
        elif bp >= 90 and bp <= 99:
            scr -= 35
        else: #<90
            scr -= 30

        if pt['BUN'] >= 60:
            scr += 60
        else:
            scr += pt['BUN']

        if pt['SOD'] < 136:
            scr += 10

        if pt['StrokeTIA'] == 1.0:
            scr += 10

        if pt['COPD'] == 1.0:
            scr += 10

        if pt['HEPT'] == 1.0:
            scr += 25

        if pt['MALIG'] == 1.0:
            scr += 15

        if pt['HEM'] < 10:
            scr += 0
            
        #Calculate prob
        if scr <= 60:
            prob = 0.4
        elif scr >= 61 and scr <= 90:
            prob = 3.4
        elif scr >= 91 and scr <= 120:
            prob = 12.2
        elif scr >= 121 and scr <= 150:
            prob = 32.7
        else: #scr > 150:
            prob = 59.0
            
        lst.append(prob)
        
    rskDF = pd.DataFrame(lst, columns=['Score'], index=origIdx)
    return rskDF


#Calc effect risk score - 30 day mort
def calcEFFECT1Year(data):
    origIdx = data.index
    data = data.reset_index()
    
    lst = []
    for idx in data.index:
        pt = data.loc[idx]
        scr = 0

        scr += pt['Age']

        if pt['Resp'] <= 20:
            scr += 20
        elif pt['Resp'] >= 45:
            scr += 45
        else:
            scr += pt['Resp']

        bp = pt['BPSYS']
        if bp >= 180:
            scr -= 50
        elif bp >= 160 and bp <= 179:
            scr -= 45
        elif bp >= 140 and bp <= 159:
            scr -= 40
        elif bp >= 120 and bp <= 139:
            scr -= 35
        elif bp >= 100 and bp <= 119:
            scr -= 30
        elif bp >= 90 and bp <= 99:
            scr -= 25
        else: #<90
            scr -= 20

        if pt['BUN'] >= 60:
            scr += 60
        else:
            scr += pt['BUN']

        if pt['SOD'] < 136:
            scr += 10

        if pt['StrokeTIA'] == 1.0:
            scr += 10

        if pt['COPD'] == 1.0:
            scr += 10

        if pt['HEPT'] == 1.0:
            scr += 35

        if pt['MALIG'] == 1.0:
            scr += 15

        if pt['HEM'] < 10:
            scr += 10
            
        #Calculate prob
        if scr <= 60:
            prob = 7.8
        elif scr >= 61 and scr <= 90:
            prob = 12.9
        elif scr >= 91 and scr <= 120:
            prob = 32.5
        elif scr >= 121 and scr <= 150:
            prob = 59.3
        else: #scr > 150:
            prob = 78.8
            
        lst.append(prob)
        
    rskDF = pd.DataFrame(lst, columns=['Score'], index=origIdx)
    return rskDF



In [119]:
def getAUC(labelDF, scoreDF):
    #make comparison df of predicted prob vs real labels
    lst = []
    scoreList = scoreDF['Score'].tolist()
    
    for i in range(len(scoreList)):
        if not np.isnan(scoreList[i]):
            est = scoreList[i]
            idx = scoreDF.iloc[i].name
            real = labelDF.loc[idx].item()
            
            lst.append([idx, est, real])
    
    compDF = pd.DataFrame(lst, columns=['IDX', 'Predicted', 'Real']).set_index('IDX')
    scr = roc_auc_score(compDF['Real'], compDF['Predicted'])
    
    return compDF, scr

In [122]:
#ESCAPE Risk score
esRs = escape.filter(['Age','BUN_D','SixFtWlk_D', 'SOD_D', 'CPR', 'MEVT', 'DIURDSE_D', 'BET_D', 'BNP_D'], axis=1)
res = calcESCAPEScore(esRs)

#given actual labels and predicted probability of the event (outcome), get AUC
df, val = getAUC(escapeLabels['Death'], res)
print("Escape Risk AUC:", val)
df

Escape Risk AUC: 0.6805149843124527


Unnamed: 0_level_0,Predicted,Real
IDX,Unnamed: 1_level_1,Unnamed: 2_level_1
72,0.368,1
81,0.033,0
86,0.123,0
267,0.368,1
814,0.065,0
...,...,...
98078,0.033,0
98508,0.033,0
99302,0.123,1
99912,0.033,0


In [123]:
#OPTIMIZE-HF
esRs = escapeSV.filter(['Age','HRTRT','BPSYS', 'SOD', 'CRT', 'EjF'], axis=1)
res = calcOPTIMIZEHF(esRs)
res

df, val = getAUC(escapeLabels['Death'], res)
print("OPTIMIZE-HF Risk AUC:", val)
df

OPTIMIZE-HF Risk AUC: 0.5472130208972315


Unnamed: 0_level_0,Predicted,Real
IDX,Unnamed: 1_level_1,Unnamed: 2_level_1
72,0.06,1
72,0.10,1
81,0.06,0
81,0.06,0
1084,0.03,0
...,...,...
97047,0.03,0
98508,0.06,0
99302,0.02,1
99302,0.97,1


In [137]:
#EFFECT
esRs = escapeSV.filter(['Age','Resp','BPSYS', 'BUN', 'SOD', 'StrokeTIA', 'COPD', 'HEPT', 'MALIG', 'HEM'], axis=1)
res = calcEFFECT30Day(esRs)
res

df, val = getAUC(escapeLabels['Death'], res)
print("EFFECT 30 Day Mortality Risk AUC:", val)
df

EFFECT 30 Day Mortality Risk AUC: 0.5871774586173321


Unnamed: 0_level_0,Predicted,Real
IDX,Unnamed: 1_level_1,Unnamed: 2_level_1
72,12.2,1
72,32.7,1
81,12.2,0
81,12.2,0
86,3.4,0
...,...,...
99302,3.4,1
99912,0.4,0
99912,0.4,0
99935,3.4,0


In [138]:
esRs = escapeSV.filter(['Age','Resp','BPSYS', 'BUN', 'SOD', 'StrokeTIA', 'COPD', 'HEPT', 'MALIG', 'HEM'], axis=1)
res = calcEFFECT1Year(esRs)
res

df, val = getAUC(escapeLabels['Death'], res)
print("EFFECT 1 Year Mortality Risk AUC:", val)
df

EFFECT 1 Year Mortality Risk AUC: 0.5742183273828843


Unnamed: 0_level_0,Predicted,Real
IDX,Unnamed: 1_level_1,Unnamed: 2_level_1
72,59.3,1
72,59.3,1
81,59.3,0
81,32.5,0
86,12.9,0
...,...,...
99302,12.9,1
99912,7.8,0
99912,7.8,0
99935,32.5,0


In [None]:
#TODO calc all scores and save them to a csv file to calc auc later


Needed Vars:
CPR or MEVT
BNP
Diuretics dose
inotropes
intermacs?
severe rv dysfunction?

HF diagnosed in last 18 months
Diuretics doses:
- Furosemide
- Bumetanide
- Torsemide
- Metolazone
- ChloroTZ

HGB
uric acid
Total cholesterol


In [None]:
#BEST has

Diuretics but no dose
inotropes
HGB
uric acid
Total cholesterol

In [None]:
#HF action has

BNP
Diuretics dose

Diuretics doses:
- Furosemide

Total cholesterol


In [None]:
#GUIDe it has
Needed Vars:
MEVT
BNP
Diuretics dose
inotropes

Diuretics doses - use furosemide from diurdse

uric acid
Total cholesterol


In [None]:
actual = []
actual.extend([0.077 for i in range(91)])
actual.extend([0.104 for i in range(125)])
actual.extend([0.167 for i in range(114)])
actual.extend([0.264 for i in range(53)])
actual.extend([0.448 for i in range(29)])
actual.extend([0.800 for i in range(15)])
actual.extend([0.750 for i in range(4)])
actual.extend([1.000 for i in range(1)])
actual.extend([1.000 for i in range(1)])

predBNP = []
predBNP.extend([0.033 for i in range(91)])
predBNP.extend([0.065 for i in range(125)])
predBNP.extend([0.123 for i in range(114)])
predBNP.extend([0.223 for i in range(53)])
predBNP.extend([0.368 for i in range(29)])
predBNP.extend([0.543 for i in range(15)])
predBNP.extend([0.708 for i in range(4)])
predBNP.extend([0.831 for i in range(1)])
predBNP.extend([0.909 for i in range(1)])

predNoBNP = []
predNoBNP.extend([0.053 for i in range(91)])
predNoBNP.extend([0.103 for i in range(125)])
predNoBNP.extend([0.189 for i in range(114)])
predNoBNP.extend([0.322 for i in range(53)])
predNoBNP.extend([0.492 for i in range(29)])
predNoBNP.extend([0.664 for i in range(15)])
predNoBNP.extend([0.801 for i in range(4)])
predNoBNP.extend([0.891 for i in range(1)])
predNoBNP.extend([0.943 for i in range(1)])
