In [1]:
#Import packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import copy
import warnings
from sklearn import metrics
from sklearn.metrics import * 

plt.rc('font', size=12)
warnings.filterwarnings('ignore')

In [2]:
#Load Labels
escapeLabels  = pd.read_csv("../Data/Original DataFrames/Labels.csv", sep=",", index_col='DEIDNUM').sort_index() #labels for prediction classes 
bestLabels  = pd.read_csv("Preprocessed Data/LabelsBEST.csv", sep=",", index_col='ID').sort_index() #labels for prediction classes 
hfactionLabels  = pd.read_csv("Preprocessed Data/LabelsHF-ACTION.csv", sep=",", index_col='ID').sort_index() #labels for prediction classes 
guideLabels  = pd.read_csv("Preprocessed Data/LabelsGUIDE-IT.csv", sep=",", index_col='ID').sort_index() #labels for prediction classes 
cardShockLabels = pd.read_csv("../Data Validation/Cardiogenic Shock/Original DataFrames/LabelsCardiogenicShock.csv", sep=",", index_col='ID').sort_index()
serialLabels = pd.read_csv("../Data Validation/Serial Cardiac Caths/Original DataFrames/LabelsSerialCardiac.csv", sep=",", index_col='ID').sort_index()


In [None]:
#Need to get all scores and convert them to their mortality meaning
#Then need to combine scores + real label values into a dataframe
#Finally compute AUCs for each of the scores

In [51]:
scoreName = 'ADHERE'
dataName = 'ESCAPE'
index = 'DEIDNUM'
orig = pd.read_csv("Calculated Scores/" + scoreName + "/"+ dataName + "_" + scoreName + ".csv").set_index(index)


In [None]:
def makeLabels(data, labels):
    lst = []
    idx = sorted(data.index)
    for i in idx:
        lab = labels.loc[i]
        lst.append(lab['Death'])
        
    return pd.DataFrame(lst, columns=['Real'],index=idx)

def makeScoreDF(dt, lbls, index):
    #Get ESCAPE Score DF
    orig = pd.read_csv("Calculated Scores/ESCAPE/"+ dt + "_ESCAPE.csv").set_index(index)
    labels = makeLabels(orig, lbls)
    escDF = labels
    escDF['ESCAPE'] = orig[['ESCAPE']]

    #Get other scores
    for scr in ['ADHERE', 'GWTG', 'MAGGIC']:
        orig = pd.read_csv("Calculated Scores/" + scr + "/"+ dt + "_" + scr + ".csv").set_index(index)

        #Make real labels
        if scr == 'ADHERE':
            labels = makeLabels(orig, lbls)
            data = labels

        #TODO HERE --> convert meaning of scores
        data[scr] = orig[[scr]]

        if data[scr].astype(str).str.contains('-').any():
            data[[scr+'_Low', scr+'_High']] = data[scr].astype(str).str.split('-', expand=True)#.astype(float)

    #Add Optimize and Effect scores
    orig = pd.read_csv("Calculated Scores/OptimizeEffect/"+ dt + "_optimizeEffectScore.csv").set_index(index)
    data['OPTIMIZE-HF'] = orig['OPTIMIZE-HF']
    data['EFFECT 30 Day'] = orig['EFFECT 30 Day']
    data['EFFECT 1 Year'] = orig['EFFECT 1 Year']
    
    return escDF, data

def getAUC(df):
    colName = []
    lst = []
    for score in ['ADHERE_Low', 'ADHERE_High', 'GWTG_Low','GWTG_High', 'MAGGIC', 'OPTIMIZE-HF', 
                  'EFFECT 30 Day', 'EFFECT 1 Year']:
        print(score)
        
        dfCpy = copy.deepcopy(df).dropna()
        real = dfCpy['Real'].astype(float)
        scoreVal = dfCpy[score].astype(float)
        
        precision, recall, thresholds = precision_recall_curve(real, scoreVal)
        precAUC = auc(recall, precision)
        print("Prec Recall AUC:", precAUC)
        lst.append(precAUC)

        rocAUC = roc_auc_score(real, scoreVal)
        print("ROC AUC:", rocAUC)
        lst.append(rocAUC)
        
        colName.append(score + " Prec")
        colName.append(score + " ROC")
    
    return pd.DataFrame(lst, columns=colName)

In [None]:
# scores = ['ADHERE', 'ESCAPE', 'GWTG', 'MAGGIC']
# datasets = ['ESCAPE', 'BEST', 'HF-ACTION', 'GUIDE-IT', 'CardShock', 'SerialCardiac']
# labels = [escapeLabels, bestLabels, hfactionLabels, guideLabels, cardShockLabels, serialLabels]
escapeDF, scoreDF = makeScoreDF('ESCAPE', escapeLabels, 'DEIDNUM')

#ALSO TO DO SHFM

In [None]:
escapeDF, scoreDF = makeScoreDF('ESCAPE', escapeLabels, 'DEIDNUM')
aucDF = getAUC(scoreDF)
# escapeDF, scoreDF = makeScoreDF('GUIDE-IT', guideLabels, 'ID')

aucDF

In [None]:
escapeDF

In [None]:
def getAUC(labelDF, scoreDF):
    #make comparison df of predicted prob vs real labels
    lst = []
    scoreList = scoreDF['Score'].tolist()
    
    for i in range(len(scoreList)):
        if not np.isnan(scoreList[i]):
            est = scoreList[i]
            idx = scoreDF.iloc[i].name
            real = labelDF.loc[idx].item()
            
            lst.append([idx, est, real])
    
    compDF = pd.DataFrame(lst, columns=['IDX', 'Predicted', 'Real']).set_index('IDX')
    scr = roc_auc_score(compDF['Real'], compDF['Predicted'])
    
    return compDF, scr

In [None]:
#Test prec recall
esRs = escape.filter(['Age','BUN_D','SixFtWlk_D', 'SOD_D', 'CPR', 'MEVT', 'DIURDSE_D', 'BET_D', 'BNP_D'], axis=1)
res = calcESCAPEScore(esRs)
precision, recall, thresholds = precision_recall_curve(escapeLabels['Death'], res)
auc = auc(recall, precision)
auc

In [None]:
#ESCAPE Risk score
esRs = escape.filter(['Age','BUN_D','SixFtWlk_D', 'SOD_D', 'CPR', 'MEVT', 'DIURDSE_D', 'BET_D', 'BNP_D'], axis=1)
res = calcESCAPEScore(esRs)

#given actual labels and predicted probability of the event (outcome), get AUC
df, val = getAUC(escapeLabels['Death'], res)
print("Escape Risk AUC:", val)
df

In [None]:
def getAUC(dataName, scoreName, index, labels):
    data = pd.read_csv("Calculated Scores/"+scoreName+"/" + dataName+"_" + scoreName + ".csv")
    
    precision, recall, thresholds = precision_recall_curve(labels, data[scoreName])
    precAUC = auc(recall, precision)
    print("Prec Recall AUC:", precAUC)
    
    rocAUC = roc_auc_score(labels, data[scoreName])
    print("ROC AUC:", rocAUC)
    
    return precAUC, rocAUC

def makeLabels(data, labels, index):
    lst = []
    idx = sorted(data[index])
    for i in idx:
        lab = labels.loc[i]
        lst.append(lab)
        
    return pd.DataFrame(lst, columns=['Death'],index=idx)

def calcAUC(datasets, scores, labels, index):
    grandLst = []
    colNames = []
    for scr in scores:
        lst = []
        for d in range(len(datasets)):
            data = pd.read_csv("Calculated Scores/"+scr+"/" + datasets[d] +"_" + scr + ".csv")
            lbl = labels[d]
            
            #check and fix labels if needed
            if len(lbl) != len(data):
                lbl = makeLabels(data, lbl, index[d])
            
            
            
            df = lbl
            df['Score'] = data[scr]
            
            print(df)
            
            
            #Calc scores
            precision, recall, thresholds = precision_recall_curve(lbl, data[scr])
            precAUC = auc(recall, precision)

            rocAUC = roc_auc_score(lbl, data[scr])
            
            lst.append(precAUC)
            lst.append(rocAUC)
            
            colNames.append(scr + " " + datasets[d] + " Prec")
            colNames.append(scr + " " + datasets[d] + " ROC")
        
        grandLst.append(lst)
        

            
            
    aucDF = pd.DataFrame(grandLst, columns=colNames) 
    return aucDF

In [None]:
# scores = ['ADHERE', 'ESCAPE', 'GWTG', 'MAGGIC']
# datasets = ['ESCAPE', 'BEST', 'HF-ACTION', 'GUIDE-IT', 'CardShock', 'SerialCardiac']
# labels = [escapeLabels, bestLabels, hfactionLabels, guideLabels, cardShockLabels, serialLabels]

scores = ['ESCAPE']
datasets = ['ESCAPE', 'BEST']
labels = [escapeLabels['Death'], bestLabels['Death']]
index = ['DEIDNUM', 'ID']

calcAUC(datasets, scores, labels, index)

# getAUC(dataName='ESCAPE', scoreName="ESCAPE", index='DEIDNUM', labels=escapeLabels['Death'])

In [None]:
#Escape score
escape = pd.read_csv("Calculated Scores/ESCAPE_escapeScore.csv")
hfaction = pd.read_csv("Calculated Scores/HFACTION_escapeScore.csv")
best = pd.read_csv("Calculated Scores/BEST_escapeScore.csv")
card = pd.read_csv("Calculated Scores/CardiogenicShock_escapeScore.csv")
serial = pd.read_csv("Calculated Scores/SerialCardiac_escapeScore.csv")



In [43]:
def makeLabels(data, labels):
    lst = []
    idx = sorted(data.index)
    for i in idx:
        lab = labels.loc[i]
        lst.append(lab['Death'])
        
    return pd.DataFrame(lst, columns=['Real'],index=idx)

def makeScoreDF(dt, lbls, index):
    #Get ESCAPE Score DF
    orig = pd.read_csv("Calculated Scores/ESCAPE/"+ dt + "_ESCAPE.csv").set_index(index)
    labels = makeLabels(orig, lbls)
    escDF = labels
    escDF['ESCAPE'] = orig[['ESCAPE']]

    #Get other scores
    for scr in ['ADHERE', 'GWTG', 'MAGGIC']:
        orig = pd.read_csv("Calculated Scores/" + scr + "/"+ dt + "_" + scr + ".csv").set_index(index)

        #Make real labels
        if scr == 'ADHERE':
            labels = makeLabels(orig, lbls)
            data = labels

        #TODO HERE --> convert meaning of scores
        data[scr] = orig[[scr]]

        if data[scr].astype(str).str.contains('-').any():
            data[[scr+'_Low', scr+'_High']] = data[scr].astype(str).str.split('-', expand=True)#.astype(float)

    #Add Optimize and Effect scores
    orig = pd.read_csv("Calculated Scores/OptimizeEffect/"+ dt + "_optimizeEffectScore.csv").set_index(index)
    data['OPTIMIZE-HF'] = orig['OPTIMIZE-HF']
    data['EFFECT 30 Day'] = orig['EFFECT 30 Day']
    data['EFFECT 1 Year'] = orig['EFFECT 1 Year']
    
    return escDF, data

def getAUC(df):
    colName = []
    lst = []
    for score in ['ADHERE_Low', 'ADHERE_High', 'GWTG_Low','GWTG_High', 'MAGGIC', 'OPTIMIZE-HF', 
                  'EFFECT 30 Day', 'EFFECT 1 Year']:
        print(score)
        
        dfCpy = copy.deepcopy(df).dropna()
        real = dfCpy['Real'].astype(float)
        scoreVal = dfCpy[score].astype(float)
        
        precision, recall, thresholds = precision_recall_curve(real, scoreVal)
        precAUC = auc(recall, precision)
        print("Prec Recall AUC:", precAUC)
        lst.append(precAUC)

        rocAUC = roc_auc_score(real, scoreVal)
        print("ROC AUC:", rocAUC)
        lst.append(rocAUC)
        
        colName.append(score + " Prec")
        colName.append(score + " ROC")
    
    return pd.DataFrame(lst, columns=colName)

In [44]:
# scores = ['ADHERE', 'ESCAPE', 'GWTG', 'MAGGIC']
# datasets = ['ESCAPE', 'BEST', 'HF-ACTION', 'GUIDE-IT', 'CardShock', 'SerialCardiac']
# labels = [escapeLabels, bestLabels, hfactionLabels, guideLabels, cardShockLabels, serialLabels]
escapeDF, scoreDF = makeScoreDF('ESCAPE', escapeLabels, 'DEIDNUM')

#ALSO TO DO SHFM

In [48]:
escapeDF, scoreDF = makeScoreDF('ESCAPE', escapeLabels, 'DEIDNUM')
aucDF = getAUC(scoreDF)
# escapeDF, scoreDF = makeScoreDF('GUIDE-IT', guideLabels, 'ID')

aucDF

ADHERE_Low
Prec Recall AUC: 0.6179271708683474
ROC AUC: 0.575
ADHERE_High
Prec Recall AUC: 0.6179271708683474
ROC AUC: 0.575
GWTG_Low


ValueError: could not convert string to float: 

In [49]:
escapeDF

Unnamed: 0,Real,ESCAPE
72,1,0.368
81,0,0.033
86,0,0.123
267,1,0.368
814,0,0.065
...,...,...
98078,0,0.033
98508,0,0.033
99302,1,0.123
99912,0,0.033


In [None]:
def getAUC(labelDF, scoreDF):
    #make comparison df of predicted prob vs real labels
    lst = []
    scoreList = scoreDF['Score'].tolist()
    
    for i in range(len(scoreList)):
        if not np.isnan(scoreList[i]):
            est = scoreList[i]
            idx = scoreDF.iloc[i].name
            real = labelDF.loc[idx].item()
            
            lst.append([idx, est, real])
    
    compDF = pd.DataFrame(lst, columns=['IDX', 'Predicted', 'Real']).set_index('IDX')
    scr = roc_auc_score(compDF['Real'], compDF['Predicted'])
    
    return compDF, scr

In [None]:
#Test prec recall
esRs = escape.filter(['Age','BUN_D','SixFtWlk_D', 'SOD_D', 'CPR', 'MEVT', 'DIURDSE_D', 'BET_D', 'BNP_D'], axis=1)
res = calcESCAPEScore(esRs)
precision, recall, thresholds = precision_recall_curve(escapeLabels['Death'], res)
auc = auc(recall, precision)
auc

In [None]:
#ESCAPE Risk score
esRs = escape.filter(['Age','BUN_D','SixFtWlk_D', 'SOD_D', 'CPR', 'MEVT', 'DIURDSE_D', 'BET_D', 'BNP_D'], axis=1)
res = calcESCAPEScore(esRs)

#given actual labels and predicted probability of the event (outcome), get AUC
df, val = getAUC(escapeLabels['Death'], res)
print("Escape Risk AUC:", val)
df

In [64]:
def getAUC(dataName, scoreName, index, labels):
    data = pd.read_csv("Calculated Scores/"+scoreName+"/" + dataName+"_" + scoreName + ".csv")
    
    precision, recall, thresholds = precision_recall_curve(labels, data[scoreName])
    precAUC = auc(recall, precision)
    print("Prec Recall AUC:", precAUC)
    
    rocAUC = roc_auc_score(labels, data[scoreName])
    print("ROC AUC:", rocAUC)
    
    return precAUC, rocAUC

def makeLabels(data, labels, index):
    lst = []
    idx = sorted(data[index])
    for i in idx:
        lab = labels.loc[i]
        lst.append(lab)
        
    return pd.DataFrame(lst, columns=['Death'],index=idx)

def calcAUC(datasets, scores, labels, index):
    grandLst = []
    colNames = []
    for scr in scores:
        lst = []
        for d in range(len(datasets)):
            data = pd.read_csv("Calculated Scores/"+scr+"/" + datasets[d] +"_" + scr + ".csv")
            lbl = labels[d]
            
            #check and fix labels if needed
            if len(lbl) != len(data):
                lbl = makeLabels(data, lbl, index[d])
            
            
            
            df = lbl
            df['Score'] = data[scr]
            
            print(df)
            
            
            #Calc scores
            precision, recall, thresholds = precision_recall_curve(lbl, data[scr])
            precAUC = auc(recall, precision)

            rocAUC = roc_auc_score(lbl, data[scr])
            
            lst.append(precAUC)
            lst.append(rocAUC)
            
            colNames.append(scr + " " + datasets[d] + " Prec")
            colNames.append(scr + " " + datasets[d] + " ROC")
        
        grandLst.append(lst)
        

            
            
    aucDF = pd.DataFrame(grandLst, columns=colNames) 
    return aucDF

In [66]:
# scores = ['ADHERE', 'ESCAPE', 'GWTG', 'MAGGIC']
# datasets = ['ESCAPE', 'BEST', 'HF-ACTION', 'GUIDE-IT', 'CardShock', 'SerialCardiac']
# labels = [escapeLabels, bestLabels, hfactionLabels, guideLabels, cardShockLabels, serialLabels]

scores = ['ESCAPE']
datasets = ['ESCAPE', 'BEST']
labels = [escapeLabels['Death'], bestLabels['Death']]
index = ['DEIDNUM', 'ID']

calcAUC(datasets, scores, labels, index)

# getAUC(dataName='ESCAPE', scoreName="ESCAPE", index='DEIDNUM', labels=escapeLabels['Death'])

DEIDNUM
72                                                       1
81                                                       0
86                                                       0
267                                                      1
814                                                      0
                               ...                        
98508                                                    0
99302                                                    1
99912                                                    0
99935                                                    0
Score    0      0.368
1      0.033
2      0.123
3      ...
Name: Death, Length: 434, dtype: object


ValueError: unknown format is not supported

In [None]:
#Escape score
escape = pd.read_csv("Calculated Scores/ESCAPE_escapeScore.csv")
hfaction = pd.read_csv("Calculated Scores/HFACTION_escapeScore.csv")
best = pd.read_csv("Calculated Scores/BEST_escapeScore.csv")
card = pd.read_csv("Calculated Scores/CardiogenicShock_escapeScore.csv")
serial = pd.read_csv("Calculated Scores/SerialCardiac_escapeScore.csv")



In [None]:
#OPTIMIZE and EFFECT score
escape = pd.read_csv("Calculated Scores/ESCAPE_optimizeEffectScore.csv")
hfaction = pd.read_csv("Calculated Scores/HFACTION_optimizeEffectScore.csv")
best = pd.read_csv("Calculated Scores/BEST_optimizeEffectScore.csv")
card = pd.read_csv("Calculated Scores/CardiogenicShock_optimizeEffectScore.csv")
serial = pd.read_csv("Calculated Scores/SerialCardiac_optimizeEffectScore.csv")