In [1]:
#Import packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import copy
import warnings
from sklearn import metrics
from sklearn.metrics import * 

# Set the default matplotlib font size to 12 for every figure in this notebook.
plt.rc('font', size=12)
# NOTE(review): this silences *all* warnings for the rest of the session, including
# any emitted by pandas / scikit-learn while the metrics below are computed.
# Consider narrowing the filter to the specific categories that are known noise.
warnings.filterwarnings('ignore')

In [2]:
# Load the label table of every cohort (labels for the prediction classes).
# Each table is indexed by its identifier column and sorted on that index;
# the ESCAPE file uses 'DEIDNUM' as identifier, all other files use 'ID'.
escapeLabels = pd.read_csv(
    "../Data/Original DataFrames/Labels.csv",
    sep=",",
    index_col='DEIDNUM',
).sort_index()

bestLabels = pd.read_csv(
    "Preprocessed Data/LabelsBEST.csv",
    sep=",",
    index_col='ID',
).sort_index()

hfactionLabels = pd.read_csv(
    "Preprocessed Data/LabelsHF-ACTION.csv",
    sep=",",
    index_col='ID',
).sort_index()

guideLabels = pd.read_csv(
    "Preprocessed Data/LabelsGUIDE-IT.csv",
    sep=",",
    index_col='ID',
).sort_index()

# External validation cohorts live under "../Data Validation/".
cardShockLabels = pd.read_csv(
    "../Data Validation/Cardiogenic Shock/Original DataFrames/LabelsCardiogenicShock.csv",
    sep=",",
    index_col='ID',
).sort_index()

serialLabels = pd.read_csv(
    "../Data Validation/Serial Cardiac Caths/Original DataFrames/LabelsSerialCardiac.csv",
    sep=",",
    index_col='ID',
).sort_index()


In [3]:
def makeLabels(data, labels):
    """Build a one-column frame of true outcomes for the rows of ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose index holds the identifiers to look up.
    labels : pandas.DataFrame
        Label table indexed by the same identifiers; must contain a
        'Death' column.

    Returns
    -------
    pandas.DataFrame
        Single column 'Real' holding ``labels['Death']`` for every identifier
        of ``data``, indexed by the *sorted* identifiers of ``data``.

    Raises
    ------
    KeyError
        If an identifier of ``data`` has no row in ``labels``.
    ValueError
        If ``labels`` holds more than one row for a requested identifier.
    """
    idx = sorted(data.index)

    # Fail with a message that names the offending IDs instead of the bare
    # key that a row-by-row lookup would report.
    missing = pd.Index(idx).difference(labels.index)
    if len(missing) > 0:
        raise KeyError(
            "{} identifier(s) have no label row, e.g. {}".format(
                len(missing), list(missing[:5])
            )
        )

    # One vectorised lookup instead of one `.loc` call per row; selecting the
    # column directly also keeps the dtype of 'Death' (row-wise access would
    # upcast it to the common dtype of the whole label row).
    real = labels.loc[idx, 'Death']
    if len(real) != len(idx):
        raise ValueError("labels contains duplicate index entries for requested identifiers")

    return pd.DataFrame({'Real': real.to_numpy()}, index=idx)

def _parseGWTG(raw):
    """Turn one raw 'GWTG' cell into a float score (NaN when missing)."""
    if isinstance(raw, str):
        text = raw.strip()
        if text == "-":
            # A lone dash marks a missing score.
            return np.nan
        if "-" in text:
            # Range such as "34-50": keep the lower end.
            text = text.split('-')[0]
        return float(text)
    # Numeric cells are used as-is; only strings can encode a range, so a
    # numeric value whose repr contains '-' is never split.
    return float(raw)


def convertGWTG(df):
    """Convert the 'GWTG' score column into lower/upper band values.

    Each score is placed into one of nine bands and the band's (low, high)
    pair is returned as fractions (presumably predicted mortality risk --
    confirm against the score's published table). The open-ended first and
    last bands have no upper value and report NaN for it.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'GWTG' column (or index level) holding a number, a
        numeric string, a range string such as "34-50" (its lower end is
        used) or "-" for a missing score.

    Returns
    -------
    (list, list)
        ``lstLow`` and ``lstHigh``, one entry per row of ``df`` in row order.
    """
    # (exclusive upper score bound, low, high). The bounds are half-open so
    # that non-integer scores (e.g. 33.5) fall into the band of their integer
    # part instead of slipping between two closed integer ranges and ending
    # up in the highest band. Integer scores map exactly as before.
    bands = (
        (34, 0.01, np.nan),   # score <= 33
        (51, 0.01, 0.05),     # 34-50
        (58, 0.06, 0.10),     # 51-57
        (62, 0.11, 0.15),     # 58-61
        (66, 0.16, 0.20),     # 62-65
        (71, 0.21, 0.30),     # 66-70
        (75, 0.31, 0.4),      # 71-74
        (79, 0.41, 0.50),     # 75-78
    )
    topBand = (0.51, np.nan)  # score >= 79

    lstLow = []
    lstHigh = []
    # reset_index() keeps the original contract: 'GWTG' may be the index.
    for raw in df.reset_index()['GWTG'].tolist():
        val = _parseGWTG(raw)

        if np.isnan(val):
            low, high = np.nan, np.nan
        else:
            low, high = topBand
            for bound, bandLow, bandHigh in bands:
                if val < bound:
                    low, high = bandLow, bandHigh
                    break

        lstLow.append(low)
        lstHigh.append(high)

    return lstLow, lstHigh

def makeScoreDF(dataset, labels, index):
    """Collect the pre-computed risk scores of one dataset next to its labels.

    Reads the per-score CSV files under "Calculated Scores/<score>/" for
    ``dataset`` and joins them, on the identifier column, with the true
    outcome produced by ``makeLabels``.

    Parameters
    ----------
    dataset : str
        Dataset name used as file-name prefix (e.g. 'ESCAPE').
    labels : pandas.DataFrame
        Label table for this dataset (see ``makeLabels``).
    index : str
        Name of the identifier column in the score files.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``escDF`` with columns 'Real' and 'ESCAPE', and ``scrDF`` with 'Real'
        followed by the ADHERE, GWTG, MAGGIC, OPTIMIZE-HF, EFFECT and SHFM
        columns. GWTG / MAGGIC columns are all-NaN when those scores could
        not be loaded for this dataset.
    """
    def readScores(folder, suffix):
        # All score files share the same layout: <folder>/<dataset><suffix>.
        path = "Calculated Scores/" + folder + "/" + dataset + suffix
        return pd.read_csv(path).set_index(index)

    def reportUnavailable(scoreName, err):
        # Warnings are silenced notebook-wide, so print the reason instead;
        # otherwise an all-NaN column is indistinguishable from a real bug.
        print("makeScoreDF: no usable {} scores for {} ({}: {}); filling with NaN".format(
            scoreName, dataset, type(err).__name__, err))

    #Get ESCAPE Score DF
    orig = readScores("ESCAPE", "_ESCAPE.csv")
    escDF = makeLabels(orig, labels)
    escDF['ESCAPE'] = orig['ESCAPE']

    #Make other scores DF
    #ADHERE
    orig = readScores("ADHERE", "_ADHERE.csv")
    scrDF = makeLabels(orig, labels)

    # ADHERE is stored as "low-high"; split it and rescale to the 0-1 range.
    scrDF[['ADHERE_Low','ADHERE_High']] = orig['ADHERE'].astype(str).str.split('-', expand=True).astype(float)
    scrDF['ADHERE_Low'] = scrDF['ADHERE_Low'] / 100
    scrDF['ADHERE_High'] = scrDF['ADHERE_High'] / 100

    #GWTG (best effort: not every dataset has it)
    try:
        orig = readScores("GWTG", "_GWTG.csv")
        low, high = convertGWTG(orig)
        # convertGWTG returns plain lists in file order. Wrap them in Series
        # keyed by ID so they are aligned with scrDF's (sorted) index like
        # every other score, instead of being assigned positionally.
        scrDF['GWTG_Low'] = pd.Series(low, index=orig.index, dtype=float)
        scrDF['GWTG_High'] = pd.Series(high, index=orig.index, dtype=float)
    except Exception as err:
        reportUnavailable("GWTG", err)
        scrDF['GWTG_Low'] = np.nan
        scrDF['GWTG_High'] = np.nan

    #MAGGIC (best effort: not every dataset has it)
    try:
        orig = readScores("MAGGIC", "_MAGGIC.csv")
        scrDF["MAGGIC Y1"] = orig['Y1'] / 100
        scrDF['MAGGIC Y3'] = orig['Y3'] / 100
    except Exception as err:
        reportUnavailable("MAGGIC", err)
        scrDF["MAGGIC Y1"] = np.nan
        scrDF['MAGGIC Y3'] = np.nan

    #Add Optimize and Effect scores
    orig = readScores("OptimizeEffect", "_optimizeEffectScore.csv")
    scrDF['OPTIMIZE-HF'] = orig['OPTIMIZE-HF']
    scrDF['EFFECT 30 Day'] = orig['EFFECT 30 Day']
    scrDF['EFFECT 1 Year'] = orig['EFFECT 1 Year']

    #Add SHFM
    orig = readScores("SHF", "_SHF.csv")
    scrDF["SHFM Y1"] = orig['SHF1'] / 100
    # NOTE(review): the 'SHFM Y3' column is read from 'SHF2' -- confirm the
    # horizon this column actually represents.
    scrDF['SHFM Y3'] = orig['SHF2'] / 100
    scrDF["SHFM Y5"] = orig['SHF5'] / 100

    return escDF, scrDF

def getAUC(df, scoreList):
    """Compute precision-recall AUC and ROC AUC for each score column.

    Rows where a score is missing are dropped for that score only.
    ``precision_recall_curve``, ``auc`` and ``roc_auc_score`` come from the
    notebook's ``from sklearn.metrics import *``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the true outcome in column 'Real' and one column per score.
    scoreList : list of str
        Names of the score columns to evaluate.

    Returns
    -------
    (list, list)
        ``precLst`` (precision-recall AUC) and ``rocLst`` (ROC AUC), one float
        per entry of ``scoreList``. An entry is ``numpy.nan`` when the score
        has no values at all or when the remaining rows contain fewer than
        two outcome classes (the AUC is undefined in that case).
    """
    precLst = []
    rocLst = []
    for score in scoreList:
        scored = df[df[score].notna()]
        real = scored['Real']
        scoreVal = scored[score]

        # Use NaN (not the string "None") as the missing marker so the
        # resulting tables stay numeric. Also skip single-class subsets,
        # for which roc_auc_score raises instead of returning a value.
        if scoreVal.empty or real.nunique() < 2:
            precLst.append(np.nan)
            rocLst.append(np.nan)
            continue

        precision, recall, _ = precision_recall_curve(real, scoreVal)
        precLst.append(auc(recall, precision))
        rocLst.append(roc_auc_score(real, scoreVal))

    return precLst, rocLst


In [8]:
# Cohorts to evaluate. The three lists are parallel: position k holds the
# name, the label table and the identifier column of cohort k.
datasets = ['ESCAPE', 'BEST', 'HF-ACTION', 'GUIDE-IT', 'CardShock', 'SerialCardiac']
labels = [escapeLabels, bestLabels, hfactionLabels, guideLabels, cardShockLabels, serialLabels]
index = ['DEIDNUM', 'ID', 'ID', 'ID', 'ID', 'ID']

# Comparison scores, evaluated after the ESCAPE score.
scoreList = [
    'ADHERE_Low', 'ADHERE_High',
    'GWTG_Low', 'GWTG_High',
    'MAGGIC Y1', 'MAGGIC Y3',
    'OPTIMIZE-HF',
    'EFFECT 30 Day', 'EFFECT 1 Year',
    'SHFM Y1', 'SHFM Y3', 'SHFM Y5',
]

# One row of AUC values per cohort: ESCAPE first, then scoreList in order.
prec = []
roc = []
for datasetName, datasetLabels, idColumn in zip(datasets, labels, index):
    escapeDF, scoreDF = makeScoreDF(dataset=datasetName, labels=datasetLabels, index=idColumn)

    escapePrec, escapeRoc = getAUC(escapeDF, ['ESCAPE'])
    otherPrec, otherRoc = getAUC(scoreDF, scoreList)

    prec.append(escapePrec + otherPrec)
    roc.append(escapeRoc + otherRoc)

# Column order mirrors the order in which the values were collected above.
cols = ['ESCAPE'] + scoreList

precDF = pd.DataFrame(prec, columns=cols, index=datasets)
rocDF = pd.DataFrame(roc, columns=cols, index=datasets)
precDF

Unnamed: 0,ESCAPE,ADHERE_Low,ADHERE_High,GWTG_Low,GWTG_High,MAGGIC Y1,MAGGIC Y3,OPTIMIZE-HF,EFFECT 30 Day,EFFECT 1 Year,SHFM Y1,SHFM Y3,SHFM Y5
ESCAPE,0.465807,0.531639,0.531639,0.529542,0.544959,0.451556,0.451556,0.397649,0.35475,0.36487,0.357256,0.353554,0.357473
BEST,0.421475,0.557367,0.557367,0.459937,0.477174,,,0.350243,0.503915,0.487535,0.417414,0.418435,0.417974
HF-ACTION,0.270107,0.228068,0.228068,,,0.34084,0.34084,0.255691,0.316165,0.304653,0.399662,0.399492,0.39954
GUIDE-IT,0.383379,0.363262,0.363262,0.313175,0.335518,0.311263,0.311263,0.371796,0.29809,0.317143,0.241725,0.252243,0.367049
CardShock,0.696771,0.741709,0.741709,,,0.737596,0.737596,0.644261,0.668123,0.691748,0.676757,0.678083,0.657797
SerialCardiac,0.546625,0.671924,0.671924,,,,,0.47539,0.593656,0.601229,0.572381,0.56755,0.561119


In [9]:
# Display the ROC AUC table built in the previous cell (rows: datasets, columns: scores).
rocDF

Unnamed: 0,ESCAPE,ADHERE_Low,ADHERE_High,GWTG_Low,GWTG_High,MAGGIC Y1,MAGGIC Y3,OPTIMIZE-HF,EFFECT 30 Day,EFFECT 1 Year,SHFM Y1,SHFM Y3,SHFM Y5
ESCAPE,0.680515,0.594986,0.594986,0.596784,0.600587,0.639941,0.639941,0.430319,0.55016,0.548222,0.623452,0.622755,0.622373
BEST,0.586725,0.576283,0.576283,0.536415,0.538361,,,0.469083,0.609785,0.637686,0.612697,0.615549,0.614913
HF-ACTION,0.61281,0.544146,0.544146,,,0.676961,0.676961,0.45705,0.62783,0.647564,0.558359,0.557646,0.557993
GUIDE-IT,0.714789,0.601167,0.601167,0.536801,0.533696,0.689307,0.689307,0.467812,0.634985,0.632376,0.623218,0.625284,0.619007
CardShock,0.594915,0.526021,0.526021,,,0.677705,0.677705,0.43899,0.58438,0.61164,0.586812,0.588251,0.573426
SerialCardiac,0.564839,0.573527,0.573527,,,,,0.431771,0.610227,0.643946,0.587697,0.583898,0.579254
