In [1]:
#Import packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import copy
import warnings
import scipy
import math
from sklearn import metrics
from sklearn.metrics import * 
from sklearn.preprocessing import label_binarize

plt.rc('font', size=12)
warnings.filterwarnings('ignore')

In [2]:
#Load Labels
escapeLabels  = pd.read_csv("../Data/Original DataFrames/Labels.csv", sep=",", index_col='DEIDNUM').sort_index() #labels for prediction classes 
bestLabels  = pd.read_csv("Preprocessed Data/LabelsBEST.csv", sep=",", index_col='ID').sort_index() #labels for prediction classes 
hfactionLabels  = pd.read_csv("Preprocessed Data/LabelsHF-ACTION.csv", sep=",", index_col='ID').sort_index() #labels for prediction classes 
guideLabels  = pd.read_csv("Preprocessed Data/LabelsGUIDE-IT.csv", sep=",", index_col='ID').sort_index() #labels for prediction classes 
cardShockLabels = pd.read_csv("../Data Validation/Cardiogenic Shock/Original DataFrames/LabelsCardiogenicShock.csv", sep=",", index_col='ID').sort_index()
serialLabels = pd.read_csv("../Data Validation/Serial Cardiac Caths/Original DataFrames/LabelsSerialCardiac.csv", sep=",", index_col='ID').sort_index()


In [3]:
# Loading scores with mortality labels
escapeHemoScores = pd.read_csv("../Data/Preprocessed Data/ESCAPE_Hemo.csv", sep=",", index_col='ID').sort_index()['ScoreDeath']
cardShockHemoScores = pd.read_csv("../Data Validation/Cardiogenic Shock/Preprocessed Data/CardiogenicShock_Hemo.csv", sep=",", index_col='ID').sort_index()['ScoreDeath']
serialHemoScores = pd.read_csv("../Data Validation/Serial Cardiac Caths/Preprocessed Data/SerialCardiac_Hemo.csv", sep=",", index_col='ID').sort_index()['ScoreDeath']

escapeAllScores = pd.read_csv("../Data/Preprocessed Data/ESCAPE_AllData.csv", sep=",", index_col='ID').sort_index()['ScoreDeath']
hfactionAllScores = pd.read_csv("../Data Validation/HF-ACTION/Preprocessed Data/HF-ACTION_AllData.csv", sep=",", index_col='ID').sort_index()['ScoreDeath']
bestAllScores = pd.read_csv("../Data Validation/BEST/Preprocessed Data/BEST_AllData.csv", sep=",", index_col='ID').sort_index()['ScoreDeath']
guideAllScores = pd.read_csv("../Data Validation/GUIDE-IT/Preprocessed Data/GUIDE-IT_AllData.csv", sep=",", index_col='ID').sort_index()['ScoreDeath']


In [4]:
def makeLabels(data, labels):
    lst = []
    idx = sorted(data.index)
    for i in idx:
        lab = labels.loc[i]
        lst.append(lab['Death'])
        
    return pd.DataFrame(lst, columns=['Real'],index=idx)


def convertCARNAEscape(data, scores, missing):
    lst = []
    for r in range(len(data)):
        row = data.iloc[r]
        idx = row.name
        
        if idx in missing:
            lst.append(np.nan)
        else:
            if type(scores.loc[idx]) == pd.Series:
                s = max(scores.loc[idx])
            else:
                s = scores.loc[idx]
            
            #convert score to prob value
            lst.append(CARNAScoreVals(s))

            
    return lst

def convertCARNA(data, scores, missing):
    lst = []
    idx = sorted(set(data.index))
    
    for i in idx:
        if i in missing:
            for row in range(len(data.loc[i])):
                lst.append(np.nan)
        else:
            
            sRows = scores.loc[i]
            
            try:
                sRowLen = len(sRows)
            except:
                sRowLen = 1
            
            if type(data.loc[i]) == pd.Series: #just do once
                
                if type(sRows) == pd.Series:
                    s = max(sRows)
                else:
                    s = sRows
                
                #convert score to prob value
                lst.append(CARNAScoreVals(s))
            
            else:
                for row in range(len(data.loc[i])):
                    if row >= sRowLen:
                        lst.append(np.nan)
                    else:

                        if sRowLen == 1:
                            s = sRows
                        else:
                            s = sRows.iloc[row]

                        #convert score to prob value
                        lst.append(CARNAScoreVals(s))
                                        
    return lst

def CARNAScoreVals(s):
    if s == 5:
        return 0.5
    elif s == 4:
        return 0.35
    elif s == 3:
        return 0.25
    elif s == 2:
        return 0.15
    else:# s == 1:
        return 0.09

#     if s == 5:
#         return 0.441140
#     elif s == 4:
#         return 0.418677
#     elif s == 3:
#         return  0.401663
#     elif s == 2:
#         return 0.185185
#     else:# s == 1:
#         return 0.327273
    
#     if s >= 3:
#         return 1
#     else:
#         return 0

            

    
def convertGWTG(df):
    lstLow = []
    lstHigh = []
    df = df.reset_index()
    idx = sorted(df.index)
    for i in idx:
        val = df.loc[i]['GWTG']
        
        if val == "-":
            val = np.nan
        elif "-" in str(val):
            val = val.split('-')[0]
            
        val = float(val)

        if np.isnan(val):
            lstLow.append(np.nan)
            lstHigh.append(np.nan)
        elif val <= 33:
            lstLow.append(0.01)
            lstHigh.append(np.nan)
        elif val >= 34 and val <= 50:
            lstLow.append(0.01)
            lstHigh.append(0.05)
        elif val >= 51 and val <= 57:
            lstLow.append(0.06)
            lstHigh.append(0.10)
        elif val >= 58 and val <= 61:
            lstLow.append(0.11)
            lstHigh.append(0.15)
        elif val >= 62 and val <= 65:
            lstLow.append(0.16)
            lstHigh.append(0.20)
        elif val >= 66 and val <= 70:
            lstLow.append(0.21)
            lstHigh.append(0.30)
        elif val >= 71 and val <= 74:
            lstLow.append(0.31)
            lstHigh.append(0.4)
        elif val >= 75 and val <= 78:
            lstLow.append(0.41)
            lstHigh.append(0.50)
        else: #val >= 79
            lstLow.append(0.51)
            lstHigh.append(np.nan)
    
    return lstLow, lstHigh

def makeScoreDF(dataset, labels, index, carnaScores=None):
    #Get ESCAPE Score DF
    orig = pd.read_csv("Calculated Scores/ESCAPE/"+ dataset + "_ESCAPE.csv").set_index(index).sort_index()
    lbls = makeLabels(orig, labels)
    escDF = lbls
    escDF['ESCAPE'] = orig[['ESCAPE']]
    
    try:
        missing = np.setdiff1d(labels.index, carnaScores.index)
        scrs = convertCARNAEscape(orig, carnaScores, missing)
        escDF['CARNA'] = scrs
    except:
        pass
    
    
    #Make other scores DF
    #ADHERE
    orig = pd.read_csv("Calculated Scores/ADHERE/"+ dataset + "_ADHERE.csv").set_index(index)
    lbls = makeLabels(orig, labels)
    scrDF = lbls
    
    scrDF[['ADHERE_Low','ADHERE_High']] = orig['ADHERE'].astype(str).str.split('-', expand=True).astype(float)
    scrDF['ADHERE_Low'] = scrDF['ADHERE_Low'] / 100 #split and make btw 0 and 1
    scrDF['ADHERE_High'] = scrDF['ADHERE_High'] / 100

    #GWTG
    try:
        orig = pd.read_csv("Calculated Scores/GWTG/"+ dataset + "_GWTG.csv").set_index(index)
        low, high = convertGWTG(orig)
        scrDF['GWTG_Low'] = low
        scrDF['GWTG_High'] = high
    except:
        scrDF['GWTG_Low'] = np.nan
        scrDF['GWTG_High'] = np.nan
    
    #MAGGIC
    try:
        orig = pd.read_csv("Calculated Scores/MAGGIC/"+ dataset + "_MAGGIC.csv").set_index(index)
        scrDF["MAGGIC Y1"] = orig['Y1'] / 100
        scrDF['MAGGIC Y3'] = orig['Y3'] / 100
    except:
        scrDF["MAGGIC Y1"] = np.nan
        scrDF['MAGGIC Y3'] = np.nan
    
    #Add Optimize and Effect scores
    orig = pd.read_csv("Calculated Scores/OptimizeEffect/"+ dataset + "_optimizeEffectScore.csv").set_index(index)
    scrDF['OPTIMIZE-HF'] = orig['OPTIMIZE-HF']
    scrDF['EFFECT 30 Day'] = orig['EFFECT 30 Day']
    scrDF['EFFECT 1 Year'] = orig['EFFECT 1 Year']
    
    #Add SHFM
    orig = pd.read_csv("Calculated Scores/SHF/"+ dataset + "_SHF.csv").set_index(index).sort_index()
    scrDF["SHFM Y1"] = orig['SHF1'] / 100
    scrDF['SHFM Y3'] = orig['SHF2'] / 100
    scrDF["SHFM Y5"] = orig['SHF5'] / 100
    
    try:
        scrs = convertCARNA(orig, carnaScores, missing)
        scrDF['CARNA'] = scrs
    except:
        pass
    

    return escDF, scrDF



In [5]:
def getAUC(df, scoreList):
    precLst = []
    rocLst = []
    for score in scoreList:
        dfCpy = df[df[score].notna()]
#         dfCpy = copy.deepcopy(df)#.dropna()
        real = dfCpy['Real']
        scoreVal = dfCpy[score]
        
#         print(real)
#         print(scoreVal)
        
        if not scoreVal.isnull().all():
            precision, recall, thresholds = precision_recall_curve(real, scoreVal)
            precAUC = auc(recall, precision)
    #         print("Prec Recall AUC:", precAUC)
            precLst.append(precAUC)

            rocAUC = roc_auc_score(real, scoreVal)
    #         print("ROC AUC:", rocAUC)
            rocLst.append(rocAUC)
        else:
            precLst.append(np.nan)
            rocLst.append(np.nan)
    
    return precLst, rocLst

#Return list of aucs across all data points
def getAUCList(df, scoreList):
    rocLst = []
    for score in scoreList:
        dfCpy = df[df[score].notna()]
        real = dfCpy['Real']
        scoreVal = dfCpy[score]
        
        print(real)
        print(scoreVal)
        
        if not scoreVal.isnull().all():
            rocAUC = roc_auc_score(real, scoreVal)
            print("ROC AUC:", rocAUC)
            rocLst.append(rocAUC)
        else:
            rocLst.append(np.nan)
    
    return rocLst

In [6]:
# Code from: https://github.com/yandexdataschool/roc_comparison

# AUC comparison adapted from
# https://github.com/Netflix/vmaf/
def compute_midrank(x):
    """Computes midranks.
    Args:
       x - a 1D numpy array
    Returns:
       array of midranks
    """
    J = np.argsort(x)
    Z = x[J]
    N = len(x)
    T = np.zeros(N, dtype=np.float)
    i = 0
    while i < N:
        j = i
        while j < N and Z[j] == Z[i]:
            j += 1
        T[i:j] = 0.5*(i + j - 1)
        i = j
    T2 = np.empty(N, dtype=np.float)
    # Note(kazeevn) +1 is due to Python using 0-based indexing
    # instead of 1-based in the AUC formula in the paper
    T2[J] = T + 1
    return T2


def fastDeLong(predictions_sorted_transposed, label_1_count):
    """
    The fast version of DeLong's method for computing the covariance of
    unadjusted AUC.
    Args:
       predictions_sorted_transposed: a 2D numpy.array[n_classifiers, n_examples]
          sorted such as the examples with label "1" are first
    Returns:
       (AUC value, DeLong covariance)
    Reference:
     @article{sun2014fast,
       title={Fast Implementation of DeLong's Algorithm for
              Comparing the Areas Under Correlated Receiver Operating Characteristic Curves},
       author={Xu Sun and Weichao Xu},
       journal={IEEE Signal Processing Letters},
       volume={21},
       number={11},
       pages={1389--1393},
       year={2014},
       publisher={IEEE}
     }
    """
    # Short variables are named as they are in the paper
    m = label_1_count
    n = predictions_sorted_transposed.shape[1] - m
    positive_examples = predictions_sorted_transposed[:, :m]
    negative_examples = predictions_sorted_transposed[:, m:]
    k = predictions_sorted_transposed.shape[0]

    tx = np.empty([k, m], dtype=np.float)
    ty = np.empty([k, n], dtype=np.float)
    tz = np.empty([k, m + n], dtype=np.float)
    for r in range(k):
        tx[r, :] = compute_midrank(positive_examples[r, :])
        ty[r, :] = compute_midrank(negative_examples[r, :])
        tz[r, :] = compute_midrank(predictions_sorted_transposed[r, :])
    aucs = tz[:, :m].sum(axis=1) / m / n - float(m + 1.0) / 2.0 / n
    v01 = (tz[:, :m] - tx[:, :]) / n
    v10 = 1.0 - (tz[:, m:] - ty[:, :]) / m
    sx = np.cov(v01)
    sy = np.cov(v10)
    delongcov = sx / m + sy / n
    return aucs, delongcov


def calc_pvalue(aucs, sigma):
    """Computes log(10) of p-values.
    Args:
       aucs: 1D array of AUCs
       sigma: AUC DeLong covariances
    Returns:
       log10(pvalue)
    """
    l = np.array([[1, -1]])
    z = np.abs(np.diff(aucs)) / np.sqrt(np.dot(np.dot(l, sigma), l.T))
    return np.log10(2) + scipy.stats.norm.logsf(z, loc=0, scale=1) / np.log(10)

def calc_pvalueV2(aucs, sigma):
    """Computes p-values.
    Args:
       aucs: 1D array of AUCs
       sigma: AUC DeLong covariances
    Returns:
       pvalue
    """
    l = np.array([[1, -1]])
    z = np.abs(np.diff(aucs)) / np.sqrt(np.dot(np.dot(l, sigma), l.T))
#     print("Z is", z)
    return 2 * (1-scipy.stats.norm.cdf(z, loc=0, scale=1))

def compute_ground_truth_statistics(ground_truth):
    assert np.array_equal(np.unique(ground_truth), [0, 1])
    order = (-ground_truth).argsort()
    label_1_count = int(ground_truth.sum())
    return order, label_1_count


def delong_roc_variance(ground_truth, predictions):
    """
    Computes ROC AUC variance for a single set of predictions
    Args:
       ground_truth: np.array of 0 and 1
       predictions: np.array of floats of the probability of being class 1
    """
    order, label_1_count = compute_ground_truth_statistics(ground_truth)
    predictions_sorted_transposed = predictions[np.newaxis, order]
    aucs, delongcov = fastDeLong(predictions_sorted_transposed, label_1_count)
    assert len(aucs) == 1, "There is a bug in the code, please forward this to the developers"
    return aucs[0], delongcov


def delong_roc_test(ground_truth, predictions_one, predictions_two):
    """
    Computes log(p-value) for hypothesis that two ROC AUCs are different
    Args:
       ground_truth: np.array of 0 and 1
       predictions_one: predictions of the first model,
          np.array of floats of the probability of being class 1
       predictions_two: predictions of the second model,
          np.array of floats of the probability of being class 1
    """
    order, label_1_count = compute_ground_truth_statistics(ground_truth)
    predictions_sorted_transposed = np.vstack((predictions_one, predictions_two))[:, order]
    aucs, delongcov = fastDeLong(predictions_sorted_transposed, label_1_count)
    pvals = calc_pvalueV2(aucs, delongcov)
    return aucs, delongcov, pvals

## Pvalue Hypothesis Testing for Invasive Hemo
Datasets: Escape, UVA card shock & UVA serial cardiac

In [7]:
datasets = ['ESCAPE', 'CardShock', 'SerialCardiac']
carnaScores = [escapeHemoScores, cardShockHemoScores, serialHemoScores]
labels = [escapeLabels, cardShockLabels, serialLabels]
index = ['DEIDNUM', 'ID', 'ID']
scoreList = ['ADHERE_Low', 'ADHERE_High', 'EFFECT 30 Day','EFFECT 1 Year','GWTG_Low', 'GWTG_High',
             'MAGGIC Y1', 'MAGGIC Y3', 'OPTIMIZE-HF', 'SHFM Y1', 'SHFM Y3', 'SHFM Y5']

pLst = []

for i in range(len(datasets)):
#     print("\nDATASET:", datasets[i])
    escapeDF, scoreDF = makeScoreDF(dataset=datasets[i], labels=labels[i], index=index[i], carnaScores=carnaScores[i])
    
    lst = []
    #do escape score first
    miniDF = escapeDF[['Real', 'CARNA', 'ESCAPE']]
    miniDF = miniDF.fillna(0)
    aucs, delongcov, pvals = delong_roc_test(ground_truth=miniDF['Real'], predictions_one=miniDF['CARNA'], predictions_two=miniDF['ESCAPE'])
#     print("AUCs for CARNA + ESCAPE", ":", aucs)
#     print("CARNA + ESCAPE", pvals[0][0])

    lst.append(abs(pvals[0][0]))
    
    #do other scores
    for s in scoreList:        
        miniDF = scoreDF[['Real', 'CARNA', s]]
#         miniDF = miniDF.dropna()
        miniDF = miniDF.fillna(0)

        
        if len(miniDF.index) == 0: #all NAN
            lst.append(np.nan)
#             print("CARNA +", s, ": NAN")

        else:
            aucs, delongcov, pvals = delong_roc_test(ground_truth=miniDF['Real'], predictions_one=miniDF['CARNA'], predictions_two=miniDF[s])
#             print("CARNA +", s, ":", pvals[0][0])
#             print("AUCs for CARNA + ", s, ":", aucs)
         
            lst.append(abs(pvals[0][0])) #note appending absolute value for two sided test

    pLst.append(lst)

    
cols = ['ESCAPE', 'ADHERE_Low', 'ADHERE_High', 'EFFECT 30 Day','EFFECT 1 Year','GWTG_Low', 'GWTG_High',
             'MAGGIC Y1', 'MAGGIC Y3', 'OPTIMIZE-HF', 'SHFM Y1', 'SHFM Y3', 'SHFM Y5']

df = pd.DataFrame(pLst, columns=cols, index=datasets)
df = df.round(3)

df.T

Unnamed: 0,ESCAPE,CardShock,SerialCardiac
ESCAPE,0.008,0.0,0.311
ADHERE_Low,0.262,0.0,0.413
ADHERE_High,0.262,0.0,0.413
EFFECT 30 Day,0.881,0.0,0.315
EFFECT 1 Year,0.832,0.0,0.028
GWTG_Low,0.593,0.021,0.0
GWTG_High,0.81,0.021,0.0
MAGGIC Y1,0.151,0.018,0.0
MAGGIC Y3,0.151,0.018,0.0
OPTIMIZE-HF,0.0,0.149,0.0


## Pvalue Hypothesis Testing for All Features
Datasets: Escape, HF-ACTION, BEST and GUIDE-IT

In [8]:
datasets = ['ESCAPE','HF-ACTION', 'BEST', 'GUIDE-IT']
labels = [escapeLabels, hfactionLabels, bestLabels, guideLabels]
index = ['DEIDNUM', 'ID', 'ID', 'ID']
carnaScores = [escapeAllScores, hfactionAllScores, bestAllScores, guideAllScores]
scoreList = ['ADHERE_Low', 'ADHERE_High', 'EFFECT 30 Day','EFFECT 1 Year','GWTG_Low', 'GWTG_High',
             'MAGGIC Y1', 'MAGGIC Y3', 'OPTIMIZE-HF', 'SHFM Y1', 'SHFM Y3', 'SHFM Y5']

pLst = []
for i in range(len(datasets)):
#     print("\nDATASET:", datasets[i])
    escapeDF, scoreDF = makeScoreDF(dataset=datasets[i], labels=labels[i], index=index[i], carnaScores=carnaScores[i])
    
    lst = []
    #do escape score first
    miniDF = escapeDF[['Real', 'CARNA', 'ESCAPE']]
    miniDF = miniDF.fillna(0)
    aucs, delongcov, pvals = delong_roc_test(ground_truth=miniDF['Real'], predictions_one=miniDF['CARNA'], predictions_two=miniDF['ESCAPE'])
#     print("AUCs for CARNA + ESCAPE", ":", aucs)
#     print("CARNA + ESCAPE", pvals[0][0])

    lst.append(abs(pvals[0][0]))
    
    #do other scores
    for s in scoreList:        
        miniDF = scoreDF[['Real', 'CARNA', s]]
#         miniDF = miniDF.dropna()
        miniDF = miniDF.fillna(0)
        
        if len(miniDF.index) == 0: #all NAN
            lst.append(np.nan)
#             print("CARNA +", s, ": NAN")

        else:
            aucs, delongcov, pvals = delong_roc_test(ground_truth=miniDF['Real'], predictions_one=miniDF['CARNA'], predictions_two=miniDF[s])
#             print("CARNA +", s, ":", pvals[0][0])
#             print("AUCs for CARNA + ", s, ":", aucs)
        
            lst.append(abs(pvals[0][0]))

    pLst.append(lst)

    
    
cols = ['ESCAPE', 'ADHERE_Low', 'ADHERE_High', 'EFFECT 30 Day','EFFECT 1 Year','GWTG_Low', 'GWTG_High',
             'MAGGIC Y1', 'MAGGIC Y3', 'OPTIMIZE-HF', 'SHFM Y1', 'SHFM Y3', 'SHFM Y5']

df = pd.DataFrame(pLst, columns=cols, index=datasets)
df = df.round(3)
df.T

Unnamed: 0,ESCAPE,HF-ACTION,BEST,GUIDE-IT
ESCAPE,0.0,0.115,0.0,0.0
ADHERE_Low,0.311,0.136,0.0,0.149
ADHERE_High,0.311,0.136,0.0,0.149
EFFECT 30 Day,0.163,0.011,0.0,0.0
EFFECT 1 Year,0.184,0.0,0.0,0.0
GWTG_Low,0.216,0.0,0.036,0.045
GWTG_High,0.096,0.0,0.0,0.341
MAGGIC Y1,0.525,0.829,0.157,0.954
MAGGIC Y3,0.525,0.829,0.157,0.954
OPTIMIZE-HF,0.005,0.0,0.004,0.007


## Full AUC Calculation

In [9]:
#NOTE USING ROC CALC HERE!!
datasets = ['ESCAPE','HF-ACTION', 'BEST', 'GUIDE-IT', 'CardShock', 'SerialCardiac']
labels = [escapeLabels, hfactionLabels, bestLabels, guideLabels, cardShockLabels, serialLabels]
index = ['DEIDNUM', 'ID', 'ID', 'ID','ID','ID']

scoreList = ['ADHERE_Low', 'ADHERE_High', 'EFFECT 30 Day','EFFECT 1 Year','GWTG_Low', 'GWTG_High',
             'MAGGIC Y1', 'MAGGIC Y3', 'OPTIMIZE-HF', 'SHFM Y1', 'SHFM Y3', 'SHFM Y5']

prec = []
roc = []

for i in range(len(datasets)):
    preclst = []
    roclst = []
    escapeDF, scoreDF = makeScoreDF(dataset=datasets[i], labels=labels[i], index=index[i])

    pLst, rLst = getAUC(escapeDF, ['ESCAPE'])
    preclst.extend(pLst)
    roclst.extend(rLst)
    
    
    pLst, rLst = getAUC(scoreDF, scoreList)
    preclst.extend(pLst)
    roclst.extend(rLst)
    
    prec.append(preclst)
    roc.append(roclst)


    
cols = ['ESCAPE', 'ADHERE_Low', 'ADHERE_High', 'EFFECT 30 Day','EFFECT 1 Year','GWTG_Low', 'GWTG_High',
             'MAGGIC Y1', 'MAGGIC Y3', 'OPTIMIZE-HF', 'SHFM Y1', 'SHFM Y3', 'SHFM Y5']

precDF = pd.DataFrame(prec, columns=cols, index=datasets)
rocDF = pd.DataFrame(roc, columns=cols, index=datasets)
rocDF

Unnamed: 0,ESCAPE,ADHERE_Low,ADHERE_High,EFFECT 30 Day,EFFECT 1 Year,GWTG_Low,GWTG_High,MAGGIC Y1,MAGGIC Y3,OPTIMIZE-HF,SHFM Y1,SHFM Y3,SHFM Y5
ESCAPE,0.680515,0.594986,0.594986,0.55016,0.548222,0.596784,0.600587,0.639941,0.639941,0.430319,0.623452,0.622755,0.622373
HF-ACTION,0.61281,0.544146,0.544146,0.62783,0.647564,,,0.676961,0.676961,0.45705,0.558359,0.557646,0.557993
BEST,0.586725,0.576283,0.576283,0.609785,0.637686,0.536415,0.538361,,,0.469083,0.612697,0.615549,0.614913
GUIDE-IT,0.714789,0.601167,0.601167,0.634985,0.632376,0.536801,0.533696,0.689307,0.689307,0.467812,0.623218,0.625284,0.619007
CardShock,0.594915,0.526021,0.526021,0.58438,0.61164,,,0.677705,0.677705,0.43899,0.586812,0.588251,0.573426
SerialCardiac,0.564839,0.573527,0.573527,0.610227,0.643946,,,,,0.431771,0.587697,0.583898,0.579254


In [10]:
precDF

Unnamed: 0,ESCAPE,ADHERE_Low,ADHERE_High,EFFECT 30 Day,EFFECT 1 Year,GWTG_Low,GWTG_High,MAGGIC Y1,MAGGIC Y3,OPTIMIZE-HF,SHFM Y1,SHFM Y3,SHFM Y5
ESCAPE,0.465807,0.531639,0.531639,0.35475,0.36487,0.529542,0.544959,0.451556,0.451556,0.397649,0.357256,0.353554,0.357473
HF-ACTION,0.270107,0.228068,0.228068,0.316165,0.304653,,,0.34084,0.34084,0.255691,0.399662,0.399492,0.39954
BEST,0.421475,0.557367,0.557367,0.503915,0.487535,0.459937,0.477174,,,0.350243,0.417414,0.418435,0.417974
GUIDE-IT,0.383379,0.363262,0.363262,0.29809,0.317143,0.313175,0.335518,0.311263,0.311263,0.371796,0.241725,0.252243,0.367049
CardShock,0.696771,0.741709,0.741709,0.668123,0.691748,,,0.737596,0.737596,0.644261,0.676757,0.678083,0.657797
SerialCardiac,0.546625,0.671924,0.671924,0.593656,0.601229,,,,,0.47539,0.572381,0.56755,0.561119


In [11]:
def simplifyDFAUC(aucDF):
    df = aucDF[['ESCAPE', 'ADHERE_Low', 'ADHERE_High', 'EFFECT 30 Day','EFFECT 1 Year','GWTG_Low', 'GWTG_High',
             'MAGGIC Y1', 'MAGGIC Y3', 'OPTIMIZE-HF', 'SHFM Y1', 'SHFM Y3', 'SHFM Y5']]
    adhere = []
    gwtg = []
    for i in aucDF.index:
        low = aucDF.loc[i]['ADHERE_Low']
        high = aucDF.loc[i]['ADHERE_High']
        adhere.append(max(low, high))
        
        low = aucDF.loc[i]['GWTG_Low']
        high = aucDF.loc[i]['GWTG_High']
        gwtg.append(max(low, high))
        
    df['ADHERE'] = adhere
    df['GWTG'] = gwtg
    
    df = df.transpose()
    df = df.round(3)
    
    return df
        
    
aucFinal = simplifyDFAUC(rocDF)
aucFinal

Unnamed: 0,ESCAPE,HF-ACTION,BEST,GUIDE-IT,CardShock,SerialCardiac
ESCAPE,0.681,0.613,0.587,0.715,0.595,0.565
ADHERE_Low,0.595,0.544,0.576,0.601,0.526,0.574
ADHERE_High,0.595,0.544,0.576,0.601,0.526,0.574
EFFECT 30 Day,0.55,0.628,0.61,0.635,0.584,0.61
EFFECT 1 Year,0.548,0.648,0.638,0.632,0.612,0.644
GWTG_Low,0.597,,0.536,0.537,,
GWTG_High,0.601,,0.538,0.534,,
MAGGIC Y1,0.64,0.677,,0.689,0.678,
MAGGIC Y3,0.64,0.677,,0.689,0.678,
OPTIMIZE-HF,0.43,0.457,0.469,0.468,0.439,0.432


In [12]:
aucFinal = simplifyDFAUC(precDF)
aucFinal

Unnamed: 0,ESCAPE,HF-ACTION,BEST,GUIDE-IT,CardShock,SerialCardiac
ESCAPE,0.466,0.27,0.421,0.383,0.697,0.547
ADHERE_Low,0.532,0.228,0.557,0.363,0.742,0.672
ADHERE_High,0.532,0.228,0.557,0.363,0.742,0.672
EFFECT 30 Day,0.355,0.316,0.504,0.298,0.668,0.594
EFFECT 1 Year,0.365,0.305,0.488,0.317,0.692,0.601
GWTG_Low,0.53,,0.46,0.313,,
GWTG_High,0.545,,0.477,0.336,,
MAGGIC Y1,0.452,0.341,,0.311,0.738,
MAGGIC Y3,0.452,0.341,,0.311,0.738,
OPTIMIZE-HF,0.398,0.256,0.35,0.372,0.644,0.475
