In [5]:
## This notebook reports the statistics used in the paper

In [1]:
#Import packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import copy
import warnings

plt.rc('font', size=12)
warnings.filterwarnings('ignore')

In [2]:
def loadIndexedCsv(path, idColumn='ID'):
    """Read a comma-separated file, index it by ``idColumn`` and sort the rows by that index."""
    return pd.read_csv(path, sep=",", index_col=idColumn).sort_index()


# ESCAPE training data (these files are indexed by DEIDNUM)
escapeAllData = loadIndexedCsv("Data/Original DataFrames/AllDataSingleValue.csv", idColumn='DEIDNUM')  # all feature dataset
escapeHemo = loadIndexedCsv("Data/Original DataFrames/HemoSingleValue.csv", idColumn='DEIDNUM')  # dataset with only hemodynamics
escapeLabels = loadIndexedCsv("Data/Original DataFrames/Labels.csv", idColumn='DEIDNUM')  # labels for prediction classes
# Keep only the label rows whose index value also appears in the hemodynamics dataset
escapeHemoLabels = escapeLabels[escapeLabels.index.isin(escapeHemo.index)]

# Cardiogenic Shock
cardShockHemo = loadIndexedCsv("Data Validation/Cardiogenic Shock/Original DataFrames/HemoCardiogenicShock.csv")
cardShockAllData = loadIndexedCsv("Data Validation/Cardiogenic Shock/Original DataFrames/AllDataCardiogenicShock.csv")
cardShockHemoLabels = loadIndexedCsv("Data Validation/Cardiogenic Shock/Original DataFrames/HemoLabelsCardiogenicShock.csv")
cardShockLabels = loadIndexedCsv("Data Validation/Cardiogenic Shock/Original DataFrames/LabelsCardiogenicShock.csv")

# Serial Cardiac
serialHemo = loadIndexedCsv("Data Validation/Serial Cardiac Caths/Original DataFrames/HemoSerialCardiac.csv")
serialAllData = loadIndexedCsv("Data Validation/Serial Cardiac Caths/Original DataFrames/AllDataSerialCardiac.csv")
serialHemoLabels = loadIndexedCsv("Data Validation/Serial Cardiac Caths/Original DataFrames/HemoLabelsSerialCardiac.csv")
serialLabels = loadIndexedCsv("Data Validation/Serial Cardiac Caths/Original DataFrames/LabelsSerialCardiac.csv")

# BEST
bestAllData = loadIndexedCsv("Data Validation/BEST/Original DataFrames/AllDataBEST.csv")
bestLabels = loadIndexedCsv("Data Validation/BEST/Original DataFrames/LabelsBEST.csv")

# GUIDE-IT
guideAllData = loadIndexedCsv("Data Validation/GUIDE-IT/Original DataFrames/AllDataGUIDE-IT.csv")
guideLabels = loadIndexedCsv("Data Validation/GUIDE-IT/Original DataFrames/LabelsGUIDE-IT.csv")


In [32]:
# Display the Cardiogenic Shock hemodynamic label table loaded above
cardShockHemoLabels

Unnamed: 0_level_0,Death,Rehosp
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
Z1017923,1,1
Z1024990,1,0
Z104044,1,1
Z1099417,0,0
Z1140320,1,1
...,...,...
Z888391,1,1
Z927126,1,0
Z928524,1,1
Z93161,1,1


# Get Patient Cohort Baseline Characteristics

In [33]:

def getStats(data, labels):
    """Summarise baseline characteristics and outcome rates for one cohort.

    Parameters
    ----------
    data : pandas.DataFrame
        Feature table with the columns 'Age', 'Gender', 'Race', 'BMI', 'EjF',
        'HR', 'BPSYS', 'BPDIAS', 'CRT', 'POT', 'BUN' and 'SOD'.
    labels : pandas.DataFrame
        Outcome table with a 'Death' column and, optionally, 'Rehosp' and
        'Readmission' columns.

    Returns
    -------
    list
        16 entries, in this order: ``len(labels)``; "mean±std" of Age; the
        percentage of rows with Gender == 2.0; "a[b]" where a and b are the
        percentages of rows with Race == 1.0 and Race == 2.0; "mean±std" of
        BMI, EjF, HR, BPSYS, BPDIAS, CRT, POT, BUN and SOD; the Death rate;
        the Rehosp rate; the Readmission rate. A rate whose column is absent
        from ``labels`` is reported as "N/A" (Death is always required).

    Raises
    ------
    KeyError
        If a required column is missing, or if Gender never equals 2.0 or
        Race never equals 1.0 / 2.0.
    """
    def meanStd(column):
        # Series.mean()/Series.std() are the statistics describe() reports as
        # 'mean' and 'std'; calling them directly avoids positional indexing
        # into the label-indexed describe() result.
        values = data[column]
        return str(round(values.mean(), 1)) + "±" + str(round(values.std(), 1))

    def optionalRate(outcome):
        # Only a missing column means "not available"; other errors propagate.
        if outcome not in labels:
            return "N/A"
        return round(sum(labels[outcome]) / len(labels), 3)

    nPatients = len(labels)
    # When data has exactly twice as many rows as labels, category counts are
    # halved before being turned into percentages
    # (presumably two records per patient -- TODO confirm).
    rowsPerPatient = 2 if len(data) == nPatients * 2 else 1

    lst = [nPatients, meanStd('Age')]

    gen = data['Gender'].value_counts() / rowsPerPatient / nPatients
    lst.append(round(gen[2.0] * 100, 1))

    race = data['Race'].value_counts() / rowsPerPatient / nPatients
    lst.append(str(round(race[1.0] * 100, 1)) + "[" + str(round(race[2.0] * 100, 1)) + "]")

    for column in ['BMI', 'EjF', 'HR', 'BPSYS', 'BPDIAS', 'CRT', 'POT', 'BUN', 'SOD']:
        lst.append(meanStd(column))

    lst.append(round(sum(labels['Death']) / nPatients, 3))
    lst.append(optionalRate('Rehosp'))
    lst.append(optionalRate('Readmission'))

    return lst

def getStatsCath(data, labels):
    """Summarise baseline characteristics and outcome rates for a cath cohort.

    Produces the same 16-entry layout as the summary table rows: cohort size,
    Age, Gender, Race, nine "mean±std" measurements, Death, Rehosp and
    Readmission. Race and Readmission are always reported as "N/A".

    Parameters
    ----------
    data : pandas.DataFrame
        Feature table with the columns 'Age', 'Gender', 'BMI', 'EjF', 'HR',
        'BPSYS', 'BPDIAS', 'CRT', 'POT', 'BUN' and 'SOD'.
    labels : pandas.DataFrame
        Outcome table with 'Death' and 'Rehosp' columns.

    Returns
    -------
    list
        The 16 summary values described above.

    Raises
    ------
    KeyError
        If a required column is missing or Gender never equals 2.0.
    """
    def meanStd(column):
        # Series.mean()/Series.std() are the statistics describe() reports as
        # 'mean' and 'std'; calling them directly avoids positional indexing
        # into the label-indexed describe() result.
        values = data[column]
        return str(round(values.mean(), 1)) + "±" + str(round(values.std(), 1))

    nPatients = len(labels)

    lst = [nPatients, meanStd('Age')]

    # Gender counts are divided by 3 before being turned into a percentage
    # (presumably three records per patient in these files -- TODO confirm).
    gen = data['Gender'].value_counts() / 3 / nPatients
    lst.append(round(gen[2.0] * 100, 1))
    lst.append("N/A")  # Race

    for column in ['BMI', 'EjF', 'HR', 'BPSYS', 'BPDIAS', 'CRT', 'POT', 'BUN', 'SOD']:
        lst.append(meanStd(column))

    lst.append(round(sum(labels['Death']) / nPatients, 3))
    lst.append(round(sum(labels['Rehosp']) / nPatients, 3))
    lst.append("N/A")  # Readmission

    return lst
    

# Row labels of the summary table, in the order getStats/getStatsCath return their values
statRows = ["n", "Age (years)", "Gender (%, female)", "Race (%, white[minority])", "BMI", "EF", "HR", "BPSYS", "BPDIAS", "CRT", "POT", "BUN", "SOD", "Death", "Rehosp", "Readm"]

# (column name, summary function, feature table, label table) for every cohort
cohortSpecs = [
    ('ESCAPE', getStats, escapeAllData, escapeLabels),
    ('BEST', getStats, bestAllData, bestLabels),
    ('GUIDE-IT', getStats, guideAllData, guideLabels),
    ('UVA Cardiogenic Shock', getStatsCath, cardShockAllData, cardShockLabels),
    ('UVA Serial Cath', getStatsCath, serialAllData, serialLabels),
]

df = pd.DataFrame(index=statRows)
for cohortName, summarise, features, cohortLabels in cohortSpecs:
    df[cohortName] = summarise(features, cohortLabels)

df

Unnamed: 0,ESCAPE,BEST,GUIDE-IT,UVA Cardiogenic Shock,UVA Serial Cath
n,433,2707,388,364,183
Age (years),56.1±13.9,60.2±12.3,62.2±13.9,59.4±18.5,60.6±15.1
"Gender (%, female)",25.9,21.9,66.2,35.2,43.2
"Race (%, white[minority])",59.6[40.4],70.0[30.0],49.2[50.8],,
BMI,28.4±6.7,0.0±0.0,15.4±16.7,29.8±8.8,47.3±286.9
EF,19.3±6.6,23.0±7.3,24.0±8.2,31.7±17.4,31.3±18.0
HR,80.8±14.9,0.0±0.0,0.0±0.0,0.0±0.0,0.0±0.0
BPSYS,103.7±15.8,118.5±19.4,115.4±20.0,111.1±21.9,109.1±21.4
BPDIAS,64.1±11.5,71.9±11.7,70.2±13.5,62.2±15.5,59.9±17.2
CRT,6.7±34.0,1.2±0.4,1.6±0.7,1.7±1.3,1.7±1.0


## Get Percent of Data Missing

In [13]:
def getMissing(misData):
    """Return the percentage of cells in the DataFrame ``misData`` that are null."""
    missingPerColumn = misData.isnull().sum()
    totalMissing = sum(missingPerColumn)
    cellCount = misData.shape[0] * misData.shape[1]
    return totalMissing / cellCount * 100

In [16]:
# All Data: percentage of missing cells in each all-feature dataset
allDataFrames = [
    ("ESCAPE:", escapeAllData),
    ("BEST:", bestAllData),
    ("GUIDE IT:", guideAllData),
    ("Card Shock:", cardShockAllData),
    ("Serial:", serialAllData),
]
for cohortLabel, features in allDataFrames:
    print(cohortLabel, round(getMissing(features), 1))


ESCAPE: 7.8
BEST: 2.0
GUIDE IT: 15.1
Card Shock: 10.4
Serial: 7.3


In [17]:
# Hemo: percentage of missing cells in each hemodynamics-only dataset
hemoFrames = [
    ("ESCAPE:", escapeHemo),
    ("Card Shock:", cardShockHemo),
    ("Serial:", serialHemo),
]
for cohortLabel, features in hemoFrames:
    print(cohortLabel, round(getMissing(features), 1))

ESCAPE: 12.0
Card Shock: 5.9
Serial: 9.2


# Plot Training Averaged AUC Graphs

In [None]:
from sklearn.metrics import auc

In [None]:
# Build a ROC curve from the single averaged operating point and compute its area.
# auc() integrates over arrays of at least two points, so the operating point is
# anchored between the corners (0, 0) and (1, 1) instead of being passed as scalars.
# NOTE(review): the operating-point values are hard-coded; their source is not
# shown in this notebook -- confirm before reporting the resulting AUC.
operatingFpr = 0.027  # 1 - specificity
operatingTpr = 0.946  # sensitivity
fpr = [0.0, operatingFpr, 1.0]
tpr = [0.0, operatingTpr, 1.0]
roc = auc(fpr, tpr)

# Plot the averaged AUC graph
plt.figure(figsize=(10, 8))
plt.rcParams.update({'font.size': 18})
plt.plot(fpr, tpr,
     label='Averaged AUC: {0:0.3f}'
           ''.format(roc))

# Chance-level diagonal for reference
plt.plot([0, 1], [0, 1], 'k--', lw=2)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('1 - Specificity (False Positive Rate)')
plt.ylabel('Sensitivity (True Positive Rate)')
plt.legend(loc="lower right")

In [None]:
from sklearn.metrics import auc

In [None]:
# Build a ROC curve from the single averaged operating point and compute its area.
# auc() integrates over arrays of at least two points, so the operating point is
# anchored between the corners (0, 0) and (1, 1) instead of being passed as scalars.
# NOTE(review): the operating-point values are hard-coded; their source is not
# shown in this notebook -- confirm before reporting the resulting AUC.
operatingFpr = 0.027  # 1 - specificity
operatingTpr = 0.946  # sensitivity
fpr = [0.0, operatingFpr, 1.0]
tpr = [0.0, operatingTpr, 1.0]
roc = auc(fpr, tpr)

# Plot the averaged AUC graph
plt.figure(figsize=(10, 8))
plt.rcParams.update({'font.size': 18})
plt.plot(fpr, tpr,
     label='Averaged AUC: {0:0.3f}'
           ''.format(roc))

# Chance-level diagonal for reference
plt.plot([0, 1], [0, 1], 'k--', lw=2)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('1 - Specificity (False Positive Rate)')
plt.ylabel('Sensitivity (True Positive Rate)')
plt.legend(loc="lower right")

# Get Outcome Percentages Based on Cluster Group

In [None]:
def loadIndexedCsv(path, idColumn='ID'):
    """Read a comma-separated file, index it by ``idColumn`` and sort the rows by that index."""
    return pd.read_csv(path, sep=",", index_col=idColumn).sort_index()


# ESCAPE training data: preprocessed features, original labels (labels are indexed by DEIDNUM)
escapeAllData = loadIndexedCsv("Data/Preprocessed Data/ESCAPE_AllData.csv")  # all feature dataset
escapeHemo = loadIndexedCsv("Data/Preprocessed Data/ESCAPE_Hemo.csv")  # dataset with only hemodynamics
escapeLabels = loadIndexedCsv("Data/Original DataFrames/Labels.csv", idColumn='DEIDNUM')  # labels for prediction classes
# Keep only the label rows whose index value also appears in the hemodynamics dataset
escapeHemoLabels = escapeLabels[escapeLabels.index.isin(escapeHemo.index)]

# Cardiogenic Shock
cardShockHemo = loadIndexedCsv("Data Validation/Cardiogenic Shock/Preprocessed Data/CardiogenicShock_Hemo.csv")
cardShockAllData = loadIndexedCsv("Data Validation/Cardiogenic Shock/Preprocessed Data/CardiogenicShock_AllData.csv")
cardShockHemoLabels = loadIndexedCsv("Data Validation/Cardiogenic Shock/Original DataFrames/HemoLabelsCardiogenicShock.csv")
cardShockLabels = loadIndexedCsv("Data Validation/Cardiogenic Shock/Original DataFrames/LabelsCardiogenicShock.csv")

# Serial Cardiac
serialHemo = loadIndexedCsv("Data Validation/Serial Cardiac Caths/Preprocessed Data/SerialCardiac_Hemo.csv")
serialAllData = loadIndexedCsv("Data Validation/Serial Cardiac Caths/Preprocessed Data/SerialCardiac_AllData.csv")
serialHemoLabels = loadIndexedCsv("Data Validation/Serial Cardiac Caths/Original DataFrames/HemoLabelsSerialCardiac.csv")
serialLabels = loadIndexedCsv("Data Validation/Serial Cardiac Caths/Original DataFrames/LabelsSerialCardiac.csv")

# HF-ACTION
hfactionAllData = loadIndexedCsv("Data Validation/HF-ACTION/Preprocessed Data/HF-ACTION_AllData.csv")
hfactionLabels = loadIndexedCsv("Data Validation/HF-ACTION/Original DataFrames/LabelsHF-ACTION.csv")

# BEST
bestAllData = loadIndexedCsv("Data Validation/BEST/Preprocessed Data/BEST_AllData.csv")
bestLabels = loadIndexedCsv("Data Validation/BEST/Original DataFrames/LabelsBEST.csv")

# GUIDE-IT
guideAllData = loadIndexedCsv("Data Validation/GUIDE-IT/Preprocessed Data/GUIDE-IT_AllData.csv")
guideLabels = loadIndexedCsv("Data Validation/GUIDE-IT/Original DataFrames/LabelsGUIDE-IT.csv")


In [None]:
def getPercentOutcome(dataset, labels, outcome, risk):
    """Return the mean of ``labels[outcome]`` for the rows of ``dataset`` scored ``risk``.

    Rows of ``dataset`` whose ``'Score' + outcome`` column equals ``risk`` are
    selected, and ``labels[outcome]`` is averaged over the label rows whose
    index appears among them. The result is NaN when no label row matches.
    """
    inRiskGroup = dataset['Score' + outcome] == risk
    memberIds = dataset[inRiskGroup].index
    return labels.loc[labels.index.isin(memberIds), outcome].mean()


In [None]:
# Death rate per risk cluster in the hemodynamics-only datasets
outcome = 'Death'
risks = [5, 4, 3, 2, 1]
hemoCohorts = [
    (escapeHemo, escapeHemoLabels),
    (cardShockHemo, cardShockHemoLabels),
    (serialHemo, serialHemoLabels),
]

lst = []
for r in risks:
    # per1/per2/per3 remain notebook-level variables after the loop
    per1, per2, per3 = [
        getPercentOutcome(features, cohortLabels, outcome, r)
        for features, cohortLabels in hemoCohorts
    ]
    lst.append([r, per1, per2, per3])

df = pd.DataFrame(lst, columns=['Cluster', 'ESCAPE', 'Card Shock', 'Serial']).set_index('Cluster')
print(df.mean(axis=1))
df.T


In [None]:
# Rehospitalisation rate per risk cluster (hemodynamics-only data, ESCAPE only).
# Card Shock and Serial are not evaluated for this outcome, so they are left out
# of the table rather than filled with per2/per3 values left over from another
# cell, which would be rates for a different outcome.
risks = [5, 4, 3, 2, 1]
lst = []
outcome = 'Rehosp'
for r in risks:
    per1 = getPercentOutcome(escapeHemo, escapeLabels, outcome, r)
    lst.append([r, per1])

df = pd.DataFrame(lst, columns = ['Cluster', 'ESCAPE']).set_index('Cluster')
print(df.mean(axis=1))
df.T

## All Data

In [None]:
# Death rate per risk cluster for every all-feature cohort
outcome = 'Death'
risks = [5, 4, 3, 2, 1]
allDataCohorts = {
    'ESCAPE': (escapeAllData, escapeLabels),
    'Card Shock': (cardShockAllData, cardShockLabels),
    'Serial': (serialAllData, serialLabels),
    'HF-ACTION': (hfactionAllData, hfactionLabels),
    'BEST': (bestAllData, bestLabels),
    'GUIDE': (guideAllData, guideLabels),
}

# One row per risk level: the level followed by each cohort's rate
lst = [
    [r] + [getPercentOutcome(features, cohortLabels, outcome, r)
           for features, cohortLabels in allDataCohorts.values()]
    for r in risks
]

df = pd.DataFrame(lst, columns=['Cluster'] + list(allDataCohorts)).set_index('Cluster')
print(df.mean(axis=1))
df.T

In [None]:
# Rehospitalisation rate per risk cluster for the all-feature cohorts.
# Card Shock and Serial are not evaluated for this outcome.
outcome = 'Rehosp'
risks = [5, 4, 3, 2, 1]
rehospCohorts = {
    'ESCAPE': (escapeAllData, escapeLabels),
    'HF-ACTION': (hfactionAllData, hfactionLabels),
    'BEST': (bestAllData, bestLabels),
    'GUIDE': (guideAllData, guideLabels),
}

# One row per risk level: the level followed by each cohort's rate
lst = [
    [r] + [getPercentOutcome(features, cohortLabels, outcome, r)
           for features, cohortLabels in rehospCohorts.values()]
    for r in risks
]

df = pd.DataFrame(lst, columns=['Cluster'] + list(rehospCohorts)).set_index('Cluster')
print(df.mean(axis=1))
df.T

In [None]:
# Readmission rate per risk cluster for the all-feature cohorts.
# Only ESCAPE and HF-ACTION are evaluated for this outcome.
outcome = 'Readmission'
risks = [5, 4, 3, 2, 1]
readmissionCohorts = {
    'ESCAPE': (escapeAllData, escapeLabels),
    'HF-ACTION': (hfactionAllData, hfactionLabels),
}

# One row per risk level: the level followed by each cohort's rate
lst = [
    [r] + [getPercentOutcome(features, cohortLabels, outcome, r)
           for features, cohortLabels in readmissionCohorts.values()]
    for r in risks
]

df = pd.DataFrame(lst, columns=['Cluster'] + list(readmissionCohorts)).set_index('Cluster')
print(df.mean(axis=1))
df.T

# Get Outcome Percentages Based on Cluster Group

In [None]:
def loadIndexedCsv(path, idColumn='ID'):
    """Read a comma-separated file, index it by ``idColumn`` and sort the rows by that index."""
    return pd.read_csv(path, sep=",", index_col=idColumn).sort_index()


# ESCAPE training data: preprocessed features, original labels (labels are indexed by DEIDNUM)
escapeAllData = loadIndexedCsv("Data/Preprocessed Data/ESCAPE_AllData.csv")  # all feature dataset
escapeHemo = loadIndexedCsv("Data/Preprocessed Data/ESCAPE_Hemo.csv")  # dataset with only hemodynamics
escapeLabels = loadIndexedCsv("Data/Original DataFrames/Labels.csv", idColumn='DEIDNUM')  # labels for prediction classes
# Keep only the label rows whose index value also appears in the hemodynamics dataset
escapeHemoLabels = escapeLabels[escapeLabels.index.isin(escapeHemo.index)]

# Cardiogenic Shock
cardShockHemo = loadIndexedCsv("Data Validation/Cardiogenic Shock/Preprocessed Data/CardiogenicShock_Hemo.csv")
cardShockAllData = loadIndexedCsv("Data Validation/Cardiogenic Shock/Preprocessed Data/CardiogenicShock_AllData.csv")
cardShockHemoLabels = loadIndexedCsv("Data Validation/Cardiogenic Shock/Original DataFrames/HemoLabelsCardiogenicShock.csv")
cardShockLabels = loadIndexedCsv("Data Validation/Cardiogenic Shock/Original DataFrames/LabelsCardiogenicShock.csv")

# Serial Cardiac
serialHemo = loadIndexedCsv("Data Validation/Serial Cardiac Caths/Preprocessed Data/SerialCardiac_Hemo.csv")
serialAllData = loadIndexedCsv("Data Validation/Serial Cardiac Caths/Preprocessed Data/SerialCardiac_AllData.csv")
serialHemoLabels = loadIndexedCsv("Data Validation/Serial Cardiac Caths/Original DataFrames/HemoLabelsSerialCardiac.csv")
serialLabels = loadIndexedCsv("Data Validation/Serial Cardiac Caths/Original DataFrames/LabelsSerialCardiac.csv")

# HF-ACTION
hfactionAllData = loadIndexedCsv("Data Validation/HF-ACTION/Preprocessed Data/HF-ACTION_AllData.csv")
hfactionLabels = loadIndexedCsv("Data Validation/HF-ACTION/Original DataFrames/LabelsHF-ACTION.csv")

# BEST
bestAllData = loadIndexedCsv("Data Validation/BEST/Preprocessed Data/BEST_AllData.csv")
bestLabels = loadIndexedCsv("Data Validation/BEST/Original DataFrames/LabelsBEST.csv")

# GUIDE-IT
guideAllData = loadIndexedCsv("Data Validation/GUIDE-IT/Preprocessed Data/GUIDE-IT_AllData.csv")
guideLabels = loadIndexedCsv("Data Validation/GUIDE-IT/Original DataFrames/LabelsGUIDE-IT.csv")


In [None]:
def getPercentOutcome(dataset, labels, outcome, risk):
    """Return the mean of ``labels[outcome]`` for the rows of ``dataset`` scored ``risk``.

    Rows of ``dataset`` whose ``'Score' + outcome`` column equals ``risk`` are
    selected, and ``labels[outcome]`` is averaged over the label rows whose
    index appears among them. The result is NaN when no label row matches.
    """
    inRiskGroup = dataset['Score' + outcome] == risk
    memberIds = dataset[inRiskGroup].index
    return labels.loc[labels.index.isin(memberIds), outcome].mean()


In [None]:
# Death rate per risk cluster in the hemodynamics-only datasets
outcome = 'Death'
risks = [5, 4, 3, 2, 1]
hemoCohorts = [
    (escapeHemo, escapeHemoLabels),
    (cardShockHemo, cardShockHemoLabels),
    (serialHemo, serialHemoLabels),
]

lst = []
for r in risks:
    # per1/per2/per3 remain notebook-level variables after the loop
    per1, per2, per3 = [
        getPercentOutcome(features, cohortLabels, outcome, r)
        for features, cohortLabels in hemoCohorts
    ]
    lst.append([r, per1, per2, per3])

df = pd.DataFrame(lst, columns=['Cluster', 'ESCAPE', 'Card Shock', 'Serial']).set_index('Cluster')
print(df.mean(axis=1))
df.T


In [None]:
# Rehospitalisation rate per risk cluster (hemodynamics-only data, ESCAPE only).
# Card Shock and Serial are not evaluated for this outcome, so they are left out
# of the table rather than filled with per2/per3 values left over from another
# cell, which would be rates for a different outcome.
risks = [5, 4, 3, 2, 1]
lst = []
outcome = 'Rehosp'
for r in risks:
    per1 = getPercentOutcome(escapeHemo, escapeLabels, outcome, r)
    lst.append([r, per1])

df = pd.DataFrame(lst, columns = ['Cluster', 'ESCAPE']).set_index('Cluster')
print(df.mean(axis=1))
df.T

## All Data

In [None]:
# Death rate per risk cluster for every all-feature cohort
outcome = 'Death'
risks = [5, 4, 3, 2, 1]
allDataCohorts = {
    'ESCAPE': (escapeAllData, escapeLabels),
    'Card Shock': (cardShockAllData, cardShockLabels),
    'Serial': (serialAllData, serialLabels),
    'HF-ACTION': (hfactionAllData, hfactionLabels),
    'BEST': (bestAllData, bestLabels),
    'GUIDE': (guideAllData, guideLabels),
}

# One row per risk level: the level followed by each cohort's rate
lst = [
    [r] + [getPercentOutcome(features, cohortLabels, outcome, r)
           for features, cohortLabels in allDataCohorts.values()]
    for r in risks
]

df = pd.DataFrame(lst, columns=['Cluster'] + list(allDataCohorts)).set_index('Cluster')
print(df.mean(axis=1))
df.T

In [None]:
# Rehospitalisation rate per risk cluster for the all-feature cohorts.
# Card Shock and Serial are not evaluated for this outcome.
outcome = 'Rehosp'
risks = [5, 4, 3, 2, 1]
rehospCohorts = {
    'ESCAPE': (escapeAllData, escapeLabels),
    'HF-ACTION': (hfactionAllData, hfactionLabels),
    'BEST': (bestAllData, bestLabels),
    'GUIDE': (guideAllData, guideLabels),
}

# One row per risk level: the level followed by each cohort's rate
lst = [
    [r] + [getPercentOutcome(features, cohortLabels, outcome, r)
           for features, cohortLabels in rehospCohorts.values()]
    for r in risks
]

df = pd.DataFrame(lst, columns=['Cluster'] + list(rehospCohorts)).set_index('Cluster')
print(df.mean(axis=1))
df.T

In [None]:
# Readmission rate per risk cluster for the all-feature cohorts.
# Only ESCAPE and HF-ACTION are evaluated for this outcome.
outcome = 'Readmission'
risks = [5, 4, 3, 2, 1]
readmissionCohorts = {
    'ESCAPE': (escapeAllData, escapeLabels),
    'HF-ACTION': (hfactionAllData, hfactionLabels),
}

# One row per risk level: the level followed by each cohort's rate
lst = [
    [r] + [getPercentOutcome(features, cohortLabels, outcome, r)
           for features, cohortLabels in readmissionCohorts.values()]
    for r in risks
]

df = pd.DataFrame(lst, columns=['Cluster'] + list(readmissionCohorts)).set_index('Cluster')
print(df.mean(axis=1))
df.T