In [1]:
import pandas as pd
from math import sqrt
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
from xgboost import XGBClassifier
from xgboost import cv
from sklearn.model_selection import train_test_split
from sklearn.metrics import auc, roc_curve, roc_auc_score, average_precision_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score

In [3]:
par = pd.read_csv('params.csv', index_col=0).T.to_dict()
names_to_queries = {'All MI cases': '(FEMALE == 1.0) | (FEMALE == 0.0) | (FEMALE.isnull())',
                    'Female': 'FEMALE == 1.0',
                    'Female with NSTEMI': '(FEMALE == 1.0) & (NSTEMI == 1)',
                    'Female with STEMI': '(FEMALE == 1.0) & (NSTEMI == 0)',
                    'Male': 'FEMALE == 0.0',
                    'Male with NSTEMI': '(FEMALE == 0.0) & (NSTEMI == 1)',
                    'Male with STEMI': '(FEMALE == 0.0) & (NSTEMI == 0)',
                    'NSTEMI': 'NSTEMI == 1',
                    'STEMI': 'NSTEMI == 0',
                    }
par = {k: [{kk: [int(vv) if kk in ['max_depth', 'n_estimators'] else vv] for kk, vv in v.items()}, names_to_queries[k]] for k, v in par.items()}

In [4]:
def roc_auc_ci(AUC, y_true, positive=1):
    
    N1 = sum(y_true == positive)
    N2 = sum(y_true != positive)
    Q1 = AUC / (2 - AUC)
    Q2 = 2*AUC**2 / (1 + AUC)
    SE_AUC = sqrt((AUC*(1 - AUC) + (N1 - 1)*(Q1 - AUC**2) + (N2 - 1)*(Q2 - AUC**2)) / (N1*N2))
    lower = AUC - 1.96*SE_AUC
    upper = AUC + 1.96*SE_AUC
    if lower < 0:
        lower = 0
    if upper > 1:
        upper = 1
    return AUC, lower, upper

In [5]:
df=pd.read_csv('NIS_2012-5_Core+Sev+Hosp_Use.csv')

In [6]:
df=df.dropna(subset=['DIED'])
df=df.drop(columns=['ASTEMI', 'USTEMI', 'ISTEMI', 'PSTEMI', 'LOS'])

In [7]:
for k, v in par.items():
    print(f"{k}: {len(df.query(v[1]))}")

All MI cases: 457096
NSTEMI: 322966
STEMI: 134130
Male: 281221
Female: 175862
Male with STEMI: 91158
Female with STEMI: 42965
Male with NSTEMI: 190063
Female with NSTEMI: 132897


In [None]:
results = {}
for k, v in par.items():
    paramgrid = v[0]
    quer = v[1]
    namee = k

    df_temp = df.copy().query(quer)

    X, y = df_temp.drop('DIED', axis=1), df_temp.DIED
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

    paramgrid['scale_pos_weight'] = [len(y_train[y_train == 0.0]) / len(y_train[y_train == 1.0])]


    model = XGBClassifier(nthread=10, gpu_id=0, tree_method = 'gpu_hist')
    cv = StratifiedKFold()
    gridsearch = GridSearchCV(model, paramgrid, scoring='roc_auc', cv=cv, verbose=0)

    fit = gridsearch.fit(X_train, y_train)

    fit.score(X_test, y_test)
    best = fit.best_estimator_
    y_preds = best.predict(X_test)

    tn, fp, fn, tp = confusion_matrix(y_test, y_preds).ravel()
    a, cil, cih = roc_auc_ci(fit.best_score_, y_test)
    results[namee] = {
        'Number of patients': len(df_temp),
        'AUC': a,
        'AUC CI': [cil, cih],
        'Sensitivity': tp/(tp+fn),
        'Specificity': tn/(fp+tn),
        'F1 score': f1_score(y_test, y_preds),
        'Positive predictive value (Precision)': tp/(tp+fp),
        'Under Area Average Precision Curve': average_precision_score(y_test, y_preds),
        'Negative Predictive Value': tn/(tn+fn)
    }

In [9]:
pd.DataFrame(results).T

Unnamed: 0,Number of patients,AUC,AUC CI,Sensitivity,Specificity,F1 score,Positive predictive value (Precision),Under Area Average Precision Curve,Negative Predictive Value
All MI cases,457096,0.921921,"[0.917502659313978, 0.9263386105437607]",0.830657,0.848302,0.352289,0.223549,0.194152,0.989613
NSTEMI,322966,0.903082,"[0.8964349381484745, 0.9097281517586299]",0.81006,0.824497,0.256808,0.152592,0.13074,0.991093
STEMI,134130,0.931438,"[0.9252970028367502, 0.9375790546157472]",0.864166,0.846814,0.473044,0.325653,0.29213,0.986455
Male,281221,0.931739,"[0.9262151949537144, 0.937262911534663]",0.830461,0.868079,0.364088,0.233153,0.201435,0.990655
Female,175862,0.902102,"[0.8947125771962356, 0.9094913393700501]",0.784228,0.843368,0.356413,0.23061,0.193038,0.984915
Male with STEMI,91158,0.941681,"[0.9343134370596738, 0.9490487067627525]",0.873225,0.867922,0.47963,0.330611,0.29751,0.989206
Female with STEMI,42965,0.905951,"[0.8952890217743852, 0.9166121245809156]",0.818442,0.81777,0.493747,0.353504,0.309028,0.973681
Male with NSTEMI,190063,0.915411,"[0.9068634244646475, 0.9239593022910394]",0.815534,0.844938,0.263998,0.15749,0.134769,0.9923
Female with NSTEMI,132897,0.875784,"[0.8650201592767887, 0.8865472669062263]",0.752056,0.81812,0.25796,0.15568,0.127664,0.986666
