In [None]:
import json
import string
import warnings
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import sys
import timeit

from collections import Counter
from gensim.models import Word2Vec

import pickle
import re
import hashlib
import scipy.stats as st

from sklearn.metrics import roc_curve, precision_recall_curve, auc
import xgboost as xgb
from tqdm import tqdm

from scipy.sparse import hstack, csr_matrix

warnings.filterwarnings("ignore")

## Helper functions

In [None]:
def save_obj(obj, name):
    """Pickle ``obj`` to the file ``name + '.pkl'``.

    Args:
        obj: Any picklable Python object.
        name: Destination path *without* the ``.pkl`` extension; the suffix
            is appended here.

    Returns:
        None. The file is created or overwritten (opened in ``'wb'`` mode).
    """
    target_path = name + '.pkl'
    with open(target_path, 'wb') as sink:
        # Highest protocol available to the running interpreter.
        pickle.dump(obj, sink, protocol=pickle.HIGHEST_PROTOCOL)


def load_obj(name):
    """Load and return the object pickled in ``name + '.pkl'``.

    Args:
        name: Source path *without* the ``.pkl`` extension; the suffix is
            appended here.

    Returns:
        The unpickled Python object.
    """
    # NOTE: unpickling can execute arbitrary code, so only open files that
    # come from a trusted source.
    source_path = name + '.pkl'
    with open(source_path, 'rb') as source:
        restored = pickle.load(source)
    return restored

## Load data 

In [None]:
# Read the nine test matrices from matrix/dtest1 ... matrix/dtest9 in order.
# The numeric suffix of each variable matches the model it is scored with
# further down (dtestN is passed to modelN.predict).
(dtest1, dtest2, dtest3,
 dtest4, dtest5, dtest6,
 dtest7, dtest8, dtest9) = [
    xgb.DMatrix('matrix/dtest' + str(idx)) for idx in range(1, 10)
]

## Load models

In [None]:
# Unpickle the nine trained models in order. load_obj appends '.pkl', so the
# files read are model/xgboost_model1_akin.pkl ... model/xgboost_model9_akin.pkl.
(model1, model2, model3,
 model4, model5, model6,
 model7, model8, model9) = [
    load_obj('model/xgboost_model' + str(idx) + '_akin') for idx in range(1, 10)
]

In [None]:
# Ground-truth labels for the test set, read from the INPT_DEATH_YN column.
# NOTE(review): df_test is not defined in any cell visible above this one; it
# has to be loaded earlier for Restart & Run All to succeed — confirm its source.
# NOTE(review): the loaded models are named "*_akin" and the plot titles below
# say "AKI event" / "AKIN EVENT", yet the active label is INPT_DEATH_YN —
# confirm which target is intended before trusting the curves.
y_test = df_test['INPT_DEATH_YN'].values
# Alternative label column (currently disabled):
#y_test = df_test['AKIN_EVENT'].values

## ROC Curve

In [None]:
# ROC curves: score every model on its own test matrix and compare all nine
# on a single figure. All models are evaluated against the same y_test labels.
# roc_curve returns (fpr, tpr, thresholds) for each model.

y_pred_prob1 = model1.predict(dtest1)
fpr1, tpr1, thresholds1 = roc_curve(y_test, y_pred_prob1)

y_pred_prob2 = model2.predict(dtest2)
fpr2, tpr2, thresholds2 = roc_curve(y_test, y_pred_prob2)

y_pred_prob3 = model3.predict(dtest3)
fpr3, tpr3, thresholds3 = roc_curve(y_test, y_pred_prob3)

y_pred_prob4 = model4.predict(dtest4)
fpr4, tpr4, thresholds4 = roc_curve(y_test, y_pred_prob4)

y_pred_prob5 = model5.predict(dtest5)
fpr5, tpr5, thresholds5 = roc_curve(y_test, y_pred_prob5)

y_pred_prob6 = model6.predict(dtest6)
fpr6, tpr6, thresholds6 = roc_curve(y_test, y_pred_prob6)

y_pred_prob7 = model7.predict(dtest7)
fpr7, tpr7, thresholds7 = roc_curve(y_test, y_pred_prob7)

y_pred_prob8 = model8.predict(dtest8)
# Fixed: this result used to be stored in `thresholds6`, which overwrote
# model 6's thresholds and left `thresholds8` undefined in this cell.
fpr8, tpr8, thresholds8 = roc_curve(y_test, y_pred_prob8)

y_pred_prob9 = model9.predict(dtest9)
fpr9, tpr9, thresholds9 = roc_curve(y_test, y_pred_prob9)

plt.figure(figsize=(15,15))
# Diagonal reference line from (0, 0) to (1, 1).
plt.plot([0,1],[0,1], 'k--')
plt.plot(fpr1, tpr1, label= "Model1:baseline")
plt.plot(fpr2, tpr2, label= "Model2:labs")
plt.plot(fpr3, tpr3, label= "Model3:proc_name",c='y')
plt.plot(fpr4, tpr4, label= "Model4:medications")
# Models 5, 8 and 9 are drawn with thicker lines and fixed colours.
plt.plot(fpr5, tpr5, label= "Model5:base+labs",c='r',linewidth=3.0)
plt.plot(fpr6, tpr6, label= "Model6:base+proc_name")
plt.plot(fpr7, tpr7, label= "Model7:base+meds")
plt.plot(fpr8, tpr8, label= "Model8:all sets",c='b',linewidth=3.0)
plt.plot(fpr9, tpr9, label= "Model9:selected features",c='g',linewidth=3.0)
plt.legend( prop={'size': 25})
plt.xlabel("FPR",fontsize=25)
plt.ylabel("TPR",fontsize=25)
plt.title('Receiver Operating Characteristic (ROC curves). AKI event',fontsize=25)
plt.show()
# To save the figure, enable the line below and move it *before* plt.show();
# once the figure has been shown, savefig may write an empty image.
#plt.savefig('ROC_akin.png')

## Precision-Recall (PR) Curve

In [None]:
# Precision-recall curves: score every model on its own test matrix against
# the shared y_test labels. precision_recall_curve returns
# (precision, recall, thresholds) — note the order when plotting.

y_pred_prob1 = model1.predict(dtest1)
precision1, recall1, thresholds1 = precision_recall_curve(y_test, y_pred_prob1)

y_pred_prob2 = model2.predict(dtest2)
precision2, recall2, thresholds2 = precision_recall_curve(y_test, y_pred_prob2)

y_pred_prob3 = model3.predict(dtest3)
precision3, recall3, thresholds3 = precision_recall_curve(y_test, y_pred_prob3)

y_pred_prob4 = model4.predict(dtest4)
precision4, recall4, thresholds4 = precision_recall_curve(y_test, y_pred_prob4)

y_pred_prob5 = model5.predict(dtest5)
precision5, recall5, thresholds5 = precision_recall_curve(y_test, y_pred_prob5)

y_pred_prob6 = model6.predict(dtest6)
precision6, recall6, thresholds6 = precision_recall_curve(y_test, y_pred_prob6)

y_pred_prob7 = model7.predict(dtest7)
precision7, recall7, thresholds7 = precision_recall_curve(y_test, y_pred_prob7)

y_pred_prob8 = model8.predict(dtest8)
precision8, recall8, thresholds8 = precision_recall_curve(y_test, y_pred_prob8)

y_pred_prob9 = model9.predict(dtest9)
precision9, recall9, thresholds9 = precision_recall_curve(y_test, y_pred_prob9)

plt.figure(figsize=(15,15))

# Recall goes on the x-axis and precision on the y-axis, matching the axis
# labels below. Fixed: model 1 used to be plotted with the two swapped.
plt.plot(recall1,precision1,  label= "Model1:baseline")
plt.plot(recall2,precision2,  label= "Model2:labs")
plt.plot(recall3,precision3,  label= "Model3:proc_name",c='y')
plt.plot(recall4,precision4,  label= "Model4:medications")
# Fixed: a stray non-ASCII token sat here and raised NameError, aborting the
# cell before the legend, labels and plt.show() could run.
# NOTE(review): precision/recall for models 5-9 are computed above but not
# drawn on this figure — confirm whether their curves should be added.
plt.legend( prop={'size': 15})
plt.xlabel("Recall",fontsize=15)
plt.ylabel("Precision",fontsize=15)
plt.title('Precision-Recall (PR curves).AKIN EVENT',fontsize=15)
plt.show()
# To save the figure, enable the line below and move it *before* plt.show();
# once the figure has been shown, savefig may write an empty image.
#plt.savefig('plots/PR_akin.png')