# Further Analyses: Producing 95% CIs and Examining the Most Important Features

In [18]:
from IPython.core.interactiveshell import InteractiveShell
from matplotlib import pyplot 
from numpy import mean
from numpy import std
from sklearn.calibration import calibration_curve
from sklearn.metrics import confusion_matrix as confusion
from sklearn.metrics import f1_score
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import precision_score 
from sklearn.metrics import recall_score as recall
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.utils import resample
import joblib
import numpy as np
import pandas as pd
import warnings
InteractiveShell.ast_node_interactivity = "all"

## Bootstrapped 95% CIs for the First Round of Evaluations (Full Test Dataset)

In [2]:
#Loading the hold-out features (Xi_hold) and one label file per outcome (si, nssi, att, sitbs)
#The names on the left are bound in the same order as the paths listed below
Xi_hold, y_hold_si, y_hold_nssi, y_hold_att, y_hold_sitbs = (
    pd.read_csv(csv_path)
    for csv_path in (
        r'C:\Users\z5291979\OneDrive - UNSW\Documents\lsac-data\processed_data\Xi_hold.csv',
        r'C:\Users\z5291979\OneDrive - UNSW\Documents\lsac-data\processed_data\y_hold_si.csv',
        r'C:\Users\z5291979\OneDrive - UNSW\Documents\lsac-data\processed_data\y_hold_nssi.csv',
        r'C:\Users\z5291979\OneDrive - UNSW\Documents\lsac-data\processed_data\y_hold_att.csv',
        r'C:\Users\z5291979\OneDrive - UNSW\Documents\lsac-data\processed_data\y_hold_sitbs.csv',
    )
)

In [21]:
#Unnamed: 0 is an extra index column: pandas gives that name to an unnamed index column when it reads the CSV back in
#errors='ignore' keeps this cell re-runnable after the column has already been removed
Xi_hold=Xi_hold.drop(columns=['Unnamed: 0'], errors='ignore')
#index=False stops to_csv from writing the row index out again, which is what would recreate 'Unnamed: 0' on the next read
Xi_hold.to_csv(r'C:\Users\z5291979\OneDrive - UNSW\Documents\lsac-data\processed_data\Xi_hold.csv', index=False)

In [42]:
#This cell defines the helpers that prepare 100 bootstrapped test sets and obtain estimates of performance metrics for each resampled version of test data
#It starts with the algorithm labels, which are reused for every outcome's saved models
algos=['LR', 'RF', 'XGB']
#One classification threshold per algorithm, listed in the same order as algos
threshs1=[0.188868, 0.175579, 0.119263]
#NOTE(review): joblib.load unpickles the file, so only load .sav files from a trusted source
models1=dict((a, joblib.load(f'{a}_si.sav')) for a in algos)

def boot(model, fulltest, thresh, n_iterations=100):
    """Score a fitted classifier on stratified bootstrap resamples of the test set.

    Parameters
    ----------
    model : object
        Fitted classifier exposing ``predict_proba``; column 1 of its output is
        used as the positive-class probability.
    fulltest : pandas.DataFrame
        Test data whose LAST column is the outcome label and whose remaining
        columns are the features, in the order the model was fitted on.
    thresh : float
        Probability cut-off; a case is predicted positive when its probability
        is greater than or equal to this value.
    n_iterations : int, default 100
        Number of bootstrap resamples to draw.

    Returns
    -------
    pandas.DataFrame
        One row per resample with the columns ``aucs``, ``f1s``, ``sens``,
        ``specs`` and ``ppvs``.
    """
    values=fulltest.values
    labels=values[:, -1]
    rows=[]
    #The resampled sets are plain arrays, so a model fitted on a DataFrame warns about missing feature names
    #This is ok because the features are in the same order as the original test set
    #catch_warnings limits the suppression to this call instead of silencing UserWarning for the rest of the session
    with warnings.catch_warnings():
        warnings.simplefilter(action='ignore', category=UserWarning)
        for i in range(n_iterations):
            #Stratifying on the label keeps the class balance of every resample equal to the full test set
            #random_state=i makes each resample, and therefore the resulting intervals, reproducible
            test=resample(values, n_samples=len(fulltest), stratify=labels, random_state=i)
            y_true=test[:, -1]
            probs=model.predict_proba(test[:, :-1])[:, 1]
            pred=np.where(probs>= thresh, 1, 0)
            tn, fp, fn, tp=confusion(y_true, pred).ravel()
            rows.append((
                roc_auc_score(y_true, probs),
                f1_score(y_true, pred),
                recall(y_true, pred),
                tn/(tn+fp),
                precision_score(y_true, pred),
            ))

    return pd.DataFrame(rows, columns=["aucs", "f1s", "sens", "specs", "ppvs"])

#Iterating through (name, model) pairs to evaluate LR, RF, and XGB Models
#Each model is paired with the threshold at the same position in threshs, i.e. 'LR' is paired with threshold of 0.188868
def runevals(models1, fulltest, threshs):
    """Print bootstrapped 95% intervals of every metric for each model.

    Parameters
    ----------
    models1 : dict
        Maps an algorithm label to its fitted model. Models are evaluated in
        the mapping's own iteration order, so no module-level list of names
        is needed.
    fulltest : pandas.DataFrame
        Test data passed unchanged to ``boot`` for each model.
    threshs : sequence of float
        Classification thresholds, one per entry of ``models1`` and in the
        same order.

    Raises
    ------
    ValueError
        If the number of thresholds differs from the number of models; zip
        would otherwise drop the unmatched entries without any notice.
    """
    if len(models1) != len(threshs):
        raise ValueError(
            f'Expected one threshold per model: got {len(models1)} models and {len(threshs)} thresholds'
        )
    for (a, clf), t in zip(models1.items(), threshs):
        print(f'95% CIs for performance using {a} with threshold at {t}')
        metricsdf=boot(clf, fulltest, t)
        #The 2.5th and 97.5th percentiles of the bootstrap distribution bound the 95% interval
        print(metricsdf.quantile([.025, .975]))
        print('\n')
In [43]:
#Joining on the row index appends the si label columns after the features; boot reads the label from the last column
#assumes y_hold_si holds only the label column - TODO confirm
fulltest_si=Xi_hold.join(other=y_hold_si)
runevals(models1, fulltest=fulltest_si, threshs=threshs1)

95% CIs for performance using LR with threshold at 0.188868
           aucs       f1s    sens     specs      ppvs
0.025  0.694001  0.308598  0.3750  0.816746  0.262009
0.975  0.786711  0.447163  0.5625  0.864968  0.367685


95% CIs for performance using RF with threshold at 0.175579
           aucs       f1s      sens     specs      ppvs
0.025  0.777556  0.407189  0.624479  0.754305  0.295607
0.975  0.865730  0.509746  0.781250  0.818404  0.385700


95% CIs for performance using XGB with threshold at 0.119263
           aucs       f1s      sens     specs      ppvs
0.025  0.783426  0.385365  0.760417  0.654779  0.256433
0.975  0.866916  0.467795  0.901302  0.723618  0.320392




In [46]:
#Thresholds for the nssi models, listed in the same order as algos (LR, RF, XGB)
threshs2=[0.138621, 0.160344, 0.172887]
#NOTE(review): joblib.load unpickles the file, so only load .sav files from a trusted source
models2=dict((a, joblib.load(f'{a}_nssi.sav')) for a in algos)

#Joining on the row index appends the nssi label columns after the features; boot reads the label from the last column
fulltest_nssi=Xi_hold.join(other=y_hold_nssi)
runevals(models2, fulltest=fulltest_nssi, threshs=threshs2)

95% CIs for performance using LR with threshold at 0.138621
           aucs       f1s      sens     specs      ppvs
0.025  0.655818  0.224323  0.321429  0.855869  0.170443
0.975  0.808005  0.385919  0.553571  0.900446  0.293034


95% CIs for performance using RF with threshold at 0.160344
           aucs       f1s      sens     specs      ppvs
0.025  0.771589  0.315188  0.544196  0.821620  0.217391
0.975  0.888752  0.445714  0.795089  0.870022  0.313881


95% CIs for performance using XGB with threshold at 0.172887
           aucs       f1s      sens     specs      ppvs
0.025  0.769216  0.302383  0.391964  0.895988  0.248378
0.975  0.879479  0.511278  0.670089  0.936107  0.437224




In [44]:
#Thresholds for the att models, listed in the same order as algos (LR, RF, XGB)
threshs3=[0.074037, 0.073374, 0.087420]
#NOTE(review): joblib.load unpickles the file, so only load .sav files from a trusted source
models3=dict((a, joblib.load(f'{a}_att.sav')) for a in algos)

#Joining on the row index appends the att label columns after the features; boot reads the label from the last column
fulltest_att=Xi_hold.join(other=y_hold_att)
runevals(models3, fulltest=fulltest_att, threshs=threshs3)

95% CIs for performance using LR with threshold at 0.074037
           aucs       f1s      sens     specs      ppvs
0.025  0.690694  0.126141  0.296094  0.819907  0.080938
0.975  0.846409  0.267160  0.625000  0.869440  0.171988


95% CIs for performance using RF with threshold at 0.073374
           aucs       f1s      sens     specs     ppvs
0.025  0.800614  0.174382  0.562500  0.764562  0.10436
0.975  0.915505  0.280363  0.860156  0.817109  0.16923


95% CIs for performance using XGB with threshold at 0.08742
           aucs       f1s     sens     specs      ppvs
0.025  0.795508  0.224842  0.43750  0.873745  0.151437
0.975  0.910338  0.381892  0.78125  0.909003  0.254234




In [45]:
#Thresholds for the sitbs models, listed in the same order as algos (LR, RF, XGB)
threshs4=[0.208806, 0.152130, 0.176893]
#NOTE(review): joblib.load unpickles the file, so only load .sav files from a trusted source
models4=dict((a, joblib.load(f'{a}_sitbs.sav')) for a in algos)

#Joining on the row index appends the sitbs label columns after the features; boot reads the label from the last column
fulltest_sitbs=Xi_hold.join(other=y_hold_sitbs)
runevals(models4, fulltest=fulltest_sitbs, threshs=threshs4)

95% CIs for performance using LR with threshold at 0.208806
           aucs       f1s      sens     specs      ppvs
0.025  0.666220  0.379787  0.504132  0.750699  0.296542
0.975  0.756427  0.477128  0.624174  0.821752  0.392540


95% CIs for performance using RF with threshold at 0.15213
           aucs       f1s      sens     specs      ppvs
0.025  0.772562  0.431631  0.702479  0.672615  0.309099
0.975  0.845087  0.521524  0.830785  0.735197  0.380550


95% CIs for performance using XGB with threshold at 0.176893
           aucs       f1s      sens     specs      ppvs
0.025  0.781079  0.452331  0.632025  0.750781  0.346575
0.975  0.851117  0.545517  0.772934  0.810855  0.429257




## Bootstrapped 95% CIs for the Second Round of Evaluations (Reduced Test Dataset)

In [None]:
#Loading the reduced datasets created in the evaluation notebook
si_test=pd.read_csv(r'C:\Users\z5291979\OneDrive - UNSW\Documents\lsac-data\processed_data\y_hold_si.csv'