In [None]:
import jax
import jax.numpy as jnp
from jax import random
from jax import jit, vmap, grad

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score

In [None]:
import warnings
warnings.filterwarnings("ignore")

# Defining functions

In [None]:
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV

In [None]:
from sklearn.calibration import CalibratedClassifierCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.calibration import calibration_curve
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

from sklearn.metrics import roc_auc_score, accuracy_score, brier_score_loss
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB


In [None]:
def utility(p, y_true, th, a11, a12, a21):
    """Average utility obtained by thresholding the scores ``p`` at ``th``.

    A sample is predicted positive (1.0) when its score is strictly greater
    than ``th`` and negative (0.0) otherwise. Every correct prediction (true
    positive or true negative) earns ``a11``, every false positive costs
    ``a12`` and every false negative costs ``a21``. The total is divided by
    the number of samples.

    Parameters
    ----------
    p : array-like
        Predicted scores, one per sample.
    y_true : array-like
        Ground-truth labels, compared for equality against the 0.0 / 1.0
        predictions.
    th : float
        Decision threshold applied to ``p``.
    a11 : float
        Reward for each correct prediction.
    a12 : float
        Cost of each false positive.
    a21 : float
        Cost of each false negative.

    Returns
    -------
    float
        ``(a11 * #correct - a12 * #FP - a21 * #FN) / len(p)``.
    """
    # Flatten to plain 1-D arrays so that lists, pandas Series and (n, 1)
    # columns are all handled the same way as a 1-D ndarray.
    scores = np.asarray(p).ravel()
    labels = np.asarray(y_true).ravel()

    predicted = (scores > th).astype(float)

    # TP and TN contribution to the utility function
    correct = predicted == labels
    n_correct = np.sum(correct)

    # FP contribution: wrong predictions that were predicted positive
    predicted_pos = predicted == 1.0
    n_fp = np.sum(~correct & predicted_pos)

    # FN contribution: wrong predictions that were predicted negative
    n_fn = np.sum(~correct & ~predicted_pos)

    return (a11 * n_correct - a12 * n_fp - a21 * n_fn) / len(scores)

def max_utility(y_true, y_pred):
    """Largest utility reachable by thresholding ``y_pred``.

    Evaluates ``utility`` on 1001 evenly spaced thresholds covering [0, 1],
    using weights (a11, a12, a21) = (1, 0, 0), and returns the maximum of
    the resulting utility curve.
    """
    cutoffs = np.linspace(0.0, 1.0, 1001)

    # utility curve: one value per candidate threshold
    curve = np.empty(cutoffs.shape[0])
    for k, cutoff in enumerate(cutoffs):
        curve[k] = utility(y_pred, y_true, cutoff, 1, 0, 0)

    return curve.max()

In [None]:
def NetTrustScore(y_true, p):
    """Mean per-sample trust score of thresholded predictions.

    Predictions are ``1`` where ``p >= 0.5`` and ``0`` otherwise. A sample
    whose prediction matches ``y_true`` contributes ``p ** alpha``; a sample
    whose prediction is wrong contributes ``(1 - p) ** beta``. Both exponents
    are fixed at 1.0. The contributions are summed and divided by the number
    of labels.

    Parameters
    ----------
    y_true : array-like of shape (n,)
        Ground-truth labels (list, ndarray or pandas Series).
    p : array-like of shape (n,) or (n, 1)
        Scores thresholded at 0.5.

    Returns
    -------
    numpy scalar for 1-D ``p``; array of shape ``(1,)`` for ``(n, 1)`` ``p``
    (the leading axis is reduced, any trailing axis is preserved).
    """
    alpha = 1.0
    beta = 1.0

    scores = np.asarray(p, dtype=float)
    # np.asarray makes the comparison positional, so a pandas Series with a
    # shuffled index (e.g. after train_test_split) is handled correctly.
    labels = np.asarray(y_true)
    # Give the labels the same number of axes as the scores so that (n,)
    # labels line up row-by-row with an (n, 1) score column.
    labels_aligned = labels.reshape((-1,) + (1,) * (scores.ndim - 1))

    y_hat = np.where(scores >= 0.5, 1, 0)
    correct = y_hat == labels_aligned

    # NOTE(review): `scores` is used as-is for correct predictions, even when
    # the predicted class is 0 -- i.e. it is not converted to the confidence
    # in the predicted class. Kept unchanged; confirm this is intended.
    contributions = np.where(
        correct,
        np.power(scores, alpha),
        np.power(1.0 - scores, beta),
    )

    # Reducing only the sample axis avoids building a ragged list of
    # shape-(1,) arrays and scalars, which NumPy >= 1.24 refuses to sum.
    return np.sum(contributions, axis=0) / labels.shape[0]

# Additional Model Comparison Results

## Breast Cancer Dataset

In [None]:
from sklearn.datasets import load_breast_cancer

# Breast-cancer data via scikit-learn's loader: features in X, labels in y.
data = load_breast_cancer()
X, y = data.data, data.target


In [None]:
# Initialize lists to store test statistics for each split.
# Every appended row has one entry per classifier, in the order RF, LR, KNN, NB.
auc_scores = []
accuracy_scores = []
brier_scores = []
NetTrust_score = []
max_utility_scores = []

# Set the number of sims
num_sims = 50

# Initialize classifiers
# The random forest is seeded so that, together with the seeded splits below,
# re-running this cell reproduces the same numbers.
rf_classifier = RandomForestClassifier(max_depth=3, n_estimators=200, random_state=0)
logreg_classifier = LogisticRegression()
knn_classifier = KNeighborsClassifier(weights = 'distance', n_neighbors=250)
nb_classifier = GaussianNB()

# Order matters: the reporting cells read the score columns by position.
classifiers = [rf_classifier, logreg_classifier, knn_classifier, nb_classifier]

# Perform num_sims random splits and evaluate each classifier
for i in range(num_sims):

    print("Iteration %i"%i)

    # Randomly split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=45323+i*543)

    # Positional labels for the custom metrics (a pandas Series would be
    # indexed by label, not by position).
    y_test_arr = np.asarray(y_test)

    auc_row, accuracy_row, brier_row, NetTrust_row, max_u_row = [], [], [], [], []
    for clf in classifiers:
        # Train, then estimate the probability of class 1 on the held-out data
        clf.fit(X_train, y_train)
        proba_pos = clf.predict_proba(X_test)[:, 1]

        auc_row.append(roc_auc_score(y_test, proba_pos))
        accuracy_row.append(accuracy_score(y_test, clf.predict(X_test)))
        brier_row.append(brier_score_loss(y_test, proba_pos))
        # NetTrustScore is given an (n, 1) column, so each entry keeps the
        # trailing length-1 axis that the reporting cells index with [k][0].
        NetTrust_row.append(NetTrustScore(y_test_arr, proba_pos[:, np.newaxis]))
        max_u_row.append(max_utility(y_test_arr, proba_pos))

    # Append the test statistics to the corresponding lists
    auc_scores.append(auc_row)
    accuracy_scores.append(accuracy_row)
    brier_scores.append(brier_row)
    NetTrust_score.append(NetTrust_row)
    max_utility_scores.append(max_u_row)

Iteration 0
Iteration 1
Iteration 2
Iteration 3
Iteration 4
Iteration 5
Iteration 6
Iteration 7
Iteration 8
Iteration 9
Iteration 10
Iteration 11
Iteration 12
Iteration 13
Iteration 14
Iteration 15
Iteration 16
Iteration 17
Iteration 18
Iteration 19
Iteration 20
Iteration 21
Iteration 22
Iteration 23
Iteration 24
Iteration 25
Iteration 26
Iteration 27
Iteration 28
Iteration 29
Iteration 30
Iteration 31
Iteration 32
Iteration 33
Iteration 34
Iteration 35
Iteration 36
Iteration 37
Iteration 38
Iteration 39
Iteration 40
Iteration 41
Iteration 42
Iteration 43
Iteration 44
Iteration 45
Iteration 46
Iteration 47
Iteration 48
Iteration 49


In [None]:
# Calculate the mean and standard deviation of each test statistic across splits.
# (The *_var names are kept for compatibility; they hold standard deviations.)
auc_avg = np.mean(auc_scores, axis=0)
auc_var = np.std(auc_scores, axis=0)

accuracy_avg = np.mean(accuracy_scores, axis=0)
accuracy_var = np.std(accuracy_scores, axis=0)

brier_avg = np.mean(brier_scores, axis=0)
brier_var = np.std(brier_scores, axis=0)

NetTrust_avg = np.mean(NetTrust_score, axis=0)
NetTrust_var = np.std(NetTrust_score, axis=0)

umax_avg = np.mean(max_utility_scores, axis=0)
umax_var = np.std(max_utility_scores, axis=0)

# Print the results as LaTeX table rows: "<measure> & $mean \pm std$ & ..."
print(" Measure, RF, LR, knn, NB")
for measure, avg, std in [
    ("AUC", auc_avg, auc_var),
    ("Accuracy", accuracy_avg, accuracy_var),
    ("Brier", brier_avg, brier_var),
    ("NetTrust", NetTrust_avg, NetTrust_var),
    ("Max Utility", umax_avg, umax_var),
]:
    # np.ravel drops the trailing length-1 axis the NetTrust arrays may carry,
    # and pairing each mean with its own std fixes the Naive Bayes NetTrust
    # column, which previously printed the random forest's std.
    cells = [r"${:.3f} \pm {:.3f}$".format(m, s) for m, s in zip(np.ravel(avg), np.ravel(std))]
    print(" & ".join([measure] + cells))





 Measure, RF, LR, knn, NB
AUC & $0.989 \pm 0.009$ & $0.990 \pm 0.007$ & $0.974 \pm 0.013$ & $0.988 \pm 0.007$
Accuracy & $0.955 \pm 0.018$ & $0.946 \pm 0.017$ & $0.903 \pm 0.025$ & $0.940 \pm 0.022$
Brier & $0.036 \pm 0.010$ & $0.039 \pm 0.012$ & $0.078 \pm 0.014$ & $0.056 \pm 0.020$
NetTrust & $0.618 \pm 0.040$ & $0.618 \pm 0.040$ & $0.665 \pm 0.028$ & $0.628 \pm 0.040$
Max Utility & $0.966 \pm 0.016$ & $0.961 \pm 0.015$ & $0.935 \pm 0.022$ & $0.956 \pm 0.016$


In [None]:

# Per-classifier mean and standard deviation of each statistic over the splits
# (the *_var names hold standard deviations).
# Column order: Random Forest, Logistic Regression, KNN, Naive Bayes.
# NOTE(review): the output saved under this cell does not match the LaTeX
# table printed by the previous cell (AUC around 0.80 vs. 0.99); it looks like
# it comes from an earlier run on different data -- re-run to confirm.
auc_avg, auc_var = np.mean(auc_scores, axis=0), np.std(auc_scores, axis=0)
accuracy_avg, accuracy_var = np.mean(accuracy_scores, axis=0), np.std(accuracy_scores, axis=0)
brier_avg, brier_var = np.mean(brier_scores, axis=0), np.std(brier_scores, axis=0)

# Plain-text report; each array is unpacked into its four placeholders.
print("AUC - Random Forest: {:.4f}, Logistic Regression: {:.4f},  KNN: {:.4f}, Naive Bayes: {:.4f}".format(*auc_avg))
print("AUC Std - Random Forest: {:.4f}, Logistic Regression: {:.4f},  KNN: {:.4f}, Naive Bayes: {:.4f}".format(*auc_var))

print("Accuracy - Random Forest: {:.4f}, Logistic Regression: {:.4f}, KNN: {:.4f}, Naive Bayes: {:.4f}".format(*accuracy_avg))
print("Accuracy Std - Random Forest: {:.4f}, Logistic Regression: {:.4f}, KNN: {:.4f}, Naive Bayes: {:.4f}".format(*accuracy_var))

print("Brier Score - Random Forest: {:.4f}, Logistic Regression: {:.4f},  KNN: {:.4f}, Naive Bayes: {:.4f}".format(*brier_avg))
print("Brier Score Std - Random Forest: {:.4f}, Logistic Regression: {:.4f}, KNN: {:.4f}, Naive Bayes: {:.4f}".format(*brier_var))

AUC - Random Forest: 0.7972, Logistic Regression: 0.7885,  KNN: 0.7846, Naive Bayes: 0.7803
AUC Std - Random Forest: 0.0035, Logistic Regression: 0.0040,  KNN: 0.0034, Naive Bayes: 0.0040
Accuracy - Random Forest: 0.7317, Logistic Regression: 0.7357, KNN: 0.7313, Naive Bayes: 0.7229
Accuracy Std - Random Forest: 0.0045, Logistic Regression: 0.0049, KNN: 0.0035, Naive Bayes: 0.0038
Brier Score - Random Forest: 0.1819, Logistic Regression: 0.1767,  KNN: 0.1796, Naive Bayes: 0.1939
Brier Score Std - Random Forest: 0.0012, Logistic Regression: 0.0019, KNN: 0.0016, Naive Bayes: 0.0029


## Adult Dataset

In [None]:
# Load the dataset
import pandas as pd
from sklearn import preprocessing

data = pd.read_csv("https://raw.githubusercontent.com/ritwikvashistha/utrustworthy/main/Datasets/adult.csv")
# To experiment on a 2% subsample instead:
#data=data.sample(frac=0.02,random_state=1)

# Replace every categorical column by its one-hot indicator columns,
# appended at the end of the frame and prefixed with the column name.
categorical = ['workclass', 'education', 'marital-status', 'occupation',
               'relationship', 'race', 'gender', 'native-country', 'income']
for column in categorical:
    dummies = pd.get_dummies(data[column], prefix=column)
    data = pd.concat([data.drop(columns=column), dummies], axis=1)

# Split data into X and y: the label is the 'income_<=50K' indicator, and
# both income indicator columns are removed from the features.
target_columns = ['income_<=50K', 'income_>50K']
y = data[target_columns[0]].to_numpy()
X = data.drop(columns=target_columns).to_numpy()


# Scale X
# NOTE(review): the scaler is fit on the full data set, before the train/test
# splits made in the next cell, so test rows influence the scaling
# statistics -- confirm this is acceptable.
scaler = preprocessing.StandardScaler()
X = scaler.fit_transform(X)

In [None]:
# Initialize lists to store test statistics for each split.
# Every appended row has one entry per classifier, in the order RF, LR, KNN, NB.
auc_scores = []
accuracy_scores = []
brier_scores = []
NetTrust_score = []
max_utility_scores = []

# Set the number of sims
num_sims = 50

# Initialize classifiers
# The random forest is seeded so that, together with the seeded splits below,
# re-running this cell reproduces the same numbers.
rf_classifier = RandomForestClassifier(max_depth=3, n_estimators=200, random_state=0)
logreg_classifier = LogisticRegression()
knn_classifier = KNeighborsClassifier(weights = 'distance', n_neighbors=250)
nb_classifier = GaussianNB()

# Order matters: the reporting cells read the score columns by position.
classifiers = [rf_classifier, logreg_classifier, knn_classifier, nb_classifier]

# Perform num_sims random splits and evaluate each classifier
for i in range(num_sims):

    print("Iteration %i"%i)

    # Randomly split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=45323+i*543)

    # Positional labels for the custom metrics (a pandas Series would be
    # indexed by label, not by position).
    y_test_arr = np.asarray(y_test)

    auc_row, accuracy_row, brier_row, NetTrust_row, max_u_row = [], [], [], [], []
    for clf in classifiers:
        # Train, then estimate the probability of class 1 on the held-out data
        clf.fit(X_train, y_train)
        proba_pos = clf.predict_proba(X_test)[:, 1]

        auc_row.append(roc_auc_score(y_test, proba_pos))
        accuracy_row.append(accuracy_score(y_test, clf.predict(X_test)))
        brier_row.append(brier_score_loss(y_test, proba_pos))
        # NetTrustScore is given an (n, 1) column, so each entry keeps the
        # trailing length-1 axis that the reporting cells index with [k][0].
        NetTrust_row.append(NetTrustScore(y_test_arr, proba_pos[:, np.newaxis]))
        max_u_row.append(max_utility(y_test_arr, proba_pos))

    # Append the test statistics to the corresponding lists
    auc_scores.append(auc_row)
    accuracy_scores.append(accuracy_row)
    brier_scores.append(brier_row)
    NetTrust_score.append(NetTrust_row)
    max_utility_scores.append(max_u_row)

Iteration 0
Iteration 1
Iteration 2
Iteration 3
Iteration 4
Iteration 5
Iteration 6
Iteration 7
Iteration 8
Iteration 9
Iteration 10
Iteration 11
Iteration 12
Iteration 13
Iteration 14
Iteration 15
Iteration 16
Iteration 17
Iteration 18
Iteration 19
Iteration 20
Iteration 21
Iteration 22
Iteration 23
Iteration 24
Iteration 25
Iteration 26
Iteration 27
Iteration 28
Iteration 29
Iteration 30
Iteration 31
Iteration 32
Iteration 33
Iteration 34
Iteration 35
Iteration 36
Iteration 37
Iteration 38
Iteration 39
Iteration 40
Iteration 41
Iteration 42
Iteration 43
Iteration 44
Iteration 45
Iteration 46
Iteration 47
Iteration 48
Iteration 49


In [None]:
# Calculate the mean and standard deviation of each test statistic across splits.
# (The *_var names are kept for compatibility; they hold standard deviations.)
auc_avg = np.mean(auc_scores, axis=0)
auc_var = np.std(auc_scores, axis=0)

accuracy_avg = np.mean(accuracy_scores, axis=0)
accuracy_var = np.std(accuracy_scores, axis=0)

brier_avg = np.mean(brier_scores, axis=0)
brier_var = np.std(brier_scores, axis=0)

NetTrust_avg = np.mean(NetTrust_score, axis=0)
NetTrust_var = np.std(NetTrust_score, axis=0)

umax_avg = np.mean(max_utility_scores, axis=0)
umax_var = np.std(max_utility_scores, axis=0)

# Print the results as LaTeX table rows: "<measure> & $mean \pm std$ & ..."
print(" Measure, RF, LR, knn, NB")
for measure, avg, std in [
    ("AUC", auc_avg, auc_var),
    ("Accuracy", accuracy_avg, accuracy_var),
    ("Brier", brier_avg, brier_var),
    ("NetTrust", NetTrust_avg, NetTrust_var),
    ("Max Utility", umax_avg, umax_var),
]:
    # np.ravel drops the trailing length-1 axis the NetTrust arrays may carry,
    # and pairing each mean with its own std fixes the Naive Bayes NetTrust
    # column, which previously printed the random forest's std.
    cells = [r"${:.3f} \pm {:.3f}$".format(m, s) for m, s in zip(np.ravel(avg), np.ravel(std))]
    print(" & ".join([measure] + cells))


 Measure, RF, LR, knn, NB
AUC & $0.890 \pm 0.004$ & $0.906 \pm 0.003$ & $0.879 \pm 0.004$ & $0.860 \pm 0.005$
Accuracy & $0.792 \pm 0.005$ & $0.853 \pm 0.003$ & $0.834 \pm 0.003$ & $0.581 \pm 0.034$
Brier & $0.130 \pm 0.002$ & $0.102 \pm 0.002$ & $0.116 \pm 0.002$ & $0.415 \pm 0.034$
NetTrust & $0.703 \pm 0.002$ & $0.735 \pm 0.003$ & $0.720 \pm 0.002$ & $0.756 \pm 0.002$
Max Utility & $0.846 \pm 0.004$ & $0.853 \pm 0.003$ & $0.836 \pm 0.003$ & $0.765 \pm 0.004$


## Bankruptcy Dataset

In [None]:
# Bankruptcy data: the 'Bankrupt?' column is the label, all other columns are features.
data = pd.read_csv('https://raw.githubusercontent.com/ritwikvashistha/utrustworthy/main/Datasets/data%202.csv')
y = data.loc[:, 'Bankrupt?']
X = data.drop(columns='Bankrupt?')

In [None]:
# Initialize lists to store test statistics for each split.
# Every appended row has one entry per classifier, in the order RF, LR, KNN, NB.
auc_scores = []
accuracy_scores = []
brier_scores = []
NetTrust_score = []
max_utility_scores = []

# Set the number of sims
num_sims = 50

# Initialize classifiers
# The random forest is seeded so that, together with the seeded splits below,
# re-running this cell reproduces the same numbers.
rf_classifier = RandomForestClassifier(max_depth=3, n_estimators=200, random_state=0)
logreg_classifier = LogisticRegression()
knn_classifier = KNeighborsClassifier(weights = 'distance', n_neighbors=250)
nb_classifier = GaussianNB()

# Order matters: the reporting cells read the score columns by position.
classifiers = [rf_classifier, logreg_classifier, knn_classifier, nb_classifier]

# Perform num_sims random splits and evaluate each classifier
for i in range(num_sims):

    print("Iteration %i"%i)

    # Randomly split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=45323+i*543)

    # y_test is a pandas Series here; the custom metrics need positional
    # access, so hand them a plain array.
    y_test_arr = np.asarray(y_test)

    auc_row, accuracy_row, brier_row, NetTrust_row, max_u_row = [], [], [], [], []
    for clf in classifiers:
        # Train, then estimate the probability of class 1 on the held-out data
        clf.fit(X_train, y_train)
        proba_pos = clf.predict_proba(X_test)[:, 1]

        auc_row.append(roc_auc_score(y_test, proba_pos))
        accuracy_row.append(accuracy_score(y_test, clf.predict(X_test)))
        brier_row.append(brier_score_loss(y_test, proba_pos))
        # NetTrustScore is given an (n, 1) column, so each entry keeps the
        # trailing length-1 axis that the reporting cells index with [k][0].
        NetTrust_row.append(NetTrustScore(y_test_arr, proba_pos[:, np.newaxis]))
        max_u_row.append(max_utility(y_test_arr, proba_pos))

    # Append the test statistics to the corresponding lists
    auc_scores.append(auc_row)
    accuracy_scores.append(accuracy_row)
    brier_scores.append(brier_row)
    NetTrust_score.append(NetTrust_row)
    max_utility_scores.append(max_u_row)

Iteration 0
Iteration 1
Iteration 2
Iteration 3
Iteration 4
Iteration 5
Iteration 6
Iteration 7
Iteration 8
Iteration 9
Iteration 10
Iteration 11
Iteration 12
Iteration 13
Iteration 14
Iteration 15
Iteration 16
Iteration 17
Iteration 18
Iteration 19
Iteration 20
Iteration 21
Iteration 22
Iteration 23
Iteration 24
Iteration 25
Iteration 26
Iteration 27
Iteration 28
Iteration 29
Iteration 30
Iteration 31
Iteration 32
Iteration 33
Iteration 34
Iteration 35
Iteration 36
Iteration 37
Iteration 38
Iteration 39
Iteration 40
Iteration 41
Iteration 42
Iteration 43
Iteration 44
Iteration 45
Iteration 46
Iteration 47
Iteration 48
Iteration 49


In [None]:
# Calculate the mean and standard deviation of each test statistic across splits.
# (The *_var names are kept for compatibility; they hold standard deviations.)
auc_avg = np.mean(auc_scores, axis=0)
auc_var = np.std(auc_scores, axis=0)

accuracy_avg = np.mean(accuracy_scores, axis=0)
accuracy_var = np.std(accuracy_scores, axis=0)

brier_avg = np.mean(brier_scores, axis=0)
brier_var = np.std(brier_scores, axis=0)

NetTrust_avg = np.mean(NetTrust_score, axis=0)
NetTrust_var = np.std(NetTrust_score, axis=0)

umax_avg = np.mean(max_utility_scores, axis=0)
umax_var = np.std(max_utility_scores, axis=0)

# Print the results as LaTeX table rows: "<measure> & $mean \pm std$ & ..."
print(" Measure, RF, LR, knn, NB")
for measure, avg, std in [
    ("AUC", auc_avg, auc_var),
    ("Accuracy", accuracy_avg, accuracy_var),
    ("Brier", brier_avg, brier_var),
    ("NetTrust", NetTrust_avg, NetTrust_var),
    ("Max Utility", umax_avg, umax_var),
]:
    # np.ravel drops the trailing length-1 axis the NetTrust arrays may carry,
    # and pairing each mean with its own std fixes the Naive Bayes NetTrust
    # column, which previously printed the random forest's std.
    cells = [r"${:.3f} \pm {:.3f}$".format(m, s) for m, s in zip(np.ravel(avg), np.ravel(std))]
    print(" & ".join([measure] + cells))


 Measure, RF, LR, knn, NB
AUC & $0.927 \pm 0.016$ & $0.576 \pm 0.039$ & $0.702 \pm 0.034$ & $0.654 \pm 0.056$
Accuracy & $0.969 \pm 0.004$ & $0.961 \pm 0.004$ & $0.968 \pm 0.004$ & $0.238 \pm 0.342$
Brier & $0.024 \pm 0.003$ & $0.042 \pm 0.004$ & $0.031 \pm 0.004$ & $0.740 \pm 0.341$
NetTrust & $0.051 \pm 0.003$ & $0.090 \pm 0.004$ & $0.057 \pm 0.003$ & $0.058 \pm 0.003$
Max Utility & $0.971 \pm 0.004$ & $0.968 \pm 0.004$ & $0.968 \pm 0.004$ & $0.968 \pm 0.004$
