### Evaluation of Uncertainty Quantification (UQ) - Accuracy-Rejection Curves

In [5]:
import pandas as pd
import numpy as np
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from catboost import CatBoostClassifier
from math import log

In [6]:
# Uncertainty is computed from Model 1, Model 1+2, Model 1+2+3, ...
# instead of from Model 1, Model 2, Model 3, ...
# because model n+1 always builds on model n
# in an additive manner.

# CatBoost already delivers Laplace-smoothed probability predictions.

# 1/m Uncertainty
def _effective_stage_count(staged_preds):
    """Return the number of leading stages the uncertainty averages use.

    Walks the stages from last to first and drops one stage for every
    trailing pair whose class-0 probability for the FIRST sample is equal.
    The scan only ever looks at sample 0, so the count is the same for
    every sample and is computed once.

    NOTE(review): the count starts at ``len(staged_preds) - 1``, so one
    distinct stage is always left out of the averages. This is kept as in
    the original implementation; confirm whether it is intended.
    """
    first_sample_probas = [stage[0][0] for stage in staged_preds][::-1]
    stage_count = len(staged_preds) - 1
    for later, earlier in zip(first_sample_probas, first_sample_probas[1:]):
        if later != earlier:
            break
        stage_count -= 1
    # With a single stage (or only identical stages) the count would be 0
    # and the 1/m averages below would divide by zero; use at least one.
    return max(stage_count, 1)


def _p_log2_p(p):
    """Return ``p * log2(p)``, with the usual convention ``0 * log(0) = 0``."""
    return p * log(p, 2) if p > 0 else 0.0


def uncertainty(staged_preds):
    """Split predictive uncertainty into aleatoric, epistemic and total parts.

    Args:
        staged_preds: iterable (e.g. a generator) of stages; ``stage[i][y]``
            must be the probability of label ``y`` (0 or 1) for sample ``i``.

    Returns:
        Tuple ``(aleatoric, epistemic, total)`` of lists with one value per
        sample, all in bits (base-2 logarithm):

        * aleatoric: mean entropy of the individual stage predictions,
        * total: entropy of the mean stage prediction,
        * epistemic: ``total - aleatoric``.

        Three empty lists are returned when there are no stages or samples.
    """
    staged_preds = list(staged_preds)
    if not staged_preds:
        return [], [], []

    labels = [0, 1]  # labels are 0 or 1 for this task
    num_trees = len(staged_preds)  # number of stages
    num_samples = len(staged_preds[0])  # number of samples
    print(num_trees)
    print(num_samples)
    if num_samples == 0:
        return [], [], []

    stage_count = _effective_stage_count(staged_preds)
    stages = staged_preds[:stage_count]

    aleatoric, epistemic, total = [], [], []
    for i in range(num_samples):
        # Aleatoric: average over stages of the per-stage entropy.
        entropy_sum = 0
        for stage in stages:
            for y in labels:
                entropy_sum = entropy_sum + _p_log2_p(stage[i][y])
        sample_aleatoric = -(1 / stage_count) * entropy_sum

        # Total: entropy of the stage-averaged class probabilities.
        neg_total = 0
        for y in labels:
            p_sum = 0
            for stage in stages:
                p_sum = stage[i][y] + p_sum
            p_avg = (1 / stage_count) * p_sum
            neg_total = neg_total + _p_log2_p(p_avg)
        sample_total = neg_total * (-1)

        aleatoric.append(sample_aleatoric)
        total.append(sample_total)
        epistemic.append(sample_total - sample_aleatoric)

    return aleatoric, epistemic, total


In [7]:
def get_prob_matrix(model, x_test, n_estimators, laplace_smoothing, log=False):
    """Collect the class probabilities of each ensemble member per sample.

    Args:
        model: object exposing ``estimators_``; each estimator must provide
            ``predict_proba``, ``apply``, ``predict`` and ``tree_.value``
            (presumably a fitted scikit-learn tree ensemble - confirm
            against the caller).
        x_test: samples to score; only ``x_test.shape[0]`` is read here,
            everything else is passed straight to the estimators.
        n_estimators: number of leading entries of ``model.estimators_`` used.
        laplace_smoothing: additive smoothing constant. When greater than 0
            each probability is recomputed from the values stored in the
            leaf the sample falls into as
            ``(v + s) / (leaf_total + n_classes * s)``.
        log: when true, print every estimator's predictions and
            probabilities. The name shadows ``math.log`` inside this function.

    Returns:
        Nested list indexed ``[sample][estimator]`` holding the list of class
        probabilities.
    """
    sample_count = x_test.shape[0]
    prob_matrix = [[[] for _ in range(n_estimators)] for _ in range(sample_count)]

    for etree in range(n_estimators):
        estimator = model.estimators_[etree]
        tree_prob = estimator.predict_proba(x_test)

        if laplace_smoothing > 0:
            # Overwrite the raw probabilities with smoothed ones derived
            # from the values of the leaf each sample lands in.
            for row, leaf in enumerate(estimator.apply(x_test)):
                leaf_values = estimator.tree_.value[leaf]
                leaf_total = np.array(leaf_values).sum()
                class_values = leaf_values[0]
                for cls, value in enumerate(class_values):
                    tree_prob[row][cls] = (value + laplace_smoothing) / (
                        leaf_total + (len(class_values) * laplace_smoothing)
                    )

        for row, row_prob in enumerate(tree_prob):
            prob_matrix[row][etree] = list(row_prob)

        if log:
            print(f"----------------------------------------[{etree}]")
            print(f"class {estimator.predict(x_test)}  prob \n{tree_prob}")

    return prob_matrix

def uncertainty_estimate(probs):
    """Decompose ensemble uncertainty into total, epistemic and aleatoric.

    Args:
        probs: three-dimensional array-like; axis 0 indexes the data points,
            axis 1 the ensemble members (samples) and axis 2 the class
            probabilities.

    Returns:
        Tuple ``(total, epistemic, aleatoric)`` with one value per data point,
        using base-10 logarithms:

        * aleatoric: entropy of each member, averaged over the members,
        * total: entropy of the member-averaged probabilities,
        * epistemic: ``total - aleatoric``.

        Zero probabilities contribute 0 (they are masked by ``np.ma.log10``
        and filled with 0).
    """
    prob_arr = np.array(probs)

    # Per-member, per-class entropy terms; log10(0) is masked, then zeroed.
    member_entropy = (-prob_arr * np.ma.log10(prob_arr)).filled(0)
    member_count = member_entropy.shape[1]
    aleatoric = np.sum(np.sum(member_entropy, axis=1), axis=1) / member_count

    # Entropy of the averaged prediction.
    mean_prob = np.mean(prob_arr, axis=1)
    total = (-np.sum(mean_prob * np.ma.log10(mean_prob), axis=1)).filled(0)

    epistemic = total - aleatoric
    return total, epistemic, aleatoric

#### SPECT

In [39]:
# Read both SPECT files (no header row) and pool them into one frame;
# the train/test split is redone elsewhere with train_test_split.
train, test = (
    pd.read_csv(path, delimiter=',', header=None)
    for path in ('SPECT .train', 'SPECT .test')
)
data = pd.concat([train, test]).reset_index(drop=True)

# Column 0 is used as the target; every other column is a feature.
y = data[0]
x = data.drop(0, axis=1)

In [131]:
test_size = 0.3

# Random hold-out split; the parts are copied so later column assignments
# on them do not touch `x` / `y`.
x_train, x_test, y_train, y_test = (
    part.copy()
    for part in train_test_split(x, y, test_size=test_size, shuffle=True)
)

# Column labels minus 1 are passed as the categorical feature indices.
model = CatBoostClassifier(
    iterations=10,
    learning_rate=0.5,
    depth=10,
    cat_features=x_train.columns-1,
    silent=True,
)
model.fit(x_train, y_train)

# Predicted classes and class probabilities of the fitted model.
preds_class = model.predict(x_test)
preds_proba = model.predict_proba(x_test)

print("Accuracy = {}".format(accuracy_score(y_test, preds_class)))

# Probabilities after each boosting stage; the generator is materialised
# below so it can be indexed.
staged_preds = model.staged_predict(
    x_test,
    prediction_type='Probability',
    ntree_start=0,
    ntree_end=10,
)
model_probas = list(staged_preds)

# Reorder from [stage][sample] to [sample][stage], the layout that
# uncertainty_estimate expects.
model_probas_transposed = [
    [stage[row] for stage in model_probas]
    for row in range(len(model_probas[0]))
]

total_uncertainty, epistemic_uncertainty, aleatoric_uncertainty = uncertainty_estimate(
    np.array(model_probas_transposed)
)

# Exploratory values: distinct stage predictions per sample and the
# per-stage entropy terms (log10 of 0 is masked and replaced by 0).
p = np.array(model_probas_transposed)
model_unique = [np.unique(sample_probas, axis=0) for sample_probas in p]
entropy = (-p * np.ma.log10(p)).filled(0)

Accuracy = 0.8518518518518519


In [40]:
# Accuracy-rejection curves (ARC): for each of 100 random splits, reject the
# most uncertain test samples at rates 0.05, 0.10, ..., 0.95 and record the
# accuracy on the samples that remain. Three rejection orders are compared:
# epistemic uncertainty, aleatoric uncertainty and a random baseline.
ARC_eU_list, ARC_aU_list, ARC_random_list = [], [], []

for run in range(100):
    test_size = 0.3

    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=test_size, shuffle=True)
    x_train, x_test, y_train, y_test = x_train.copy(), x_test.copy(), y_train.copy(), y_test.copy()

    model = CatBoostClassifier(iterations=10,
                               learning_rate=0.5,
                               depth=10,
                               cat_features=x_train.columns-1,
                               silent=True)
    # Fit model
    model.fit(x_train, y_train)
    # Get predicted classes
    preds_class = model.predict(x_test)
    # Get predicted probabilities for each class
    preds_proba = model.predict_proba(x_test)

    print("Accuracy = {}".format(accuracy_score(y_test, preds_class)))

    staged_preds = model.staged_predict(x_test,
                                        prediction_type='Probability',
                                        ntree_start=0,
                                        ntree_end=10)

    # Reorder from [stage][sample] to [sample][stage].
    model_probas = list(staged_preds)
    model_probas_transposed = [
        [stage[row] for stage in model_probas]
        for row in range(len(model_probas[0]))
    ]

    total_uncertainty, epistemic_uncertainty, aleatoric_uncertainty = uncertainty_estimate(np.array(model_probas_transposed))

    # Row-wise predictions aligned with the test index. A row's prediction
    # does not depend on which other rows are kept, so subsets are scored
    # from these values instead of re-running the model on every subset.
    predictions = pd.Series(np.asarray(preds_class).ravel(), index=x_test.index)

    x_test['a'] = aleatoric_uncertainty
    x_test['e'] = epistemic_uncertainty

    num_rows = x_test.shape[0]
    eU = x_test.sort_values('e', ascending=True)
    aU = x_test.sort_values('a', ascending=True)

    # One independent list per curve. A chained assignment here would bind
    # all three names to the same list and interleave the curves.
    ARC_eU, ARC_aU, ARC_random = [], [], []

    # Integer steps avoid float accumulation deciding how many rejection
    # rates there are; step / 20 is the rejection rate.
    for step in range(1, 20):
        discard_count = (num_rows * step) // 20
        # Slice by the number kept: `[:-discard_count]` would return an
        # empty selection when discard_count is 0.
        keep_count = num_rows - discard_count

        kept_by_curve = (
            (ARC_eU, eU.index[:keep_count]),
            (ARC_aU, aU.index[:keep_count]),
            (ARC_random, x_test.sample(keep_count).index),
        )
        for curve, kept in kept_by_curve:
            curve.append(accuracy_score(y_test.loc[kept], predictions.loc[kept]))

    ARC_eU_list.append(ARC_eU)
    ARC_aU_list.append(ARC_aU)
    ARC_random_list.append(ARC_random)

Accuracy = 0.8148148148148148
Accuracy = 0.7160493827160493
Accuracy = 0.8518518518518519
Accuracy = 0.8765432098765432
Accuracy = 0.7901234567901234
Accuracy = 0.8271604938271605
Accuracy = 0.8888888888888888
Accuracy = 0.7777777777777778
Accuracy = 0.7654320987654321
Accuracy = 0.8148148148148148
Accuracy = 0.7654320987654321
Accuracy = 0.8148148148148148
Accuracy = 0.8024691358024691
Accuracy = 0.7901234567901234
Accuracy = 0.8395061728395061
Accuracy = 0.8395061728395061
Accuracy = 0.8395061728395061
Accuracy = 0.8148148148148148
Accuracy = 0.7777777777777778
Accuracy = 0.8888888888888888
Accuracy = 0.8765432098765432
Accuracy = 0.8395061728395061
Accuracy = 0.8395061728395061
Accuracy = 0.8024691358024691
Accuracy = 0.8148148148148148
Accuracy = 0.8518518518518519
Accuracy = 0.7654320987654321
Accuracy = 0.8271604938271605
Accuracy = 0.7901234567901234
Accuracy = 0.8641975308641975
Accuracy = 0.8518518518518519
Accuracy = 0.8518518518518519
Accuracy = 0.8395061728395061
Accuracy =

In [52]:
# Average every curve position over all runs; the number of positions is
# taken from the first epistemic curve.
n_points = len(ARC_eU_list[0])


def _mean_per_point(curves):
    """Return, for each curve position, the mean over all curves."""
    return [
        np.array([curve[point] for curve in curves]).mean()
        for point in range(n_points)
    ]


ARC_eU_mean = _mean_per_point(ARC_eU_list)
ARC_aU_mean = _mean_per_point(ARC_aU_list)
ARC_random_mean = _mean_per_point(ARC_random_list)

In [59]:
# Table of mean accuracies: one row per rejection rate, starting at 0.05
# and growing by 0.05 per row; columns are epistemic, aleatoric, random.
print('Reject   E    A    R ')
reject_rate = 0.05
mean_rows = zip(ARC_eU_mean, ARC_aU_mean, ARC_random_mean)
for e_acc, a_acc, r_acc in mean_rows:
    print(f'{reject_rate:.2f}:  {e_acc:.2f} {a_acc:.2f} {r_acc:.2f} ')
    reject_rate += 0.05

Reject   E    A    R 
0.05:  0.83 0.83 0.82 
0.10:  0.83 0.84 0.82 
0.15:  0.84 0.85 0.82 
0.20:  0.84 0.86 0.82 
0.25:  0.84 0.87 0.82 
0.30:  0.85 0.88 0.82 
0.35:  0.85 0.90 0.83 
0.40:  0.85 0.91 0.83 
0.45:  0.86 0.92 0.83 
0.50:  0.86 0.93 0.82 
0.55:  0.86 0.93 0.82 
0.60:  0.86 0.94 0.82 
0.65:  0.87 0.94 0.82 
0.70:  0.87 0.95 0.84 
0.75:  0.87 0.95 0.82 
0.80:  0.87 0.96 0.82 
0.85:  0.88 0.97 0.84 
0.90:  0.88 0.98 0.80 
0.95:  0.88 0.98 0.85 


In [54]:
# Notebook display: echo the per-step mean accuracies of the
# epistemic-uncertainty rejection curve.
ARC_eU_mean

[0.8297402597402599,
 0.8335616438356166,
 0.8373913043478263,
 0.8403076923076922,
 0.8432786885245902,
 0.8450877192982454,
 0.8471698113207549,
 0.8530612244897959,
 0.8588888888888888,
 0.8607317073170733,
 0.8605405405405406,
 0.8627272727272727,
 0.8651724137931035,
 0.8688000000000001,
 0.8719047619047617,
 0.8705882352941176,
 0.8753846153846154,
 0.8766666666666666,
 0.88]