In [1]:
# Set notebook to use only one GPU
%env CUDA_VISIBLE_DEVICES=1     
%matplotlib inline

env: CUDA_VISIBLE_DEVICES=1


In [2]:
from result_analysis_functions import *

Using TensorFlow backend


Keras Uncertainty will use standalone Keras backend

# DUQ

In [None]:
from sklearn.metrics import roc_auc_score, auc
import matplotlib.patches as mpatches
from numpy import round

'''
Gets a numpy array down to a 2D array
'''
def get_in_shape(data):
    # If data has shape of 2 elements or less, assume it's already in shape
    if len(data.shape) < 3:
        return data
    else:
        while len(data.shape) > 2:
            data = np.vstack(data)
        return data


'''
data can be whatever shape.
manual roc plot creation reqs:
    y_pred: (9*50, 50, 576, 4)
'''
def make_roc_plot(y_true, y_pred, isStandard, unc_method):
    '''
    y_pred can be either of shape (50, 9, 50, 576, 4) or  (9, 50, 576, 4). We need it in shape (X, 4).
    y_true can be either of shape (50, 9, 576, 4) or (9, 576, 4).
    So apply same algorithm to get these sets into the shape (X, 4)
    '''
    thresholds = np.arange(0, 1.001, 0.001)
    tpr = []
    fpr = []
    unc = y_pred.max(axis=-1).flatten()
    y_true = get_in_shape(y_true)
    y_pred = get_in_shape(y_pred) if isStandard else get_in_shape(y_pred.mean(axis=-3))
    
    # print(f'y_true: {y_true.shape}, y_pred: {y_pred.shape}, certains: {unc.shape}')
    for t in thresholds:
        '''
        Order is reversed because FPR and TPR are reversed for some reason
        '''
        certains = (t < unc)           # Certain when uncertainty is below threshold
        uncertains = (t > unc)
        # Calculate TPR and FPR
        tp = sum(y_pred.argmax(axis=1)[uncertains] != y_true.argmax(axis=1)[uncertains])     # N. preds uncertain predictions that are incorrect
        fn = sum(y_pred.argmax(axis=1)[certains] != y_true.argmax(axis=1)[certains])      # Prediction that's certain and incorrect
        fp = sum(y_pred.argmax(axis=1)[uncertains] == y_true.argmax(axis=1)[uncertains])   # prediction that's uncertain and correct
        tn = sum(y_pred.argmax(axis=1)[certains] == y_true.argmax(axis=1)[certains]) # Prediction that's certain and correct
        # print(f'tp: {tp} fn: {fn} fp: {fp} tn: {tn}')
        fpr.append(fp / (fp + tn))
        tpr.append(tp / (tp + fn))
    # fig1, ax1 = plt.subplots()
    # hist_correct, bins_correct, _ = ax1.hist(auc, bins=10, density=False, alpha=0.5, label='Correct')
    # fig1.show()
    return np.array(tpr), np.array(fpr)


'''
I calculate AUROC and plot ROC separately because I want to get
mean AUROC of all 50 prediction sets along with their variance.
Then I plot ROC with all 50 prediction sets.
'''
def roc_plot_and_auroc(method, key, unc_method):
    y_pred = []
    y_true = []
    aucs_lst = []
    # isStandard = True if 'standard' in method else False
    isStandard = True
    # num_predictions = 50 if not isStandard else 1
    num_predictions = 1
    # creation of set of 50 predictions, as well as AUROC score calculation
    for n in range(num_predictions):
        # methods = load_predictions(n, 'duq')
        methods = load_dict_from_hdf5('predictions/predictions_duq.h5')
        data = methods[method][key]
        tpr, fpr = make_roc_plot(data['labels'], data['preds'], isStandard, unc_method)
        # print(f'y_true shape: {y_true_roc.shape} y_pred: {y_pred_roc.shape}')
        auroc_score = auc(tpr, fpr)
        aucs_lst.append(auroc_score)
        y_pred.append(data['preds'])
        y_true.append(data['labels'])

    tpr, fpr = make_roc_plot(np.vstack(y_true), np.vstack(y_pred), isStandard, unc_method)

    return tpr, fpr


aucs_test = {'predictive-entropy': {'duq': []}}
key = "test"
print(key)
for unc_name, methods_dict in aucs_test.items():
    print(unc_name)
    for method, auc_lst in methods_dict.items():
        tpr, fpr = roc_plot_and_auroc(method, key, unc_name)
        r = 6
        print(f'{key} AUC: {np.round(1 - auc(tpr, fpr), r) * 100}')
        # Normalizes AREA UNDER CURVE to sum up to 1. y-axis values are meaningless.

key = "lockbox"
print(key)
for unc_name, methods_dict in aucs_test.items():
    print(unc_name)
    for method, auc_lst in methods_dict.items():
        tpr, fpr = roc_plot_and_auroc(method, key, unc_name)
        r = 6
        print(f'{key} AUC: {np.round(1 - auc(tpr, fpr), r) * 100}')
        # Normalizes AREA UNDER CURVE to sum up to 1. y-axis values are meaningless.




test
predictive-entropy
(5184,) [0.38442016 0.92682046 0.29104722 ... 0.8111489  0.38891792 0.36042094]
(5184,) [0.38442016 0.92682046 0.29104722 ... 0.8111489  0.38891792 0.36042094]
test AUC: 65.70949999999999
lockbox
predictive-entropy
(4104,) [0.35372022 0.47833586 0.78228265 ... 0.33024693 0.43278146 0.79511464]
(4104,) [0.35372022 0.47833586 0.78228265 ... 0.33024693 0.43278146 0.79511464]
lockbox AUC: 73.1019


In [3]:
methods = load_dict_from_hdf5('predictions/predictions_duq.h5')
data = methods['duq']['test']

In [8]:
isStandard = True   # Because DUQ is only 1 forward pass

acc = []
print(f'data shape: {data["preds"].shape}')
data = avg_forward_passes(data) if not isStandard else data
print(f'data shape: {data["preds"].shape}')
y_preds = data["preds"].argmax(axis=-1)
y_trues = data["labels"].argmax(axis=-1)

# Get accuracy of each subject
for idx, subject in enumerate(y_trues):
    print(idx, subject.shape)
    score = accuracy_score(y_pred=subject, y_true=y_preds[idx], normalize=True)
    acc.append(score)

data['labels'].shape


data shape: (9, 576, 4)
data shape: (9, 576, 4)
0 (576,)
1 (576,)
2 (576,)
3 (576,)
4 (576,)
5 (576,)
6 (576,)
7 (576,)
8 (576,)


(9, 576, 4)