In [1]:
# Set notebook to use only one GPU
%env CUDA_VISIBLE_DEVICES=1     
%matplotlib inline

env: CUDA_VISIBLE_DEVICES=1


In [2]:
from result_analysis_functions import *
from models_bachelors import *

Using TensorFlow backend


Keras Uncertainty will use standalone Keras backend

Sidenote:
- `isStandard` is True for any method whose predictions form a 3D array of shape (9, 576, 4) when the inputs have shape (9, 576). It is therefore True for standard, standard_dropconnect and DUQ, and False for every other method, because the prediction sets of those other methods have either 4 or 5 dimensions.

# DUQ

In [3]:
from sklearn.metrics import roc_auc_score, auc
import matplotlib.patches as mpatches
from numpy import round

def get_in_shape(data):
    '''
    Gets a numpy array down to a 2D array.

    Arrays with fewer than three dimensions are returned untouched. Any other
    array of shape (d0, ..., dk, C) is returned with shape (d0 * ... * dk, C).
    Rows keep their C-order position, which is the same ordering that
    repeatedly calling np.vstack on the array produces.

    Parameters
    ----------
    data : numpy.ndarray
        Array of any rank (anything exposing ``.shape`` that np.asarray accepts).

    Returns
    -------
    numpy.ndarray
        ``data`` itself when it has at most two dimensions, otherwise a 2D
        array. The 2D result may share memory with ``data``, so copy it before
        modifying it in place.
    '''
    # If data has shape of 2 elements or less, assume it's already in shape
    if len(data.shape) < 3:
        return data
    # A single reshape replaces one np.vstack pass (and one full copy) per
    # collapsed axis. The row count is spelled out instead of using -1 so that
    # arrays containing a zero-length axis can still be reshaped.
    n_rows = int(np.prod(data.shape[:-1], dtype=np.int64))
    return np.asarray(data).reshape(n_rows, data.shape[-1])


def make_roc_plot(y_true, y_pred, isStandard, unc_method):
    '''
    Sweep an uncertainty threshold and return the two rate curves it produces.

    Despite the name, nothing is plotted; the function only computes the curves.

    Per the original notes, y_pred can be either of shape (50, 9, 50, 576, 4)
    or (9, 50, 576, 4) and y_true either (50, 9, 576, 4) or (9, 576, 4)
    (the manual ROC creation was written for y_pred of shape
    (9*50, 50, 576, 4)). Both are brought into shape (X, 4) with get_in_shape.
    When isStandard is False, y_pred is averaged over axis -3 first
    (presumably the forward-pass axis - TODO confirm).

    Parameters
    ----------
    y_true : numpy.ndarray
        Labels; the class is taken as the argmax over the last axis.
    y_pred : numpy.ndarray
        Predictions; the class is taken as the argmax over the last axis.
    isStandard : bool
        Forwarded to get_uncertainty; also decides whether y_pred is averaged
        over axis -3 before being flattened.
    unc_method : str
        Name of the uncertainty measure, forwarded to get_uncertainty.

    Returns
    -------
    (tpr, fpr) : tuple of numpy.ndarray, one entry per threshold
        tpr[i] is the fraction of misclassified samples whose uncertainty is
        strictly below thresholds[i]; fpr[i] is the same fraction for the
        correctly classified samples. Samples whose uncertainty equals the
        threshold are excluded from both sides of the ratio. Callers in this
        notebook pass the pair to auc(tpr, fpr), i.e. with tpr on the x-axis;
        integrated that way the area is the probability that a misclassified
        sample is more uncertain than a correct one.
        An entry is nan when its denominator is zero.
    '''
    # NOTE(review): thresholds only cover [0, 1]; if get_uncertainty can return
    # values above 1 the curves never reach 1 - confirm the measure is normalised.
    thresholds = np.arange(0, 1.001, 0.001)
    unc = get_uncertainty(y_pred, unc_method, isStandard).flatten()
    y_true = get_in_shape(y_true)
    y_pred = get_in_shape(y_pred) if isStandard else get_in_shape(y_pred.mean(axis=-3))

    # Whether a prediction is right does not depend on the threshold, so the
    # argmax comparison is done once instead of four times per threshold.
    incorrect = y_pred.argmax(axis=1) != y_true.argmax(axis=1)
    correct = ~incorrect

    tpr = []
    fpr = []    # previously never initialised inside the function
    for t in thresholds:
        above = (t < unc)   # uncertainty strictly above the threshold
        below = (t > unc)   # uncertainty strictly below the threshold
        # Boolean indexing (rather than `&`) keeps the IndexError raised when
        # unc and the predictions disagree in length.
        tp = incorrect[below].sum()
        fn = incorrect[above].sum()
        fp = correct[below].sum()
        tn = correct[above].sum()
        fpr.append(fp / (fp + tn))
        tpr.append(tp / (tp + fn))
    return np.array(tpr), np.array(fpr)


def roc_plot_and_auroc(method, key, unc_method,
                       predictions_path='predictions/predictions_ensemble_dropout.h5'):
    '''
    Load one prediction file and return its rate curves from make_roc_plot.

    The original intent was to compute the AUROC of each of the 50 prediction
    sets (to report mean and variance) and then the ROC over all sets.
    Only a single file is loaded at the moment, so the curves are computed once
    on that file. Callers derive the AUROC themselves with auc(tpr, fpr).
    TODO: iterate over the numbered prediction files once they are wired in.

    Parameters
    ----------
    method : str
        Key of the method inside the loaded file; also passed to checkIfStandard.
    key : str
        Split to evaluate; this notebook uses 'test' and 'lockbox'.
    unc_method : str
        Name of the uncertainty measure, forwarded to make_roc_plot.
    predictions_path : str, optional
        HDF5 file to read. Defaults to the ensemble-dropout file that used to
        be hard-coded, so existing three-argument calls behave as before.

    Returns
    -------
    (tpr, fpr) : tuple of numpy.ndarray
        Exactly what make_roc_plot returns for the selected split.
    '''
    methods = load_dict_from_hdf5(predictions_path)
    data = methods[method][key]
    isStandard = checkIfStandard(method)
    # The former loop ran once, computed the same curves twice and threw the
    # per-set AUROC away; a single call gives the same result.
    return make_roc_plot(data['labels'], data['preds'], isStandard, unc_method)


# Uncertainty measure -> methods to evaluate with it.
aucs_test = {'predictive-entropy': {'ensemble_dropout': []}}        # for ensemble, isStandard=False, unc=get_uncertainty(y_preds, unc_method).flatten()

# The same report is produced for both splits, so loop instead of duplicating the cell body.
for key in ("test", "lockbox"):
    print(key)
    for unc_name, methods_dict in aucs_test.items():
        print(unc_name)
        for method, auc_lst in methods_dict.items():
            tpr, fpr = roc_plot_and_auroc(method, key, unc_name)
            r = 6
            # Scale to a percentage BEFORE rounding: rounding first and then
            # multiplying by 100 re-introduces float noise (e.g. 67.61749999999999).
            # r decimals on the fraction correspond to r - 2 decimals on the percentage.
            print(f'{key} AUC: {np.round(auc(tpr, fpr) * 100, r - 2)}')




test
predictive-entropy


test AUC: 67.61749999999999
lockbox
predictive-entropy
lockbox AUC: 76.8438


In [3]:
# Load the stored DUQ predictions and keep only the test split; the cell below
# reads its 'preds' and 'labels' entries.
methods = load_dict_from_hdf5('predictions/predictions_duq.h5')
data = methods['duq']['test']

In [8]:
isStandard = True   # Because DUQ is only 1 forward pass

acc = []    # one accuracy value per entry along the first axis (one per subject)
print(f'data shape: {data["preds"].shape}')
# Only methods with several forward passes need averaging; DUQ is used as is.
data = avg_forward_passes(data) if not isStandard else data
print(f'data shape: {data["preds"].shape}')
# Turn class scores / one-hot labels into class indices.
y_preds = data["preds"].argmax(axis=-1)
y_trues = data["labels"].argmax(axis=-1)

# Get accuracy of each subject
for idx, subject_true in enumerate(y_trues):
    print(idx, subject_true.shape)
    # The labels used to be passed as y_pred and the predictions as y_true.
    # Accuracy is symmetric so the score is unchanged, but the arguments now
    # say what they are.
    score = accuracy_score(y_true=subject_true, y_pred=y_preds[idx], normalize=True)
    acc.append(score)

data['labels'].shape


data shape: (9, 576, 4)
data shape: (9, 576, 4)
0 (576,)
1 (576,)
2 (576,)
3 (576,)
4 (576,)
5 (576,)
6 (576,)
7 (576,)
8 (576,)


(9, 576, 4)

# Per-subject AUROC

In [None]:
'''
Per-subject uncertainties and AUROC
This is exactly what I need:
    - Per subject AUROC. This can only be done with array of shape (9, 576).
        - start w/ (50, 9, 50, 576, 4) for a method.
        - Mean axis=0 -> (9, 50, 576, 4)
        - Get uncertainties -> (9, 576) -> Mean axis -1=Avg. uncertainties -> (9, 1)
        - For each subject in axis 0, calculate AUROC to get final array of (9, 1)
    - Array of shape (9, 1) for uncertainties
        - Get uncertainties -> (9, 576) -> Mean axis -1=Avg. uncertainties -> (9, 1)
'''

from sklearn.metrics import auc
import matplotlib.patches as mpatches
from numpy import round

def get_fpr_tpr(y_true, y_pred, unc, isStandard):
    '''
    Sweep an uncertainty threshold for one subject and return the rate curves.

    data can be whatever shape; the manual ROC creation was written for
    y_pred of shape (50, 576, 4) (per the original notes). y_true and y_pred
    are brought down to 2D with get_in_shape. When isStandard is False, y_pred
    is averaged over axis -3 first (presumably the forward-pass axis - TODO
    confirm).

    Parameters
    ----------
    y_true : numpy.ndarray
        Labels; the class is taken as the argmax over the last axis.
    y_pred : numpy.ndarray
        Predictions; the class is taken as the argmax over the last axis.
    unc : array-like
        One uncertainty value per sample. It is now an explicit argument, as
        the four-argument call in get_auroc expects; it used to be read from a
        name that was never defined inside the function.
    isStandard : bool
        Whether y_pred already holds a single prediction per sample.

    Returns
    -------
    (tpr, fpr) : tuple of numpy.ndarray, one entry per threshold
        tpr[i] is the fraction of misclassified samples whose uncertainty is
        strictly below thresholds[i]; fpr[i] is the same fraction for the
        correctly classified samples. Samples whose uncertainty equals the
        threshold are excluded from both sides of the ratio. An entry is nan
        when its denominator is zero.
    '''
    # NOTE(review): thresholds only cover [0, 1]; confirm the uncertainty
    # measure cannot exceed 1, otherwise the curves never reach 1.
    thresholds = np.arange(0, 1.001, 0.001)
    unc = np.asarray(unc).flatten()
    y_true = get_in_shape(y_true)
    # Average the forward passes exactly once: a second mean(axis=-3) on the
    # already reduced 2D array is out of bounds.
    y_pred = get_in_shape(y_pred) if isStandard else get_in_shape(y_pred.mean(axis=-3))

    # Correctness is threshold-independent, so compute it once outside the loop.
    incorrect = y_pred.argmax(axis=1) != y_true.argmax(axis=1)
    correct = ~incorrect

    tpr = []
    fpr = []
    for t in thresholds:
        above = (t < unc)   # uncertainty strictly above the threshold
        below = (t > unc)   # uncertainty strictly below the threshold
        tp = incorrect[below].sum()
        fn = incorrect[above].sum()
        fp = correct[below].sum()
        tn = correct[above].sum()
        fpr.append(fp / (fp + tn))
        tpr.append(tp / (tp + fn))
    return np.array(tpr), np.array(fpr)

def get_auroc(y_true, y_pred, unc, isStandard):
    '''
    Area under the curve built from the two arrays returned by get_fpr_tpr.

    All four arguments are forwarded unchanged to get_fpr_tpr. The first
    returned array is handed to auc as its first argument and the second
    returned array as its second argument.
    '''
    rates = get_fpr_tpr(y_true, y_pred, unc, isStandard)
    first_rate, second_rate = rates
    return auc(first_rate, second_rate)

def per_subject_metrics(data, isStandard, key, unc_method):
    '''
    Compute one AUROC and one mean uncertainty per subject.

    Parameters
    ----------
    data : dict
        Mapping indexed by ``key``; the selected entry must provide 'labels'
        and 'preds'.
    isStandard : bool
        Forwarded to get_uncertainty and get_auroc. Per the notebook's notes,
        standard methods have per-subject predictions of shape (9, 576, 4) and
        the others (9, 50, 576, 4).
    key : str
        Which entry of ``data`` to evaluate (whether lockbox or test).
    unc_method : str
        Name of the uncertainty measure, forwarded to get_uncertainty.

    Returns
    -------
    (aurocs, mean_uncertainties) : tuple of numpy.ndarray
        ``aurocs`` has one value per index of the first prediction axis;
        ``mean_uncertainties`` is the uncertainty array averaged over axis 1.
    '''
    key_set = data[key]        # Whether lockbox or preds of the method
    y_true = np.asarray(key_set['labels'])
    y_preds = np.asarray(key_set['preds'])

    # The first axis is only a prediction-set axis (e.g. 50) when the array
    # has one more dimension than a per-subject array. Averaging axis 0
    # unconditionally collapsed the SUBJECT axis for methods without sets.
    per_subject_ndim = 3 if isStandard else 4
    if y_preds.ndim == per_subject_ndim + 1:
        y_preds = y_preds.mean(axis=0)

    # NOTE(review): labels with a leading set axis (e.g. (50, 9, 576, 4)) are
    # assumed to be identical across sets, so the first set is used - confirm.
    if y_true.ndim > 3:
        y_true = y_true[0]

    unc = get_uncertainty(y_preds, unc_method, isStandard)
    per_subject_aucs = [
        get_auroc(y_true[subject_id], y_preds[subject_id], unc[subject_id], isStandard)
        for subject_id in range(y_preds.shape[0])
    ]

    return np.array(per_subject_aucs), unc.mean(axis=1)

    

def do_everything(data, method, key, unc_method):
    '''
    Per-subject AUROCs and uncertainties for one method.

    data: passed straight to per_subject_metrics, which indexes it with key
          (original note: UQ preds have shape (50, 9, 50, 576, 4))
    method: 'mcdropconnect'/'mcdropout'/'standard'/'standard_dropconnect'
    key: 'test'/'lockbox'
    unc_method: forwarded unchanged to per_subject_metrics

    Returns the two values produced by per_subject_metrics, in the same order.
    '''
    standard_flag = checkIfStandard(method)
    per_subject_aurocs, mean_uncertainties = per_subject_metrics(
        data, standard_flag, key, unc_method
    )
    return per_subject_aurocs, mean_uncertainties


def load_predictions_TEST(method, num=None):
    '''
    Load the stored predictions that belong to ``method``.

    Parameters
    ----------
    method : str
        Method name. Names containing 'standard', 'ensemble', 'duq' or
        'flipout' (checked in that order) each map to a dedicated file; any
        other name (MC-Dropout / MC-DropConnect) uses the numbered files
        'predictions/predictions_<n>.h5' (or '.hdf5').
    num : int, optional
        Index of a single numbered file. When omitted for a numbered method,
        every numbered file found in 'predictions/' is loaded and stacked.

    Returns
    -------
    dict
        Whatever load_dict_from_hdf5 returns for a single file, or, for the
        stacked case, ``{method: {split: {'preds': array, 'labels': array}}}``
        for the splits 'test' and 'lockbox'. 'preds' gains a leading axis with
        one entry per file; 'labels' are taken from the lowest-numbered file.

    Raises
    ------
    FileNotFoundError
        If no numbered prediction file exists when one is required.
    '''
    # Imported here so the function does not depend on what the notebook's
    # star-imports happen to expose.
    import glob
    import os
    import re

    if 'standard' in method:        # Like standard_dropout/standard/standard_dropconnect
        return load_dict_from_hdf5(f'predictions/predictions_standard.h5')
    if 'ensemble' in method:        # currently only ensemble based on regular dropout
        return load_dict_from_hdf5(f'predictions/predictions_ensemble_dropout.h5')
    if 'duq' in method:
        return load_dict_from_hdf5(f'predictions/predictions_duq.h5')
    if 'flipout' in method:
        # NOTE(review): with num=None this still asks for
        # 'predictions_flipout_None.h5', exactly as before - decide whether
        # Flipout should be stacked over all its files as well.
        return load_dict_from_hdf5(f'predictions/flipout/predictions_flipout_{num}.h5')

    # Only MC-Dropout and MC-DropConnect get here. glob patterns are not
    # regular expressions, so list the candidates with a glob and pick out the
    # numbered ones with a regex. Both extensions that appear in this notebook
    # are accepted.
    numbered_files = {}
    for path in glob.glob(os.path.join('predictions', 'predictions_*')):
        match = re.fullmatch(r'predictions_(\d+)\.(?:h5|hdf5)', os.path.basename(path))
        if match:
            numbered_files[int(match.group(1))] = path

    if num is not None:
        # Previously this branch loaded the Flipout file for MC methods.
        return load_dict_from_hdf5(
            numbered_files.get(num, f'predictions/predictions_{num}.h5')
        )

    if not numbered_files:
        raise FileNotFoundError(
            "no numbered prediction files (predictions_<n>.h5 / .hdf5) found in 'predictions/'"
        )

    splits = ('test', 'lockbox')
    preds = {split: [] for split in splits}
    labels = {}
    for index in sorted(numbered_files):
        loaded = load_dict_from_hdf5(numbered_files[index])[method]
        for split in splits:
            preds[split].append(loaded[split]['preds'])
            # Labels are stored once (from the first file) as a plain array
            # rather than as a one-element list.
            if split not in labels:
                labels[split] = np.asarray(loaded[split]['labels'])

    return {
        method: {
            split: {'preds': np.array(preds[split]), 'labels': labels[split]}
            for split in splits
        }
    }



unc_methods = ['predictive-entropy', 'mutual-information', 'shannon-entropy']
method_names = ['mcdropconnect', 'mcdropout', 'standard', 'standard_dropconnect',
                'ensemble', 'duq', 'flipout']

# One result slot per (method, uncertainty measure); built programmatically
# instead of repeating the same literal for every method.
dicts = {
    name: {'aucs': {unc_name: [] for unc_name in unc_methods}}
    for name in method_names
}

key = "test"
for method, values in dicts.items():
    data = load_predictions_TEST(method)
    for unc_name in unc_methods:
        # Pass the LOADED predictions of this method (not the empty result
        # dict) together with the uncertainty measure do_everything requires.
        # NOTE(review): the ensemble file is indexed with 'ensemble_dropout'
        # earlier in this notebook, so data['ensemble'] may not exist - confirm
        # the key names stored in each file.
        aurocs, _ = do_everything(data[method], method, key, unc_name)
        print(method)
        values['aucs'][unc_name] = aurocs
