In [1]:
import os
import time
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from src.datasets import load_dataset, preprocess_dataset, prefetch_dataset
from src.utils import *

2024-05-22 12:02:30.724972: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9360] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-22 12:02:30.725038: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-22 12:02:30.725072: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1537] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-05-22 12:02:30.733522: I tensorflow/core/platform/cpu_feature_guard.cc:183] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE3 SSE4.1 SSE4.2 AVX, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from .autonotebook import tqdm as 

In [26]:
cfg = { 'dataset' : 'svhn',
        'model' : 'densenet121',
        'batch_size' : 512,
        'optimizer' : 'Adam',
        'learning_rate' : 0.001,
        'max_epoch' : 300,
        'patience' : 10,}    

model_name = cfg['model']
dataset_name = cfg['dataset']

In [10]:
##############################################################
#
# Compute classification error
#
# #############################################################
ds_train, ds_val, ds_test, ds_info = load_dataset(cfg)
n_classes = ds_info.features['label'].num_classes
ds_train = preprocess_dataset(ds_train, cfg, n_classes, resize=False, normalize=True, onehot=True)
ds_val = preprocess_dataset(ds_val, cfg, n_classes, resize=False, normalize=True, onehot=True)
ds_test = preprocess_dataset(ds_test, cfg, n_classes, resize=False, normalize=True, onehot=True)
ds_train = prefetch_dataset(ds_train, batch_size=cfg['batch_size'])
ds_val = prefetch_dataset(ds_val, batch_size=cfg['batch_size'])
ds_test = prefetch_dataset(ds_test, batch_size=cfg['batch_size'])

train_acc = []
val_acc = []
test_acc = []
for run in range(5):
    tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
    model = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/trained_model.keras')
    train_acc.append(model.evaluate(ds_train, verbose=0)[1])
    val_acc.append(model.evaluate(ds_val, verbose=0)[1])
    test_acc.append(model.evaluate(ds_test, verbose=0)[1])
print(f'Average train error: {(100-np.mean(train_acc)*100):.2f}, std: {(np.std(train_acc)*100):.2f}')
print(f'Average validation error: {(100-np.mean(val_acc)*100):.2f}, std: {(np.std(val_acc)*100):.2f}')
print(f'Average test error: {(100-np.mean(test_acc)*100):.2f}, std: {(np.std(test_acc)*100):.2f}')

Average train error: 0.09, std: 0.03
Average validation error: 1.86, std: 0.04
Average test error: 1.94, std: 0.05


### Softmax

In [28]:
##############################################################
#
# Compute Filtering Accuracy (Softmax)
#
# #############################################################

ds_train, ds_val, ds_test, ds_info = load_dataset(cfg, shuffle=False)
n_classes = ds_info.features['label'].num_classes
ds_val = preprocess_dataset(ds_val, cfg, n_classes, resize=False, normalize=True, onehot=True)
ds_test = preprocess_dataset(ds_test, cfg, n_classes, resize=False, normalize=True, onehot=True)

filtering_acc = []
threshold = []
for run in range(5):
    tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
    print(f'Run: {run+1}')
    model = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/trained_model.keras')
    preds = model.predict(ds_val.batch(cfg['batch_size']), verbose=0)
    preds = np.array([softmax(x) for x in preds])
    true_y = np.argmax([y for x,y in ds_val], axis=1)
    pred_y = np.argmax(preds, axis=1)
    true_label = np.equal(true_y, pred_y).astype(int) # assign 0 if true_y != pred_y, assign 1 if true_y == pred_y
    softmax_val = np.max(preds, axis=1)
    opt_threshold = compute_opt_threshold(softmax_val, true_label)
    threshold.append(opt_threshold)
    
    preds = model.predict(ds_test.batch(cfg['batch_size']), verbose=0)
    preds = np.array([softmax(x) for x in preds])
    true_y = np.argmax([y for x,y in ds_test], axis=1)
    pred_y = np.argmax(preds, axis=1)
    true_label = np.equal(true_y, pred_y).astype(int) # assign 0 if true_y != pred_y, assign 1 if true_y == pred_y
    softmax_val = np.max(preds, axis=1)
    test_filtering_acc = compute_filtering_acc(softmax_val, true_label, opt_threshold)
    filtering_acc.append(test_filtering_acc)
    
    print(f'Opt. threshold: {opt_threshold:.3f}, Test filtering error:{100-test_filtering_acc:.2f}')
    exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration'
    np.savez(f'{exp_name}/softmax_filtering_accuracy.npz', opt_threshold=opt_threshold, test_filtering_acc=test_filtering_acc)

print('-----------------------------')
print(f'Average opt. threshold: {(np.mean(threshold)):.3f}, std: {(np.std(threshold)):.3f}')
print(f'Average test filtering error: {(100-np.mean(filtering_acc)):.2f}, std: {(np.std(filtering_acc)):.2f}')

Run: 1
Opt. threshold: 0.182, Test filtering error:5.45
Run: 2
Opt. threshold: 0.204, Test filtering error:5.81
Run: 3
Opt. threshold: 0.189, Test filtering error:5.84
Run: 4
Opt. threshold: 0.206, Test filtering error:5.68
Run: 5
Opt. threshold: 0.183, Test filtering error:7.01
-----------------------------
Average opt. threshold: 0.193, std: 0.010
Average test filtering error: 5.96, std: 0.55


In [None]:
threshold = []
filtering_acc = []
for run in range(5):
    tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
    exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration'
    f = np.load(f'{exp_name}/softmax_filtering_accuracy.npz')
    opt_threshold = f['opt_threshold']
    test_filtering_acc = f['test_filtering_acc']
    threshold.append(opt_threshold)
    filtering_acc.append(test_filtering_acc)

print('-----------------------------')
print(f'Average opt. threshold: {(np.mean(threshold)):.3f}, std: {(np.std(threshold)):.3f}')
print(f'Average test filtering error: {(100-np.mean(filtering_acc)):.2f}, std: {(np.std(filtering_acc)):.2f}')

In [8]:
##############################################################
#
# Compute Filtering Accuracy (Softmax)
#
# #############################################################

ds_train, ds_val, ds_test, ds_info = load_dataset(cfg, shuffle=False)
n_classes = ds_info.features['label'].num_classes
ds_val = preprocess_dataset(ds_val, cfg, n_classes, resize=False, normalize=True, onehot=True)
ds_test = preprocess_dataset(ds_test, cfg, n_classes, resize=False, normalize=True, onehot=True)

filtering_acc = []
threshold = []
for run in range(5):
    tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
    print(f'Run: {run+1}')
    model = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/trained_model.keras')
    temp = np.load(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/opt_temp.npy')
    logits_layer = model.layers[-1]
    logits_layer.activation = None 
    logits_model = tf.keras.models.Model(inputs=model.input, outputs=logits_layer.output)
    preds = logits_model.predict(ds_val.batch(cfg['batch_size']), verbose=0)
    preds = np.array([softmax(x) for x in preds])
    true_y = np.argmax([y for x,y in ds_val], axis=1)
    pred_y = np.argmax(preds, axis=1)
    true_label = np.equal(true_y, pred_y).astype(int) # assign 0 if true_y != pred_y, assign 1 if true_y == pred_y
    softmax_val = np.max(preds, axis=1)
    opt_threshold = compute_opt_threshold(softmax_val, true_label)
    threshold.append(opt_threshold)
    
    preds = logits_model.predict(ds_test.batch(cfg['batch_size']), verbose=0)
    preds = np.array([softmax(x) for x in preds])
    true_y = np.argmax([y for x,y in ds_test], axis=1)
    pred_y = np.argmax(preds, axis=1)
    true_label = np.equal(true_y, pred_y).astype(int) # assign 0 if true_y != pred_y, assign 1 if true_y == pred_y
    softmax_val = np.max(preds, axis=1)
    test_filtering_acc = compute_filtering_acc(softmax_val, true_label, opt_threshold)
    filtering_acc.append(test_filtering_acc)
    
    print(f'Opt. threshold: {opt_threshold:.3f}, Test filtering error:{100-test_filtering_acc:.2f}')
    exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration'
    np.savez(f'{exp_name}/calibrated_softmax_filtering_accuracy.npz', opt_threshold=opt_threshold, test_filtering_acc=test_filtering_acc)

print('-----------------------------')
print(f'Average opt. threshold: {(np.mean(threshold)):.3f}, std: {(np.std(threshold)):.3f}')
print(f'Average test filtering error: {(100-np.mean(filtering_acc)):.2f}, std: {(np.std(filtering_acc)):.2f}')

Run: 1
Opt. threshold: 0.528, Test filtering error:6.41
Run: 2
Opt. threshold: 0.379, Test filtering error:6.87
Run: 3
Opt. threshold: 0.504, Test filtering error:6.75
Run: 4
Opt. threshold: 0.584, Test filtering error:6.74
Run: 5
Opt. threshold: 0.382, Test filtering error:7.15
-----------------------------
Average opt. threshold: 0.476, std: 0.082
Average test filtering error: 6.78, std: 0.24


### PMI

In [3]:
##############################################################
#
# Compute Filtering Accuracy (with softmax scaling)
#
# #############################################################

critic = 'separable'
estimator = 'density_ratio_fitting'

for run in range(5):
    tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
    print(f'Run: {run+1}')
    print(f'Critic: {critic}, Estimator: {estimator}')
    exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pmi/{critic}_{estimator}'

    ds_train, ds_val, ds_test, ds_info = load_dataset(cfg, shuffle=False)
    n_classes = ds_info.features['label'].num_classes
    ds_val = preprocess_dataset(ds_val, cfg, n_classes, resize=True, normalize=True, onehot=True)
    ds_test = preprocess_dataset(ds_test, cfg, n_classes, resize=True, normalize=True, onehot=True)
    model = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/trained_model.keras')

    true_y = np.argmax([y for x,y in ds_val], axis=1)
    pred_y = np.argmax(model.predict(ds_val.batch(cfg['batch_size']), verbose=0), axis=1)
    true_label = np.equal(true_y, pred_y).astype(int) # assign 0 if true_y != pred_y, assign 1 if true_y == pred_y
    pmi_class = np.load(f'{exp_name}/pmi_class_val.npy')
    pmi_class = np.array([softmax(x) for x in pmi_class])
    pmi = [pmi_value[pred_value] for pmi_value, pred_value in zip(pmi_class, pred_y)]
    opt_threshold = compute_opt_threshold(pmi, true_label)

    true_y = np.argmax([y for x,y in ds_test], axis=1)
    pred_y = np.argmax(model.predict(ds_test.batch(cfg['batch_size']), verbose=0), axis=1)
    true_label = np.equal(true_y, pred_y).astype(int) # assign 0 if true_y != pred_y, assign 1 if true_y == pred_y
    pmi_class = np.load(f'{exp_name}/pmi_class_test.npy')
    pmi_class = np.array([softmax(x) for x in pmi_class])
    pmi = [pmi_value[pred_value] for pmi_value, pred_value in zip(pmi_class, pred_y)]
    test_filtering_acc = compute_filtering_acc(pmi, true_label, opt_threshold)

    np.savez(f'{exp_name}/scaled_filtering_accuracy.npz', opt_threshold=opt_threshold, test_filtering_acc=test_filtering_acc)
    print(f'Opt. threshold: {opt_threshold:.3f}, Test filtering error:{100-test_filtering_acc:.2f}')

Run: 1
Critic: separable, Estimator: density_ratio_fitting


2024-02-01 12:10:28.875073: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1883] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 78835 MB memory:  -> device: 0, name: NVIDIA A100-SXM4-80GB, pci bus id: 0000:47:00.0, compute capability: 8.0
2024-02-01 12:10:57.619039: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:442] Loaded cuDNN version 8906


Opt. threshold: 0.478, Test filtering error:4.29
Run: 2
Critic: separable, Estimator: density_ratio_fitting
Opt. threshold: 0.431, Test filtering error:4.42
Run: 3
Critic: separable, Estimator: density_ratio_fitting
Opt. threshold: 0.502, Test filtering error:4.38
Run: 4
Critic: separable, Estimator: density_ratio_fitting
Opt. threshold: 0.391, Test filtering error:4.55
Run: 5
Critic: separable, Estimator: density_ratio_fitting
Opt. threshold: 0.053, Test filtering error:4.81


In [18]:
critic = 'separable'
estimator = 'density_ratio_fitting'

threshold = []
filtering_acc = []
for run in range(5):
    tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
    exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pmi/{critic}_{estimator}'
    f = np.load(f'{exp_name}/scaled_filtering_accuracy.npz')
    opt_threshold = f['opt_threshold']
    test_filtering_acc = f['test_filtering_acc']
    threshold.append(opt_threshold)
    filtering_acc.append(test_filtering_acc)

print('-----------------------------')
print(f'Critic: {critic}, Estimator: {estimator}')
print(f'Average opt. threshold: {(np.mean(threshold)):.3f}, std: {(np.std(threshold)):.3f}')
print(f'Average test filtering error: {(100-np.mean(filtering_acc)):.2f}, std: {(np.std(filtering_acc)):.2f}')

-----------------------------
Critic: separable, Estimator: density_ratio_fitting
Average opt. threshold: 0.371, std: 0.163
Average test filtering error: 4.49, std: 0.18


### PVI

In [29]:
##############################################################
#
# Compute Filtering Accuracy (with softmax scaling)
#
# #############################################################

estimator = 'training_from_scratch'

for run in range(5):
    tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
    print(f'Run: {run+1}')
    print(f'Estimator: {estimator}')
    exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pvi/{estimator}'

    ds_train, ds_val, ds_test, ds_info = load_dataset(cfg, shuffle=False)
    n_classes = ds_info.features['label'].num_classes
    ds_val = preprocess_dataset(ds_val, cfg, n_classes, resize=False, normalize=True, onehot=True)
    ds_test = preprocess_dataset(ds_test, cfg, n_classes, resize=False, normalize=True, onehot=True)
    model = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/trained_model.keras')

    true_y = np.argmax([y for x,y in ds_val], axis=1)
    pred_y = np.argmax(model.predict(ds_val.batch(cfg['batch_size']), verbose=0), axis=1)
    true_label = np.equal(true_y, pred_y).astype(int) # assign 0 if true_y != pred_y, assign 1 if true_y == pred_y
    pvi_class = np.load(f'{exp_name}/pvi_calibrated_class_val.npy')
    pvi_class = np.array([softmax(x) for x in pvi_class])
    pvi = [pvi_value[pred_value] for pvi_value, pred_value in zip(pvi_class, pred_y)]
    opt_threshold = compute_opt_threshold(pvi, true_label)

    true_y = np.argmax([y for x,y in ds_test], axis=1)
    pred_y = np.argmax(model.predict(ds_test.batch(cfg['batch_size']), verbose=0), axis=1)
    true_label = np.equal(true_y, pred_y).astype(int) # assign 0 if true_y != pred_y, assign 1 if true_y == pred_y
    pvi_class = np.load(f'{exp_name}/pvi_calibrated_class_test.npy')
    pvi_class = np.array([softmax(x) for x in pvi_class])
    pvi = [pvi_value[pred_value] for pvi_value, pred_value in zip(pvi_class, pred_y)]
    test_filtering_acc = compute_filtering_acc(pvi, true_label, opt_threshold)

    np.savez(f'{exp_name}/calibrated_scaled_filtering_accuracy.npz', opt_threshold=opt_threshold, test_filtering_acc=test_filtering_acc)
    print(f'Opt. threshold: {opt_threshold:.3f}, Test filtering error:{100-test_filtering_acc:.2f}')

Run: 1
Estimator: training_from_scratch
Opt. threshold: nan, Test filtering error:94.05
Run: 2
Estimator: training_from_scratch
Opt. threshold: nan, Test filtering error:94.17
Run: 3
Estimator: training_from_scratch


KeyboardInterrupt: 

In [33]:
np.load(f'{exp_name}/pvi_calibrated_class_test.npy')

array([[nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       ...,
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan]])

In [23]:
estimator = 'training_from_scratch'

threshold = []
filtering_acc = []
for run in range(5):
    tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
    exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pvi/{estimator}'
    f = np.load(f'{exp_name}/calibrated_scaled_filtering_accuracy.npz')
    opt_threshold = f['opt_threshold']
    test_filtering_acc = f['test_filtering_acc']
    threshold.append(opt_threshold)
    filtering_acc.append(test_filtering_acc)

print('-----------------------------')
print(f'Estimator: {estimator}')
print(f'Average opt. threshold: {(np.mean(threshold)):.3f}, std: {(np.std(threshold)):.3f}')
print(f'Average test filtering error: {(100-np.mean(filtering_acc)):.2f}, std: {(np.std(filtering_acc)):.2f}')

-----------------------------
Estimator: training_from_scratch
Average opt. threshold: 0.144, std: 0.046
Average test filtering error: 16.69, std: 0.51


### PSI

In [3]:
##############################################################
#
# Compute Filtering Accuracy (with softmax scaling)
#
# #############################################################

estimator = 'gaussian'

for run in range(5):
    tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
    print(f'Run: {run+1}')
    print(f'Estimator: {estimator}')
    exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/psi/{estimator}'

    ds_train, ds_val, ds_test, ds_info = load_dataset(cfg, shuffle=False)
    n_classes = ds_info.features['label'].num_classes
    ds_val = preprocess_dataset(ds_val, cfg, n_classes, resize=True, normalize=True, onehot=True)
    ds_test = preprocess_dataset(ds_test, cfg, n_classes, resize=True, normalize=True, onehot=True)
    model = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/trained_model.keras')

    true_y = np.argmax([y for x,y in ds_val], axis=1)
    pred_y = np.argmax(model.predict(ds_val.batch(cfg['batch_size']), verbose=0), axis=1)
    true_label = np.equal(true_y, pred_y).astype(int) # assign 0 if true_y != pred_y, assign 1 if true_y == pred_y
    psi_class = np.load(f'{exp_name}/psi_class_val.npy')
    psi_class = np.array([softmax(x) for x in psi_class])
    psi = [psi_value[pred_value] for psi_value, pred_value in zip(psi_class, pred_y)]
    opt_threshold = compute_opt_threshold(psi, true_label)

    true_y = np.argmax([y for x,y in ds_test], axis=1)
    pred_y = np.argmax(model.predict(ds_test.batch(cfg['batch_size']), verbose=0), axis=1)
    true_label = np.equal(true_y, pred_y).astype(int) # assign 0 if true_y != pred_y, assign 1 if true_y == pred_y
    psi_class = np.load(f'{exp_name}/psi_class_test.npy')
    psi_class = np.array([softmax(x) for x in psi_class])
    psi = [psi_value[pred_value] for psi_value, pred_value in zip(psi_class, pred_y)]
    test_filtering_acc = compute_filtering_acc(psi, true_label, opt_threshold)

    np.savez(f'{exp_name}/scaled_filtering_accuracy.npz', opt_threshold=opt_threshold, test_filtering_acc=test_filtering_acc)
    print(f'Opt. threshold: {opt_threshold:.3f}, Test filtering error:{100-test_filtering_acc:.2f}')

Run: 1
Estimator: gaussian


2024-05-22 12:02:37.713537: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1883] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 78835 MB memory:  -> device: 0, name: NVIDIA A100-SXM4-80GB, pci bus id: 0000:47:00.0, compute capability: 8.0
2024-05-22 12:03:03.037671: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:442] Loaded cuDNN version 8906


Opt. threshold: 0.216, Test filtering error:4.19
Run: 2
Estimator: gaussian
Opt. threshold: 0.326, Test filtering error:4.45
Run: 3
Estimator: gaussian
Opt. threshold: 0.218, Test filtering error:4.56
Run: 4
Estimator: gaussian
Opt. threshold: 0.302, Test filtering error:4.64
Run: 5
Estimator: gaussian
Opt. threshold: 0.297, Test filtering error:4.64


In [4]:
estimator = 'gaussian'

threshold = []
filtering_acc = []
for run in range(5):
    tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
    exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/psi/{estimator}'
    f = np.load(f'{exp_name}/scaled_filtering_accuracy.npz')
    opt_threshold = f['opt_threshold']
    test_filtering_acc = f['test_filtering_acc']
    threshold.append(opt_threshold)
    filtering_acc.append(test_filtering_acc)

print('-----------------------------')
print(f'Estimator: {estimator}')
print(f'Average opt. threshold: {(np.mean(threshold)):.3f}, std: {(np.std(threshold)):.3f}')
print(f'Average test filtering error: {(100-np.mean(filtering_acc)):.2f}, std: {(np.std(filtering_acc)):.2f}')

-----------------------------
Estimator: gaussian
Average opt. threshold: 0.272, std: 0.046
Average test filtering error: 4.49, std: 0.17
