In [1]:
import os
import time
import numpy as np
import tensorflow as tf

from src.models import mlp
from src.datasets import load_dataset, preprocess_dataset, prefetch_dataset
from src.psi_estimators import psi_bin_train, psi_bin_val, psi_gauss_train, psi_gauss_val, psi_neural_train, psi_neural_val, psi_rf_train, psi_rf_val
from src.utils import *

2024-05-21 23:00:27.061377: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9360] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-21 23:00:27.061442: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-21 23:00:27.061467: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1537] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-05-21 23:00:27.068637: I tensorflow/core/platform/cpu_feature_guard.cc:183] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE3 SSE4.1 SSE4.2 AVX, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from .autonotebook import tqdm as 

In [2]:
# Experiment configuration: selects the dataset/model pair whose trained
# checkpoints and PSI artefacts the cells below read and write.
cfg = {
    'dataset': 'mnist',
    'model': 'mlp',
    'batch_size': 512,
    'optimizer': 'Adam',
    'learning_rate': 0.001,
    'max_epoch': 300,
    'patience': 10,
}

# Short aliases used when building result paths.
model_name, dataset_name = cfg['model'], cfg['dataset']

In [6]:
##############################################################
#
# Compute classification error
#
# #############################################################
# Evaluate the five saved checkpoints on the train/validation/test splits and
# report the mean error (100 - accuracy in percent) and its standard deviation.
ds_train, ds_val, ds_test, ds_info = load_dataset(cfg)
n_classes = ds_info.features['label'].num_classes

# Preprocess all three splits first, then hand each one to prefetch_dataset.
ds_train, ds_val, ds_test = [
    preprocess_dataset(split, cfg, n_classes, resize=False, normalize=True, onehot=True)
    for split in (ds_train, ds_val, ds_test)
]
ds_train, ds_val, ds_test = [
    prefetch_dataset(split, batch_size=cfg['batch_size'])
    for split in (ds_train, ds_val, ds_test)
]

train_acc, val_acc, test_acc = [], [], []
for run in range(5):
    tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
    model = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/trained_model.keras')
    # evaluate() returns a list; index 1 is taken as the accuracy metric
    # (presumably [loss, accuracy] -- depends on how the model was compiled).
    for acc_history, split in ((train_acc, ds_train), (val_acc, ds_val), (test_acc, ds_test)):
        acc_history.append(model.evaluate(split, verbose=0)[1])

print(f'Average train error: {(100-np.mean(train_acc)*100):.2f}, std: {(np.std(train_acc)*100):.2f}')
print(f'Average validation error: {(100-np.mean(val_acc)*100):.2f}, std: {(np.std(val_acc)*100):.2f}')
print(f'Average test error: {(100-np.mean(test_acc)*100):.2f}, std: {(np.std(test_acc)*100):.2f}')

Average train error: 0.09, std: 0.03
Average validation error: 1.86, std: 0.04
Average test error: 1.94, std: 0.05


### Binning

In [10]:
# For each of the five trained runs: fit the binning PSI estimator on the
# activations of the model's second-to-last layer, then score every
# validation/test sample against every candidate class. Outputs are written
# under <run>/calibration/psi/binning.
for run in range(5):
    tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
    exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/psi/binning'
    if not os.path.exists(exp_name):
        print("Making directory", exp_name)
        os.makedirs(exp_name)

    ds_train, ds_val, ds_test, ds_info = load_dataset(cfg)
    n_classes = ds_info.features['label'].num_classes
    ds_train = preprocess_dataset(ds_train, cfg, n_classes, resize=False, normalize=True, onehot=True)
    ds_val = preprocess_dataset(ds_val, cfg, n_classes, resize=False, normalize=True, onehot=True)
    ds_test = preprocess_dataset(ds_test, cfg, n_classes, resize=False, normalize=True, onehot=True)

    model = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/trained_model.keras')
    int_model = tf.keras.Model(inputs=model.inputs, outputs=model.layers[-2].output)

    # Run the feature extractor exactly once per split. The activations do not
    # depend on n_projs or on the candidate class, so recomputing them inside
    # those loops only repeats forward passes. A single pass also guarantees
    # that the per-class PSI columns saved below refer to the same sample
    # order (separate passes would only agree if the dataset iterates
    # deterministically -- not visible from this notebook).
    activations = {}
    for split_name, split_ds in (('train', ds_train), ('val', ds_val), ('test', ds_test)):
        ds_activity = split_ds.batch(cfg['batch_size']).map(lambda x, y: (int_model(x), y)).unbatch()
        feats, labels = zip(*ds_activity)
        activations[split_name] = (np.array([val.numpy() for val in feats]),
                                   np.array([val.numpy() for val in labels]))
    x_train, y_train = activations['train']

    # Row k is the float32 one-hot vector for class k (same dtype tf.one_hot
    # produces by default).
    class_rows = np.eye(n_classes, dtype=np.float32)

    ##############################################################
    #
    # Train PSI Model
    #
    # #############################################################

    print(f'Training PSI model (binning)...')
    for n_projs in [50,100,250,500]:
        print(f'N_projs: {n_projs}')

        psi_data = psi_bin_train(x_train, y_train, n_projs, n_bins=50)
        np.save(f'{exp_name}/trained_model_{n_projs}_projs.npy', psi_data)

        ##############################################################
        #
        # Compute PSI for all validation and test samples
        #
        # #############################################################

        # Reload the artefact that was just written so the scores are computed
        # from exactly what is persisted on disk. allow_pickle is needed
        # because the estimator state is stored as a pickled object; only load
        # files produced by this notebook.
        psi_data = np.load(f'{exp_name}/trained_model_{n_projs}_projs.npy', allow_pickle=True).item()

        for split_label, split_name in (('validation', 'val'), ('test', 'test')):
            print(f'Computing PSI for all {split_label} samples and for all classes...')
            x_split = activations[split_name][0]
            psi_class = []
            for k in range(n_classes):
                # Pretend every sample belongs to class k.
                y_k = np.tile(class_rows[k], (len(x_split), 1))
                psi, pmi_arr = psi_bin_val(x_split, y_k, psi_data, n_projs)
                psi_class.append(psi)
            # Transposed so that rows index samples and columns index classes.
            np.save(f'{exp_name}/psi_class_{n_projs}_projs_{split_name}.npy', np.array(psi_class).T)

Training PSI model (binning)...
N_projs: 50
Computing PSI for all validation samples and for all classes...
Computing PSI for all test samples and for all classes...
N_projs: 100
Computing PSI for all validation samples and for all classes...
Computing PSI for all test samples and for all classes...
N_projs: 250
Computing PSI for all validation samples and for all classes...
Computing PSI for all test samples and for all classes...
N_projs: 500
Computing PSI for all validation samples and for all classes...
Computing PSI for all test samples and for all classes...
Training PSI model (binning)...
N_projs: 50
Computing PSI for all validation samples and for all classes...
Computing PSI for all test samples and for all classes...
N_projs: 100
Computing PSI for all validation samples and for all classes...
Computing PSI for all test samples and for all classes...
N_projs: 250
Computing PSI for all validation samples and for all classes...
Computing PSI for all test samples and for all clas

### Gaussian

In [11]:
# For each of the five trained runs: fit the Gaussian PSI estimator on the
# activations of the model's second-to-last layer, then score every
# validation/test sample against every candidate class. Outputs are written
# under <run>/calibration/psi/gaussian.
for run in range(5):
    tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
    exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/psi/gaussian'
    if not os.path.exists(exp_name):
        print("Making directory", exp_name)
        os.makedirs(exp_name)

    ds_train, ds_val, ds_test, ds_info = load_dataset(cfg)
    n_classes = ds_info.features['label'].num_classes
    ds_train = preprocess_dataset(ds_train, cfg, n_classes, resize=False, normalize=True, onehot=True)
    ds_val = preprocess_dataset(ds_val, cfg, n_classes, resize=False, normalize=True, onehot=True)
    ds_test = preprocess_dataset(ds_test, cfg, n_classes, resize=False, normalize=True, onehot=True)

    model = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/trained_model.keras')
    int_model = tf.keras.Model(inputs=model.inputs, outputs=model.layers[-2].output)

    # Run the feature extractor exactly once per split. The activations do not
    # depend on n_projs or on the candidate class, so recomputing them inside
    # those loops only repeats forward passes. A single pass also guarantees
    # that the per-class PSI columns saved below refer to the same sample
    # order (separate passes would only agree if the dataset iterates
    # deterministically -- not visible from this notebook).
    activations = {}
    for split_name, split_ds in (('train', ds_train), ('val', ds_val), ('test', ds_test)):
        ds_activity = split_ds.batch(cfg['batch_size']).map(lambda x, y: (int_model(x), y)).unbatch()
        feats, labels = zip(*ds_activity)
        activations[split_name] = (np.array([val.numpy() for val in feats]),
                                   np.array([val.numpy() for val in labels]))
    x_train, y_train = activations['train']

    # Row k is the float32 one-hot vector for class k (same dtype tf.one_hot
    # produces by default).
    class_rows = np.eye(n_classes, dtype=np.float32)

    ##############################################################
    #
    # Train PSI Model
    #
    # #############################################################

    print(f'Training PSI model (gaussian)...')

    for n_projs in [50,100,250,500]:
        print(f'N_projs: {n_projs}')

        psi_data = psi_gauss_train(x_train, y_train, n_projs)
        np.save(f'{exp_name}/trained_model_{n_projs}_projs.npy', psi_data)

        ##############################################################
        #
        # Compute PSI for all validation and test samples
        #
        # #############################################################

        # Reload the artefact that was just written so the scores are computed
        # from exactly what is persisted on disk. allow_pickle is needed
        # because the estimator state is stored as a pickled object; only load
        # files produced by this notebook.
        psi_data = np.load(f'{exp_name}/trained_model_{n_projs}_projs.npy', allow_pickle=True).item()

        for split_label, split_name in (('validation', 'val'), ('test', 'test')):
            print(f'Computing PSI for all {split_label} samples and for all classes...')
            x_split = activations[split_name][0]
            psi_class = []
            for k in range(n_classes):
                # Pretend every sample belongs to class k.
                y_k = np.tile(class_rows[k], (len(x_split), 1))
                psi, pmi_arr = psi_gauss_val(x_split, y_k, psi_data, n_projs)
                psi_class.append(psi)
            # Transposed so that rows index samples and columns index classes.
            np.save(f'{exp_name}/psi_class_{n_projs}_projs_{split_name}.npy', np.array(psi_class).T)

Training PSI model (gaussian)...
N_projs: 50
Computing PSI for all validation samples and for all classes...
Computing PSI for all test samples and for all classes...
N_projs: 100
Computing PSI for all validation samples and for all classes...
Computing PSI for all test samples and for all classes...
N_projs: 250
Computing PSI for all validation samples and for all classes...
Computing PSI for all test samples and for all classes...
N_projs: 500
Computing PSI for all validation samples and for all classes...
Computing PSI for all test samples and for all classes...
Training PSI model (gaussian)...
N_projs: 50
Computing PSI for all validation samples and for all classes...
Computing PSI for all test samples and for all classes...
N_projs: 100
Computing PSI for all validation samples and for all classes...
Computing PSI for all test samples and for all classes...
N_projs: 250
Computing PSI for all validation samples and for all classes...
Computing PSI for all test samples and for all cl

### Random Forest

In [3]:
# For each of the five trained runs: fit the random-forest PSI estimator on the
# activations of the model's second-to-last layer (once, with 500 projections),
# then score every validation/test sample against every candidate class for
# several projection counts. Outputs are written under
# <run>/calibration/psi/random_forest.
for run in range(5):
    tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
    exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/psi/random_forest'
    if not os.path.exists(exp_name):
        print("Making directory", exp_name)
        os.makedirs(exp_name)

    ds_train, ds_val, ds_test, ds_info = load_dataset(cfg)
    n_classes = ds_info.features['label'].num_classes
    ds_train = preprocess_dataset(ds_train, cfg, n_classes, resize=False, normalize=True, onehot=True)
    ds_val = preprocess_dataset(ds_val, cfg, n_classes, resize=False, normalize=True, onehot=True)
    ds_test = preprocess_dataset(ds_test, cfg, n_classes, resize=False, normalize=True, onehot=True)

    model = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/trained_model.keras')
    int_model = tf.keras.Model(inputs=model.inputs, outputs=model.layers[-2].output)

    # Run the feature extractor exactly once per split; the activations depend
    # neither on the candidate class nor on n_projs.
    activations = {}
    for split_name, split_ds in (('train', ds_train), ('val', ds_val), ('test', ds_test)):
        ds_activity = split_ds.batch(cfg['batch_size']).map(lambda x, y: (int_model(x), y)).unbatch()
        feats, labels = zip(*ds_activity)
        activations[split_name] = (np.array([val.numpy() for val in feats]),
                                   np.array([val.numpy() for val in labels]))

    ##############################################################
    #
    # Train PSI Model
    #
    # #############################################################

    print(f'Training PSI model (random forest)...')
    x, y_onehot = activations['train']

    # Derive integer labels from the one-hot labels collected in the SAME pass
    # as x. Re-iterating the dataset for the labels costs a second forward
    # pass and is only aligned with x if the iteration order is reproducible.
    y = np.argmax(y_onehot, axis=1)

    # Empirical class prior: one count per class id in 0..max(y).
    class_prob = np.bincount(y) / len(y)
    np.save(f'{exp_name}/class_prob.npy', np.array(class_prob))

    psi_rf_train(x, y, n_projs=500, save_path=exp_name)

    ##############################################################
    #
    # Compute PSI for all validation and test samples
    #
    # #############################################################

    # all_thetas.npy is not written by this cell directly; presumably
    # psi_rf_train stores it under save_path -- confirm in src.psi_estimators.
    thetas = np.load(f'{exp_name}/all_thetas.npy')
    class_prob = np.load(f'{exp_name}/class_prob.npy')

    for n_projs in [50,100,250,500]:
        print(f'N_projs: {n_projs}')

        for split_label, split_name in (('validation', 'val'), ('test', 'test')):
            print(f'Computing PSI for all {split_label} samples and for all classes...')
            x_split = activations[split_name][0]
            psi_class = []
            for k in range(n_classes):
                psi, pmi_arr = psi_rf_val(x_split, k, thetas, class_prob, n_projs, save_path=exp_name)
                psi_class.append(psi)
            # Transposed so that rows index samples and columns index classes.
            np.save(f'{exp_name}/psi_class_{n_projs}_projs_{split_name}.npy', np.array(psi_class).T)

2024-02-01 19:18:57.737374: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1883] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 78835 MB memory:  -> device: 0, name: NVIDIA A100-SXM4-80GB, pci bus id: 0000:47:00.0, compute capability: 8.0


N_projs: 50
Computing PSI for all validation samples and for all classes...
Computing PSI for all test samples and for all classes...
N_projs: 100
Computing PSI for all validation samples and for all classes...
Computing PSI for all test samples and for all classes...
N_projs: 250
Computing PSI for all validation samples and for all classes...
Computing PSI for all test samples and for all classes...
N_projs: 500
Computing PSI for all validation samples and for all classes...
Computing PSI for all test samples and for all classes...
N_projs: 50
Computing PSI for all validation samples and for all classes...
Computing PSI for all test samples and for all classes...
N_projs: 100
Computing PSI for all validation samples and for all classes...
Computing PSI for all test samples and for all classes...
N_projs: 250
Computing PSI for all validation samples and for all classes...
Computing PSI for all test samples and for all classes...
N_projs: 500
Computing PSI for all validation samples and

In [12]:
##############################################################
#
# Compute Filtering Accuracy (without softmax scaling)
#
# #############################################################
# For every run / estimator / projection count: pick the PSI threshold on the
# validation split, apply it to the test split, and store both the threshold
# and the resulting test filtering accuracy.

estimators_list = ['binning', 'gaussian', 'random_forest']
n_projs_list = [50,100,250,500]

for run in range(5):
    tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
    print(f'Run: {run+1}')

    # Dataset, model and predictions depend only on the run, so they are
    # prepared once here instead of once per estimator / n_projs.
    ds_train, ds_val, ds_test, ds_info = load_dataset(cfg)
    n_classes = ds_info.features['label'].num_classes
    ds_val = preprocess_dataset(ds_val, cfg, n_classes, resize=False, normalize=True, onehot=True)
    ds_test = preprocess_dataset(ds_test, cfg, n_classes, resize=False, normalize=True, onehot=True)
    model = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/trained_model.keras')

    # Collect true labels and predictions in ONE pass per split so the two
    # arrays are aligned sample-by-sample regardless of iteration order.
    # NOTE(review): alignment with the rows of the saved psi_class files still
    # relies on the dataset yielding samples in the same order as when those
    # files were produced -- confirm load_dataset is deterministic.
    pred_by_split, correct_by_split = {}, {}
    for split_name, split_ds in (('val', ds_val), ('test', ds_test)):
        true_batches, pred_batches = [], []
        for x_batch, y_batch in split_ds.batch(cfg['batch_size']):
            true_batches.append(np.argmax(y_batch.numpy(), axis=1))
            pred_batches.append(np.argmax(model.predict_on_batch(x_batch), axis=1))
        true_y = np.concatenate(true_batches)
        pred_y = np.concatenate(pred_batches)
        pred_by_split[split_name] = pred_y
        correct_by_split[split_name] = np.equal(true_y, pred_y).astype(int) # assign 0 if true_y != pred_y, assign 1 if true_y == pred_y

    for estimator in estimators_list:
        print(f'Estimator: {estimator}')
        exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/psi/{estimator}'

        for n_projs in n_projs_list:
            print(f'N_projs: {n_projs}')

            # PSI of the predicted class for every sample, per split.
            psi_by_split = {}
            for split_name in ('val', 'test'):
                psi_class = np.load(f'{exp_name}/psi_class_{n_projs}_projs_{split_name}.npy')
                pred_y = pred_by_split[split_name]
                # zip() would silently truncate on a length mismatch.
                assert len(psi_class) == len(pred_y), f'{split_name}: {len(psi_class)} PSI rows vs {len(pred_y)} predictions'
                psi_by_split[split_name] = [psi_value[pred_value] for psi_value, pred_value in zip(psi_class, pred_y)]

            opt_threshold = compute_opt_threshold(psi_by_split['val'], correct_by_split['val'])
            test_filtering_acc = compute_filtering_acc(psi_by_split['test'], correct_by_split['test'], opt_threshold)

            np.savez(f'{exp_name}/unscaled_filtering_accuracy_{n_projs}_projs.npz', opt_threshold=opt_threshold, test_filtering_acc=test_filtering_acc)
            print(f'Opt. threshold: {opt_threshold:.3f}, Test filtering error:{100-test_filtering_acc:.2f}')

Run: 1
Estimator: binning
N_projs: 50
Opt. threshold: -1.226, Test filtering error:2.04
N_projs: 100
Opt. threshold: -1.193, Test filtering error:2.04
N_projs: 250
Opt. threshold: -1.014, Test filtering error:1.94
N_projs: 500
Opt. threshold: -1.153, Test filtering error:2.00
Estimator: gaussian
N_projs: 50
Opt. threshold: -2.119, Test filtering error:2.01
N_projs: 100
Opt. threshold: -1.890, Test filtering error:2.04
N_projs: 250
Opt. threshold: -2.255, Test filtering error:2.01
N_projs: 500
Opt. threshold: -1.767, Test filtering error:2.03
Estimator: random_forest
N_projs: 50
Opt. threshold: -0.756, Test filtering error:1.98
N_projs: 100
Opt. threshold: -0.695, Test filtering error:2.06
N_projs: 250
Opt. threshold: -0.447, Test filtering error:2.00
N_projs: 500
Opt. threshold: -0.525, Test filtering error:1.94
Run: 2
Estimator: binning
N_projs: 50
Opt. threshold: -1.171, Test filtering error:1.95
N_projs: 100
Opt. threshold: -2.636, Test filtering error:1.90
N_projs: 250
Opt. thresho

In [14]:
# Aggregate the unscaled filtering results over the five runs and report the
# mean and standard deviation per estimator / projection count.
estimators_list = ['binning', 'gaussian', 'random_forest']
n_projs_list = [50,100,250,500]

for estimator in estimators_list:
    for n_projs in n_projs_list:
        threshold = []
        filtering_acc = []
        for run in range(5):
            tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
            exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/psi/{estimator}'
            # np.load on an .npz returns a lazily-read archive that holds an
            # open file handle; the context manager closes it once the two
            # arrays have been read.
            with np.load(f'{exp_name}/unscaled_filtering_accuracy_{n_projs}_projs.npz') as f:
                opt_threshold = f['opt_threshold']
                test_filtering_acc = f['test_filtering_acc']
            threshold.append(opt_threshold)
            filtering_acc.append(test_filtering_acc)

        print('-----------------------------')
        print(f'Estimator: {estimator}, N_projs: {n_projs}')
        print(f'Average opt. threshold: {(np.mean(threshold)):.3f}, std: {(np.std(threshold)):.3f}')
        print(f'Average test filtering error: {(100-np.mean(filtering_acc)):.2f}, std: {(np.std(filtering_acc)):.2f}')

-----------------------------
Estimator: binning, N_projs: 50
Average opt. threshold: -1.492, std: 0.521
Average test filtering error: 1.95, std: 0.06
-----------------------------
Estimator: binning, N_projs: 100
Average opt. threshold: -1.719, std: 0.552
Average test filtering error: 1.96, std: 0.05
-----------------------------
Estimator: binning, N_projs: 250
Average opt. threshold: -1.837, std: 0.545
Average test filtering error: 1.93, std: 0.05
-----------------------------
Estimator: binning, N_projs: 500
Average opt. threshold: -1.602, std: 0.467
Average test filtering error: 1.94, std: 0.05
-----------------------------
Estimator: gaussian, N_projs: 50
Average opt. threshold: -2.478, std: 1.042
Average test filtering error: 1.96, std: 0.06
-----------------------------
Estimator: gaussian, N_projs: 100
Average opt. threshold: -2.853, std: 0.856
Average test filtering error: 1.96, std: 0.06
-----------------------------
Estimator: gaussian, N_projs: 250
Average opt. threshold: 

In [41]:
##############################################################
#
# Compute Filtering Accuracy (with softmax scaling)
#
# #############################################################
# Same procedure as the unscaled variant, except that each sample's per-class
# PSI vector is passed through softmax before the predicted-class entry is
# selected.

estimators_list = ['binning', 'gaussian', 'random_forest']
n_projs_list = [50,100,250,500]

for run in range(5):
    tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
    print(f'Run: {run+1}')

    # Dataset, model and predictions depend only on the run, so they are
    # prepared once here instead of once per estimator / n_projs.
    ds_train, ds_val, ds_test, ds_info = load_dataset(cfg)
    n_classes = ds_info.features['label'].num_classes
    ds_val = preprocess_dataset(ds_val, cfg, n_classes, resize=False, normalize=True, onehot=True)
    ds_test = preprocess_dataset(ds_test, cfg, n_classes, resize=False, normalize=True, onehot=True)
    model = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/trained_model.keras')

    # Collect true labels and predictions in ONE pass per split so the two
    # arrays are aligned sample-by-sample regardless of iteration order.
    # NOTE(review): alignment with the rows of the saved psi_class files still
    # relies on the dataset yielding samples in the same order as when those
    # files were produced -- confirm load_dataset is deterministic.
    pred_by_split, correct_by_split = {}, {}
    for split_name, split_ds in (('val', ds_val), ('test', ds_test)):
        true_batches, pred_batches = [], []
        for x_batch, y_batch in split_ds.batch(cfg['batch_size']):
            true_batches.append(np.argmax(y_batch.numpy(), axis=1))
            pred_batches.append(np.argmax(model.predict_on_batch(x_batch), axis=1))
        true_y = np.concatenate(true_batches)
        pred_y = np.concatenate(pred_batches)
        pred_by_split[split_name] = pred_y
        correct_by_split[split_name] = np.equal(true_y, pred_y).astype(int) # assign 0 if true_y != pred_y, assign 1 if true_y == pred_y

    for estimator in estimators_list:
        print(f'Estimator: {estimator}')
        exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/psi/{estimator}'

        for n_projs in n_projs_list:
            print(f'N_projs: {n_projs}')

            # Softmax-scaled PSI of the predicted class for every sample, per split.
            psi_by_split = {}
            for split_name in ('val', 'test'):
                psi_class = np.load(f'{exp_name}/psi_class_{n_projs}_projs_{split_name}.npy')
                pred_y = pred_by_split[split_name]
                # zip() would silently truncate on a length mismatch.
                assert len(psi_class) == len(pred_y), f'{split_name}: {len(psi_class)} PSI rows vs {len(pred_y)} predictions'
                psi_class = np.array([softmax(x) for x in psi_class])
                psi_by_split[split_name] = [psi_value[pred_value] for psi_value, pred_value in zip(psi_class, pred_y)]

            opt_threshold = compute_opt_threshold(psi_by_split['val'], correct_by_split['val'])
            test_filtering_acc = compute_filtering_acc(psi_by_split['test'], correct_by_split['test'], opt_threshold)

            np.savez(f'{exp_name}/scaled_filtering_accuracy_{n_projs}_projs.npz', opt_threshold=opt_threshold, test_filtering_acc=test_filtering_acc)
            print(f'Opt. threshold: {opt_threshold:.3f}, Test filtering error:{100-test_filtering_acc:.2f}')

Run: 1
Estimator: binning
N_projs: 50
Opt. threshold: 0.144, Test filtering error:1.92
N_projs: 100
Opt. threshold: 0.139, Test filtering error:2.03
N_projs: 250
Opt. threshold: 0.184, Test filtering error:1.88
N_projs: 500
Opt. threshold: 0.176, Test filtering error:1.91
Estimator: gaussian
N_projs: 50
Opt. threshold: 0.199, Test filtering error:1.89
N_projs: 100
Opt. threshold: 0.128, Test filtering error:2.04
N_projs: 250
Opt. threshold: 0.198, Test filtering error:1.87
N_projs: 500
Opt. threshold: 0.213, Test filtering error:1.91
Estimator: random_forest
N_projs: 50
Opt. threshold: 0.154, Test filtering error:1.97
N_projs: 100
Opt. threshold: 0.169, Test filtering error:2.01
N_projs: 250
Opt. threshold: 0.174, Test filtering error:1.98
N_projs: 500
Opt. threshold: 0.184, Test filtering error:1.89
Run: 2
Estimator: binning
N_projs: 50
Opt. threshold: 0.115, Test filtering error:1.93
N_projs: 100
Opt. threshold: 0.031, Test filtering error:1.92
N_projs: 250
Opt. threshold: 0.045, Tes

In [42]:
# Aggregate the softmax-scaled filtering results over the five runs and report
# the mean and standard deviation per estimator / projection count.
estimators_list = ['binning', 'gaussian', 'random_forest']
n_projs_list = [50,100,250,500]

for estimator in estimators_list:
    for n_projs in n_projs_list:
        threshold = []
        filtering_acc = []
        for run in range(5):
            tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
            exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/psi/{estimator}'
            # np.load on an .npz returns a lazily-read archive that holds an
            # open file handle; the context manager closes it once the two
            # arrays have been read.
            with np.load(f'{exp_name}/scaled_filtering_accuracy_{n_projs}_projs.npz') as f:
                opt_threshold = f['opt_threshold']
                test_filtering_acc = f['test_filtering_acc']
            threshold.append(opt_threshold)
            filtering_acc.append(test_filtering_acc)

        print('-----------------------------')
        print(f'Estimator: {estimator}, N_projs: {n_projs}')
        print(f'Average opt. threshold: {(np.mean(threshold)):.3f}, std: {(np.std(threshold)):.3f}')
        print(f'Average test filtering error: {(100-np.mean(filtering_acc)):.2f}, std: {(np.std(filtering_acc)):.2f}')

-----------------------------
Estimator: binning, N_projs: 50
Average opt. threshold: 0.134, std: 0.030
Average test filtering error: 1.95, std: 0.03
-----------------------------
Estimator: binning, N_projs: 100
Average opt. threshold: 0.100, std: 0.046
Average test filtering error: 1.96, std: 0.05
-----------------------------
Estimator: binning, N_projs: 250
Average opt. threshold: 0.119, std: 0.064
Average test filtering error: 1.92, std: 0.05
-----------------------------
Estimator: binning, N_projs: 500
Average opt. threshold: 0.151, std: 0.047
Average test filtering error: 1.92, std: 0.05
-----------------------------
Estimator: gaussian, N_projs: 50
Average opt. threshold: 0.154, std: 0.041
Average test filtering error: 1.93, std: 0.05
-----------------------------
Estimator: gaussian, N_projs: 100
Average opt. threshold: 0.119, std: 0.090
Average test filtering error: 1.94, std: 0.08
-----------------------------
Estimator: gaussian, N_projs: 250
Average opt. threshold: 0.176,

In [7]:
##############################################################
#
# Compute ECE (with softmax scaling)
#
# #############################################################
# For every run / estimator / projection count: take the softmax-scaled PSI of
# the predicted class as the confidence score and pass it to compute_ece on
# the test split (10 bins).

estimators_list = ['binning', 'gaussian', 'random_forest']
n_projs_list = [50,100,250,500]

for run in range(5):
    tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
    print(f'Run: {run+1}')

    # Dataset, model and predictions depend only on the run, so they are
    # prepared once here instead of once per estimator / n_projs.
    ds_train, ds_val, ds_test, ds_info = load_dataset(cfg)
    n_classes = ds_info.features['label'].num_classes
    ds_test = preprocess_dataset(ds_test, cfg, n_classes, resize=False, normalize=True, onehot=True)
    model = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/trained_model.keras')

    # Collect true labels and predictions in ONE pass so the two arrays are
    # aligned sample-by-sample regardless of iteration order.
    # NOTE(review): alignment with the rows of the saved psi_class files still
    # relies on the dataset yielding samples in the same order as when those
    # files were produced -- confirm load_dataset is deterministic.
    true_batches, pred_batches = [], []
    for x_batch, y_batch in ds_test.batch(cfg['batch_size']):
        true_batches.append(np.argmax(y_batch.numpy(), axis=1))
        pred_batches.append(np.argmax(model.predict_on_batch(x_batch), axis=1))
    true_y = np.concatenate(true_batches)
    pred_y = np.concatenate(pred_batches)

    for estimator in estimators_list:
        print(f'Estimator: {estimator}')
        exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/psi/{estimator}'

        for n_projs in n_projs_list:
            print(f'N_projs: {n_projs}')

            psi_class = np.load(f'{exp_name}/psi_class_{n_projs}_projs_test.npy')
            # zip() would silently truncate on a length mismatch.
            assert len(psi_class) == len(pred_y), f'test: {len(psi_class)} PSI rows vs {len(pred_y)} predictions'
            psi_class = np.array([softmax(x) for x in psi_class])
            psi = np.array([psi_value[pred_value] for psi_value, pred_value in zip(psi_class, pred_y)])
            ece = compute_ece(psi, true_y, pred_y, n_bins=10)

            np.save(f'{exp_name}/ece_test_{n_projs}_projs.npy', ece)
            print(f'ECE: {ece:.2f}')

Run: 1
Estimator: binning
N_projs: 50
ECE: 30.38
N_projs: 100
ECE: 30.08
N_projs: 250
ECE: 29.06
N_projs: 500
ECE: 27.52
Estimator: gaussian
N_projs: 50
ECE: 16.12
N_projs: 100
ECE: 16.30
N_projs: 250
ECE: 14.80
N_projs: 500
ECE: 13.80
Estimator: random_forest
N_projs: 50
ECE: 32.34
N_projs: 100
ECE: 31.27
N_projs: 250
ECE: 31.79
N_projs: 500
ECE: 29.78
Run: 2
Estimator: binning
N_projs: 50
ECE: 32.10
N_projs: 100
ECE: 28.92
N_projs: 250
ECE: 28.84
N_projs: 500
ECE: 29.60
Estimator: gaussian
N_projs: 50
ECE: 17.63
N_projs: 100
ECE: 15.26
N_projs: 250
ECE: 14.90
N_projs: 500
ECE: 15.40
Estimator: random_forest
N_projs: 50
ECE: 35.23
N_projs: 100
ECE: 33.07
N_projs: 250
ECE: 31.40
N_projs: 500
ECE: 31.00
Run: 3
Estimator: binning
N_projs: 50
ECE: 29.26
N_projs: 100
ECE: 27.06
N_projs: 250
ECE: 27.04
N_projs: 500
ECE: 26.88
Estimator: gaussian
N_projs: 50
ECE: 15.46
N_projs: 100
ECE: 13.78
N_projs: 250
ECE: 13.15
N_projs: 500
ECE: 13.28
Estimator: random_forest
N_projs: 50
ECE: 32.58
N_pr

In [3]:
# Aggregate the test ECE over the five runs and report the mean and standard
# deviation per estimator / projection count.
estimators_list = ['binning', 'gaussian', 'random_forest']
n_projs_list = [50,100,250,500]

for estimator in estimators_list:
    for n_projs in n_projs_list:
        ece_list = []
        for run in range(5):
            tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
            exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/psi/{estimator}'
            # Only the ECE file is needed here; the filtering-accuracy archive
            # is intentionally not opened (it was never used and made this
            # cell depend on the filtering cell having been run).
            ece = np.load(f'{exp_name}/ece_test_{n_projs}_projs.npy')
            ece_list.append(ece)

        print('-----------------------------')
        print(f'Estimator: {estimator}, N_projs: {n_projs}')
        print(f'Average ECE: {(np.mean(ece_list)):.2f}, std: {(np.std(ece_list)):.2f}')

-----------------------------
Estimator: binning, N_projs: 50
Average ECE: 30.29, std: 1.08
-----------------------------
Estimator: binning, N_projs: 100
Average ECE: 28.21, std: 1.14
-----------------------------
Estimator: binning, N_projs: 250
Average ECE: 27.82, std: 0.96
-----------------------------
Estimator: binning, N_projs: 500
Average ECE: 27.47, std: 1.13
-----------------------------
Estimator: gaussian, N_projs: 50
Average ECE: 16.13, std: 1.00
-----------------------------
Estimator: gaussian, N_projs: 100
Average ECE: 14.35, std: 1.23
-----------------------------
Estimator: gaussian, N_projs: 250
Average ECE: 13.77, std: 0.89
-----------------------------
Estimator: gaussian, N_projs: 500
Average ECE: 13.59, std: 1.07
-----------------------------
Estimator: random_forest, N_projs: 50
Average ECE: 33.27, std: 1.21
-----------------------------
Estimator: random_forest, N_projs: 100
Average ECE: 31.14, std: 1.24
-----------------------------
Estimator: random_forest, N

In [2]:
# Experiment configuration for the STL-10 / pretrained Inception runs.
cfg = {
    'dataset': 'stl10',
    'model': 'pretrained_inception',
    'batch_size': 512,
    'optimizer': 'Adam',
    'learning_rate': 0.001,
    'max_epoch': 300,
    'patience': 10,
}

# Short aliases for the two entries that are interpolated into result paths.
dataset_name, model_name = cfg['dataset'], cfg['model']

In [None]:
# Number of random projections passed to both psi_gauss_train and
# psi_gauss_val (previously repeated as a literal 250 in three calls).
N_PROJS_GAUSS = 250


def _collect_activations(dataset, feature_model, batch_size):
    """Run `feature_model` over `dataset` and return everything as NumPy arrays.

    Args:
        dataset: tf.data.Dataset yielding (input, label) pairs.
        feature_model: callable applied to each input batch.
        batch_size: number of samples per forward pass.

    Returns:
        (activations, labels): both concatenated along the sample axis, in
        dataset order.
    """
    batched = dataset.batch(batch_size).map(lambda x, y: (feature_model(x), y))
    activations, labels = zip(*batched.as_numpy_iterator())
    return np.concatenate(activations), np.concatenate(labels)


def _constant_onehot(n_samples, k, n_classes):
    """Return an (n_samples, n_classes) float32 array whose rows are all one-hot(k).

    float32 matches the default dtype of tf.one_hot, which built these labels
    before.
    """
    return np.tile(np.eye(n_classes, dtype=np.float32)[k], (n_samples, 1))


for run in range(5):
    tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
    exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/psi/gaussian'
    if not os.path.exists(exp_name):
        print("Making directory", exp_name)
        os.makedirs(exp_name)

    ds_train, ds_val, ds_test, ds_info = load_dataset(cfg, shuffle=False)
    n_classes = ds_info.features['label'].num_classes
    ds_train = preprocess_dataset(ds_train, cfg, n_classes, resize=True, normalize=True, onehot=True)
    ds_val = preprocess_dataset(ds_val, cfg, n_classes, resize=True, normalize=True, onehot=True)
    ds_test = preprocess_dataset(ds_test, cfg, n_classes, resize=True, normalize=True, onehot=True)

    model = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/trained_model.keras')
    # Feature extractor: the trained model truncated at its second-to-last layer.
    int_model = tf.keras.Model(inputs=model.inputs, outputs=model.layers[-2].output)

    ##############################################################
    #
    # Train PSI Model
    #
    # #############################################################

    print('Training PSI model (gaussian)...')

    x, y = _collect_activations(ds_train, int_model, cfg['batch_size'])

    psi_data = psi_gauss_train(x, y, n_projs=N_PROJS_GAUSS)
    np.save(f'{exp_name}/trained_model.npy', psi_data)

    ##############################################################
    #
    # Compute PSI for all validation and test samples
    #
    # #############################################################

    # Reload from disk so evaluation uses exactly what was persisted.
    # `.item()` unwraps the 0-d object array that np.save produced
    # (presumably psi_data is a dict -- confirm against psi_gauss_train).
    psi_data = np.load(f'{exp_name}/trained_model.npy', allow_pickle=True).item()

    for split_label, split_tag, ds_split in (('validation', 'val', ds_val),
                                             ('test', 'test', ds_test)):
        print(f'Computing PSI for all {split_label} samples and for all classes...')
        # Activations do not depend on the hypothesised class, so run the
        # network once per split rather than once per class.
        x, _ = _collect_activations(ds_split, int_model, cfg['batch_size'])
        psi_class = []
        for k in range(n_classes):
            # Score every sample as if its label were class k.
            y = _constant_onehot(len(x), k, n_classes)
            psi, pmi_arr = psi_gauss_val(x, y, psi_data, n_projs=N_PROJS_GAUSS)
            psi_class.append(psi)
        # Transposed so that the class index becomes the last axis.
        np.save(f'{exp_name}/psi_class_{split_tag}.npy', np.array(psi_class).T)

2024-05-21 23:00:41.393121: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1883] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 1136 MB memory:  -> device: 0, name: NVIDIA A100-SXM4-80GB, pci bus id: 0000:47:00.0, compute capability: 8.0


Training PSI model (gaussian)...
Computing PSI for all validation samples and for all classes...
Computing PSI for all test samples and for all classes...
Training PSI model (gaussian)...
Computing PSI for all validation samples and for all classes...
Computing PSI for all test samples and for all classes...
Training PSI model (gaussian)...
Computing PSI for all validation samples and for all classes...
Computing PSI for all test samples and for all classes...


In [None]:
# Number of projections used when evaluating PSI; also part of the output
# file names written below.
n_projs = 250
# Number of projections fitted by psi_rf_train.
# NOTE(review): larger than `n_projs`, presumably so that several evaluation
# sizes can reuse one fitted model -- confirm against psi_rf_train/psi_rf_val.
N_PROJS_RF_TRAIN = 500


def _collect_activations(dataset, feature_model, batch_size):
    """Run `feature_model` over `dataset` and return everything as NumPy arrays.

    Args:
        dataset: tf.data.Dataset yielding (input, label) pairs.
        feature_model: callable applied to each input batch.
        batch_size: number of samples per forward pass.

    Returns:
        (activations, labels): both concatenated along the sample axis, in
        dataset order.
    """
    batched = dataset.batch(batch_size).map(lambda x, y: (feature_model(x), y))
    activations, labels = zip(*batched.as_numpy_iterator())
    return np.concatenate(activations), np.concatenate(labels)


# NOTE(review): only runs 3-5 are processed here, whereas other cells iterate
# over range(5). This looks like a resume after an interrupted execution;
# switch to range(5) for a full reproduction from a fresh kernel.
for run in range(2,5):
    tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
    exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/psi/random_forest'
    if not os.path.exists(exp_name):
        print("Making directory", exp_name)
        os.makedirs(exp_name)

    ds_train, ds_val, ds_test, ds_info = load_dataset(cfg, shuffle=False)
    n_classes = ds_info.features['label'].num_classes
    ds_train = preprocess_dataset(ds_train, cfg, n_classes, resize=True, normalize=True, onehot=True)
    ds_val = preprocess_dataset(ds_val, cfg, n_classes, resize=True, normalize=True, onehot=True)
    ds_test = preprocess_dataset(ds_test, cfg, n_classes, resize=True, normalize=True, onehot=True)

    model = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/trained_model.keras')
    # Feature extractor: the trained model truncated at its second-to-last layer.
    int_model = tf.keras.Model(inputs=model.inputs, outputs=model.layers[-2].output)

    ##############################################################
    #
    # Train PSI Model
    #
    # #############################################################

    print('Training PSI model (random forest)...')

    x, y_onehot = _collect_activations(ds_train, int_model, cfg['batch_size'])

    # Integer class ids from the one-hot labels already collected above.
    # The previous version iterated the dataset (and hence the network) a
    # second time just to rebuild them.
    y = np.argmax(y_onehot, axis=1)

    # Empirical class frequencies for classes 0..max(y).
    class_prob = np.bincount(y) / len(y)
    np.save(f'{exp_name}/class_prob.npy', np.array(class_prob))

    psi_rf_train(x, y, n_projs=N_PROJS_RF_TRAIN, save_path=exp_name)

    ##############################################################
    #
    # Compute PSI for all validation and test samples
    #
    # #############################################################

    # all_thetas.npy is not written in this cell; presumably psi_rf_train
    # saves it under save_path -- confirm.
    thetas = np.load(f'{exp_name}/all_thetas.npy')
    class_prob = np.load(f'{exp_name}/class_prob.npy')

    for split_label, split_tag, ds_split in (('validation', 'val', ds_val),
                                             ('test', 'test', ds_test)):
        print(f'Computing PSI for all {split_label} samples and for all classes...')
        # Activations do not depend on the hypothesised class, so run the
        # network once per split rather than once per class.
        x, _ = _collect_activations(ds_split, int_model, cfg['batch_size'])
        psi_class = []
        for k in range(n_classes):
            psi, pmi_arr = psi_rf_val(x, k, thetas, class_prob, n_projs, save_path=exp_name)
            psi_class.append(psi)
        # Transposed so that the class index becomes the last axis.
        np.save(f'{exp_name}/psi_class_{n_projs}_projs_{split_tag}.npy', np.array(psi_class).T)

Making directory ../results/PI_Explainability/pretrained_inception_stl10/run_1/calibration/psi/random_forest


2024-02-06 17:10:36.737016: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1883] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 78835 MB memory:  -> device: 0, name: NVIDIA A100-SXM4-80GB, pci bus id: 0000:47:00.0, compute capability: 8.0


Training PSI model (random forest)...
Making directory ../results/PI_Explainability/pretrained_inception_stl10/run_1/calibration/psi/random_forest/psi_models
Computing PSI for all validation samples and for all classes...
Computing PSI for all test samples and for all classes...
Making directory ../results/PI_Explainability/pretrained_inception_stl10/run_2/calibration/psi/random_forest
Training PSI model (random forest)...
Making directory ../results/PI_Explainability/pretrained_inception_stl10/run_2/calibration/psi/random_forest/psi_models
Computing PSI for all validation samples and for all classes...
Computing PSI for all test samples and for all classes...
Making directory ../results/PI_Explainability/pretrained_inception_stl10/run_3/calibration/psi/random_forest
Training PSI model (random forest)...
Making directory ../results/PI_Explainability/pretrained_inception_stl10/run_3/calibration/psi/random_forest/psi_models
Computing PSI for all validation samples and for all classes...
