In [1]:
import os
import time
import numpy as np
import tensorflow as tf

from src.models import mlp
from src.datasets import load_dataset, preprocess_dataset, prefetch_dataset
from src.pvi_estimators import train_pvi_null_model, train_pvi_model_from_scratch, neural_pvi, neural_pvi_ensemble, neural_pvi_calibrated
from src.utils import *

tf.__version__

2024-05-22 13:00:29.619858: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9360] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-22 13:00:29.619920: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-22 13:00:29.619952: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1537] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-05-22 13:00:29.628994: I tensorflow/core/platform/cpu_feature_guard.cc:183] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE3 SSE4.1 SSE4.2 AVX, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from .autonotebook import tqdm as 

'2.14.0'

In [2]:
# Experiment configuration shared by the cells below.
cfg = dict(
    dataset='mnist',
    model='mlp',
    batch_size=512,
    optimizer='Adam',
    learning_rate=0.001,
    max_epoch=300,
    patience=10,
)

# Short aliases used to build the result paths.
model_name, dataset_name = cfg['model'], cfg['dataset']

In [None]:
##############################################################
#
# Classification error of the saved trained models,
# averaged over the 5 runs (train / validation / test)
#
##############################################################
ds_train, ds_val, ds_test, ds_info = load_dataset(cfg)
n_classes = ds_info.features['label'].num_classes

# Preprocess all three splits first (train, val, test order), then
# batch/prefetch them in the same order.
ds_train, ds_val, ds_test = [
    preprocess_dataset(split, cfg, n_classes, resize=False, normalize=True, onehot=True)
    for split in (ds_train, ds_val, ds_test)
]
ds_train, ds_val, ds_test = [
    prefetch_dataset(split, batch_size=cfg['batch_size'])
    for split in (ds_train, ds_val, ds_test)
]

train_acc, val_acc, test_acc = [], [], []
for run in range(5):
    tf.keras.utils.set_random_seed(run+10)  # seeds Python, NumPy, and TensorFlow
    model = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/trained_model.keras')
    # Index 1 of evaluate()'s result is stored as the accuracy
    # (presumably [loss, accuracy] -- depends on how the model was compiled).
    for acc_history, split in ((train_acc, ds_train), (val_acc, ds_val), (test_acc, ds_test)):
        acc_history.append(model.evaluate(split, verbose=0)[1])

# Report the error as 100 - mean accuracy (in percent) with the std of the accuracy.
for split_label, acc_history in (('train', train_acc), ('validation', val_acc), ('test', test_acc)):
    print(f'Average {split_label} error: {(100-np.mean(acc_history)*100):.2f}, std: {(np.std(acc_history)*100):.2f}')

### No training

In [4]:
for run in range(5):
    tf.keras.utils.set_random_seed(run+10)  # seeds Python, NumPy, and TensorFlow
    run_dir = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}'
    exp_name = f'{run_dir}/calibration/pvi/no_training'
    if not os.path.exists(exp_name):
        print("Making directory", exp_name)
        os.makedirs(exp_name)

    ds_train, ds_val, ds_test, ds_info = load_dataset(cfg)
    n_classes = ds_info.features['label'].num_classes
    ds_train, ds_val, ds_test = [
        preprocess_dataset(split, cfg, n_classes, resize=False, normalize=True, onehot=True)
        for split in (ds_train, ds_val, ds_test)
    ]

    ##############################################################
    #
    # PVI model = the already trained classifier (no extra training);
    # only the null model is trained here
    #
    ##############################################################

    pvi_model = tf.keras.models.load_model(f'{run_dir}/trained_model.keras')
    pvi_model.save(f'{exp_name}/pvi_model.keras')
    untrained_model = tf.keras.models.load_model(f'{run_dir}/untrained_model.keras')
    train_pvi_null_model(ds_train, untrained_model, cfg, save_path=f'{exp_name}/pvi_null_model.keras')

    ##############################################################
    #
    # Compute PVI for all validation and test samples
    #
    ##############################################################

    pvi_model = tf.keras.models.load_model(f'{exp_name}/pvi_model.keras')
    null_model = tf.keras.models.load_model(f'{exp_name}/pvi_null_model.keras')

    for split_label, split_tag, ds_split in (('validation', 'val', ds_val), ('test', 'test', ds_test)):
        print(f'Computing PVI for all {split_label} samples and for all classes...')
        pvi_class = []
        for k in range(n_classes):
            # Replace every true label by the one-hot vector of candidate class k.
            ds_activity = ds_split.batch(cfg['batch_size']).map(lambda x, y: (x, tf.one_hot(tf.fill([tf.shape(x)[0]], k), depth=n_classes))).cache().prefetch(tf.data.AUTOTUNE)
            pvi_list = []
            for x_batch, y_batch in ds_activity:
                batch_pvi = neural_pvi(x_batch, y_batch, pvi_model, null_model)
                pvi_list.extend(np.array(batch_pvi).tolist())
            pvi_class.append(pvi_list)
        # pvi_class holds one list per class; transpose before saving so that
        # the first axis indexes samples and the second indexes classes.
        np.save(f'{exp_name}/pvi_class_{split_tag}.npy', np.array(pvi_class).T)

Computing PVI for all validation samples and for all classes...


  return -1 * np.log2(tf.boolean_mask(prob, y).numpy())


Computing PVI for all test samples and for all classes...
Computing PVI for all validation samples and for all classes...
Computing PVI for all test samples and for all classes...
Computing PVI for all validation samples and for all classes...
Computing PVI for all test samples and for all classes...
Computing PVI for all validation samples and for all classes...
Computing PVI for all test samples and for all classes...
Computing PVI for all validation samples and for all classes...
Computing PVI for all test samples and for all classes...


### Training from Scratch

In [6]:
for run in range(5):
    tf.keras.utils.set_random_seed(run+10)  # seeds Python, NumPy, and TensorFlow
    run_dir = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}'
    exp_name = f'{run_dir}/calibration/pvi/training_from_scratch'
    if not os.path.exists(exp_name):
        print("Making directory", exp_name)
        os.makedirs(exp_name)

    ds_train, ds_val, ds_test, ds_info = load_dataset(cfg)
    n_classes = ds_info.features['label'].num_classes
    ds_train, ds_val, ds_test = [
        preprocess_dataset(split, cfg, n_classes, resize=False, normalize=True, onehot=True)
        for split in (ds_train, ds_val, ds_test)
    ]

    ##############################################################
    #
    # Train the PVI model (from scratch) and the null model
    #
    ##############################################################

    untrained_model = tf.keras.models.load_model(f'{run_dir}/untrained_model.keras')

    # The PVI model is trained with a different seed than the original run;
    # the seed is restored before the null model is trained.
    # NOTE(review): the same `untrained_model` object is handed to both helpers --
    # confirm that train_pvi_model_from_scratch does not train it in place.
    tf.keras.utils.set_random_seed(run+20)
    train_pvi_model_from_scratch(ds_train, ds_val, untrained_model, cfg, save_path=f'{exp_name}/pvi_model.keras')
    tf.keras.utils.set_random_seed(run+10)
    train_pvi_null_model(ds_train, untrained_model, cfg, save_path=f'{exp_name}/pvi_null_model.keras')

    ##############################################################
    #
    # Compute PVI for all validation and test samples
    #
    ##############################################################

    pvi_model = tf.keras.models.load_model(f'{exp_name}/pvi_model.keras')
    null_model = tf.keras.models.load_model(f'{exp_name}/pvi_null_model.keras')

    for split_label, split_tag, ds_split in (('validation', 'val', ds_val), ('test', 'test', ds_test)):
        print(f'Computing PVI for all {split_label} samples and for all classes...')
        pvi_class = []
        for k in range(n_classes):
            # Replace every true label by the one-hot vector of candidate class k.
            ds_activity = ds_split.batch(cfg['batch_size']).map(lambda x, y: (x, tf.one_hot(tf.fill([tf.shape(x)[0]], k), depth=n_classes))).cache().prefetch(tf.data.AUTOTUNE)
            pvi_list = []
            for x_batch, y_batch in ds_activity:
                batch_pvi = neural_pvi(x_batch, y_batch, pvi_model, null_model)
                pvi_list.extend(np.array(batch_pvi).tolist())
            pvi_class.append(pvi_list)
        # pvi_class holds one list per class; transpose before saving so that
        # the first axis indexes samples and the second indexes classes.
        np.save(f'{exp_name}/pvi_class_{split_tag}.npy', np.array(pvi_class).T)

Computing PVI for all validation samples and for all classes...
Computing PVI for all test samples and for all classes...
Computing PVI for all validation samples and for all classes...
Computing PVI for all test samples and for all classes...
Computing PVI for all validation samples and for all classes...
Computing PVI for all test samples and for all classes...
Computing PVI for all validation samples and for all classes...
Computing PVI for all test samples and for all classes...
Computing PVI for all validation samples and for all classes...
Computing PVI for all test samples and for all classes...


### Training MLP Penultimate

In [8]:
for run in range(5):
    tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
    exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pvi/training_mlp_penultimate'
    if not os.path.exists(exp_name):
        print("Making directory", exp_name)
        os.makedirs(exp_name)
        
    ds_train, ds_val, ds_test, ds_info = load_dataset(cfg)
    n_classes = ds_info.features['label'].num_classes
    ds_train = preprocess_dataset(ds_train, cfg, n_classes, resize=False, normalize=True, onehot=True)
    ds_val = preprocess_dataset(ds_val, cfg, n_classes, resize=False, normalize=True, onehot=True)
    ds_test = preprocess_dataset(ds_test, cfg, n_classes, resize=False, normalize=True, onehot=True)
        
    ##############################################################
    #
    # Train PVI Model on the penultimate-layer activations
    #
    ##############################################################
    
    model = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/trained_model.keras')
    # Sub-model exposing the output of the second-to-last layer of the trained classifier.
    int_model = tf.keras.Model(inputs=model.inputs, outputs=model.layers[-2].output)
    ds_activity_train = ds_train.batch(cfg['batch_size']).map(lambda x, y: (int_model(x), y)).unbatch()
    ds_activity_val = ds_val.batch(cfg['batch_size']).map(lambda x, y: (int_model(x), y)).unbatch()
    pvi_model = mlp(ds_activity_train.batch(cfg['batch_size']), n_layers=1, n_hidden=512)
    pvi_model.save(f'{exp_name}/untrained_pvi_model_.keras')
    # Reload the checkpoint that was just written instead of aliasing `pvi_model`:
    # `untrained_pvi_model = pvi_model` would be the very same object, so if
    # train_pvi_model_from_scratch updates its model in place the null model
    # would start from trained rather than untrained weights.
    untrained_pvi_model = tf.keras.models.load_model(f'{exp_name}/untrained_pvi_model_.keras')
    train_pvi_model_from_scratch(ds_activity_train, ds_activity_val, pvi_model, cfg, save_path=f'{exp_name}/pvi_model.keras')
    train_pvi_null_model(ds_activity_train, untrained_pvi_model, cfg, save_path=f'{exp_name}/pvi_null_model.keras')
    
    ##############################################################
    #
    # Compute PVI for all validation and test samples
    #
    ##############################################################
    
    # `model` / `int_model` from above are reused: neither is passed to the
    # training helpers, so reloading them from disk is unnecessary.
    pvi_model = tf.keras.models.load_model(f'{exp_name}/pvi_model.keras')
    null_model = tf.keras.models.load_model(f'{exp_name}/pvi_null_model.keras')

    for split_label, split_tag, ds_split in (('validation', 'val', ds_val), ('test', 'test', ds_test)):
        print(f'Computing PVI for all {split_label} samples and for all classes...')
        pvi_class = []
        for k in range(n_classes):
            # Inputs become penultimate activations; labels become the one-hot vector of class k.
            ds_activity = ds_split.batch(cfg['batch_size']).map(lambda x, y: (int_model(x), tf.one_hot(tf.fill([tf.shape(x)[0]], k), depth=n_classes))).cache().prefetch(tf.data.AUTOTUNE)
            pvi_list = []
            for (x_batch, y_batch) in ds_activity:
                pvi = neural_pvi(x_batch, y_batch, pvi_model, null_model)
                pvi_list += np.array(pvi).tolist()
            pvi_class.append(pvi_list)
        # One list per class was collected; transpose so the first axis indexes samples.
        np.save(f'{exp_name}/pvi_class_{split_tag}.npy', np.array(pvi_class).T)

Computing PVI for all validation samples and for all classes...
Computing PVI for all test samples and for all classes...
Computing PVI for all validation samples and for all classes...
Computing PVI for all test samples and for all classes...
Computing PVI for all validation samples and for all classes...
Computing PVI for all test samples and for all classes...
Computing PVI for all validation samples and for all classes...
Computing PVI for all test samples and for all classes...
Computing PVI for all validation samples and for all classes...
Computing PVI for all test samples and for all classes...


### Ensemble (No training and Training from scratch)

In [13]:
for run in range(5):
    tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
    exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pvi/ensemble_no_training_training_from_scratch'
    if not os.path.exists(exp_name):
        print("Making directory", exp_name)
        os.makedirs(exp_name)
        
    ds_train, ds_val, ds_test, ds_info = load_dataset(cfg)
    n_classes = ds_info.features['label'].num_classes
    ds_train = preprocess_dataset(ds_train, cfg, n_classes, resize=False, normalize=True, onehot=True)
    ds_val = preprocess_dataset(ds_val, cfg, n_classes, resize=False, normalize=True, onehot=True)
    ds_test = preprocess_dataset(ds_test, cfg, n_classes, resize=False, normalize=True, onehot=True)
    
    # Ensemble members: the models saved by the 'no_training' and
    # 'training_from_scratch' estimators for this run.
    pvi_model_1 = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pvi/no_training/pvi_model.keras')
    null_model_1 = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pvi/no_training/pvi_null_model.keras')
    pvi_model_2 = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pvi/training_from_scratch/pvi_model.keras')
    null_model_2 = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pvi/training_from_scratch/pvi_null_model.keras')
    
    for split_label, split_tag, ds_split in (('validation', 'val', ds_val), ('test', 'test', ds_test)):
        print(f'Computing PVI for all {split_label} samples and for all classes...')
        pvi_class = []
        for k in range(n_classes):
            # Both members take the raw inputs, so one pipeline is enough: each
            # batch is passed to both members instead of zipping the dataset
            # with itself (which iterated it twice in lock-step).
            ds_class = ds_split.batch(cfg['batch_size']).map(lambda x, y: (x, tf.one_hot(tf.fill([tf.shape(x)[0]], k), depth=n_classes))).cache().prefetch(tf.data.AUTOTUNE)
            pvi_list = []
            for x_batch, y_batch in ds_class:
                pvi = neural_pvi_ensemble(x_batch, x_batch, y_batch, pvi_model_1, pvi_model_2, null_model_1, null_model_2)
                pvi_list += np.array(pvi).tolist()
            pvi_class.append(pvi_list)
        # One list per class was collected; transpose so the first axis indexes samples.
        np.save(f'{exp_name}/pvi_class_{split_tag}.npy', np.array(pvi_class).T)

Computing PVI for all validation samples and for all classes...
Computing PVI for all test samples and for all classes...
Computing PVI for all validation samples and for all classes...
Computing PVI for all test samples and for all classes...
Computing PVI for all validation samples and for all classes...
Computing PVI for all test samples and for all classes...
Computing PVI for all validation samples and for all classes...
Computing PVI for all test samples and for all classes...
Computing PVI for all validation samples and for all classes...
Computing PVI for all test samples and for all classes...


### Ensemble (No training and Training MLP penultimate)

In [3]:
for run in range(5):
    tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
    exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pvi/ensemble_no_training_training_mlp_penultimate'
    if not os.path.exists(exp_name):
        print("Making directory", exp_name)
        os.makedirs(exp_name)
        
    ds_train, ds_val, ds_test, ds_info = load_dataset(cfg)
    n_classes = ds_info.features['label'].num_classes
    ds_train = preprocess_dataset(ds_train, cfg, n_classes, resize=False, normalize=True, onehot=True)
    ds_val = preprocess_dataset(ds_val, cfg, n_classes, resize=False, normalize=True, onehot=True)
    ds_test = preprocess_dataset(ds_test, cfg, n_classes, resize=False, normalize=True, onehot=True)
    model = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/trained_model.keras')
    # Sub-model exposing the output of the second-to-last layer of the trained classifier.
    int_model = tf.keras.Model(inputs=model.inputs, outputs=model.layers[-2].output)
    
    # Member 1 consumes raw inputs ('no_training'); member 2 consumes the
    # penultimate activations ('training_mlp_penultimate').
    pvi_model_1 = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pvi/no_training/pvi_model.keras')
    null_model_1 = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pvi/no_training/pvi_null_model.keras')
    pvi_model_2 = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pvi/training_mlp_penultimate/pvi_model.keras')
    null_model_2 = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pvi/training_mlp_penultimate/pvi_null_model.keras')
    
    for split_label, split_tag, ds_split in (('validation', 'val', ds_val), ('test', 'test', ds_test)):
        print(f'Computing PVI for all {split_label} samples and for all classes...')
        pvi_class = []
        for k in range(n_classes):
            # A single pipeline emits (raw input, penultimate activation, class-k label)
            # for each batch. Zipping two independently iterated pipelines relied on
            # both yielding the samples in the same order and read the data twice.
            ds_class = ds_split.batch(cfg['batch_size']).map(
                lambda x, y: (x, int_model(x), tf.one_hot(tf.fill([tf.shape(x)[0]], k), depth=n_classes))
            ).cache().prefetch(tf.data.AUTOTUNE)
            pvi_list = []
            for x_batch_1, x_batch_2, y_batch in ds_class:
                pvi = neural_pvi_ensemble(x_batch_1, x_batch_2, y_batch, pvi_model_1, pvi_model_2, null_model_1, null_model_2)
                pvi_list += np.array(pvi).tolist()
            pvi_class.append(pvi_list)
        # One list per class was collected; transpose so the first axis indexes samples.
        np.save(f'{exp_name}/pvi_class_{split_tag}.npy', np.array(pvi_class).T)

Making directory ../results/PI_Explainability/mlp_mnist/run_1/calibration/pvi/ensemble_no_training_training_mlp_penultimate


2024-02-01 16:02:19.417654: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1883] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 78835 MB memory:  -> device: 0, name: NVIDIA A100-SXM4-80GB, pci bus id: 0000:4e:00.0, compute capability: 8.0


Computing PVI for all validation samples and for all classes...
Computing PVI for all test samples and for all classes...
Making directory ../results/PI_Explainability/mlp_mnist/run_2/calibration/pvi/ensemble_no_training_training_mlp_penultimate
Computing PVI for all validation samples and for all classes...
Computing PVI for all test samples and for all classes...
Making directory ../results/PI_Explainability/mlp_mnist/run_3/calibration/pvi/ensemble_no_training_training_mlp_penultimate
Computing PVI for all validation samples and for all classes...
Computing PVI for all test samples and for all classes...
Making directory ../results/PI_Explainability/mlp_mnist/run_4/calibration/pvi/ensemble_no_training_training_mlp_penultimate
Computing PVI for all validation samples and for all classes...
Computing PVI for all test samples and for all classes...
Making directory ../results/PI_Explainability/mlp_mnist/run_5/calibration/pvi/ensemble_no_training_training_mlp_penultimate
Computing PVI fo

### Ensemble (Training from scratch and Training MLP penultimate)

In [None]:
for run in range(5):
    tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
    exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pvi/ensemble_training_from_scratch_training_mlp_penultimate'
    if not os.path.exists(exp_name):
        print("Making directory", exp_name)
        os.makedirs(exp_name)
        
    ds_train, ds_val, ds_test, ds_info = load_dataset(cfg)
    n_classes = ds_info.features['label'].num_classes
    ds_train = preprocess_dataset(ds_train, cfg, n_classes, resize=False, normalize=True, onehot=True)
    ds_val = preprocess_dataset(ds_val, cfg, n_classes, resize=False, normalize=True, onehot=True)
    ds_test = preprocess_dataset(ds_test, cfg, n_classes, resize=False, normalize=True, onehot=True)
    model = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/trained_model.keras')
    # Sub-model exposing the output of the second-to-last layer of the trained classifier.
    int_model = tf.keras.Model(inputs=model.inputs, outputs=model.layers[-2].output)
    
    # Member 1 consumes raw inputs ('training_from_scratch'); member 2 consumes
    # the penultimate activations ('training_mlp_penultimate').
    pvi_model_1 = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pvi/training_from_scratch/pvi_model.keras')
    null_model_1 = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pvi/training_from_scratch/pvi_null_model.keras')
    pvi_model_2 = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pvi/training_mlp_penultimate/pvi_model.keras')
    null_model_2 = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pvi/training_mlp_penultimate/pvi_null_model.keras')
    
    for split_label, split_tag, ds_split in (('validation', 'val', ds_val), ('test', 'test', ds_test)):
        print(f'Computing PVI for all {split_label} samples and for all classes...')
        pvi_class = []
        for k in range(n_classes):
            # A single pipeline emits (raw input, penultimate activation, class-k label)
            # for each batch. Zipping two independently iterated pipelines relied on
            # both yielding the samples in the same order and read the data twice.
            ds_class = ds_split.batch(cfg['batch_size']).map(
                lambda x, y: (x, int_model(x), tf.one_hot(tf.fill([tf.shape(x)[0]], k), depth=n_classes))
            ).cache().prefetch(tf.data.AUTOTUNE)
            pvi_list = []
            for x_batch_1, x_batch_2, y_batch in ds_class:
                pvi = neural_pvi_ensemble(x_batch_1, x_batch_2, y_batch, pvi_model_1, pvi_model_2, null_model_1, null_model_2)
                pvi_list += np.array(pvi).tolist()
            pvi_class.append(pvi_list)
        # One list per class was collected; transpose so the first axis indexes samples.
        np.save(f'{exp_name}/pvi_class_{split_tag}.npy', np.array(pvi_class).T)

In [17]:
##############################################################
#
# Compute Filtering Accuracy (without softmax scaling)
#
# For every run and estimator: pick the PVI threshold on the
# validation split, then score it on the test split.
#
##############################################################

estimators_list = ['no_training', 'training_from_scratch', 'training_mlp_penultimate',
                   'ensemble_no_training_training_from_scratch', 'ensemble_no_training_training_mlp_penultimate', 'ensemble_training_from_scratch_training_mlp_penultimate']

for run in range(5):
    tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
    print(f'Run: {run+1}')
    run_dir = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}'

    # The data, the classifier and its predictions depend only on the run, not on
    # the estimator, so they are computed once here instead of once per estimator.
    ds_train, ds_val, ds_test, ds_info = load_dataset(cfg)
    n_classes = ds_info.features['label'].num_classes
    ds_val = preprocess_dataset(ds_val, cfg, n_classes, resize=False, normalize=True, onehot=True)
    ds_test = preprocess_dataset(ds_test, cfg, n_classes, resize=False, normalize=True, onehot=True)
    model = tf.keras.models.load_model(f'{run_dir}/trained_model.keras')

    pred_by_split = {}     # split tag -> predicted class index per sample
    correct_by_split = {}  # split tag -> 1 if the prediction equals the true class, else 0
    for split_tag, ds_split in (('val', ds_val), ('test', ds_test)):
        true_y = np.argmax([y for x, y in ds_split], axis=1)
        pred_y = np.argmax(model.predict(ds_split.batch(cfg['batch_size']), verbose=0), axis=1)
        pred_by_split[split_tag] = pred_y
        correct_by_split[split_tag] = np.equal(true_y, pred_y).astype(int)

    for estimator in estimators_list:
        print(f'Estimator: {estimator}')
        exp_name = f'{run_dir}/calibration/pvi/{estimator}'

        # Validation: keep, for each sample, the PVI of its predicted class and
        # derive the threshold from it.
        pvi_class = np.load(f'{exp_name}/pvi_class_val.npy')
        pvi = [pvi_value[pred_value] for pvi_value, pred_value in zip(pvi_class, pred_by_split['val'])]
        opt_threshold = compute_opt_threshold(pvi, correct_by_split['val'])

        # Test: apply the validation threshold.
        pvi_class = np.load(f'{exp_name}/pvi_class_test.npy')
        pvi = [pvi_value[pred_value] for pvi_value, pred_value in zip(pvi_class, pred_by_split['test'])]
        test_filtering_acc = compute_filtering_acc(pvi, correct_by_split['test'], opt_threshold)

        np.savez(f'{exp_name}/unscaled_filtering_accuracy.npz', opt_threshold=opt_threshold, test_filtering_acc=test_filtering_acc)
        print(f'Opt. threshold: {opt_threshold:.3f}, Test filtering error:{100-test_filtering_acc:.2f}')

Run: 1
Estimator: no_training
Opt. threshold: 2.140, Test filtering error:1.97
Estimator: training_from_scratch
Opt. threshold: -0.872, Test filtering error:1.77
Estimator: training_mlp_penultimate
Opt. threshold: 1.142, Test filtering error:1.87
Estimator: ensemble_no_training_training_from_scratch
Opt. threshold: 2.391, Test filtering error:1.78
Estimator: ensemble_no_training_training_mlp_penultimate
Opt. threshold: 2.006, Test filtering error:1.93
Estimator: ensemble_training_from_scratch_training_mlp_penultimate
Opt. threshold: 2.151, Test filtering error:1.63
Run: 2
Estimator: no_training
Opt. threshold: 2.476, Test filtering error:1.96
Estimator: training_from_scratch
Opt. threshold: -2.072, Test filtering error:1.86
Estimator: training_mlp_penultimate
Opt. threshold: 1.734, Test filtering error:1.82
Estimator: ensemble_no_training_training_from_scratch
Opt. threshold: 1.849, Test filtering error:1.85
Estimator: ensemble_no_training_training_mlp_penultimate
Opt. threshold: 2.599

In [18]:
# Aggregate the per-run results written as unscaled_filtering_accuracy.npz
# and report mean/std over the 5 runs for every estimator.
estimators_list = ['no_training', 'training_from_scratch', 'training_mlp_penultimate',
                   'ensemble_no_training_training_from_scratch', 'ensemble_no_training_training_mlp_penultimate', 'ensemble_training_from_scratch_training_mlp_penultimate']

for estimator in estimators_list:
    threshold = []
    filtering_acc = []
    for run in range(5):
        tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
        exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pvi/{estimator}'
        # np.load on an .npz returns a lazily-read archive that holds an open file
        # handle; the context manager closes it once both entries have been read.
        with np.load(f'{exp_name}/unscaled_filtering_accuracy.npz') as f:
            opt_threshold = f['opt_threshold']
            test_filtering_acc = f['test_filtering_acc']
        threshold.append(opt_threshold)
        filtering_acc.append(test_filtering_acc)

    print('-----------------------------')
    print(f'Estimator: {estimator}')
    print(f'Average opt. threshold: {(np.mean(threshold)):.3f}, std: {(np.std(threshold)):.3f}')
    print(f'Average test filtering error: {(100-np.mean(filtering_acc)):.2f}, std: {(np.std(filtering_acc)):.2f}')

-----------------------------
Estimator: no_training
Average opt. threshold: 2.379, std: 0.134
Average test filtering error: 1.93, std: 0.04
-----------------------------
Estimator: training_from_scratch
Average opt. threshold: -2.040, std: 2.152
Average test filtering error: 1.82, std: 0.06
-----------------------------
Estimator: training_mlp_penultimate
Average opt. threshold: 2.028, std: 0.563
Average test filtering error: 1.82, std: 0.03
-----------------------------
Estimator: ensemble_no_training_training_from_scratch
Average opt. threshold: 2.136, std: 0.266
Average test filtering error: 1.75, std: 0.07
-----------------------------
Estimator: ensemble_no_training_training_mlp_penultimate
Average opt. threshold: 2.519, std: 0.281
Average test filtering error: 1.86, std: 0.06
-----------------------------
Estimator: ensemble_training_from_scratch_training_mlp_penultimate
Average opt. threshold: 1.963, std: 0.327
Average test filtering error: 1.65, std: 0.06


In [19]:
##############################################################
#
# Compute Filtering Accuracy (with softmax scaling)
#
# #############################################################
#
# For every run and every PVI estimator: pick the threshold on the
# softmax-scaled PVI of the predicted class using the validation split,
# evaluate it on the test split, and save both numbers to
# `scaled_filtering_accuracy.npz` in the estimator's result directory.

estimators_list = ['no_training', 'training_from_scratch', 'training_mlp_penultimate',
                   'ensemble_no_training_training_from_scratch', 'ensemble_no_training_training_mlp_penultimate', 'ensemble_training_from_scratch_training_mlp_penultimate']


def _predictions_and_correctness(model, ds, batch_size):
    """Return (pred_y, true_label) for `ds` under `model`.

    `pred_y` is the argmax class of the model output for every sample;
    `true_label` is 1 where the prediction matches the (one-hot) label
    and 0 otherwise.
    """
    true_y = np.argmax([y for x,y in ds], axis=1)
    pred_y = np.argmax(model.predict(ds.batch(batch_size), verbose=0), axis=1)
    true_label = np.equal(true_y, pred_y).astype(int) # assign 0 if true_y != pred_y, assign 1 if true_y == pred_y
    return pred_y, true_label


def _scaled_pvi_of_prediction(pvi_path, pred_y):
    """Load the per-class PVI array at `pvi_path` and return, per sample,
    the softmax-scaled PVI of the predicted class.

    Rows of the stored array are paired positionally with `pred_y`.
    `softmax` is the name provided by the notebook's `src.utils` star import.
    """
    pvi_class = np.load(pvi_path)
    pvi_class = np.array([softmax(x) for x in pvi_class])
    return [pvi_value[pred_value] for pvi_value, pred_value in zip(pvi_class, pred_y)]


for run in range(5):
    tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
    print(f'Run: {run+1}')

    # Data, model and predictions do not depend on the estimator, so they are
    # computed once per run instead of once per (run, estimator) pair.
    # NOTE(review): this assumes load_dataset(cfg) yields the val/test samples
    # in the same order on every call -- the saved pvi_class_*.npy rows are
    # matched to the predictions purely by position. Confirm.
    ds_train, ds_val, ds_test, ds_info = load_dataset(cfg)
    n_classes = ds_info.features['label'].num_classes
    ds_val = preprocess_dataset(ds_val, cfg, n_classes, resize=False, normalize=True, onehot=True)
    ds_test = preprocess_dataset(ds_test, cfg, n_classes, resize=False, normalize=True, onehot=True)
    model = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/trained_model.keras')

    val_pred_y, val_true_label = _predictions_and_correctness(model, ds_val, cfg['batch_size'])
    test_pred_y, test_true_label = _predictions_and_correctness(model, ds_test, cfg['batch_size'])

    for estimator in estimators_list:
        print(f'Estimator: {estimator}')
        exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pvi/{estimator}'

        # Threshold is chosen on the validation split only ...
        pvi = _scaled_pvi_of_prediction(f'{exp_name}/pvi_class_val.npy', val_pred_y)
        opt_threshold = compute_opt_threshold(pvi, val_true_label)

        # ... and then evaluated on the test split.
        pvi = _scaled_pvi_of_prediction(f'{exp_name}/pvi_class_test.npy', test_pred_y)
        test_filtering_acc = compute_filtering_acc(pvi, test_true_label, opt_threshold)

        np.savez(f'{exp_name}/scaled_filtering_accuracy.npz', opt_threshold=opt_threshold, test_filtering_acc=test_filtering_acc)
        print(f'Opt. threshold: {opt_threshold:.3f}, Test filtering error:{100-test_filtering_acc:.2f}')

Run: 1
Estimator: no_training
Opt. threshold: 0.436, Test filtering error:1.98
Estimator: training_from_scratch
Opt. threshold: 0.013, Test filtering error:1.77
Estimator: training_mlp_penultimate
Opt. threshold: 0.122, Test filtering error:1.85
Estimator: ensemble_no_training_training_from_scratch
Opt. threshold: 0.500, Test filtering error:1.70
Estimator: ensemble_no_training_training_mlp_penultimate
Opt. threshold: 0.353, Test filtering error:1.95
Estimator: ensemble_training_from_scratch_training_mlp_penultimate
Opt. threshold: 0.457, Test filtering error:1.64
Run: 2
Estimator: no_training
Opt. threshold: 0.554, Test filtering error:1.96
Estimator: training_from_scratch
Opt. threshold: 0.006, Test filtering error:1.86
Estimator: training_mlp_penultimate
Opt. threshold: 0.177, Test filtering error:1.84
Estimator: ensemble_no_training_training_from_scratch
Opt. threshold: 0.469, Test filtering error:1.75
Estimator: ensemble_no_training_training_mlp_penultimate
Opt. threshold: 0.698, 

In [20]:
# Aggregate the softmax-scaled filtering results that were saved per run: for
# every PVI estimator, report the mean/std over the 5 runs of the optimal
# threshold and of the test filtering error (printed as 100 - stored accuracy).
estimators_list = ['no_training', 'training_from_scratch', 'training_mlp_penultimate',
                   'ensemble_no_training_training_from_scratch', 'ensemble_no_training_training_mlp_penultimate', 'ensemble_training_from_scratch_training_mlp_penultimate']

for estimator in estimators_list:
    threshold = []
    filtering_acc = []
    for run in range(5):
        tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
        exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pvi/{estimator}'
        # np.load on an .npz returns an NpzFile that keeps the archive open;
        # the context manager closes it once both arrays have been read.
        with np.load(f'{exp_name}/scaled_filtering_accuracy.npz') as f:
            opt_threshold = f['opt_threshold']
            test_filtering_acc = f['test_filtering_acc']
        threshold.append(opt_threshold)
        filtering_acc.append(test_filtering_acc)

    print('-----------------------------')
    print(f'Estimator: {estimator}')
    print(f'Average opt. threshold: {(np.mean(threshold)):.3f}, std: {(np.std(threshold)):.3f}')
    # std of the error equals std of the accuracy, so no 100-x is needed there
    print(f'Average test filtering error: {(100-np.mean(filtering_acc)):.2f}, std: {(np.std(filtering_acc)):.2f}')

-----------------------------
Estimator: no_training
Average opt. threshold: 0.584, std: 0.094
Average test filtering error: 1.95, std: 0.06
-----------------------------
Estimator: training_from_scratch
Average opt. threshold: 0.021, std: 0.017
Average test filtering error: 1.79, std: 0.04
-----------------------------
Estimator: training_mlp_penultimate
Average opt. threshold: 0.311, std: 0.184
Average test filtering error: 1.83, std: 0.02
-----------------------------
Estimator: ensemble_no_training_training_from_scratch
Average opt. threshold: 0.447, std: 0.089
Average test filtering error: 1.74, std: 0.05
-----------------------------
Estimator: ensemble_no_training_training_mlp_penultimate
Average opt. threshold: 0.644, std: 0.149
Average test filtering error: 1.88, std: 0.05
-----------------------------
Estimator: ensemble_training_from_scratch_training_mlp_penultimate
Average opt. threshold: 0.388, std: 0.102
Average test filtering error: 1.65, std: 0.05


In [14]:
##############################################################
#
# Compute ECE (with softmax scaling)
#
# #############################################################
#
# For every run and every PVI estimator: take the softmax-scaled PVI of the
# predicted class on the test split, pass it to compute_ece (10 bins) and
# save the result as `ece_test.npy` in the estimator's result directory.

estimators_list = ['no_training', 'training_from_scratch', 'training_mlp_penultimate',
                   'ensemble_no_training_training_from_scratch', 'ensemble_no_training_training_mlp_penultimate', 'ensemble_training_from_scratch_training_mlp_penultimate']

for run in range(5):
    tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
    print(f'Run: {run+1}')

    # Test data, model and predictions do not depend on the estimator, so they
    # are computed once per run instead of once per (run, estimator) pair.
    # NOTE(review): this assumes load_dataset(cfg) yields the test samples in
    # the same order on every call -- the saved pvi_class_test.npy rows are
    # matched to the predictions purely by position. Confirm.
    ds_train, ds_val, ds_test, ds_info = load_dataset(cfg)
    n_classes = ds_info.features['label'].num_classes
    ds_test = preprocess_dataset(ds_test, cfg, n_classes, resize=False, normalize=True, onehot=True)
    model = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/trained_model.keras')

    true_y = np.argmax([y for x,y in ds_test], axis=1)
    pred_y = np.argmax(model.predict(ds_test.batch(cfg['batch_size']), verbose=0), axis=1)

    for estimator in estimators_list:
        print(f'Estimator: {estimator}')
        exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pvi/{estimator}'

        pvi_class = np.load(f'{exp_name}/pvi_class_test.npy')
        # scale each sample's per-class PVI with softmax, then keep the entry
        # of the class the model actually predicted
        pvi_class = np.array([softmax(x) for x in pvi_class])
        pvi = np.array([pvi_value[pred_value] for pvi_value, pred_value in zip(pvi_class, pred_y)])
        ece = compute_ece(pvi, true_y, pred_y, n_bins=10)

        np.save(f'{exp_name}/ece_test.npy', ece)
        print(f'ECE: {ece:.2f}')

Run: 1
Estimator: no_training
ECE: 1.61
Estimator: training_from_scratch
ECE: 1.59
Estimator: training_mlp_penultimate
ECE: 1.38
Estimator: ensemble_no_training_training_from_scratch
ECE: 0.92
Estimator: ensemble_no_training_training_mlp_penultimate
ECE: 1.37
Estimator: ensemble_training_from_scratch_training_mlp_penultimate
ECE: 0.97
Run: 2
Estimator: no_training
ECE: 1.47
Estimator: training_from_scratch
ECE: 1.56
Estimator: training_mlp_penultimate
ECE: 1.19
Estimator: ensemble_no_training_training_from_scratch
ECE: 0.81
Estimator: ensemble_no_training_training_mlp_penultimate
ECE: 1.21
Estimator: ensemble_training_from_scratch_training_mlp_penultimate
ECE: 0.78
Run: 3
Estimator: no_training
ECE: 1.54
Estimator: training_from_scratch
ECE: 1.28
Estimator: training_mlp_penultimate
ECE: 1.34
Estimator: ensemble_no_training_training_from_scratch
ECE: 0.91
Estimator: ensemble_no_training_training_mlp_penultimate
ECE: 1.38
Estimator: ensemble_training_from_scratch_training_mlp_penultimate

In [16]:
# Summarise the ECE values stored on disk by the previous cell: mean and
# standard deviation over the five runs, one report per PVI estimator.
estimators_list = [
    'no_training',
    'training_from_scratch',
    'training_mlp_penultimate',
    'ensemble_no_training_training_from_scratch',
    'ensemble_no_training_training_mlp_penultimate',
    'ensemble_training_from_scratch_training_mlp_penultimate',
]

for estimator in estimators_list:
    # collect this estimator's ECE from each run directory
    ece_list = []
    for run in range(5):
        tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
        exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pvi/{estimator}'
        ece = np.load(f'{exp_name}/ece_test.npy')
        ece_list += [ece]

    for report_line in (
        '-----------------------------',
        f'Estimator: {estimator}',
        f'Average ECE: {(np.mean(ece_list)):.2f}, std: {(np.std(ece_list)):.2f}',
    ):
        print(report_line)

-----------------------------
Estimator: no_training
Average ECE: 1.56, std: 0.05
-----------------------------
Estimator: training_from_scratch
Average ECE: 1.45, std: 0.15
-----------------------------
Estimator: training_mlp_penultimate
Average ECE: 1.32, std: 0.08
-----------------------------
Estimator: ensemble_no_training_training_from_scratch
Average ECE: 0.88, std: 0.06
-----------------------------
Estimator: ensemble_no_training_training_mlp_penultimate
Average ECE: 1.35, std: 0.08
-----------------------------
Estimator: ensemble_training_from_scratch_training_mlp_penultimate
Average ECE: 0.80, std: 0.10


In [19]:
# Experiment configuration for DenseNet-121 on SVHN. This rebinds the `cfg`,
# `model_name` and `dataset_name` globals that were set for MNIST/MLP at the
# top of the notebook.
cfg = dict(
    dataset='svhn',
    model='densenet121',
    batch_size=256,
    optimizer='SGD',
    learning_rate=0.001,
    max_epoch=300,
    patience=10,
)

# short aliases used when building result paths
model_name, dataset_name = cfg['model'], cfg['dataset']

In [20]:
# Per-run PVI pipeline for the configuration in `cfg`. For each of the 5 runs
# the data and the saved untrained/trained models are loaded, then whichever
# stage below is uncommented is executed. As the cell stands, the only active
# stage trains the null model of the 'training_from_scratch' estimator; the
# from-scratch PVI model, the MLP-penultimate stage and the calibrated PVI
# computation are all commented out.
for run in range(5):
    tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow
        
    # shuffle=False is passed explicitly here (the MNIST cells above call
    # load_dataset(cfg) without it)
    ds_train, ds_val, ds_test, ds_info = load_dataset(cfg, shuffle=False)
    n_classes = ds_info.features['label'].num_classes
    ds_train = preprocess_dataset(ds_train, cfg, n_classes, resize=False, normalize=True, onehot=True)
    # ds_val / ds_test / model / int_model are only consumed by the
    # commented-out stages further down
    ds_val = preprocess_dataset(ds_val, cfg, n_classes, resize=False, normalize=True, onehot=True)
    ds_test = preprocess_dataset(ds_test, cfg, n_classes, resize=False, normalize=True, onehot=True)
    untrained_model = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/untrained_model.keras')
    model = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/trained_model.keras')
    # sub-model that maps the inputs to the output of the second-to-last layer
    int_model = tf.keras.Model(inputs=model.inputs, outputs=model.layers[-2].output)
        
#     ##############################################################
#     #
#     # Train PVI Model (training from scratch)
#     #
#     # #############################################################
    
    # result directory of the 'training_from_scratch' estimator for this run
    exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pvi/training_from_scratch'
    if not os.path.exists(exp_name):
        print("Making directory", exp_name)
        os.makedirs(exp_name)
    
#     tf.keras.utils.set_random_seed(run+20)  # train the model from scratch with different seed
#     train_pvi_model_from_scratch(ds_train, ds_val, untrained_model, cfg, save_path=f'{exp_name}/pvi_model.keras')
    # back to the per-run seed (the commented-out from-scratch training above
    # would have switched to run+20)
    tf.keras.utils.set_random_seed(run+10)
    # presumably trains the null model and writes it to save_path -- the
    # function lives in src.pvi_estimators and is not visible here
    train_pvi_null_model(ds_train, untrained_model, cfg, save_path=f'{exp_name}/pvi_null_model.keras')
    
#     ##############################################################
#     #
#     # Train PVI Model (training MLP penultimate)
#     #
#     # #############################################################
    
#     exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pvi/training_mlp_penultimate'
#     if not os.path.exists(exp_name):
#         print("Making directory", exp_name)
#         os.makedirs(exp_name)
    
#     ds_activity_train = ds_train.batch(cfg['batch_size']).map(lambda x, y: (int_model(x), y)).unbatch()
#     ds_activity_val = ds_val.batch(cfg['batch_size']).map(lambda x, y: (int_model(x), y)).unbatch()
#     pvi_model = mlp(ds_activity_train.batch(cfg['batch_size']), n_layers=1, n_hidden=512)
#     pvi_model.save(f'{exp_name}/untrained_pvi_model_.keras')
#     untrained_pvi_model = pvi_model
#     train_pvi_model_from_scratch(ds_activity_train, ds_activity_val, pvi_model, cfg, save_path=f'{exp_name}/pvi_model.keras')
#     train_pvi_null_model(ds_activity_train, untrained_pvi_model, cfg, save_path=f'{exp_name}/pvi_null_model.keras')
    
    ##############################################################
    #
    # Compute calibrated PVI for all validation and test samples
    # (stage currently disabled: everything below is commented out)
    #
    # #############################################################
    
#     exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pvi/training_from_scratch'
#     if not os.path.exists(exp_name):
#         print("Making directory", exp_name)
#         os.makedirs(exp_name)
    
#     pvi_model = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pvi/training_from_scratch/pvi_model.keras')
#     null_model = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pvi/training_from_scratch/pvi_null_model.keras')
    
#     pvi_opt_temp = temp_scaling(pvi_model, ds_val)
#     ds_null = ds_val.map(lambda x, y: (tf.zeros_like(x), y))
#     null_opt_temp = temp_scaling(null_model, ds_null)
#     np.save(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pvi/training_from_scratch/pvi_model_opt_temp.npy', pvi_opt_temp)
#     np.save(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pvi/training_from_scratch/null_model_opt_temp.npy', null_opt_temp)
    
#     model_temp = np.load(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pvi/training_from_scratch/pvi_model_opt_temp.npy')
#     null_temp = np.load(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pvi/training_from_scratch/null_model_opt_temp.npy')
#     print(model_temp, null_temp)
    
#     print(f'Computing PVI for all validation samples and for all classes...')
#     pvi_class = []
#     for k in range(n_classes):
#         ds_activity = ds_val.batch(cfg['batch_size']).map(lambda x, y: (x, tf.one_hot(tf.fill([tf.shape(x)[0]], k), depth=n_classes))).cache().prefetch(tf.data.AUTOTUNE)
#         pvi_list = []
#         for (x_batch, y_batch) in ds_activity:
#             pvi = neural_pvi_calibrated(x_batch, y_batch, pvi_model, null_model, model_temp, null_temp)
#             pvi_list += np.array(pvi).tolist()
#         pvi_class.append(pvi_list)
#     np.save(f'{exp_name}/pvi_calibrated_class_val.npy', np.array(pvi_class).T)

#     print(f'Computing PVI for all test samples and for all classes...')
#     pvi_class = []
#     for k in range(n_classes):
#         ds_activity = ds_test.batch(cfg['batch_size']).map(lambda x, y: (x, tf.one_hot(tf.fill([tf.shape(x)[0]], k), depth=n_classes))).cache().prefetch(tf.data.AUTOTUNE)
#         pvi_list = []
#         for (x_batch, y_batch) in ds_activity:
#             pvi = neural_pvi_calibrated(x_batch, y_batch, pvi_model, null_model, model_temp, null_temp)
#             pvi_list += np.array(pvi).tolist()
#         pvi_class.append(pvi_list)
#     np.save(f'{exp_name}/pvi_calibrated_class_test.npy', np.array(pvi_class).T)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
