In [2]:
import copy
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from scipy.integrate import simpson

In [4]:
def compute_aopc_morf(models):
    """
    Computes the AOPC (Area Over Perturbation Curve) MoRF score of each model
    from its ROAD curve.

    For every model the accuracy at the lowest perturbation percentage is used
    as the baseline; the score is the sum of (baseline - accuracy) over all
    points divided by (number of points + 1).

    Parameters:
        models (dict): A dictionary where keys are model names and values are
            dictionaries with percentage of features perturbed as keys and the
            corresponding model accuracy as values (taken from the ROAD plot).
            The inner dictionaries may be in any order; they are sorted by
            percentage before the baseline is picked.
    Returns:
        pd.DataFrame: One row per model (index = model name) with a single
            column 'AOPC MoRF Score', sorted in descending order. A model with
            no measurements gets NaN and is therefore listed last.
    """
    aopc_scores = {}

    for model_name, model_performance in models.items():
        # Sort by perturbation percentage so the baseline does not depend on
        # the insertion order of the dictionary.
        ordered_points = sorted(model_performance.items(), key=lambda point: point[0])
        if not ordered_points:
            # No curve to integrate: the score is undefined rather than an error.
            aopc_scores[model_name] = float("nan")
            continue

        accuracy = np.array([acc for _, acc in ordered_points], dtype=float)
        # Accuracy drop relative to the least-perturbed point (first drop is 0).
        accuracy_drop = accuracy[0] - accuracy
        # NOTE(review): the divisor is N + 1 even though the baseline point is
        # already one of the N entries; kept unchanged so existing scores stay
        # comparable -- confirm this matches the intended AOPC definition.
        aopc_scores[model_name] = np.sum(accuracy_drop) / (len(accuracy) + 1)

    aopc_df = pd.DataFrame.from_dict(aopc_scores, orient='index', columns=['AOPC MoRF Score'])
    return aopc_df.sort_values(by='AOPC MoRF Score', ascending=False)


In [5]:
def filter_and_compute_road(model, test_loader, method, device,  resnet=False, max_scores=None):
    """
    Calculates the ROAD metric of an attribution method on the correctly
    classified samples of a data loader and averages the per-batch results.

    Args:
        model: Model to calculate metric on
        test_loader: test data loader yielding (x_batch, y_batch) tensors
        method: name of the attribution method. "SmoothGrad" is routed to the
            custom `explainer_wrapper`; any other name is passed to the
            built-in `quantus.explain`.
        device: use GPU or CPU
        resnet: is it resnet model or not. For resnet the features are removed
            in steps of 10 percentage points, otherwise in steps of 5.
        max_scores: maximum number of per-batch results to collect. Defaults
            to 500 for resnet and 1000 otherwise.

    Returns:
        dict: for every key returned by the ROAD metric, the unweighted mean
            of its value over the evaluated batches. Empty if no batch
            contained a correct prediction.
    """
    if max_scores is None:
        max_scores = 500 if resnet else 1000

    # The two model families differ only in the perturbation step size.
    percentage_step = 10 if resnet else 5
    faithfulness = quantus.ROAD(
        noise=0.01,
        perturb_func=quantus.perturb_func.noisy_linear_imputation,
        percentages=list(range(1, 100, percentage_step)),
        display_progressbar=False,
    )

    score_list = []

    for x_batch, y_batch in test_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        # The forward pass is only used to filter samples, so no gradients are needed.
        with torch.no_grad():
            outputs = model(x_batch)
        predictions = torch.argmax(outputs, dim=1)
        # using only correct prediction
        correct_mask = predictions == y_batch
        if not bool(correct_mask.any()):
            # Nothing to evaluate; avoid handing an empty batch to the metric.
            continue
        x_batch = x_batch[correct_mask].cpu().numpy()
        y_batch = y_batch[correct_mask].cpu().numpy()

        if method == "SmoothGrad":
            # SmoothGrad is not handled by the built-in explainer: use the custom one.
            explain_func = explainer_wrapper
            explain_func_kwargs = {
                "method": method,
                "posterior_mean": copy.deepcopy(
                    model
                    .to(device)
                    .state_dict()
                ),
                "mean": 1.0,
                "std": 0.5,
                "n": 25,
                "device": device,
            }
        else:
            # use in built explainer
            explain_func = quantus.explain
            explain_func_kwargs = {"method": method, "softmax": False}

        scores = faithfulness(
            model=model,
            x_batch=x_batch,
            y_batch=y_batch,
            a_batch=None,
            s_batch=None,
            device=device,
            explain_func=explain_func,
            explain_func_kwargs=explain_func_kwargs,
        )
        score_list.append(scores)

        # Stop once exactly `max_scores` results have been collected.
        if len(score_list) >= max_scores:
            break

    # Sum the per-batch dictionaries key by key ...
    average = {}
    for batch_scores in score_list:
        for key, value in batch_scores.items():
            if key in average:
                average[key] += value
            else:
                average[key] = value

    # ... and divide by the number of dictionaries to get the average.
    # Every batch counts equally, regardless of how many samples survived the filter.
    num_dicts = len(score_list)
    for key in average:
        average[key] /= num_dicts
    return average

In [6]:
def filter_and_compute_sparsity(model, test_loader,method,  device, resnet=False, max_scores=None):
    """
    Calculates the Sparsity metric of an attribution method on the correctly
    classified samples of a data loader.

    Args:
        model: Model to calculate metric on
        test_loader: test data loader yielding (x_batch, y_batch) tensors
        method: name of the attribution method. "SmoothGrad" is routed to the
            custom `explainer_wrapper`; any other name is passed to the
            built-in `quantus.explain`.
        device: use GPU or CPU
        resnet: is it resnet model or not
        max_scores: maximum number of scores to collect. Defaults to 500 for
            resnet and 1000 otherwise.

    Returns:
        float: NaN-ignoring mean of all collected scores (NaN if nothing was
            collected).
    """
    if max_scores is None:
        max_scores = 500 if resnet else 1000

    # NOTE(review): with return_aggregate=True each call presumably yields the
    # aggregated score(s) of one batch -- confirm against the Quantus version in use.
    sparsity = quantus.Sparseness(disable_warnings=True, return_aggregate=True)
    score_sparsity = []

    for x_batch, y_batch in test_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        # The forward pass is only used to filter samples, so no gradients are needed.
        with torch.no_grad():
            outputs = model(x_batch)
        predictions = torch.argmax(outputs, dim=1)
        # using correct predictions
        correct_mask = predictions == y_batch
        if not bool(correct_mask.any()):
            # Nothing to evaluate; avoid handing an empty batch to the metric.
            continue
        x_batch = x_batch[correct_mask].cpu().numpy()
        y_batch = y_batch[correct_mask].cpu().numpy()

        if method == "SmoothGrad":
            # SmoothGrad is not handled by the built-in explainer: use the custom one.
            explain_func = explainer_wrapper
            explain_func_kwargs = {
                "method": method,
                "posterior_mean": copy.deepcopy(
                    model
                    .to(device)
                    .state_dict()
                ),
                "mean": 1.0,
                "std": 0.5,
                "n": 25,
                "device": device,
            }
        else:
            # use in built explainer
            explain_func = quantus.explain
            explain_func_kwargs = {"method": method, "softmax": False}

        scores = sparsity(
            model=model,
            x_batch=x_batch,
            y_batch=y_batch,
            a_batch=None,
            s_batch=None,
            device=device,
            explain_func=explain_func,
            explain_func_kwargs=explain_func_kwargs,
        )
        score_sparsity.extend(scores)

        # Stop once `max_scores` scores have been collected.
        if len(score_sparsity) >= max_scores:
            break
    return np.nanmean(score_sparsity)