# Reproducibility Study of “Studying How to Efficiently and Effectively Guide Models with Explanations”

Description: This file is part of a project aiming to reproduce the study titled "Studying How to Efficiently and 
Effectively Guide Models with Explanations." The project focuses on verifying the results and methodologies 
proposed in the original study, and potentially extending or refining the study's findings.

Based on the code of the original paper: https://github.com/sukrutrao/Model-Guidance

### 1. Load the data

#### Download the ImageNet Model Weight

In [None]:
import os
import subprocess

# Download the X-DNN ResNet-50 ImageNet checkpoint unless it is already on disk
if not os.path.exists("weights/xdnn/xfixup_resnet50_model_best.pth.tar"):

    # Create the target directory (no-op when it already exists)
    os.makedirs("weights/xdnn", exist_ok=True)

    # "-P" stores the file under its remote name inside weights/xdnn/.
    # check=True raises CalledProcessError when wget exits with an error, so a
    # failed download is reported here instead of being silently ignored.
    subprocess.run(["wget", "-P", "weights/xdnn/", "https://download.visinf.tu-darmstadt.de/data/2021-neurips-fast-axiomatic-attribution/models/xfixup_resnet50_model_best.pth.tar"], check=True)

    # NOTE: the working directory is deliberately left untouched. wget is given
    # paths relative to the current directory, so nothing has to be undone;
    # changing to "../.." here would break every relative path used afterwards.

# Download the B-cos ResNet-50 checkpoint unless it is already on disk
if not os.path.exists("weights/bcos/resnet_50-f46c1a4159.pth"):

    # Create the target directory (no-op when it already exists)
    os.makedirs("weights/bcos", exist_ok=True)

    # "-O" sets the output file explicitly because the share URL ends in
    # "/download" rather than in the checkpoint's file name.
    subprocess.run(["wget", "-O", "weights/bcos/resnet_50-f46c1a4159.pth", "https://nextcloud.mpi-klsb.mpg.de/index.php/s/3Yk6p86SBBFYSBN/download"], check=True)

#### For VOC2007

First, we load the VOC2007 data. The raw data is stored in the folder `datasets/VOC2007/`, and the processed data is saved as `.pt` files. We use the function `preprocess_voc2007` (imported from `datasets.VOC2007.preprocess`) to process the data. Because this is run in a Jupyter Notebook, an `argparse.Namespace` is used to pass the arguments.

In [None]:
import os
import argparse

from datasets.VOC2007.preprocess import preprocess_voc2007

# Where the raw data lives, which split to convert, and where the result goes
data_root = "datasets/VOC2007/"
split = "train"
save_path = "datasets/VOC2007/processed/"

args = dict(data_root=data_root, split=split, save_path=save_path)

# Only preprocess when the processed .pt file for this split is missing
processed_file = os.path.join(save_path, f"{split}.pt")

if not os.path.exists(processed_file):

    # preprocess_voc2007 is handed an argparse-style namespace
    args = argparse.Namespace(**args)
    preprocess_voc2007(args)

else:

    print(f"Dataset {split} already exists.")

In [None]:
import os
import argparse

from datasets.VOC2007.preprocess import preprocess_voc2007

# Where the raw data lives, which split to convert, and where the result goes
data_root = "datasets/VOC2007/"
split = "val"
save_path = "datasets/VOC2007/processed/"

args = dict(data_root=data_root, split=split, save_path=save_path)

# Only preprocess when the processed .pt file for this split is missing
processed_file = os.path.join(save_path, f"{split}.pt")

if not os.path.exists(processed_file):

    # preprocess_voc2007 is handed an argparse-style namespace
    args = argparse.Namespace(**args)
    preprocess_voc2007(args)

else:

    print(f"Dataset {split} already exists.")

In [None]:
import os
import argparse

from datasets.VOC2007.preprocess import preprocess_voc2007

# Where the raw data lives, which split to convert, and where the result goes
data_root = "datasets/VOC2007/"
split = "test"
save_path = "datasets/VOC2007/processed/"

args = dict(data_root=data_root, split=split, save_path=save_path)

# Only preprocess when the processed .pt file for this split is missing
processed_file = os.path.join(save_path, f"{split}.pt")

if not os.path.exists(processed_file):

    # preprocess_voc2007 is handed an argparse-style namespace
    args = argparse.Namespace(**args)
    preprocess_voc2007(args)

else:

    print(f"Dataset {split} already exists.")

In [None]:
import os
import argparse

from datasets.VOC2007.preprocess import preprocess_voc2007

# Where the raw data lives, which split to convert, and where the result goes
data_root = "datasets/VOC2007/"
split = "trainval"
save_path = "datasets/VOC2007/processed/"

args = dict(data_root=data_root, split=split, save_path=save_path)

# Only preprocess when the processed .pt file for this split is missing
processed_file = os.path.join(save_path, f"{split}.pt")

if not os.path.exists(processed_file):

    # preprocess_voc2007 is handed an argparse-style namespace
    args = argparse.Namespace(**args)
    preprocess_voc2007(args)

else:

    print(f"Dataset {split} already exists.")

#### For Waterbirds

Second, we load the Waterbirds data. The raw data is stored in the folder `datasets/WATERBIRDS/`, and the processed data is saved as `.pt` files. We use the function `preprocess_waterbirds` (imported from `datasets.WATERBIRDS.preprocess`) to process the data. Because this is run in a Jupyter Notebook, an `argparse.Namespace` is used to pass the arguments.

In [None]:
import argparse  

from datasets.WATERBIRDS.preprocess import preprocess_waterbirds

# Input locations, the split to convert, and the output directory
bounding_boxes_path = "./datasets/WATERBIRDS/bounding_boxes.txt"
waterbirds_dataset_root = "./datasets/WATERBIRDS/waterbird_1.0_forest2water2/"
split = "train"
save_path = "./datasets/WATERBIRDS/processed/"

# preprocess_waterbirds is handed an argparse-style namespace, so build it directly
args = argparse.Namespace(
    bounding_boxes_path=bounding_boxes_path,
    waterbirds_dataset_root=waterbirds_dataset_root,
    split=split,
    save_path=save_path,
)

preprocess_waterbirds(args)

In [None]:
import argparse

from datasets.WATERBIRDS.preprocess import preprocess_waterbirds

# Input locations, the split to convert, and the output directory
bounding_boxes_path = "./datasets/WATERBIRDS/bounding_boxes.txt"
waterbirds_dataset_root = "./datasets/WATERBIRDS/waterbird_1.0_forest2water2/"
split = "val"
save_path = "./datasets/WATERBIRDS/processed/"

# preprocess_waterbirds is handed an argparse-style namespace, so build it directly
args = argparse.Namespace(
    bounding_boxes_path=bounding_boxes_path,
    waterbirds_dataset_root=waterbirds_dataset_root,
    split=split,
    save_path=save_path,
)

preprocess_waterbirds(args)

In [None]:
import argparse

from datasets.WATERBIRDS.preprocess import preprocess_waterbirds

# Input locations, the split to convert, and the output directory
bounding_boxes_path = "./datasets/WATERBIRDS/bounding_boxes.txt"
waterbirds_dataset_root = "./datasets/WATERBIRDS/waterbird_1.0_forest2water2/"
split = "test"
save_path = "./datasets/WATERBIRDS/processed/"

# preprocess_waterbirds is handed an argparse-style namespace, so build it directly
args = argparse.Namespace(
    bounding_boxes_path=bounding_boxes_path,
    waterbirds_dataset_root=waterbirds_dataset_root,
    split=split,
    save_path=save_path,
)

preprocess_waterbirds(args)

### 2. Train the baseline model

Train the baseline model using the training data for 2 epochs and a batch size of 16

#### For VOC2007

In [None]:
import argparse

from train import main

# --- Run setup -------------------------------------------------------------
dataset = "VOC2007"
model_backbone = "vanilla"
total_epochs = 2
train_batch_size = 16
learning_rate = 1e-4
seed = 0

# --- Input / output locations ----------------------------------------------
data_path = "datasets/"
log_path = "./base_logs_demo"
save_path = "./BASE_DEMO"
model_path = None

# --- Guidance settings (no localization loss / attribution for this run) ----
localization_loss_lambda = 1.0
layer = "Input"
localization_loss_fn = None
attribution_method = None
optimize_explanations = False
annotated_fraction = 1.0
box_dilation_percentage = 0

# --- Evaluation and checkpoint selection -----------------------------------
min_fscore = -1
pareto = False
pareto_metric = "EPG_IOU"
evaluation_frequency = 1
eval_batch_size = 4

# main() is handed an argparse-style namespace carrying every setting above
args = argparse.Namespace(
    model_backbone=model_backbone,
    total_epochs=total_epochs,
    learning_rate=learning_rate,
    log_path=log_path,
    save_path=save_path,
    seed=seed,
    train_batch_size=train_batch_size,
    dataset=dataset,
    model_path=model_path,
    data_path=data_path,
    localization_loss_lambda=localization_loss_lambda,
    layer=layer,
    localization_loss_fn=localization_loss_fn,
    attribution_method=attribution_method,
    optimize_explanations=optimize_explanations,
    min_fscore=min_fscore,
    pareto=pareto,
    annotated_fraction=annotated_fraction,
    evaluation_frequency=evaluation_frequency,
    eval_batch_size=eval_batch_size,
    box_dilation_percentage=box_dilation_percentage,
    pareto_metric=pareto_metric,
)

main(args)

#### For waterbirds

*Since Waterbirds uses ImageNet-pretrained weights as its baseline model, no code is needed to create a baseline model for Waterbirds.*

### 3. Train the fine-tuned model on the base model

For the fine-tuned model, due to limited computational resources, we only train for 2 epochs with a batch size of 16.

#### For VOC2007:

In [None]:
import argparse

from train import main

# --- Run setup -------------------------------------------------------------
dataset = "VOC2007"
model_backbone = "vanilla"
total_epochs = 2
train_batch_size = 16
learning_rate = 1e-4
seed = 0

# --- Input / output locations ----------------------------------------------
data_path = "datasets/"
log_path = "./finetune_logs_demo"
save_path = "./FT_DEMO"

# Checkpoint under ./BASE_DEMO that fine-tuning starts from
model_path = "./BASE_DEMO/VOC2007/vanilla_standard_attrNone_loclossNone_origNone_resnet50_lr0.0001_sll1.0_layerInput/model_checkpoint_f1_best.pt"

# --- Guidance settings -------------------------------------------------------
localization_loss_lambda = 1e-3
layer = "Final"
localization_loss_fn = "Energy"
attribution_method = "IxG"
optimize_explanations = True
annotated_fraction = 1.0
box_dilation_percentage = 0

# --- Evaluation and checkpoint selection -----------------------------------
min_fscore = -1
pareto = True
pareto_metric = "EPG_IOU"
evaluation_frequency = 1
eval_batch_size = 4

# main() is handed an argparse-style namespace carrying every setting above
args = argparse.Namespace(
    model_backbone=model_backbone,
    total_epochs=total_epochs,
    learning_rate=learning_rate,
    log_path=log_path,
    save_path=save_path,
    seed=seed,
    train_batch_size=train_batch_size,
    dataset=dataset,
    model_path=model_path,
    data_path=data_path,
    localization_loss_lambda=localization_loss_lambda,
    layer=layer,
    localization_loss_fn=localization_loss_fn,
    attribution_method=attribution_method,
    optimize_explanations=optimize_explanations,
    min_fscore=min_fscore,
    pareto=pareto,
    annotated_fraction=annotated_fraction,
    evaluation_frequency=evaluation_frequency,
    eval_batch_size=eval_batch_size,
    box_dilation_percentage=box_dilation_percentage,
    pareto_metric=pareto_metric,
)

main(args)

#### For Waterbirds:

In [None]:
import argparse

from train import main

# --- Run setup -------------------------------------------------------------
dataset = "WATERBIRDS"
model_backbone = "bcos"
total_epochs = 2
train_batch_size = 16
learning_rate = 1e-5
seed = 0

# --- Input / output locations ----------------------------------------------
data_path = "datasets/"
log_path = "./base_logs_demo"
save_path = "./FT_DEMO"
model_path = None

# --- Guidance settings (no localization loss / attribution for this run) ----
localization_loss_lambda = 1.0
layer = "Input"
localization_loss_fn = None
attribution_method = None
optimize_explanations = False
annotated_fraction = 1.0
box_dilation_percentage = 0

# --- Evaluation and checkpoint selection -----------------------------------
min_fscore = -1
pareto = False
pareto_metric = "EPG_IOU"
evaluation_frequency = 1
eval_batch_size = 4

# main() is handed an argparse-style namespace carrying every setting above
args = argparse.Namespace(
    model_backbone=model_backbone,
    total_epochs=total_epochs,
    learning_rate=learning_rate,
    log_path=log_path,
    save_path=save_path,
    seed=seed,
    train_batch_size=train_batch_size,
    dataset=dataset,
    model_path=model_path,
    data_path=data_path,
    localization_loss_lambda=localization_loss_lambda,
    layer=layer,
    localization_loss_fn=localization_loss_fn,
    attribution_method=attribution_method,
    optimize_explanations=optimize_explanations,
    min_fscore=min_fscore,
    pareto=pareto,
    annotated_fraction=annotated_fraction,
    evaluation_frequency=evaluation_frequency,
    eval_batch_size=eval_batch_size,
    box_dilation_percentage=box_dilation_percentage,
    pareto_metric=pareto_metric,
)

main(args)

### 4. Evaluate the fine-tuned model on the test set

Evaluate the fine-tuned model on the test set and save the results.

#### For VOC2007:

In [None]:
from eval import evaluation_function

# Checkpoint under ./FT_DEMO to evaluate
model_path = './FT_DEMO/VOC2007/vanilla_finetunedobjlocpareto_attrIxG_loclossEnergy_origmodel_checkpoint_f1_best.pt_resnet50_lr0.0001_sll0.001_layerFinal/model_checkpoint_f1_best.pt'

# Evaluation settings
dataset = 'VOC2007'
split = 'test'
mode = 'bbs'

# Keyword arguments forwarded to the evaluation function
args = dict(model_path=model_path, split=split, mode=mode, dataset=dataset)

# Run the evaluation
evaluation_function(**args)

#### For Waterbirds:

In [None]:
from eval import evaluation_function

# Checkpoint under ./FT_DEMO to evaluate
model_path = './FT_DEMO/WATERBIRDS/bcos_standard_attrNone_loclossNone_origNone_resnet50_lr1e-05_sll1.0_layerInput/model_checkpoint_f1_best.pt'

# Evaluation settings
dataset = 'WATERBIRDS'
split = 'test'
mode = 'bbs'

# Keyword arguments forwarded to the evaluation function
args = dict(model_path=model_path, split=split, mode=mode, dataset=dataset)

# Run the evaluation
evaluation_function(**args)

### 5. Visualize the different attribution methods

Visualize the different attribution methods on the test set. Due to computational resource limitation, we load the best model checkpoints.

In [None]:
import torch
# Release cached GPU memory held by PyTorch before loading the checkpoints
torch.cuda.empty_cache()

from visualize import visualize_fig9

# Checkpoints to compare: the baseline first, then one fine-tuned model per
# localization loss (Energy, L1, PPCE, RRR), as encoded in the directory names.
fig9_checkpoints = [
    "BASE/VOC2007/bcos_standard_attrNone_loclossNone_origNone_resnet50_lr0.0001_sll1.0_layerInput/model_checkpoint_f1_best.pt",
    "FT/VOC2007/bcos_finetunedobjlocpareto_attrBCos_loclossEnergy_origmodel_checkpoint_f1_best.pt_resnet50_lr0.0001_sll0.005_layerInput/model_checkpoint_f1_best.pt",
    "FT/VOC2007/bcos_finetunedobjlocpareto_attrBCos_loclossL1_origmodel_checkpoint_f1_best.pt_resnet50_lr0.0001_sll0.01_layerInput/model_checkpoint_f1_best.pt",
    "FT/VOC2007/bcos_finetunedobjlocpareto_attrBCos_loclossPPCE_origmodel_checkpoint_f1_best.pt_resnet50_lr0.001_sll0.001_layerInput/model_checkpoint_f1_best.pt",
    "FT/VOC2007/bcos_finetunedobjlocpareto_attrBCos_loclossRRR_origmodel_checkpoint_f1_best.pt_resnet50_lr0.0001_sll5e-05_layerInput/model_checkpoint_f1_best.pt",
]

visualize_fig9(fig9_checkpoints, last=False)

### 6. Visualize the model with dilated bounding boxes

To demonstrate the effect of dilated bounding boxes, we visualize the model with dilated bounding boxes. Due to computational resource limitation, we load in the pre-trained model and visualize the model with dilated bounding boxes.

In [None]:
import torch
# Release cached GPU memory held by PyTorch before loading the checkpoints
torch.cuda.empty_cache()

from visualize import visualize_fig11

# Baseline checkpoint
fig11_base_checkpoint = "BASE/VOC2007/bcos_standard_attrNone_loclossNone_origNone_resnet50_lr0.0001_sll1.0_layerInput/model_checkpoint_f1_best.pt"

# Energy-loss checkpoints: first the regular run, then the "dil0.5" run
fig11_energy_checkpoints = [
    "FT/DIL/bcos_finetunedobjlocpareto_attrBCos_loclossEnergy_origmodel_checkpoint_f1_best.pt_resnet50_lr0.0001_sll0.005_layerInput/model_checkpoint_f1_best.pt",
    "FT/DIL/bcos_FT_dilated_attrBCos_loclossEnergy_origmodel_checkpoint_f1_best.pt_resnet50_lr0.0001_sll0.005_layerInput_dil0.5/model_checkpoint_f1_best.pt",
]

# L1-loss checkpoints, in the same order
fig11_l1_checkpoints = [
    "FT/DIL/bcos_finetunedobjlocpareto_attrBCos_loclossL1_origmodel_checkpoint_f1_best.pt_resnet50_lr0.0001_sll0.005_layerInput/model_checkpoint_f1_best.pt",
    "FT/DIL/bcos_FT_dilated_attrBCos_loclossL1_origmodel_checkpoint_f1_best.pt_resnet50_lr0.0001_sll0.005_layerInput_dil0.5/model_checkpoint_f1_best.pt",
]

visualize_fig11(
    base_path=fig11_base_checkpoint,
    energy_paths=fig11_energy_checkpoints,
    L1_paths=fig11_l1_checkpoints,
    last=False,
)

### 7. Show that the Pareto Front works

To show that the Pareto front works, we would need to evaluate every model checkpoint on the test set and compute the Pareto front. Due to limited computational resources, we instead selected 6 checkpoints of the fine-tuned model (see the path below) and evaluate only those on the test set.

In [1]:
from pareto_FT import pareto_demo

# Settings for the Pareto-front demo.
# bin_width is a plain float: a trailing comma after the literal would turn it
# into the one-element tuple (0.005,).
bin_width = 0.005
layer = "Final"
data_split = "test"
attribution_method = "BCos"
eval_batch_size = 4

# Directory holding the checkpoints to evaluate, and where the results are written
model_dir = "./FT/VOC2007/bcos/fin/l1/bcos_finetunedobjlocpareto_attrBCos_loclossL1_origmodel_checkpoint_f1_best.pt_resnet50_lr0.0001_sll0.005_layerFinal/pareto_front"
output_dir = "./p_curves_demo/VOC2007/bcos/Final/L1/pareto"

pareto_demo(
    bin_width=bin_width,
    layer=layer,
    data_split=data_split,
    attribution_method=attribution_method,
    eval_batch_size=eval_batch_size,
    model_dir=model_dir,
    output_dir=output_dir
)

2024-02-01 21:16:20.139344: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  return F.conv2d(input, weight, bias, self.stride,
100%|██████████| 1238/1238 [01:03<00:00, 19.45it/s]


Validation Metrics: {'Accuracy': 0.9715266823768616, 'Precision': 0.8774709701538086, 'Recall': 0.7328354120254517, 'F-Score': 0.7986577153205872, 'True Positives': 5593.0, 'True Negatives': 90627.0, 'False Positives': 781.0, 'False Negatives': 2039.0, 'BB-Loc': 0.6188609856665165, 'BB-IoU': 0.26205289024865525, 'BB-IoU-Adapt': 0.18593994294662167, 'Average-Loss': 0.10765171821360056}
Current Pareto Front Size: 1
Pareto Costs: (F1:0.7987, EPG:0.6189, MOD16)


100%|██████████| 1238/1238 [01:04<00:00, 19.07it/s]


Validation Metrics: {'Accuracy': 0.9715064764022827, 'Precision': 0.8800569176673889, 'Recall': 0.7296907901763916, 'F-Score': 0.797851026058197, 'True Positives': 5569.0, 'True Negatives': 90649.0, 'False Positives': 759.0, 'False Negatives': 2063.0, 'BB-Loc': 0.578344636322933, 'BB-IoU': 0.28960877438914845, 'BB-IoU-Adapt': 0.19349529231158824, 'Average-Loss': 0.09790503112102748}
Current Pareto Front Size: 1
Pareto Costs: (F1:0.7987, EPG:0.6189, MOD16)


100%|██████████| 1238/1238 [01:05<00:00, 18.87it/s]


Validation Metrics: {'Accuracy': 0.9710116982460022, 'Precision': 0.8749409317970276, 'Recall': 0.7278563976287842, 'F-Score': 0.7946498990058899, 'True Positives': 5555.0, 'True Negatives': 90614.0, 'False Positives': 794.0, 'False Negatives': 2077.0, 'BB-Loc': 0.6457726532228821, 'BB-IoU': 0.23785057200283075, 'BB-IoU-Adapt': 0.17456240911746998, 'Average-Loss': 0.114491673779218}
Current Pareto Front Size: 2
Pareto Costs: (F1:0.7987, EPG:0.6189, MOD16)(F1:0.7946, EPG:0.6458, MOD23)


100%|██████████| 1238/1238 [01:05<00:00, 19.00it/s]


Validation Metrics: {'Accuracy': 0.9713146090507507, 'Precision': 0.8704190254211426, 'Recall': 0.7375524044036865, 'F-Score': 0.7984963655471802, 'True Positives': 5629.0, 'True Negatives': 90570.0, 'False Positives': 838.0, 'False Negatives': 2003.0, 'BB-Loc': 0.5366749359657786, 'BB-IoU': 0.31905086826245543, 'BB-IoU-Adapt': 0.1946479531424506, 'Average-Loss': 0.09062630525505794}
Current Pareto Front Size: 2
Pareto Costs: (F1:0.7987, EPG:0.6189, MOD16)(F1:0.7946, EPG:0.6458, MOD23)


100%|██████████| 1238/1238 [01:04<00:00, 19.12it/s]


Validation Metrics: {'Accuracy': 0.9719305038452148, 'Precision': 0.8791809678077698, 'Recall': 0.7370283007621765, 'F-Score': 0.8018531799316406, 'True Positives': 5625.0, 'True Negatives': 90635.0, 'False Positives': 773.0, 'False Negatives': 2007.0, 'BB-Loc': 0.610978875644064, 'BB-IoU': 0.43752291838439206, 'BB-IoU-Adapt': 0.1953980387498161, 'Average-Loss': 0.1250996258416738}
Current Pareto Front Size: 2
Pareto Costs: (F1:0.7987, EPG:0.6189, MOD16)(F1:0.7946, EPG:0.6458, MOD23)


100%|██████████| 1238/1238 [01:04<00:00, 19.08it/s]


Validation Metrics: {'Accuracy': 0.9707390666007996, 'Precision': 0.8625919222831726, 'Recall': 0.7378144860267639, 'F-Score': 0.7953389883041382, 'True Positives': 5631.0, 'True Negatives': 90511.0, 'False Positives': 897.0, 'False Negatives': 2001.0, 'BB-Loc': 0.6748056483271222, 'BB-IoU': 0.20978544122021148, 'BB-IoU-Adapt': 0.1620899864685625, 'Average-Loss': 0.13166194046833982}
Current Pareto Front Size: 2
Pareto Costs: (F1:0.7987, EPG:0.6189, MOD16)(F1:0.7953, EPG:0.6748, MOD43)


The results are shown in the following figure. We can see the distinction between the Pareto Front and the non-Pareto Front.

In [None]:
from utils import *
from visualize import *

# `from utils import *` does not bind the name `utils` itself, so import the
# module explicitly instead of relying on another star import to leak it.
import utils

# Results of the checkpoints that ended up on the Pareto front
root_folder = './p_curves_demo/VOC2007/bcos/Final/L1/pareto'
data_f1_epg_pareto = utils.load_data_from_folders_with_npz_files(root_folder, metrics=('f_score', 'bb_score'))

# Results of the checkpoints that did not
root_folder = './p_curves_demo/VOC2007/bcos/Final/L1/not_pareto'
data_f1_epg_not_pareto = utils.load_data_from_folders_with_npz_files(root_folder, metrics=('f_score', 'bb_score'))

# Plot both groups in one figure; the non-Pareto points go in as demo data
plot_pareto_curve(
    baseline_data=data_f1_epg_pareto['vanilla']['input']['baseline'],
    l1_data=data_f1_epg_pareto['bcos']['final']['l1'],
    x_label='F1 Score (%)',
    y_label='EPG Score (%)',
    title='',
    figsize=(10, 4),
    set_xlim=(79, 81),
    set_ylim=(50, 73),
    hide_x_ticks=False,
    hide_y_ticks=False,
    fontsize=20,
    attribution_method="IxG",
    plot_demo_data=True,
    demo_data=data_f1_epg_not_pareto['bcos']['final']['l1'])

### 8. Show the results from the additional experiment, which looks at each class individually and downloads a .csv file with the results.

In [None]:
import argparse

# Checkpoints compared in the per-class experiment.
model_pathBase = "./BASE/VOC2007/bcos_standard_attrNone_loclossNone_origNone_resnet50_lr0.0001_sll1.0_layerInput/model_checkpoint_f1_best.pt"
# The two fine-tuned paths were previously swapped relative to their names:
# "FN50" now points at the final_50 checkpoint and "FNbest" at the f1_best one.
# NOTE(review): confirm this matches how fairness.main labels its results.
model_pathFN50 = "./FT/VOC2007/vanilla_finetunedobjlocpareto_attrIxG_loclossEnergy_origmodel_checkpoint_f1_best.pt_resnet50_lr0.0001_sll0.001_layerInput/model_checkpoint_final_50.pt"
model_pathFNbest = "./FT/VOC2007/vanilla_finetunedobjlocpareto_attrIxG_loclossEnergy_origmodel_checkpoint_f1_best.pt_resnet50_lr0.0001_sll0.001_layerInput/model_checkpoint_f1_best.pt"

# Dataset split and metric to evaluate per class
dataset = "VOC2007"
split = "test"
metric = "BB-Loc"

# Note: this rebinds `main`, which earlier cells imported from `train`
from fairness import main

# main() is handed an argparse-style namespace
args = argparse.Namespace(
    model_pathBase=model_pathBase,
    model_pathFN50=model_pathFN50,
    model_pathFNbest=model_pathFNbest,
    dataset=dataset,
    split=split,
    metric=metric,
)

main(args)

Visualize the results from the additional experiment in a dataframe. Due to computational resource limitation, we load in the pre-trained model and show the results from the additional experiment.

In [None]:
import pandas as pd

# Load the results CSV from the working directory; as the last expression of
# the cell, the resulting DataFrame is displayed as the cell output.
# NOTE(review): presumably this file is written by the fairness run above — confirm the file name.
pd.read_csv("./VOC2007_BB-Loc_test.csv")

### 9. Show the evaluation results of the fine-tuned model (Pareto front)

In this section, we show the evaluation results of the fine-tuned model (Pareto front) on the test set. Because of computational resource limitation, we saved the evaluation results in .npz files. The evaluation results are loaded from the .npz files  with the ```load_data_from_folders_with_npz_files``` function in ```utils.py``` and visualized with the ```plot_pareto_curve``` function in ```visualize.py```.

*The plots for figure 1 from our reproducibility study are shown below.*

In [None]:
from utils import *
from visualize import *

# `from utils import *` does not bind the name `utils` itself, so import the
# module explicitly instead of relying on another star import to leak it.
import utils

root_folder = './p_curves/VOC2007'
data_f1_epg = utils.load_data_from_folders_with_npz_files(root_folder, metrics=('f_score', 'bb_score'))

# Axis limits and tick spacing shared by all four panels
x_lim_range = (65, 85)
y_lim_range = (31, 90)
step_size_xticks = 5
step_size_yticks = 10

# One entry per panel, drawn in this order:
# (backbone key, layer key, hide_x_ticks, hide_y_ticks, attribution label)
fig1_panels = [
    ('vanilla', 'input', True, False, "IxG"),
    ('bcos', 'input', True, True, "B-cos"),
    ('vanilla', 'final', False, False, "IxG"),
    ('bcos', 'final', False, True, "B-cos"),
]

for panel_backbone, panel_layer, panel_hide_x, panel_hide_y, panel_label in fig1_panels:
    # All curves of one panel come from the same backbone/layer entry
    panel_curves = data_f1_epg[panel_backbone][panel_layer]
    plot_pareto_curve(
        baseline_data=panel_curves['baseline'],
        energy_data=panel_curves['energy'],
        l1_data=panel_curves['l1'],
        ppce_data=panel_curves['ppce'],
        rrr_data=panel_curves['rrr'],
        x_label='F1 Score (%)',
        y_label='EPG Score (%)',
        title='',
        figsize=(10, 4),
        set_xlim=x_lim_range,
        set_ylim=y_lim_range,
        step_size_xticks=step_size_xticks,
        step_size_yticks=step_size_yticks,
        hide_x_ticks=panel_hide_x,
        hide_y_ticks=panel_hide_y,
        fontsize=20,
        attribution_method=panel_label)

*The plots for figure 2 from our reproducibility study are shown below.*

In [None]:
from utils import *
from visualize import *

# `from utils import *` does not bind the name `utils` itself, so import the
# module explicitly instead of relying on another star import to leak it.
import utils

root_folder = './p_curves/VOC2007'
data_f1_iou = utils.load_data_from_folders_with_npz_files(root_folder, metrics=('f_score', 'iou_score'))

# Axis limits and tick spacing shared by both panels
x_lim_range = (65, 85)
y_lim_range = (11, 60)
step_size_xticks = 5
step_size_yticks = 10

# Keyword arguments that are identical for both panels
fig2_shared_kwargs = dict(
    x_label='F1 Score (%)',
    y_label='IoU Score (%)',
    figsize=(10, 4),
    set_xlim=x_lim_range,
    set_ylim=y_lim_range,
    step_size_xticks=step_size_xticks,
    step_size_yticks=step_size_yticks,
    fontsize=20,
)

# Left panel: vanilla backbone, final layer (title passed explicitly as '')
fig2_vanilla = data_f1_iou['vanilla']['final']
plot_pareto_curve(
    baseline_data=fig2_vanilla['baseline'],
    energy_data=fig2_vanilla['energy'],
    l1_data=fig2_vanilla['l1'],
    ppce_data=fig2_vanilla['ppce'],
    rrr_data=fig2_vanilla['rrr'],
    title='',
    hide_y_ticks=False,
    attribution_method="IxG",
    **fig2_shared_kwargs)

# Right panel: B-cos backbone, final layer (title left at the function's default)
fig2_bcos = data_f1_iou['bcos']['final']
plot_pareto_curve(
    baseline_data=fig2_bcos['baseline'],
    energy_data=fig2_bcos['energy'],
    l1_data=fig2_bcos['l1'],
    ppce_data=fig2_bcos['ppce'],
    rrr_data=fig2_bcos['rrr'],
    hide_y_ticks=True,
    attribution_method="B-cos",
    **fig2_shared_kwargs)

*The plots for figure 4 from our reproducibility study are shown below.*

In [None]:
from utils import *
from visualize import *

# `from utils import *` does not bind the name `utils` itself, so import the
# module explicitly instead of relying on another star import to leak it.
import utils

root_folder = './p_c_ann'
data_limited_ann_f1_epg = utils.load_data_from_folders_with_npz_files_with_limited_ann(root_folder, metrics=('f_score', 'bb_score'))

# Axis limits and tick spacing shared by all three panels
x_lim_range = (75, 81)
y_lim_range = (41, 90)
step_size_xticks = 2
step_size_yticks = 10

# One panel per "lim*" key, drawn in this order; only the first keeps its y ticks.
# Entries are (data key, hide_y_ticks).
fig4_panels = [
    ('lim0.01', False),
    ('lim0.1', True),
    ('lim1.0', True),
]

for panel_key, panel_hide_y in fig4_panels:
    panel_curves = data_limited_ann_f1_epg['bcos'][panel_key]
    plot_pareto_curve(
        baseline_data=panel_curves['baseline'],
        energy_data=panel_curves['energy'],
        l1_data=panel_curves['l1'],
        x_label='F1 Score (%)',
        y_label='EPG Score (%)',
        figsize=(8, 4),
        set_xlim=x_lim_range,
        set_ylim=y_lim_range,
        step_size_xticks=step_size_xticks,
        step_size_yticks=step_size_yticks,
        hide_y_ticks=panel_hide_y,
        fontsize=20,
        attribution_method="")

*The plots for figure 5 from our reproducibility study are shown below.*

In [None]:
from utils import *
from visualize import *    

# `from utils import *` does not bind the name `utils` itself, so import the
# module explicitly instead of relying on another star import to leak it.
import utils

root_folder = './p_curves/VOC2007'
data_speed_up_f1_epg = utils.load_data_from_folders_with_npz_files(root_folder, metrics=('f_score', 'bb_score'))

# Axis limits and tick spacing shared by both panels
x_lim_range = (74, 81)
y_lim_range = (41, 90)
step_size_xticks = 2
step_size_yticks = 10

# Both panels compare against the same input-layer baseline and energy curves
speed_up_bcos = data_speed_up_f1_epg['bcos']

# (layer key whose energy curve is plotted, speed-up annotation, hide_y_ticks)
fig5_panels = [
    ('mid2', 'Speed-up: 1.25x', False),
    ('final', 'Speed-up: 2.0x', True),
]

for panel_layer, panel_speed_up_text, panel_hide_y in fig5_panels:
    plot_pareto_curve_speed_up(
        baseline_data=speed_up_bcos['input']['baseline'],
        energy_data_input_layer=speed_up_bcos['input']['energy'],
        energy_data=speed_up_bcos[panel_layer]['energy'],
        x_label='F1 Score (%)',
        y_label='EPG Score (%)',
        speed_up_text=panel_speed_up_text,
        figsize=(10, 4),
        set_xlim=x_lim_range,
        set_ylim=y_lim_range,
        step_size_xticks=step_size_xticks,
        step_size_yticks=step_size_yticks,
        hide_y_ticks=panel_hide_y,
        fontsize=20)

*The plots for figure 6 from our reproducibility study are shown below.*

In [None]:
from utils import *
from visualize import *    

# `from utils import *` does not bind the name `utils` itself, so import the
# module explicitly instead of relying on another star import to leak it.
import utils


def _plot_dilation_panel(data, loss_key, loss, y_label, save_path, hide_x_ticks, hide_y_ticks):
    """Draw one dilation panel for a single loss.

    Args:
        data: Nested results dict as returned by
            load_data_from_folders_with_npz_files_with_dilation.
        loss_key: Key of the loss inside each dilation entry ('energy' or 'l1').
        loss: Loss name passed on to the plot ('Energy' or 'L1').
        y_label: Label of the y axis.
        save_path: File the figure is saved to.
        hide_x_ticks: Whether the x ticks are hidden.
        hide_y_ticks: Whether the y ticks are hidden.

    Axis limits and tick spacing are read from the module-level
    x_lim_range / y_lim_range / step_size_* values at call time.
    """
    bcos_results = data['bcos']
    plot_pareto_curve_dilation(
        baseline_data=bcos_results['dil0']['baseline'],
        data_0=bcos_results['dil0'][loss_key],
        data_01=bcos_results['dil0.1'][loss_key],
        data_025=bcos_results['dil0.25'][loss_key],
        data_05=bcos_results['dil0.5'][loss_key],
        data_0_not_pareto=bcos_results['dil0_not_pareto'][loss_key],
        data_01_not_pareto=bcos_results['dil0.1_not_pareto'][loss_key],
        data_025_not_pareto=bcos_results['dil0.25_not_pareto'][loss_key],
        data_05_not_pareto=bcos_results['dil0.5_not_pareto'][loss_key],
        loss=loss,
        x_label='F1 Score (%)',
        y_label=y_label,
        save_path=save_path,
        figsize=(8, 6),
        set_xlim=x_lim_range,
        set_ylim=y_lim_range,
        step_size_xticks=step_size_xticks,
        step_size_yticks=step_size_yticks,
        hide_x_ticks=hide_x_ticks,
        hide_y_ticks=hide_y_ticks,
        fontsize=20)


# --- Top row: F1 vs. EPG ---------------------------------------------------
root_folder = './p_c_dil'
data_dilation_f1_epg = utils.load_data_from_folders_with_npz_files_with_dilation(root_folder, metrics=('f_score', 'bb_score'))

x_lim_range = (75, 81)
y_lim_range = (41, 80)
step_size_xticks = 2
step_size_yticks = 10

_plot_dilation_panel(
    data_dilation_f1_epg, 'energy', 'Energy', 'EPG Score (%)',
    './images/fig_10_voc2007_bcos_resnet50_dilation_loss_energy_f1_epg_pareto_curve.png',
    hide_x_ticks=True, hide_y_ticks=False)

_plot_dilation_panel(
    data_dilation_f1_epg, 'l1', 'L1', 'EPG Score (%)',
    './images/fig_10_voc2007_bcos_resnet50_dilation_loss_l1_f1_epg_pareto_curve.png',
    hide_x_ticks=True, hide_y_ticks=True)

# --- Bottom row: F1 vs. adapted IoU -----------------------------------------
# The variable name is kept from the EPG row, but from here on it holds the
# ('f_score', 'adapt_iou_score') results.
data_dilation_f1_epg = utils.load_data_from_folders_with_npz_files_with_dilation(root_folder, metrics=('f_score', 'adapt_iou_score'))

x_lim_range = (75, 81)
y_lim_range = (11, 35)
step_size_xticks = 2
step_size_yticks = 10

_plot_dilation_panel(
    data_dilation_f1_epg, 'energy', 'Energy', 'Adapted IoU Score (%)',
    './images/fig_10_voc2007_bcos_resnet50_dilation_loss_energy_f1_adapt_iou_pareto_curve.png',
    hide_x_ticks=False, hide_y_ticks=False)

_plot_dilation_panel(
    data_dilation_f1_epg, 'l1', 'L1', 'Adapted IoU Score (%)',
    './images/fig_10_voc2007_bcos_resnet50_dilation_loss_l1_f1_adapt_iou_pareto_curve.png',
    hide_x_ticks=False, hide_y_ticks=True)