In [None]:
# Colab environment setup: install the Hugging Face libraries used by this notebook.
!pip install datasets transformers --quiet
# Upgrade datasets together with huggingface_hub and fsspec.
!pip install -U datasets huggingface_hub fsspec
# NOTE(review): captum is installed here but is not referenced by any visible cell -- confirm it is still needed.
!pip install captum

Collecting fsspec
  Using cached fsspec-2025.5.1-py3-none-any.whl.metadata (11 kB)


In [None]:
# Mount Google Drive at /content/drive; the result folders used by this notebook live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
from datasets import load_dataset
from sklearn.metrics import roc_auc_score
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import os
from tqdm.notebook import tqdm

In [None]:
# Load the movie_rationales dataset and keep its train split.
# The dataset repository ships a loading script, so trust_remote_code=True is
# passed explicitly: without it load_dataset stops at an interactive
# "Do you wish to run the custom code? [y/N]" prompt, which blocks Run All.
# NOTE(review): this executes code from the dataset repository -- inspect it at
# https://hf.co/datasets/movie_rationales if that is a concern.
eraser = load_dataset("movie_rationales", trust_remote_code=True)
eraser_train = eraser["train"]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

movie_rationales.py: 0.00B [00:00, ?B/s]

The repository for movie_rationales contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/movie_rationales.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N] y


Downloading data:   0%|          | 0.00/3.90M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1600 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/200 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/199 [00:00<?, ? examples/s]

In [None]:
def find_token_spans(text, evidence_list, tokenizer):
    """Map evidence phrases found in ``text`` to half-open token index ranges.

    Args:
        text: The string that is tokenized and searched.
        evidence_list: Iterable of phrases; each is located with ``str.find``,
            so only its first occurrence in ``text`` is considered.
        tokenizer: Callable invoked with ``return_offsets_mapping=True``,
            ``return_tensors="pt"`` and ``truncation=True``; its result must
            expose ``["offset_mapping"][0].tolist()`` as (start, end)
            character offsets per token.

    Returns:
        A list of ``(first_token, last_token + 1)`` tuples, one per phrase that
        occurs in ``text`` and overlaps at least one token. Phrases that are
        not found are skipped.
    """
    encoded = tokenizer(text, return_offsets_mapping=True, return_tensors="pt", truncation=True)
    char_offsets = encoded["offset_mapping"][0].tolist()

    token_ranges = []
    for phrase in evidence_list:
        phrase_start = text.find(phrase)
        if phrase_start == -1:
            continue
        phrase_end = phrase_start + len(phrase)

        # A token belongs to the phrase when its character interval overlaps
        # the phrase's character interval.
        overlapping = [
            tok_idx
            for tok_idx, (tok_start, tok_end) in enumerate(char_offsets)
            if tok_start is not None
            and tok_end is not None
            and tok_start < phrase_end
            and tok_end > phrase_start
        ]
        if overlapping:
            token_ranges.append((overlapping[0], overlapping[-1] + 1))
    return token_ranges

def get_relevant_positions(spans, token_positions):
    """Return the token positions that fall inside at least one gold span.

    Args:
        spans: Iterable of ``(start, end)`` token ranges. They are treated as
            half-open (``end`` exclusive), matching how ``find_token_spans``
            builds them and how ``jaccard_similarity`` expands them.
        token_positions: Iterable of token indices selected by an explainer.

    Returns:
        A list with the positions covered by any span, in input order. Each
        input position contributes at most one entry, even when several spans
        overlap it.
    """
    relevant = []
    for pos in token_positions:
        # `end` is exclusive, so the upper comparison must be strict.
        if any(span_low <= pos < span_high for span_low, span_high in spans):
            relevant.append(pos)
    return relevant

def precision(spans, token_positions):
    """Fraction of the selected token positions that lie inside a gold span.

    Args:
        spans: Gold ``(start, end)`` token ranges.
        token_positions: Token indices selected by an explainer.

    Returns:
        ``len(relevant) / len(token_positions)``, or ``0.0`` when no positions
        were selected (instead of raising ``ZeroDivisionError``).
    """
    n_selected = len(token_positions)
    if n_selected == 0:
        return 0.0
    relevant = get_relevant_positions(spans, token_positions)
    return len(relevant) / n_selected

def weight_recall(spans, weights, token_positions):
    """Share of the absolute attribution weight that falls on gold-span tokens.

    Args:
        spans: Gold ``(start, end)`` token ranges.
        weights: Array-like of attribution weights, aligned element-wise with
            ``token_positions``. Lists are accepted as well as numpy arrays.
        token_positions: Token indices the weights refer to.

    Returns:
        ``sum(|w| on relevant positions) / sum(|w|)`` as a float, or ``0.0``
        when the total absolute weight is not positive (including empty input).
    """
    abs_weights = np.abs(np.asarray(weights, dtype=float))
    total_mass = abs_weights.sum()
    # `not (x > 0)` also sends a NaN total to 0.0, as the original comparison did.
    if not total_mass > 0:
        return 0.0
    relevant = get_relevant_positions(spans, token_positions)
    mask = np.isin(token_positions, relevant)
    return float(abs_weights[mask].sum() / total_mass)


def jaccard_similarity(spans, token_positions):
    """Jaccard index between gold-span tokens and the selected token positions.

    Args:
        spans: Gold ``(start, end)`` token ranges, expanded with
            ``range(start, end)`` (end exclusive).
        token_positions: Token indices selected by an explainer.

    Returns:
        ``|gold & predicted| / |gold | predicted|``, or ``0.0`` when both sets
        are empty.
    """
    gold = {tok for start, end in spans for tok in range(start, end)}
    predicted = set(token_positions)

    combined = gold.union(predicted)
    if not combined:
        return 0.0
    return len(gold.intersection(predicted)) / len(combined)

def span_hit_recall(spans, token_positions, k=5):
    """Fraction of gold spans touched by at least one selected token, capped at ``k``.

    Args:
        spans: Gold ``(start, end)`` token ranges (end exclusive).
        token_positions: Token indices selected by an explainer.
        k: Cap applied to both the hit count and the span count.

    Returns:
        ``min(k, hits) / min(k, len(spans))``, or ``0.0`` when ``spans`` is empty.
    """
    chosen = set(token_positions)
    if not spans:
        return 0.0

    # A span counts as hit when any of its token indices was selected.
    hit_count = sum(1 for start, end in spans if chosen.intersection(range(start, end)))
    return min(k, hit_count) / min(k, len(spans))

def f1_token(spans, weights, token_positions):
    """Harmonic mean of ``precision`` and ``weight_recall`` for one example.

    Args:
        spans: Gold ``(start, end)`` token ranges.
        weights: Attribution weights forwarded to ``weight_recall``.
        token_positions: Token indices selected by an explainer.

    Returns:
        ``2 * p * r / (p + r)``, or ``0.0`` when ``p + r`` is not positive.
    """
    prec = precision(spans, token_positions)
    rec = weight_recall(spans, weights, token_positions)
    denom = prec + rec
    if denom > 0:
        return 2 * prec * rec / denom
    return 0.0

In [None]:
# Configuration for the DistilBERT run on movie_rationales.
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
perturbation_folder = "/content/drive/MyDrive/NLP_project/bert_eraser/results/perturbation"
gradient_folder = "/content/drive/MyDrive/NLP_project/bert_eraser/results/gradient"
linear_folder = "/content/drive/MyDrive/NLP_project/bert_eraser/results/linear"
ext = '.parquet'

tokenizer = AutoTokenizer.from_pretrained(model_name)
batch_size = 10

# Read the gold-evidence column once; it does not change between batches.
eraser_evidences = eraser_train['evidences']

for batch_start in tqdm(range(0, len(eraser_train), batch_size)):
    file_name = f'/batch_{batch_start}'
    evidence_batch = eraser_evidences[batch_start: batch_start + batch_size]

    for current_folder in [perturbation_folder, gradient_folder, linear_folder]:
        save_path = current_folder + file_name + '_metrics' + ext

        # Metrics are cached per batch and per method; existing files are never recomputed.
        if os.path.exists(save_path):
            print(f"Skipping {save_path}, already exists.")
            continue

        current_batch = pd.read_parquet(current_folder + file_name + ext)

        metric_rows = []
        # Iterate over the rows actually stored in the file (a batch may hold
        # fewer than batch_size rows) and address them by position.
        for i in range(len(current_batch)):
            text = current_batch['text'].iloc[i]
            evidence = evidence_batch[i]
            spans = find_token_spans(text=text, evidence_list=evidence, tokenizer=tokenizer)
            token_positions = current_batch['token_positions'].iloc[i]
            weights = current_batch['weights'].iloc[i]

            metric_rows.append({
                'precision': precision(spans, token_positions),
                'weight_recall': weight_recall(spans, weights, token_positions),
                'f1_token': f1_token(spans, weights, token_positions),
                'jaccard_similarity': jaccard_similarity(spans, token_positions),
                'span_hit_recall': span_hit_recall(spans, token_positions),
            })

        results = pd.DataFrame(
            metric_rows,
            columns=['precision', 'weight_recall', 'f1_token', 'jaccard_similarity', 'span_hit_recall'],
        )

        results.to_parquet(save_path)

  0%|          | 0/158 [00:00<?, ?it/s]

Skipping /content/drive/MyDrive/NLP_project/bert_eraser/results/perturbation/batch_0_metrics.parquet, already exists.
Skipping /content/drive/MyDrive/NLP_project/bert_eraser/results/gradient/batch_0_metrics.parquet, already exists.
Skipping /content/drive/MyDrive/NLP_project/bert_eraser/results/linear/batch_0_metrics.parquet, already exists.
Skipping /content/drive/MyDrive/NLP_project/bert_eraser/results/perturbation/batch_10_metrics.parquet, already exists.
Skipping /content/drive/MyDrive/NLP_project/bert_eraser/results/gradient/batch_10_metrics.parquet, already exists.
Skipping /content/drive/MyDrive/NLP_project/bert_eraser/results/linear/batch_10_metrics.parquet, already exists.
Skipping /content/drive/MyDrive/NLP_project/bert_eraser/results/perturbation/batch_20_metrics.parquet, already exists.
Skipping /content/drive/MyDrive/NLP_project/bert_eraser/results/gradient/batch_20_metrics.parquet, already exists.
Skipping /content/drive/MyDrive/NLP_project/bert_eraser/results/linear/batc

In [None]:
# Gather the cached per-batch metric files of every attribution method into one frame.
# NOTE(review): the upper bound 1580 is hard-coded; it matches the 158 batches shown
# in the recorded progress bar, while the train split reports 1600 examples -- confirm.
metric_frames = []

for batch_start in range(0, 1580, batch_size):
    file_name = f'/batch_{batch_start}'

    for current_folder in [perturbation_folder, gradient_folder, linear_folder]:
        save_path = current_folder + file_name + '_metrics' + ext
        data = pd.read_parquet(save_path)
        # The last path component names the method: perturbation / gradient / linear.
        data['method'] = current_folder.split('/')[-1]
        metric_frames.append(data)

# Concatenate once: avoids quadratic copying and the pandas FutureWarning raised
# when concatenating onto an empty frame.
results = pd.concat(metric_frames)

  results = pd.concat([results, data])


In [None]:
# Module-level defaults read by get_results: batch step size and file suffix.
batch_size = 10
ext = '.parquet'

def calculate_metrics_eraser(folder_name, model_name, size):
    """Compute and cache per-example rationale metrics for movie_rationales.

    For every batch file ``batch_<start>.parquet`` in the perturbation,
    gradient and linear result folders of ``folder_name``, a sibling
    ``batch_<start>_metrics.parquet`` is written with the columns precision,
    weight_recall, f1_token, jaccard_similarity and span_hit_recall.
    Existing metrics files are skipped, so files computed earlier are not
    refreshed by rerunning this function.

    Args:
        folder_name: Sub-folder of ``/content/drive/MyDrive/NLP_project``.
        model_name: Hugging Face model id whose tokenizer maps evidence
            phrases to token spans.
        size: Exclusive upper bound of the batch start offsets (step 10).

    Reads the notebook-global ``eraser_train`` for the gold evidences.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    perturbation_folder = f"/content/drive/MyDrive/NLP_project/{folder_name}/results/perturbation"
    gradient_folder = f"/content/drive/MyDrive/NLP_project/{folder_name}/results/gradient"
    linear_folder = f"/content/drive/MyDrive/NLP_project/{folder_name}/results/linear"

    batch_size = 10
    ext = '.parquet'
    metric_columns = ['precision', 'weight_recall', 'f1_token', 'jaccard_similarity', 'span_hit_recall']

    # Read the gold-evidence column once; it does not change between batches.
    all_evidences = eraser_train['evidences']

    for batch_start in tqdm(range(0, size, batch_size)):
        file_name = f'/batch_{batch_start}'
        evidence_batch = all_evidences[batch_start: batch_start + batch_size]

        for current_folder in [perturbation_folder, gradient_folder, linear_folder]:
            save_path = current_folder + file_name + '_metrics' + ext

            if os.path.exists(save_path):
                print(f"Skipping {save_path}, already exists.")
                continue

            current_batch = pd.read_parquet(current_folder + file_name + ext)

            metric_rows = []
            # Iterate over the rows actually stored in the file (a batch may
            # hold fewer than batch_size rows) and address them by position.
            for i in range(len(current_batch)):
                text = current_batch['text'].iloc[i]
                evidence = evidence_batch[i]
                spans = find_token_spans(text=text, evidence_list=evidence, tokenizer=tokenizer)
                token_positions = current_batch['token_positions'].iloc[i]
                weights = current_batch['weights'].iloc[i]

                metric_rows.append({
                    'precision': precision(spans, token_positions),
                    'weight_recall': weight_recall(spans, weights, token_positions),
                    'f1_token': f1_token(spans, weights, token_positions),
                    'jaccard_similarity': jaccard_similarity(spans, token_positions),
                    'span_hit_recall': span_hit_recall(spans, token_positions),
                })

            results = pd.DataFrame(metric_rows, columns=metric_columns)

            results.to_parquet(save_path)

def get_results(folder_name, model_name, size):
    """Display the per-method mean of the cached movie_rationales metrics.

    Reads every ``batch_<start>_metrics.parquet`` file of the perturbation,
    gradient and linear folders of ``folder_name``, tags each row with its
    method, prints ``model_name`` and ``size``, and displays the mean of every
    metric grouped by method. A missing metrics file raises the
    ``FileNotFoundError`` from ``pd.read_parquet``.

    Args:
        folder_name: Sub-folder of ``/content/drive/MyDrive/NLP_project``.
        model_name: Only used for the printed header.
        size: Exclusive upper bound of the batch start offsets; the step is the
            notebook-global ``batch_size``.

    Also reads the notebook-global ``ext`` for the file suffix.
    """
    perturbation_folder = f"/content/drive/MyDrive/NLP_project/{folder_name}/results/perturbation"
    gradient_folder = f"/content/drive/MyDrive/NLP_project/{folder_name}/results/gradient"
    linear_folder = f"/content/drive/MyDrive/NLP_project/{folder_name}/results/linear"
    columns = ['precision', 'weight_recall', 'f1_token', 'jaccard_similarity', 'span_hit_recall', 'method']

    metric_frames = []
    for batch_start in range(0, size, batch_size):
        file_name = f'/batch_{batch_start}'

        for current_folder in [perturbation_folder, gradient_folder, linear_folder]:
            save_path = current_folder + file_name + '_metrics' + ext
            data = pd.read_parquet(save_path)
            # The last path component names the attribution method.
            data['method'] = current_folder.split('/')[-1]
            metric_frames.append(data)

    # Concatenate once instead of growing a frame inside the loop; fall back to
    # an empty frame when there was nothing to read (size <= 0).
    results = pd.concat(metric_frames) if metric_frames else pd.DataFrame(columns=columns)
    print(model_name, size)
    display(results.groupby('method').agg('mean'))

In [None]:
# Run configuration for DistilBERT on movie_rationales; these globals are reused by the next cell.
folder_name = 'bert_eraser'
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
size = len(eraser_train)

# Write the missing *_metrics.parquet files for this run (existing ones are skipped).
calculate_metrics_eraser(folder_name, model_name, size)

In [None]:
# Display the per-method metric means for the folder_name / model_name / size set in the preceding cell.
get_results(folder_name, model_name, size)

In [None]:
# Run configuration for the GPT-2 SST-2 model on movie_rationales; these globals are reused by the next cell.
folder_name = 'gpt_eraser'
model_name = "PavanNeerudu/gpt2-finetuned-sst2"
size = len(eraser_train)

# Write the missing *_metrics.parquet files for this run (existing ones are skipped).
calculate_metrics_eraser(folder_name, model_name, size)

  0%|          | 0/160 [00:00<?, ?it/s]

Skipping /content/drive/MyDrive/NLP_project/gpt_eraser/results/perturbation/batch_0_metrics.parquet, already exists.
Skipping /content/drive/MyDrive/NLP_project/gpt_eraser/results/gradient/batch_0_metrics.parquet, already exists.
Skipping /content/drive/MyDrive/NLP_project/gpt_eraser/results/linear/batch_0_metrics.parquet, already exists.
Skipping /content/drive/MyDrive/NLP_project/gpt_eraser/results/perturbation/batch_10_metrics.parquet, already exists.
Skipping /content/drive/MyDrive/NLP_project/gpt_eraser/results/gradient/batch_10_metrics.parquet, already exists.
Skipping /content/drive/MyDrive/NLP_project/gpt_eraser/results/linear/batch_10_metrics.parquet, already exists.
Skipping /content/drive/MyDrive/NLP_project/gpt_eraser/results/perturbation/batch_20_metrics.parquet, already exists.
Skipping /content/drive/MyDrive/NLP_project/gpt_eraser/results/gradient/batch_20_metrics.parquet, already exists.
Skipping /content/drive/MyDrive/NLP_project/gpt_eraser/results/linear/batch_20_metr

In [None]:
# Display the per-method metric means for the gpt_eraser run.
# NOTE(review): the recorded run of this cell ended with FileNotFoundError on
# .../gpt_eraser/results/perturbation/batch_500_metrics.parquet -- the metrics for
# this folder appear to stop before `size`; confirm how many batches exist.
get_results(folder_name, model_name, size)

  results = pd.concat([results, data])


FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/NLP_project/gpt_eraser/results/perturbation/batch_500_metrics.parquet'

In [None]:
# Load the train split of cos_e (config v1.11); calculate_metrics_cose reads its
# 'extractive_explanation' column as the gold evidence.
cose = load_dataset("cos_e", "v1.11")['train']

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/1.73M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/222k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/9741 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1221 [00:00<?, ? examples/s]

{'id': '89afdea1d76272fda8e1538a7d3b7a3f',
 'question': 'A bride and groom are taking care of proposals, what is the likely ceremony?',
 'choices': ['efficiency',
  'getting work done',
  'marriage',
  'finishing up',
  'results'],
 'answer': 'marriage',
 'abstractive_explanation': 'the term bride and groom is mostly associated with a marriage ceremony',
 'extractive_explanation': 'bride and groom'}

In [None]:
def calculate_metrics_cose(folder_name, model_name, size):
    """Compute and cache per-example rationale metrics for cos_e.

    For every batch file ``batch_<start>.parquet`` in the perturbation and
    gradient result folders of ``folder_name``, a sibling
    ``batch_<start>_metrics.parquet`` is written with the columns precision,
    weight_recall, f1_token, jaccard_similarity and span_hit_recall.
    Existing metrics files are skipped, so files computed earlier are not
    refreshed by rerunning this function.

    Args:
        folder_name: Sub-folder of ``/content/drive/MyDrive/NLP_project``.
        model_name: Hugging Face model id whose tokenizer maps the evidence
            phrase to token spans.
        size: Exclusive upper bound of the batch start offsets (step 10).

    Reads the notebook-global ``cose`` for the gold extractive explanations.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    perturbation_folder = f"/content/drive/MyDrive/NLP_project/{folder_name}/results/perturbation"
    gradient_folder = f"/content/drive/MyDrive/NLP_project/{folder_name}/results/gradient"

    batch_size = 10
    ext = '.parquet'
    metric_columns = ['precision', 'weight_recall', 'f1_token', 'jaccard_similarity', 'span_hit_recall']

    # Read the gold-evidence column once; it does not change between batches.
    all_evidences = cose['extractive_explanation']

    for batch_start in tqdm(range(0, size, batch_size)):
        file_name = f'/batch_{batch_start}'
        evidence_batch = all_evidences[batch_start: batch_start + batch_size]

        for current_folder in [perturbation_folder, gradient_folder]:
            save_path = current_folder + file_name + '_metrics' + ext

            if os.path.exists(save_path):
                print(f"Skipping {save_path}, already exists.")
                continue

            current_batch = pd.read_parquet(current_folder + file_name + ext)

            metric_rows = []
            # Iterate over the rows actually stored in the file (a batch may
            # hold fewer than batch_size rows) and address them by position.
            for i in range(len(current_batch)):
                evidence = [evidence_batch[i]]
                # Use row i's question and answer. Interpolating the whole
                # column would embed the Series repr in the text, so the
                # evidence phrase would be matched against the wrong string.
                question = current_batch['question'].iloc[i]
                correct_answer = current_batch['correct_answer'].iloc[i]
                text = f"Premise: {question} Hypothesis: The answer is {correct_answer}"

                spans = find_token_spans(text=text, evidence_list=evidence, tokenizer=tokenizer)
                token_positions = current_batch['token_positions'].iloc[i]
                weights = current_batch['weights'].iloc[i]

                metric_rows.append({
                    'precision': precision(spans, token_positions),
                    'weight_recall': weight_recall(spans, weights, token_positions),
                    'f1_token': f1_token(spans, weights, token_positions),
                    'jaccard_similarity': jaccard_similarity(spans, token_positions),
                    'span_hit_recall': span_hit_recall(spans, token_positions),
                })

            results = pd.DataFrame(metric_rows, columns=metric_columns)

            results.to_parquet(save_path)

def get_results_cose(folder_name, model_name, size):
    """Display the per-method mean of the cached cos_e metrics.

    Reads every ``batch_<start>_metrics.parquet`` file of the perturbation and
    gradient folders of ``folder_name``, tags each row with its method, prints
    ``model_name`` and ``size``, and displays the mean of every metric grouped
    by method. A missing metrics file raises the ``FileNotFoundError`` from
    ``pd.read_parquet``.

    Args:
        folder_name: Sub-folder of ``/content/drive/MyDrive/NLP_project``.
        model_name: Only used for the printed header.
        size: Exclusive upper bound of the batch start offsets; the step is the
            notebook-global ``batch_size``.

    Also reads the notebook-global ``ext`` for the file suffix.
    """
    perturbation_folder = f"/content/drive/MyDrive/NLP_project/{folder_name}/results/perturbation"
    gradient_folder = f"/content/drive/MyDrive/NLP_project/{folder_name}/results/gradient"
    columns = ['precision', 'weight_recall', 'f1_token', 'jaccard_similarity', 'span_hit_recall', 'method']

    metric_frames = []
    for batch_start in range(0, size, batch_size):
        file_name = f'/batch_{batch_start}'

        for current_folder in [perturbation_folder, gradient_folder]:
            save_path = current_folder + file_name + '_metrics' + ext
            data = pd.read_parquet(save_path)
            # The last path component names the attribution method.
            data['method'] = current_folder.split('/')[-1]
            metric_frames.append(data)

    # Concatenate once instead of growing a frame inside the loop; fall back to
    # an empty frame when there was nothing to read (size <= 0).
    results = pd.concat(metric_frames) if metric_frames else pd.DataFrame(columns=columns)
    print(model_name, size)
    display(results.groupby('method').agg('mean'))

In [None]:
# Run configuration for cos_e, limited to the first 500 examples; these globals are reused by the next cell.
# NOTE(review): the folder is named 'bert_cos' but the tokenizer comes from
# "roberta-large-mnli" -- confirm this is the model that produced the stored attributions.
folder_name = 'bert_cos'
model_name = "roberta-large-mnli"
size = 500

# Write the missing *_metrics.parquet files for this run (existing ones are skipped).
calculate_metrics_cose(folder_name, model_name, size)

  0%|          | 0/50 [00:00<?, ?it/s]

In [None]:
# Display the per-method metric means for the folder_name / model_name / size set in the preceding cell.
get_results_cose(folder_name, model_name, size)

  results = pd.concat([results, data])


roberta-large-mnli 500


Unnamed: 0_level_0,precision,weight_recall,f1_token,jaccard_similarity,span_hit_recall
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
gradient,0.006798,0.006187,0.006442,0.005026,0.03
perturbation,0.006597,0.005315,0.005618,0.004848,0.028


In [None]:
def calculate_metrics_cose(folder_name, model_name, size):
    """Compute and cache per-example cos_e metrics for the perturbation method only.

    This redefinition replaces the earlier ``calculate_metrics_cose`` in the
    notebook namespace; it differs by processing only the perturbation folder.
    For every ``batch_<start>.parquet`` a sibling
    ``batch_<start>_metrics.parquet`` is written with the columns precision,
    weight_recall, f1_token, jaccard_similarity and span_hit_recall.
    Existing metrics files are skipped, so files computed earlier are not
    refreshed by rerunning this function.

    Args:
        folder_name: Sub-folder of ``/content/drive/MyDrive/NLP_project``.
        model_name: Hugging Face model id whose tokenizer maps the evidence
            phrase to token spans.
        size: Exclusive upper bound of the batch start offsets (step 10).

    Reads the notebook-global ``cose`` for the gold extractive explanations.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    perturbation_folder = f"/content/drive/MyDrive/NLP_project/{folder_name}/results/perturbation"

    batch_size = 10
    ext = '.parquet'
    metric_columns = ['precision', 'weight_recall', 'f1_token', 'jaccard_similarity', 'span_hit_recall']

    # Read the gold-evidence column once; it does not change between batches.
    all_evidences = cose['extractive_explanation']

    for batch_start in tqdm(range(0, size, batch_size)):
        file_name = f'/batch_{batch_start}'
        evidence_batch = all_evidences[batch_start: batch_start + batch_size]

        for current_folder in [perturbation_folder]:
            save_path = current_folder + file_name + '_metrics' + ext

            if os.path.exists(save_path):
                print(f"Skipping {save_path}, already exists.")
                continue

            current_batch = pd.read_parquet(current_folder + file_name + ext)

            metric_rows = []
            # Iterate over the rows actually stored in the file (a batch may
            # hold fewer than batch_size rows) and address them by position.
            for i in range(len(current_batch)):
                evidence = [evidence_batch[i]]
                # Use row i's question and answer. Interpolating the whole
                # column would embed the Series repr in the text, so the
                # evidence phrase would be matched against the wrong string.
                question = current_batch['question'].iloc[i]
                correct_answer = current_batch['correct_answer'].iloc[i]
                text = f"Premise: {question} Hypothesis: The answer is {correct_answer}"

                spans = find_token_spans(text=text, evidence_list=evidence, tokenizer=tokenizer)
                token_positions = current_batch['token_positions'].iloc[i]
                weights = current_batch['weights'].iloc[i]

                metric_rows.append({
                    'precision': precision(spans, token_positions),
                    'weight_recall': weight_recall(spans, weights, token_positions),
                    'f1_token': f1_token(spans, weights, token_positions),
                    'jaccard_similarity': jaccard_similarity(spans, token_positions),
                    'span_hit_recall': span_hit_recall(spans, token_positions),
                })

            results = pd.DataFrame(metric_rows, columns=metric_columns)

            results.to_parquet(save_path)

def get_results_cose(folder_name, model_name, size):
    """Display the mean cos_e metrics for the perturbation method only.

    This redefinition replaces the earlier ``get_results_cose`` in the
    notebook namespace; it differs by reading only the perturbation folder.
    Every ``batch_<start>_metrics.parquet`` file is read and tagged with its
    method, ``model_name`` and ``size`` are printed, and the mean of every
    metric grouped by method is displayed. A missing metrics file raises the
    ``FileNotFoundError`` from ``pd.read_parquet``.

    Args:
        folder_name: Sub-folder of ``/content/drive/MyDrive/NLP_project``.
        model_name: Only used for the printed header.
        size: Exclusive upper bound of the batch start offsets; the step is the
            notebook-global ``batch_size``.

    Also reads the notebook-global ``ext`` for the file suffix.
    """
    perturbation_folder = f"/content/drive/MyDrive/NLP_project/{folder_name}/results/perturbation"
    columns = ['precision', 'weight_recall', 'f1_token', 'jaccard_similarity', 'span_hit_recall', 'method']

    metric_frames = []
    for batch_start in range(0, size, batch_size):
        file_name = f'/batch_{batch_start}'

        for current_folder in [perturbation_folder]:
            save_path = current_folder + file_name + '_metrics' + ext
            data = pd.read_parquet(save_path)
            # The last path component names the attribution method.
            data['method'] = current_folder.split('/')[-1]
            metric_frames.append(data)

    # Concatenate once instead of growing a frame inside the loop; fall back to
    # an empty frame when there was nothing to read (size <= 0).
    results = pd.concat(metric_frames) if metric_frames else pd.DataFrame(columns=columns)
    print(model_name, size)
    display(results.groupby('method').agg('mean'))

In [None]:
# Run configuration for cos_e, limited to the first 500 examples; these globals are reused by the next cell.
# NOTE(review): the folder is named 'deberta_cos' but the tokenizer comes from
# "bert-base-uncased" -- confirm the tokenizer matches the model that produced
# the stored token_positions, otherwise span indices will not line up.
folder_name = 'deberta_cos'
model_name = "bert-base-uncased"
size = 500

# Write the missing *_metrics.parquet files for this run (existing ones are skipped).
calculate_metrics_cose(folder_name, model_name, size)

  0%|          | 0/50 [00:00<?, ?it/s]

In [None]:
# Display the per-method metric means for the folder_name / model_name / size set in the preceding cell.
get_results_cose(folder_name, model_name, size)

  results = pd.concat([results, data])


bert-base-uncased 500


Unnamed: 0_level_0,precision,weight_recall,f1_token,jaccard_similarity,span_hit_recall
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
perturbation,0.009109,0.009526,0.009273,0.007297,0.036
