In [1]:
! pip install -U datasets
! pip install transformers==4.18.0

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (

# Install requirements / Clone repository

In [2]:
! git clone "https://github.com/mohsenfayyaz/DecompX"

Cloning into 'DecompX'...
remote: Enumerating objects: 172, done.[K
remote: Counting objects: 100% (172/172), done.[K
remote: Compressing objects: 100% (133/133), done.[K
remote: Total 172 (delta 74), reused 100 (delta 32), pack-reused 0 (from 0)[K
Receiving objects: 100% (172/172), 25.93 MiB | 10.84 MiB/s, done.
Resolving deltas: 100% (74/74), done.


In [3]:
import os
os.environ["HF_TOKEN"] = "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"

# Config (Change model and sentence here)

In [4]:
import torch
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import matplotlib
from IPython.display import display, HTML
from transformers import AutoTokenizer
from DecompX.src.decompx_utils import DecompXConfig
from DecompX.src.modeling_bert import BertForSequenceClassification
from DecompX.src.modeling_roberta import RobertaForSequenceClassification

CONFIGS = {
    "DecompX":
        DecompXConfig(
            include_biases=True,
            bias_decomp_type="absdot",
            include_LN1=True,
            include_FFN=True,
            FFN_approx_type="GeLU_ZO",
            include_LN2=True,
            aggregation="vector",
            include_classifier_w_pooler=True,
            tanh_approx_type="ZO",
            output_all_layers=True,
            output_attention=None,
            output_res1=None,
            output_LN1=None,
            output_FFN=None,
            output_res2=None,
            output_encoder=None,
            output_aggregated="norm",
            output_pooler="norm",
            output_classifier=True,
        ),
}

# Load corresponding model/tokenizer

In [5]:
model = BertForSequenceClassification.from_pretrained("BoringAnt1793/metehan-toxic-spans-bert-small")
tokenizer = AutoTokenizer.from_pretrained("BoringAnt1793/metehan-toxic-spans-bert-small")

Downloading:   0%|          | 0.00/691 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/110M [00:00<?, ?B/s]

  return torch.load(checkpoint_file, map_location="cpu")


Downloading:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/695k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/695 [00:00<?, ?B/s]

In [12]:
def load_model_and_tokenizer(model_name, input_sentences):
    model = None
    if "roberta" in model_name:
      model = RobertaForSequenceClassification.from_pretrained(model_name)
    elif "bert" in model_name:
      model = BertForSequenceClassification.from_pretrained(model_name)
    else:
      raise Exception(f"Not implemented model: {model_name}")

    tokenizer = AutoTokenizer.from_pretrained(model_name)

    return model, tokenizer

# Compute DecompX

In [7]:
def compute_decompx(model, tokenizer, tokenized_sentence, batch_lengths):
    """
    Compute DecompX outputs and importances for the given model and tokenized sentences on GPU.

    Args:
        model: The model to evaluate.
        tokenizer: The tokenizer associated with the model.
        tokenized_sentence: Tokenized input sentences (move to GPU).
        batch_lengths: Lengths of the tokenized batches.

    Returns:
        decompx_outputs_df: DataFrame with decomposed importances and other outputs.
        importance: Importance values for all layers.
    """
    # Move tokenized inputs to GPU
    tokenized_sentence = {key: value.cuda() for key, value in tokenized_sentence.items()}

    with torch.no_grad():
        model.eval()
        # Move model to GPU
        model = model.cuda()
        # Forward pass
        logits, hidden_states, decompx_last_layer_outputs, decompx_all_layers_outputs = model(
            **tokenized_sentence,
            output_attentions=False,
            return_dict=False,
            output_hidden_states=True,
            decompx_config=CONFIGS["DecompX"]
        )

    # Extract predictions
    predictions = torch.argmax(logits, dim=1).cpu().tolist()  # Predicted class

    # Prepare decompx outputs
    decompx_outputs = {
        "tokens": [tokenizer.convert_ids_to_tokens(tokenized_sentence["input_ids"][i][:batch_lengths[i]]) for i in range(len(batch_lengths))],
        "logits": logits.cpu().detach().numpy().tolist(),  # Move logits to CPU
        "cls": hidden_states[-1][:, 0, :].cpu().detach().numpy().tolist(),  # Last layer & only CLS -> (batch, emb_dim)
        "predictions": predictions
    }

    # Process last layer aggregated importance
    importance = np.array([g.squeeze().cpu().detach().numpy() for g in decompx_last_layer_outputs.aggregated]).squeeze()  # Move to CPU
    importance = [importance[j][:batch_lengths[j], :batch_lengths[j]] for j in range(len(importance))]
    decompx_outputs["importance_last_layer_aggregated"] = importance

    # Process last layer pooler importance
    importance = np.array([g.squeeze().cpu().detach().numpy() for g in decompx_last_layer_outputs.pooler]).squeeze()  # Move to CPU
    importance = [importance[j][:batch_lengths[j]] for j in range(len(importance))]
    decompx_outputs["importance_last_layer_pooler"] = importance

    # Process last layer classifier importance
    importance = np.array([g.squeeze().cpu().detach().numpy() for g in decompx_last_layer_outputs.classifier]).squeeze()  # Move to CPU
    importance = [importance[j][:batch_lengths[j], :] for j in range(len(importance))]
    decompx_outputs["importance_last_layer_classifier"] = importance

    # Process all layers aggregated importance
    importance = np.array([g.squeeze().cpu().detach().numpy() for g in decompx_all_layers_outputs.aggregated])  # Move to CPU
    importance = np.einsum('lbij->blij', importance)  # Rearrange dimensions
    importance = [importance[j][:, :batch_lengths[j], :batch_lengths[j]] for j in range(len(importance))]
    decompx_outputs["importance_all_layers_aggregated"] = importance

    # Convert outputs to DataFrame
    decompx_outputs_df = pd.DataFrame(decompx_outputs)

    return decompx_outputs_df, importance

# Visualization

In [8]:
def print_importance(importance, tokenized_text, discrete=False, prefix="", no_cls_sep=False):
    """
    importance: (sent_len)
    """
    if no_cls_sep:
        importance = importance[1:-1]
        tokenized_text = tokenized_text[1:-1]
    importance = importance / np.abs(importance).max() / 1.5  # Normalize
    if discrete:
        importance = np.argsort(np.argsort(importance)) / len(importance) / 1.6

    html = "<pre style='color:black; padding: 3px;'>"+prefix
    for i in range(len(tokenized_text)):
        if importance[i] >= 0:
            rgba = matplotlib.colormaps.get_cmap('Greens')(importance[i])   # Wistia
        else:
            rgba = matplotlib.colormaps.get_cmap('Reds')(np.abs(importance[i]))   # Wistia
        text_color = "color: rgba(255, 255, 255, 1.0); " if np.abs(importance[i]) > 0.9 else ""
        color = f"background-color: rgba({rgba[0]*255}, {rgba[1]*255}, {rgba[2]*255}, {rgba[3]}); " + text_color
        html += (f"<span style='"
                 f"{color}"
                 f"border-radius: 5px; padding: 3px;"
                 f"font-weight: {int(800)};"
                 "'>")
        html += tokenized_text[i].replace('<', "[").replace(">", "]")
        html += "</span> "
    display(HTML(html))
#     print(html)
    return html

def print_preview(model, tokenizer, tokenized_sentence, batch_lengths, idx=0, discrete=False):
    NO_CLS_SEP = False
    df, _ = compute_decompx(model, tokenizer, tokenized_sentence, batch_lengths)

    for col in ["importance_last_layer_aggregated", "importance_last_layer_classifier"]:
        if col in df and df[col][idx] is not None:
            if "aggregated" in col:
                sentence_importance = df[col].iloc[idx][0, :]
            if "classifier" in col:
                for label in range(df[col].iloc[idx].shape[-1]):
                    sentence_importance = df[col].iloc[idx][:, label]
                    print_importance(
                        sentence_importance,
                        df["tokens"].iloc[idx],
                        prefix=f"{col.split('_')[-1]} Label{label}:".ljust(20),
                        no_cls_sep=NO_CLS_SEP,
                        discrete=False
                    )
                break
                sentence_importance = df[col].iloc[idx][:, df["label"].iloc[idx]]
            if "pooler" in col:
                sentence_importance = df[col].iloc[idx]
            print_importance(
                sentence_importance,
                df["tokens"].iloc[idx],
                prefix=f"{col.split('_')[-1]}:".ljust(20),
                no_cls_sep=NO_CLS_SEP,
                discrete=discrete
            )
    print("------------------------------------")
    return df

In [9]:
from datasets import load_dataset

# Load the dataset
dataset = load_dataset('heegyu/toxic-spans')

# Print the size of the dataset
for split in dataset:
    print(f"Split: {split}, Size: {len(dataset[split])}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/3.65k [00:00<?, ?B/s]

train.csv:   0%|          | 0.00/9.71M [00:00<?, ?B/s]

test.csv:   0%|          | 0.00/954k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/10006 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1000 [00:00<?, ? examples/s]

Split: train, Size: 10006
Split: test, Size: 1000


In [13]:
def visual_evaluation(model, tokenizer, examples, labels=None):
    correct_predictions = 0
    total_predictions = 0

    tokenized_sentence = tokenizer(examples, return_tensors="pt", padding=True)
    batch_lengths = tokenized_sentence['attention_mask'].sum(dim=-1)

    # Evaluate each example
    for i in range(len(examples)):
        df = print_preview(model, tokenizer, tokenized_sentence, batch_lengths, idx=i)

        if labels:
            # Compute accuracy
            if df["predictions"][i] == labels[i]:
                correct_predictions += 1
            total_predictions += 1

    if labels:
        # Print accuracy
        accuracy = correct_predictions / total_predictions
        print(f"Accuracy for the model: {accuracy:.2%}")


In [14]:
visual_evaluation(model, tokenizer, dataset['train'][10:15]['text_of_post'], labels=dataset['train'][10:15]['toxic'])

------------------------------------


------------------------------------


------------------------------------


------------------------------------


------------------------------------
Accuracy for the model: 80.00%


In [15]:
def get_token_importance_for_sentences(model, tokenizer, sentences, labels):
    """
    Compute token importances for a list of sentences.

    Args:
    - model: The model to use for computation.
    - tokenizer: The tokenizer associated with the model.
    - sentences: List of input sentences as strings.
    - labels: List of labels corresponding to the sentences.

    Returns:
    - List of tuples with sentences, tokens, and their importance scores.
    """
    # Tokenize the input sentences
    tokenized_sentences = tokenizer(sentences, return_tensors="pt", padding=True, truncation=True)
    batch_lengths = tokenized_sentences["attention_mask"].sum(dim=-1)

    # Compute decompositions
    decompx_outputs_df, _ = compute_decompx(model, tokenizer, tokenized_sentences, batch_lengths)

    results = []
    # Process each sentence
    for idx, sentence in enumerate(sentences):
        tokens = decompx_outputs_df["tokens"][idx]  # Retrieve tokens for this sentence
        importances = decompx_outputs_df["importance_last_layer_classifier"][idx][:, labels[idx]]  # Importance for the corresponding label
        token_importance_pairs = [(token, importance) for token, importance in zip(tokens, importances)]
        results.append((sentence, token_importance_pairs))

    return results

THE METRIC

In [16]:
import ast
from tqdm import tqdm


def calculate_phrase_metric_unordered(token_importance_pairs, token_groups):
    """
    Calculate the metric for token groups in a sentence considering unordered matches.

    Args:
        token_importance_pairs: List of (token, importance) pairs.
        token_groups: List of tokenized groups (phrases) to match.

    Returns:
        float: The metric value for the token groups.
    """
    # Filter out [CLS] and [SEP] tokens
    token_importance_pairs = [
        pair for pair in token_importance_pairs if pair[0] not in ["[CLS]", "[SEP]"]
    ]

    phrase_importance_sum = 0
    total_importance = sum(abs(importance) for _, importance in token_importance_pairs)
    token_list = [pair[0] for pair in token_importance_pairs]
    matched_positions = set()

    # Match each token group
    for group in token_groups:
        for start_idx in range(len(token_list) - len(group) + 1):
            if token_list[start_idx:start_idx + len(group)] == group:
                phrase_importance_sum += sum(
                    abs(token_importance_pairs[start_idx + offset][1]) for offset in range(len(group))
                )
                matched_positions.update(range(start_idx, start_idx + len(group)))
                break

    metric = phrase_importance_sum / total_importance if total_importance > 0 else 0
    return metric

In [31]:
def evaluate_model_on_dataset(model_name, dataset, model, tokenizer):
    """
    Evaluate a model on a dataset using the custom metric for token group coverage.

    Args:
        model_name (str): Name of the model to evaluate.
        dataset: The dataset to evaluate on (assumed to have 'test' split).

    Returns:
        float: The average metric value across all examples in the test split.
    """
    # Filter examples with non-empty 'text_of_post' field
    test_split = dataset['test']
    valid_examples = [ex for ex in test_split if ex['text_of_post']]

    # Extract examples and token groups
    examples = [ex['text_of_post'] for ex in valid_examples]
    labels = [ex['toxic'] for ex in valid_examples]

    token_groups_list = []
    for ex in valid_examples:
        text_dict = ast.literal_eval(ex['text'])
        tokenized_groups = [tokenizer.tokenize(phrase) for phrase in text_dict.keys()]
        token_groups_list.append(tokenized_groups)

    # Calculate metrics for each batch of examples
    metrics = []

    batch_size = 2

    for start_idx in tqdm(range(0, len(examples), batch_size), desc="Processing Examples"):
        batch_examples = examples[start_idx:start_idx + batch_size]
        batch_labels = labels[start_idx:start_idx + batch_size]
        batch_groups = token_groups_list[start_idx:start_idx + batch_size]

        # Fetch token importances for the current batch
        token_importance_results = get_token_importance_for_sentences(
            model, tokenizer, batch_examples, batch_labels
        )

        for idx, token_importance_pairs in enumerate(token_importance_results):
            token_groups = batch_groups[idx]
            metric = calculate_phrase_metric_unordered(token_importance_pairs[1], token_groups)
            metrics.append(metric)


    # Return the average metric
    average_metric = sum(metrics) / len(metrics) if metrics else 0
    return average_metric

In [32]:
# Example usage
average_metric = evaluate_model_on_dataset("BoringAnt1793/metehan-toxic-spans-bert-small", dataset, model, tokenizer)
print(f"Average Metric: {average_metric:.4f}")

Processing Examples: 100%|██████████| 500/500 [01:41<00:00,  4.91it/s]

Average Metric: 0.4917



