# T5-ParaDetox Pipeline with DecompX Reranking

This notebook combines:
- **T5-base** fine-tuned on ParaDetox for detoxification
- **DecompX reranking** to select the least toxic candidate from multiple generations

## Pipeline

1. Generate `num_candidates` detoxified texts per input using T5 sampling
2. Score each candidate with DecompX (RoBERTa-based): the score is the fraction of the candidate's tokens that DecompX masks as toxic
3. Select candidate with lowest toxicity score
4. Evaluate with BLEU, BERTScore, MeaningBERT, Perplexity, Toxicity

---

## `detoxify()` API

```python
def detoxify(
    data_type: str = "paradetox",
    output_folder: str = "T5_w_DecompX-Reranking_Pipeline",
    echo: bool = False,
    num_examples: int = 1000,
    batch_size: int = 8,
    num_candidates: int = 10,
    max_length: int = 128,
    temperature: float = 1.0,
    top_k: int = 50,
    top_p: float = 0.95,
    decompx_threshold: float = 0.20,
    decompx_batch_size: int = 16,
    overwrite_gen: bool = False,
    run_eval: bool = True,
    overwrite_eval: bool = False,
    skip_ref_eval: bool = False,
)
```

### Key Arguments

- `data_type`: Dataset key (paradetox, microagressions_test, sbf_test, dynabench_test, jigsaw_toxic, appdia_original, appdia_discourse)
- `output_folder`: Folder under `data/model_outputs/` for results
- `num_candidates`: Number of candidates to generate per input for reranking
- `temperature`: Sampling temperature for diversity (higher = more diverse)
- `echo`: If True, print example inputs, candidates, and outputs

## Setup

In [2]:
#@title Mount Drive, imports & locate XDetox
from google.colab import drive
drive.mount('/content/drive')

import os
import sys
import torch

# Project locations on the mounted Drive (edit PROJECT_BASE to relocate).
PROJECT_BASE = "/content/drive/MyDrive/w266 - Project"
XDETOX_DIR = os.path.join(PROJECT_BASE, "XDetox")
T5_CHECKPOINT = os.path.join(PROJECT_BASE, "t5-base-detox-model")

print("PROJECT_BASE:", PROJECT_BASE)
print("XDETOX_DIR:", XDETOX_DIR, "->", os.path.isdir(XDETOX_DIR))
print("T5_CHECKPOINT:", T5_CHECKPOINT)

# Fail early: every later cell reads from the XDetox checkout.
assert os.path.isdir(XDETOX_DIR), f"XDETOX_DIR does not exist: {XDETOX_DIR}"

# Point the Transformers cache at a folder inside the checkout and disable W&B.
HF_CACHE = os.path.join(XDETOX_DIR, "cache")
os.makedirs(HF_CACHE, exist_ok=True)
os.environ.update({
    "TRANSFORMERS_CACHE": HF_CACHE,
    "WANDB_DISABLED": "true",
})

# Make the XDetox packages importable from this notebook.
if XDETOX_DIR not in sys.path:
    sys.path.append(XDETOX_DIR)

print("TRANSFORMERS_CACHE:", HF_CACHE)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))

Mounted at /content/drive
PROJECT_BASE: /content/drive/MyDrive/w266 - Project
XDETOX_DIR: /content/drive/MyDrive/w266 - Project/XDetox -> True
T5_CHECKPOINT: /content/drive/MyDrive/w266 - Project/t5-base-detox-model
TRANSFORMERS_CACHE: /content/drive/MyDrive/w266 - Project/XDetox/cache
CUDA available: True
GPU: Tesla T4


In [3]:
# Verify XDetox repo layout: each of these sub-folders must exist in the checkout.
for d in ("rewrite", "evaluation", "datasets", "data"):
    assert os.path.isdir(os.path.join(XDETOX_DIR, d)), f"Missing folder: {d}"
print("Repo folders OK.")

# Both aliases point at the checkout root; later cells resolve paths against them.
REPO = DATASET_BASE = XDETOX_DIR

Repo folders OK.


In [4]:
#@title Install dependencies (aligned with LLM DecompX pipeline)
!pip -q install --upgrade pip setuptools wheel
!pip -q install "transformers==4.41.2" "tokenizers==0.19.1" \
                "datasets==2.19.0" "evaluate==0.4.1" \
                "sacrebleu==2.4.1" sacremoses ftfy nltk matplotlib pandas jedi \
                sentencepiece
!pip -q install bert-score

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.8 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m67.1 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.2 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m57.4 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
ipython 7.34.0 requires jedi>=0.16, which is not installed.[0m[31m
[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
gcsfs 2025.3.0 requires fsspec==2025.3.0, but you have fsspec 2024.3.1 which is incompatible.[0m[

In [5]:
#@title NLTK data
import nltk
# Fetch the "punkt" tokenizer data quietly; an exception raised here propagates.
nltk.download("punkt", quiet=True)
# "punkt_tab" is attempted on a best-effort basis: any exception is swallowed so
# the cell still completes.
# NOTE(review): presumably only some NLTK versions need/provide "punkt_tab" — confirm.
try:
    nltk.download("punkt_tab", quiet=True)
except Exception:
    pass
print("NLTK ready")

NLTK ready


In [6]:
#@title Imports
import glob, re, json, shutil, math
import importlib
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from pathlib import Path
from subprocess import run, PIPE
from typing import List, Tuple

from transformers import (
    T5Tokenizer,
    T5ForConditionalGeneration,
)

# DecompX compatibility fixes for newer Transformers versions.
#
# These patches are applied BEFORE the DecompX Masker is imported: a patch on
# `transformers.modeling_utils` can only help code that is imported afterwards.
# NOTE(review): presumably the DecompX modules pulled in by `rewrite.mask_orig`
# import these helpers from `transformers.modeling_utils` — confirm in the repo.
import transformers.modeling_utils as modeling_utils


def _resolve_transformers_helper(name: str, required: bool = False):
    """
    Find the helper `name` in the Transformers modules that may expose it.

    Looks in `transformers.modeling_utils`, then `transformers.pytorch_utils`,
    then `transformers.models.bert.modeling_bert`, and returns the first match.

    If nothing is found:
      - required=True  -> raise ImportError
      - required=False -> return a stub that raises NotImplementedError on call,
                          so importing works and only actual use fails.
    """
    for module_name in (
        "transformers.modeling_utils",
        "transformers.pytorch_utils",
        "transformers.models.bert.modeling_bert",
    ):
        try:
            module = importlib.import_module(module_name)
        except ImportError:
            continue
        helper = getattr(module, name, None)
        if helper is not None:
            return helper

    if required:
        raise ImportError(f"cannot locate {name} in transformers")

    def _unavailable(*args, **kwargs):
        raise NotImplementedError(f"{name} is not available")

    return _unavailable


# 1) apply_chunking_to_forward moved to transformers.pytorch_utils
apply_chunking_to_forward = _resolve_transformers_helper(
    "apply_chunking_to_forward", required=True
)
modeling_utils.apply_chunking_to_forward = apply_chunking_to_forward

# 2) find_pruneable_heads_and_indices may move out of modeling_utils
find_pruneable_heads_and_indices = _resolve_transformers_helper(
    "find_pruneable_heads_and_indices"
)
modeling_utils.find_pruneable_heads_and_indices = find_pruneable_heads_and_indices

# 3) prune_linear_layer may also move
prune_linear_layer = _resolve_transformers_helper("prune_linear_layer")
modeling_utils.prune_linear_layer = prune_linear_layer

# DecompX Masker (same alias as LLM pipeline) — imported only after the shims.
from rewrite.mask_orig import Masker as Masker_single

print("Libraries imported")



Libraries imported


## Dataset Configuration

In [7]:
#@title Data configs (matching XDetox datasets)
# One row per dataset: (dataset key, data path, file format).
_DATASET_TABLE = [
    ("paradetox", "./datasets/paradetox/test_toxic_parallel.txt", "txt"),
    ("microagressions_test", "./datasets/microagressions/test.csv", "csv"),
    ("sbf_test", "./datasets/sbf/sbftst.csv", "csv"),
    ("dynabench_test", "./datasets/dynabench/db_test.csv", "csv"),
    ("jigsaw_toxic", "./datasets/jigsaw_full_30/test_10k_toxic.txt", "txt"),
    ("appdia_original", "./datasets/appdia/original-annotated-data/original-test.tsv", "tsv"),
    ("appdia_discourse", "./datasets/appdia/discourse-augmented-data/discourse-test.tsv", "tsv"),
]

# Mapping: dataset key -> {"data_path": ..., "format": ...}, in table order.
data_configs = {
    key: {"data_path": path, "format": fmt}
    for key, path, fmt in _DATASET_TABLE
}
print(f"{len(data_configs)} datasets configured:", ", ".join(data_configs.keys()))

7 datasets configured: paradetox, microagressions_test, sbf_test, dynabench_test, jigsaw_toxic, appdia_original, appdia_discourse


## Helper Functions

In [8]:
#@title Helper functions

def _ensure_dir(p: str):
    Path(p).mkdir(parents=True, exist_ok=True)

def load_test_data(data_type: str, num_examples: int = None) -> List[str]:
    """
    Load test data from .txt / .csv / .tsv.

    Args:
        data_type: Key into `data_configs`.
        num_examples: If a positive int, keep only the first `num_examples`
            cleaned texts; None or 0 keeps everything.

    Returns:
        A list of non-empty, whitespace-stripped texts as strings. Missing
        (NaN) cells and blank lines are dropped.

    Raises:
        ValueError: If `data_type` is not configured, or its configured
            "format" is not one of txt / csv / tsv.
    """
    if data_type not in data_configs:
        raise ValueError(f"Unknown data_type: {data_type}")

    cfg = data_configs[data_type]
    # normpath removes a leading "./" without touching the rest of the path.
    # (str.lstrip("./") strips a *set of characters*, so "./.cache/x" would
    # wrongly become "cache/x".)
    data_path = os.path.join(DATASET_BASE, os.path.normpath(cfg["data_path"]))
    fmt = cfg["format"]

    if fmt == "txt":
        with open(data_path, "r", encoding="utf-8") as f:
            texts = [line.strip() for line in f]
    elif fmt in ("csv", "tsv"):
        df = pd.read_csv(data_path, sep="," if fmt == "csv" else "\t")
        # Column preference per format; fall back to the first column.
        preferred = ("text", "toxic") if fmt == "csv" else ("text",)
        column = next((c for c in preferred if c in df.columns), None)
        series = df[column] if column is not None else df.iloc[:, 0]
        texts = series.tolist()
    else:
        raise ValueError(f"Unsupported format {fmt!r} for data_type: {data_type}")

    # Drop missing values, stringify, strip, and discard empties.
    stripped = (str(t).strip() for t in texts if not pd.isna(t))
    cleaned = [s for s in stripped if s]

    if num_examples and num_examples > 0:
        cleaned = cleaned[:num_examples]

    return cleaned

def _safe_float(x):
    try:
        return float(x)
    except Exception:
        return float("nan")

def _read_stats_file(path: str) -> dict:
    out = {}
    with open(path, "r") as f:
        for line in f:
            if ":" not in line:
                continue
            k, v = line.strip().split(": ", 1)
            k = k.replace("(skipped)", "").strip().lower()
            out[k] = _safe_float(v)
    return out

print("Helper functions loaded")

Helper functions loaded


## T5 Model Loading

In [9]:
#@title Load T5 model (ParaDetox)
print(f"Loading T5 model from {T5_CHECKPOINT}...")

# Use the GPU when one is visible, otherwise fall back to CPU.
DEVICE_T5 = torch.device("cuda" if torch.cuda.is_available() else "cpu")

t5_tokenizer = T5Tokenizer.from_pretrained(T5_CHECKPOINT)
t5_model = T5ForConditionalGeneration.from_pretrained(T5_CHECKPOINT)
t5_model.to(DEVICE_T5)
t5_model.eval()  # inference only

print(f"T5 model loaded on {DEVICE_T5}")

Loading T5 model from /content/drive/MyDrive/w266 - Project/t5-base-detox-model...


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


T5 model loaded on cuda


In [10]:
#@title T5 multi-candidate generation

def t5_generate_candidates(
    text: str,
    model: T5ForConditionalGeneration,
    tokenizer: T5Tokenizer,
    num_candidates: int,
    temperature: float = 1.0,
    top_k: int = 50,
    top_p: float = 0.95,
    max_length: int = 128,
    device: torch.device = DEVICE_T5,
) -> List[str]:
    """
    Sample `num_candidates` rewrites of one input text.

    The text is prefixed with "detoxify: ", tokenized (truncated to
    `max_length`), and passed to `model.generate` with sampling enabled
    (temperature / top-k / top-p) and `no_repeat_ngram_size=2`.

    Returns:
        The decoded candidates, special tokens removed.
    """
    prompt = f"detoxify: {text}"
    prompt_ids = tokenizer.encode(
        prompt,
        return_tensors="pt",
        max_length=max_length,
        truncation=True,
    )
    prompt_ids = prompt_ids.to(device)

    sampling_kwargs = dict(
        max_length=max_length,
        num_return_sequences=num_candidates,
        do_sample=True,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        no_repeat_ngram_size=2,
    )
    with torch.no_grad():
        sequences = model.generate(prompt_ids, **sampling_kwargs)

    return tokenizer.batch_decode(sequences, skip_special_tokens=True)

def t5_generate_candidates_batch(
    texts: List[str],
    model: T5ForConditionalGeneration,
    tokenizer: T5Tokenizer,
    num_candidates: int,
    temperature: float = 1.0,
    top_k: int = 50,
    top_p: float = 0.95,
    max_length: int = 128,
    batch_size: int = 8,
    device: torch.device = DEVICE_T5,
) -> List[List[str]]:
    """
    Sample `num_candidates` rewrites for every text, `batch_size` inputs at a time.

    Returns:
        One list per input text, in input order, each holding that input's
        decoded candidates.
    """
    grouped: List[List[str]] = []
    sampling_kwargs = dict(
        max_length=max_length,
        num_return_sequences=num_candidates,
        do_sample=True,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        no_repeat_ngram_size=2,
    )

    for offset in tqdm(range(0, len(texts), batch_size), desc="T5 Generation"):
        chunk = texts[offset:offset + batch_size]

        encoded = tokenizer(
            [f"detoxify: {t}" for t in chunk],
            return_tensors="pt",
            max_length=max_length,
            truncation=True,
            padding=True,
        ).to(device)

        with torch.no_grad():
            sequences = model.generate(**encoded, **sampling_kwargs)

        # The decoded list is sliced into consecutive runs of num_candidates,
        # one run per input in the chunk.
        flat = tokenizer.batch_decode(sequences, skip_special_tokens=True)
        grouped.extend(
            flat[b * num_candidates:(b + 1) * num_candidates]
            for b in range(len(chunk))
        )

    return grouped

# Quick sanity check: sample three candidates for one short input and print them.
test_text = "This is a stupid idea"
test_cands = t5_generate_candidates(
    test_text,
    t5_model,
    t5_tokenizer,
    num_candidates=3,
    device=DEVICE_T5,
)
print(f"Input: {test_text}")
for i, c in enumerate(test_cands):
    print(f"  cand[{i}]: {c}")

Input: This is a stupid idea
  cand[0]: This is a bad idea
  cand[1]: This is a bad idea
  cand[2]: This is a bad idea


## DecompX reranking (using Masker, mask-count-based score)

In [11]:
#@title DecompX-based reranking helpers

def _decompx_mask_texts(
    texts: List[str],
    threshold: float = 0.20,
    batch_size: int = 16,
) -> List[str]:
    """
    Run DecompX Masker on a list of texts and return masked versions.
    Same as in LLM DecompX pipeline.
    """
    if not texts:
        return []

    masker = Masker_single()
    masked_all = []
    for i in tqdm(range(0, len(texts), batch_size),
                  desc="DecompX masking for reranking", leave=False):
        batch = texts[i:i + batch_size]
        batch_out = masker.process_text(sentence=batch, threshold=threshold)
        masked_all.extend(batch_out)
    cleaned = [
        m.replace("<s>", "").replace("</s>", "").strip()
        for m in masked_all
    ]
    masker.release_model()
    return cleaned

def _decompx_toxicity_scores(
    texts: List[str],
    threshold: float = 0.20,
    batch_size: int = 16,
) -> np.ndarray:
    """
    Score texts by DecompX 'toxicity':

      score = (# of <mask> tokens DecompX inserts) / (# tokens)

    Lower score => less DecompX-toxic.
    """
    if not texts:
        return np.zeros((0,), dtype=float)

    masked = _decompx_mask_texts(texts, threshold=threshold, batch_size=batch_size)
    scores = []
    for m in masked:
        num_masks = len(re.findall(r"<mask>", m))
        tokens = m.split()
        length = max(len(tokens), 1)
        scores.append(num_masks / length)
    return np.asarray(scores, dtype=float)

def rerank_candidates_decompx(
    sources: List[str],
    candidates: List[List[str]],
    threshold: float = 0.20,
    batch_size_mask: int = 16,
):
    """
    DecompX-based reranking (same as LLM pipeline):

      - Flatten candidates
      - Score each candidate with DecompX
      - For each source, choose candidate with lowest score

    Blank candidates (empty or whitespace-only) are never preferred over a
    non-blank one: a blank string has no masks and would otherwise score 0 and
    win every comparison. If all candidates of a source are blank, index 0 is
    chosen.

    Args:
        sources: Source texts; only their count is used here.
        candidates: One list of candidate strings per source, all of equal length.
        threshold: DecompX masking threshold passed to the scorer.
        batch_size_mask: Batch size passed to the scorer.

    Returns:
      best_idx: np.ndarray (N,) chosen candidate index per source
      details: dict with 'score' matrix shape [N, C] (raw DecompX scores,
               blank candidates included); {} when there are no sources

    Raises:
      AssertionError: if len(candidates) != len(sources) or candidate counts differ.
      ValueError: if the candidate lists are empty.
    """
    N = len(sources)
    assert len(candidates) == N, "candidates length mismatch"

    if N == 0:
        return np.array([], dtype=int), {}

    C_list = [len(c) for c in candidates]
    assert len(set(C_list)) == 1, "All inputs must have same num_candidates"
    C = C_list[0]
    if C == 0:
        raise ValueError("num_candidates must be >= 1")

    flat_cands = [cand for cand_list in candidates for cand in cand_list]

    scores = _decompx_toxicity_scores(
        flat_cands,
        threshold=threshold,
        batch_size=batch_size_mask,
    )  # [N*C]
    scores2 = scores.reshape(N, C)

    # Rank blanks last by treating their score as +inf for selection only;
    # the reported score matrix keeps the raw values.
    is_blank = np.array([not cand.strip() for cand in flat_cands], dtype=bool).reshape(N, C)
    best_idx = np.argmin(np.where(is_blank, np.inf, scores2), axis=1)

    details = {
        "score": scores2,
    }
    return best_idx, details

print("DecompX reranking functions loaded")

DecompX reranking functions loaded


## Evaluation models and metrics

In [12]:
#@title Evaluation helpers (evaluate_all.py with MeaningBERT + toxicity)

def _eval_with_toxicity(base_path,
                        overwrite_eval: bool = False,
                        skip_ref: bool = False,
                        tox_threshold: float = 0.5,
                        tox_batch_size: int = 32):
    """
    Call evaluation.evaluate_all on each run folder in base_path.
    Same pattern as LLM DecompX pipeline.
    """
    import sys as _sys
    for folder in os.listdir(base_path):
        gen_dir = os.path.join(base_path, folder)
        if not os.path.isdir(gen_dir):
            continue
        orig_path = os.path.join(gen_dir, "orig.txt")
        gen_path  = os.path.join(gen_dir, "gen.txt")
        out_stats = os.path.join(gen_dir, "gen_stats.txt")
        if not (os.path.exists(orig_path) and os.path.exists(gen_path)):
            continue
        if os.path.exists(out_stats) and not overwrite_eval:
            continue

        env = os.environ.copy()
        env["PYTHONPATH"] = REPO + (
            ":" + env.get("PYTHONPATH", "") if env.get("PYTHONPATH") else ""
        )
        cmd = [
            _sys.executable, "-m", "evaluation.evaluate_all",
            "--orig_path", orig_path,
            "--gen_path",  gen_path,
            "--tox_threshold", str(tox_threshold),
            "--tox_batch_size", str(tox_batch_size),
        ]
        if skip_ref:
            cmd.append("--skip_ref")
        print("Eval:", " ".join(cmd))
        res = run(cmd, cwd=REPO, env=env, stdout=PIPE, stderr=PIPE, text=True)
        if res.returncode != 0:
            print(res.stdout)
            print(res.stderr)
            res.check_returncode()

def _aggregate_eval_csv(output_folder: str,
                        data_type: str,
                        base_out_dir: str):
    """
    Aggregate eval metrics for T5 ParaDetox + DecompX reranking.

    Layout (absolute base_out_dir):
      base_out_dir/
        └── {data_type}/
            └── T5_DecompX/
                └── {run_folder}/
                    └── gen_stats.txt

    threshold column is kept as a label (=0.20) for compatibility.
    """
    rows = []

    rerank_dir = "T5_DecompX"
    base_path  = os.path.join(base_out_dir, data_type, rerank_dir)
    if not os.path.isdir(base_path):
        print("No evaluation directory found:", base_path)
        return

    for folder in os.listdir(base_path):
        gen_dir    = os.path.join(base_path, folder)
        stats_path = os.path.join(gen_dir, "gen_stats.txt")
        if not os.path.exists(stats_path):
            continue
        s = _read_stats_file(stats_path)
        rows.append({
            "threshold":       0.20,  # label only, matches LLM pipeline style
            "folder":          folder,
            "bertscore":       s.get("bertscore", np.nan),
            "meaningbert":     s.get("meaningbert", np.nan),
            "bleu4":           s.get("bleu4", np.nan),
            "perplexity_gen":  s.get("perplexity gen", np.nan),
            "perplexity_orig": s.get("perplexity orig", np.nan),
            "toxicity_gen":    s.get("toxicity gen", np.nan),
            "toxicity_orig":   s.get("toxicity orig", np.nan),
        })

    if rows:
        cols = [
            "threshold", "folder",
            "bertscore", "meaningbert", "bleu4",
            "perplexity_gen", "perplexity_orig",
            "toxicity_gen", "toxicity_orig",
        ]
        df = pd.DataFrame(rows)
        df = df[cols]
        out_csv = os.path.join(base_out_dir, data_type, f"{data_type}.csv")
        _ensure_dir(os.path.dirname(out_csv))
        df.to_csv(out_csv, index=False)
        print("Wrote summary CSV:", out_csv)
    else:
        print("No evaluation files found to summarize.")

print("Evaluation helpers defined")

Evaluation helpers defined


In [13]:
#@title Helpers for folder naming

def _build_run_folder_name_t5_decompx(
    num_candidates: int,
    max_length: int,
    temperature: float,
    top_k: int,
    top_p: float,
    decompx_threshold: float,
) -> str:
    """
    Build a folder name encoding T5 + DecompX hyperparameters.
    Parallel to LLM DecompX folder naming.
    """
    return (
        f"t5_nc{num_candidates}_maxlen{max_length}_"
        f"temp{temperature}_topk{top_k}_topp{top_p}_"
        f"dxth{decompx_threshold}"
    )

In [14]:
#@title detoxify() — T5 + DecompX reranking + evaluate_all

def _write_normalized_lines(path: str, texts) -> None:
    """Write one text per line (UTF-8), collapsing whitespace runs so each text stays on one line."""
    with open(path, "w", encoding="utf-8") as f:
        for t in texts:
            f.write(re.sub(r"\s+", " ", t).strip() + "\n")


def detoxify(
    data_type: str = "paradetox",
    output_folder: str = "T5_w_DecompX-Reranking_Pipeline",
    echo: bool = False,
    num_examples: int = 1000,
    batch_size: int = 8,
    num_candidates: int = 10,
    max_length: int = 128,
    temperature: float = 1.0,
    top_k: int = 50,
    top_p: float = 0.95,
    # DecompX
    decompx_threshold: float = 0.20,
    decompx_batch_size: int = 16,
    # generation + eval flags
    overwrite_gen: bool = False,
    run_eval: bool = True,
    overwrite_eval: bool = False,
    skip_ref_eval: bool = False,
):
    """
    T5 ParaDetox + DecompX reranking + evaluate_all.py

    Steps:
      1. Load toxic inputs.
      2. Generate num_candidates candidates per input with T5.
      3. Rerank candidates with DecompX (mask-count ratio).
      4. Save orig.txt and gen.txt under:
         data/model_outputs/{output_folder}/{data_type}/T5_DecompX/{run_folder}/
      5. Run evaluation via evaluation.evaluate_all.py.
      6. Aggregate per-dataset CSV like LLM DecompX pipeline.

    Existing outputs are reused only when BOTH orig.txt and gen.txt are present
    in the run folder and `overwrite_gen` is False; otherwise they are
    regenerated. The run folder name encodes the generation and DecompX
    hyperparameters but not `num_examples`.

    Args:
        data_type: Key into `data_configs`.
        output_folder: Folder under data/model_outputs/ for this pipeline.
        echo: Print example inputs, candidates, outputs and metrics.
        num_examples: Maximum number of inputs to load.
        batch_size: Number of inputs per T5 generation batch.
        num_candidates: Candidates sampled per input.
        max_length, temperature, top_k, top_p: T5 sampling settings.
        decompx_threshold, decompx_batch_size: DecompX masking settings.
        overwrite_gen: Regenerate even if outputs already exist.
        run_eval: Run evaluation and CSV aggregation.
        overwrite_eval: Re-evaluate run folders that already have gen_stats.txt.
        skip_ref_eval: Pass --skip_ref to the evaluation script.

    Returns:
        dict of metrics read from this run's gen_stats.txt, or None when
        evaluation is skipped or the stats file is missing.
    """
    assert data_type in data_configs, f"Unknown data_type: {data_type}"

    # Base output inside the repo (same style as LLM pipeline)
    base_out_abs = os.path.join(REPO, "data", "model_outputs", output_folder)
    _ensure_dir(base_out_abs)

    # Load data
    print("=" * 80)
    print(f"[{data_type}] Loading data...")
    orig_texts = load_test_data(data_type, num_examples)
    print(f"  Loaded {len(orig_texts)} examples")

    if echo:
        print("\n[echo] Example inputs (first up to 3):")
        for i, s in enumerate(orig_texts[:3]):
            print(f"  input[{i}]: {s}")
        print(f"\n[echo] DecompX threshold: {decompx_threshold}")
        print(f"[echo] num_candidates per input: {num_candidates}")

    # Directory for this pipeline (T5 + DecompX)
    rerank_dir = "T5_DecompX"
    cur_abs = os.path.join(base_out_abs, data_type, rerank_dir)
    _ensure_dir(cur_abs)

    # Run-folder name for current hyperparameters
    run_folder = _build_run_folder_name_t5_decompx(
        num_candidates=num_candidates,
        max_length=max_length,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        decompx_threshold=decompx_threshold,
    )
    final_abs = os.path.join(cur_abs, run_folder)
    _ensure_dir(final_abs)

    orig_path  = os.path.join(final_abs, "orig.txt")
    gen_path   = os.path.join(final_abs, "gen.txt")
    stats_path = os.path.join(final_abs, "gen_stats.txt")

    # Reuse needs both files: gen.txt alone cannot be read back as a pair.
    can_reuse = (
        not overwrite_gen
        and os.path.exists(gen_path)
        and os.path.exists(orig_path)
    )

    if not can_reuse:
        print("  Generating T5 candidates...")
        all_candidates = t5_generate_candidates_batch(
            texts=orig_texts,
            model=t5_model,
            tokenizer=t5_tokenizer,
            num_candidates=num_candidates,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            max_length=max_length,
            batch_size=batch_size,
            device=DEVICE_T5,
        )

        if echo and all_candidates:
            print("\n[echo] Example candidates for input[0]:")
            for j, c in enumerate(all_candidates[0][:3]):
                print(f"    cand[{j}]: {c}")

        print(f"  DecompX reranking (threshold={decompx_threshold:.2f})...")
        best_idx, details = rerank_candidates_decompx(
            sources=orig_texts,
            candidates=all_candidates,
            threshold=decompx_threshold,
            batch_size_mask=decompx_batch_size,
        )
        best_generations = [
            all_candidates[i][best_idx[i]] for i in range(len(orig_texts))
        ]

        if echo:
            print("\n[echo] Selected outputs (first up to 3):")
            for i, g in enumerate(best_generations[:3]):
                print(f"  output[{i}]: {g}")

        # Save orig and gen (explicit UTF-8 so non-ASCII text round-trips)
        _write_normalized_lines(orig_path, orig_texts)
        _write_normalized_lines(gen_path, best_generations)

        print("  Saved orig/gen to:", final_abs)
    else:
        print("  Reusing existing orig/gen from:", final_abs)
        with open(orig_path, "r", encoding="utf-8") as f:
            orig_texts = [l.strip() for l in f]
        with open(gen_path, "r", encoding="utf-8") as f:
            best_generations = [l.strip() for l in f]
        print(f"  Loaded {len(best_generations)} generated examples")

    # Evaluation via evaluation/evaluate_all.py
    metrics = None
    if run_eval:
        _eval_with_toxicity(
            cur_abs,
            overwrite_eval=overwrite_eval,
            skip_ref=skip_ref_eval,
            tox_threshold=0.5,
            tox_batch_size=32,
        )
        _aggregate_eval_csv(output_folder, data_type, base_out_abs)

        if os.path.exists(stats_path):
            metrics = _read_stats_file(stats_path)
            if echo:
                print("\n[echo] Evaluation metrics for this run:")
                for k, v in metrics.items():
                    if isinstance(v, float) and math.isnan(v):
                        continue
                    print(f"  {k}: {v:.4f}")
        else:
            print("  gen_stats.txt not found for this run; no metrics to print.")

    print("=" * 80)
    return metrics

print("detoxify() defined")

detoxify() defined


In [15]:
#@title Example run on ParaDetox
# Full regeneration and re-evaluation on the ParaDetox test split.
metrics_paradetox = detoxify(
    data_type="paradetox",
    output_folder="T5_w_DecompX-Reranking_Pipeline - KB",
    echo=True,
    # data / generation
    num_examples=1000,
    batch_size=8,
    num_candidates=10,
    max_length=128,
    temperature=1.0,
    top_k=50,
    top_p=0.95,
    # DecompX reranking
    decompx_threshold=0.20,
    decompx_batch_size=16,
    # force fresh outputs and fresh evaluation
    overwrite_gen=True,
    run_eval=True,
    overwrite_eval=True,
    skip_ref_eval=False,
)

print("\nParadetox metrics for this run:")
if metrics_paradetox:
    for k, v in metrics_paradetox.items():
        is_missing = isinstance(v, float) and math.isnan(v)
        if not is_missing:
            print(f"  {k}: {v:.4f}")

[paradetox] Loading data...
  Loaded 671 examples

[echo] Example inputs (first up to 3):
  input[0]: . or the loud ass one - thousand ton beast roaring towards you howling its horn .
  input[1]: mandated  and " right fucking now " would be good .
  input[2]: * neither * of my coworkers gave a shit when it came time to ditch mitch . ugh .

[echo] DecompX threshold: 0.2
[echo] num_candidates per input: 10
  Generating T5 candidates...


T5 Generation:   0%|          | 0/84 [00:00<?, ?it/s]


[echo] Example candidates for input[0]:
    cand[0]: . or the one thousand ton beast roaring towards you howling its horn
    cand[1]: or the one thousand ton beast roaring toward you howling its horn
    cand[2]: or the one thousand ton beast roaring towards you howling its horn
  DecompX reranking (threshold=0.20)...


Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.
You are not authenticated with the Hugging Face Hub in this notebook.
If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).
Some weights of the model checkpoint at s-nlp/roberta_toxicity_classifier were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequence

DecompX masking for reranking:   0%|          | 0/420 [00:00<?, ?it/s]




[echo] Selected outputs (first up to 3):
  output[0]: .or the one - thousand ton beast roaring towards you howling its horn
  output[1]: Mandatory and "right now" would be good
  output[2]: neither * of my coworkers gave a damn when it came time to ditch mitch . ugh
  Saved orig/gen to: /content/drive/MyDrive/w266 - Project/XDetox/data/model_outputs/T5_w_DecompX-Reranking_Pipeline - KB/paradetox/T5_DecompX/t5_nc10_maxlen128_temp1.0_topk50_topp0.95_dxth0.2
Eval: /usr/bin/python3 -m evaluation.evaluate_all --orig_path /content/drive/MyDrive/w266 - Project/XDetox/data/model_outputs/T5_w_DecompX-Reranking_Pipeline - KB/paradetox/T5_DecompX/t5_nc10_maxlen128_temp1.0_topk50_topp0.95_dxth0.2/orig.txt --gen_path /content/drive/MyDrive/w266 - Project/XDetox/data/model_outputs/T5_w_DecompX-Reranking_Pipeline - KB/paradetox/T5_DecompX/t5_nc10_maxlen128_temp1.0_topk50_topp0.95_dxth0.2/gen.txt --tox_threshold 0.5 --tox_batch_size 32
Wrote summary CSV: /content/drive/MyDrive/w266 - Project/XDetox/d

In [16]:
# #@title Run on multiple datasets

# datasets_to_eval = ["paradetox", "microagressions_test", "sbf_test", "dynabench_test"]
# num_examples = 200
# output_folder = "T5_w_DecompX-Reranking_Pipeline"

# all_results = {}

# print("\n" + "=" * 80)
# print("T5-PARADETOX + DECOMPX RERANKING PIPELINE (evaluate_all)")
# print("=" * 80)

# for dataset_name in datasets_to_eval:
#     try:
#         results = detoxify(
#             data_type=dataset_name,
#             output_folder=output_folder,
#             echo=False,
#             num_examples=num_examples,
#             batch_size=8,
#             num_candidates=10,
#             max_length=128,
#             temperature=1.0,
#             top_k=50,
#             top_p=0.95,
#             decompx_threshold=0.20,
#             decompx_batch_size=16,
#             overwrite_gen=False,
#             run_eval=True,
#             overwrite_eval=False,
#             skip_ref_eval=False,
#         )
#         if results:
#             all_results[dataset_name] = results
#             print(f"  {dataset_name}: done")
#     except Exception as e:
#         print(f"  Error on {dataset_name}: {e}")
#         import traceback
#         traceback.print_exc()
#         continue

# print("\n" + "=" * 80)

# # Optional short summary of this batch of runs
# if all_results:
#     rows = []
#     for dataset_name, results in all_results.items():
#         row = {"dataset": dataset_name}
#         row.update(results)
#         rows.append(row)

#     df = pd.DataFrame(rows)

#     # Map keys with spaces to snake_case for convenience
#     rename_map = {
#         "perplexity gen": "perplexity_gen",
#         "perplexity orig": "perplexity_orig",
#         "toxicity gen": "toxicity_gen",
#         "toxicity orig": "toxicity_orig",
#     }
#     df = df.rename(columns=rename_map)

#     col_order = [
#         "dataset",
#         "bertscore",
#         "meaningbert",
#         "bleu4",
#         "perplexity_gen",
#         "perplexity_orig",
#         "toxicity_gen",
#         "toxicity_orig",
#     ]
#     df = df[[c for c in col_order if c in df.columns]]

#     summary_csv = os.path.join(
#         XDETOX_DIR,
#         "data",
#         "model_outputs",
#         output_folder,
#         "t5_decompx_summary_latest_run.csv",
#     )
#     _ensure_dir(os.path.dirname(summary_csv))
#     df.to_csv(summary_csv, index=False)
#     print(f"Saved summary of this run to {summary_csv}\n")
#     print(df.to_string(index=False))
# else:
#     print("No per-run metrics collected (per-dataset CSVs still written under data/model_outputs).")
