# Error Analysis + Fairness + Latency Benchmark (Privacy-Preserving)

Notebook này giúp bạn **ăn điểm rubric doanh nghiệp** tốt hơn bằng cách bổ sung:
- Error analysis (không in raw toxic text)
- Fairness slice evaluation (identity mention heuristic)
- Latency benchmark p50/p95/p99

⚠️ Safety: dataset có thể chứa nội dung độc hại. Notebook này **không in** text từ dataset.


In [None]:
#@title 0) Controls
# Colab form fields for this notebook. The `#@param` annotations must stay on
# the assignment lines.
# USE_DRIVE: when True, Google Drive is mounted and artifacts are stored under
# MyDrive/<DRIVE_PROJECT_DIR>/artifacts; when False, under /content/artifacts.
USE_DRIVE = True  #@param {type:"boolean"}
DRIVE_PROJECT_DIR = "NLP_Project/toxicity_agent"  #@param {type:"string"}

# Paths to configs (inside repo)
# TRAIN_CONFIG is passed to the error-analysis and fairness commands,
# INFER_CONFIG to the benchmark command, and FAIRNESS_CONFIG to the fairness
# command (as --fairness-config).
TRAIN_CONFIG = "configs/train_final.yaml"  #@param {type:"string"}
INFER_CONFIG = "configs/infer.yaml"  #@param {type:"string"}
FAIRNESS_CONFIG = "configs/fairness_slices.yaml"  #@param {type:"string"}

# THRESHOLD is forwarded as --threshold to the error-analysis and fairness
# commands.
THRESHOLD = 0.5  #@param {type:"number"}
# MAX_SAMPLES: None omits the --max-samples flag; any other value is forwarded
# to it verbatim.
MAX_SAMPLES = None  #@param {type:"raw"}  # set int for faster run


In [None]:
#@title 1) Mount Drive + set artifacts dirs
import os
from pathlib import Path

# Choose the artifacts root: a folder on Google Drive when USE_DRIVE is set,
# otherwise local Colab storage.
if USE_DRIVE:
    # google.colab is imported only on this branch.
    from google.colab import drive

    drive.mount('/content/drive')
    ARTIFACTS_DIR = f"/content/drive/MyDrive/{DRIVE_PROJECT_DIR}/artifacts"
else:
    ARTIFACTS_DIR = "/content/artifacts"

MODEL_DIR = os.path.join(ARTIFACTS_DIR, "models")
RUN_DIR = os.path.join(ARTIFACTS_DIR, "runs")

# Create the directory tree up front; directories that already exist are kept.
os.makedirs(ARTIFACTS_DIR, exist_ok=True)
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(RUN_DIR, exist_ok=True)

# Export the locations as environment variables so child processes started
# from this notebook inherit them (presumably read by the toxicity-agent CLI
# -- confirm against the package).
os.environ.update(
    {
        "ARTIFACTS_DIR": ARTIFACTS_DIR,
        "TOXICITY_MODEL_DIR": MODEL_DIR,
        "TOXICITY_RUN_DIR": RUN_DIR,
    }
)

print("ARTIFACTS_DIR:", ARTIFACTS_DIR)
print("MODEL_DIR:", MODEL_DIR)
print("RUN_DIR:", RUN_DIR)


In [None]:
#@title 2) Install deps (Colab-safe) + install package
# Assumes you already have the repo in /content/toxicity_agent_project
# If not, clone or restore it first.
import os
REPO_DIR = "/content/toxicity_agent_project"
%cd $REPO_DIR

!pip -q install --upgrade pip setuptools wheel
!pip -q install -r requirements_colab.txt
!pip -q install -e . --no-deps
!pip -q check
print("✅ Installed.")


In [None]:
#@title 3) Run error analysis (privacy-preserving)
import os
# Forward --max-samples only when a limit is configured; otherwise the
# placeholder expands to an empty string.
if MAX_SAMPLES is None:
    max_arg = ""
else:
    max_arg = f"--max-samples {MAX_SAMPLES}"
# Run the CLI error-analysis command on the test split with TRAIN_CONFIG and
# THRESHOLD.
!toxicity-agent error-analysis --config {TRAIN_CONFIG} --split test --threshold {THRESHOLD} {max_arg}


In [None]:
#@title 4) Run fairness slice evaluation
import os
max_arg = f"--max-samples {MAX_SAMPLES}" if MAX_SAMPLES is not None else ""
!toxicity-agent fairness --config {TRAIN_CONFIG} --fairness-config {FAIRNESS_CONFIG} --split test --threshold {THRESHOLD} {max_arg}


In [None]:
#@title 5) Run latency benchmark (agent end-to-end)
# Invokes the CLI benchmark command with INFER_CONFIG, --n 300 and --warmup 10
# (presumably 300 measured calls after 10 warm-up calls -- confirm against the
# CLI help).
!toxicity-agent benchmark --config {INFER_CONFIG} --n 300 --warmup 10


In [None]:
#@title 6) Load latest reports + show summary tables (no raw text)
import json
from pathlib import Path
import pandas as pd

# Root folder searched for report files; RUN_DIR is set in the
# "Mount Drive + set artifacts dirs" cell, which must have been run first.
run_dir = Path(RUN_DIR)

def latest_json(folder: Path, prefix: str):
    """Return the ``<prefix>-*.json`` file in *folder* whose name sorts last.

    Candidates are compared by file name only (not modification time), so
    "latest" means the lexicographically greatest name. Returns ``None``
    when no file matches the pattern.
    """
    candidates = folder.glob(f"{prefix}-*.json")
    return max(candidates, key=lambda candidate: candidate.name, default=None)

# Locate the most recent report of each kind under the run directory.
err_path = latest_json(run_dir/"error_analysis", "error-analysis")
fair_path = latest_json(run_dir/"fairness", "fairness")
bench_path = latest_json(run_dir/"benchmarks", "benchmark")

print("Latest error analysis:", err_path)
print("Latest fairness report:", fair_path)
print("Latest benchmark:", bench_path)

# Each table is rendered only when its report file was found. Only aggregate
# fields read below are displayed.

if err_path:
    err = json.loads(err_path.read_text(encoding="utf-8"))
    # One row per label, with that label's confusion entries spread into
    # columns.
    rows = [
        {"label": lf, **cc}
        for lf, cc in err.get("confusion_per_label", {}).items()
    ]
    display(pd.DataFrame(rows))

if fair_path:
    fair = json.loads(fair_path.read_text(encoding="utf-8"))
    # `or {}` tolerates a slice whose "metrics" entry is missing or null; the
    # metric cells are then left empty instead of raising.
    slice_rows = [
        {
            "slice": s["slice_name"],
            "n": s["n"],
            "f1_micro": (s.get("metrics") or {}).get("f1_micro"),
            "auc_macro": (s.get("metrics") or {}).get("auc_macro"),
        }
        for s in fair.get("slices", [])
    ]
    # Explicit columns keep sort_values("n") valid when the report contains no
    # slices; an empty frame without an "n" column would raise KeyError.
    slice_df = pd.DataFrame(
        slice_rows, columns=["slice", "n", "f1_micro", "auc_macro"]
    )
    display(slice_df.sort_values("n", ascending=False))

if bench_path:
    bench = json.loads(bench_path.read_text(encoding="utf-8"))
    # Single-row table built from the report's "stats" mapping.
    display(pd.DataFrame([bench.get("stats", {})]))
