# Benchmarking [Qwen3-4B](https://huggingface.co/Qwen/Qwen3-4B)

## Libraries

In [1]:
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from pathlib import Path
import os
from transformers import AutoTokenizer, AutoModelForCausalLM
from rich.console import Console
from rich.table import Table
import warnings
from tqdm.std import TqdmExperimentalWarning
warnings.filterwarnings("ignore", category=TqdmExperimentalWarning)
from tqdm.rich import tqdm
tqdm.pandas(desc="Prédiction Toxicité")

from rich.panel import Panel
from rich.text import Text

## Global variables

In [None]:
ROOT = Path('../..')
DATA_DIR = ROOT / "data"
BENCHMARK_PATH = DATA_DIR / "benchmark" / "benchmark_balanced_subset.csv"
output_path = DATA_DIR / "benchmark" / "Qwen-3-4B.csv"
console = Console()
system_prompt = (Path(".") / "API_SYSTEM_PROMPT.txt").read_text().strip()
prompt = (Path(".") / "API_PROMPT.txt").read_text().strip()

In [5]:
os.environ["HTTP_PROXY"] = "socks5h://127.0.0.1:1080"
os.environ["HTTPS_PROXY"] = "socks5h://127.0.0.1:1080"

## Load dataset

In [6]:
df = pd.read_csv(BENCHMARK_PATH, encoding="utf-8")
df = df.dropna(subset=["content", "label"])
df["label"] = df["label"].astype(int)

label_counts = df["label"].value_counts().reset_index()
label_counts.columns = ["label", "count"]
table = Table(title="Label Counts", show_lines=True)
table.add_column("Label", justify="center", style="cyan")
table.add_column("Count", justify="center", style="yellow")
table.add_row("Total Rows", str(len(df)))
for _, row in label_counts.iterrows():
    table.add_row(str(row["label"]), str(row["count"]))
console.print(table, justify="left")
df.sample(5, random_state=42)

Unnamed: 0,msg_id,content,label
650,anon_msg_5ff0a5549f65,"des bovins mon pote, j'ai jamais vu ça sauf en...",0
208,anon_msg_2ea521264d58,Cet accent du sud dégueulasse,1
427,anon_msg_99e41015a911,"Joffrin, je serais capable de faire des trucs ...",1
677,anon_msg_31edc839b8b8,"D'accord le julf, mais vous n'êtes pas BLANC.",1
1272,anon_msg_15c39b3b7e9a,C'est Siemens Espagne hein... Pas le PDG du gr...,0


## Load model

In [7]:
model_name = "Qwen/Qwen3-4B"

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype="auto",
    trust_remote_code=True
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

## Define prediction function

In [8]:
def generate_prompt(text):
    input_text = prompt + f"« {text} »" + "\n Ce message est-il toxique ?\n"
    return [{"role": "system", "content": system_prompt},
            {"role": "user", "content": input_text}]

def predict(text):
    message = generate_prompt(text)
    prompt = tokenizer.apply_chat_template(message, tokenize=False, add_generation_prompt=True, enable_thinking=False)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(**inputs, max_new_tokens=50, temperature=0.7, top_p=0.8, top_k=20)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    
    return response.split("Ce message est-il toxique ?")[-1].replace('assistant', '').strip()

## Run prediction

In [11]:
df["toxicity_score"] = df["content"].progress_apply(predict)
df = df.dropna(subset=["toxicity_score"])

Output()

In [12]:
df["toxicity_score"].value_counts()

toxicity_score
<think>\n\n</think>\n\nnon-toxique    856
<think>\n\n</think>\n\ntoxique        532
Name: count, dtype: int64

In [13]:
df['toxicity_score'] = df['toxicity_score'].apply(lambda x: 0 if "non-toxique" in x.lower() else 1)

In [15]:
df['toxicity_score'].value_counts()

toxicity_score
0    856
1    532
Name: count, dtype: int64

In [14]:
for i, row in df.sample(5, random_state=42).iterrows():
    content = Text(row['content'], style="bold")
    toxicity = f"[yellow]Toxicity Score:[/yellow] [bold]{row['toxicity_score']}[/bold]"
    label = f"[cyan]Label:[/cyan] [bold]{row['label']}[/bold]"
    panel = Panel.fit(
        f"{content}\n\n{toxicity}\n{label}",
        title=f"Exemple {i+1}",
        border_style="magenta"
    )
    console.print(panel)

## Metrics & Report        

| Metric                     | Formula                                           | Interpretation                                                                                                       |
| -------------------------- | ------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------- |
| **Precision**              | `TP / (TP + FP)`                                  | Of the samples predicted **toxic**, how many were **actually toxic**? <br>→ High precision = **low false positives** |
| **Recall** *(Sensitivity)* | `TP / (TP + FN)`                                  | Of the **actual toxic** samples, how many did we **correctly identify**? <br>→ High recall = **low false negatives** |
| **F1-score**               | `2 * (Precision * Recall) / (Precision + Recall)` | Harmonic mean of precision and recall. <br>→ Best when **balance** is needed                                         |
| **Accuracy**               | `(TP + TN) / (TP + TN + FP + FN)`                 | Fraction of all correct predictions (toxic and non-toxic). <br>→ Can be misleading on imbalanced data                |
| **ROC AUC**                | Area under the ROC Curve                          | Measures the **ranking ability** of the classifier. <br>→ Higher = better separation of toxic vs. non-toxic          |


In [16]:
y_true = df["label"]
y_pred = df["toxicity_score"].astype(int)

In [17]:

# Rapport de classification
report = classification_report(y_true, y_pred, digits=3, output_dict=True)
table = Table(title="Classification Report", show_lines=True)
table.add_column("Classe", style="cyan", justify="center")
table.add_column("Precision", justify="center")
table.add_column("Recall", justify="center")
table.add_column("F1-score", justify="center")
table.add_column("Support", justify="center")

for label, metrics in report.items():
    if label in ["accuracy", "macro avg", "weighted avg"]:
        continue
    table.add_row(
        str(label),
        f"{metrics['precision']:.3f}",
        f"{metrics['recall']:.3f}",
        f"{metrics['f1-score']:.3f}",
        f"{int(metrics['support'])}"
    )
# Ajout de l'accuracy
table.add_row(
    "[bold yellow]Accuracy[/bold yellow]",
    "-",
    "-",
    f"{report['accuracy']:.3f}",
    "-"
)
console.print(table)

# Matrice de confusion
cm = confusion_matrix(y_true, y_pred)
cm_table = Table(title="Matrice de Confusion", show_lines=True)
cm_table.add_column(" ", style="bold")
cm_table.add_column("Prédit 0", justify="center")
cm_table.add_column("Prédit 1", justify="center")
cm_table.add_row("Réel 0", str(cm[0, 0]), str(cm[0, 1]))
cm_table.add_row("Réel 1", str(cm[1, 0]), str(cm[1, 1]))
console.print(cm_table)

# ROC AUC
try:
    auc = roc_auc_score(y_true, y_pred)
    console.print(Panel(f"[bold green]ROC AUC Score : {auc:.3f}[/bold green]", title="ROC AUC"))
except:
    console.print(Panel("[bold red]Impossible de calculer le ROC AUC.[/bold red]", title="ROC AUC"))

## Save the predictions

In [18]:
df['predictions'] = y_pred

In [19]:
df

Unnamed: 0,msg_id,content,label,toxicity_score,predictions
0,anon_msg_468ffd36870c,AYAAAAAAAAAAAA LES CRACKHEAD ANTI MIGRANT BORD...,1,1,1
1,anon_msg_c30ce8c67bac,Bordel le gars chantait 'FN souffrance' alors ...,1,1,1
2,anon_msg_a7be822bc051,Je vois que ça ddb en masse\nComme en dictatur...,1,1,1
3,anon_msg_8a2c7da3ff85,vous tenez là ? persoent c'est chaud depuis hi...,1,1,1
4,anon_msg_b20f2e8b51ed,Le racisme est devenu le crime ultime en Occid...,0,1,1
...,...,...,...,...,...
1383,anon_msg_a3bb41085bd2,que j’entende plus un média dire le danger c’e...,0,0,0
1384,anon_msg_835f5f8a3e25,il raconte absolument n'importe quoi non stop,0,0,0
1385,anon_msg_5de411f54a1b,mais qu'est ce qu'il raconte saint nikos,0,0,0
1386,anon_msg_48b9b19e3c3b,Oui el famoso 200 bordel\n:rire:\nLE QI bordel...,0,0,0


In [20]:
output_path

PosixPath('../data/benchmark/Qwen-3-4B.csv')

In [21]:
df.to_csv(output_path, index=False, encoding="utf-8")