<a href="https://colab.research.google.com/github/mathildabineli/Moderation_model_mdeberta/blob/main/moderation_mdeberta.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pandas

In [None]:
!!pip install --upgrade tqdm

In [None]:
import os, math, json, re, numpy as np, pandas as pd
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple

from tqdm.auto import tqdm

import torch, torch.nn as nn, torch.nn.functional as F
from torch.utils.data import WeightedRandomSampler

from datasets import Dataset, DatasetDict
from transformers import (
    AutoTokenizer, AutoConfig, AutoModel,
    Trainer, TrainingArguments, DataCollatorWithPadding, EarlyStoppingCallback
)
from sklearn.metrics import f1_score, precision_recall_fscore_support


In [None]:
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
import pandas as pd

In [None]:
data=pd.read_csv('/content/gdrive/My Drive/tst-317/datasets_for_training/final_data.csv')

In [None]:
data.head(5)

In [None]:
# Call the method: a bare `data.info` only displays the bound-method repr.
data.info()

In [None]:
import re
import numpy as np

def quick_moderation_report(
    df: pd.DataFrame,
    text_col: str = "user_input",
    lang_col: str = "language_code",
):
    """Print an exploratory summary of a multi-label moderation dataset.

    Label columns are auto-detected as numeric columns (outside a fixed set
    of metadata names) whose non-null values are 0/1 in more than 98% of
    rows. The report covers shape, dtypes, nulls, language coverage, label
    prevalence, label co-occurrence, a few consistency checks, text length,
    URL/emoji presence and duplicated texts.

    Args:
        df: Dataset to inspect. Must contain ``text_col`` and ``lang_col``.
        text_col: Name of the column holding the raw text.
        lang_col: Name of the column holding the language code.

    Returns:
        A dict with ``label_cols`` (detected label names), ``co_matrix``
        (label-by-label co-positive counts), ``per_lang_prevalence``
        (percentage of positives per language) and ``pos_counts``
        (positives per label, sorted descending).
    """
    # 1) Identify candidate label columns = all 0/1-like numeric cols (exclude meta)
    meta_cols = {text_col, lang_col, "safe", "Unnamed: 0", "split", "id"}
    label_cols = []
    for c in df.columns:
        if c in meta_cols:
            continue
        s = df[c].dropna()
        if pd.api.types.is_numeric_dtype(s) and s.isin([0, 1]).mean() > 0.98:
            label_cols.append(c)
    labels = df[label_cols]

    print("\n=== SHAPE & MEMORY ===")
    print("shape:", df.shape)
    print("memory (MB):", round(df.memory_usage(deep=True).sum() / 1e6, 2))

    print("\n=== DTYPES ===")
    print(df.dtypes)

    print("\n=== NULLS (top 20) ===")
    print(df.isna().sum().sort_values(ascending=False).head(20))

    print("\n=== LANGUAGES ===")
    print(df[lang_col].value_counts(dropna=False).head(30))

    print("\n=== REQUIRED LANGUAGE COVERAGE ===")
    required = {"en", "fr", "de", "es", "pt", "it", "nl", "ar", "hi", "zh"}
    present = set(df[lang_col].astype(str).str.lower().unique())
    print("present:", sorted(present)[:20], "…")
    print("missing:", sorted(required - present))

    print("\n=== LABELS DETECTED ===")
    print(label_cols)

    print("\n=== POSITIVE COUNTS PER LABEL ===")
    pos_counts = labels.sum().sort_values(ascending=False)
    print(pos_counts)

    print("\n=== PREVALENCE PER LABEL (%) ===")
    print((100 * pos_counts / len(df)).round(3))

    print("\n=== MULTI-LABEL CARDINALITY ===")
    lbl_card = labels.sum(axis=1)
    # round() on a plain float works whatever scalar type .mean() returns.
    print("avg labels/sample:", round(float(lbl_card.mean()), 3))
    print("label-count distribution:\n", lbl_card.value_counts().sort_index())

    print("\n=== PAIRWISE CO-OCCURRENCE (counts) ===")
    # A matrix product propagates NaN, so one missing label value would turn
    # whole rows/columns of the matrix into NaN. Treat missing as "not
    # positive", which matches how the sums above already skip NaN.
    filled = labels.fillna(0)
    co = filled.T.dot(filled)  # counts of co-positives
    print(co)

    print("\n=== PER-LANGUAGE PREVALENCE (%) ===")
    per_lang = (100 * df.groupby(lang_col)[label_cols].mean()).round(2)
    print(per_lang.head(15))

    print("\n=== CONSISTENCY CHECKS ===")
    if "safe" in df.columns:
        bad_safe = df[(lbl_card > 0) & (df["safe"] == 1)]
        print("safe==1 but some label==1:", len(bad_safe))
    if "violence/graphic" in df.columns and "violence" in df.columns:
        vg_not_v = df[(df["violence/graphic"] == 1) & (df["violence"] != 1)]
        print("violence/graphic==1 but violence!=1:", len(vg_not_v))

    print("\n=== TEXT LENGTH (chars) ===")
    lens = df[text_col].astype(str).str.len()
    print(lens.describe(percentiles=[.5, .9, .99]))

    print("\n=== URL / EMOJI FLAGS ===")
    texts = df[text_col]
    # Plain string patterns without capture groups: pandas warns when a
    # pattern passed to str.contains has match groups.
    has_url = texts.str.contains(r"https?://|www\.", case=False, regex=True, na=False)
    has_emoji = texts.str.contains("[\U0001F300-\U0001FAFF]", regex=True, na=False)
    print("rows with URL:", has_url.sum())
    print("rows with emoji:", has_emoji.sum())

    print("\n=== DUPLICATES ===")
    print("duplicate texts:", texts.duplicated(keep=False).sum())

    # Return useful artifacts if you want to save them
    return {
        "label_cols": label_cols,
        "co_matrix": co,
        "per_lang_prevalence": per_lang,
        "pos_counts": pos_counts,
    }

# Example usage
# df = pd.read_csv("PATH/TO/your_dataset.csv")
# report = quick_moderation_report(df)


In [None]:
report = quick_moderation_report(data)

In [None]:
# index=False: otherwise the row index is written as an extra column that
# comes back as "Unnamed: 0" when the file is read again.
data.to_csv('data.csv', index=False)

In [None]:
# -----------------------
# 0) Config
# -----------------------
# Every setting below can be overridden with an environment variable of the
# same name; the second argument is the default used when it is unset.

# --- model & data locations ---
MODEL_NAME = os.environ.get("MODEL_NAME", "microsoft/mdeberta-v3-base")
TRAIN_CSV = os.environ.get("TRAIN_CSV", "data.csv")
OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "./outputs_teacher")
TEXT_COL = os.environ.get("TEXT_COL", "user_input")
LANG_COL = os.environ.get("LANG_COL", "language_code")

# --- optimisation hyper-parameters ---
MAX_LEN = int(os.environ.get("MAX_LEN", "256"))
BATCH = int(os.environ.get("BATCH", "16"))
EPOCHS = int(os.environ.get("EPOCHS", "4"))
LR = float(os.environ.get("LR", "2e-5"))
SEED = int(os.environ.get("SEED", "42"))

# --- loss / multi-task settings ---
GAMMA_FOCAL = float(os.environ.get("GAMMA_FOCAL", "2.0"))
LAMBDA_LANG = float(os.environ.get("LAMBDA_LANG", "0.2"))
USE_LANG_HEAD = os.environ.get("USE_LANG_HEAD", "true").lower() == "true"

# Optional: collapse toxicity/offensive
MERGE_ABUSIVE = os.environ.get("MERGE_ABUSIVE", "true").lower() == "true"

# Create the output folder up front and seed both RNGs used by this notebook.
os.makedirs(OUTPUT_DIR, exist_ok=True)
torch.manual_seed(SEED)
np.random.seed(SEED)


In [None]:
# -----------------------
# 1) Load & clean data
# -----------------------
# Normalise column names so they are safe identifiers and match the rest of
# the notebook ("lang" is the canonical language column from here on).
rename_map = {
    "sexual/minors": "child_safety",
    "violence/graphic": "violence_graphic",
    "terrorist": "terrorism",
    LANG_COL: "lang",
}
df = data.rename(columns=rename_map)

# Optionally collapse the two overlapping labels into a single "abusive" one.
if MERGE_ABUSIVE and {"toxicity", "offensive"}.issubset(df.columns):
    df["abusive"] = df[["toxicity", "offensive"]].max(axis=1)
    df = df.drop(columns=["toxicity", "offensive"])

# Constraint: graphic implies violence
if {"violence_graphic", "violence"}.issubset(df.columns):
    df.loc[df["violence_graphic"] == 1, "violence"] = 1

# Hygiene
df = df.dropna(subset=[TEXT_COL]).drop_duplicates(subset=[TEXT_COL])

# Detect label columns (binary). "Unnamed: 0" is the index column pandas
# creates when a CSV was written with its index; it must never be a label.
meta_cols = {TEXT_COL, "lang", "safe", "split", "id", "Unnamed: 0"}
label_cols = [
    c for c in df.columns
    if c not in meta_cols
    and pd.api.types.is_integer_dtype(df[c])
    and set(df[c].unique()).issubset({0, 1})
]
if not label_cols:
    # Fail here with a clear message instead of much later during tokenization.
    raise ValueError("No binary integer label columns were detected in the data.")

# Derive safe (not used as target)
df["safe"] = (df[label_cols].sum(axis=1) == 0).astype(int)

# Language mapping. fillna must run BEFORE astype(str): converting first can
# turn a missing value into the literal string "nan", so "unknown" was never
# applied.
langs = df["lang"].fillna("unknown").astype(str).str.lower().tolist()
lang_vocab = sorted(set(langs))
lang2id = {l: i for i, l in enumerate(lang_vocab)}
df["lang_id"] = [lang2id[l] for l in langs]

# Train/val split (simple; replace with iterative stratification later).
# Stratification is on "has at least one positive label" vs "has none".
from sklearn.model_selection import train_test_split
train_df, val_df = train_test_split(
    df, test_size=0.1, random_state=SEED, stratify=df[label_cols].sum(axis=1).clip(0, 1)
)

print("Labels:", label_cols)
print("Languages:", lang_vocab[:20], "... (#", len(lang_vocab), ")")

In [None]:
train_df.head(10)

In [None]:
# -----------------------
# 2) Tokenize
# -----------------------
tok = AutoTokenizer.from_pretrained(MODEL_NAME)


def preprocess(batch):
    """Tokenize one batch of rows and attach the training targets.

    Reads the text from ``batch[TEXT_COL]``, truncates to ``MAX_LEN`` tokens
    without padding, and adds a float32 ``labels`` matrix with one column per
    entry of ``label_cols``. When ``USE_LANG_HEAD`` is set, ``lang_id`` is
    forwarded as well.
    """
    encoded = tok(batch[TEXT_COL], truncation=True, padding=False, max_length=MAX_LEN)

    # One array per label column, stacked so that rows are examples.
    per_label = [batch[name] for name in label_cols]
    target_matrix = np.stack(per_label, axis=1).astype(np.float32)

    features = dict(encoded)
    features["labels"] = target_matrix
    if USE_LANG_HEAD:
        features["lang_id"] = batch["lang_id"]
    return features


# All original dataframe columns are dropped; only what preprocess returns is kept.
train_ds = Dataset.from_pandas(train_df, preserve_index=False).map(
    preprocess,
    batched=True,
    remove_columns=list(train_df.columns),
)
val_ds = Dataset.from_pandas(val_df, preserve_index=False).map(
    preprocess,
    batched=True,
    remove_columns=list(val_df.columns),
)
dset = DatasetDict(train=train_ds, validation=val_ds)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoModel
import numpy as np

class MultiTaskDeberta(nn.Module):
    """Encoder with a multi-label head and an optional language-ID head.

    The first-token hidden state of the backbone is passed through dropout
    and fed to two linear heads: ``cls`` (one logit per moderation label,
    trained with a class-weighted focal BCE) and ``lang`` (language
    classification, trained with cross-entropy scaled by ``lambda_lang``).

    Args:
        model_name: Checkpoint name/path given to ``AutoModel.from_pretrained``.
        num_labels: Number of moderation labels (size of the ``cls`` head).
        num_langs: Number of languages (size of the ``lang`` head).
        p_drop: Dropout probability applied to the pooled representation.
        alpha: Optional per-label loss weights of length ``num_labels``. When
            omitted they are derived from the label prevalence of the
            notebook-level ``train_df[label_cols]``.
        gamma: Optional focal exponent; defaults to the global ``GAMMA_FOCAL``.
        use_lang_head: Optional switch for the language head; defaults to the
            global ``USE_LANG_HEAD``.
        lambda_lang: Optional weight of the language loss; defaults to the
            global ``LAMBDA_LANG`` (read once, at construction time).

    Raises:
        ValueError: If ``alpha`` does not contain exactly ``num_labels`` values.
    """

    def __init__(self, model_name, num_labels, num_langs, p_drop=0.1, *,
                 alpha=None, gamma=None, use_lang_head=None, lambda_lang=None):
        super().__init__()

        self.backbone = AutoModel.from_pretrained(model_name)
        hidden = self.backbone.config.hidden_size
        self.dropout = nn.Dropout(p_drop)
        self.cls = nn.Linear(hidden, num_labels)     # multi-label head
        # Always created (even when unused) so state_dict keys stay stable.
        self.lang = nn.Linear(hidden, num_langs)     # language-ID head

        # Explicit arguments win; otherwise fall back to the notebook globals
        # so the existing call `MultiTaskDeberta(name, num_labels=..., num_langs=...)`
        # behaves as before.
        self.use_lang_head = USE_LANG_HEAD if use_lang_head is None else bool(use_lang_head)
        self.lambda_lang = LAMBDA_LANG if lambda_lang is None else float(lambda_lang)
        self.gamma = GAMMA_FOCAL if gamma is None else float(gamma)

        # α weights from prevalence
        if alpha is None:
            alpha = self._alpha_from_prevalence(train_df[label_cols].mean().values)
        alpha = torch.as_tensor(alpha, dtype=torch.float32).detach().clone().reshape(-1)
        if alpha.numel() != num_labels:
            raise ValueError(
                f"alpha has {alpha.numel()} entries but num_labels is {num_labels}"
            )
        self.register_buffer("alpha", alpha)

        print(f"Model initialized with alpha weights: {self.alpha}, gamma: {self.gamma}")

    @staticmethod
    def _alpha_from_prevalence(prevalence, floor=1e-4):
        """Return inverse-prevalence weights scaled so the largest weight is 1."""
        prev = np.clip(np.asarray(prevalence, dtype=np.float64), floor, 1.0)
        inverse = 1.0 / prev
        return inverse / inverse.max()

    def focal_bce(self, logits, targets):
        """Class-weighted focal binary cross-entropy, averaged over all elements.

        Args:
            logits: Raw scores, shape ``[B, C]``.
            targets: Float targets in {0, 1}, shape ``[B, C]``.
        """
        bce = F.binary_cross_entropy_with_logits(logits, targets, reduction="none")
        p = torch.sigmoid(logits)
        # Probability assigned to the true class of each element.
        pt = p * targets + (1 - p) * (1 - targets)
        mod = (1 - pt).pow(self.gamma)   # down-weights easy (confident) elements
        loss = mod * bce
        loss = loss * self.alpha  # Apply per-class weight
        return loss.mean()

    def forward(self, input_ids=None, attention_mask=None, labels=None, lang_id=None):
        """Run the encoder and both heads.

        Returns:
            A dict with ``logits``; ``lang_logits`` when the language head is
            enabled; and ``loss`` when ``labels`` and/or ``lang_id`` produced one.
        """
        out = self.backbone(input_ids=input_ids, attention_mask=attention_mask)
        pooled = out.last_hidden_state[:, 0]  # CLS token
        pooled = self.dropout(pooled)
        logits = self.cls(pooled)
        outputs = {"logits": logits}

        loss = None
        if labels is not None:
            loss = self.focal_bce(logits, labels.float())

        if self.use_lang_head:
            lang_logits = self.lang(pooled)
            outputs["lang_logits"] = lang_logits
            if lang_id is not None:
                lang_loss = self.lambda_lang * F.cross_entropy(lang_logits, lang_id)
                loss = lang_loss if loss is None else loss + lang_loss

        if loss is not None:
            outputs["loss"] = loss

        return outputs

# Build the multi-task model; head sizes follow the detected labels/languages.
print("Initializing MultiTaskDeberta model...")
model = MultiTaskDeberta(
    MODEL_NAME,
    num_labels=len(label_cols),
    num_langs=len(lang_vocab),
)

# Sanity check: report the total number of parameters (backbone + heads).
num_params = sum(map(torch.numel, model.parameters()))
print(f"Model has {num_params / 1e6:.2f} million parameters.")


In [None]:
# -----------------------
# 4) Metrics
# -----------------------
def sigmoid(x):
    """Return the logistic function of ``x`` element-wise, without overflow.

    Uses exp(-log(1 + exp(-x))), which equals 1 / (1 + exp(-x)) but goes
    through ``np.logaddexp`` so large negative logits do not overflow
    ``exp`` (and emit RuntimeWarnings) the way the naive formula does.
    """
    return np.exp(-np.logaddexp(0.0, -np.asarray(x)))

def compute_metrics(eval_pred):
    """Compute macro F1 / recall / precision for the multi-label head.

    ``eval_pred`` unpacks to ``(predictions, labels)``. Predictions may be a
    tuple (task logits first, then language logits), in which case only the
    first element is scored. Probabilities are thresholded at a naive 0.5
    (calibration is meant to happen later).
    """
    raw_outputs, y_true = eval_pred
    task_logits = raw_outputs[0] if isinstance(raw_outputs, tuple) else raw_outputs

    y_pred = (sigmoid(task_logits) >= 0.5).astype(int)

    # Per-class scores; the macro values are their unweighted means.
    per_class_precision, per_class_recall, _, _ = precision_recall_fscore_support(
        y_true, y_pred, average=None, zero_division=0
    )
    return {
        "macro_f1": f1_score(y_true, y_pred, average="macro", zero_division=0),
        "macro_recall": per_class_recall.mean(),
        "macro_precision": per_class_precision.mean(),
    }

In [None]:
# -----------------------
# 5) Weighted sampler (helps rare labels & low-resource langs)
# -----------------------
def make_weights(df: pd.DataFrame) -> np.ndarray:
    """Return one sampling weight per row of ``df``.

    A row's weight is the product of:
      * 5 if the row has at least one positive label in ``label_cols``, else 1;
      * (size of the largest language group) / (size of the row's language group).

    Languages are normalised the same way as for the language vocabulary
    (missing -> "unknown", lower-cased), so a missing language code no longer
    raises ``KeyError`` and "EN"/"en" are counted as one language.

    Args:
        df: Frame containing the ``label_cols`` columns and a ``lang`` column.

    Returns:
        Float array of length ``len(df)``.
    """
    # upweight positives and under-represented languages
    has_positive = df[label_cols].sum(axis=1) > 0
    pos_w = 1 + 4 * has_positive

    lang = df["lang"].fillna("unknown").astype(str).str.lower()
    # Per-row size of that row's language group (vectorised lookup).
    group_size = lang.map(lang.value_counts())
    lang_w = group_size.max() / group_size

    return (pos_w * lang_w).to_numpy(dtype=float)

# Draw len(train) samples per epoch, with replacement, proportionally to the weights.
train_weights = make_weights(train_df)
sampler = WeightedRandomSampler(
    weights=train_weights,
    num_samples=len(train_weights),
    replacement=True,
)


In [None]:
# -----------------------
# 6) Train
# -----------------------
# Pads each batch dynamically to its longest sequence.
data_collator = DataCollatorWithPadding(tokenizer=tok)

# Training configuration. Checkpoints are evaluated and saved once per epoch
# and the checkpoint with the best validation macro-F1 is restored at the end.
args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    learning_rate=LR,
    per_device_train_batch_size=BATCH,  # Train batch size
    per_device_eval_batch_size=BATCH,  # Eval batch size
    num_train_epochs=EPOCHS,
    weight_decay=0.01,
    eval_strategy="epoch",  # How often to evaluate
    save_strategy="epoch",  # How often to save model
    load_best_model_at_end=True,  # Ensure the best model is loaded at the end
    metric_for_best_model="macro_f1",
    greater_is_better=True,
    logging_steps=50,  # Log every 50 steps
    logging_dir=OUTPUT_DIR,
    # Mixed precision needs a GPU; enabling it unconditionally makes
    # TrainingArguments fail on a CPU-only runtime.
    fp16=torch.cuda.is_available(),
    report_to="none",  # Disable external reporting (or set to "tensorboard" if needed)
    seed=SEED,
    gradient_accumulation_steps=2,  # Accumulate gradients to simulate larger batch sizes
    dataloader_pin_memory=True,  # Pin memory for faster data loading
    disable_tqdm=False,  # Enable tqdm progress bar
    # dataloader_num_workers=4  # You can increase the number of workers for faster data loading if needed
)



In [None]:
# Pick the GPU when PyTorch can see one, otherwise stay on the CPU, and move
# the model there.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)


In [None]:
class MyTrainer(Trainer):
    """Trainer whose training batches are drawn with the weighted ``sampler``.

    The sampler is injected through ``_get_train_sampler`` so that it is
    passed to the ``DataLoader`` constructor. Assigning ``dl.sampler`` on an
    already-built ``DataLoader`` is rejected by PyTorch with a ``ValueError``.
    """

    def _get_train_sampler(self, *args, **kwargs):
        # *args/**kwargs: some transformers versions call this hook with the
        # training dataset, others with no argument.
        return sampler  # Use weighted sampler for imbalance

    def get_train_dataloader(self):
        print("Loading train dataloader...")
        return super().get_train_dataloader()


# Training stops early once macro-F1 has failed to improve for 2 evaluations.
early_stopping = EarlyStoppingCallback(early_stopping_patience=2)

trainer = MyTrainer(
    model=model,
    args=args,
    # data
    train_dataset=dset["train"],
    eval_dataset=dset["validation"],
    data_collator=data_collator,
    tokenizer=tok,
    # evaluation
    compute_metrics=compute_metrics,
    callbacks=[early_stopping],
)


In [None]:
# Progress-bar wrappers around the trainer's dataloaders (train on row 0,
# eval on row 1). trainer.train() builds its own dataloaders internally.
train_iterator = tqdm(
    trainer.get_train_dataloader(),
    desc="Training",
    position=0,
    leave=True,
)
eval_iterator = tqdm(
    trainer.get_eval_dataloader(),
    desc="Evaluating",
    position=1,
    leave=True,
)


In [None]:
# Run fine-tuning, then persist the final weights to OUTPUT_DIR.
trainer.train()
trainer.save_model(OUTPUT_DIR)

# Print a completion message
print(f"Training completed. Model saved in {OUTPUT_DIR}")

# Store the label order and language vocabulary next to the weights so the
# output indices can be mapped back to names later.
with open(os.path.join(OUTPUT_DIR, "label_cols.json"), "w") as f:
    json.dump(label_cols, fp=f, indent=2)

with open(os.path.join(OUTPUT_DIR, "lang_vocab.json"), "w") as f:
    json.dump(lang_vocab, fp=f, indent=2)

print("Training complete. Best metrics:", trainer.state.best_metric)
