In [3]:
# ✅ Setup: Install these in your environment first
# pip install transformers datasets sentence-transformers detoxify
import numpy as np
import torch
from datasets import DatasetDict, concatenate_datasets, load_dataset
from detoxify import Detoxify
from sentence_transformers import SentenceTransformer, util
from transformers import (
    DataCollatorForSeq2Seq,
    MT5ForConditionalGeneration,
    MT5Tokenizer,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments,
)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# ✅ 1. Load dataset
# load_dataset is called without a split, so every language split of the
# dataset is loaded; the English and German splits are picked out of the
# result in a later cell.
dataset = load_dataset("textdetox/multilingual_paradetox")


In [12]:
# Show the DatasetDict repr to inspect the language splits and their columns.
print("Dataset loaded. Available languages:", dataset)

Dataset loaded. Available languages: DatasetDict({
    en: Dataset({
        features: ['toxic_sentence', 'neutral_sentence'],
        num_rows: 400
    })
    ru: Dataset({
        features: ['toxic_sentence', 'neutral_sentence'],
        num_rows: 400
    })
    uk: Dataset({
        features: ['toxic_sentence', 'neutral_sentence'],
        num_rows: 400
    })
    de: Dataset({
        features: ['toxic_sentence', 'neutral_sentence'],
        num_rows: 400
    })
    es: Dataset({
        features: ['toxic_sentence', 'neutral_sentence'],
        num_rows: 400
    })
    am: Dataset({
        features: ['toxic_sentence', 'neutral_sentence'],
        num_rows: 400
    })
    zh: Dataset({
        features: ['toxic_sentence', 'neutral_sentence'],
        num_rows: 400
    })
    ar: Dataset({
        features: ['toxic_sentence', 'neutral_sentence'],
        num_rows: 400
    })
    hi: Dataset({
        features: ['toxic_sentence', 'neutral_sentence'],
        num_rows: 400
    })
})


In [13]:
# Select EN and DE data
def filter_lang(ds, lang):
    """Return the data for one language from a per-language collection.

    The loaded dataset stores each language as its own split and has no
    ``lang`` column, so a language is selected by key rather than by filtering
    rows.

    Args:
        ds: Mapping from language code to that language's data (for example
            the ``DatasetDict`` returned by ``load_dataset``).
        lang: Language code to select, e.g. ``"en"`` or ``"de"``.

    Returns:
        The entry of ``ds`` stored under ``lang``.

    Raises:
        KeyError: If ``lang`` is not one of the keys of ``ds``.
    """
    if lang not in ds:
        raise KeyError(f"Language {lang!r} not found; available: {sorted(ds)}")
    return ds[lang]

# Each language is stored as its own split of the DatasetDict, so the English
# and German data are obtained by indexing with the language code.
en = dataset['en']
de = dataset['de']

In [2]:
import torch
# Report whether PyTorch's MPS backend is available on this machine.
print(torch.backends.mps.is_available())

True


In [3]:
# ✅ 2. Prepare tokenizer and model
# Both are loaded from the same "google/mt5-small" checkpoint.
# This cell needs MT5Tokenizer / MT5ForConditionalGeneration in scope, so the
# imports cell must have been run in the current kernel session first.
# NOTE(review): MT5Tokenizer presumably needs the `sentencepiece` package, which
# is not in the pip line at the top of the notebook — confirm.
tokenizer = MT5Tokenizer.from_pretrained("google/mt5-small")
model = MT5ForConditionalGeneration.from_pretrained("google/mt5-small")

# ✅ 3. Preprocessing function
def preprocess(example):
    """Tokenize toxic/neutral sentence pairs for seq2seq fine-tuning.

    Works on a single row (string fields) as well as on the dict-of-lists
    batch that ``Dataset.map(..., batched=True)`` passes in.

    Args:
        example: Mapping with ``toxic_sentence`` and ``neutral_sentence``
            entries, each either a string or a list of strings.

    Returns:
        The tokenizer output for the ``"detoxify: "``-prefixed toxic text, with
        the token ids of the neutral text stored under ``"labels"``.
    """
    toxic = example["toxic_sentence"]
    target_text = example["neutral_sentence"]

    if isinstance(toxic, str):
        input_text = f"detoxify: {toxic}"
    else:
        # Batched call: prefix every sentence individually. Formatting the list
        # itself would embed the list's repr into a single prompt.
        input_text = [f"detoxify: {sentence}" for sentence in toxic]

    # No padding here: the DataCollatorForSeq2Seq used for training pads each
    # batch and fills label padding with -100, so pad positions are ignored by
    # the loss. Padding to max_length here would put pad token ids in the labels.
    model_inputs = tokenizer(input_text, max_length=128, truncation=True)
    labels = tokenizer(target_text, max_length=128, truncation=True)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# ✅ 4. Tokenize datasets
en = en.map(preprocess, batched=True)
de = de.map(preprocess, batched=True)

# Combine for multilingual training
# Each language split has only 400 rows, so use every EN and DE row instead of
# selecting a fixed 2000-row range, which is out of bounds for a single split.
combined = concatenate_datasets([en, de])
# train_test_split shuffles before splitting; the seed makes the split reproducible.
split = combined.train_test_split(test_size=0.1, seed=42)
train_dataset = DatasetDict({"train": split["train"], "test": split["test"]})


NameError: name 'MT5Tokenizer' is not defined

In [None]:

# ✅ 5. Training setup
# Pads inputs and labels per batch at collation time.
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

# mT5 is numerically unstable under fp16 mixed precision (losses overflow to
# NaN), so use bf16 where the GPU supports it and plain fp32 everywhere else
# (CPU, MPS, older GPUs).
use_bf16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()

training_args = Seq2SeqTrainingArguments(
    output_dir="./results_mt5",
    # NOTE(review): recent transformers releases rename this argument to
    # `eval_strategy` — confirm against the installed version.
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    save_total_limit=1,
    predict_with_generate=True,
    bf16=use_bf16,
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset["train"],
    eval_dataset=train_dataset["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
)

# ✅ 6. Train!
trainer.train()

# ✅ 7. Generate predictions
def generate_predictions(dataset, batch_size=16, max_new_tokens=128):
    """Generate detoxified text for every row of ``dataset``.

    Args:
        dataset: Iterable of rows, each a mapping with a ``toxic_sentence`` entry.
        batch_size: Number of prompts sent through the model at once, so a
            large dataset is not tokenized and generated as one batch.
        max_new_tokens: Upper bound on generated tokens per prompt. Passed
            explicitly because the library's default generation length is
            short and would cut rewrites off.

    Returns:
        List of decoded predictions, one per input row, in input order. Empty
        when ``dataset`` has no rows.
    """
    model.eval()
    prompts = [f"detoxify: {ex['toxic_sentence']}" for ex in dataset]

    predictions = []
    for start in range(0, len(prompts), batch_size):
        chunk = prompts[start:start + batch_size]
        # max_length matches the truncation length used in preprocessing.
        inputs = tokenizer(
            chunk, return_tensors="pt", padding=True, truncation=True, max_length=128
        ).to(model.device)
        with torch.no_grad():
            outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
        predictions.extend(tokenizer.batch_decode(outputs, skip_special_tokens=True))
    return predictions

# ✅ 8. Evaluation functions
# LaBSE sentence encoder, loaded once here and used by content_similarity.
labse = SentenceTransformer("sentence-transformers/LaBSE")
def content_similarity(srcs, preds):
    """Cosine similarity between each source sentence and its prediction.

    Args:
        srcs: Sequence of source sentences.
        preds: Sequence of predictions, aligned with ``srcs``.

    Returns:
        1-D NumPy array with one similarity per ``(srcs[i], preds[i])`` pair;
        an empty array when both inputs are empty.

    Raises:
        ValueError: If ``srcs`` and ``preds`` differ in length.
    """
    if len(srcs) != len(preds):
        raise ValueError(
            f"srcs and preds must have the same length, got {len(srcs)} and {len(preds)}"
        )
    if len(srcs) == 0:
        return np.empty(0, dtype=np.float32)

    embeddings1 = labse.encode(srcs, convert_to_tensor=True)
    embeddings2 = labse.encode(preds, convert_to_tensor=True)
    # Row-wise similarity: only aligned pairs are needed, so avoid building the
    # full N x N similarity matrix just to read its diagonal.
    pairwise = torch.nn.functional.cosine_similarity(embeddings1, embeddings2, dim=1)
    return pairwise.cpu().numpy()

# Loaded Detoxify models keyed by checkpoint name, so repeated calls reuse the
# model instead of re-instantiating it every time.
_DETOXIFY_MODELS = {}

def toxicity_score(preds, model_name="original"):
    """Score how non-toxic each prediction is.

    Args:
        preds: Sequence of texts to score.
        model_name: Detoxify checkpoint name passed to ``Detoxify(...)``.
            NOTE(review): "original" is presumably English-oriented; for German
            text the "multilingual" checkpoint may be more suitable — confirm.

    Returns:
        List with ``1 - toxicity`` for every text, in input order; an empty
        list when ``preds`` is empty.
    """
    if len(preds) == 0:
        return []
    if model_name not in _DETOXIFY_MODELS:
        _DETOXIFY_MODELS[model_name] = Detoxify(model_name)
    scores = _DETOXIFY_MODELS[model_name].predict(preds)
    return [1 - score for score in scores['toxicity']]

# ✅ Example use
# NOTE(review): these rows come from `en`, which also feeds the training split;
# use train_dataset["test"] for a held-out check.
sample = en.select(range(10))
preds = generate_predictions(sample)
# The dataset's source-text column is `toxic_sentence`.
sources = [x['toxic_sentence'] for x in sample]
content_sim = content_similarity(sources, preds)
tox_score = toxicity_score(preds)

for src, pred, sim, non_tox in zip(sources, preds, content_sim, tox_score):
    print(f"\nToxic: {src}\nDetoxified: {pred}\nSimilarity: {sim:.2f}, Non-toxicity: {non_tox:.2f}")
