In [None]:
# Install every third-party package the import cell needs. `%pip` (rather than `!pip`)
# installs into the environment of the running kernel. `jieba` is listed because the
# import cell imports it.
%pip install datasets transformers torch accelerate peft sentencepiece sentence_transformers jieba

In [2]:
# Upgrade transformers inside the running kernel's environment (`%pip`, not `!pip`).
# If transformers was already imported in this session, restart the kernel afterwards.
%pip install -U transformers

Defaulting to user installation because normal site-packages is not writeable
Collecting transformers
  Using cached transformers-4.52.4-py3-none-any.whl.metadata (38 kB)
Collecting tokenizers<0.22,>=0.21 (from transformers)
  Using cached tokenizers-0.21.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Using cached transformers-4.52.4-py3-none-any.whl (10.5 MB)
Using cached tokenizers-0.21.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)
Installing collected packages: tokenizers, transformers
  Attempting uninstall: tokenizers
    Found existing installation: tokenizers 0.15.2
    Uninstalling tokenizers-0.15.2:
      Successfully uninstalled tokenizers-0.15.2
  Attempting uninstall: transformers
    Found existing installation: transformers 4.36.2
    Uninstalling transformers-4.36.2:
      Successfully uninstalled transformers-4.36.2
Successfully installed tokenizers-0.21.1 transformers-4.52.4


In [3]:
from datasets import load_dataset, concatenate_datasets
from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments, DataCollatorForSeq2Seq,AutoTokenizer,AutoModelForSeq2SeqLM
import torch
import torch.nn.functional as F
from transformers import AutoModelForSequenceClassification
from sentence_transformers import SentenceTransformer
from peft import LoraConfig, get_peft_model,PeftModel, PeftConfig
import jieba

  from .autonotebook import tqdm as notebook_tqdm


## SPANISH


In [4]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch.nn.functional as F

# Hugging Face model ids for the two scorers used to rank detoxified candidates.
SIMILARITY_MODEL_NAME = 'sentence-transformers/LaBSE'
TOXICITY_CLASSIFIER_NAME = "textdetox/xlmr-large-toxicity-classifier"

# Sentence encoder used to compare a candidate with the original sentence.
sim = SentenceTransformer(SIMILARITY_MODEL_NAME)

# Toxicity classifier and its tokenizer; the classifier is switched to eval mode
# because it is only used for inference in `score_model`.
classify_tok = AutoTokenizer.from_pretrained(TOXICITY_CLASSIFIER_NAME)
classify_model = AutoModelForSequenceClassification.from_pretrained(TOXICITY_CLASSIFIER_NAME).eval()


def score_model(text):
    """Return the toxicity probability of `text` as a Python float.

    The text is tokenized with `classify_tok`, moved to the device of
    `classify_model`, and run through the classifier without tracking gradients.
    The logits are soft-maxed over the last dimension and the probability at
    index 1 of the first row is returned.
    """
    encoded = classify_tok(text, return_tensors="pt", truncation=True, padding=True)
    encoded = {name: tensor.to(classify_model.device) for name, tensor in encoded.items()}
    with torch.no_grad():
        class_probs = F.softmax(classify_model(**encoded).logits, dim=-1)
    # The classifier outputs two classes (toxic and non-toxic); index 1 is the toxic class.
    return class_probs[0][1].item()


# Spanish toxic lexicon. `detoxify` rejects any candidate that contains one of the
# entries of `toxic_words` as a substring, so that set must actually be populated
# (it used to stay empty, which silently disabled the filter).
# To target another language, load that split instead (e.g. split="zh").
es_lexicon = load_dataset("textdetox/multilingual_toxic_lexicon", split="es")
# Skip blank entries: an empty string is a substring of every candidate and would
# make the filter reject all of them.
es_toxic_words = {word.strip() for word in es_lexicon["text"] if word and word.strip()}
toxic_words = set(es_toxic_words)

In [8]:
def load_infer_model(adapter_path="mt0l-lora-adapter-largelearning_low"):
    """Load the LoRA-adapted seq2seq model and its tokenizer for inference.

    Args:
        adapter_path: Directory (or hub id) holding the PEFT adapter and the
            tokenizer files. The base model name is read from the adapter config.

    Returns:
        A `(model, tokenizer)` tuple. The model is the base model wrapped with the
        adapter and switched to eval mode; the tokenizer's pad token falls back to
        its EOS token when none is defined.
    """
    peft_cfg = PeftConfig.from_pretrained(adapter_path)

    # Half precision is only used on GPU. On the CPU fallback path float16 inference
    # is slow and not supported by every operator, so full precision is used instead.
    has_cuda = torch.cuda.is_available()
    base_model = AutoModelForSeq2SeqLM.from_pretrained(
        peft_cfg.base_model_name_or_path,
        device_map="auto" if has_cuda else None,
        torch_dtype=torch.float16 if has_cuda else torch.float32
    )
    model = PeftModel.from_pretrained(base_model, adapter_path).eval()
    tokenizer = AutoTokenizer.from_pretrained(adapter_path)
    tokenizer.pad_token = tokenizer.pad_token or tokenizer.eos_token
    return model, tokenizer
    
def detoxify(text: str,
             model,
             tokenizer,
             score_model,  # STA (toxicity) classifier: callable text -> toxicity score
             sim_model,  # similarity model (e.g. sentence-transformers)
             max_new_tokens: int = 64,
             num_return_sequences: int = 10) -> dict:
    """Generate detoxified rewrites of `text` and pick the best one.

    Candidates are produced with beam search, stripped and de-duplicated (first
    occurrence wins), and candidates containing an entry of the module-level
    `toxic_words` set as a substring are dropped (unless that would drop all of
    them). The remaining candidates are ranked by
    `similarity(text, candidate) * (1 - score_model(candidate))`.

    Args:
        text: Sentence to rewrite.
        model: Seq2seq model exposing `.device` and `.generate(...)`.
        tokenizer: Tokenizer matching `model`.
        score_model: Callable returning a toxicity score for a string.
        sim_model: Encoder exposing `.encode(list_of_str, convert_to_tensor=True)`.
        max_new_tokens: Generation length limit passed to `generate`.
        num_return_sequences: Number of beams returned as candidates.

    Returns:
        `{"best": str, "candidates": list[str]}`. When generation yields no
        non-empty candidate, `best` is the unchanged input `text` and
        `candidates` is empty.
    """
    # Spanish instruction prompt. Earlier runs used Chinese / German prompts here;
    # replace this string to target another language.
    prefix = "Reescribe la frase reemplazando términos ofensivos por lenguaje neutral, sin cambiar su significado."
    inputs = tokenizer(prefix + text,
                       return_tensors="pt",
                       truncation=True,
                       max_length=192).to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        no_repeat_ngram_size=4,
        repetition_penalty=3.0,
        # Beam search cannot return more sequences than it has beams, so widen the
        # beam when the caller asks for more than the default 10 candidates.
        num_beams=max(10, num_return_sequences),
        num_return_sequences=num_return_sequences,
        early_stopping=True,
        do_sample=False,
        decoder_start_token_id=tokenizer.pad_token_id
    )

    candidates = tokenizer.batch_decode(outputs, skip_special_tokens=True, clean_up_tokenization_spaces=True)

    # Strip, drop blanks and de-duplicate while keeping first-seen order.
    unique_candidates = [c for c in dict.fromkeys(c.strip() for c in candidates) if c]

    # filter toxic lexicon
    def contains_toxic_word(candidate):
        return any(word in candidate for word in toxic_words)

    filtered = [c for c in unique_candidates if not contains_toxic_word(c)]
    # If every candidate hits the lexicon, rank the unfiltered ones rather than none.
    final_candidates = filtered or unique_candidates

    if not final_candidates:
        # Nothing usable was generated; keep the input instead of failing on max([]).
        return {"best": text, "candidates": []}

    # select best candidate
    def select_best_output(toxic_text, detoxs, sta_model, sim_model):
        # Encode the source once together with all candidates in a single batch.
        emb = sim_model.encode([toxic_text] + list(detoxs), convert_to_tensor=True)
        # Dot product of each candidate embedding with the source embedding
        # (equals cosine similarity only if the encoder returns normalized vectors).
        sims = (emb[1:] * emb[0]).sum(dim=-1)
        vals = [
            (detox, (sim_val * (1 - sta_model(detox))).item())
            for detox, sim_val in zip(detoxs, sims)
        ]
        best, _ = max(vals, key=lambda x: x[1])
        return best

    best_text = select_best_output(text, final_candidates, score_model, sim_model)

    return {
        "best": best_text,
        "candidates": final_candidates
    }

In [9]:
import pandas as pd

# Spanish split of the parallel detox corpus; swap the split (and the language tag
# below) to run another language.
es_ds = load_dataset("textdetox/multilingual_paradetox", split="es")

model, tok = load_infer_model()

# One entry per input sentence: the toxic source, its selected rewrite, and the language tag.
toxic_list = list(es_ds['toxic_sentence'])
neutral_list = [
    detoxify(sentence, model, tok, score_model, sim)["best"]
    for sentence in toxic_list
]
lang_list = ["es"] * len(toxic_list)

In [10]:
df = pd.DataFrame(
    data={
        "toxic_sentence": toxic_list,
        "neutral_sentence": neutral_list,
        "lang": lang_list,
    }
)

# Check for and replace NaN (required by the official submission format):
# a missing rewrite falls back to the original toxic sentence of the same row.
df = df.assign(neutral_sentence=df["neutral_sentence"].fillna(df["toxic_sentence"]))

# Save as a .tsv file.
df.to_csv("submission_zhlow_baslin.tsv", sep="\t", index=False)