In [106]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [107]:
# Read the tab-separated corpus into `data`, the frame the later cells work on.
data = pd.read_csv("../data/filtered.tsv", sep="\t")
# Bare last expression so the notebook renders the first five rows.
data.head()

Unnamed: 0.1,Unnamed: 0,reference,translation,similarity,length_diff,ref_tox,trn_tox
0,0,"If Alkar is flooding her with psychic waste, t...","if Alkar floods her with her mental waste, it ...",0.785171,0.010309,0.014195,0.981983
1,1,Now you're getting nasty.,you're becoming disgusting.,0.749687,0.071429,0.065473,0.999039
2,2,"Well, we could spare your life, for one.","well, we can spare your life.",0.919051,0.268293,0.213313,0.985068
3,3,"Ah! Monkey, you've got to snap out of it.","monkey, you have to wake up.",0.664333,0.309524,0.053362,0.994215
4,4,I've got orders to put her down.,I have orders to kill her.,0.726639,0.181818,0.009402,0.999348


In [108]:
# Remove the auto-generated index columns that were read in with the TSV.
# `errors="ignore"` keeps this cell idempotent: because `data` is reassigned,
# re-running the original raised KeyError ("Unnamed: 0" was already gone).
# "Unnamed: 0.1" appears in the preview header above; it is dropped as well
# when present and silently skipped otherwise.
data = data.drop(columns=["Unnamed: 0", "Unnamed: 0.1"], errors="ignore")

In [109]:
import torch
from torch.utils.data import Dataset


class ToxicityDataset(Dataset):
    """Map-style dataset that tokenizes rows of a toxicity paraphrase table.

    Each row of ``data`` must provide the columns ``reference`` (the text fed
    to the model), ``translation`` (the target text for generation) and
    ``ref_tox`` (the toxicity score of ``reference``).

    NOTE(review): in the rows previewed by ``data.head()`` ``ref_tox`` is far
    lower than ``trn_tox``, i.e. ``reference`` is not always the more toxic
    side. Confirm the pair direction before training a detoxifier on
    reference -> translation.

    Args:
        data: ``pandas.DataFrame`` with the columns listed above.
        tokenizer: Callable following the Hugging Face tokenizer call
            convention; it must return ``input_ids`` and ``attention_mask``
            tensors with a leading batch axis of size 1.
        max_length: Every encoded sequence is padded/truncated to this length.
        task: ``"classification"`` or ``"generation"``.
        toxicity_threshold: Scores ``>=`` this value become class ``1`` for the
            classification task; everything else becomes class ``0``.

    Raises:
        ValueError: If ``task`` is not one of the supported tasks.
    """

    # Label value that Hugging Face models exclude from the cross-entropy loss.
    IGNORE_INDEX = -100
    SUPPORTED_TASKS = ("classification", "generation")

    def __init__(self, data, tokenizer, max_length, task="classification",
                 toxicity_threshold=0.5):
        # Fail at construction time; previously an unknown task only surfaced
        # later as `None` items coming out of __getitem__.
        if task not in self.SUPPORTED_TASKS:
            raise ValueError(
                f"Unsupported task {task!r}; expected one of {self.SUPPORTED_TASKS}"
            )
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.task = task
        self.toxicity_threshold = toxicity_threshold

    def __len__(self):
        """Return the number of rows in the underlying frame."""
        return len(self.data)

    def _encode(self, text):
        """Tokenize ``text`` to a fixed-length, batch-of-one encoding."""
        return self.tokenizer(
            text,
            padding="max_length",
            max_length=self.max_length,
            truncation=True,
            return_tensors="pt",
        )

    def __getitem__(self, index):
        """Return ``input_ids``, ``attention_mask`` and ``labels`` for one row.

        For ``"classification"`` ``labels`` is a scalar ``torch.long`` class id
        (0 or 1). For ``"generation"`` ``labels`` holds the target token ids
        with padding positions replaced by ``IGNORE_INDEX``.
        """
        row = self.data.iloc[index]  # single positional lookup instead of three
        source = self._encode(row["reference"])

        # squeeze(0) removes only the batch axis; a bare squeeze() would also
        # collapse the sequence axis when max_length == 1.
        item = {
            "input_ids": source["input_ids"].squeeze(0),
            "attention_mask": source["attention_mask"].squeeze(0),
        }

        if self.task == "classification":
            # ref_tox is a float score; casting it straight to long truncated
            # every value below 1.0 to class 0, so binarize explicitly.
            is_toxic = float(row["ref_tox"]) >= self.toxicity_threshold
            item["labels"] = torch.tensor(int(is_toxic), dtype=torch.long)
        elif self.task == "generation":
            target = self._encode(row["translation"])
            is_padding = target["attention_mask"].squeeze(0) == 0
            # Mask padding so the loss is computed on real target tokens only.
            item["labels"] = target["input_ids"].squeeze(0).masked_fill(
                is_padding, self.IGNORE_INDEX
            )
        else:
            # Only reachable if `task` was reassigned after construction.
            raise ValueError(f"Unsupported task {self.task!r}")
        return item

In [110]:
import pandas as pd
import torch
from torch.utils.data import random_split
from transformers import T5Tokenizer, T5ForConditionalGeneration, T5Config, Trainer, TrainingArguments, T5ForSequenceClassification
from sklearn.metrics import accuracy_score, classification_report

In [111]:
tokenizer = T5Tokenizer.from_pretrained("t5-small")
dataset = ToxicityDataset(data, tokenizer, max_length=120, task="generation")

# Seed the split explicitly: with the default global generator the membership of
# the three subsets changed on every kernel restart, so results were not
# reproducible and test rows could end up in training across sessions.
split_generator = torch.Generator().manual_seed(42)
# Fractions are 70% train / 20% test / 10% validation, in that order.
train_dataset, test_dataset, validate_dataset = random_split(
    dataset, [.7, .2, .1], generator=split_generator
)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [112]:
# Checkpoint identifier; `model_init` reads this global when it loads the model.
model_name = "t5-small"
# Rebind `tokenizer` to an instance loaded from that same checkpoint name.
tokenizer = T5Tokenizer.from_pretrained(pretrained_model_name_or_path=model_name)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [113]:

# Define training arguments.
# The recorded run reports 303333 optimizer steps in total. Evaluating and
# checkpointing every 100 steps would therefore trigger roughly 3000 passes over
# the whole validation split and write roughly 3000 checkpoints, so the interval
# is widened and the number of checkpoints kept on disk is capped.
training_args = TrainingArguments(
    output_dir="./t5_toxicity_finetuned",
    per_device_train_batch_size=4,
    num_train_epochs=3,
    evaluation_strategy="steps",
    eval_steps=10_000,
    save_steps=10_000,  # kept equal to eval_steps so each checkpoint has a matching eval
    save_total_limit=2,  # older checkpoints are deleted instead of filling the disk
    logging_steps=1000,
)

In [114]:
def model_init():
    """Load a T5 conditional-generation model from the ``model_name`` checkpoint.

    Takes no arguments and reads the module-level ``model_name``; the notebook
    hands this function to ``Trainer`` through its ``model_init`` argument.
    """
    pretrained_model = T5ForConditionalGeneration.from_pretrained(model_name)
    return pretrained_model

In [115]:
from transformers import Trainer, TrainingArguments

# Assemble the trainer: the model comes from `model_init`, the hyper-parameters
# from `training_args`, and the data from the train / validation splits.
trainer = Trainer(
    args=training_args,
    model_init=model_init,
    eval_dataset=validate_dataset,
    train_dataset=train_dataset,
)

# Start fine-tuning; left as the last expression so the cell shows the result.
trainer.train()


  0%|          | 0/303333 [09:02<?, ?it/s]


KeyboardInterrupt: 

In [76]:
# Directory (relative to the notebook) that receives the trainer's model.
model_dir = "./saved_model"
trainer.save_model(output_dir=model_dir)

In [12]:

# Evaluate the model on the test set (teacher-forced, token-level metrics).
# NOTE(review): predict() keeps the logits of every test example in memory; for a
# large split consider evaluating on a subset or reducing the logits first.
test_results = trainer.predict(test_dataset)

# predict() hands back NumPy data, not tensors, and for T5 it may be a tuple
# whose first element holds the LM logits, so unwrap and convert before argmax.
raw_predictions = test_results.predictions
if isinstance(raw_predictions, tuple):
    raw_predictions = raw_predictions[0]
predictions = torch.as_tensor(raw_predictions).argmax(dim=-1).numpy()
true_labels = test_results.label_ids

# Score real target tokens only: skip positions labelled -100 (ignored by the
# loss) or filled with the tokenizer's pad id. Boolean indexing also flattens the
# 2-D (example, position) arrays into the 1-D form the sklearn metrics require.
is_target_token = (true_labels != -100) & (true_labels != tokenizer.pad_token_id)
accuracy = accuracy_score(true_labels[is_target_token], predictions[is_target_token])
# Per-token-id report; zero_division=0 avoids warnings for ids never predicted.
classification_report_str = classification_report(
    true_labels[is_target_token], predictions[is_target_token], zero_division=0
)

# Use the model for text classification
def classify_toxicity(text):
    """Ask the fine-tuned T5 for a toxicity label for ``text``.

    The prompt ``"classify: " + text`` is sent through ``generate`` and the
    decoded answer is parsed as the class id.

    NOTE(review): this notebook only fine-tunes with ``task="generation"`` and
    without a "classify: " prefix, so the model is not expected to answer with
    "0"/"1" unless a classification-style fine-tune is added. Confirm.

    Args:
        text: Sentence to classify.

    Returns:
        int: 0 for non-toxic, 1 for toxic.

    Raises:
        ValueError: If the generated text is neither "0" nor "1".
    """
    # The notebook never assigns a global `model` (its only assignment is
    # commented out), so use the model owned by the trainer.
    classifier = trainer.model
    # Tokenizer output lives on the CPU; move it to wherever the model is.
    input_ids = tokenizer.encode("classify: " + text, return_tensors="pt").to(
        classifier.device
    )
    output = classifier.generate(input_ids)
    # generate() returns a whole token sequence, so calling .item() on it fails;
    # decode the sequence and parse the textual answer instead.
    answer = tokenizer.decode(output[0], skip_special_tokens=True).strip()
    if answer not in ("0", "1"):
        raise ValueError(f"Model did not return a 0/1 toxicity label: {answer!r}")
    return int(answer)  # 0 for non-toxic, 1 for toxic

# Use the model for text detoxification
def detoxify_text(text, max_new_tokens=120):
    """Generate a rewritten version of ``text`` with the fine-tuned T5.

    NOTE(review): ``ToxicityDataset`` tokenizes the raw ``reference`` text with
    no "detoxify: " prefix, so the prompt used here differs from the training
    inputs. Confirm which format is intended and align the two.

    Args:
        text: Sentence to rewrite.
        max_new_tokens: Upper bound on generated tokens. The default matches the
            120-token target length used when the dataset is built; without an
            explicit bound ``generate`` falls back to a much shorter default and
            cuts longer rewrites off.

    Returns:
        str: Decoded model output with special tokens removed.
    """
    # The notebook never assigns a global `model` (its only assignment is
    # commented out), so use the model owned by the trainer.
    generator = trainer.model
    # Tokenizer output lives on the CPU; move it to wherever the model is.
    input_ids = tokenizer.encode("detoxify: " + text, return_tensors="pt").to(
        generator.device
    )
    output = generator.generate(input_ids, max_new_tokens=max_new_tokens)
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Smoke-test both helpers on the same hand-written sentence.
toxicity = classify_toxicity("This is a toxic message.")
detoxified_text = detoxify_text("This is a toxic message.")

# A single newline-separated print emits the same two lines as two print calls.
print(f"Toxicity: {toxicity}", f"Detoxified Text: {detoxified_text}", sep="\n")


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
  0%|          | 0/351000 [04:57<?, ?it/s]


ValueError: not enough values to unpack (expected 2, got 1)