In [1]:
!pip install sentencepiece



In [2]:
!pip install transformers[torch]



In [144]:
import pandas as pd
from transformers import T5Tokenizer, T5ForConditionalGeneration, Trainer, TrainingArguments
from torch.utils.data import Dataset
import torch
from tqdm import tqdm

In [145]:
# Load the dataset
data = pd.read_csv('most_toxic_data.csv').head(1000)

In [146]:
import torch

if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"There are {torch.cuda.device_count()} GPU(s) available.")
    print("Device name:", torch.cuda.get_device_name(0))
else:
    device = torch.device("cpu")
    print("No GPU available, using the CPU instead.")


There are 1 GPU(s) available.
Device name: Tesla T4


In [148]:
# Define the Dataset
class DetoxDataset(Dataset):
    def __init__(self, tokenizer, data, max_length=512):
        self.tokenizer = tokenizer
        self.input_texts = data['reference'].tolist()  # Assuming this is the toxic text
        self.target_texts = data['translation'].tolist()  # Assuming this is the non-toxic text
        self.max_length = max_length

    def __len__(self):
        return len(self.input_texts)

    def __getitem__(self, idx):
        source_text = self.input_texts[idx]
        target_text = self.target_texts[idx]

        # Tokenize the source text
        source = self.tokenizer.batch_encode_plus(
            [source_text], max_length=self.max_length, padding='max_length', truncation=True, return_tensors="pt"
        )

        # Tokenize the target text
        target = self.tokenizer.batch_encode_plus(
            [target_text], max_length=self.max_length, padding='max_length', truncation=True, return_tensors="pt"
        )

        source_ids = source['input_ids'].squeeze()
        target_ids = target['input_ids'].squeeze()

        # The labels are the target_ids without the padding token
        labels = target_ids.masked_fill(target_ids == self.tokenizer.pad_token_id, -100)

        # No need to move to device here, as this will be handled by the data loader and Trainer later
        return {"input_ids": source_ids, "attention_mask": source["attention_mask"].squeeze(), "labels": labels}

In [149]:
# Initialize the tokenizer and model
tokenizer = T5Tokenizer.from_pretrained('t5-small')
model = T5ForConditionalGeneration.from_pretrained('t5-small').to(device)

# Create the dataset
dataset = DetoxDataset(tokenizer, data)

# Split the dataset
train_size = int(0.8 * len(dataset))
train_dataset, eval_dataset = torch.utils.data.random_split(dataset, [train_size, len(dataset) - train_size])


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [150]:
# Define the training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=20,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    save_strategy="epoch",
    evaluation_strategy="epoch",
)

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset
)

In [151]:
# Train the model
trainer.train()


Epoch,Training Loss,Validation Loss
1,3.1665,2.761869
2,2.3407,2.380705
3,2.2221,2.252425
4,2.5216,2.201399
5,1.9393,2.174416
6,2.0455,2.155092
7,2.0991,2.156805
8,1.6057,2.143599
9,1.5895,2.144321
10,1.6985,2.134311


TrainOutput(global_step=4000, training_loss=1.940207050561905, metrics={'train_runtime': 974.5708, 'train_samples_per_second': 16.417, 'train_steps_per_second': 4.104, 'total_flos': 2165468823552000.0, 'train_loss': 1.940207050561905, 'epoch': 20.0})

In [152]:
# Save the model
model.save_pretrained('detoxified_t5_model')
tokenizer.save_pretrained('detoxified_t5_model')

('detoxified_t5_model/tokenizer_config.json',
 'detoxified_t5_model/special_tokens_map.json',
 'detoxified_t5_model/spiece.model',
 'detoxified_t5_model/added_tokens.json')

In [135]:
# model_path = 'detoxified_t5_model'
# model = T5ForConditionalGeneration.from_pretrained(model_path).to(device)

# # Load the tokenizer
# tokenizer = T5Tokenizer.from_pretrained(model_path)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [153]:
# Function to use the fine-tuned model for prediction
def detoxify_text(text):
    model.eval()
    input_ids = tokenizer.encode(text, return_tensors="pt").to(device)  # "detoxify:" is the task prefix
    outputs = model.generate(input_ids, max_length=512, num_beams=5, early_stopping=True)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [154]:
from transformers import pipeline
classifier = pipeline('sentiment-analysis', model='distilbert-base-uncased-finetuned-sst-2-english')

In [155]:
def is_toxic(text_to_analyze, comments = False):
  # Run the classifier (model and tokenizer)
  results = classifier(text_to_analyze)

  # Interpret the results
  for result in results:
      label = result['label']
      score = result['score']


      # Heuristic for toxicity based on negative sentiment score
      res = label == 'NEGATIVE' and score > 0.85
      if comments:
        # Print results
        print(f"Label: {label}, Score: {score}")
        if res:
            print("The text may be considered toxic.")
        else:
            print("The text is unlikely to be toxic.")
      return res

In [156]:
# Example text
toxic_text = "i like that shit"
text_to_analyze =detoxify_text(toxic_text)
print(text_to_analyze)
print("Is toxic:", is_toxic( text_to_analyze, True))

i like it
Label: POSITIVE, Score: 0.9998593330383301
The text is unlikely to be toxic.
Is toxic: False


In [157]:
import random
data_test = pd.read_csv('test.csv')
test_texts = data_test['reference'].tolist()
random.shuffle(test_texts)
test_texts = test_texts[:100]

In [158]:
not_toxic = 0

for toxic_text in tqdm(test_texts):
    text_to_analyze =detoxify_text(toxic_text)
    toxity = is_toxic(text_to_analyze, False)
    if not toxity:
      not_toxic += 1
print("\nAccuracy:", not_toxic/len(test_texts))

100%|██████████| 100/100 [00:32<00:00,  3.09it/s]


Accuracy: 0.31



