Let us train sequence-to-sequence models to translate toxic sentences into non-toxic versions.

In [None]:
%pip install datasets

# Models

I decided to use the same model architecture as in [1] - T5. However, instead of the original corpus (Jigsaw), I trained it on the parallel corpus ParaNMT. The reason is to experiment with training the model on a parallel dataset, instead of using the unsupervised approach from the paper.

Before anything else, we first need to define the metrics for the task. I will use the metrics from [1], computed with their source code and the toxicity classifier they provide. The metrics are as follows:

1. Toxicity: Toxicity score of the generated sentence and a difference between the toxicity score of the generated sentence and the original non-toxic sentence.
2. Fluency: Fluency score of the generated sentence and a difference between the fluency score of the generated sentence and the original non-toxic sentence.
3. Meaning Preservation: BLEU score between the generated sentence and the original non-toxic sentence.
4. Semantic Similarity: Semantic similarity score between the generated sentence and the original non-toxic sentence.

First, let us train a sequence-to-sequence T5 model to translate toxic sentences into non-toxic versions.

In [4]:
import os
import pandas as pd
from transformers import (
    T5ForConditionalGeneration, 
    T5Tokenizer, 
    DataCollatorForSeq2Seq, 
    Seq2SeqTrainer, 
    Seq2SeqTrainingArguments
)

In [None]:
# Data and model artefacts live one level above the notebook's working directory.
_PARENT_DIR = os.path.join(os.getcwd(), '..')
DATA_FOLDER = os.path.join(_PARENT_DIR, 'data')
MODEL_FOLDER = os.path.join(_PARENT_DIR, 'models')

# Raw tab-separated corpus inside the data folder.
DATASET_FILE = os.path.join(DATA_FOLDER, 'raw', 'filtered.tsv')
# Path prefix 'tokenizer' inside the model folder.
MODEL_PREFIX = os.path.join(MODEL_FOLDER, 'tokenizer')
# Vocabulary size; the original note reads "spiece" (presumably SentencePiece - confirm).
VOCAB_SIZE = 10000

In [21]:
# Both the tokenizer and the model start from the same pretrained 't5-small' checkpoint.
tokenizer: T5Tokenizer = T5Tokenizer.from_pretrained('t5-small')
model: T5ForConditionalGeneration = T5ForConditionalGeneration.from_pretrained('t5-small')

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [16]:
# Load the raw tab-separated corpus through the DATASET_FILE constant from the
# configuration cell instead of rebuilding the same path by hand.
pd_data = pd.read_csv(DATASET_FILE, sep='\t')

In [17]:
# Prepare data for training: the 'translation' column feeds the model input
# (source) and the 'reference' column is the training target.
# NOTE(review): the training sample displayed further down shows a 'reference'
# that looks more toxic than its 'translation' - confirm that this
# source -> target direction matches the intended toxic -> non-toxic task.
source, target = (
    pd_data[column].tolist() for column in ('translation', 'reference')
)

In [18]:
import torch
import numpy as np
from sklearn.model_selection import train_test_split

# Seed torch and numpy once; no explicit random_state is passed to the splits below.
SEED = 705
HELD_OUT_FRACTION = 0.2

torch.manual_seed(SEED)
np.random.seed(SEED)

# First carve off the test set, then split what remains into train and validation.
source_val_train, source_test, target_val_train, target_test = train_test_split(
    source,
    target,
    test_size=HELD_OUT_FRACTION,
)
source_train, source_val, target_train, target_val = train_test_split(
    source_val_train,
    target_val_train,
    test_size=HELD_OUT_FRACTION,
)

## Training

The model is fine-tuned on the sequence-to-sequence task with the help of the Hugging Face `Seq2SeqTrainer`.

In [22]:
from transformers import DataCollatorForSeq2Seq

def tokenize_function(examples, max_length=512):
    """
    Tokenize a batch of examples for sequence-to-sequence training.

    The 'translation' texts become the model inputs and the 'reference' texts
    become the labels. Both are padded/truncated to ``max_length`` tokens.
    Padding positions in the labels are replaced with -100 so that they are
    ignored by the loss.

    :param examples: batch with 'translation' and 'reference' lists of strings
    :param max_length: fixed length every sequence is padded/truncated to

    :return: the tokenized inputs with an additional 'labels' entry
    """
    # Same encoding settings for inputs and labels, defined once.
    encode_kwargs = dict(
        padding='max_length',
        max_length=max_length,
        add_special_tokens=True,
        truncation=True,
    )

    inputs = tokenizer(examples['translation'], **encode_kwargs)
    label_ids = tokenizer(examples['reference'], **encode_kwargs).input_ids

    # Ask the tokenizer for its pad id instead of assuming it is 0.
    pad_id = tokenizer.pad_token_id
    inputs['labels'] = [
        [token_id if token_id != pad_id else -100 for token_id in sequence]
        for sequence in label_ids
    ]

    return inputs

# Batch collator for the seq2seq trainer. The label padding value is spelled
# out (it is the collator's default) to match the -100 ignore index written by
# tokenize_function.
data_collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    label_pad_token_id=-100,
)

In [23]:
# Tokenize data
from datasets import Dataset

# Build and tokenize the three splits with the same recipe: wrap the parallel
# lists in a Dataset, then map tokenize_function over it in batches.
train_dataset, val_dataset, test_dataset = (
    Dataset.from_dict({'translation': split_source, 'reference': split_target}).map(
        tokenize_function,
        batched=True,
        batch_size=512,
        num_proc=6,
    )
    for split_source, split_target in (
        (source_train, target_train),
        (source_val, target_val),
        (source_test, target_test),
    )
)

Map (num_proc=6):   0%|          | 0/369776 [00:00<?, ? examples/s]

  block_group = [InMemoryTable(cls._concat_blocks(list(block_group), axis=axis))]
  table = cls._concat_blocks(blocks, axis=0)


Map (num_proc=6):   0%|          | 0/92445 [00:00<?, ? examples/s]

  block_group = [InMemoryTable(cls._concat_blocks(list(block_group), axis=axis))]
  table = cls._concat_blocks(blocks, axis=0)


Map (num_proc=6):   0%|          | 0/115556 [00:00<?, ? examples/s]

  block_group = [InMemoryTable(cls._concat_blocks(list(block_group), axis=axis))]
  table = cls._concat_blocks(blocks, axis=0)


In [57]:
# Train model

from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments

# Optimisation hyper-parameters.
batch_size = 35
num_epochs = 1
learning_rate = 5e-5
warmup_steps = 500
weight_decay = 0.01

# Logging, checkpointing and evaluation share one interval (in steps).
_step_interval = 1000

# NOTE(review): the recorded training logs list only "Training Loss" - confirm
# that evaluation actually runs with these arguments.
training_args = Seq2SeqTrainingArguments(
    # Output location
    output_dir=MODEL_FOLDER,
    overwrite_output_dir=True,
    # Run modes
    do_train=True,
    do_eval=True,
    predict_with_generate=True,
    # Batch sizes
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    # Optimisation schedule
    num_train_epochs=num_epochs,
    learning_rate=learning_rate,
    warmup_steps=warmup_steps,
    weight_decay=weight_decay,
    # Reporting / checkpoint cadence
    logging_steps=_step_interval,
    save_steps=_step_interval,
    eval_steps=_step_interval,
    report_to="none",
)

In [45]:
# Trainer for the seq2seq fine-tuning run. The seq2seq collator is passed
# explicitly so this trainer batches data the same way as `end_trainer`.
trainer = Seq2SeqTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=data_collator,
)

In [46]:
# Launch fine-tuning; the returned TrainOutput is displayed as the cell result.
# NOTE(review): the recorded output reports 'epoch': 100.0 although num_epochs
# is set to 1 - the output appears to come from an earlier configuration;
# re-run to confirm.
trainer.train()

Step,Training Loss
100,3.2254
200,2.9433
300,2.5846
400,2.2331
500,1.8827
600,1.5678
700,1.3324
800,1.1353
900,0.9998
1000,0.8965


TrainOutput(global_step=1300, training_loss=1.6264456763634314, metrics={'train_runtime': 171.6048, 'train_samples_per_second': 58.273, 'train_steps_per_second': 7.576, 'total_flos': 1353418014720000.0, 'train_loss': 1.6264456763634314, 'epoch': 100.0})

In [47]:
# Pick one training example and display its text pair (model input, target).
train_sample = train_dataset[4]
tuple(train_sample[column] for column in ('translation', 'reference'))

("I know you hate me, but I don't make a cow out of myself, and I don't lecture you.",
 "I know you hate me. But this isn't me being some overbearing bitch.")

In [48]:
# Test model: run beam-search generation for the sample picked above.
# Inputs are moved to whatever device the model currently lives on rather than
# assuming 'cuda'.

input_ids = train_sample['input_ids']
attention_mask = train_sample['attention_mask']

outputs = model.generate(
    input_ids=torch.tensor(input_ids).unsqueeze(0).to(model.device),
    attention_mask=torch.tensor(attention_mask).unsqueeze(0).to(model.device),
    max_length=512,
    num_beams=5,
    early_stopping=True
)

# Drop '<pad>' / '</s>' markers so only the generated text is shown.
tokenizer.decode(outputs[0], skip_special_tokens=True)

"<pad> I know you hate me, but I don't give you a lame.</s>"

In [58]:
# Second trainer over the same model and datasets, this time with the seq2seq
# data collator passed explicitly.
end_trainer = Seq2SeqTrainer(
    model=model,
    tokenizer=tokenizer,
    data_collator=data_collator,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

In [59]:
# Run the training loop; the returned TrainOutput is displayed as the cell result.
end_trainer.train()

Step,Training Loss
1000,2.5362
2000,2.3364
3000,2.2684
4000,2.2498
5000,2.2235
6000,2.2105
7000,2.2081
8000,2.1996
9000,2.1954
10000,2.1916


TrainOutput(global_step=10566, training_loss=2.2582586816646555, metrics={'train_runtime': 5129.9381, 'train_samples_per_second': 72.082, 'train_steps_per_second': 2.06, 'total_flos': 5.004614998111027e+16, 'train_loss': 2.2582586816646555, 'epoch': 1.0})

In [13]:
# Sub-folder of MODEL_FOLDER that holds the saved model and tokenizer.
_DETOX_SUBFOLDER = 't5-detox'
DETOK_MODEL_FOLDER = os.path.join(MODEL_FOLDER, _DETOX_SUBFOLDER)


In [60]:
# Save the trained model and the tokenizer side by side in DETOK_MODEL_FOLDER.

model.save_pretrained(DETOK_MODEL_FOLDER)
tokenizer.save_pretrained(DETOK_MODEL_FOLDER)

# The saved artefacts are loaded back from this folder in a later cell.


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [14]:
# Reload the saved tokenizer and model from DETOK_MODEL_FOLDER.
loaded_tokenizer = T5Tokenizer.from_pretrained(DETOK_MODEL_FOLDER)
loaded = T5ForConditionalGeneration.from_pretrained(DETOK_MODEL_FOLDER)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [10]:
# Test the loaded model on the test dataset:
# generate a prediction for each input sentence and save the results
# to two text files.

# One file contains the predictions, the other contains the references;
# each line in both files corresponds to one sentence.

def generate_translation(batch):
    """
    Generate output token ids for a tokenized batch with the reloaded model.

    Inputs are placed on the device the model currently lives on, so the
    function works whether the model is on GPU or CPU.

    :param batch: mapping with 'input_ids' and 'attention_mask' sequences

    :return: the value returned by ``loaded.generate`` for this batch
    """
    device = loaded.device
    input_ids = torch.tensor(batch['input_ids']).to(device)
    attention_mask = torch.tensor(batch['attention_mask']).to(device)

    return loaded.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_length=512,
        num_beams=5,
        early_stopping=True
    )

In [76]:
# Number of target sentences in the held-out test split.
print(len(target_test))

115556


In [24]:
# Iterate over test dataset and generate predictions
# for each of reference sentences

from tqdm import tqdm

# Use the GPU when one is available, otherwise keep the model on the CPU.
loaded.to('cuda' if torch.cuda.is_available() else 'cpu')

# Accumulators filled batch by batch by `gen`; reset on every run of this cell.
predictions = []
references = []

def gen(batch):
    """
    Generate predictions for one batch and append them to the global lists.

    Decoded predictions go to ``predictions``; the batch's 'translation' texts
    go to ``references``. Nothing is returned.

    :param batch: batch with 'input_ids', 'attention_mask' and 'translation'
    """
    outputs = generate_translation(batch)

    # Strip '<pad>' / '</s>' markers so that only plain text is stored.
    predictions.extend(
        [loaded_tokenizer.decode(ids, skip_special_tokens=True) for ids in outputs]
    )
    # NOTE(review): 'translation' is the column fed to the model as input -
    # confirm it is the intended content of the references list.
    references.extend(batch['translation'])
    
# Run `gen` over the first 1024 test examples in batches of 32. `gen` returns
# nothing, so the call is used for its side effect of filling the lists.
test_subset = test_dataset.select(range(1024))
test_subset.map(gen, batched=True, batch_size=32)

Map:   0%|          | 0/1024 [00:00<?, ? examples/s]

Dataset({
    features: ['translation', 'reference', 'input_ids', 'attention_mask', 'labels'],
    num_rows: 1024
})

In [72]:
# Spot-check a single test example: generate with beam search and display
# (prediction, reference text, input text).
test_sample = test_dataset[4]

# Fall back to the CPU when no GPU is available.
_sample_device = 'cuda' if torch.cuda.is_available() else 'cpu'

# The attention mask is passed explicitly, as in the other generate calls in
# this notebook, because the input is padded to a fixed length.
_g = loaded.to(_sample_device).generate(
    input_ids=torch.tensor(test_sample['input_ids']).unsqueeze(0).to(_sample_device),
    attention_mask=torch.tensor(test_sample['attention_mask']).unsqueeze(0).to(_sample_device),
    max_length=512,
    num_beams=5,
    early_stopping=True
)

loaded_tokenizer.decode(_g[0], skip_special_tokens=True), test_sample['reference'], test_sample['translation']

('Calling her ridiculously naive would be a compliment.',
 'To call it ridiculously naive would have been a compliment.',
 'calling her ridiculously naive would be a compliment.')

In [25]:
# Write references and predictions to line-aligned text files (one sentence
# per line). The target folder is created if missing, and the encoding is
# fixed to UTF-8 so the files do not depend on the platform default.
interim_dir = os.path.join(DATA_FOLDER, 'interim')
os.makedirs(interim_dir, exist_ok=True)

for file_name, lines in (('references.txt', references), ('predictions.txt', predictions)):
    with open(os.path.join(interim_dir, file_name), 'w', encoding='utf-8') as f:
        f.write('\n'.join(lines))