# Final Solution

The final solution is a GPT-2 model finetuned to the detoxifying task.

In [1]:
import torch
import numpy as np
import random

SEED = 42

torch.random.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

## Data Preparation

In [2]:
import requests
import zipfile
import os
import pandas as pd
from torch.utils.data import Dataset, DataLoader, random_split

In [3]:
DOWNLOAD_URL = "https://github.com/skoltech-nlp/detox/" +\
    "releases/download/emnlp2021/filtered_paranmt.zip"
with open('filtered.zip', 'wb') as file, requests.get(DOWNLOAD_URL, stream=True) as req:
    file.write(req.content)

In [4]:
with zipfile.ZipFile('filtered.zip') as zfile, open('filtered.tsv', 'wb') as file:
    zfile.extract('filtered.tsv')
os.remove('filtered.zip')

In [5]:
dataframe = pd.read_csv('filtered.tsv', sep='\t', index_col=0)

In [6]:
class ToxicDataset(Dataset):
    """A simple dataset class which stores reference-translation pairs concatenated together.
    """
    def __init__(self, dataframe: pd.DataFrame):
        """A simple dataset class which stores reference-translation pairs
        concatenated together.

        Args:
            dataframe (pd.DataFrame): The dataset dataframe. Must contain
            columnts: `reference`, `translation`, `ref_tox`, `trn_tox`.
        """
        self.data = []
        separate_token = '<|endoftext|>'
        eos_token = '<|endoftext|>'
        for _, (ref, trn, ref_tox, trn_tox) in dataframe[['reference', 'translation', 'ref_tox', 'trn_tox']].iterrows():
            if ref_tox < trn_tox:
                ref, trn = trn, ref
            self.data.append(f'{ref}{separate_token}{trn}{eos_token}')
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        return self.data[index]

In [7]:
dataset = ToxicDataset(dataframe)
train_size, test_size = len(dataset) - 100, 100
train_ds, test_ds = random_split(dataset, (train_size, test_size))
train_loader = DataLoader(train_ds, batch_size=1)
test_loader = DataLoader(test_ds, batch_size=1)

## Model Loading and Training

In [8]:
import tqdm
import transformers
from torch.utils import tensorboard as torchboard

In [9]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Training on "{device}"')

Training on "cuda"


### Finetuning GPT-2

In [10]:
training_steps = 9500
warmup_steps = 500
max_step = warmup_steps + training_steps
learning_rate = 3e-5
epochs = 1
log_dir = './runs'
save_dir = './gpt'

In [11]:
model = transformers.AutoModelForCausalLM.from_pretrained('gpt2').to(device)
tokenizer = transformers.AutoTokenizer.from_pretrained('gpt2')
optimizer = transformers.AdamW(model.parameters(), lr=learning_rate)
scheduler = transformers.get_linear_schedule_with_warmup(optimizer, warmup_steps, training_steps)



In [12]:
def train():
    step = 0
    writer = torchboard.writer.SummaryWriter(log_dir)
    pbar = tqdm.notebook.tqdm(total=max_step, desc='Training')
    for epoch in range(epochs):
        for texts in train_loader:
            text, = texts
            if step > max_step:
                break
            tokens = tokenizer.encode(text)
            inputs = torch.tensor(tokens).to(device)
            loss = model(inputs, labels=inputs)['loss']
            model.zero_grad()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()
            scalars = {
                'Loss': loss.item(),
                'Step': step,
                'Epoch': epoch,
            }
            for metric, value in scalars.items():
                writer.add_scalar(metric, value, global_step=step)
            writer.flush()
            step += 1
            pbar.update()
            pbar.set_postfix(scalars)
    pbar.close()

In [None]:
# The training can be skipped if there is a pretrained model available.
# model = transformers.AutoModelForCausalLM.from_pretrained('./gpt').to(device)

In [13]:
train()

Training:   0%|          | 0/10000 [00:00<?, ?it/s]

In [14]:
model.save_pretrained(save_dir)

## Model Evaluation

In [15]:
import torchmetrics



In [16]:
def detoxify(text: str, max_len=100) -> str:
    """Uses the model to detoxify a small text.

    Args:
        text (str): Text to be detoxified.
        max_len (int, optional): Maximum length of the translated
        text in tokens. Defaults to 100.

    Returns:
        str: De-toxified text. Capitalization may be incorrect.
    """
    tokens = tokenizer.encode(text) + [tokenizer.eos_token_id]
    initial_len = len(tokens)
    for _ in range(max_len):
        tokens_tensor = torch.tensor(tokens).to(device)
        logits = model(tokens_tensor)['logits']
        next_token = logits[-1, :].argmax()
        if next_token == tokenizer.eos_token_id:
            break
        tokens.append(next_token)
    detoxified_tokens = tokens[initial_len:]
    return tokenizer.decode(detoxified_tokens)

In [17]:
def eval() -> float:
    """Evaluates the model and returns the BLEU score.

    Returns:
        float: BLEU score, ranging from 0 to 1.
    """
    pbar = tqdm.notebook.tqdm(total=len(test_loader), desc='Evaluation')
    bleu = torchmetrics.text.BLEUScore()
    scores = []
    logits = []
    for texts in test_loader:
        text, = texts
        ref, ground_trn, _ = text.split('<|endoftext|>')
        trn = detoxify(text)
        pbar.update(1)
        score = bleu([trn], [[ground_trn]]).item()
        scores.append(score)
    pbar.close()
    scores = np.array(scores)
    return scores.mean()

In [18]:
bleu = eval()

Evaluation:   0%|          | 0/100 [00:00<?, ?it/s]

In [19]:
print(f'BLEU score: {bleu * 100:.0f}')

BLEU score: 27
