In [1]:
import os
import numpy as np
import pickle
import sentencepiece as spm
from tqdm.autonotebook import trange

import pytorch_lightning as L
import torch.nn as nn
import torch

  from tqdm.autonotebook import trange


In [2]:
DATA_FOLDER = os.path.join(os.getcwd(), '..', 'data')
TOKENIZER_FOLDER = os.path.join(os.getcwd(), '..', 'models')
VOCAB_SIZE = 10000

# Toxic Classifiers

To classify toxic comments there is a need to firstly understand what means toxic. The dataset contains sentences for which the toxicity score is measured. However, toxicity is not spread into the whole sentence, but it is concentrated in some words. For example, the sentence "I love you" is not toxic, but if we change the word "love" with "hate" the sentence becomes toxic. 

The purpose of this notebook is to gain knowledge about the toxicity. For this naive word classifier and sentence classifier's will be built. Also, a classifier used in the original paper will be evaluated against my solutions

## Loading Data

Let us load data previously tokenized with sentencepience model. Also, let us load the tokenizer to have access to the vocabulary.

In [3]:
# Load dataset
train = pickle.load(open(os.path.join(DATA_FOLDER, 'interim', 'train.pkl'), 'rb'))

test = pickle.load(open(os.path.join(DATA_FOLDER, 'interim', 'test.pkl'), 'rb'))
test_corpus = test['reference'] + test['translation']
test_scores = np.concatenate((test['ref_tox'], test['trn_tox']))

val = pickle.load(open(os.path.join(DATA_FOLDER, 'interim', 'val.pkl'), 'rb'))
val_corpus = val['reference'] + val['translation']
val_scores = np.concatenate((val['ref_tox'], val['trn_tox']))

# Load tokenizer
tokenizer = spm.SentencePieceProcessor()
tokenizer.Load(os.path.join(TOKENIZER_FOLDER, 'tokenizer.model'))

True

In [4]:
corpus = train['reference'] + train['translation']
scores = np.concatenate((train['ref_tox'], train['trn_tox']))

Save the maximum length of a sentence in the dataset. This will be used later to pad the sentences.

In [5]:
MAX_LEN = np.max([len(sentence) for sentence in corpus])

In [6]:
assert tokenizer.vocab_size() == VOCAB_SIZE

## Frequency Calculation

Let us calculate the frequency of each token in the dataset. This will help us to understand the distribution of the words in the dataset.

In [7]:
total = 0
freq = np.zeros(VOCAB_SIZE)

for tox, sentence in zip(scores, corpus):
    total += len(sentence)
    for word in sentence:
        freq[word] += tox * (-1 if tox < 0.5 else 1)
        
freq = freq / total

In [8]:
def oov(_i):
    return np.finfo(np.float32).eps if _i >= freq.shape[0] else freq[_i]

Xi = [[freq[word] for word in sentence] for sentence in corpus]
yi = scores

valXi = [[oov(word) for word in sentence] for sentence in val_corpus]
valyi = val_scores

testXi = [[oov(word) for word in sentence] for sentence in test_corpus]
testyi = test_scores

# Building a Naive Sentence classifier

Let us build a naive sentence classifier. The classifier will be based on the frequency of the tokens in the sentence. The sentence will be classified as toxic if the sum of the frequency of the tokens is greater than a threshold. The only parameter of the classifier is the threshold, which will be optimized using linear search.

### Data Preparation

Let us add padding to the sentences and calculate the frequency of the tokens in each sentence.

In [9]:

device = 'cuda' if torch.cuda.is_available() else 'cpu'
alpha = nn.Parameter(torch.tensor(0.0).to(device))
optimizer = torch.optim.Adam([alpha], lr=0.01)

def train_step(X, y):
    optimizer.zero_grad()
    loss = torch.mean((y - alpha * torch.tensor(X)) ** 2)
    loss.backward()
    optimizer.step()
    return loss.item()

for j in trange(100):
    shuffle = np.random.permutation(len(Xi))
    Xi = [Xi[i] for i in shuffle]
    yi = [yi[i] for i in shuffle]
    for i in range(0, len(Xi), 10000):
        batch = Xi[i:i+1024]
        batch = [torch.tensor(x).mean() for x in batch]
        batch = torch.stack(batch).to(device)
        labels = torch.tensor(yi[i:i+1024]).to(device)
        loss = train_step(batch, labels)
        
        if i % (len(Xi) // 4) == 0:
            print(f"Epoch {j} Loss: {loss}")
            

  0%|          | 0/100 [00:00<?, ?it/s]

  loss = torch.mean((y - alpha * torch.tensor(X)) ** 2)


Epoch 0 Loss: 0.46845304633528545
Epoch 1 Loss: 0.4350812683523954
Epoch 2 Loss: 0.40414133500680366
Epoch 3 Loss: 0.4151755981182559
Epoch 4 Loss: 0.4174811234736763
Epoch 5 Loss: 0.3954872702266928
Epoch 6 Loss: 0.4099972022303072
Epoch 7 Loss: 0.38102171325239564
Epoch 8 Loss: 0.40062770337396325
Epoch 9 Loss: 0.41695560789333685
Epoch 10 Loss: 0.3836447205132132
Epoch 11 Loss: 0.3739991509394266
Epoch 12 Loss: 0.3903340458570954
Epoch 13 Loss: 0.3517180670852234
Epoch 14 Loss: 0.3540070097308201
Epoch 15 Loss: 0.33893560359119546
Epoch 16 Loss: 0.3629609626139281
Epoch 17 Loss: 0.3263081266154309
Epoch 18 Loss: 0.3433655753592363
Epoch 19 Loss: 0.35611210824477035
Epoch 20 Loss: 0.35522165751970014
Epoch 21 Loss: 0.3200347889405113
Epoch 22 Loss: 0.31760089405739833
Epoch 23 Loss: 0.31139568925758976
Epoch 24 Loss: 0.30477414723178436
Epoch 25 Loss: 0.3265486602042241
Epoch 26 Loss: 0.3170179486066642
Epoch 27 Loss: 0.30887220419265904
Epoch 28 Loss: 0.2851437352533057
Epoch 29 Los

### Evaluation

Let us evaluate the classifier on the validation set.

In [10]:
%env CUDA_LAUNCH_BLOCKING=1

env: CUDA_LAUNCH_BLOCKING=1


In [11]:
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, precision_score, recall_score

def evaluate(X, y):
    preds = []
    for i in range(0, len(X), 128):
        batch = X[i:i+128]
        batch = [torch.tensor(x).mean() for x in batch]
        batch = torch.stack(batch)
        batch = batch.to(device)
        preds.append(alpha * batch)
    preds = torch.cat(preds).cpu().detach().numpy()
    preds = np.array([1 if pred > 0.5 else 0 for pred in preds])
    y = np.round(y)
    return roc_auc_score(y, preds), accuracy_score(y, preds), f1_score(y, preds), precision_score(y, preds), recall_score(y, preds)


print("Train:", evaluate(Xi, yi))
print("Val:", evaluate(valXi, valyi))
print("Test:", evaluate(testXi, testyi))

Train: (0.49309313077510547, 0.4930931307751054, 0.37267601259451244, 0.48878923766816146, 0.3011394982054039)
Val: (0.4922288760427844, 0.4922288760427844, 0.37274437697767904, 0.4874461779343511, 0.30174114714943406)
Test: (0.49427117588009273, 0.4942711758800928, 0.37380794205258977, 0.4906886463373467, 0.3018969157810931)


### RNN Classifier

Let us build a RNN classifier. The classifier will be based on a RNN network with a single LSTM layer. The output of the LSTM layer will be passed to a fully connected layer with a sigmoid activation function. The output of the sigmoid function will be the probability of the sentence to be toxic.

In [12]:
%pip install lightning

[0mNote: you may need to restart the kernel to use updated packages.


In [13]:
tokenizer.pad_id()

-1

In [18]:
from torch.utils.data import Dataset, DataLoader

class ToxicDataset(Dataset):
    def __init__(self, corpus, scores):
        self.corpus = corpus
        self.scores = scores
        
    def __len__(self):
        return len(self.corpus)
    
    def __getitem__(self, index):
        sentence = self.corpus[index]
        sentence = np.array(sentence)
        sentence = np.pad(sentence, (0, MAX_LEN - len(sentence)), 'constant', constant_values=0)
        return sentence.astype(np.int32), self.scores[index].astype(np.float32)
    
train_dataset = ToxicDataset(corpus, scores)
val_dataset = ToxicDataset(val_corpus, val_scores)
test_dataset = ToxicDataset(test_corpus, test_scores)

In [19]:
class ToxicClassifier(L.LightningModule):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers, dropout):
        super().__init__()
        
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(
            embedding_dim, 
            hidden_dim, 
            num_layers=n_layers, 
            bidirectional=False, 
            dropout=dropout, 
            batch_first=True
        )
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.narrow = nn.Linear(output_dim, 1)
        self.sigmoid = nn.Sigmoid()
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, text):
        # (batch_size, seq_len) -> (batch_size, seq_len, embedding_dim)
        embedded = self.dropout(self.embedding(text))
        # (batch_size, seq_len, embedding_dim) -> (batch_size, seq_len, hidden_dim)
        _, (hidden, _) = self.lstm(embedded)
        # (num_layers, batch_size, hidden_dim) -> (batch_size, hidden_dim)
        hidden = hidden.sum(dim=0)
        # (batch_size, hidden_dim) -> (batch_size, output_dim)
        output = self.fc(hidden)
        # (batch_size, output_dim) -> (batch_size, 1)
        output = self.narrow(output)
        # (batch_size, 1) -> (batch_size)
        output = self.sigmoid(output).squeeze(1)
        return output
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        y_pred = self.forward(x)
        loss = nn.functional.mse_loss(y_pred, y)
        self.log('train_loss', loss, prog_bar=True)
        return loss
    
    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_pred = self.forward(x)
        loss = nn.functional.mse_loss(y_pred, y)
        self.log('val_loss', loss)
        return loss
    
    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-3)
    
    def train_dataloader(self):
        return DataLoader(train_dataset, batch_size=64, shuffle=True)
    
    def val_dataloader(self):
        return DataLoader(test_dataset, batch_size=64)

In [22]:
BATCH_SIZE = 1024

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

INPUT_DIM = tokenizer.vocab_size()
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 64

model = ToxicClassifier(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, 2, 0.333)

In [23]:
trainer = L.Trainer(
    accelerator='gpu',
    max_epochs=2,
    val_check_interval=0.25,
)
trainer.fit(model, train_loader, val_loader)


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type      | Params
----------------------------------------
0 | embedding | Embedding | 1.0 M 
1 | lstm      | LSTM      | 892 K 
2 | fc        | Linear    | 16.4 K
3 | narrow    | Linear    | 65    
4 | sigmoid   | Sigmoid   | 0     
5 | dropout   | Dropout   | 0     
----------------------------------------
1.9 M     Trainable params
0         Non-trainable params
1.9 M     Total params
7.638     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=2` reached.


In [24]:
trainer.validate()

Restoring states from the checkpoint path at /shared/detoxification/notebooks/lightning_logs/version_37/checkpoints/epoch=1-step=1803.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /shared/detoxification/notebooks/lightning_logs/version_37/checkpoints/epoch=1-step=1803.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     Validate metric           DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        val_loss            0.2130133956670761
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'val_loss': 0.2130133956670761}]

# Fine-Tuning RoBERTa Classifier

In [3]:
from transformers import RobertaTokenizer, RobertaForSequenceClassification

tokenizer = RobertaTokenizer.from_pretrained('SkolkovoInstitute/roberta_toxicity_classifier')
model = RobertaForSequenceClassification.from_pretrained('SkolkovoInstitute/roberta_toxicity_classifier')

Some weights of the model checkpoint at SkolkovoInstitute/roberta_toxicity_classifier were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [4]:
import pandas as pd

df = pd.read_csv(os.path.join(DATA_FOLDER, 'raw', 'filtered.tsv'), sep='\t')

reference = df['reference'].tolist()
translation = df['translation'].tolist()
ref_tox = df['ref_tox'].tolist()
trn_tox = df['trn_tox'].tolist()

text_corpus = reference + translation
tox = ref_tox + trn_tox

In [5]:
df.head()

Unnamed: 0.1,Unnamed: 0,reference,translation,similarity,lenght_diff,ref_tox,trn_tox
0,0,"If Alkar is flooding her with psychic waste, t...","if Alkar floods her with her mental waste, it ...",0.785171,0.010309,0.014195,0.981983
1,1,Now you're getting nasty.,you're becoming disgusting.,0.749687,0.071429,0.065473,0.999039
2,2,"Well, we could spare your life, for one.","well, we can spare your life.",0.919051,0.268293,0.213313,0.985068
3,3,"Ah! Monkey, you've got to snap out of it.","monkey, you have to wake up.",0.664333,0.309524,0.053362,0.994215
4,4,I've got orders to put her down.,I have orders to kill her.,0.726639,0.181818,0.009402,0.999348


In [6]:
model.to(device)

NameError: name 'device' is not defined

In [None]:
model.to(device)

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
             

In [None]:
from tqdm.autonotebook import tqdm

mse = 0
mse_f = torch.nn.MSELoss(reduction='sum')
for i in tqdm(range(0, len(text_corpus), 1024)):
    batch = text_corpus[i:i+100]
    batch = tokenizer(batch, padding=True, truncation=True, return_tensors='pt')
    batch = batch.to(device)
    y_pred = model(**batch)[0].squeeze(1)
    y_pred = torch.softmax(y_pred, -1)[:, 1].cpu().detach().numpy()
    mse += mse_f(torch.tensor(y_pred), torch.tensor(tox[i:i+100])).item()
    
mse / len(text_corpus)

  0%|          | 0/1129 [00:00<?, ?it/s]

0.007597915964621088

In [7]:
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, precision_score, recall_score
from transformers import Trainer, TrainingArguments


def fine_tune_roberta(
    model: RobertaForSequenceClassification,
    tokenizer: RobertaTokenizer,
    corpus: list,
    scores: list,
    validation_corpus: list,
    validation_scores: list,
    batch_size: int,
    max_epochs: int,
    learning_rate: float,
    output_dir: str,
):
    def tokenize(batch):
        inputs = tokenizer(
            batch['text'], 
            padding='longest', 
            truncation=True, 
            max_length=512
        )
        batch['input_ids'] = inputs['input_ids']
        batch['attention_mask'] = inputs['attention_mask']
        # Convert label to two labels
        batch['label'] = [1 if label > 0.5 else 0 for label in batch['label']]
        
        return batch
    
    train_dataset = pd.DataFrame({'text': corpus, 'label': scores})
    validation_dataset = pd.DataFrame({'text': validation_corpus, 'label': validation_scores})
    
    train_dataset = Dataset.from_pandas(train_dataset)
    validation_dataset = Dataset.from_pandas(validation_dataset)
    
    train_dataset = train_dataset.map(tokenize, batched=True, batch_size=batch_size, num_proc=4)
    validation_dataset = validation_dataset.map(tokenize, batched=True, batch_size=batch_size, num_proc=4)
    
    train_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])
    validation_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])
    
    training_args = TrainingArguments(
        output_dir='output_dir',
        num_train_epochs=max_epochs,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        warmup_steps=500,
        weight_decay=0.01,
        logging_steps=10,
        eval_steps=100,
        save_steps=100,
        learning_rate=learning_rate,
        load_best_model_at_end=True,
        evaluation_strategy='steps',
        report_to='none',
    )
    
    def compute_metrics(pred):
        labels = pred.label_ids
        preds = pred.predictions.argmax(-1)
        acc = accuracy_score(labels, preds)
        f1 = f1_score(labels, preds)
        precision = precision_score(labels, preds)
        recall = recall_score(labels, preds)
        return {
            'accuracy': acc,
            'f1': f1,
            'precision': precision,
            'recall': recall
        }
    
    class MultilabelTrainer(Trainer):
        def compute_loss(self, model, inputs, return_outputs=False):
            # (*, 1)
            labels = inputs.pop("labels")
            outputs = model(**inputs)
            # (*, 2)
            logits = outputs.logits
            loss_fct = nn.CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, 2), labels.view(-1))
            return (loss, outputs) if return_outputs else loss
            
    
    trainer = MultilabelTrainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=validation_dataset,
        compute_metrics=compute_metrics,
    )
    
    trainer.train()
    
    return trainer, model, tokenizer, training_args

In [None]:
fine_tune_roberta(
    model,
    tokenizer,
    reference,
    translation,
    
    
)