In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICE'] = '0'

import torch
from transformers import BartForConditionalGeneration, AutoTokenizer
import datasets
from torch.utils.data import DataLoader
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [2]:
base_model_name = 'facebook/bart-base'
model_name = 'SkolkovoInstitute/bart-base-detox'
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
model = BartForConditionalGeneration.from_pretrained(model_name).to(device)

In [3]:
dataset_path = '../data/external/paradetox-data/'
dataset = datasets.load_dataset(dataset_path)['train']
dataloader = DataLoader(dataset, batch_size=16, shuffle=False)

In [14]:
from tqdm.notebook import tqdm
detoxified_ressults = []

for batch in tqdm(dataloader):
    toxic_batch, neutral_batch = batch['en_toxic_comment'], batch['en_neutral_comment']
    tokenized_toxic_batch = tokenizer(toxic_batch, return_tensors='pt', padding='max_length', truncation=True).to(device)
    responce = model.generate(**tokenized_toxic_batch, max_new_tokens=512)
    results = tokenizer.batch_decode(responce, skip_special_tokens=True)
    detoxified_ressults.append(results)

  0%|          | 0/1234 [00:00<?, ?it/s]

In [17]:
import numpy as np
detoxified_ressults = np.array(detoxified_ressults).flatten()
detoxified_ressults.shape

(19744,)

In [21]:
dataset = dataset.add_column("BART_paraphrased", detoxified_ressults)
dataloader = DataLoader(dataset, batch_size=16, shuffle=False)

In [30]:
from transformers import RobertaTokenizer, RobertaForSequenceClassification
import torch.nn as nn
# tokenizer and model weights for calculating toxisity score of the text
toxic_tokenizer = RobertaTokenizer.from_pretrained(
    "SkolkovoInstitute/roberta_toxicity_classifier"
)
toxic_model = RobertaForSequenceClassification.from_pretrained(
    "SkolkovoInstitute/roberta_toxicity_classifier", device_map='auto'
)

def get_toxisity_score(model_output):
    sigmoid = nn.Sigmoid()
    scores = sigmoid(model_output.squeeze()).cpu().detach().numpy()
    result = {"neutral": scores[0], "toxic": scores[1]}
    return result


def get_classification(
    text, classification_tokenizer=toxic_tokenizer, classification_model=toxic_model
):
    tokenized_text = classification_tokenizer.encode(text, return_tensors="pt")
    predictions = classification_model(tokenized_text)
    return predictions


Some weights of the model checkpoint at SkolkovoInstitute/roberta_toxicity_classifier were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [31]:
init_toxicity_val = []
detoxified_toxicity_val = []
masked_toxicity_val = []

for batch in tqdm(dataloader):
    toxic_comments_batch, detoxified_comments_batch, masked_comments_batch = (
        batch["en_toxic_comment"],
        batch["en_neutral_comment"],
        batch["BART_paraphrased"],
    )
    t = toxic_tokenizer(toxic_comments_batch, return_tensors='pt', padding='max_length', truncation=True).to(device)
    res = toxic_model(**t)
    for elem in res.logits:
        init_toxicity_val.append(get_toxisity_score(elem)['toxic'])

    t = toxic_tokenizer(detoxified_comments_batch, return_tensors='pt', padding='max_length', truncation=True).to(device)
    res = toxic_model(**t)
    for elem in res.logits:
        detoxified_toxicity_val.append(get_toxisity_score(elem)['toxic'])

    t = toxic_tokenizer(masked_comments_batch, return_tensors='pt', padding='max_length', truncation=True).to(device)
    res = toxic_model(**t)
    for elem in res.logits:
        masked_toxicity_val.append(get_toxisity_score(elem)['toxic'])

  0%|          | 0/1234 [00:00<?, ?it/s]

In [32]:
dataset = dataset.add_column('initial_toxicity', init_toxicity_val)
dataset = dataset.add_column('ideal_toxicity', detoxified_toxicity_val)
dataset = dataset.add_column('resulting_toxicity', masked_toxicity_val)

In [36]:
from sentence_transformers import SentenceTransformer
import torch.nn as nn

simmilarity_model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
cosine_simmilarity = nn.CosineSimilarity(dim=0)

def get_sim(text1, text2):
    embeddings = simmilarity_model.encode([text1, text2], convert_to_tensor=True)
    return cosine_simmilarity(embeddings[0], embeddings[1]).item()

In [38]:
reference2masked_sim = []
reference2translation_sim = []

for example in tqdm(dataset):
    toxic = example['en_toxic_comment']
    nontoxic = example['en_neutral_comment']
    masked = example['BART_paraphrased']
    reference2masked_sim.append(get_sim(toxic, masked))
    reference2translation_sim.append(get_sim(toxic, nontoxic))

  0%|          | 0/19744 [00:00<?, ?it/s]

In [39]:
dataset = dataset.add_column('reference2masked_sim', reference2masked_sim)
dataset = dataset.add_column('reference2translation_sim', reference2translation_sim)
pandas_dataset_results = dataset.to_pandas()

## Measure fluency of thee generated text

In [44]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForSequenceClassification

fluency_tokenizer = AutoTokenizer.from_pretrained("cointegrated/roberta-large-cola-krishna2020")
fluency_model = AutoModelForSequenceClassification.from_pretrained("cointegrated/roberta-large-cola-krishna2020")

Downloading (…)okenizer_config.json:   0%|          | 0.00/289 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/628 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

In [58]:
def get_fluency_score(logits):
    sigmoid = nn.Sigmoid()
    scores = sigmoid(logits.squeeze()).cpu().detach().numpy()
    result = {"Fluent": scores[0], "Non-Fluent": scores[1]}
    return result

{'Fluent': 0.37349015, 'Non-Fluent': 0.68185806}

In [None]:
fluency_scores = []
for example in tqdm(dataset):
    text = example['BART_paraphrased']
    tokens = fluency_tokenizer(text, return_tensors='pt')
    fluency_scores.append(get_fluency_score(fluency_model(**tokens).logits)['Fluent'])

  0%|          | 0/19744 [00:00<?, ?it/s]