In [1]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import torch.nn as nn
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from transformers import AutoTokenizer, AutoModelForSequenceClassification

import datasets
from torch.utils.data import DataLoader
import numpy as np
device = 'cuda' if torch.cuda.is_available() else 'cpu'
from tqdm.notebook import tqdm
from sentence_transformers import SentenceTransformer
device

'cuda'

In [2]:
# fine-tuned model for text detoxification
# finetuned_model_chk = "../models/T5_training_checkpoints/final_checkpoint/"
finetuned_model_chk = "../models/T5_checkpointsV2/final_checkpointV2"
tokenizer = AutoTokenizer.from_pretrained(finetuned_model_chk)
model = AutoModelForSeq2SeqLM.from_pretrained(finetuned_model_chk).to(device)

# tokenizer and model weights for calculating toxisity score of the text
toxic_tokenizer = RobertaTokenizer.from_pretrained('SkolkovoInstitute/roberta_toxicity_classifier')
toxic_model = RobertaForSequenceClassification.from_pretrained('SkolkovoInstitute/roberta_toxicity_classifier').to(device)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Some weights of the model checkpoint at SkolkovoInstitute/roberta_toxicity_classifier were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [3]:
def get_toxisity_score(model_output):
    sigmoid = nn.Sigmoid()
    scores = sigmoid(model_output.squeeze()).cpu().detach().numpy()
    result = {'neutral' : scores[0], 'toxic' : scores[1]}
    return result

def get_classification(text, classification_tokenizer=toxic_tokenizer, classification_model=toxic_model):
    tokenized_text = classification_tokenizer.encode(text, return_tensors='pt')
    predictions = classification_model(tokenized_text)
    return predictions


# output = get_classification('Fuck you', tokenizer_toxisity_score, model_toxisity_score)

In [4]:
dataset_path = 's-nlp/paradetox'
dataset = datasets.load_dataset(dataset_path)['train']
dataloader = DataLoader(dataset, batch_size=16)

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/2.04M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [9]:
detoxified_ressults = []

for batch in tqdm(dataloader):
    toxic_batch, neutral_batch = batch['en_toxic_comment'], batch['en_neutral_comment']
    tokenized_toxic_batch = tokenizer(toxic_batch, return_tensors='pt', padding='max_length', truncation=True).to(device)
    responce = model.generate(**tokenized_toxic_batch, max_new_tokens=512)
    results = tokenizer.batch_decode(responce, skip_special_tokens=True)
    detoxified_ressults.append(results)

  0%|          | 0/1234 [00:00<?, ?it/s]

In [10]:
detoxified_ressults = np.array(detoxified_ressults).flatten()
detoxified_ressults.shape

(19744,)

In [11]:
dataset = dataset.add_column('T5_paraphrased', detoxified_ressults)

In [12]:
dataloader = DataLoader(dataset, batch_size=16)

In [13]:
init_toxicity_val = []
detoxified_toxicity_val = []
paraphrased_toxicity_val = []

for batch in tqdm(dataloader):
    toxic_comments_batch, detoxified_comments_batch, paraphrased_comments_batch = (
        batch["en_toxic_comment"],
        batch["en_neutral_comment"],
        batch["T5_paraphrased"],
    )
    t = toxic_tokenizer(toxic_comments_batch, return_tensors='pt', padding='max_length', truncation=True).to(device)
    res = toxic_model(**t)
    for elem in res.logits:
        init_toxicity_val.append(get_toxisity_score(elem)['toxic'])

    t = toxic_tokenizer(detoxified_comments_batch, return_tensors='pt', padding='max_length', truncation=True).to(device)
    res = toxic_model(**t)
    for elem in res.logits:
        detoxified_toxicity_val.append(get_toxisity_score(elem)['toxic'])

    t = toxic_tokenizer(paraphrased_comments_batch, return_tensors='pt', padding='max_length', truncation=True).to(device)
    res = toxic_model(**t)
    for elem in res.logits:
        paraphrased_toxicity_val.append(get_toxisity_score(elem)['toxic'])


  0%|          | 0/1234 [00:00<?, ?it/s]

In [14]:
dataset = dataset.add_column('initial_toxicity', init_toxicity_val)
dataset = dataset.add_column('ideal_toxicity', detoxified_toxicity_val)
dataset = dataset.add_column('resulting_toxicity', paraphrased_toxicity_val)

In [15]:
simmilarity_model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
cosine_simmilarity = nn.CosineSimilarity(dim=0)

def get_sim(text1, text2):
    embeddings = simmilarity_model.encode([text1, text2], convert_to_tensor=True)
    return cosine_simmilarity(embeddings[0], embeddings[1]).item()

In [16]:
reference2masked_sim = []
reference2translation_sim = []

for example in tqdm(dataset):
    toxic = example['en_toxic_comment']
    nontoxic = example['en_neutral_comment']
    masked = example['T5_paraphrased']
    reference2masked_sim.append(get_sim(toxic, masked))
    reference2translation_sim.append(get_sim(toxic, nontoxic))

  0%|          | 0/19744 [00:00<?, ?it/s]

In [17]:
dataset = dataset.add_column('reference2masked_sim', reference2masked_sim)
dataset = dataset.add_column('reference2translation_sim', reference2translation_sim)

In [18]:
fluency_tokenizer = AutoTokenizer.from_pretrained("cointegrated/roberta-large-cola-krishna2020")
fluency_model = AutoModelForSequenceClassification.from_pretrained("cointegrated/roberta-large-cola-krishna2020").to(device)

In [19]:
def get_fluency_score(logits):
    sigmoid = nn.Sigmoid()
    scores = sigmoid(logits.squeeze()).cpu().detach().numpy()
    result = {"Fluent": scores[0], "Non-Fluent": scores[1]}
    return result

In [20]:
dataloader = DataLoader(dataset, batch_size=16, shuffle=False)

In [21]:
fluency_scores = []
for batch in tqdm(dataloader):
    text = batch['T5_paraphrased']
    tokens = fluency_tokenizer(text, return_tensors='pt', truncation=True, padding='max_length').to(device)
    result = fluency_model(**tokens).logits
    for elem in result:
        elem_res = get_fluency_score(elem)['Fluent']
        fluency_scores.append(elem_res)

  0%|          | 0/1234 [00:00<?, ?it/s]

In [22]:
assert len(dataset) == len(fluency_scores)
dataset  = dataset.add_column('Fluency_scores', fluency_scores)

In [23]:
pandas_dataset_results = dataset.to_pandas()
pandas_dataset_results.head()

Unnamed: 0,en_toxic_comment,en_neutral_comment,T5_paraphrased,initial_toxicity,ideal_toxicity,resulting_toxicity,reference2masked_sim,reference2translation_sim,Fluency_scores
0,he had steel balls too !,he was brave too!,He had steel balls too!,0.912212,0.005482,0.917721,1.0,0.510213,0.900897
1,"dude should have been taken to api , he would ...",It would have been good if he went to api. He ...,"The guy should have been taken to the api, he'...",0.744653,0.00685,0.783338,0.912686,0.55702,0.866171
2,"im not gonna sell the fucking picture , i just...","I'm not gonna sell the picture, i just want to...","I'm not gonna sell the picture, I just want to...",0.919632,0.005215,0.005073,0.887211,0.887211,0.894954
3,the garbage that is being created by cnn and o...,the news that is being created by cnn and othe...,The garbage being created by CNN and other new...,0.955578,0.014164,0.964304,0.804467,0.912075,0.886835
4,the reason they dont exist is because neither ...,The reason they don't exist is because neither...,The reason they don't exist is that neither is...,0.94548,0.108274,0.936731,0.992926,0.895799,0.654566


In [25]:
pandas_dataset_results.to_csv('T5_paraphrased_resV2.csv')