In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the result tables of the three methods (BART paraphrasing,
# T5 paraphrasing, word masking). The file names are listed in the same
# order as the target names on the left-hand side.
bart_results, t5_results, masked_results = (
    pd.read_csv(csv_name)
    for csv_name in (
        'BART_paraphrasing_res.csv',
        'T5_paraphrased_res.csv',
        'word_masking_res.csv',
    )
)

### Compute the Joint metric
It is computed as (1 - toxicity_score) * similarity * fluency

In [6]:
# Add the Joint metric column J = (1 - toxicity) * similarity * fluency to
# each results table in place. The similarity and fluency column names differ
# between the files (note the singular 'Fluency_score' in the BART table),
# so they are spelled out per table.
for frame, sim_col, fluency_col in (
    (bart_results, 'reference2masked_sim', 'Fluency_score'),
    (masked_results, 'reference2masked_sim', 'Fluency_scores'),
    (t5_results, 'reference2paraphrased_sim', 'Fluency_scores'),
):
    non_toxicity = 1 - frame['resulting_toxicity']
    frame['J'] = non_toxicity * frame[sim_col] * frame[fluency_col]

### Comparing the Joint metric across the 3 methods

In [13]:
# Mean Joint score per method, in the order: BART, word masking, T5.
tuple(scores['J'].mean() for scores in (bart_results, masked_results, t5_results))

(0.5644548913585631, 0.41140514581595666, 0.46843638177436375)

In [21]:
def _mean_scores(frame, sim_col, fluency_col):
    """Return the mean toxicity, similarity and fluency of one results table.

    Args:
        frame: results DataFrame containing a 'resulting_toxicity' column.
        sim_col: name of the similarity column in ``frame``.
        fluency_col: name of the fluency column in ``frame``.

    Returns:
        A 3-tuple ``(toxicity_mean, similarity_mean, fluency_mean)``.
    """
    return tuple(
        frame[col].mean()
        for col in ('resulting_toxicity', sim_col, fluency_col)
    )


# Per-method averages; the column names differ between the result files.
bart_tox, bart_sim, bart_f = _mean_scores(bart_results, 'reference2masked_sim', 'Fluency_score')
mask_tox, mask_sim, mask_f = _mean_scores(masked_results, 'reference2masked_sim', 'Fluency_scores')
t5_tox, t5_sim, t5_f = _mean_scores(t5_results, 'reference2paraphrased_sim', 'Fluency_scores')

# One summary line per method, rounded to three decimals.
print(
    f"pretrained BART results: tox - {bart_tox:.3f}, sim - {bart_sim:.3f}, fluency - {bart_f:.3f}",
    f"fintuned T5 results: tox - {t5_tox:.3f}, sim - {t5_sim:.3f}, fluency - {t5_f:.3f}",
    f"Words masking results: tox - {mask_tox:.3f}, sim - {mask_sim:.3f}, fluency - {mask_f:.3f}",
    sep="\n",
)

pretrained BART results: tox - 0.077, sim - 0.851, fluency - 0.726
fintuned T5 results: tox - 0.280, sim - 0.830, fluency - 0.788
Words masking results: tox - 0.244, sim - 0.874, fluency - 0.643


In [23]:
import warnings
import sys, os

# Ignore all warnings
warnings.filterwarnings("ignore")


# State shared by blockPrint()/enablePrint(): while output is silenced this is
# a tuple (saved_stdout, saved_stderr, devnull_out, devnull_err), else None.
_blocked_streams = None


# Disable
def blockPrint():
    """Silence stdout and stderr by redirecting both to ``os.devnull``.

    The streams that are active at call time are remembered so that
    ``enablePrint()`` can put exactly those back. This matters when
    ``sys.stdout`` has been replaced by the host environment (for example a
    notebook kernel), because ``sys.__stdout__`` is then a different stream.

    Calling this again while output is already silenced is a no-op, so the
    originally saved streams are never overwritten.
    """
    global _blocked_streams
    if _blocked_streams is not None:
        return
    devnull_out = open(os.devnull, "w")
    devnull_err = open(os.devnull, "w")
    _blocked_streams = (sys.stdout, sys.stderr, devnull_out, devnull_err)
    sys.stdout = devnull_out
    sys.stderr = devnull_err


# Restore
def enablePrint():
    """Undo ``blockPrint()``: restore the saved streams and close the devnull handles.

    Does nothing when output is not currently silenced, so a stray extra call
    cannot redirect output away from the active streams.
    """
    global _blocked_streams
    if _blocked_streams is None:
        return
    saved_out, saved_err, devnull_out, devnull_err = _blocked_streams
    _blocked_streams = None
    sys.stdout = saved_out
    sys.stderr = saved_err
    # Close the handles opened by blockPrint() so they do not leak.
    devnull_out.close()
    devnull_err.close()


# Silence stdout/stderr while the heavy libraries are imported.
blockPrint()
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
from transformers import RobertaTokenizer, RobertaForSequenceClassification

device = "cuda" if torch.cuda.is_available() else "cpu"

enablePrint()
print("Input your text you want to detoxify:")
# NOTE: the text is hard-coded here; nothing is read from the user.
text = 'fuck you'

# Fine-tuned model for text detoxification
finetuned_model_chk = "../models/T5_training_checkpoints/final_checkpoint/"
tokenizer = AutoTokenizer.from_pretrained(finetuned_model_chk)
model = AutoModelForSeq2SeqLM.from_pretrained(finetuned_model_chk, device_map="auto")

# Tokenizer and model weights for calculating toxicity score of the text
tokenizer_toxicity_score = RobertaTokenizer.from_pretrained(
    "SkolkovoInstitute/roberta_toxicity_classifier"
)
model_toxicity_score = RobertaForSequenceClassification.from_pretrained(
    "SkolkovoInstitute/roberta_toxicity_classifier"
)

text_tokenized = tokenizer(text, return_tensors="pt")
# device_map="auto" may place the model on an accelerator, so the input
# tensors are moved to the model's device before generate() is called.
# The dict is not named `input`, to avoid shadowing the builtin for the
# rest of the kernel session.
model_inputs = {
    "input_ids": text_tokenized["input_ids"].to(model.device),
    "attention_mask": text_tokenized["attention_mask"].to(model.device),
}
# do_sample=True makes generation stochastic; no seed is set here, so the
# detoxified text can differ between runs.
detoxified_text_tokenized = model.generate(
    **model_inputs, max_new_tokens=512, do_sample=True
)
text_detoxified = tokenizer.decode(
    detoxified_text_tokenized[0], skip_special_tokens=True
)
print(text_detoxified)
