In [2]:
import os
import torch
from transformers import AutoModelForSeq2SeqLM, AutoModelForCausalLM, AutoTokenizer
from transformers import RobertaForSequenceClassification, RobertaTokenizer

from gedi_adapter import GediAdapter
import text_processing

# TODO:
# - Use a paraphraser fine-tuned on toxic parallel data.
# - Fine-tune RoBERTa on a toxic dataset and use it as the classifier for the re-rank step.

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Model identifiers handed to `from_pretrained` below. `gedi_path` is also
# checked with `os.path.exists` in the setup cell, so it may point at a local
# checkpoint directory instead.
para_name = 'ceshine/t5-paraphrase-paws-msrp-opinosis'
gedi_path = 's-nlp/gpt2-base-gedi-detoxification'
clf_name = 'SkolkovoInstitute/roberta_toxicity_classifier_v1'

# The tokenizer is taken from the paraphraser checkpoint and is the one passed
# to GediAdapter alongside both models.
tokenizer = AutoTokenizer.from_pretrained(para_name)
para_model = AutoModelForSeq2SeqLM.from_pretrained(para_name)
# Loading the GeDi checkpoint as a causal LM leaves its extra `bias` and
# `logit_scale` weights unused (see the warning in this cell's output); the
# setup cell attaches them to `gedi_model` afterwards.
gedi_model = AutoModelForCausalLM.from_pretrained(gedi_path)

# Toxicity classifier for the re-rank step. Currently disabled, so `clf_name`
# is unused and neither `clf` nor `clf_tokenizer` exists; both are needed
# before `paraphrase_and_rerank(..., rerank=True)` can run.
# clf = RobertaForSequenceClassification.from_pretrained(clf_name)
# clf_tokenizer = RobertaTokenizer.from_pretrained(clf_name)

Some weights of the model checkpoint at s-nlp/gpt2-base-gedi-detoxification were not used when initializing GPT2LMHeadModel: ['logit_scale', 'bias']
- This IS expected if you are initializing GPT2LMHeadModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing GPT2LMHeadModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [4]:
## Setup
# Size the paraphraser's embedding matrix to the tokenizer's vocabulary.
para_model.resize_token_embeddings(len(tokenizer))

# Attach the GeDi-specific parameters that `from_pretrained` did not load.
if not os.path.exists(gedi_path):
    # `gedi_path` is not a local path: fall back to hard-coded values
    # (presumably copied from the published checkpoint; verify if it changes).
    gedi_model.bias = torch.tensor([[ 0.08441592, -0.08441573]])
    gedi_model.logit_scale = torch.tensor([[1.2701858]])
else:
    # Local checkpoint available: read the two tensors straight from it.
    gedi_state = torch.load(gedi_path + '/pytorch_model.bin', map_location='cpu')
    gedi_model.bias = gedi_state['bias']
    gedi_model.logit_scale = gedi_state['logit_scale']
    del gedi_state  # drop the full state dict once the two tensors are kept
print(f"bias: {gedi_model.bias}, logit_scale: {gedi_model.logit_scale}")

# First token id of each control word; passed to GediAdapter as pos_code/neg_code.
NEW_POS, NEW_NEG = (
    tokenizer.encode(control_word, add_special_tokens=False)[0]
    for control_word in ('normal', 'toxic')
)

bias: tensor([[ 0.0844, -0.0844]]), logit_scale: tensor([[1.2702]])


In [5]:
# Load test data: one entry per line, with surrounding whitespace stripped.
# The encoding is pinned so that decoding does not depend on the platform's
# locale default (which is not UTF-8 everywhere).
with open('../../data/test_10k_toxic', 'r', encoding='utf-8') as f:
    test_toxic_data = [line.strip() for line in f]
print(len(test_toxic_data))

10000


In [6]:
%%time
NEUTRAL = 0
TOXIC = 1
def paraphrase_and_rerank(text, max_length='auto', beams=10, rerank=False, clf=None, target=None, clf_tokenizer=None):
    """Paraphrase ``text`` with the GeDi-guided paraphraser and keep one candidate per input.

    Args:
        text: A single string or a sequence of strings to paraphrase. A single
            string is treated as a one-element batch.
        max_length: Forwarded unchanged to ``GediAdapter.paraphrase``.
        beams: Forwarded to ``GediAdapter.paraphrase``; also used as the number
            of candidates assumed to be returned per input.
        rerank: If True, score every candidate with ``clf`` and keep the one
            with the lowest toxicity probability. If False, keep the first
            candidate of each input.
        clf: Sequence classifier whose output exposes ``.logits``; column 1 of
            the softmax is read as the toxicity probability. Required when
            ``rerank`` is True.
        target: Forwarded unchanged to ``GediAdapter`` (e.g. ``NEUTRAL``).
        clf_tokenizer: Tokenizer matching ``clf``. When omitted, a module-level
            ``clf_tokenizer`` is used if one exists.

    Returns:
        A list with one paraphrase per input, in input order. An empty input
        yields an empty list.

    Raises:
        ValueError: If ``rerank`` is True but no classifier or no classifier
            tokenizer is available.
    """
    def predict_toxicity(texts, clf, clf_tokenizer):
        # Probability of class index 1 for every candidate, as a NumPy array.
        with torch.inference_mode():
            inputs = clf_tokenizer(texts, return_tensors='pt', padding=True)
            out = torch.softmax(clf(**inputs).logits, -1)[:, 1].cpu().numpy()
        return out

    # Normalise to a list so that len() counts inputs, not characters.
    texts = [text] if isinstance(text, str) else list(text)
    if not texts:
        return []

    if rerank:
        # Validate up front so a misconfiguration fails before the expensive
        # generation step rather than after it.
        if clf is None:
            raise ValueError("rerank=True requires a classifier to be passed as `clf`")
        if clf_tokenizer is None:
            clf_tokenizer = globals().get('clf_tokenizer')
        if clf_tokenizer is None:
            raise ValueError(
                "rerank=True requires `clf_tokenizer` (as an argument or a module-level variable)"
            )

    gedi_adapter = GediAdapter(
        model=para_model, gedi_model=gedi_model,
        tokenizer=tokenizer,
        gedi_logit_coef=10,
        target=target,
        neg_code=NEW_NEG,
        pos_code=NEW_POS,
        reg_alpha=3e-5, ub=0.01
    )

    # Assumes the adapter returns a flat list holding `beams` candidates per
    # input, grouped by input and ordered from most to least likely.
    gen_texts = gedi_adapter.paraphrase(
        text=texts, max_length=max_length, beams=beams,
        preprocess=text_processing.text_preprocess,
        postprocess=text_processing.text_postprocess
    )

    toxic_scores = predict_toxicity(gen_texts, clf, clf_tokenizer) if rerank else None

    out_texts = []
    for i in range(len(texts)):
        start = i * beams
        if rerank:
            # Least toxic candidate within this input's group.
            idx = int(toxic_scores[start:start + beams].argmin())
        else:
            idx = 0  # Results ordered from most likely to least
        out_texts.append(gen_texts[start + idx])
    return out_texts


# Smoke test: print the first five inputs, then display their paraphrases
# generated with the neutral target as the cell result.
sample_texts = test_toxic_data[:5]
print(sample_texts)
paraphrase_and_rerank(text=sample_texts, target=NEUTRAL)

['you are a fucking idiot .', 'as stupid and arrogant as his boss', 'a stupid society does stupid things and votes for stupid politicians .', 'a president who is an idiot .', 'colbert is a stupid moron and is a terrorist .']
CPU times: user 2min 8s, sys: 7.71 s, total: 2min 16s
Wall time: 17.4 s


["I've been thinking about this a lot lately, and I think it's time",
 'As aroungeant and snooty, as his boss,',
 'A bad society does the wrong things, and votes for the wrong politicians.',
 'A president who is an Iroquois is not a president.',
 'Colber is a smart Morson and a TSA Tatortainer.']