# Models Exploration

## 1. Importing Libraries

In [1]:
import pickle
import warnings
from src.data.make_dataset import TextDataset

import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM
from src.models.model_class import GediAdapter

warnings.filterwarnings('ignore')

## 2. Importing Dataset

In [2]:
# Load the pickled dataset object. The context manager closes the file handle,
# which the bare `open(...)` call used to leave open.
# NOTE(review): pickle.load can execute arbitrary code; only load files produced by this project.
with open('../data/interim/text_dataset.pkl', 'rb') as dataset_file:
    df = pickle.load(dataset_file)

In [3]:
# Preview the first rows of the table held in `df.data` (columns: toxic, normal, toxic_reduction).
df.data.head()

Unnamed: 0,toxic,normal,toxic_reduction
5,I'm not gonna have a child... ...with the same...,I'm not going to breed kids with a genetic dis...,0.915109
6,"They're all laughing at us, so we'll kick your...",they're laughing at us. We'll show you.,0.999361
7,Maine was very short on black people back then.,there wasn't much black in Maine then.,0.814971
11,"So now their spirits are cursed, walking back ...","their souls are cursed, they guard the paths, ...",0.698517
13,"Come on, Cal, leave that shit alone.","come on, Cal, put it down.",0.999357


## 3. Splitting Dataset

In [4]:
# Fractions for the train / validation / test splits, in that order.
split_fractions = (0.7, 0.1, 0.2)
train_dataset, val_dataset, test_dataset = df.split(*split_fractions)

In [5]:
# (rows, columns) of each split, in train / validation / test order.
tuple(part.data.shape for part in (train_dataset, val_dataset, test_dataset))

((404444, 3), (57778, 3), (115555, 3))

## 4. Trying a T5 paraphraser guided by a GPT-2 based GeDi model

In [6]:
# One Hub checkpoint supplies both the tokenizer and the seq2seq paraphraser.
tokenizer_name = model_name = "s-nlp/t5-paraphrase-paws-msrp-opinosis-paranmt"
# Checkpoint of the GeDi discriminator used to steer generation.
dis_name = 's-nlp/gpt2-base-gedi-detoxification'

# Device that the encoded inputs are moved to before generation.
device = 'cpu'

In [7]:
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)

# Control codes handed to GediAdapter as pos_code / neg_code: the id of the
# first token produced for the words 'normal' and 'toxic' respectively.
NEW_POS, NEW_NEG = (
    tokenizer.encode(word, add_special_tokens=False)[0]
    for word in ('normal', 'toxic')
)

In [8]:
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
# Size the embedding matrix to the tokenizer's vocabulary; the returned
# embedding layer is what the cell displays.
vocab_size = len(tokenizer)
model.resize_token_embeddings(vocab_size)

Embedding(32100, 768)

In [9]:
# Load the GeDi discriminator as a causal language model.
gedi_dis = AutoModelForCausalLM.from_pretrained(dis_name)

# The checkpoint's `logit_scale` and `bias` entries are not loaded into
# GPT2LMHeadModel (see the load warning in this cell's output), so they are attached by hand.
# NOTE(review): the literal values are presumably the ones stored in the checkpoint; confirm.
gedi_dis.bias = torch.tensor([[0.08441592, -0.08441573]])
gedi_dis.logit_scale = torch.tensor([[1.2701858]])

# Put the model in evaluation mode; the returned model is what the cell displays.
gedi_dis.eval()

Some weights of the model checkpoint at s-nlp/gpt2-base-gedi-detoxification were not used when initializing GPT2LMHeadModel: ['logit_scale', 'bias']
- This IS expected if you are initializing GPT2LMHeadModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing GPT2LMHeadModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(32100, 1024)
    (wpe): Embedding(1024, 1024)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-23): 24 x GPT2Block(
        (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=1024, out_features=32100, bias=False)
)

In [10]:
# Combine the paraphraser and the GeDi discriminator through the project's GediAdapter.
# NOTE(review): gedi_logit_coef, target, reg_alpha and ub are defined by
# src.models.model_class, which is not visible here; check there before tuning them.
gedi_adapter = GediAdapter(
    model=model,
    gedi_model=gedi_dis,
    tokenizer=tokenizer,
    gedi_logit_coef=10,
    target=0,
    reg_alpha=3e-5,
    neg_code=NEW_NEG,  # id of the first token of 'toxic'
    pos_code=NEW_POS,  # id of the first token of 'normal'
    ub=0.01
)

In [11]:
# Fixed seed so the same sentence is drawn on every Restart & Run All;
# change it to inspect a different example.
SAMPLE_SEED = 42

# Draw one toxic sentence from the full dataset and rewrite it with the
# GeDi-guided paraphraser.
text = df.data['toxic'].sample(n=1, random_state=SAMPLE_SEED).values[0]
inputs = tokenizer.encode(text, return_tensors='pt').to(device)
# NOTE(review): temperature=0.0 is passed together with do_sample=False; it is
# presumably ignored when not sampling. Confirm against GediAdapter.generate.
result = gedi_adapter.generate(inputs, do_sample=False, num_return_sequences=1, temperature=0.0,
                               repetition_penalty=3.0, num_beams=2, bad_words_ids=[[2]])
print(text)    # original sentence
print(result)  # raw generated token ids
for r in result:
    # Decoded rewrite, with special tokens stripped.
    print(tokenizer.decode(r, skip_special_tokens=True))

yes, with some nutcase who says he cured him.
tensor([[    0,  2163,     6,    28,     3,     9,   720,  1033,   195,   113,
           243,     3,    88,   141, 28648,   376,     5,     1]])
Yes, with a bitchell who said he had healed him.


## 5. Trying a RoBERTa-based toxicity classifier

In [12]:
# NOTE(review): notebook convention is to keep all imports in the first code cell;
# consider moving this one there.
from transformers import RobertaForSequenceClassification, RobertaTokenizer

# One Hub id supplies both the classifier weights and its tokenizer.
clf_name = 's-nlp/roberta_toxicity_classifier_v1'
clf = RobertaForSequenceClassification.from_pretrained(clf_name)
clf_tokenizer = RobertaTokenizer.from_pretrained(clf_name)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Downloading (…)lve/main/config.json:   0%|          | 0.00/530 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at s-nlp/roberta_toxicity_classifier_v1 were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Downloading (…)olve/main/vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

In [13]:
def predict_toxicity(texts):
    """Score texts with the RoBERTa toxicity classifier.

    Args:
        texts: A single string or a list of strings to score.

    Returns:
        A 1-D float32 NumPy array with, for each input, the softmax
        probability of class index 1 (used as the toxic class in this
        notebook). An empty list yields an empty array.
    """
    # An empty batch has nothing to tokenize, so answer without calling the model.
    if not isinstance(texts, str) and len(texts) == 0:
        return torch.empty(0, dtype=torch.float32).numpy()

    with torch.inference_mode():
        # truncation=True clips over-long inputs to the tokenizer's configured
        # maximum length instead of feeding the model more positions than it supports.
        inputs = clf_tokenizer(
            texts, return_tensors='pt', padding=True, truncation=True
        ).to(clf.device)
        logits = clf(**inputs).logits
        return torch.softmax(logits, -1)[:, 1].cpu().numpy()

In [15]:
# Sanity check: one benign and one insulting sentence.
sample_texts = ['You are a good person', 'You are a bad person']
predict_toxicity(sample_texts)

array([6.9651840e-05, 9.8459953e-01], dtype=float32)

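As we can see, the classifier behaves as expected on this sanity check: the benign sentence gets a near-zero toxicity score, while the insulting one scores about 0.98.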