In [1]:
from unitaryAI.detoxify import Detoxify
import numpy as np
import pandas as pd
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch
import gc
from tqdm import tqdm

In [2]:
# Load the 'original' Detoxify model from the local checkpoint and the local
# bert-base-uncased config, then display its wrapped network
# (a transformers BertForSequenceClassification, per the repr printed below).
ori = Detoxify(
    'original',
    checkpoint="./unitaryAI/toxic_original-c1212f89.ckpt",
    huggingface_config_path="./unitaryAI/bert-base-uncased",
)
ori.model

BertForSequenceClassification


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

In [3]:
## constants
# Per-model locations of the Detoxify checkpoint and the matching local
# Hugging Face config directory. Insertion order matters: later cells index
# their per-model output columns by this order.
model_info_dict = {
    'original': {
        "checkpoint": "./unitaryAI/toxic_original-c1212f89.ckpt",
        "huggingface_config_path": "./unitaryAI/bert-base-uncased",
    },
    'unbiased': {
        "checkpoint": "./unitaryAI/toxic_debiased-c7548aa0.ckpt",
        "huggingface_config_path": "./unitaryAI/roberta-base",
    },
    "multilingual": {
        "checkpoint": "./unitaryAI/multilingual_debiased-0b549669.ckpt",
        "huggingface_config_path": "./unitaryAI/xlm-roberta-base",
    },
}

# Input CSV files.
test_data_path = "./train/removed_redundant_ruddit_with_text.csv"
val_data_path = "./train/validation_data.csv"

# Column names inside the CSV files.
comment_key = "text"
comment_id_key = "comment_id"

# Inference batch size and the size of the last axis of the array that
# stores the captured dense-layer activations.
batch_size = 32
dense_dim = 768

In [4]:
# Load the comments to score and fail early, with a clear message, if the
# text column is missing or has empty cells (later cells feed this column
# straight into the models).
whole_df = pd.read_csv(test_data_path)
assert comment_key in whole_df.columns, f"missing '{comment_key}' column in {test_data_path}"
assert whole_df[comment_key].notna().all(), f"'{comment_key}' column contains missing values"
whole_df.shape

(5710, 3)

In [5]:
class CustomDataset(Dataset):
    """Map-style dataset over comments, optionally paired with targets.

    Args:
        comments: Sized, integer-indexable collection of comments
            (for example a NumPy array or a list).
        targets: Integer-indexable collection aligned with ``comments``.
            May be ``None`` when ``include_target`` is false.
        include_target: When true, each item is a ``(comment, target)``
            pair; otherwise each item is just the comment.

    Raises:
        ValueError: If ``include_target`` is true but ``targets`` is ``None``.
    """

    def __init__(self, comments, targets, include_target=True):
        # Fail at construction instead of with an opaque TypeError on the
        # first ``__getitem__`` call.
        if include_target and targets is None:
            raise ValueError("targets must be provided when include_target is True")
        self.comments = comments
        self.targets = targets
        self.include_target = include_target

    def __len__(self):
        # len() accepts plain lists as well as arrays, unlike ``.shape[0]``.
        return len(self.comments)

    def __getitem__(self, idx):
        comment = self.comments[idx]
        if self.include_target:
            return comment, self.targets[idx]
        return comment


In [6]:
# Wrap the raw comment column as a target-less dataset and iterate over it
# in fixed order (shuffle=False), so batch positions map back to row positions.
pytorch_dataset = CustomDataset(
    comments=whole_df[comment_key].values,
    targets=None,
    include_target=False,
)
test_dataloader = DataLoader(
    dataset=pytorch_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [7]:
# Instantiate one Detoxify wrapper per entry of model_info_dict, in the
# dict's insertion order, from the local checkpoint and config paths.
models = [
    Detoxify(
        name,
        checkpoint=paths["checkpoint"],
        huggingface_config_path=paths["huggingface_config_path"],
    )
    for name, paths in model_info_dict.items()
]

BertForSequenceClassification
RobertaForSequenceClassification
XLMRobertaForSequenceClassification


In [8]:
# Latest captured module output per name, stored as NumPy arrays.
activation = {}


def get_activation(name):
    """Build a forward hook that records a module's output in ``activation``.

    Each time the returned hook runs, the output tensor is detached, moved
    to the CPU, converted to a NumPy array and stored under
    ``activation[name]``, overwriting any previous entry for that name.
    """
    def _record(module, inputs, output):
        captured = output.detach()
        activation[name] = captured.cpu().numpy()
    return _record

In [9]:
# Outputs, one column/slice per model in model_info_dict order:
#   preds[r, i]           sum of every value model i's predict() returns for row r
#   dense_output[r, i, :] activation of the hooked dense layer of model i for row r
n_examples, n_models = whole_df.shape[0], len(model_info_dict)
preds = np.zeros((n_examples, n_models))
dense_output = np.zeros((n_examples, n_models, dense_dim))

for i, model_name in enumerate(model_info_dict):
    model = models[i]

    # The hooked layer sits in a different place for the 'original' model
    # than for the other two.
    if model_name == "original":
        dense_layer = model.model.bert.pooler.dense
    else:
        dense_layer = model.model.classifier.dense

    # Keep the handle so the hook can be removed afterwards; otherwise every
    # re-run of this cell stacks one more hook on the same persistent model.
    hook_handle = dense_layer.register_forward_hook(get_activation("dense"))
    try:
        lastidx = 0
        for texts in tqdm(test_dataloader):
            preds_dict = model.predict(texts)
            batch_rows = slice(lastidx, lastidx + len(texts))
            # The hook has just stored this batch's dense-layer output.
            dense_output[batch_rows, i, :] = activation["dense"]
            # Collapse all returned score lists into one summed score per row.
            for key in preds_dict:
                preds[batch_rows, i] += preds_dict[key]
            lastidx += len(texts)
    finally:
        hook_handle.remove()

preds.shape, dense_output.shape

100%|██████████| 179/179 [00:17<00:00, 10.46it/s]
100%|██████████| 179/179 [00:15<00:00, 11.53it/s]
100%|██████████| 179/179 [00:15<00:00, 11.42it/s]


((5710, 3), (5710, 3, 768))

In [12]:
# Persist the captured activations; array layout is (examples, models, dense_dim).
np.save('./output/unitaryAI_ruddit_dense_output.npy', dense_output)

In [10]:
# pd.DataFrame({comment_id_key: whole_df[comment_id_key], "original": preds[:, 0], "unbiased": preds[:, 1], "multilingual": preds[:, 2]}).to_csv("./output/unitaryAI-3-preds.csv", index=False)