# BERT test

## 0. import libs

In [2]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm

import torch
import torch.nn as nn
from torch.utils.data import DataLoader

from transformers import AutoTokenizer, AutoModel

from utils import clean, getData
from data import DataProcessor
from model import BertRegressor

## 1. Set model

In [6]:
# set cofig
CONFIG = dict(
    seed = 12345,
    pretrained_model = 'bert-base-uncased',
    output_dir = '../models/bert_regression_mini',
    finetune_model = '../models/bert_regression_mini/bert_regression-5-16.pt',
    train_file = '4th/v0/train.csv',
    dev_file = '4th/v0/dev.csv',
    train_batch_size = 32,
    dev_batch_size = 32,
    lr = 5e-5,
    epochs = 5,
    num_class = 1,
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu"),
    device_ids = [0,1]
)

### load fine-tuning model

In [None]:
# init bert pretrained model
tokenizer = AutoTokenizer.from_pretrained(CONFIG['pretrained_model'])
model = BertRegressor(CONFIG['pretrained_model'], CONFIG['num_class'])

In [7]:
# load fine-tuning model
checkpoint = torch.load(CONFIG['finetune_model'])
model.load_state_dict(checkpoint)
if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model, device_ids=CONFIG['device_ids'])

device = CONFIG['device']
model.to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


DataParallel(
  (module): BertRegressor(
    (bert): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(30522, 768, padding_idx=0)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0): BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
             

## 2. Load dataset

### Load Validation and Test data

In [11]:
# Validation data 
df_val = getData(data_path="4th/validation_cleaned.csv")
# # Test data
df_sub = getData(data_path="4th/comments_to_score.csv")

Read 4th/validation_cleaned.csv ...
Read 4th/comments_to_score.csv ...


In [13]:
val1_data = DataProcessor(df_val['less_toxic'], tokenizer, is_eval=True)
val1_dataloader = DataLoader(val1_data, batch_size=CONFIG['dev_batch_size'], shuffle=False, num_workers=4)

val2_data = DataProcessor(df_val['more_toxic'], tokenizer, is_eval=True)
val2_dataloader = DataLoader(val2_data, batch_size=CONFIG['dev_batch_size'], shuffle=False, num_workers=4)

test_data = DataProcessor(df_sub['text'], tokenizer, is_eval=True)
test_dataloader = DataLoader(test_data, batch_size=CONFIG['dev_batch_size'], shuffle=False, num_workers=4)

## 3. Predict

In [18]:
def predict(model, val_dataloader, device):

	torch.cuda.empty_cache()
	model.eval()

	outputs = []
	with torch.no_grad():
		for val_input in tqdm(val_dataloader):
			input_id = val_input['input_ids'].squeeze(1).to(device)
			mask = val_input['attention_mask'].squeeze(1).to(device)

			output = model(input_id, mask)
			output = torch.squeeze(output, 1)
			outputs.append(output)

			del input_id
			del mask

	outputs = torch.cat(outputs, dim=0)

	return outputs

In [19]:
print('predict less toxic')
p1 = predict(model, val1_dataloader, device)	
print('predict more toxic')
p2 = predict(model, val2_dataloader, device)	

  0%|          | 0/941 [00:00<?, ?it/s]

predict less toxic


100%|██████████| 941/941 [02:37<00:00,  5.98it/s]
  0%|          | 0/941 [00:00<?, ?it/s]

predict more toxic


100%|██████████| 941/941 [02:37<00:00,  5.96it/s]


In [25]:
p1 = p1.cpu().numpy()
p2 = p2.cpu().numpy()

In [26]:
print(f'Validation Accuracy is { np.round((p1 < p2).mean() * 100,2)}')

Validation Accuracy is 0.0


### Analyze bad predictions

In [27]:
df_val['p1'] = p1
df_val['p2'] = p2
df_val['diff'] = np.abs(p2 - p1)
df_val['correct'] = (p1 < p2).astype('int')

In [29]:
### Incorrect predictions with similar scores
df_val[df_val.correct == 0].sort_values('diff', ascending=True).head()

Unnamed: 0,worker,less_toxic,more_toxic,p1,p2,diff,correct
0,313,This article sucks woo woo woo,WHAT !!!?!?!!?!?!!?!?!?!?!!!???!!! = WHER IS Y...,0.0,0.0,0.0,0
20079,297,Ciao Whats up with that ? I thought Sonny Bon...,"No, what you ' ve done is prove you ' re cluel...",0.0,0.0,0.0,0
20078,168,image the animation is crap; get a new one,"It is not libel idiot, she really said that, i...",0.0,0.0,0.0,0
20077,430,Resistance is AWESOME Resistance: Fall of M...,You ! YOu leave me alone asshole !,0.0,0.0,0.0,0
20076,98,Resistance is AWESOME Resistance: Fall of M...,You ! YOu leave me alone asshole !,0.0,0.0,0.0,0


In [30]:
### Incorrect predictions with dis-similar scores
df_val[df_val.correct == 0].sort_values('diff', ascending=False).head()

Unnamed: 0,worker,less_toxic,more_toxic,p1,p2,diff,correct
0,313,This article sucks woo woo woo,WHAT !!!?!?!!?!?!!?!?!?!?!!!???!!! = WHER IS Y...,0.0,0.0,0.0,0
20068,268,You must be a sock. Your request was your only...,FUCK YOU Twat. Don ' t bother with the BS in ...,0.0,0.0,0.0,0
20080,144,Your edits make me sick to my stomache Re: [...,have no life coo oco coco haaaz,0.0,0.0,0.0,0
20079,297,Ciao Whats up with that ? I thought Sonny Bon...,"No, what you ' ve done is prove you ' re cluel...",0.0,0.0,0.0,0
20078,168,image the animation is crap; get a new one,"It is not libel idiot, she really said that, i...",0.0,0.0,0.0,0


## 4. Predict on test data

In [32]:
# Predict using pipeline
p = predict(model, test_dataloader, device)
p = p.cpu().numpy()	
df_sub['score'] = p	

100%|██████████| 236/236 [00:39<00:00,  5.98it/s]


In [33]:
# Cases with duplicates scores
df_sub['score'].count() - df_sub['score'].nunique()

7536

In [28]:
same_score = df_sub['score'].value_counts().reset_index()[:10]
same_score

Unnamed: 0,index,score
0,0.572927,2
1,0.464975,2
2,0.303382,2
3,0.022798,2
4,0.230748,2
5,0.130651,2
6,0.0645,2
7,0.124117,2
8,0.230264,2
9,0.14535,2


In [29]:
df_sub[df_sub['score'].isin(same_score['index'].tolist())]

Unnamed: 0,comment_id,text,score
1832,95080362,"""\n\nPlease do not add nonsense to Wikipedia. ...",0.022798
2842,160935265,"""\n\nPlease do not add nonsense to Wikipedia. ...",0.022798
4832,275797183,Hi\n\nCould you please learn to interact like ...,0.0645
4833,275812977,Could you please learn to interact like a sent...,0.0645
5140,298854514,"her!\n\nPoop, pee, toot, fart, gas, diareah!\n...",0.464975
5190,301925517,"her!\n\nPoop, pee, toot, fart, gas, diareah!\n...",0.464975
5752,339478276,I'm gonna beat you to a bloody pulp then sho...,0.230748
5753,339478966,I'm gonna beat you to a bloody pulp then shoo...,0.230748
5832,345043812,JIMBO SAID I COULD EDIT HIS PAGE. YOU ARE A MO...,0.303382
5833,345043888,JIMBO SAID I COULD EDIT HIS PAGE. YOU ARE A M...,0.303382


In [30]:
df_sub.sample(5)

Unnamed: 0,comment_id,text,score
3294,186197494,"""\nFor copying and pasting of what I felt stro...",0.141626
2167,116257386,Dude! \nThat was an attempt at saying somethi...,0.160497
7070,457417171,You simply display your ignorance. Fatuorum,0.274591
4347,242591983,"""\n\nSockpuppetry case\n \nYou have been accus...",0.012607
1370,70880071,Now let's see who's gonna start crying like a ...,0.279953


In [31]:
# save submission
df_sub[['comment_id', 'score']].to_csv("submission.csv", index=False)