In [1]:
# Enable IPython's autoreload extension so edits to imported project modules
# are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [42]:
import pandas as pd
import torch

from src.config import CONFIG
from src.dataset import SBICDataset
from src.train_utils import make_tokinzer, make_model, make_dataloader, evaluate
from src.utils import print_evaluation_results

In [3]:
# Build the tokenizer and model from the project hyper-parameters (CONFIG.hp),
# then restore the weights stored in checkpoints/<name>.pt.
tokenizer = make_tokinzer(CONFIG.hp)
model = make_model(CONFIG.hp, tokenizer)
checkpoints = "distilgpt2_full_1"
# map_location="cpu" lets the checkpoint deserialize on hosts that lack the
# device it was saved from; load_state_dict then copies the values into the
# model's existing parameters, wherever those live.
# Left as the cell's last expression so the key-matching report is displayed.
model.load_state_dict(torch.load(f"checkpoints/{checkpoints}.pt", map_location="cpu"))

List of all special token and its token_id:
 - ['<|endoftext|>', '<|sep|>', '<|pad|>', '<|offY|>', '<|offN|>', '<|sexY|>', '<|sexN|>', '<|intY|>', '<|intN|>', '<|grpY|>', '<|grpN|>', '<|ingrpN|>', '<|ingrpY|>']
 - [[50256], [50258], [50257], [50259], [50260], [50261], [50262], [50263], [50264], [50265], [50266], [50267], [50268]]
Model vocab resize: 50269
Model eos token: 50256
Model pad token: 50257
Model sep token: 50258


<All keys matched successfully>

In [44]:
# Evaluate the loaded model on the data at CONFIG.dataset.val_data_agg and
# print the resulting metrics.
split = "validation"

# The pickled object is converted to a NumPy array before SBICDataset wraps it.
data = pd.read_pickle(CONFIG.dataset.val_data_agg).to_numpy()
dataset = SBICDataset(data, tokenizer, is_training=False)
dataloader = make_dataloader(
    dataset,
    model,
    tokenizer,
    CONFIG.hp,
    split=split,  # same "validation" label that is used for reporting below
)

val_res = evaluate(model, tokenizer, dataloader, CONFIG.hp)
print_evaluation_results(split=split, res=val_res)

                                                 

Classification F1 on validation set: avg=0.697
 - Offensive: 0.652
 - Intent: 0.609
 - Sex: 0.840
 - Group: 0.479
 - In-Group: 0.905
Minority RougeL-f1 on validation set: 0.711
Stereotype RougeL-f1 on validation set: 0.282




                                                 
Classification F1 on validation set: avg=0.697
 - Offensive: 0.652
 - Intent: 0.609
 - Sex: 0.840
 - Group: 0.479
 - In-Group: 0.905
 
Minority RougeL-f1 on validation set: 0.813 <br>
Stereotype RougeL-f1 on validation set: 0.508 <br>

In [43]:
# Evaluate the loaded model on the data at CONFIG.dataset.test_data_agg and
# print the resulting metrics under the "test" label.
split = "test"
data = pd.read_pickle(CONFIG.dataset.test_data_agg).to_numpy()
dataset = SBICDataset(data, tokenizer, is_training=False)
# NOTE(review): the loader is built with split="validation" although this cell
# reports on "test"; presumably this selects evaluation-time loader settings —
# confirm against make_dataloader before changing it to `split=split`.
dataloader = make_dataloader(dataset, model, tokenizer, CONFIG.hp, split="validation")

test_res = evaluate(model, tokenizer, dataloader, CONFIG.hp)

print_evaluation_results(split=split, res=test_res)

Classification F1 on test set: avg=0.704
 - Offensive: 0.654
 - Intent: 0.617
 - Sex: 0.832
 - Group: 0.508
 - In-Group: 0.911
Minority RougeL-f1 on test set: 0.863
Stereotype RougeL-f1 on test set: 0.531


In [3]:
# Scratch experiment: instantiate GPT2ForTweetClassification from the distilgpt2
# configuration and run a single forward pass on two toy sentences.
#
# NOTE(review): this cell rebinds the notebook-wide `model` and mutates the
# shared `tokenizer` (adds a [CLS] token). Re-running the evaluation cells after
# it would use these objects instead of the checkpointed ones — confirm this is
# intended, or restart the kernel before evaluating.
# NOTE(review): the star import hides where GPT2ForTweetClassification comes
# from (presumably src.model); an explicit import would be clearer.
import transformers
from src.model import *

config = transformers.GPT2Config.from_pretrained("distilgpt2")
# Extra attribute set on the config; presumably read by the classification head — TODO confirm in src.model.
config.head_dropout = 0.1
# Built from the config only; no checkpoint is loaded into this model in this cell.
model = GPT2ForTweetClassification(config=config)

# Add a [CLS] to the vocabulary (we should train it also!)
num_added_tokens = tokenizer.add_special_tokens({"cls_token": "[CLS]"})
# Update the model embeddings with the new vocabulary size
embedding_layer = model.resize_token_embeddings(len(tokenizer))

choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"]
encoded_choices = [tokenizer.encode(s) for s in choices]
# Position of the [CLS] token inside each encoded sentence.
cls_token_location = [tokens.index(tokenizer.cls_token_id) for tokens in encoded_choices]

# torch.tensor on a list of lists requires both encodings to have the same length.
input_ids = torch.tensor(encoded_choices).unsqueeze(0)  # Batch size: 1, number of choices: 2
mc_token_ids = torch.tensor([cls_token_location])  # Batch size: 1

# NOTE(review): assumes the model's forward accepts `mc_token_ids` — verify against src.model.
outputs = model(input_ids, mc_token_ids=mc_token_ids)

GPT2ForTweetClassification(
  (gpt2): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-5): 6 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
  (clssf_head): ClassificationHead(
    (linear): Linear(in_features=768, out_features=1, bias=True)
    (drop_o