In [1]:
from datasets import load_dataset
from cb import CB
from util import *

# Data: the EleutherAI/lambada_openai dataset; only its test split is used below.
lambada = load_dataset("EleutherAI/lambada_openai")
test_set = lambada['test'].map(extract_label)
# The 'feat' and 'label' columns are read back after mapping `extract_label`;
# presumably that helper creates them — confirm in util.
X = test_set['feat']
Y = test_set['label']

# Model: CB wrapper built from two hyperparameters.
# NOTE(review): the meaning of alpha and k is defined in cb.CB and is not
# visible from this notebook — confirm before tuning.
alpha, k = -0.6, 10
cb_model = CB(alpha, k)



  0%|          | 0/1 [00:00<?, ?it/s]



### without beam search

In [2]:
# Run without beam search (no beam_width passed). One call yields two prediction
# sets, unpacked as (boosted, plain) judging by the variable names — confirm the
# return order in cb.CB.
y_pred_boosted, y_pred = cb_model.boosted_batched_generate(
    X,
    batch_size=32,
    fmax_score=True,
)

100%|██████████| 162/162 [01:43<00:00,  1.57it/s]


### with beam search, beam_width=5

In [15]:
# Same inputs and settings as the non-beam run, with beam_width=5 added.
# Results are kept in separate *_beam variables so both runs can be scored.
y_pred_boosted_beam, y_pred_beam = cb_model.boosted_batched_generate(
    X,
    batch_size=32,
    fmax_score=True,
    beam_width=5,
)

100%|██████████| 162/162 [01:42<00:00,  1.59it/s]


In [4]:
# Target used for the "first subtoken" metrics further down.
y_tokenized = cb_model.tokenize_label(Y)
# Target used for the "last subtoken" metrics: the final input id of each label
# when the label is passed to the tokenizer on its own.
# NOTE(review): this goes through the raw tokenizer rather than tokenize_label,
# so any preprocessing tokenize_label applies is not applied here — confirm the
# two targets are tokenized consistently.
y_tokenized_rev = [
    cb_model.tokenizer(label)["input_ids"][-1]
    for label in Y
]

### last word accuracy

In [5]:
# Score the non-beam predictions against the raw labels Y, plain model first and
# boosted model second, using the same metric call for both.
lw_acc, lw_acc_boost = (
    last_word_accuracy(cb_model, preds, Y)
    for preds in (y_pred, y_pred_boosted)
)
print(f"last word accuracy (GPT): {lw_acc}\nlast word accuracy (boosted GPT): {lw_acc_boost}")

last word accuracy (GPT): 0.2507277314185911
last word accuracy (boosted GPT): 0.4370269745779158


### accuracy on the first and last subtoken of the last word

In [25]:
# Last-subtoken accuracy for the non-beam predictions (plain, then boosted).
# first_token_accuracy is reused here with the last-subtoken targets
# (y_tokenized_rev); presumably it compares a predicted token against the given
# target id — confirm in util.
lt_acc, lt_acc_boost = (
    first_token_accuracy(preds, y_tokenized_rev)
    for preds in (y_pred, y_pred_boosted)
)
print(f"last subtoken accuracy (GPT): {lt_acc}\nlast subtoken accuracy (boosted GPT): {lt_acc_boost}")

last subtoken accuracy (GPT): 0.2507277314185911
last subtoken accuracy (boosted GPT): 0.43663885115466716


In [26]:
# First-subtoken accuracy for the non-beam predictions (plain, then boosted),
# scored against the targets produced by cb_model.tokenize_label.
ft_acc, ft_acc_boost = (
    first_token_accuracy(preds, y_tokenized)
    for preds in (y_pred, y_pred_boosted)
)
print(f"first subtoken accuracy (GPT): {ft_acc}\nfirst subtoken accuracy (boosted GPT): {ft_acc_boost}")

first subtoken accuracy (GPT): 0.3283524160683097
first subtoken accuracy (boosted GPT): 0.5581214826314768


### accuracy on the first and last subtoken of the last word, with beam search

In [27]:
# Last-subtoken accuracy for the beam-search predictions (plain, then boosted),
# scored with the beam-aware metric against the last-subtoken targets.
lt_beam, lt_beam_boost = (
    first_token_w_beam_accuracy(preds, y_tokenized_rev)
    for preds in (y_pred_beam, y_pred_boosted_beam)
)
print(f"last subtoken accuracy with beam (GPT): {lt_beam}\nlast subtoken accuracy with beam (boosted GPT): {lt_beam_boost}")

last subtoken accuracy with beam (GPT): 0.46128468853095284
last subtoken accuracy with beam (boosted GPT): 0.6041141082864351


In [24]:
# First-subtoken accuracy for the beam-search predictions (plain, then boosted),
# scored with the beam-aware metric against the tokenize_label targets.
ft_beam, ft_beam_boost = (
    first_token_w_beam_accuracy(preds, y_tokenized)
    for preds in (y_pred_beam, y_pred_boosted_beam)
)
print(f"first subtoken accuracy with beam (GPT): {ft_beam}\nfirst subtoken accuracy with beam (boosted GPT): {ft_beam_boost}")

first subtoken accuracy with beam (GPT): 0.6037259848631865
first subtoken accuracy with beam (boosted GPT): 0.7826508829807879
