In [3]:
from transformers import BertLMHeadModel, BertTokenizer, BertConfig
from transformers import BertTokenizer

import torch.nn.functional as F
import torch

In [4]:
# Checkpoint identifiers used below: the hub model name and a local model directory.
BASE_CHECKPOINT = 'bert-base-uncased'
TUNED_CHECKPOINT = 'models/less_based-kl_div-epoch_30_batch_16_lr_2e-6'  # presumably the fine-tuned weights — confirm

# One configuration object is loaded from the base checkpoint and handed to both models.
config = BertConfig.from_pretrained(BASE_CHECKPOINT)

# The tokenizer is built from the checkpoint's own files; it has no `config`
# parameter, so the model configuration is not passed to it.
tokenizer = BertTokenizer.from_pretrained(BASE_CHECKPOINT)

# NOTE(review): loading BertLMHeadModel this way logs "If you want to use
# `BertLMHeadModel` as a standalone, add `is_decoder=True.`"; BertForMaskedLM may
# be the better fit for [MASK] prediction — confirm before switching classes.
orig_model = BertLMHeadModel.from_pretrained(BASE_CHECKPOINT, config=config)
tuned_model = BertLMHeadModel.from_pretrained(TUNED_CHECKPOINT, config=config)

If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True.`
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertLMHeadModel: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertLMHeadModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertLMHeadModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True.`


In [65]:
# Probe sentence with a single [MASK] slot where a pronoun is expected.
text = "The developer argued with the designer because [MASK] did not like the design."
inputs = tokenizer(text, return_tensors="pt")

# Inference only: disable autograd so no computation graph is built or kept
# alive by the returned logits.
with torch.no_grad():
    orig_output = orig_model(**inputs, return_dict=True)
    tuned_output = tuned_model(**inputs, return_dict=True)

In [66]:
# Id the tokenizer assigns to its mask token.
mask_token_id = tokenizer.mask_token_id

# Token ids of the first (index 0) sequence in the encoded batch, as a plain list.
first_sequence = inputs["input_ids"][0]
inputs_ids = first_sequence.tolist()

# Position of the first mask token; list.index raises ValueError if it is absent.
mask_idx = inputs_ids.index(mask_token_id)

In [67]:
# Take only the logits at the mask position of batch item 0, then softmax over
# the last dimension; the other sequence positions are never used here.
orig_mask_logits = orig_output.logits[0, mask_idx]
orig_probs, orig_ids = F.softmax(orig_mask_logits, dim=-1).topk(k=5)

# convert_ids_to_tokens is documented for an int or a list of ints, so pass
# plain Python ints instead of a tensor.
orig_top_tokens = tokenizer.convert_ids_to_tokens(orig_ids.tolist())

# Displayed as the cell result: (probability formatted to 3 decimals, token) pairs.
[(f"{prob:.3f}", token) for prob, token in zip(orig_probs.tolist(), orig_top_tokens)]

[('0.702', 'he'),
 ('0.190', 'they'),
 ('0.051', 'she'),
 ('0.020', 'it'),
 ('0.001', 'developers')]

In [68]:
# Take only the logits at the mask position of batch item 0, then softmax over
# the last dimension; the other sequence positions are never used here.
tuned_mask_logits = tuned_output.logits[0, mask_idx]
tuned_probs, tuned_ids = F.softmax(tuned_mask_logits, dim=-1).topk(k=5)

# convert_ids_to_tokens is documented for an int or a list of ints, so pass
# plain Python ints instead of a tensor.
tuned_top_tokens = tokenizer.convert_ids_to_tokens(tuned_ids.tolist())

# Displayed as the cell result: (probability formatted to 3 decimals, token) pairs.
[(f"{prob:.3f}", token) for prob, token in zip(tuned_probs.tolist(), tuned_top_tokens)]

[('0.406', 'he'),
 ('0.380', 'they'),
 ('0.133', 'she'),
 ('0.017', 'it'),
 ('0.006', 'people')]