In [1]:
import torch
import os
# NOTE(review): the working directory is moved up one level *before* the
# project-local `util` package is imported; presumably `util/` lives in the
# parent of the notebook's directory — confirm. The path is relative, so
# re-running this cell moves up one more directory each time: run it only
# once per kernel session.
os.chdir("../")
from util import masking
import warnings
# Silence every warning category for the remainder of the session.
warnings.filterwarnings("ignore")

In [2]:
# Fetch the tokenizer for the uncased BERT-base checkpoint through torch.hub
tokenizer = torch.hub.load(
    'huggingface/pytorch-transformers',
    'tokenizer',
    'bert-base-uncased',
)

Using cache found in /Users/lhayne/.cache/torch/hub/huggingface_pytorch-transformers_main


In [3]:
# Fetch the matching BERT-base model; output_hidden_states=True makes the
# forward pass also return the per-layer hidden states compared later on
model = torch.hub.load(
    'huggingface/pytorch-transformers',
    'model',
    'bert-base-uncased',
    output_hidden_states=True,
)

Using cache found in /Users/lhayne/.cache/torch/hub/huggingface_pytorch-transformers_main
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [4]:
# Sentence pair fed to the model as sentence A / sentence B
text_1, text_2 = "Who was Jim Henson ?", "Jim Henson was a puppeteer"

# Encode the pair into token ids, letting the tokenizer insert the special
# tokens (for BERT: [CLS] at the beginning and [SEP] after each sentence)
indexed_tokens = tokenizer.encode(
    text=text_1,
    text_pair=text_2,
    add_special_tokens=True,
)

In [5]:
# Define sentence A and B indices associated to 1st and 2nd sentences (see paper).
# Segment A (id 0) covers [CLS], the first sentence and the [SEP] that closes
# it; every token after that first [SEP] belongs to segment B (id 1).
# The ids are derived from the encoded tokens instead of being hand-counted,
# so they always line up with the tokenizer's output (for this pair: 7 tokens
# in segment A and 7 in segment B).
first_sep_index = indexed_tokens.index(tokenizer.sep_token_id)
segment_a_length = first_sep_index + 1
segments_ids = [0] * segment_a_length + [1] * (len(indexed_tokens) - segment_a_length)

# Convert inputs to PyTorch tensors (wrapped in a list to add a batch dimension)
segments_tensors = torch.tensor([segments_ids])
tokens_tensor = torch.tensor([indexed_tokens])

In [6]:
# Baseline forward pass (no hooks attached yet); gradient tracking is
# switched off because the outputs are only inspected, never trained on
with torch.no_grad():
    encoded_layers = model(
        tokens_tensor,
        token_type_ids=segments_tensors,
    )

In [7]:
# Create hooked model: the wrapper lets hooks be attached to submodules by name
bert_hooked = masking.HookedModel(model)

# Create mask hook on output. The mask is sized from the actual inputs rather
# than hard-coded: (batch, sequence length, hidden size), which is
# (1, 14, 768) for this sentence pair and bert-base.
# NOTE(review): assumes OutputMaskHook expects a mask shaped like the hooked
# module's output — confirm against util.masking.
# A dedicated seeded generator keeps the random mask reproducible across runs
# without altering the global RNG state.
mask_shape = (*tokens_tensor.shape, model.config.hidden_size)
mask_generator = torch.Generator().manual_seed(0)
mask_hook = masking.OutputMaskHook(torch.randn(mask_shape, generator=mask_generator))

# Apply that hook to the output of the encoder layer at index 3
# (zero-based, i.e. the fourth encoder layer)
bert_hooked.apply_hook('encoder.layer.3.output',mask_hook)

In [8]:
# Forward pass with the mask hook in place. The call goes through `model`
# itself, not the `bert_hooked` wrapper; the differing outputs printed at the
# end of the notebook show the hook still takes effect on this call.
with torch.no_grad():
    encoded_layers_masked = model(
        tokens_tensor,
        token_type_ids=segments_tensors,
    )

In [9]:
# Remove hooks now that the masked forward pass has been recorded.
# NOTE(review): presumably this detaches every hook registered through
# `bert_hooked.apply_hook`, restoring the model's unmasked behaviour —
# confirm against util.masking.
bert_hooked.remove_hooks()

In [10]:
# Element 2 of each model output holds the per-layer hidden states (requested
# with output_hidden_states=True). Compare them entry by entry: the entries
# before the hooked layer's output match exactly, while the hooked layer's
# output and everything computed after it differ.
hidden_states_plain = encoded_layers[2]
hidden_states_masked = encoded_layers_masked[2]
for layer_idx in range(13):
    is_identical = torch.equal(
        hidden_states_plain[layer_idx],
        hidden_states_masked[layer_idx],
    )
    print(is_identical)

True
True
True
True
False
False
False
False
False
False
False
False
False
