# Bidirectional Encoder Representations from Transformers (BERT), Masked Word Completion

In [1]:
import torch
from pytorch_transformers import BertTokenizer, BertModel, BertForMaskedLM

## Set Device

In [2]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

## Load Pre-trained BERT Model Tokenizer (Vocabulary)

In [None]:
# Load the tokenizer (vocabulary) published under the 'bert-base-uncased' name,
# the same pre-trained name used for the model weights below.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

## Encode Text Inputs

In [None]:
# Two-sentence input with the [CLS] / [SEP] special tokens written out by hand.
text = '[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]'
# Split the text into tokens; the assert in the next cell shows the result is
# lower-cased and broken into word pieces (e.g. 'puppet', '##eer').
tokenized_text = tokenizer.tokenize(text)

In [None]:
# Position of the token to hide from the model (the second 'henson').
masked_index = 8
tokenized_text[masked_index] = '[MASK]'

# Sanity check: the token sequence must look exactly like this after masking.
assert tokenized_text == [
    '[CLS]', 'who', 'was', 'jim', 'henson', '?', '[SEP]',
    'jim', '[MASK]', 'was', 'a', 'puppet', '##eer', '[SEP]',
]

In [None]:
# Map every token to its id in the tokenizer's vocabulary.
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)

# Segment ids: 0 for the first sentence up to and including its '[SEP]',
# 1 for everything after it. They are derived from the token list instead of
# being hard-coded so the list always has the same length as `indexed_tokens`
# when the input text changes. Raises ValueError if there is no '[SEP]' token.
sentence_a_len = tokenized_text.index('[SEP]') + 1
segments_ids = [0] * sentence_a_len + [1] * (len(tokenized_text) - sentence_a_len)

# Wrap each list in an outer list so the tensors get a leading dimension of size 1.
tensor_tokens = torch.tensor([indexed_tokens])
tensor_segments = torch.tensor([segments_ids])

In [None]:
# Send both input tensors to the selected device in a single step.
tensor_tokens, tensor_segments = (
    tensor_tokens.to(device),
    tensor_segments.to(device),
)

## Load Pre-trained BERT Model Weights

In [None]:
# Load the pre-trained 'bert-base-uncased' weights into the masked-language-model class.
bert = BertForMaskedLM.from_pretrained('bert-base-uncased')
# Move the model to the selected device (the input tensors are sent to the same one).
bert.to(device)

## Evaluate BERT Model

In [None]:
# Put the model in evaluation mode before running inference.
bert.eval()

# No gradients are needed for prediction, so run the forward pass under no_grad.
with torch.no_grad():
    outputs = bert(tensor_tokens, token_type_ids=tensor_segments)
    # The first element of the output is used as the prediction scores;
    # presumably shaped (batch, sequence, vocabulary) -- TODO confirm.
    predictions = outputs[0]

In [None]:
# Index of the highest score at the masked position of the first (only) sequence.
predicted_index = predictions[0, masked_index].argmax().item()
# Convert that single id back to its token string.
(predicted_token,) = tokenizer.convert_ids_to_tokens([predicted_index])

In [None]:
# Show the token predicted for the masked position.
print('Prediction is:', predicted_token)

---