In [None]:
# Install Hugging Face transformers if not already
!pip install transformers

In [21]:
from transformers import BertTokenizer, BertForMaskedLM
import torch

In [None]:
# 1. Load pretrained BERT + official vocab
# One checkpoint name feeds both loaders so the tokenizer vocabulary and the
# model weights always come from the same checkpoint.
MODEL_NAME = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
model = BertForMaskedLM.from_pretrained(MODEL_NAME)

# Keep the mask token and its vocabulary id as module-level names.
mask_token = tokenizer.mask_token   # "[MASK]"
mask_token_id = tokenizer.mask_token_id
print("Mask token:", mask_token, "| ID:", mask_token_id)

In [23]:
# 2. Function to predict masked token(s)
def predict_masked_tokens(text, top_k=5):
    """Predict the most likely fillers for every mask token in ``text``.

    Relies on the module-level ``tokenizer``, ``model``, ``mask_token`` and
    ``mask_token_id`` objects.

    Args:
        text: Sentence containing one or more mask tokens.
        top_k: Number of candidates to return per mask position. Values larger
            than the model's vocabulary size are clamped to the vocabulary size.

    Returns:
        A list with one entry per mask position (in order of appearance); each
        entry is a list of ``(token, probability)`` tuples sorted from most to
        least likely, with probabilities rounded to 4 decimals.
        If the encoded text contains no mask token, a warning *string* is
        returned instead (kept for backward compatibility with existing callers).
    """
    # Encode text; truncate to the tokenizer's configured maximum length so an
    # over-long input is not passed to the model unbounded.
    inputs = tokenizer(text, return_tensors="pt", truncation=True)
    input_ids = inputs["input_ids"]

    # Locate mask positions in the (single) encoded sequence before running
    # the model, so inputs without a mask skip the forward pass entirely.
    mask_positions = (input_ids[0] == mask_token_id).nonzero(as_tuple=True)[0]
    if mask_positions.numel() == 0:
        return f"⚠️ No {mask_token} token found in: {text}"

    # Forward pass (inference only, no gradients needed)
    with torch.no_grad():
        logits = model(**inputs).logits

    # Softmax + top-k for all mask positions in one batched call.
    probs = logits[0, mask_positions].softmax(dim=-1)
    k = min(top_k, probs.shape[-1])  # torch.topk raises if k exceeds the vocab size
    top = torch.topk(probs, k, dim=-1)

    results = []
    # .tolist() yields plain Python ints/floats for decoding and rounding.
    for token_ids, token_probs in zip(top.indices.tolist(), top.values.tolist()):
        tokens = [tokenizer.decode([token_id]) for token_id in token_ids]
        scores = [round(prob, 4) for prob in token_probs]
        results.append(list(zip(tokens, scores)))

    return results


In [None]:

# 3. Run the predictor on a few sample sentences, each with one [MASK] slot.
examples = [
    "The capital of France is [MASK].",
    "Tesla is building a [MASK] robot.",
    "Football legend Messi joined [MASK].",
]

for ex in examples:
    # Echo the sentence first so the output shows which input is being processed.
    print(f"\nInput: {ex}")
    predictions = predict_masked_tokens(ex)
    print("Predictions:", predictions)