In [None]:
import logging
import torch
from transformers import BertTokenizer, BertForMaskedLM

logging.getLogger("transformers").setLevel(logging.ERROR)

# Loaded (tokenizer, model) pairs keyed by checkpoint name, so repeated calls
# to predict_missing_word() do not reload the weights every time.
_BERT_CACHE = {}


def _load_bert(model_name='bert-base-uncased'):
    """Return the ``(tokenizer, model)`` pair for *model_name*, loading it once."""
    if model_name not in _BERT_CACHE:
        _BERT_CACHE[model_name] = (
            BertTokenizer.from_pretrained(model_name),
            BertForMaskedLM.from_pretrained(model_name),
        )
    return _BERT_CACHE[model_name]


def predict_missing_word(sentence, top_k=5):
    """Predict the most likely fillers for the ``[MASK]`` token in *sentence*.

    Args:
        sentence: Text containing a ``[MASK]`` placeholder. If it contains
            several, only the first one is predicted.
        top_k: Number of candidate tokens to return (default 5).

    Returns:
        A list of ``top_k`` vocabulary tokens, ordered from highest to
        lowest score at the masked position.

    Raises:
        ValueError: If *sentence* contains no mask token.
    """
    tokenizer, model = _load_bert()

    # encode() adds the special [CLS] ... [SEP] tokens around the sentence.
    # BERT was pretrained with them, so feeding the bare word pieces (as
    # tokenizer.tokenize() alone produces) hurts prediction quality.
    input_ids = tokenizer.encode(sentence, add_special_tokens=True)

    # Locate the mask *after* adding special tokens so the index lines up
    # with the model's output positions.
    try:
        mask_position = input_ids.index(tokenizer.mask_token_id)
    except ValueError:
        raise ValueError(
            f"sentence must contain a {tokenizer.mask_token} token: {sentence!r}"
        ) from None

    # Batch of one sequence; gradients are not needed for inference.
    with torch.no_grad():
        logits = model(torch.tensor([input_ids]))[0]

    # Scores over the vocabulary at the masked position -> best top_k ids.
    best = logits[0, mask_position].topk(top_k)

    return tokenizer.convert_ids_to_tokens(best.indices.tolist())





In [None]:
# Demo inputs: each sentence carries one [MASK] placeholder to be filled in.
sentence1, sentence2, sentence3 = (
    "I want to [MASK] a new book.",
    "The cat is sitting on the [MASK].",
    "She went to the store to buy some [MASK].",
)



In [None]:
# Run the predictor on every example sentence first ...
predicted_words1, predicted_words2, predicted_words3 = (
    predict_missing_word(text) for text in (sentence1, sentence2, sentence3)
)

# ... then show each candidate list on its own line.
print(predicted_words1, predicted_words2, predicted_words3, sep="\n")

['be', 'get', 'write', 'see', 'have']
['door', 'cat', 'bed', 'wall', 'screen']
['.', '"', 'more', ',', 'the']
