In [2]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch.nn.functional as F

# Load the model and tokenizer
model_name = "gpt2"  # or any other model from Hugging Face
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Set the model to evaluation mode
model.eval()

# Encode input text
input_text = "Once upon a time"
input_ids = tokenizer.encode(input_text, return_tensors='pt')

# Generate one extra token
with torch.no_grad():  # Disable gradient calculation
    output = model.generate(input_ids, max_length=input_ids.shape[1] + 1, return_dict_in_generate=True, output_scores=True)

# Get the generated token ID and logits
generated_ids = output.sequences[0]
logits = output.scores[-1]  # Get logits for the last generated token

# Calculate probabilities using softmax
probabilities = F.softmax(logits, dim=-1)

# Get the top 10 most probable tokens
top_k = 10
top_probabilities, top_indices = torch.topk(probabilities, top_k)

# Print the top tokens and their probabilities
print("Top 10 Most Probable Tokens:")
for i in range(top_k):
    token_id = top_indices[0][i].item()
    token = tokenizer.decode(token_id)
    probability = top_probabilities[0][i].item()
    print(f"Token: '{token}', Probability: {probability:.4f}")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Top 10 Most Probable Tokens:
Token: ',', Probability: 0.4269
Token: ' the', Probability: 0.0646
Token: ' I', Probability: 0.0406
Token: ' he', Probability: 0.0369
Token: ' there', Probability: 0.0294
Token: ' in', Probability: 0.0276
Token: ' it', Probability: 0.0234
Token: ' when', Probability: 0.0229
Token: ' of', Probability: 0.0227
Token: ' they', Probability: 0.0175
