In [None]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch
import torch.nn.functional as F

# Step-by-step demonstration of autoregressive sampling with GPT-2:
# encode a prompt, run the model, then draw the next token(s) using
# temperature scaling followed by top-k filtering.

# Load pre-trained GPT-2 model and tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")
# Evaluation mode switches layers such as dropout to their inference
# behaviour. It does NOT disable gradient tracking; that is done by the
# torch.no_grad() context used around the forward pass below.
model.eval()

# Step 1: Input prompt
prompt = "What is the capital city of Nepal?"
input_ids = tokenizer.encode(prompt, return_tensors="pt")

print("Token IDs:", input_ids)

# Sampling settings
temperature = 0.8  # < 1 sharpens the distribution, > 1 flattens it
top_k = 50  # sample only among the k highest-scoring tokens
num_tokens_to_generate = 1  # increase to generate more than one new token

# Fail early with a clear message instead of producing NaN/empty
# distributions that torch.multinomial would reject later.
if temperature <= 0:
    raise ValueError("temperature must be positive")
if top_k < 1:
    raise ValueError("top_k must be at least 1")

# Running sequence (prompt + sampled tokens), kept as a [1, seq_len] tensor
generated_tensor = input_ids

for _ in range(num_tokens_to_generate):
    # GPT-2 has a fixed number of position embeddings, so feed at most
    # that many trailing tokens to the model.
    context = generated_tensor[:, -model.config.n_positions:]

    # Step 2: Pass through model to get output logits
    with torch.no_grad():
        outputs = model(context)
        logits = outputs.logits  # Shape: [1, seq_len, vocab_size]

    # Step 3: Focus on the last token's logits (scores for the next token)
    last_token_logits = logits[0, -1, :]

    # Step 4: Apply temperature and top-k sampling
    scaled_logits = last_token_logits / temperature

    # Top-k filtering; clamp k so torch.topk never asks for more entries
    # than the vocabulary holds.
    k = min(top_k, scaled_logits.size(-1))
    top_k_logits, top_k_indices = torch.topk(scaled_logits, k)
    top_k_probs = F.softmax(top_k_logits, dim=-1)

    # Sample a position inside the top-k list, then map it back to the
    # corresponding vocabulary id.
    next_token = torch.multinomial(top_k_probs, num_samples=1)
    next_token_id = top_k_indices[next_token]

    # Step 5: Append the new token; the next iteration conditions on it
    generated_tensor = torch.cat(
        [generated_tensor, next_token_id.unsqueeze(0)], dim=1
    )

# Plain-Python view of the generated token ids
generated = generated_tensor[0].tolist()

# Decode the tokens
decoded_text = tokenizer.decode(generated_tensor[0])
print("\nGenerated Text:")
print(decoded_text)
