In [2]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load model and tokenizer
model_name = 'gpt2'
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
model = GPT2LMHeadModel.from_pretrained(model_name)
model.eval()

# Define prompts
prompts = [
    "Once upon a time",
    "In a galaxy far, far away",
    "The quick brown fox"
]

# Tokenize with padding and attention masks
inputs = tokenizer(prompts, return_tensors='pt', padding=True, truncation=True)

# Ensure pad_token_id is set
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token

# Generate text with proper handling of padding tokens
outputs = model.generate(
    input_ids=inputs['input_ids'],
    attention_mask=inputs['attention_mask'],
    max_length=50,
    num_return_sequences=1,
    pad_token_id=tokenizer.eos_token_id
)

# Decode generated texts
generated_texts = [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]

# Display results
for idx, text in enumerate(generated_texts):
    print(f"Prompt {idx+1}: {prompts[idx]}")
    print(f"Generated Text: {text}\n")

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Prompt 1: Once upon a time
Generated Text: Once upon a time

The sun shines in the sky

And the moon shines in the sky

And the stars shine in the sky

And the stars shine in the sky

And the stars shine in the

Prompt 2: In a galaxy far, far away
Generated Text: In a galaxy far, far away, the galaxy is a vast, vast, vast, vast, vast, vast, vast, vast, vast, vast, vast, vast, vast, vast, vast, vast, vast, vast, vast,

Prompt 3: The quick brown fox
Generated Text: The quick brown foxThe quick brown foxThe quick brown foxThe quick brown foxThe quick brown foxThe quick brown foxThe quick brown foxThe quick brown foxThe quick brown foxThe quick brown foxThe quick brown foxThe quick brown

