In [None]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Load the GPT-2 tokenizer and language-model head.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# GPT-2 ships without a pad token; reuse the end-of-sequence token for padding.
tokenizer.pad_token = tokenizer.eos_token
# If the prompt ever exceeds max_length, drop tokens from the START so the
# trailing "Question: ... Answer:" cue is kept (the default truncates the end,
# which would cut off the question itself).
tokenizer.truncation_side = "left"
model = GPT2LMHeadModel.from_pretrained("gpt2")
model.config.pad_token_id = tokenizer.pad_token_id
model.eval()  # inference only: disables dropout

# Define context and question
context = """The Indian Premier League (IPL) is a professional Twenty20 cricket league in India organised by the Board of Control for Cricket in India (BCCI). Founded in 2007, the league features ten state-based or city-based franchise teams. The IPL is the most popular and richest cricket league in the world and is held between March and May. It has an exclusive window in the Future Tours Programme of the International Cricket Council (ICC), resulting in fewer international cricket tours occurring during the IPL seasons. It is also the most viewed sports competition in India, as per the Broadcast Audience Research Council.

In 2014, it ranked sixth in attendance among all sports leagues. In 2010, the IPL became the first sporting event to be broadcast live on YouTube. Inspired by the success of the IPL, other Indian sports leagues have been established. IPL is the second-richest sports league in the world by per-match value, after the NFL. In 2023, the league sold its media rights for the next 4 seasons for US$6.4 billion to Viacom18 and Star Sports, which meant that each IPL match was valued at $13.4 million. As of 2024, there have been seventeen seasons of the tournament. The current champions are the Kolkata Knight Riders, who won the 2024 season after defeating the Sunrisers Hyderabad in the final. In just six years, the IPL's value has more than doubled, reaching $12 billion in 2024."""

question = "When did IPL start?"
prompt = f"Context: {context}\nQuestion: {question}\nAnswer:"

# Tokenize the prompt; the attention mask is passed to generate() explicitly
# because the pad token is the same id as the EOS token.
inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512)
input_ids = inputs["input_ids"]
attention_mask = inputs["attention_mask"]

# Sample a continuation of up to 50 new tokens without tracking gradients.
with torch.no_grad():
    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_new_tokens=50,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )

# Full decoded text (prompt + continuation); kept for any later cell that uses it.
decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)

# Extract the answer at the TOKEN level: the output sequence starts with the
# prompt tokens, so everything after the prompt length is newly generated.
# Slicing the decoded string by len(prompt) is fragile, because a truncated
# prompt or decode-time whitespace clean-up shifts the character offset.
prompt_token_count = input_ids.shape[-1]
generated_tokens = outputs[0][prompt_token_count:]
generated_answer = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

print("Generated Answer:\n", generated_answer)
