In [None]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer, Trainer, TrainingArguments
import torch
from torch.utils.data import Dataset

# Define a custom dataset class for PyTorch
class RiddleDataset(Dataset):
    """Causal-LM training examples built from (riddle, solution) pairs.

    Each pair is rendered as ``"Riddle: <riddle> Solution: <solution>"``,
    followed by the tokenizer's EOS token when it defines one, and is
    tokenized on access (padded / truncated to ``max_length``).

    Args:
        riddles: Iterable of ``(riddle, solution)`` string pairs.
        tokenizer: Callable tokenizer that returns ``input_ids`` and
            ``attention_mask`` tensors with a leading batch dimension when
            called with ``return_tensors="pt"``.
        max_length: Fixed sequence length every example is padded or
            truncated to.
    """

    # Label value that the language-model loss skips.
    IGNORE_INDEX = -100

    def __init__(self, riddles, tokenizer, max_length=128):
        self.tokenizer = tokenizer
        # Terminate every example with one real EOS token. Padding is masked
        # out of the loss below, so this is the only place the model can learn
        # where a riddle/solution pair ends.
        eos = getattr(tokenizer, "eos_token", None) or ""
        self.data = [
            f"Riddle: {riddle} Solution: {solution}{eos}"
            for riddle, solution in riddles
        ]
        self.max_length = max_length

    def __len__(self):
        """Return the number of riddle/solution examples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Tokenize example ``idx`` and return its model inputs.

        Returns:
            dict with 1-D tensors ``input_ids``, ``attention_mask`` and
            ``labels``. ``labels`` is a copy of ``input_ids`` in which every
            padded position (``attention_mask == 0``) is replaced by
            ``IGNORE_INDEX`` so padding does not contribute to the loss.
        """
        encoded = self.tokenizer(
            self.data[idx],
            max_length=self.max_length,
            truncation=True,
            padding="max_length",
            return_tensors="pt",
        )
        input_ids = encoded["input_ids"].squeeze(0)
        attention_mask = encoded["attention_mask"].squeeze(0)

        # Clone first: masking must not overwrite the ids fed to the model.
        labels = input_ids.clone()
        labels[attention_mask == 0] = self.IGNORE_INDEX

        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "labels": labels,
        }

# Define the dataset (30 samples)
# Each entry is a (riddle, solution) tuple of strings; the solutions are
# numeric answers stored as text. The list is handed to RiddleDataset further
# down, which unpacks every entry as `riddle, solution`.
riddle_data = [
    ("What number becomes zero when you subtract 15 from half of it?", "30"),
    ("I am an odd number. Take away a letter, and I become even. What number am I?", "7"),
    ("If you multiply me by 2, I become 10. What number am I?", "5"),
    ("What number do you get when you multiply all the numbers on a telephone keypad?", "0"),
    ("What is the smallest positive integer that is equal to the sum of its digits plus the product of its digits?", "19"),
    ("If you add 5 to me, I become 10. What number am I?", "5"),
    ("What number is three times the sum of its digits?", "27"),
    ("I am a number. If you double me and add 4, you get 10. What number am I?", "3"),
    ("What number is the only even prime number?", "2"),
    ("If you divide me by 3, I become 5. What number am I?", "15"),
    ("What number is the square root of 64?", "8"),
    ("What number is the sum of the first three prime numbers?", "10"),
    ("If you subtract 7 from me, I become 8. What number am I?", "15"),
    ("What number is the product of the first three positive integers?", "6"),
    ("What number is the smallest perfect number?", "6"),
    ("If you add 12 to me, I become 20. What number am I?", "8"),
    ("What number is the sum of the digits of 12345?", "15"),
    ("What number is the difference between 20 and 7?", "13"),
    ("What number is the result of 3 squared plus 4 squared?", "25"),
    ("What number is the cube of 3?", "27"),
    ("What number is the sum of the first five positive integers?", "15"),
    ("What number is the result of 100 divided by 4?", "25"),
    ("What number is the factorial of 4?", "24"),
    ("What number is the smallest odd prime number?", "3"),
    ("What number is the sum of the digits of 999?", "27"),
    ("What number is the result of 10 multiplied by 10?", "100"),
    ("What number is the smallest positive integer that is not a prime number?", "1"),
    ("What number is the sum of the first four even numbers?", "20"),
    ("What number is the result of 2 to the power of 5?", "32"),
    ("What number is the smallest positive integer that is a multiple of both 2 and 3?", "6"),
]

# Start from the pretrained GPT-2 checkpoint: tokenizer first, then weights
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

# The dataset pads with padding="max_length", which needs a pad token;
# reuse EOS when the tokenizer does not define one
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Wrap the riddle pairs so the Trainer can index them
train_dataset = RiddleDataset(riddle_data, tokenizer)

# Hyperparameters for the fine-tuning run, collected in one place
training_options = {
    "output_dir": "./results",
    "num_train_epochs": 5,
    "per_device_train_batch_size": 2,
    "learning_rate": 5e-5,
    "weight_decay": 0.01,
    "save_steps": 100,
    "save_total_limit": 2,
    "logging_dir": "./logs",
    "logging_steps": 10,
    "report_to": "none",  # disable W&B / other tracker reporting
}
training_args = TrainingArguments(**training_options)

# Trainer wiring: model, arguments and training data only (no eval set)
trainer = Trainer(model=model, args=training_args, train_dataset=train_dataset)

# Run fine-tuning
trainer.train()

# Persist weights and tokenizer to the same directory so they reload together
fine_tuned_dir = "./fine_tuned_gpt2_math_riddles"
model.save_pretrained(fine_tuned_dir)
tokenizer.save_pretrained(fine_tuned_dir)


Step,Training Loss
10,2.2489
20,0.3624
30,0.2256
40,0.1934
50,0.1336
60,0.1739
70,0.1432


('./fine_tuned_gpt2_math_riddles/tokenizer_config.json',
 './fine_tuned_gpt2_math_riddles/special_tokens_map.json',
 './fine_tuned_gpt2_math_riddles/vocab.json',
 './fine_tuned_gpt2_math_riddles/merges.txt',
 './fine_tuned_gpt2_math_riddles/added_tokens.json')

In [None]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Reload the fine-tuned checkpoint (tokenizer and weights share one directory)
model_path = "./fine_tuned_gpt2_math_riddles"
tokenizer = GPT2Tokenizer.from_pretrained(model_path)
model = GPT2LMHeadModel.from_pretrained(model_path)

# Function to generate riddles
def generate_riddle(prompt="Riddle:", num_riddles=10, max_length=100, seed=None):
    """Sample riddles from the fine-tuned model.

    Uses the module-level ``model`` and ``tokenizer``.

    Args:
        prompt: Text the model continues; defaults to the ``"Riddle:"`` prefix.
        num_riddles: Number of independent samples to draw.
        max_length: Upper bound on the token length of each sample, prompt
            included.
        seed: Optional integer. When given, torch's RNG is seeded before
            sampling so the same riddles come back on a re-run.

    Returns:
        List of decoded strings, one per sample, with special tokens removed.
    """
    if seed is not None:
        torch.manual_seed(seed)

    inputs = tokenizer(prompt, return_tensors="pt")

    outputs = model.generate(
        **inputs,
        max_length=max_length,
        num_return_sequences=num_riddles,
        temperature=0.7,
        top_k=50,
        top_p=0.9,
        do_sample=True,
        # Explicit pad id for sequences that finish early. This is the value
        # generate() falls back to anyway, but passing it avoids the
        # "Setting `pad_token_id` to `eos_token_id`" warning.
        pad_token_id=tokenizer.eos_token_id,
    )

    return [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]

def _print_numbered(heading, items):
    """Print `heading`, then each item on its own line, numbered from 1."""
    print(heading)
    for position, text in enumerate(items, start=1):
        print(f"{position}. {text}")


# Draw a batch of samples from the fine-tuned model
generated_riddles = generate_riddle()

# Show every sample
_print_numbered("\nGenerated Riddles:", generated_riddles)

# Placeholder selection: keeps the first three samples as the "best" ones;
# edit this slice to hand-pick riddles instead
best_riddles = generated_riddles[:3]

_print_numbered("\nBest 3 Riddles:", best_riddles)


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Generated Riddles:
1. Riddle: What number is the sum of the first three positive integers? Solution: 3
2. Riddle: What number is the sum of the first four positive integers? Solution: 3
3. Riddle: What number is the product of the first three prime numbers? Solution: 15
4. Riddle: What number is the result of 3 to the power of 7? Solution: 5
5. Riddle: What number is the result of 3 divided by 4? Solution: 100
6. Riddle: What number is the sum of the first five prime numbers? Solution: 10
7. Riddle: What number is the sum of the first three prime numbers? Solution: 5
8. Riddle: What number is the smallest positive integer that is not a prime number? Solution: 10
9. Riddle: What number is the sum of the first five positive integers? Solution: 15
10. Riddle: What number is the sum of the first three positive integers? Solution: 25

Best 3 Riddles:
1. Riddle: What number is the sum of the first three positive integers? Solution: 3
2. Riddle: What number is the sum of the first four posit