<a href="https://colab.research.google.com/github/rishabhranawat/prm-explorations/blob/main/PRM_PPO_Trl_v10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%%capture
!pip install -r /content/drive/MyDrive/colab/requirements-prm-trl-v10.txt

In [None]:
# Standard Library Imports
import gc
import os
import random
import re
import time

# Third-Party Library Imports
from datasets import Dataset, load_dataset
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader
from tqdm import tqdm
import wandb
from transformers import (
    AutoModelForCausalLM,
    AutoModelForTokenClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    GenerationConfig,
    pipeline,
)
from trl import (
    AutoModelForCausalLMWithValueHead,
    AutoModelForSeq2SeqLMWithValueHead,
    PPOConfig,
    PPOTrainer,
    create_reference_model,
)
from trl.core import LengthSampler

os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [None]:
# Load trained PRM (Reward model) and tokenizer
# prm_path = "/content/drive/MyDrive/GPT2PRM/pt2-prm-peft/checkpoint-6741/"  # Replace with your actual PRM path
max_length = 1024 # You may need to tune this parameter
batch_size = 8
step_separator = "\n"
base_policy_model_name = "gpt2"

prm_path = "/content/drive/MyDrive/GPT2PRM/pt2-full/checkpoint-30000/"
tokenizer = AutoTokenizer.from_pretrained(prm_path,
                                          max_length=max_length,
                                          truncation=True,
                                          model_max_length=max_length)

reward_model = AutoModelForTokenClassification.from_pretrained(prm_path)

tokenizer.padding_side="left"
tokenizer.truncation_side = "right"
tokenizer.pad_token = tokenizer.eos_token  # to avoid warnings

In [None]:
policy_model = AutoModelForCausalLMWithValueHead.from_pretrained(base_policy_model_name)
policy_tokenizer = AutoTokenizer.from_pretrained(
    base_policy_model_name,
    max_length=max_length,
    truncation=True,
    model_max_length=1024)
policy_tokenizer.pad_token = policy_tokenizer.eos_token

In [None]:
def policy_tokenize(sample):
    sample["input_ids"] = policy_tokenizer.encode(sample["prompt"],
                                                  return_tensors="pt",
                                                  max_length=max_length,
                                                  truncation=True)\
                                                  .squeeze()\
                                                  .to(reward_model.device)
    return sample


def filter_dataset(dataset, max_len=1024):
    def filter_condition(example):
        if len(example["prompt"] + ','.join(example["completions"])) > max_len:
          return False
        if len(example["prompt"]) > max_len:
            return False
        if len(",".join(example["completions"])) > max_len:
            return False
        return True

    filtered_dataset = dataset.filter(filter_condition)
    return filtered_dataset

splits = ["train", "test[:10%]"]
math_shepherd_dataset, math_shepherd_test_dataset = load_dataset("trl-lib/math_shepherd", split=splits)
math_shepherd_dataset = filter_dataset(math_shepherd_dataset.select(range(8000)))
math_shepherd_dataset = math_shepherd_dataset.map(policy_tokenize, \
                                                  remove_columns=math_shepherd_dataset.column_names, \
                                                  batched=False)

Map:   0%|          | 0/6146 [00:00<?, ? examples/s]

In [None]:
def collate_fn(batch):
    # Extract input_ids from batch
    input_ids = [torch.tensor(item["input_ids"]) for item in batch]

    # Pad sequences to the longest one in the batch
    padded_inputs = pad_sequence(input_ids, batch_first=True, padding_value=policy_tokenizer.pad_token_id, padding_side="left")

    return {
        "input_ids": padded_inputs
    }

math_shepherd_dataloader = DataLoader(math_shepherd_dataset, batch_size=batch_size, collate_fn=collate_fn)

In [None]:
ppo_config = PPOConfig(
    model_name = "gpt2",
    learning_rate=1.41e-5,
    batch_size=batch_size,
    mini_batch_size=batch_size
)

In [None]:
ppo_trainer = PPOTrainer(
    model=policy_model,
    config=ppo_config,
    dataset=math_shepherd_dataset,
    tokenizer=policy_tokenizer
)

In [None]:
generation_kwargs = {
    "min_length": 0,
    "top_k": 0.0,
    "top_p": 1.0,
    "do_sample": True,
    "pad_token_id": policy_tokenizer.eos_token_id,
    "max_length": max_length,
    "max_new_tokens": max_length,
}

In [None]:
# Function to extract the average step-wise reward (using token IDs, pre-encoded input_ids, and no attention mask)
def extract_step_rewards(output_tensor, input_ids, reward_model, step_separator_id):
    """
    This function extracts the average reward across all steps within a generated output.

    Args:
        output_tensor: The generated output tensor from the model. (sequence_length)
        input_ids: The encoded input_ids.
        reward_model: The trained reward model.
        step_separator_id: The token ID of the separator used to identify steps.

    Returns:
        A scalar tensor representing the average reward across all steps.
    """

    # Find the indices of the step separator in the output tensor
    separator_indices = (output_tensor == step_separator_id).nonzero(as_tuple=True)[0]

    # If no separators are found, treat the entire output as a single step
    if len(separator_indices) == 0:
        separator_indices = torch.tensor([len(output_tensor)], device=output_tensor.device)

    step_rewards = []
    start_index = 0

    for end_index in separator_indices:
        # Extract the current step from the output tensor
        current_step_tensor = output_tensor[start_index:end_index].to(reward_model.device)

        # Concatenate the input_ids with the current step tensor
        combined_tensor = torch.cat((input_ids, current_step_tensor), dim=0)

        # Add batch dimension
        combined_tensor = combined_tensor.unsqueeze(0)

        # Get logits from the reward model
        with torch.no_grad():
            logits = reward_model(input_ids=combined_tensor).logits

        # Get the logits for the LAST TOKEN only
        last_token_logits = logits[:, -1, :]  # Shape: (1, vocab_size)

        # Calculate probabilities for the last token
        last_token_probs = F.softmax(last_token_logits, dim=-1)

        # Extract the probability of the positive class (assuming label 1 is positive)
        # for the LAST TOKEN
        step_reward = last_token_probs[:, 1].detach() - 0.5

        step_rewards.append(step_reward)

        # Update the start index for the next step
        start_index = end_index + 1

    # Calculate the average reward across all steps
    if step_rewards:
      average_reward = torch.mean(torch.stack(step_rewards))
    else:
      average_reward = torch.tensor(0.0)

    return average_reward

In [None]:
newline_token_id = policy_tokenizer.encode("\n", add_special_tokens=False)[0]
output_length_sampler = LengthSampler(1, 1024)

In [None]:
wandb.init(project="prm-ppo-gpt2-v2-trl-v10")  # Replace with your project name
wandb.config.update(ppo_config.to_dict())  # Log PPOConfig hyperparameters
wandb.config.update({"batch_size": batch_size, "learning_rate": ppo_config.learning_rate}) # Log other hyperparameters

epochs = 3

for epoch in tqdm(range(epochs), "epoch: "):
    for step, batch in enumerate(math_shepherd_dataloader):
          query_tensors = list(batch["input_ids"].to(reward_model.device))

          if (len(query_tensors) < batch_size):
            continue

          generation_kwargs = {
              "min_length": 0,
              "top_k": 0.0,
              "top_p": 1.0,
              "do_sample": True,
              "pad_token_id": policy_tokenizer.eos_token_id,
              "max_length": max_length,
              "max_new_tokens": max_length,
          }
          response_tensors = ppo_trainer.generate(query_tensors, \
                                                  batch_size=batch_size, \
                                                  return_prompt=False,
                                                  length_sampler=output_length_sampler,
                                                  **generation_kwargs)

          for each in response_tensors:
            print(policy_tokenizer.decode(each))
          #### Compute reward score
          rewards = []
          for completions, prompt in zip(response_tensors, query_tensors):
              rewards.append(extract_step_rewards(completions, prompt, reward_model, newline_token_id))

          # #### Run PPO step
          stats = ppo_trainer.step(query_tensors, response_tensors, rewards)

          # 2. Log PPO Statistics (from the 'stats' dictionary)
          wandb.log(
              {
                  "epoch": epoch,
                  "step": step,

                  # Reward Metrics
                  "rewards/sum": torch.sum(torch.stack(rewards)).item(),
                  "reward/mean": torch.mean(torch.stack(rewards)).item(),
                  "reward/std": torch.std(torch.stack(rewards)).item(),
                  "reward/min": torch.min(torch.stack(rewards)).item(),
                  "reward/max": torch.max(torch.stack(rewards)).item(),

                  # PPO Loss Metrics
                  "ppo/loss/policy": stats["ppo/loss/policy"],
                  "ppo/loss/value": stats["ppo/loss/value"],
                  "ppo/loss/total": stats["ppo/loss/total"],

                  # PPO Policy Metrics
                  "ppo/policy/entropy": stats["ppo/policy/entropy"],
                  "ppo/policy/approxkl": stats["ppo/policy/approxkl"],
                  "ppo/policy/policykl": stats["ppo/policy/policykl"],
                  "ppo/policy/clipfrac": stats["ppo/policy/clipfrac"],
                  "ppo/policy/advantages": stats["ppo/policy/advantages"].mean().item(),  # Log mean advantage
                  "ppo/policy/advantages_std": stats["ppo/policy/advantages"].std().item(),  # Log std of advantage
              }
          )

          del query_tensors, response_tensors, rewards
          gc.collect()
          torch.cuda.empty_cache()

wandb.finish()

ppo_trainer.save_pretrained('/content/drive/MyDrive/PPOv10-v3/')

epoch:   0%|          | 0/3 [00:00<?, ?it/s]Both `max_new_tokens` (=489) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)




s on average, and

a speed determined and the average speed by Nick's bus and by The speed is


or

It's not too long and length but daily.

Even though you didn't took the time to tweet and way too now to actually be surprised.Here are 3 different questions that get gripped by the hustle and address with:


tweet Remember to see one for yourself, to learn of it on average vehicular car travel. Then after that we will discuss the other questions. Only "drifter" will get approved.

My wife takes me to the beginner's park and I get on it to climb it. She starts at 16-20mph. Mother shows up 15-20mph. Follow the trail pouring in at this hour and at 10-15mph this time. We want to lose distance. So quickly one can include multiple trails to climb with water. One One One It's 1230M...

Not many road owners do not look after those features. Here, you are--a little unknown recently, but you also do what you don't commonly do. Except for that but of meaning which 6 million people in the world 

Both `max_new_tokens` (=965) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
epoch:   0%|          | 0/3 [00:37<?, ?it/s]


KeyboardInterrupt: 

In [None]:
def extract_answer(text):
  """
  Extracts the numerical answer from the given text string.

  Args:
    text (str): The input string containing the answer in the format
                "Answer: tensor(number.)".

  Returns:
    float or None: The extracted numerical answer as a float, or None if no
                 match is found.
  """
  match = re.search(r"Answer: tensor\(([\d.]+)\)", text)
  if match:
    try:
      return float(match.group(1))
    except ValueError:
      return None
  return None


def preprocess_function(examples):
    """Preprocesses the examples for model input."""
    prompts = [f"Question: {q} Answer:" for q in examples["question"]]
    answers = [float(a.split("#### ")[1].replace(",", "")) for a in examples["answer"]]
    return {"prompts": prompts, "answers": answers}

def evaluate_model(model, tokenizer, dataset, batch_size=8):
    """Evaluates the given model on the GSM8k dataset using built-in batching."""
    model.eval()

    # Preprocess dataset
    processed_dataset = dataset.map(
        preprocess_function,
        batched=True,
        remove_columns=dataset.column_names
    )

    # Create data collator
    data_collator = DataCollatorWithPadding(
        tokenizer=tokenizer,
        padding=True,
        return_tensors="pt"
    )

    # Create dataloader
    def collate_fn(examples):
        # Tokenize prompts
        prompts = [example["prompts"] for example in examples]
        tokenized = tokenizer(prompts, padding=True, return_tensors="pt")
        # Add answers to batch
        tokenized["answers"] = torch.tensor([example["answers"] for example in examples])
        return tokenized

    dataloader = DataLoader(
        processed_dataset,
        batch_size=batch_size,
        collate_fn=collate_fn
    )

    correct_count = 0
    total_count = 0

    total_inference_time = 0
    num_batches = 0
    for batch in dataloader:
        if num_batches % 10 == 0:
          print(f"batch: {num_batches}...")
        # Move batch to device
        batch = {k: v for k, v in batch.items() if isinstance(v, torch.Tensor)}
        answers = batch.pop("answers")

        start_inference = time.time()
        with torch.no_grad():
            outputs = model.generate(
                **batch,
                pad_token_id=tokenizer.eos_token_id
            )
        batch_inference_time = time.time() - start_inference
        total_inference_time += batch_inference_time
        num_batches += 1

        # Print timing for first few batches
        if num_batches <= 3:
            print(f"Batch {num_batches} inference time: {batch_inference_time:.2f} seconds")
            print(f"Input shape: {batch['input_ids'].shape}")
            print(f"Output shape: {outputs.shape}")
        # Decode all outputs
        generated_texts = [tokenizer.decode(output, skip_special_tokens=True)
                         for output in outputs]

        # Process each result in the batch
        for generated_text, answer in zip(generated_texts, answers):
            print("Generated text \n", generated_text)
            extracted_answer = extract_answer(generated_text)
            print("Extracted Answer here:", extracted_answer)
            print("Actual answer: ", answer)
            if extracted_answer is not None:
                if abs(extracted_answer - answer.item()) < 1e-6:
                    correct_count += 1
                total_count += 1
            else:
                print(f"Could not extract answer from: {generated_text}")
                total_count += 1

    accuracy = correct_count / total_count if total_count > 0 else 0
    return accuracy

if __name__ == "__main__":
    # 0. Setup
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using device: {device}")

    # 1. Load the Saved Model and Tokenizer
    model_path = "/content/drive/MyDrive/PPOv10/"
    tokenizer_fine_tuned = AutoTokenizer.from_pretrained(model_path, padding_side='left')
    saved_ppo_model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float32)
    tokenizer_fine_tuned.pad_token = tokenizer_fine_tuned.eos_token

    # 2. Load the Base GPT-2 Model and Tokenizer
    base_model_name = "gpt2"
    tokenizer_base = AutoTokenizer.from_pretrained(base_model_name, padding_side='left')
    base_gpt2_model = AutoModelForCausalLM.from_pretrained(base_model_name, torch_dtype=torch.float32)
    tokenizer_base.pad_token = tokenizer_base.eos_token

    # 3. Load the GSM8k Dataset
    gsm8k_dataset = load_dataset("gsm8k", "main")
    test_dataset = gsm8k_dataset["test"]

    # Set batch size
    BATCH_SIZE = 8  # Adjust based on your GPU memory

    # 4. Evaluate the Fine-tuned Model
    print("Evaluating Fine-tuned Model...")
    fine_tuned_accuracy = evaluate_model(
        saved_ppo_model,
        tokenizer_fine_tuned,
        test_dataset,
        batch_size=BATCH_SIZE,
    )
    print(f"Fine-tuned Model Accuracy: {fine_tuned_accuracy:.4f}")

    # 5. Evaluate the Base GPT-2 Model
    print("Evaluating Base GPT-2 Model...")
    base_accuracy = evaluate_model(
        base_gpt2_model,
        tokenizer_base,
        test_dataset,
        batch_size=BATCH_SIZE,
    )
    print(f"Base GPT-2 Model Accuracy: {base_accuracy:.4f}")


Using device: cuda


Some weights of the model checkpoint at /content/drive/MyDrive/PPOv10/ were not used when initializing GPT2LMHeadModel: ['v_head.summary.bias', 'v_head.summary.weight']
- This IS expected if you are initializing GPT2LMHeadModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing GPT2LMHeadModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


README.md:   0%|          | 0.00/7.94k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/2.31M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/419k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/7473 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1319 [00:00<?, ? examples/s]

Evaluating Fine-tuned Model...


Map:   0%|          | 0/1319 [00:00<?, ? examples/s]

batch: 0...
Batch 1 inference time: 1.70 seconds
Input shape: torch.Size([8, 108])
Output shape: torch.Size([8, 109])
Generated text 
 Question: Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market? Answer:
Extracted Answer here: None
Actual answer:  tensor(18.)
Could not extract answer from: Question: Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market? Answer:
Generated text 
 Question: A robe takes 2 bolts of blue fiber and half that much white fiber.  How many bolts in total does it take? Answer:
Extracted Answer here: None
Actual answ

KeyboardInterrupt: 

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from tqdm import tqdm

def compare_model_weights(model_path_v10, model_path_v11, device="cpu"):
    """
    Compares the parameters of two models loaded from given paths.

    Args:
        model_path_v10 (str): Path to the directory of the first model (v10).
        model_path_v11 (str): Path to the directory of the second model (v11).
        device (str): Device to load the models onto ('cpu' or 'cuda').

    Returns:
        dict: A dictionary containing statistics about parameter differences:
           - "num_params": total number of parameters compared
           - "num_diff_params": number of parameters with different values
           - "max_diff": maximum difference between parameters
           - "mean_diff": average difference between parameters
    """

    # Load model and tokenizer
    tokenizer_v10 = AutoTokenizer.from_pretrained(model_path_v10, padding_side='left')
    model_v10 = AutoModelForCausalLM.from_pretrained(model_path_v10, torch_dtype=torch.float32)
    tokenizer_v10.pad_token = tokenizer_v10.eos_token

    tokenizer_v11 = AutoTokenizer.from_pretrained(model_path_v11, padding_side='left')
    model_v11 = AutoModelForCausalLM.from_pretrained(model_path_v11, torch_dtype=torch.float32)
    tokenizer_v11.pad_token = tokenizer_v11.eos_token


    params_v10 = list(model_v10.parameters())
    params_v11 = list(model_v11.parameters())

    num_params = 0
    num_diff_params = 0
    max_diff = 0.0
    total_diff = 0.0

    with torch.no_grad():
        for p10, p11 in tqdm(zip(params_v10, params_v11), total=len(params_v10), desc="Comparing parameters"):
          num_params += p10.numel()
          diff = torch.abs(p10 - p11)
          max_param_diff = torch.max(diff).item()
          mean_param_diff = torch.mean(diff).item()

          if max_param_diff > 1e-4:
              num_diff_params += 1
              max_diff = max(max_diff, max_param_diff)
              total_diff += diff.sum().item()


    if num_diff_params > 0:
       mean_diff = total_diff/num_params
    else:
        mean_diff = 0.0


    return {
        "num_params": num_params,
        "num_diff_params": num_diff_params,
        "max_diff": max_diff,
        "mean_diff": mean_diff,
    }

if __name__ == "__main__":
    # Replace these with your actual model paths
    model_path_v10 = "/content/drive/MyDrive/PPOv10/"
    model_path_v11 = "/content/drive/MyDrive/PPOv11/"
    device = "cuda" if torch.cuda.is_available() else "cpu"

    comparison_results = compare_model_weights(model_path_v10, model_path_v11, device=device)

    print(f"Total Number of Parameters: {comparison_results['num_params']:,}")
    print(f"Number of Different Parameters: {comparison_results['num_diff_params']:,}")
    print(f"Maximum Difference: {comparison_results['max_diff']:.6f}")
    print(f"Mean Difference: {comparison_results['mean_diff']:.6f}")

    if comparison_results["num_diff_params"] == 0:
        print("No parameter changes detected between the two models.")
    else:
        print("Some parameter changes detected between the two models")

Some weights of the model checkpoint at /content/drive/MyDrive/PPOv10/ were not used when initializing GPT2LMHeadModel: ['v_head.summary.bias', 'v_head.summary.weight']
- This IS expected if you are initializing GPT2LMHeadModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing GPT2LMHeadModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at /content/drive/MyDrive/PPOv11/ were not used when initializing GPT2LMHeadModel: ['v_head.summary.bias', 'v_head.summary.weight']
- This IS expected if you are initializing GPT2LMHeadModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification mod

Total Number of Parameters: 124,439,808
Number of Different Parameters: 148
Maximum Difference: 0.023481
Mean Difference: 0.000937
Some parameter changes detected between the two models





In [None]:
device

'cuda'

In [None]:
question = "Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?"
tokenized = policy_tokenizer.encode(question)

In [None]:

saved_ppo_model.generate(
                torch.tensor(tokenized),
                max_new_tokens=1024,
                do_sample=False,
                pad_token_id=tokenizer.eos_token_id,
            )

ValueError: The following `model_kwargs` are not used by the model: ['batch_size'] (note: typos in the generate arguments will also show up in this list)

In [None]:
tokenized

[12128,
 316,
 447,
 247,
 82,
 39694,
 3830,
 1467,
 9653,
 583,
 1110,
 13,
 1375,
 25365,
 1115,
 329,
 12607,
 790,
 3329,
 290,
 275,
 1124,
 27563,
 1040,
 329,
 607,
 2460,
 790,
 1110,
 351,
 1440,
 13,
 1375,
 16015,
 262,
 17675,
 379,
 262,
 9818,
 6,
 1910,
 4445,
 329,
 720,
 17,
 583,
 4713,
 22045,
 5935,
 13,
 1374,
 881,
 287,
 5054,
 857,
 673,
 787,
 790,
 1110,
 379,
 262,
 9818,
 6,
 1910,
 30]

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def generate_model_output(
    model, tokenizer, prompt, max_new_tokens=1024
):
    """
    Generates text output from a given model and tokenizer.

    Args:
        model (transformers.AutoModelForCausalLM): The model for text generation.
        tokenizer (transformers.PreTrainedTokenizer): The tokenizer for the model.
        prompt (str): The input text prompt.
        device (str): The device to use ('cpu' or 'cuda').
        max_new_tokens (int): The maximum number of tokens to generate.

    Returns:
        str: The generated output text.
    """

    # Tokenize the prompt
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs.pop("attention_mask")
    print(inputs)
    print(inputs)

    # Generate text
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            pad_token_id=tokenizer.pad_token_id,
            max_new_tokens=5
        )

    # Decode and return the output
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return generated_text

In [None]:
# Define your prompt
prompt = "Question: Charlie had 10 stickers. \
  He bought 21 stickers from a store in the mall and got 23 stickers for his birthday. \
  Then Charlie gave 9 of the stickers to his sister and used 28 to decorate a greeting card. \
  How many stickers does Charlie have left? Answer: "

base_model_name = "gpt2"
tokenizer_base = AutoTokenizer.from_pretrained(base_model_name)
base_gpt2_model = AutoModelForCausalLM.from_pretrained(base_model_name, torch_dtype=torch.float32)
tokenizer_base.pad_token = tokenizer_base.eos_token

# Generate and print output
generated_output = generate_model_output(
    base_gpt2_model, tokenizer_base, prompt
)
print(f"Prompt:\n{prompt}\n\nGenerated Output:\n{generated_output}")

{'input_ids': tensor([[24361,    25, 11526,   550,   838, 28568,    13,   679,  5839,  2310,
         28568,   422,   257,  3650,   287,   262, 17374,   290,  1392,  2242,
         28568,   329,   465, 10955,    13,  3244, 11526,  2921,   860,   286,
           262, 28568,   284,   465,  6621,   290,   973,  2579,   284, 11705,
           378,   257, 31933,  2657,    13,  1374,   867, 28568,   857, 11526,
           423,  1364,    30, 23998,    25,   220]])}
{'input_ids': tensor([[24361,    25, 11526,   550,   838, 28568,    13,   679,  5839,  2310,
         28568,   422,   257,  3650,   287,   262, 17374,   290,  1392,  2242,
         28568,   329,   465, 10955,    13,  3244, 11526,  2921,   860,   286,
           262, 28568,   284,   465,  6621,   290,   973,  2579,   284, 11705,
           378,   257, 31933,  2657,    13,  1374,   867, 28568,   857, 11526,
           423,  1364,    30, 23998,    25,   220]])}
Prompt:
Question: Charlie had 10 stickers. He bought 21 stickers from a st