<a href="https://colab.research.google.com/github/ritwikraha/nanoRL/blob/experiments/notebooks/verbal_reasoning_synthetic_pair_generation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Notebook shell command: quietly (-q) install the `google-genai` and `datasets`
# packages that the cells below import.
!pip install -q google-genai datasets

In [None]:
from google import genai
from google.colab import userdata

import random
import json

import time

from datasets import Dataset
from huggingface_hub import login

In [None]:
# Call huggingface_hub's login() so this session is authenticated with the
# Hugging Face Hub before the dataset is pushed in the final cell.
login()

In [None]:
# Gemini API client, keyed by the "GEMINI_API_KEY" entry read from Colab user data.
client = genai.Client(api_key=userdata.get("GEMINI_API_KEY"))
# Model name passed to every client.models.generate_content call in
# generate_preference_data.
model = "gemini-2.5-flash"

In [None]:
def _strip_code_fences(text):
    """Trim whitespace and remove Markdown code-fence markers from *text*."""
    return text.strip().replace("```json", "").replace("```", "")


def _has_answer_and_reasoning(candidate):
    """Return True if *candidate* is a dict with 'answer' and 'reasoning' keys."""
    return (
        isinstance(candidate, dict)
        and "answer" in candidate
        and "reasoning" in candidate
    )


def generate_preference_data(num_samples=2000):
    """Generate chosen/rejected answer pairs for synthetic verbal reasoning questions.

    For each sample, a random task type is picked, the module-level ``client``
    is asked (using the module-level ``model`` name) to write a question for
    it, and then asked again to produce a correct ("chosen") and a flawed
    ("rejected") answer as JSON. Responses that cannot be parsed or do not
    have the expected structure are skipped with a warning.

    The loop keeps going until ``num_samples`` valid samples are collected;
    there is no cap on the number of attempts, so persistent API failures
    make it retry indefinitely (with a 5 second pause after each error).

    Args:
        num_samples: Number of valid samples to collect. Values <= 0 make no
            API calls.

    Returns:
        A dict of columns with keys ``"prompt"``, ``"chosen"`` and
        ``"rejected"``, each mapping to a list with one entry per sample.
        ``"chosen"``/``"rejected"`` entries are the dicts from the model
        response (containing at least ``"answer"`` and ``"reasoning"``).
        The lists are empty when no data was generated.
    """
    topics = [
        "a logical analogy problem (e.g., 'Leaf is to Tree as Page is to X')",
        "a short scenario requiring a logical deduction to find the outcome",
        "identifying the unstated, underlying assumption in a given argument",
        "determining the most logical cause for a specifically described effect",
        "finding the word that doesn't belong in a group based on a shared category",
        "evaluating a simple argument to pinpoint its primary logical flaw",
        "drawing a single, valid conclusion from a short passage of text",
        "completing a logical sequence of related words or concepts",
        "identifying the specific relationship between two words (like cause/effect)",
        "solving a simple syllogism to determine if the conclusion is valid",
    ]
    columns = ["prompt", "chosen", "rejected"]

    generated_data = []
    while len(generated_data) < num_samples:
        # Step 1: both API round-trips. Any failure here (network, quota, a
        # response without text, ...) is retried after a short pause.
        try:
            topic = random.choice(topics)

            prompt_instruction = f"Generate a single, clear question that is an example of the following verbal reasoning task: {topic}. Output nothing but the question itself, without any labels, formatting, or introductory text."
            question_response = client.models.generate_content(
                model=model, contents=prompt_instruction
            )
            prompt = question_response.text.strip()

            # Ask for the chosen/rejected pair, each with 'answer' and 'reasoning'.
            generation_prompt = f"""\
For the verbal reasoning question: "{prompt}"

Please generate two distinct answers, each with an 'answer' and 'reasoning' field:
1.  A logically sound and well-explained correct answer.
2.  A plausible-sounding but logically flawed or incorrect answer.

Format your response as a single, clean JSON object with two keys: "chosen" and "rejected".
Each value (for "chosen" and "rejected") should itself be a JSON object with two keys: "answer" and "reasoning".

Example:
{{
    "chosen": {{
        "answer": "Correct answer text.",
        "reasoning": "Detailed explanation of why this answer is correct."
    }},
    "rejected": {{
        "answer": "Incorrect answer text.",
        "reasoning": "Detailed explanation of why this answer is flawed or incorrect."
    }}
}}\
"""

            response = client.models.generate_content(
                model=model, contents=generation_prompt
            )
            cleaned_response = _strip_code_fences(response.text)
        except Exception as e:
            print(f"ERROR: An unexpected error occurred: {e}. Retrying in 5 seconds...")
            time.sleep(5)
            continue

        # Step 2: parse. Kept in its own try so the handler only ever reports
        # the response that actually failed to decode.
        try:
            response_json = json.loads(cleaned_response)
        except json.JSONDecodeError as e:
            print(
                f"WARNING: Skipping sample due to a JSON decoding error: {e}. Raw response: {cleaned_response}"
            )
            continue

        # Step 3: validate the structure. Valid JSON that is not an object
        # (list, string, number, null) is treated as missing the keys.
        if (
            not isinstance(response_json, dict)
            or "chosen" not in response_json
            or "rejected" not in response_json
        ):
            print(
                "WARNING: Skipping sample due to missing 'chosen' or 'rejected' key."
            )
            continue

        chosen = response_json["chosen"]
        rejected = response_json["rejected"]
        if not (
            _has_answer_and_reasoning(chosen) and _has_answer_and_reasoning(rejected)
        ):
            print(
                "WARNING: Skipping sample due to 'chosen' or 'rejected' not having expected 'answer'/'reasoning' format."
            )
            continue

        generated_data.append(
            {"prompt": prompt, "chosen": chosen, "rejected": rejected}
        )
        print(f"SUCCESS: Generated sample {len(generated_data)}/{num_samples}")

    if not generated_data:
        print("INFO: No data was generated.")

    # Pivot the list of row dicts into a dict of columns (the layout expected
    # by Dataset.from_dict). With no rows this yields empty lists, keeping the
    # return type consistent.
    return {key: [row[key] for row in generated_data] for key in columns}

In [None]:
# Generate 2000 preference samples and wrap the resulting column dict in a
# Hugging Face Dataset labelled as the "train" split.
generated_dataset = generate_preference_data(num_samples=2000)
hf_ds = Dataset.from_dict(generated_dataset, split="train")

# Upload to the "ritwikraha/reasoning" Hub repo, using the write token stored
# under "HF_WRITE_TOKEN" in Colab user data.
hf_ds.push_to_hub("ritwikraha/reasoning", token=userdata.get("HF_WRITE_TOKEN"))