In [1]:
import datasets
# Load the 'train' split of the DeepScaleR preview dataset. The bare name on the
# last line makes the notebook display the dataset summary.
ds_train = datasets.load_dataset(
    path='agentica-org/DeepScaleR-Preview-Dataset',
    split='train',
)
ds_train

Dataset({
    features: ['problem', 'answer', 'solution'],
    num_rows: 40315
})

In [2]:
# Index by row first, then by field: this decodes a single example, whereas
# ds_train['problem'][0] goes through the entire column to show one value.
first_example = ds_train[0]
print(first_example['problem'])
print(first_example['answer'])

The operation $\otimes$ is defined for all nonzero numbers by $a \otimes b = \frac{a^{2}}{b}$. Determine $[(1 \otimes 2) \otimes 3] - [1 \otimes (2 \otimes 3)]$.
-\frac{2}{3}


In [3]:
import os
# Load the 'AIME2025' split of the math reasoning benchmark as the test set.
ds_test = datasets.load_dataset(
    path='active-reasoning/math_reasoning_benchmark',
    split='AIME2025',
)
def add_empty_solution(example):
    """Set ``example['solution']`` to the empty string and return ``example``.

    The mapping is modified in place; the returned object is the same one that
    was passed in. Any existing 'solution' value is overwritten.
    """
    example.update({'solution': ''})
    return example
# Give the test split the same 'solution' column as the training split.
# The split holds only a few dozen rows, so the map runs in-process: starting
# one worker per CPU (num_proc=os.cpu_count()) costs more than the work itself
# and produces the same result.
ds_test = ds_test.map(add_empty_solution, batched=False)
ds_test

Dataset({
    features: ['problem', 'answer', 'solution'],
    num_rows: 30
})

In [4]:
from transformers import AutoTokenizer
# Tokenizer whose chat template generate_chat uses to render prompts.
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path='Qwen/Qwen3-4B-Base',
)

def generate_chat(query, apply_template=True):
    """Build the hint-generation prompt for one math problem.

    Args:
        query: Problem statement inserted after the ``Problem:`` label.
        apply_template: When True (the default, matching the original behaviour)
            the prompt is wrapped as a single user turn and rendered through
            ``tokenizer.apply_chat_template`` with the generation prompt
            appended. When False the bare instruction text is returned with no
            chat markup and the tokenizer is not used; this suits callers that
            store the text as message content and leave templating to a later
            stage.

    Returns:
        The prompt as a string.
    """
    user_content = f"""Given the following math problem, generate a list of insightful hints that help guide a student toward solving the problem. Each hint should be wrapped in a <note> block with the following structure:

<note>
<description>[Brief explanation of a key idea or technique relevant to the problem]</description>
<example>[Concrete illustrative example that demonstrates the idea in action]</example>
</note>
Combine all hint blocks inside a <notes> element. Your goal is to help the student reason through the problem step-by-step by surfacing useful strategies, intermediate goals, or simplifications.

Problem: {query}"""
    if not apply_template:
        return user_content

    # Keep the message list and the rendered string under separate names.
    messages = [{"role": "user", "content": user_content}]
    return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

In [5]:
# Row-first indexing fetches one example instead of going through the whole
# 'problem' column just to preview a single prompt.
print(generate_chat(ds_train[0]['problem']))

<|im_start|>user
Given the following math problem, generate a list of insightful hints that help guide a student toward solving the problem. Each hint should be wrapped in a <note> block with the following structure:

<note>
<description>[Brief explanation of a key idea or technique relevant to the problem]</description>
<example>[Concrete illustrative example that demonstrates the idea in action]</example>
</note>
Combine all hint blocks inside a <notes> element. Your goal is to help the student reason through the problem step-by-step by surfacing useful strategies, intermediate goals, or simplifications.

Problem: The operation $\otimes$ is defined for all nonzero numbers by $a \otimes b = \frac{a^{2}}{b}$. Determine $[(1 \otimes 2) \otimes 3] - [1 \otimes (2 \otimes 3)]$.<|im_end|>
<|im_start|>assistant



In [6]:
from typing import Dict, Any, Optional
def make_map_fn(split: str):
    """Build the per-example transform for one dataset split.

    Args:
        split: Split name recorded under ``extra_info['split']``
            ('train' or 'test').

    Returns:
        A function ``(example, idx) -> dict`` that turns one raw example with
        'problem', 'answer' and 'solution' fields into a record holding the
        prompt, the reward specification and bookkeeping info.
    """
    instruction = "Let's think step by step and output the final answer within \\boxed{}."

    def process_fn(example: Dict[str, Any], idx: int) -> Dict[str, Any]:
        # pop() removes the raw fields from `example` as they are read.
        problem_with_instruction = f"{example.pop('problem')} {instruction}"
        ground_truth = example.pop('answer')
        reference_solution = example.pop('solution')

        # NOTE(review): generate_chat returns text that already has the chat
        # template applied, yet it is stored below as the content of a
        # role='user' message. If the consumer of this dataset applies the chat
        # template again, the prompt ends up wrapped twice -- confirm intended.
        rendered_prompt = generate_chat(problem_with_instruction)

        prompt_messages = [{
            "role": "user",
            "content": rendered_prompt,
        }]
        reward_spec = {
            "style": "rule",
            "ground_truth": ground_truth,
        }
        # 'problem' holds the statement with the instruction suffix appended.
        bookkeeping = {
            'split': split,
            'index': idx,
            'problem': problem_with_instruction,
            'solution': reference_solution,
            'answer': ground_truth,
        }
        return {
            "data_source": "",
            "prompt": prompt_messages,
            "ability": "math",
            "reward_model": reward_spec,
            "extra_info": bookkeeping,
        }

    return process_fn

# The converted datasets replace the raw ones under the same names, and
# process_fn pops 'problem' from every example. Only map while that raw column
# is still present, so re-running this cell is a no-op rather than a KeyError.
if 'problem' in ds_train.column_names:
    ds_train = ds_train.map(function=make_map_fn('train'), with_indices=True)
if 'problem' in ds_test.column_names:
    ds_test = ds_test.map(function=make_map_fn('test'), with_indices=True)

ds_train[0]

Map:   0%|          | 0/40315 [00:00<?, ? examples/s]

{'data_source': '',
 'prompt': [{'content': "<|im_start|>user\nGiven the following math problem, generate a list of insightful hints that help guide a student toward solving the problem. Each hint should be wrapped in a <note> block with the following structure:\n\n<note>\n<description>[Brief explanation of a key idea or technique relevant to the problem]</description>\n<example>[Concrete illustrative example that demonstrates the idea in action]</example>\n</note>\nCombine all hint blocks inside a <notes> element. Your goal is to help the student reason through the problem step-by-step by surfacing useful strategies, intermediate goals, or simplifications.\n\nProblem: The operation $\\otimes$ is defined for all nonzero numbers by $a \\otimes b = \\frac{a^{2}}{b}$. Determine $[(1 \\otimes 2) \\otimes 3] - [1 \\otimes (2 \\otimes 3)]$. Let's think step by step and output the final answer within \\boxed{}.<|im_end|>\n<|im_start|>assistant\n",
   'role': 'user'}],
 'ability': 'math',
 're

In [7]:
# Display the first converted test example as a spot check.
ds_test[0]

{'data_source': '',
 'prompt': [{'content': "<|im_start|>user\nGiven the following math problem, generate a list of insightful hints that help guide a student toward solving the problem. Each hint should be wrapped in a <note> block with the following structure:\n\n<note>\n<description>[Brief explanation of a key idea or technique relevant to the problem]</description>\n<example>[Concrete illustrative example that demonstrates the idea in action]</example>\n</note>\nCombine all hint blocks inside a <notes> element. Your goal is to help the student reason through the problem step-by-step by surfacing useful strategies, intermediate goals, or simplifications.\n\nProblem: Find the sum of all integer bases $b>9$ for which $17_b$ is a divisor of $97_b.$ Let's think step by step and output the final answer within \\boxed{}.<|im_end|>\n<|im_start|>assistant\n",
   'role': 'user'}],
 'ability': 'math',
 'reward_model': {'ground_truth': '70', 'style': 'rule'},
 'extra_info': {'answer': '70',
  

In [8]:
# Output directory for the parquet files. The original location stays the
# default; set HINTGEN_DATA_DIR to write somewhere else without editing the
# notebook.
base_path = os.environ.get(
    'HINTGEN_DATA_DIR',
    '/home/anikait.singh/rl_behaviors_verl_stable/data_deepscaler_rl_hintgen',
)
# Create the directory first so the export works on a fresh machine.
os.makedirs(base_path, exist_ok=True)
ds_train.to_parquet(os.path.join(base_path, 'train.parquet'))
ds_test.to_parquet(os.path.join(base_path, 'test.parquet'))

Creating parquet from Arrow format:   0%|          | 0/41 [00:00<?, ?ba/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

54728