# DSR Sub Parquet Experiments

This notebook loads `data/train/dsr_sub.parquet`, inspects schema and samples, and shows how to edit prompts.

In [47]:
import pandas as pd
import pyarrow.parquet as pq

PATH = '../data/test/math500.parquet'


In [48]:
# Schema
pq.ParquetFile(PATH).schema

<pyarrow._parquet.ParquetSchema object at 0x76ee341b0780>
required group field_id=-1 schema {
  optional binary field_id=-1 data_source (String);
  optional group field_id=-1 prompt (List) {
    repeated group field_id=-1 list {
      optional group field_id=-1 element {
        optional binary field_id=-1 content (String);
        optional binary field_id=-1 role (String);
      }
    }
  }
  optional binary field_id=-1 ability (String);
  optional group field_id=-1 reward_model {
    optional binary field_id=-1 ground_truth (String);
    optional binary field_id=-1 style (String);
  }
  optional group field_id=-1 extra_info {
    optional int64 field_id=-1 index;
    optional binary field_id=-1 level (String);
    optional binary field_id=-1 solution (String);
    optional binary field_id=-1 subject (String);
    optional binary field_id=-1 unique_id (String);
    optional binary field_id=-1 url (String);
  }
  optional int64 field_id=-1 __index_level_0__;
}

In [49]:
# Load
df = pd.read_parquet(PATH)
df.head(5)

Unnamed: 0,data_source,prompt,ability,reward_model,extra_info
0,simplerl/math500,"[{'content': 'Convert the point $(0,3)$ in rec...",math,"{'ground_truth': '\left( 3, \frac{\pi}{2} \rig...","{'index': 0, 'level': '2', 'solution': 'We hav..."
1,simplerl/math500,[{'content': 'Define \[p = \sum_{k = 1}^\infty...,math,"{'ground_truth': 'p - q', 'style': 'rule'}","{'index': 1, 'level': '5', 'solution': 'We cou..."
2,simplerl/math500,"[{'content': 'If $f(x) = \frac{3x-2}{x-2}$, wh...",math,"{'ground_truth': '\frac{14}{3}', 'style': 'rule'}","{'index': 2, 'level': '3', 'solution': '$f(-2)..."
3,simplerl/math500,[{'content': 'How many positive whole-number d...,math,"{'ground_truth': '9', 'style': 'rule'}","{'index': 3, 'level': '3', 'solution': 'First ..."
4,simplerl/math500,[{'content': 'The results of a cross-country t...,math,"{'ground_truth': '\text{Evelyn}', 'style': 'ru...","{'index': 4, 'level': '2', 'solution': 'Evelyn..."


In [50]:
# Inspect a prompt row
df.loc[0, 'prompt']

array([{'content': 'Convert the point $(0,3)$ in rectangular coordinates to polar coordinates.  Enter your answer in the form $(r,\\theta),$ where $r > 0$ and $0 \\le \\theta < 2 \\pi.$', 'role': 'user'}],
      dtype=object)

In [55]:
# New prompt template and converter
NEW_PREFIX = (
    "Answer the given question. You must conduct reasoning inside <think> and </think> "
    "first every time you get new information. After reasoning, if you find you lack some "
    "knowledge or intermediate result, you can call a helper by <search> query </search>, "
    "and you should return the needed facts, formulas, or computed results between "
    "<information> and </information>. You can call this as many times as you want. "
    "For multi-step problems, break the task into sub-queries and resolve them one by one. "
    "If no further external facts or computations are needed, provide the final answer inside "
    "<answer> and </answer> without detailed illustrations. For example, <answer> {EXAMPLE} </answer>. "
    "Question:"
)
USE_BOXED = True  # set True only if your evaluator expects boxed answers
EXAMPLE_ANSWER = "\\boxed{42}" if USE_BOXED else "42"
# OLD_SUFFIX = " Let's think step by step and output the final answer within \\boxed{}."
OLD_SUFFIX = "asdfasdfasdfasdfasdfasdf"

def convert_prompt_text(text: str) -> str:
    # Extract the question before the old suffix.
    if text.endswith(OLD_SUFFIX):
        question = text[: -len(OLD_SUFFIX)]
    else:
        question = text
    return NEW_PREFIX.format(EXAMPLE=EXAMPLE_ANSWER) + " " + question


In [56]:
# Normalize prompt column (convert numpy arrays to lists)
import numpy as np

def normalize_prompt(p):
    if isinstance(p, np.ndarray):
        p = p.tolist()
    if isinstance(p, list) and len(p) == 1 and isinstance(p[0], np.ndarray):
        p = p[0].tolist()
    return p


In [57]:
# Example: edit all rows and write a new parquet
df_edit = df.copy(deep=True)
def apply_convert(p):
    if isinstance(p, np.ndarray):
        p = p.tolist()
    if isinstance(p, list) and p and isinstance(p[0], dict):
        p = p.copy()
        p[0] = p[0].copy()
        p[0]['content'] = convert_prompt_text(p[0]['content'])
    return p

df_edit['prompt'] = df_edit['prompt'].apply(apply_convert)
print(df_edit)
df_edit['prompt'] = df_edit['prompt'].apply(normalize_prompt)
print(df_edit)

OUT_PATH = '../data/test/math500_new.parquet'
df_edit.to_parquet(OUT_PATH, index=False)
print(df_edit)
OUT_PATH

          data_source                                             prompt  \
0    simplerl/math500  [{'content': 'Answer the given question. You m...   
1    simplerl/math500  [{'content': 'Answer the given question. You m...   
2    simplerl/math500  [{'content': 'Answer the given question. You m...   
3    simplerl/math500  [{'content': 'Answer the given question. You m...   
4    simplerl/math500  [{'content': 'Answer the given question. You m...   
..                ...                                                ...   
495  simplerl/math500  [{'content': 'Answer the given question. You m...   
496  simplerl/math500  [{'content': 'Answer the given question. You m...   
497  simplerl/math500  [{'content': 'Answer the given question. You m...   
498  simplerl/math500  [{'content': 'Answer the given question. You m...   
499  simplerl/math500  [{'content': 'Answer the given question. You m...   

    ability                                       reward_model  \
0      math  {'ground

'../data/test/math500_new.parquet'

In [58]:
NEW_PATH = '../data/test/math500_new.parquet'

# Load
new_df = pd.read_parquet(NEW_PATH)
new_df.head(5)

# Inspect a prompt row
new_df.loc[4, 'prompt']

array([{'content': 'Answer the given question. You must conduct reasoning inside <think> and </think> first every time you get new information. After reasoning, if you find you lack some knowledge or intermediate result, you can call a helper by <search> query </search>, and you should return the needed facts, formulas, or computed results between <information> and </information>. You can call this as many times as you want. For multi-step problems, break the task into sub-queries and resolve them one by one. If no further external facts or computations are needed, provide the final answer inside <answer> and </answer> without detailed illustrations. For example, <answer> \\boxed{42} </answer>. Question: The results of a cross-country team\'s training run are graphed below. Which student has the greatest average speed? [asy]\nfor ( int i = 1; i <= 7; ++i )\n{\n\ndraw((i,0)--(i,6));\n}\n\nfor ( int i = 1; i <= 5; ++i )\n{\n\ndraw((0,i)--(8,i));\n}\ndraw((-0.5,0)--(8,0), linewidth(1));\n