In [2]:
import json
import math
import os
import random
import re
import shutil
import sys
import time
from collections import defaultdict
from pathlib import Path
from datetime import datetime
from torchdata.stateful_dataloader import StatefulDataLoader
from loguru import logger
import torch
import torch.nn.functional as F
from torch import nn
from torch.utils.data import Dataset, DataLoader

# Root directory that holds the dataset files. It can be overridden with the
# RAY_DATA_HOME environment variable so the notebook is not tied to a single
# machine's filesystem layout; the original absolute path remains the default
# (an unset or empty variable falls back to it).
RAY_DATA_HOME = os.environ.get("RAY_DATA_HOME") or "/data/corpus/verl"
# Parquet files used as the train and test inputs by the cells below.
TRAIN_FILE = f"{RAY_DATA_HOME}/data/dapo-math-17k.parquet"
TEST_FILE = f"{RAY_DATA_HOME}/data/aime-2024.parquet"

In [9]:
import pandas as pd

# Peek at the test parquet: print its shape and column index, then every field
# of the first record.
df_parquet = pd.read_parquet(TEST_FILE)
print(f'{df_parquet.shape=} {df_parquet.columns=}')
# Only the first record is displayed. Taking head(1) before iterrows() means row
# iteration is set up over a one-row frame instead of the whole dataset, and an
# empty file simply prints nothing.
for i, row in df_parquet.head(1).iterrows():
    for col in df_parquet.columns:
        print(f'{col = }, {row[col] = }')


df_parquet.shape=(960, 5) df_parquet.columns=Index(['data_source', 'prompt', 'ability', 'reward_model', 'extra_info'], dtype='object')
col = 'data_source', row[col] = 'math_dapo'
col = 'prompt', row[col] = array([{'content': 'Solve the following math problem step by step. The last line of your response should be of the form Answer: $Answer (without quotes) where $Answer is the answer to the problem.\n\nFind the largest possible real part of \\[(75+117i)z+\\frac{96+144i}{z}\\]where $z$ is a complex number with $|z|=4$.\n\nRemember to put your answer on its own line after "Answer:".', 'role': 'user'}],
      dtype=object)
col = 'ability', row[col] = 'MATH'
col = 'reward_model', row[col] = {'ground_truth': '540', 'style': 'rule-lighteval/MATH_v2'}
col = 'extra_info', row[col] = {'index': 2, 'raw_problem': 'Find the largest possible real part of \\[(75+117i)z+\\frac{96+144i}{z}\\]where $z$ is a complex number with $|z|=4$.', 'split': None}


In [10]:
import pandas as pd

# Peek at the train parquet: print its shape and column index, then every field
# of the first record.
df_parquet = pd.read_parquet(TRAIN_FILE)
print(f'{df_parquet.shape=} {df_parquet.columns=}')
# Only the first record is displayed. Taking head(1) before iterrows() means row
# iteration is set up over a one-row frame instead of the whole dataset (the
# recorded run of this cell reports roughly 1.79M rows), and an empty file
# simply prints nothing.
for i, row in df_parquet.head(1).iterrows():
    for col in df_parquet.columns:
        print(f'{col = }, {row[col] = }')

df_parquet.shape=(1791700, 5) df_parquet.columns=Index(['data_source', 'prompt', 'ability', 'reward_model', 'extra_info'], dtype='object')
col = 'data_source', row[col] = 'math_dapo'
col = 'prompt', row[col] = array([{'content': 'Solve the following math problem step by step. The last line of your response should be of the form Answer: $Answer (without quotes) where $Answer is the answer to the problem.\n\nIn triangle $ABC$, $\\sin \\angle A = \\frac{4}{5}$ and $\\angle A < 90^\\circ$. Let $D$ be a point outside triangle $ABC$ such that $\\angle BAD = \\angle DAC$ and $\\angle BDC = 90^\\circ$. Suppose that $AD = 1$ and that $\\frac{BD}{CD} = \\frac{3}{2}$. If $AB + AC$ can be expressed in the form $\\frac{a\\sqrt{b}}{c}$ where $a, b, c$ are pairwise relatively prime integers, find $a + b + c$.\n\nRemember to put your answer on its own line after "Answer:".', 'role': 'user'}],
      dtype=object)
col = 'ability', row[col] = 'MATH'
col = 'reward_model', row[col] = {'ground_truth': '34',

In [3]:
import datasets

# Sanity-check that each parquet file can also be loaded through the
# `datasets` library, logging the first record of each file as a sample.
for parquet_file in [TRAIN_FILE, TEST_FILE]:
    # Fail fast with a specific error when the path is missing or does not
    # carry the .parquet extension.
    if not os.path.exists(parquet_file):
        raise FileNotFoundError(f"Parquet file {parquet_file} does not exist.")
    logger.info(f"Loading dataset from {parquet_file}")
    if not parquet_file.endswith('.parquet'):
        raise ValueError(f"File {parquet_file} is not a valid Parquet file.")
    try:
        # The loaded records are read from the 'train' key of the result.
        ds = datasets.load_dataset("parquet", data_files=parquet_file)
        # Log only the first record; the loop exits after one iteration.
        for item in ds['train']:
            logger.info(f'{type(item) = }, {item = }')
            break
        # Report the source file (not just the dataset repr) so the message
        # reads correctly; the dataset summary is kept for reference.
        logger.info(f"Dataset loaded successfully from {parquet_file}: {ds}")
    except Exception as e:
        logger.error(f"Failed to load dataset from {parquet_file}: {e}")
        # Re-raise instead of calling sys.exit(1): inside a notebook kernel
        # SystemExit hides the real traceback, while a bare `raise` keeps the
        # original exception and still stops the cell (and yields a non-zero
        # exit status when run as a script).
        raise

[32m2025-06-08 15:35:37.141[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m6[0m - [1mLoading dataset from /data/corpus/verl/data/dapo-math-17k.parquet[0m
[32m2025-06-08 15:35:38.156[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m14[0m - [1mtype(item) = <class 'dict'>, item = {'data_source': 'math_dapo', 'prompt': [{'content': 'Solve the following math problem step by step. The last line of your response should be of the form Answer: $Answer (without quotes) where $Answer is the answer to the problem.\n\nIn triangle $ABC$, $\\sin \\angle A = \\frac{4}{5}$ and $\\angle A < 90^\\circ$. Let $D$ be a point outside triangle $ABC$ such that $\\angle BAD = \\angle DAC$ and $\\angle BDC = 90^\\circ$. Suppose that $AD = 1$ and that $\\frac{BD}{CD} = \\frac{3}{2}$. If $AB + AC$ can be expressed in the form $\\frac{a\\sqrt{b}}{c}$ where $a, b, c$ are pairwise relatively prime integers, find $a + b + c$.\n\nRemember to put your answer on its own line aft