# 汎用 RLHF Parquet 変換ノートブック（Colab / ローカル）
以下の手順の1) で「読み込むデータセットを指定」した任意のデータセット（Hugging Face Hub / ローカルファイル）を読み込み、
**RLHF 用の統一スキーマ**に変換して `<OUTPUT_PREFIX>_train.parquet` / `<OUTPUT_PREFIX>_val.parquet` を出力します。

## 主な特徴
- HF Hub / ローカルファイル入力
- 既存/層化/ランダム分割
- 列名マッピング & 正解抽出ルール（copy/after_hash4/regex_group1/boxed/numeric）
- 追加メタ列のパススルー
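- Google ドライブの `rlhf_data/data` の下に Parquet ファイルを作成します（Colab 上の既定パスは `/content/drive/MyDrive/rlhf_data/data`。PC 側の同期フォルダでは例: `G:\マイドライブ\rlhf_data\data`）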


## メモ

分割は validation/val/dev/test があれば 既存スプリットを使用、無ければ topic 列がある時は層化、無ければランダムに設定します。

正解抽出は、サンプルに応じて after_hash4（####）、boxed（\boxed{}）、numeric（数値多め）を自動選択、該当なしは copy。

自動推定が外す可能性もあるので、プレビューを見て必要に応じて CONFIG["COLUMNS"] や REWARD.extractor を手で上書きしてください。

このセルを入れておけば、今後は データセット名を変えるだけで初期設定が一通り整います。

⚠️ 大規模データではメモリ使用量に注意してください（`NUM_PROC` を下げるなど）。

ランタイムの RAM が不足する場合は、`CONFIG["NUM_PROC"]` を小さくするか、GPU/ハイメモリランタイムを使用してください。

In [None]:
# Mount Google Drive at /content/drive. The google.colab module is imported
# here, so this cell is only expected to work inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip -q install datasets pyarrow pandas

## 0) データセットの構造を自動で解読する関数の読み込み

In [None]:
# ==== Auto-Configurator for HF datasets → CONFIG ====
from datasets import load_dataset, Dataset, DatasetDict
import re, textwrap, os

def _pick_col(candidates, present, samples, prefer_numeric=False):
    """候補名の優先順で列を選ぶ。samples は dict のリスト。"""
    present_set = set(present)
    for name in candidates:
        if name in present_set:
            # 文字/数値のヒント検査
            vals = [s.get(name) for s in samples if s.get(name) is not None]
            if not vals:
                continue
            if prefer_numeric:
                # 数値比率
                num_like = 0
                for v in vals:
                    s = str(v)
                    if re.fullmatch(r"\s*[-+]?(?:\d+(\.\d+)?|\d+/\d+)\s*", s):
                        num_like += 1
                ratio = num_like / max(1, len(vals))
                if ratio < 0.4:  # 数値列っぽくなければスキップ
                    continue
            return name
    return None

def _guess_reward_extractor(ans_samples):
    text = "\n".join(map(lambda x: "" if x is None else str(x), ans_samples[:20]))
    if "####" in text:
        return "after_hash4"
    if re.search(r"\\boxed\{[^}]+\}", text):
        return "boxed"
    # 数値が多ければ numeric
    tokens = re.findall(r"[-+]?\d*\.?\d+|\d+/\d+", text)
    if len(tokens) >= 5:  # 適当な閾値
        return "numeric"
    return "copy"

def infer_config_hf(dataset_id, subset=None, preview_rows=5, base_dir="/content/drive/MyDrive/rlhf_data"):
    """Load a Hugging Face dataset and guess a conversion CONFIG for it.

    The first ``preview_rows`` rows of the train-like split are inspected to
    guess the question/answer/topic/difficulty columns, the ground-truth
    extractor and the split strategy. A short preview is printed so the
    guesses can be checked and overridden by hand.

    Args:
        dataset_id: Dataset identifier passed to ``load_dataset``.
        subset: Optional configuration name passed to ``load_dataset``.
        preview_rows: Number of leading rows sampled for the heuristics.
        base_dir: Stored as ``BASE_DIR`` in the returned config.

    Returns:
        dict: The inferred CONFIG.

    Raises:
        RuntimeError: If ``load_dataset`` returns neither a ``Dataset`` nor a
            ``DatasetDict``.
    """
    obj = load_dataset(dataset_id, subset)

    # Decide which splits play the train / validation roles.
    val_key = None
    if isinstance(obj, Dataset):
        train_key = "train"
        splits = {"train": obj}
    elif isinstance(obj, DatasetDict):
        keys = list(obj.keys())
        # Preferred names for the train-like split, then whatever exists.
        train_key_pref = ["train", "cot", "main", "default"] + keys
        train_key = next((k for k in train_key_pref if k in obj), keys[0])
        # The validation split must be a different split from the train one;
        # otherwise a dataset that only ships e.g. "test" would be used as both
        # train and val.
        val_key = next(
            (k for k in ["validation", "val", "dev", "test"] if k in obj and k != train_key),
            None,
        )
        splits = {train_key: obj[train_key]}
        if val_key:
            splits[val_key] = obj[val_key]
    else:
        raise RuntimeError("Unknown object type from load_dataset")

    # Sample rows from the train-like split.
    train_split_name = train_key
    ds = splits[train_split_name]
    cols = ds.column_names
    samples = [ds[i] for i in range(min(preview_rows, len(ds)))]
    present = cols

    # Column guesses.
    q_candidates = ["question","problem","prompt","input","query","instruction","problem_text","text","title","task"]
    a_candidates = ["final_answer","expected_answer","answer","target","label","ground_truth","gt","solution","output"]

    question_col = _pick_col(q_candidates, present, samples) or present[0]
    # Positional fallback for the answer: prefer the second column, but never
    # reuse the question column while another column is available.
    positional = list(present[1:2]) + list(present)
    answer_col = _pick_col(a_candidates, present, samples) or next(
        (c for c in positional if c != question_col), present[0]
    )

    # Topic / difficulty.
    topic_candidates = ["topic","subject","category","domain","type"]
    diff_candidates  = ["difficulty","level","hardness","complexity"]
    topic_col = _pick_col(topic_candidates, present, samples)
    diff_col  = _pick_col(diff_candidates, present, samples, prefer_numeric=False)

    # EXTRA_PASSTHROUGH: commonly seen generation / metadata columns.
    extra_pool = [
        "generated_solution","generation","cot","chain_of_thought","rationale","scratchpad",
        "problem_source","problem_type","source","metadata","model","model_name","generation_model",
        "inference_mode","pass_rate_72b_tir","has_answer_extracted"
    ]
    mapped = [question_col, answer_col, topic_col, diff_col]
    extra = [k for k in extra_pool if k in present and k not in mapped]

    # Guess the ground-truth extractor from the sampled answers.
    ans_samples = [s.get(answer_col) for s in samples]
    extractor = _guess_reward_extractor(ans_samples)

    # Split strategy: reuse an existing validation split when there is one,
    # otherwise stratify by topic if a topic column was found, else random.
    if val_key:
        split_strategy = {"type": "existing"}
        hf_split_train, hf_split_val = train_split_name, val_key
    else:
        if topic_col:
            split_strategy = {"type": "stratified", "by": topic_col, "val_ratio": 0.02}
        else:
            split_strategy = {"type": "random", "val_ratio": 0.02}
        hf_split_train, hf_split_val = train_split_name, None

    # Light heuristic for the ability tag.
    looks_math_by_name = any(k in dataset_id.lower() for k in ["math","gsm8k","openmath","aime"])
    looks_math_by_cols = any(c in present for c in ["problem","expected_answer"])
    ability = "math" if looks_math_by_name or looks_math_by_cols else "general"

    # Output file prefix derived from the dataset id.
    prefix = re.sub(r"[^a-zA-Z0-9]+", "_", dataset_id).strip("_").lower()

    config = {
        "LOAD_MODE": "hf",
        "HF_DATASET": dataset_id,
        "HF_NAME": subset,
        "HF_SPLIT_TRAIN": hf_split_train,
        "HF_SPLIT_VAL": hf_split_val,
        "FILES": {"train": None, "val": None},
        "SPLIT_STRATEGY": split_strategy,
        "SEED": 2025,
        "COLUMNS": {
            "question":   question_col,
            "answer":     answer_col,
            "topic":      topic_col,
            "difficulty": diff_col
        },
        "EXTRA_PASSTHROUGH": extra,
        "INSTRUCTION": "Let's think step by step and output the final answer after \"####\". Only output the final answer after \"####\" on the last line.",
        "PROMPT_TEMPLATE": "{question}\n\n{instruction}",
        "ABILITY": ability,
        "REWARD": {"style": "rule", "extractor": extractor, "regex": None},
        "USE_DRIVE": True,
        "BASE_DIR": base_dir,
        "OUTPUT_PREFIX": prefix,
        "NUM_PROC": "auto",
    }

    # Preview of what was detected.
    print("=== Auto-detected CONFIG preview ===")
    print(f"- splits: {list(splits.keys())}")
    print(f"- train split: {hf_split_train} / val split: {hf_split_val}")
    print(f"- columns: {present[:12]}{' ...' if len(present)>12 else ''}")
    print(f"- mapping: question='{question_col}', answer='{answer_col}', topic='{topic_col}', difficulty='{diff_col}'")
    print(f"- reward.extractor: {extractor}")
    if extra:
        print(f"- extra passthrough: {extra}")

    # A few sample values, truncated to 120 characters.
    def trunc(x):
        s = "" if x is None else str(x)
        return (s[:120] + "…") if len(s) > 120 else s

    for i, s in enumerate(samples[:3]):
        print(f"  sample[{i}] question: {trunc(s.get(question_col))}")
        print(f"            answer  : {trunc(s.get(answer_col))}")
    print("====================================")
    return config

# 使い方（例）：
# auto = infer_config_hf("unsloth/OpenMathReasoning-mini")
# CONFIG = auto            # 新規に設定
# または既存 CONFIG に反映するなら:
# CONFIG.update(auto)


## 1) **「読み込むデータセットを指定」**して、データを読み取り、`CONFIG`（設定用の dict）に設定

In [None]:
# Choose the dataset to convert: keep exactly one line uncommented. The
# resulting CONFIG dict is read by all of the following cells.
#CONFIG = infer_config_hf("unsloth/OpenMathReasoning-mini")
CONFIG = infer_config_hf("zwhe99/DeepMath-103K")
#CONFIG = infer_config_hf("KbsdJames/Omni-MATH")
#CONFIG = infer_config_hf("RabotniKuma/Fast-Math-R1-GRPO")

README.md: 0.00B [00:00, ?B/s]

data/train-00000-of-00010.parquet:   0%|          | 0.00/217M [00:00<?, ?B/s]

data/train-00001-of-00010.parquet:   0%|          | 0.00/212M [00:00<?, ?B/s]

data/train-00002-of-00010.parquet:   0%|          | 0.00/214M [00:00<?, ?B/s]

data/train-00003-of-00010.parquet:   0%|          | 0.00/208M [00:00<?, ?B/s]

data/train-00004-of-00010.parquet:   0%|          | 0.00/207M [00:00<?, ?B/s]

data/train-00005-of-00010.parquet:   0%|          | 0.00/208M [00:00<?, ?B/s]

data/train-00006-of-00010.parquet:   0%|          | 0.00/207M [00:00<?, ?B/s]

data/train-00007-of-00010.parquet:   0%|          | 0.00/207M [00:00<?, ?B/s]

data/train-00008-of-00010.parquet:   0%|          | 0.00/273M [00:00<?, ?B/s]

data/train-00009-of-00010.parquet:   0%|          | 0.00/183M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/103022 [00:00<?, ? examples/s]

=== Auto-detected CONFIG preview ===
- splits: ['train']
- train split: train / val split: None
- columns: ['question', 'final_answer', 'difficulty', 'topic', 'r1_solution_1', 'r1_solution_2', 'r1_solution_3']
- mapping: question='question', answer='final_answer', topic='topic', difficulty='difficulty'
- reward.extractor: numeric
  sample[0] question: Evaluate the limit: \[ \lim_{x \to \infty} \sqrt{x} \left( \sqrt[3]{x+1} - \sqrt[3]{x-1} \right) \]
            answer  : 0
  sample[1] question: Find the auxiliary equation for the ordinary differential equation with constant coefficients: \((x^2D^2 + xD + 1)y = \s…
            answer  : m^2 + 1 = 0
  sample[2] question: Evaluate the limit: \[ \lim_{x \to 0} \left(\dfrac{1}{\tan^2 x}-\dfrac{1}{x^2} \right) \]
            answer  : -\dfrac{2}{3}


## 2) 準備（ドライブ/ログ/変数）

In [None]:
import os, time, logging, re, random, json, numpy as np
import pandas as pd
import datasets
from datasets import load_dataset

# Output location and optional Google Drive mount.
USE_DRIVE = bool(CONFIG.get("USE_DRIVE", False))
BASE_DIR = CONFIG.get("BASE_DIR", "/content/rlhf_data")
if USE_DRIVE:
    try:
        from google.colab import drive
        drive.mount('/content/drive', force_remount=True)
    except Exception as e:
        # Deliberately best-effort: outside Colab there is nothing to mount.
        print("Colab ドライブのマウントに失敗しました（ローカルなら無視可）:", e)

DATA_DIR = os.path.join(BASE_DIR, 'data')
LOG_DIR  = os.path.join(BASE_DIR, 'logs')
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(LOG_DIR, exist_ok=True)

ts = time.strftime('%Y%m%d_%H%M%S')
LOG_FILE = os.path.join(LOG_DIR, f'rlhf_parquet_{ts}.log')

# logging.basicConfig() does nothing when the root logger already has handlers
# (e.g. this cell was run before, or the notebook host installed its own).
# Drop the old handlers first so logs reach the console and the new LOG_FILE.
_root_logger = logging.getLogger()
for _old_handler in list(_root_logger.handlers):
    _root_logger.removeHandler(_old_handler)
    _old_handler.close()
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s',
                    handlers=[logging.FileHandler(LOG_FILE, encoding='utf-8'), logging.StreamHandler()])
logging.info(f"BASE_DIR={BASE_DIR}")
logging.info(f"LOG_FILE={LOG_FILE}")

# Worker count for Dataset.map: "auto" (or unset) means the CPU count capped
# at 8; explicit values are clamped to at least 1.
NUM_PROC = CONFIG.get("NUM_PROC", "auto")
if NUM_PROC is None or NUM_PROC == "auto":
    NUM_PROC = max(1, min(8, (os.cpu_count() or 1)))
else:
    NUM_PROC = max(1, int(NUM_PROC))
logging.info(f"NUM_PROC={NUM_PROC}")


Mounted at /content/drive


## 3) 入力読み込み（HF / ファイル）

In [None]:
def _detect_files_format(files):
    exts = []
    def push(p):
        if isinstance(p, str):
            exts.append(os.path.splitext(p)[1].lower())
        elif isinstance(p, (list, tuple)):
            for q in p:
                push(q)
    if isinstance(files, dict):
        for v in files.values():
            push(v)
    else:
        push(files)
    if any(e in ['.jsonl', '.json'] for e in exts):
        return 'json'
    if any(e == '.parquet' for e in exts):
        return 'parquet'
    if any(e == '.csv' for e in exts):
        return 'csv'
    return 'json'

def load_input_dataset():
    """Load the raw train / validation datasets described by CONFIG.

    In ``'hf'`` mode (the default) the dataset ``CONFIG['HF_DATASET']`` is
    loaded and the splits named by ``HF_SPLIT_TRAIN`` / ``HF_SPLIT_VAL`` are
    picked out. In any other mode ``CONFIG['FILES']`` is loaded with a builder
    chosen from the file extensions, and the ``'train'`` / ``'val'`` entries
    are returned.

    Returns:
        tuple: ``(train, val)``; either element is None when that split is
        not available.
    """
    mode = CONFIG.get('LOAD_MODE', 'hf')
    if mode == 'hf':
        name = CONFIG.get('HF_DATASET')
        subset = CONFIG.get('HF_NAME')
        split_train = CONFIG.get('HF_SPLIT_TRAIN') or 'train'
        split_val = CONFIG.get('HF_SPLIT_VAL')
        logging.info(f"Loading HF dataset: {name} (subset={subset})")
        ds_all = load_dataset(name, subset)
        train = ds_all[split_train] if split_train in ds_all else None
        val = ds_all[split_val] if (split_val and split_val in ds_all) else None
        return train, val

    files = CONFIG.get('FILES', {})
    if isinstance(files, dict):
        # The CONFIG template uses None for "no file" (e.g. {"val": None});
        # such entries are not paths and must not be handed to load_dataset.
        files = {key: value for key, value in files.items() if value}
    builder = _detect_files_format(files)
    logging.info(f"Loading from files with builder='{builder}': {files}")
    ds_all = load_dataset(builder, data_files=files)
    train = ds_all.get('train')
    val = ds_all.get('val')
    return train, val

# Load the raw splits; val_raw stays None when no validation data exists yet.
train_raw, val_raw = load_input_dataset()
# A missing split is reported as 0 rows.
logging.info(f"Loaded: train={0 if train_raw is None else len(train_raw):,}, val={(0 if val_raw is None else len(val_raw)):,}")


## 4) 分割（既存 / 層化 / ランダム）

In [None]:
def stratified_split(ds, by: str, val_ratio: float, seed: int):
    """Split ``ds`` into train / validation parts, sampling per group.

    Rows are grouped by ``str(row[by])``; rows where ``by`` is missing or None
    share the ``"__NO_TOPIC__"`` group. From every group
    ``max(1, int(size * val_ratio))`` rows are drawn for validation with a
    ``random.Random(seed)`` shuffle, so each group contributes at least one
    validation row.

    Returns:
        tuple: ``(train, val)`` built with ``ds.select`` on ascending indices.
    """
    rng = random.Random(seed)

    # Group row positions by label, keeping first-seen group order.
    groups = {}
    for pos, row in enumerate(ds):
        label = "__NO_TOPIC__"
        if by and by in row and row.get(by) is not None:
            label = row.get(by)
        groups.setdefault(str(label), []).append(pos)

    # Shuffle each group in place and take its leading slice for validation.
    held_out = []
    for members in groups.values():
        quota = max(1, int(len(members) * val_ratio))
        rng.shuffle(members)
        held_out.extend(members[:quota])

    held_out_lookup = set(held_out)
    train_indices = [pos for pos in range(len(ds)) if pos not in held_out_lookup]
    return ds.select(train_indices), ds.select(sorted(held_out))

def random_split(ds, val_ratio: float, seed: int):
    """Split ``ds`` into train / validation parts uniformly at random.

    Row positions are shuffled with ``random.Random(seed)``; the first
    ``max(1, int(len(ds) * val_ratio))`` of them form the validation part and
    the remainder the train part.

    Returns:
        tuple: ``(train, val)`` built with ``ds.select`` on ascending indices.
    """
    total = len(ds)
    order = list(range(total))
    random.Random(seed).shuffle(order)

    cut = max(1, int(total * val_ratio))
    held_out, kept = order[:cut], order[cut:]
    return ds.select(sorted(kept)), ds.select(sorted(held_out))

# Create a validation split only when none was loaded; an already loaded
# val_raw is used as-is regardless of the configured strategy type.
strategy = CONFIG.get('SPLIT_STRATEGY', {"type": "existing"})
if train_raw is None:
    raise RuntimeError("train データが見つかりません。CONFIG を確認してください。")

if val_raw is None:
    t = strategy.get('type', 'random')
    logging.info(f"No explicit val found. Splitting via strategy: {t}")
    if t == 'stratified':
        by = strategy.get('by', 'topic')
        val_ratio = float(strategy.get('val_ratio', 0.02))
        train_raw, val_raw = stratified_split(train_raw, by, val_ratio, CONFIG.get('SEED', 2025))
    else:
        # Every other type (including 'existing' with no val split available)
        # falls back to a random split.
        val_ratio = float(strategy.get('val_ratio', 0.02))
        train_raw, val_raw = random_split(train_raw, val_ratio, CONFIG.get('SEED', 2025))

logging.info(f"Splits ready: train={len(train_raw):,}, val={len(val_raw):,}")


## 5) 正解抽出器 & スキーママッピング

In [None]:
# Top-level keys every converted row must carry.
REQUIRED_KEYS = {"data_source", "prompt", "ability", "reward_model", "extra_info"}

def validate_row(row):
    """Check that a converted row follows the unified schema.

    The checks are written as explicit raises rather than ``assert``
    statements so they still run when Python is started with ``-O``.
    ``AssertionError`` is kept as the exception type for existing callers.

    Args:
        row: Mapping for one converted example.

    Raises:
        AssertionError: If a required key is missing, ``prompt`` is not a
            non-empty list of mappings with ``role`` and ``content``, or
            ``reward_model`` is not a mapping containing ``ground_truth``.
    """
    from collections.abc import Mapping

    missing = REQUIRED_KEYS - set(row.keys())
    if missing:
        raise AssertionError(f"Missing keys: {missing}")

    prompt = row["prompt"]
    if not (isinstance(prompt, list) and len(prompt) >= 1):
        raise AssertionError("prompt must be a list of chat turns")
    for turn in prompt:
        # A non-mapping turn (e.g. None) is reported as a schema error instead
        # of surfacing as a TypeError from the membership test.
        if not (isinstance(turn, Mapping) and "role" in turn and "content" in turn):
            raise AssertionError("each prompt turn needs role & content")

    reward = row["reward_model"]
    if not (isinstance(reward, Mapping) and "ground_truth" in reward):
        raise AssertionError("reward_model.ground_truth is required")

# Fractions are listed first: with the plain-number alternative first, "3/4"
# was tokenised as "3" and "4" and the fraction branch could never match.
_NUMERIC_TOKEN_RE = re.compile(r"[-+]?\d+/\d+|[-+]?\d*\.?\d+")


def _norm_spaces(s):
    """Trim, collapse whitespace runs to one space and drop one trailing period."""
    s = re.sub(r"\s+", " ", s.strip())
    # A string ending in "." can never also end in ".<digits>", so no extra
    # guard for decimals is needed here.
    if s.endswith("."):
        s = s[:-1]
    return s


def _first_boxed(text):
    """Return the content of the first non-empty, balanced ``\\boxed{...}``.

    Braces are matched by depth so nested groups such as
    ``\\boxed{\\frac{1}{2}}`` are returned whole. Returns None when no such
    group exists.
    """
    marker = "\\boxed{"
    search_from = 0
    while True:
        start = text.find(marker, search_from)
        if start < 0:
            return None
        body_start = start + len(marker)
        depth = 1
        for pos in range(body_start, len(text)):
            ch = text[pos]
            if ch == "{":
                depth += 1
            elif ch == "}":
                depth -= 1
                if depth == 0:
                    if pos > body_start:
                        return text[body_start:pos]
                    break  # empty \boxed{}: look for a later one
        # Unbalanced or empty group: continue after this marker.
        search_from = body_start


def extract_ground_truth(raw_answer, reward_cfg=None):
    """Extract the ground-truth string from a raw answer value.

    Args:
        raw_answer: Raw answer value; it is converted with ``str()``.
        reward_cfg: Mapping with ``extractor`` and optionally ``regex``.
            Defaults to ``CONFIG["REWARD"]``.

    Supported extractors:
        copy: the whitespace-normalised answer.
        after_hash4: the text following the first ``####``.
        regex_group1: group 1 of ``reward_cfg["regex"]``.
        boxed: the content of the first ``\\boxed{...}``.
        numeric: the last number (integer, decimal or ``a/b`` fraction).
    Whenever the pattern is not found, and for unknown extractor names, the
    whitespace-normalised answer is returned.

    Returns:
        The extracted string, or None when ``raw_answer`` is None.

    Raises:
        ValueError: If the extractor is ``regex_group1`` and no regex is set.
    """
    if raw_answer is None:
        return None
    if reward_cfg is None:
        reward_cfg = CONFIG["REWARD"]

    raw = str(raw_answer)
    ext = reward_cfg.get("extractor", "copy")

    if ext == "after_hash4":
        m = re.search(r"####\s*(.+)$", raw, flags=re.DOTALL)
        return _norm_spaces(m.group(1)) if m else _norm_spaces(raw)

    if ext == "regex_group1":
        pat = reward_cfg.get("regex")
        if not pat:
            raise ValueError("extractor='regex_group1' には CONFIG['REWARD']['regex'] が必要です。")
        m = re.search(pat, raw, flags=re.DOTALL)
        return _norm_spaces(m.group(1)) if m else _norm_spaces(raw)

    if ext == "boxed":
        inner = _first_boxed(raw)
        return _norm_spaces(raw if inner is None else inner)

    if ext == "numeric":
        numbers = _NUMERIC_TOKEN_RE.findall(raw)
        return numbers[-1] if numbers else _norm_spaces(raw)

    # "copy" and any unrecognised extractor name.
    return _norm_spaces(raw)

def make_map_fn(split: str, data_source: str):
    """Build the per-row converter passed to ``Dataset.map(with_indices=True)``.

    All settings are read from CONFIG once, when the converter is built.

    Args:
        split: Split label stored in ``extra_info["split"]``.
        data_source: Value stored in the ``data_source`` field.

    Returns:
        A function ``process(ex, idx)`` that maps one raw row to the unified
        schema (``data_source``, ``prompt``, ``ability``, ``reward_model``,
        ``extra_info``). It raises KeyError when the configured question or
        answer column is missing from the row.
    """
    columns = CONFIG["COLUMNS"]
    q_col, a_col = columns["question"], columns["answer"]
    topic_col, diff_col = columns.get("topic"), columns.get("difficulty")
    passthrough = CONFIG.get("EXTRA_PASSTHROUGH", [])
    instruction = CONFIG.get("INSTRUCTION", "")
    tmpl = CONFIG.get("PROMPT_TEMPLATE", "{question}\n\n{instruction}")
    ability = CONFIG.get("ABILITY", "general")
    reward_style = CONFIG["REWARD"].get("style", "rule")

    def process(ex, idx):
        if q_col not in ex:
            raise KeyError(f"質問列 '{q_col}' が見つかりません。存在する列: {list(ex.keys())[:20]} ...")
        if a_col not in ex:
            raise KeyError(f"正解列 '{a_col}' が見つかりません。存在する列: {list(ex.keys())[:20]} ...")

        q_raw, a_raw = ex[q_col], ex[a_col]
        if isinstance(q_raw, str):
            question = q_raw.rstrip()
        else:
            question = str(q_raw)
        gt = extract_ground_truth(a_raw)

        # Single-turn chat prompt built from the configured template.
        content = tmpl.format(question=question, instruction=instruction)

        extra = {"split": split, "index": idx}
        if topic_col and topic_col in ex:
            extra["topic"] = ex.get(topic_col)
        if diff_col and diff_col in ex:
            raw_diff = ex.get(diff_col)
            try:
                extra["difficulty"] = None if raw_diff is None else float(raw_diff)
            except Exception:
                # Values that cannot be converted to float are kept unchanged.
                extra["difficulty"] = raw_diff
        extra.update({k: ex[k] for k in passthrough if k in ex})

        return {
            "data_source": data_source,
            "prompt": [{"role": "user", "content": content}],
            "ability": ability,
            "reward_model": {"style": reward_style, "ground_truth": gt},
            "extra_info": extra
        }

    return process


## 6) 変換・保存・検証

In [None]:
# Convert both splits to the unified schema, spot-check them, write Parquet.
# data_source is the HF dataset id in 'hf' mode and the literal 'files' otherwise.
DATA_SOURCE_TAG = CONFIG.get('HF_DATASET') if CONFIG.get('LOAD_MODE') == 'hf' else 'files'
# remove_columns drops every original column, so only the mapped fields remain.
train = train_raw.map(make_map_fn('train', DATA_SOURCE_TAG), with_indices=True, num_proc=NUM_PROC, remove_columns=train_raw.column_names)
val   = val_raw.map(  make_map_fn('val',   DATA_SOURCE_TAG), with_indices=True, num_proc=NUM_PROC, remove_columns=val_raw.column_names)
logging.info(f"Mapped: train={len(train):,}, val={len(val):,}")

# Only the first 50 rows (or fewer) of each split are validated.
for r in train.select(range(min(50, len(train)))): validate_row(r)
for r in val.select(range(min(50, len(val)))):     validate_row(r)
logging.info("Validation passed on samples.")

# Output files: <DATA_DIR>/<OUTPUT_PREFIX>_train.parquet and ..._val.parquet.
prefix = CONFIG.get('OUTPUT_PREFIX', 'dataset')
out_train = os.path.join(DATA_DIR, f"{prefix}_train.parquet")
out_val   = os.path.join(DATA_DIR, f"{prefix}_val.parquet")
train.to_parquet(out_train)
val.to_parquet(out_val)

# File sizes in MB (bytes / 1e6).
s_train = os.path.getsize(out_train) / 1e6
s_val   = os.path.getsize(out_val) / 1e6
logging.info(f"Saved: {out_train} = {s_train:.1f} MB")
logging.info(f"Saved: {out_val}   = {s_val:.1f} MB")

# Read the train file back as a quick check; df is reused by the cells below.
df = pd.read_parquet(out_train)
display(df.head(3))


Map (num_proc=2):   0%|          | 0/100985 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/2037 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/101 [00:00<?, ?ba/s]

Creating parquet from Arrow format:   0%|          | 0/3 [00:00<?, ?ba/s]

Unnamed: 0,data_source,prompt,ability,reward_model,extra_info
0,zwhe99/DeepMath-103K,[{'content': 'Evaluate the limit: \[ \lim_{x \...,math,"{'ground_truth': '0', 'style': 'rule'}","{'difficulty': 4.5, 'index': 0, 'split': 'trai..."
1,zwhe99/DeepMath-103K,[{'content': 'Find the auxiliary equation for ...,math,"{'ground_truth': '0', 'style': 'rule'}","{'difficulty': 5.0, 'index': 1, 'split': 'trai..."
2,zwhe99/DeepMath-103K,[{'content': 'Evaluate the limit: \[ \lim_{x \...,math,"{'ground_truth': '3', 'style': 'rule'}","{'difficulty': 4.0, 'index': 2, 'split': 'trai..."


## 7) 調整のヒント
- 列名が違う → `CONFIG['COLUMNS']` を修正
- 正解抽出 → `CONFIG['REWARD']` を調整（GSM8Kなら `after_hash4`）
- 分割が難しい → `SPLIT_STRATEGY.type='random'`
- 大きすぎる → `NUM_PROC` を 1〜2 に下げる
- プロンプト変更 → `PROMPT_TEMPLATE` / `INSTRUCTION`
- ability 変更 → 例 'math' → 'reasoning'


# 以下は、変換データの確認用

In [None]:
import numpy as np
import pandas as pd


def _first_turn_content(turns):
    """Return the ``content`` of the first chat turn, or None if there is none.

    pd.read_parquet returns list-typed columns as numpy arrays rather than
    Python lists, so arrays must be accepted here; emptiness is tested with
    len() because the truth value of a multi-element array is ambiguous.
    """
    if not isinstance(turns, (list, tuple, np.ndarray)) or len(turns) == 0:
        return None
    return turns[0]["content"]


# Expose the text of the first prompt turn as its own column.
df["user_prompt"] = df["prompt"].apply(_first_turn_content)
# Expose the ground truth as its own column.
df["ground_truth"] = df["reward_model"].apply(
    lambda d: d.get("ground_truth") if isinstance(d, dict) else None
)
# Flatten extra_info into one column per key.
extra = pd.json_normalize(df["extra_info"])
display(df[["data_source","ability","user_prompt","ground_truth"]].head(3))
display(extra.head(3))


Unnamed: 0,data_source,ability,user_prompt,ground_truth
0,zwhe99/DeepMath-103K,math,,0
1,zwhe99/DeepMath-103K,math,,0
2,zwhe99/DeepMath-103K,math,,3


Unnamed: 0,difficulty,index,split,topic
0,4.5,0,train,Mathematics -> Precalculus -> Limits
1,5.0,1,train,Mathematics -> Differential Equations -> Ordin...
2,4.0,2,train,Mathematics -> Precalculus -> Limits


In [None]:
import json, numpy as np

def fallback(o):
    """``json.dumps`` ``default=`` hook for values the encoder cannot handle.

    numpy arrays become lists, numpy integer/floating/bool scalars become the
    matching Python scalar, and anything else is rendered with ``str()``.
    """
    numpy_scalars = (np.integer, np.floating, np.bool_)
    if isinstance(o, np.ndarray):
        converted = o.tolist()
    elif isinstance(o, numpy_scalars):
        converted = o.item()
    else:
        # Last resort: stringify.
        converted = str(o)
    return converted

# Pretty-print the first row as JSON; fallback() converts the values that
# json cannot serialise on its own, and ensure_ascii=False keeps non-ASCII
# text readable.
row = df.iloc[0].to_dict()
print(json.dumps(row, ensure_ascii=False, indent=2, default=fallback))


{
  "data_source": "zwhe99/DeepMath-103K",
  "prompt": [
    {
      "content": "Evaluate the limit: \\[ \\lim_{x \\to \\infty} \\sqrt{x} \\left( \\sqrt[3]{x+1} - \\sqrt[3]{x-1} \\right) \\]\n\nLet's think step by step and output the final answer after \"####\". Only output the final answer after \"####\" on the last line.",
      "role": "user"
    }
  ],
  "ability": "math",
  "reward_model": {
    "ground_truth": "0",
    "style": "rule"
  },
  "extra_info": {
    "difficulty": 4.5,
    "index": 0,
    "split": "train",
    "topic": "Mathematics -> Precalculus -> Limits"
  },
  "user_prompt": null,
  "ground_truth": "0"
}
