In [22]:
from pathlib import Path 
import json
from reasoning_gym.utils import SYSTEM_PROMPTS

In [23]:
# Load every evaluated sample from the per-dataset result files.
# Layout walked below: <eval dir>/<category dir>/<dataset result file>, where each
# result file is JSON with a top-level "results" list.
evals = [
    Path("./eval/math-traces/x-ai_grok-3-mini-beta_easy"),
    Path("./eval/math-traces/x-ai_grok-3-mini-beta_medium"),
]

samples = []

# Named `eval_dir` rather than `eval`: a top-level loop variable stays bound in the
# kernel namespace and would shadow the `eval` builtin for every later cell.
for eval_dir in evals:
    # iterdir() yields entries in arbitrary order; sorting keeps the sample order
    # (and therefore any file written from it) stable across runs and machines.
    for category in sorted(eval_dir.iterdir()):
        # Non-directory entries at this level are not categories; ignore them.
        if not category.is_dir():
            continue
        for dataset in sorted(category.iterdir()):
            # Skip nested directories instead of failing inside open().
            if not dataset.is_file():
                continue
            # Read as UTF-8 explicitly rather than relying on the platform default encoding.
            with open(dataset, "r", encoding="utf-8") as f:
                data = json.load(f)

            # Keep only the fields needed downstream, under shorter key names.
            for sample in data["results"]:
                samples.append({
                    "question": sample["question"],
                    "answer": sample["best_model_answer"],
                    "full_answer": sample["best_full_model_response"],
                    "score": sample["best_score"],
                    "metadata": sample["metadata"],
                })

print(f"Loaded {len(samples)} samples from {len(evals)} evals")

Loaded 24000 samples from 2 evals


In [24]:
# Keep only the samples whose score is at or above the cutoff; the rest are dropped
THRESHOLD = 0.9
filtered_samples = list(filter(lambda entry: entry["score"] >= THRESHOLD, samples))
print(f"Filtered samples: {len(filtered_samples)}")

Filtered samples: 17649


In [25]:
# Write the filtered samples for RL training as JSON Lines (one JSON object per line)
output_path = Path("./training/data/train_rl.jsonl")
# Create the target directory if it is missing; no error if it already exists
output_path.parent.mkdir(parents=True, exist_ok=True)
with output_path.open("w") as rl_file:
    rl_file.writelines(json.dumps(record) + "\n" for record in filtered_samples)
print(f"Filtered samples for RL training saved to {output_path}")


# Write the filtered samples as chat transcripts for Supervised Fine-Tuning (SFT)
output_path_sft = Path("./training/data/train_sft.json")
output_path_sft.parent.mkdir(parents=True, exist_ok=True)

# One conversation per sample: the system prompt, the question as the user turn,
# and the full model response as the assistant turn.
sft_samples = []
for kept in filtered_samples:
    conversation = [
        {"role": "system", "content": SYSTEM_PROMPTS["DeepSeekZero"]},
        {"role": "user", "content": kept["question"]},
        {"role": "assistant", "content": kept["full_answer"]},
    ]
    sft_samples.append({"messages": conversation})

# The whole list is stored as a single indented JSON array
with output_path_sft.open("w") as sft_file:
    json.dump(sft_samples, sft_file, indent=4)
print(f"Filtered samples for SFT saved to {output_path_sft}")

Filtered samples for RL training saved to training/data/train_rl.jsonl
Filtered samples for SFT saved to training/data/train_sft.json
