# 🧠 Mathematical Reasoning SFT Data Pipeline (Evol-Instruct + PoT)
This notebook integrates a complete closed-loop workflow: **Seed Data Sampling**, **Instruction Evolution (Evol)**, **Program-of-Thought (PoT) Generation**, and **Sandbox Execution Verification**.

### 1. Environment Preparation and Dependency Installation

In [None]:
pip install requests tqdm

### 2. Phase 1: Seed Data Sampling
Extract basic samples from a raw dataset (e.g., GSM8K) to serve as "seeds."

In [None]:
import json
import random
import os

# Source dataset for Phase 1; read as JSONL (one JSON object per non-blank line).
INPUT_FILE = "../data/gsm8k_train.jsonl"
# Phase 1 output: the sampled seed records, written as JSONL.
SEED_FILE = "../data/seed_samples.jsonl"
# Maximum number of seed records to draw (fewer if the dataset is smaller).
SAMPLE_SIZE = 10

# Create the shared data directory up front so later writes do not fail on a missing folder.
os.makedirs("../data", exist_ok=True)

def sample_seeds():
    """Draw up to SAMPLE_SIZE random records from INPUT_FILE and write them to SEED_FILE.

    Each line of SEED_FILE is a JSON object with the keys ``id``,
    ``seed_question`` and ``original_answer``; the last two are copied from
    the ``question`` and ``answer`` fields of the sampled record.

    If INPUT_FILE does not exist, a message is printed and nothing is written.
    """
    if not os.path.exists(INPUT_FILE):
        print(f"❌ Input file {INPUT_FILE} not found. Please ensure the path is correct.")
        return

    with open(INPUT_FILE, 'r', encoding='utf-8') as f:
        data = [json.loads(line) for line in f if line.strip()]

    sampled = random.sample(data, min(len(data), SAMPLE_SIZE))

    # Draw ids without replacement: independent randint() calls can hand the
    # same id to two different seeds, which makes later records ambiguous.
    id_pool = range(1000, 10000)
    if len(sampled) <= len(id_pool):
        seed_ids = random.sample(id_pool, len(sampled))
    else:
        # More seeds than four-digit ids: fall back to sequential unique ids.
        seed_ids = list(range(1000, 1000 + len(sampled)))

    with open(SEED_FILE, 'w', encoding='utf-8') as f:
        for seed_id, entry in zip(seed_ids, sampled):
            seed = {
                "id": seed_id,
                "seed_question": entry['question'],
                "original_answer": entry['answer'],
            }
            f.write(json.dumps(seed, ensure_ascii=False) + '\n')
    print(f"✅ Sampled {len(sampled)} seed samples to {SEED_FILE}")

# Run Phase 1 immediately; on success this (re)writes SEED_FILE.
sample_seeds()

### 3. Phase 2: Instruction Evolution and PoT Solution Generation
Use a Large Language Model (DeepSeek-V3) to rewrite simple questions into more complex ones and to generate Python code solutions for them.

In [None]:
import requests
import time
from tqdm.notebook import tqdm

# Placeholder credential sent as the Bearer token; replace before running.
# NOTE(review): avoid committing a real key into the notebook.
API_KEY = "YOUR_API_KEY"
# Chat-completions endpoint that call_llm posts to.
BASE_URL = "https://api.siliconflow.cn/v1/chat/completions"
# Model identifier placed in the request payload.
MODEL_NAME = "deepseek-ai/DeepSeek-V3"
# Phase 2 output: evolved questions with their generated solutions (JSONL, appended to).
EVOL_OUTPUT = "../data/evolved_samples.jsonl"

def call_llm(prompt):
    """Send ``prompt`` as a single user message and return the model's reply text.

    Args:
        prompt: The text placed in the one ``user`` message of the request.

    Returns:
        The ``content`` string of the first returned choice, or ``None`` when
        the request fails, the server answers with an error status, or the
        response body does not have the expected shape.
    """
    headers = {"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"}
    payload = {
        "model": MODEL_NAME,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "max_tokens": 2048,
    }
    try:
        res = requests.post(BASE_URL, json=payload, headers=headers, timeout=120)
        # Surface 4xx/5xx explicitly instead of failing later on a missing key.
        res.raise_for_status()
        return res.json()['choices'][0]['message']['content']
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as err:
        # Network/HTTP errors, non-JSON bodies and unexpected response shapes
        # are all "no answer" for the caller, but the cause is worth showing.
        # KeyboardInterrupt is deliberately not caught so the run can be stopped.
        print(f"LLM request failed: {err!r}")
        return None

def process_evol():
    """Evolve every seed question and generate a code solution for it.

    Reads the seed records from SEED_FILE, asks the model (via ``call_llm``)
    first for a more complex version of each question and then for a
    ``solve()`` function answering it, and appends one JSON line per
    successful pair to EVOL_OUTPUT with the keys ``id``, ``evolved_question``
    and ``pot_solution``. Seeds for which either model call returns nothing
    are skipped.

    If SEED_FILE does not exist, a message is printed and nothing is done.
    """
    try:
        # The seed file is written as UTF-8 with ensure_ascii=False, so it must
        # be read back as UTF-8 regardless of the platform's default encoding.
        with open(SEED_FILE, 'r', encoding='utf-8') as f:
            seeds = [json.loads(line) for line in f if line.strip()]
    except FileNotFoundError:
        print(f"❌ Seed file {SEED_FILE} not found. Run the seed sampling step first.")
        return

    for entry in tqdm(seeds, desc="Evolving & Solving"):
        # 1. Instruction Evolution
        evol_p = f"Rewrite this math problem to be more complex and scenario-based. Output the question only: {entry['seed_question']}"
        evolved_q = call_llm(evol_p)
        if not evolved_q:
            continue

        # 2. PoT Generation
        pot_p = f"Write a solve() function to solve this problem and return the numerical result: {evolved_q}"
        solution = call_llm(pot_p)
        if not solution:
            continue

        res = {"id": entry['id'], "evolved_question": evolved_q, "pot_solution": solution}
        # Reopened in append mode for every record so that finished work is
        # already on disk if a later request fails or the run is interrupted.
        with open(EVOL_OUTPUT, 'a', encoding='utf-8') as f:
            f.write(json.dumps(res, ensure_ascii=False) + '\n')

# Run Phase 2 immediately; results are appended to EVOL_OUTPUT.
process_evol()

### 4. Phase 3: Sandbox Code Verification
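Automatically extract the generated Python code and run it in a separate Python subprocess with a short timeout, keeping only the solutions that execute successfully and print a result. Note that this is process-level isolation only, not a security sandbox, and the printed answer is not compared against a reference answer.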

In [None]:
import re
import subprocess

# Phase 3 output: the samples whose extracted code ran successfully and printed something (JSONL).
FINAL_OUTPUT = "../data/verified_textbook.jsonl"

# Opening fence tagged python / python3 / py (any letter case), followed by the
# block body captured lazily up to the next closing fence. Whitespace directly
# inside the fences is trimmed from the captured body.
_PY_FENCE_RE = re.compile(r"```(?:python3?|py\b)\s*(.*?)\s*```", re.DOTALL | re.IGNORECASE)


def extract_code(text):
    """Return the body of the first Python-tagged fenced code block in ``text``.

    Recognised opening fences are ```` ```python ````, ```` ```python3 ```` and
    ```` ```py ````, matched case-insensitively.

    Args:
        text: The model reply to search; may be ``None`` or empty.

    Returns:
        The code inside the first matching fence with leading and trailing
        whitespace removed, or ``None`` when there is no such block.
    """
    if not text:
        return None
    match = _PY_FENCE_RE.search(text)
    return match.group(1) if match else None

def run_code(code, timeout=5):
    """Execute ``code`` in a fresh Python subprocess and report the outcome.

    NOTE: this is process isolation with a time limit only, not a security
    sandbox. The code runs with the current user's privileges, so model-written
    code should be treated as untrusted input.

    Args:
        code: Python source handed to the interpreter through ``-c``.
        timeout: Seconds to wait before the child process is killed.

    Returns:
        A ``(success, text)`` tuple:
        ``(True, <stripped stdout>)`` when the process exits with code 0,
        ``(False, <stderr>)`` when it exits with a non-zero code,
        ``(False, "Timeout")`` when it exceeds ``timeout``, and
        ``(False, "Execution error: ...")`` when the process could not be run.
    """
    import sys

    # Prefer the interpreter running this notebook so the child sees the same
    # installed packages; 'python3' may be missing or a different environment.
    interpreter = sys.executable or 'python3'
    try:
        result = subprocess.run(
            [interpreter, '-c', code],
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        return (False, "Timeout")
    except (OSError, ValueError) as err:
        # Interpreter not launchable, NUL byte in the source, or undecodable
        # output: a failure, but not a timeout, so do not label it as one.
        return (False, f"Execution error: {err}")

    if result.returncode == 0:
        return (True, result.stdout.strip())
    return (False, result.stderr)

def verify_pipeline():
    """Keep only the evolved samples whose generated code runs and prints a result.

    For every record in EVOL_OUTPUT the Python code is extracted from
    ``pot_solution`` and executed with ``run_code``. Records with no
    extractable code, a failed run, or empty output are dropped. The rest are
    written to FINAL_OUTPUT as JSON lines with the keys ``instruction``,
    ``output`` and ``answer`` (the captured standard output).

    If EVOL_OUTPUT does not exist, a message is printed and nothing is written.
    """
    try:
        # The file is written as UTF-8 with ensure_ascii=False, so read it back
        # as UTF-8 instead of relying on the platform default encoding.
        with open(EVOL_OUTPUT, 'r', encoding='utf-8') as f:
            entries = [json.loads(line) for line in f if line.strip()]
    except FileNotFoundError:
        print(f"❌ Evolved samples file {EVOL_OUTPUT} not found. Run the evolution step first.")
        return

    valid_data = []
    for entry in entries:
        code = extract_code(entry['pot_solution'])
        if not code:
            continue

        success, output = run_code(code)
        # Empty output counts as a failure: there is no answer to record.
        if success and output:
            valid_data.append({
                "instruction": entry['evolved_question'],
                "output": entry['pot_solution'],
                "answer": output,
            })

    with open(FINAL_OUTPUT, 'w', encoding='utf-8') as f:
        for item in valid_data:
            f.write(json.dumps(item, ensure_ascii=False) + '\n')
    print(f"✅ Verification complete! {len(valid_data)} high-quality samples saved to {FINAL_OUTPUT}")

# Run Phase 3 immediately; on success this (re)writes FINAL_OUTPUT.
verify_pipeline()