In [27]:
# --- Paths to uploaded files (adjust if you moved them) ---
from pathlib import Path

# All locations below are anchored at the directory the kernel is running in.
ROOT = Path(".").resolve()

# Project metadata at the repo root, then the individual source files under src/rft.
PYPROJECT = ROOT.joinpath("pyproject.toml")
SPEC_PY = ROOT.joinpath("src", "rft", "tasks", "spec.py")
REG_PY = ROOT.joinpath("src", "rft", "tasks", "registry.py")
IO_PY = ROOT.joinpath("src", "rft", "tasks", "io.py")
TPL_PY = ROOT.joinpath("src", "rft", "prompt", "templates.py")
RENDER_PY = ROOT.joinpath("src", "rft", "prompt", "render.py")

# Report (without failing) whether each expected file is actually present.
for p in (PYPROJECT, SPEC_PY, REG_PY, IO_PY, TPL_PY, RENDER_PY):
    print(p, "exists =", p.exists())


/Users/lizhuoyang/Desktop/RFT/pyproject.toml exists = True
/Users/lizhuoyang/Desktop/RFT/src/rft/tasks/spec.py exists = True
/Users/lizhuoyang/Desktop/RFT/src/rft/tasks/registry.py exists = True
/Users/lizhuoyang/Desktop/RFT/src/rft/tasks/io.py exists = True
/Users/lizhuoyang/Desktop/RFT/src/rft/prompt/templates.py exists = True
/Users/lizhuoyang/Desktop/RFT/src/rft/prompt/render.py exists = True


In [30]:
# --- Utility: import a module from a file path (SAFE VERSION) ---
import importlib.util
import sys
from types import ModuleType
from pathlib import Path

def load_module(name: str, path: Path) -> ModuleType:
    """Import the Python source file at ``path`` as a module named ``name``.

    The new module object is registered in ``sys.modules`` *before* its code
    is executed, so anything that looks up ``sys.modules[name]`` while the
    module body runs can find it. If executing the module raises, that
    registration is rolled back (restoring whatever was previously stored
    under ``name``, if anything) so a half-initialised module is never left
    behind for later cells to pick up.

    Args:
        name: Key under which the module is registered in ``sys.modules``.
        path: Location of the source file to execute.

    Returns:
        The fully executed module object.

    Raises:
        ImportError: If no import spec (or no loader) can be built for ``path``.
        Exception: Any error raised by the module's own top-level code is
            propagated unchanged after the ``sys.modules`` rollback.
    """
    spec = importlib.util.spec_from_file_location(name, str(path))
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load module {name} from {path}")
    mod = importlib.util.module_from_spec(spec)

    # Remember what (if anything) was registered under this name so that a
    # failed load can put it back.
    not_registered = object()
    previous = sys.modules.get(name, not_registered)

    # Key fix: register in sys.modules before executing the module body.
    sys.modules[name] = mod
    try:
        spec.loader.exec_module(mod)  # type: ignore[attr-defined]
    except BaseException:
        # Undo the registration so the broken module is not importable by name.
        if previous is not_registered:
            sys.modules.pop(name, None)
        else:
            sys.modules[name] = previous
        raise
    return mod


# Load every source file under a flat alias. The order of the table is the
# load order: spec, registry, io, templates, render.
spec_mod, registry_mod, io_mod, tpl_mod, render_mod = [
    load_module(alias, source_file)
    for alias, source_file in (
        ("rft_tasks_spec", SPEC_PY),
        ("rft_tasks_registry", REG_PY),
        ("rft_tasks_io", IO_PY),
        ("rft_prompt_templates", TPL_PY),
        ("rft_prompt_render", RENDER_PY),
    )
]

print("Loaded:", spec_mod, registry_mod, io_mod, tpl_mod, render_mod)


Loaded: <module 'rft_tasks_spec' from '/Users/lizhuoyang/Desktop/RFT/src/rft/tasks/spec.py'> <module 'rft_tasks_registry' from '/Users/lizhuoyang/Desktop/RFT/src/rft/tasks/registry.py'> <module 'rft_tasks_io' from '/Users/lizhuoyang/Desktop/RFT/src/rft/tasks/io.py'> <module 'rft_prompt_templates' from '/Users/lizhuoyang/Desktop/RFT/src/rft/prompt/templates.py'> <module 'rft_prompt_render' from '/Users/lizhuoyang/Desktop/RFT/src/rft/prompt/render.py'>


In [32]:
# --- 1) Packaging sanity checks ---
try:
    import tomllib  # Python >= 3.11
except ModuleNotFoundError:
    import tomli as tomllib  # Python <= 3.10

# Parse pyproject.toml once and pull out the tables inspected below.
data = tomllib.loads(PYPROJECT.read_text(encoding="utf-8"))
project_table = data["project"]  # a missing [project] table is a hard error
declared_scripts = project_table.get("scripts", {})

print("project.name:", project_table["name"])
print("project.requires-python:", project_table.get("requires-python"))
print("project.scripts:", list(declared_scripts))

# Console entry points that are expected to be declared under [project.scripts].
expected_scripts = {"rft-generate", "rft-verify", "rft-build", "rft-train", "rft-eval"}
missing = expected_scripts.difference(declared_scripts)
print("Missing scripts:", missing)


project.name: rft
project.requires-python: >=3.10,<3.11
project.scripts: ['rft-generate', 'rft-verify', 'rft-build', 'rft-train', 'rft-eval']
Missing scripts: set()


In [33]:
# --- 2) TaskSpec unit checks ---
TaskSpec = spec_mod.TaskSpec

# Happy path: a spec carrying only the required fields passes validation.
t = TaskSpec(
    task_id="demo_task",
    split="sab",
    eval_entrypoint="python -m benchmark.eval_programs.eval_demo",
)
t.validate()
print("TaskSpec.validate OK:", t.short_name())

# Failure path: an empty eval_entrypoint has to be rejected with ValueError.
try:
    TaskSpec(task_id="bad", split="sab", eval_entrypoint="").validate()
except ValueError as err:
    print("Expected failure:", err)
else:
    raise AssertionError("Expected validation failure but got success")


TaskSpec.validate OK: sab:demo_task
Expected failure: TaskSpec.eval_entrypoint missing for task 'bad'


In [34]:

# --- 3) Create a tiny annotation table and test TaskRegistry ---
import tempfile, csv, json
from pathlib import Path

TaskRegistry = registry_mod.TaskRegistry

# The single annotation row under test; its key order is also the CSV column order.
toy_row = {
    "task_id": "toy_1",
    "task_inst": "Write a program that prints 'hello'.",
    "eval_script_name": "eval_toy_1.py",
    "dataset_preview": "a,b\n1,2\n",
    "domain": "toy",
    "subtask_categories": "io,printing",
}

with tempfile.TemporaryDirectory() as tmp_dir:
    # Write a one-row annotation table into a throwaway directory.
    ann = Path(tmp_dir) / "ann.csv"
    with ann.open("w", newline="", encoding="utf-8") as handle:
        writer = csv.DictWriter(handle, fieldnames=list(toy_row))
        writer.writeheader()
        writer.writerow(toy_row)

    # Point a registry at that table and list what it found.
    reg = TaskRegistry(annotation_path=ann, benchmark_root=Path("."), split="sab")
    tasks = reg.list_tasks()
    print("Num tasks:", len(tasks))
    print("First task:", tasks[0])

    # Look the task up by id; the check below expects the script name
    # "eval_toy_1.py" to end up as a benchmark.eval_programs.eval_toy_1 entrypoint.
    ts = reg.get("toy_1")
    print("eval_entrypoint:", ts.eval_entrypoint)
    expected_suffix = "benchmark.eval_programs.eval_toy_1"
    assert ts.eval_entrypoint.endswith(expected_suffix), "entrypoint resolution mismatch"


Num tasks: 1
First task: TaskSpec(task_id='toy_1', split='sab', version='v1', domain='toy', subtask_categories=['io', 'printing'], instruction="Write a program that prints 'hello'.", dataset_preview='a,b\n1,2\n', input_format_hint=None, eval_entrypoint='python -m benchmark.eval_programs.eval_toy_1', eval_timeout_sec=1800, max_memory_mb=4096, execution_env={}, evaluation_params={}, sampling_hint={}, metadata={})
eval_entrypoint: python -m benchmark.eval_programs.eval_toy_1


In [35]:

# --- 4) Dataset preview tokens + model-visible input ---
# Delimiter tokens exported by the io module.
PREVIEW_START_TOKEN, PREVIEW_END_TOKEN = (
    io_mod.PREVIEW_START_TOKEN,
    io_mod.PREVIEW_END_TOKEN,
)

# A hand-built spec with both an instruction and a dataset preview.
toy_2_fields = dict(
    task_id="toy_2",
    split="sab",
    instruction="Do something.",
    dataset_preview="x,y\n3,4\n",
    eval_entrypoint="python -m benchmark.eval_programs.eval_toy_2",
)
ts = TaskSpec(**toy_2_fields)

# The rendered preview must open with the start token and, once surrounding
# whitespace is stripped, close with the end token.
preview = io_mod.get_dataset_preview(ts)
print(preview)
assert preview.startswith(PREVIEW_START_TOKEN)
assert preview.strip().endswith(PREVIEW_END_TOKEN)

# The model-visible input must contain the instruction and the preview block.
visible = io_mod.get_model_visible_input(ts)
print("\n--- model visible ---\n", visible)
for required_piece in ("Do something.", PREVIEW_START_TOKEN):
    assert required_piece in visible


[START Preview of Dataset]
x,y
3,4
[END Preview of Dataset]

--- model visible ---
 Do something.

[START Preview of Dataset]
x,y
3,4
[END Preview of Dataset]


In [36]:

# --- 5) Prompt template and render() contract checks ---
# The system template must mention both thinking tags and a python code fence.
template = tpl_mod.THINKING_CODE_TEMPLATE
assert all(tag in template for tag in ("<thinking>", "</thinking>")), "Template missing thinking tags"
assert "```python" in template, "Template missing python fenced block instruction"

# render() is called on the TaskSpec currently bound to `ts` and must return
# exactly two messages: the system template followed by the user turn.
msgs = render_mod.render(ts)
print(msgs)
assert isinstance(msgs, list)
assert len(msgs) == 2
assert [message["role"] for message in msgs] == ["system", "user"]
assert msgs[0]["content"] == template
assert "Do something." in msgs[1]["content"]


