# ScopeBench Interactive Quickstart Notebook

A guided, hands-on tutorial covering template selection, plan editing, effect annotation, API evaluation, and Python SDK integration.

## 0) Environment setup
Uncomment the `%pip` lines below and run them once in a fresh environment.

In [None]:
# One-time setup: uncomment the two %pip lines to upgrade pip and install the
# project in the current directory in editable mode with its "dev" extras.
# %pip install -U pip
# %pip install -e ".[dev]"
import sys
# Print the interpreter version string for the running kernel.
print(sys.version)

## 1) Template selection
Start by discovering available templates and choosing a baseline example.

In [None]:
import json, subprocess

def run_json(cmd: str):
    """Run *cmd* through the shell and return its stdout parsed as JSON.

    Raises ``subprocess.CalledProcessError`` if the command exits with a
    non-zero status, and ``json.JSONDecodeError`` if stdout is not valid JSON.
    """
    completed = subprocess.run(
        cmd,
        shell=True,
        text=True,
        stdout=subprocess.PIPE,  # capture stdout only; stderr is left alone
        check=True,
    )
    return json.loads(completed.stdout)

# Print the CLI's template listing verbatim.
print(
    subprocess.run(
        "scopebench list-templates",
        shell=True,
        text=True,
        stdout=subprocess.PIPE,
        check=True,
    ).stdout
)

# Evaluate the coding_bugfix example contract/plan pair as the baseline.
baseline = run_json(
    "scopebench run examples/coding_bugfix.contract.yaml examples/coding_bugfix.plan.yaml --json"
)

# Last expression of the cell: the notebook displays this (decision, scores) pair.
(baseline["decision"], baseline.get("scores", {}))

## 2) Plan editing sandbox
Modify a plan and compare decisions before/after edits.

In [None]:
import tempfile
from pathlib import Path

# Load the example plan and preview its first 700 characters.
original_path = Path("examples/coding_bugfix.plan.yaml")
# Read as UTF-8 explicitly rather than relying on the platform's default encoding.
plan_text = original_path.read_text(encoding="utf-8")
print(plan_text[:700])

# Example edit: narrow potentially broad wording
edited_text = plan_text.replace("Apply broad refactor", "Apply minimal targeted refactor")
if edited_text == plan_text:
    # str.replace changes nothing when the phrase is absent; say so, because the
    # before/after comparison below would then evaluate two identical plans.
    print('Note: "Apply broad refactor" was not found; the edited plan is identical to the original.')

# delete=False keeps the edited copy on disk after the handle is closed so that
# the CLI (and later cells, via edited_plan_path) can read it.
with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False, encoding="utf-8") as f:
    # No `_ = ...` here: rebinding `_` would shadow IPython's last-output variable.
    f.write(edited_text)
    edited_plan_path = f.name

# Evaluate the original and the edited plan against the same contract.
before = run_json("scopebench run examples/coding_bugfix.contract.yaml examples/coding_bugfix.plan.yaml --json")
after = run_json(f"scopebench run examples/coding_bugfix.contract.yaml {edited_plan_path} --json")

# Last expression of the cell: side-by-side decisions and reasons for display.
{
    "before_decision": before["decision"],
    "after_decision": after["decision"],
    "before_reasons": before.get("reasons", []),
    "after_reasons": after.get("reasons", []),
}

## 3) Effect annotation
Use ScopeBench effect suggestions to enrich plan metadata.

In [None]:
# Request effect suggestions for the edited plan as JSON.
suggested = run_json(f"scopebench suggest-effects {edited_plan_path} --json")

# Last expression of the cell: the top-level keys of the response.
[*suggested.keys()]

To apply effect annotations directly to the plan file (here, the temporary edited copy created in step 2), run:

In [None]:
# Run suggest-effects with --in-place on the edited plan; stdout is captured
# and discarded, and a non-zero exit status raises CalledProcessError.
subprocess.run(
    f"scopebench suggest-effects {edited_plan_path} --in-place",
    shell=True,
    text=True,
    stdout=subprocess.PIPE,
    check=True,
)

# Re-evaluate the same plan file after the in-place command has run.
post_effect = run_json(f"scopebench run examples/coding_bugfix.contract.yaml {edited_plan_path} --json")

# Last expression of the cell: the (decision, scores) pair for display.
(post_effect["decision"], post_effect.get("scores", {}))

## 4) API usage (`/evaluate`)
Before running this cell, start the server in another terminal with `scopebench serve --host 0.0.0.0 --port 8080`; the request below is sent to `http://localhost:8080/evaluate`.

In [None]:
import requests

# Three-step plan: each later step depends on the one before it.
plan_steps = [
    {"id": "1", "description": "Read failing test", "tool": "git_read"},
    {"id": "2", "description": "Apply minimal patch", "tool": "git_patch", "depends_on": ["1"]},
    {"id": "3", "description": "Run targeted test", "tool": "pytest", "depends_on": ["2"]},
]

# Request body: contract, plan, and the optional output toggles.
payload = {
    "contract": {"goal": "Fix failing unit test", "preset": "team"},
    "plan": {"task": "Fix failing unit test", "steps": plan_steps},
    "include_summary": True,
    "include_next_steps": True,
    "include_patch": True,
}

# POST the payload as JSON with a 30-second timeout.
response = requests.post("http://localhost:8080/evaluate", json=payload, timeout=30)
response.raise_for_status()  # raise on an HTTP error status instead of parsing an error body
result = response.json()

# Last expression of the cell: decision, summary and next steps for display.
(result["decision"], result.get("summary"), result.get("next_steps"))

## 5) Integration in Python code
The SDK wrapper provides the same behavior directly in your pipelines.

In [None]:
from scopebench.integrations.sdk import evaluate_plan

# Contract and plan passed to the SDK call below.
sdk_contract = {"goal": "Fix failing unit test", "preset": "team"}
sdk_plan = {
    "task": "Fix failing unit test",
    "steps": [
        {"id": "1", "description": "Read failing test", "tool": "git_read"},
        {"id": "2", "description": "Apply minimal patch", "tool": "git_patch", "depends_on": ["1"]},
        {"id": "3", "description": "Run targeted test", "tool": "pytest", "depends_on": ["2"]},
    ],
}

# Evaluate through the SDK wrapper, requesting a summary.
integration_result = evaluate_plan(contract=sdk_contract, plan=sdk_plan, include_summary=True)

# Last expression of the cell: the (decision, summary) pair for display.
(integration_result["decision"], integration_result.get("summary"))

## 6) Suggested follow-up experiments
- Compare decisions across presets: `personal`, `team`, `enterprise`, `regulated`.
- Add/remove steps and observe cumulative scoring effects.
- Use API toggles like `include_telemetry` and `shadow_mode` for integration testing.