# ScopeBench Interactive Quickstart Notebook

This notebook walks through installation, template selection, plan editing, API usage, and Python integration.


## 0) Environment setup
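In a fresh environment, uncomment the two `%pip` lines below and run this cell once; afterwards the cell just prints the Python version in use.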


In [None]:
# First run only: uncomment the two %pip lines below to upgrade pip and
# install this project in editable mode with its "dev" extras.
# %pip install -U pip
# %pip install -e ".[dev]"
import sys
# Show which interpreter version this kernel is running.
print(sys.version)


## 1) Run CLI quickstarts


In [None]:
import json, subprocess

def run_json(cmd: str):
    """Run *cmd* through the shell and decode its standard output as JSON.

    Args:
        cmd: Command line, interpreted by the shell.

    Returns:
        The decoded JSON value, or ``{"raw": <stdout text>}`` when the output
        is not valid JSON.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
    """
    completed = subprocess.run(
        cmd,
        shell=True,
        text=True,
        stdout=subprocess.PIPE,
        check=True,  # non-zero exit raises, same as check_output
    )
    stdout_text = completed.stdout
    try:
        parsed = json.loads(stdout_text)
    except json.JSONDecodeError:
        # Not JSON: hand back the untouched text so the caller can inspect it.
        parsed = {"raw": stdout_text}
    return parsed

# Run both built-in quickstart commands, in order, and keep their parsed output.
quickstart, coding = map(
    run_json,
    ("scopebench quickstart --json", "scopebench coding-quickstart --json"),
)
# Cell result: the "decision" field of each output (None when the field is absent).
(quickstart.get("decision"), coding.get("decision"))


## 2) Template selection
Compare outcomes by changing `preset` values.


In [None]:
import tempfile, textwrap

# Plan file handed to `scopebench run` by evaluate_with_preset below; the
# relative path resolves against the kernel's current working directory.
plan_path = "examples/coding_small.patch.plan.yaml"

def evaluate_with_preset(preset: str):
    """Evaluate the plan at ``plan_path`` under a contract that uses *preset*.

    A two-line contract (a fixed goal plus the given preset) is written to a
    temporary YAML file, ``scopebench run <contract> <plan> --json`` is
    executed, and its standard output is parsed as JSON.

    Args:
        preset: Value written into the contract's ``preset`` field.

    Returns:
        A dict with the keys ``preset`` (the argument), ``decision`` (taken
        from the CLI output, ``None`` if absent) and ``scores`` (taken from
        the CLI output, ``{}`` if absent).

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
        json.JSONDecodeError: If the command's output is not valid JSON.
    """
    contract = textwrap.dedent(f'''
    goal: "Fix flaky test"
    preset: {preset}
    ''').strip()
    # The directory context removes the contract file when the block exits,
    # even if the command fails, so repeated calls no longer leave stray
    # temp files behind.
    with tempfile.TemporaryDirectory() as tmp_dir:
        # delete=False: the file must outlive its handle so the CLI can open
        # it by name; the enclosing directory takes care of cleanup.
        with tempfile.NamedTemporaryFile(
            "w", suffix=".yaml", dir=tmp_dir, delete=False
        ) as f:
            f.write(contract)
        # Argument list instead of a shell string: paths containing spaces or
        # shell metacharacters reach the CLI intact.
        out = subprocess.check_output(
            ["scopebench", "run", f.name, plan_path, "--json"], text=True
        )
    data = json.loads(out)
    return {"preset": preset, "decision": data.get("decision"), "scores": data.get("scores", {})}

# Evaluate the same plan once per preset, in this order; the resulting list
# is the value the cell displays.
list(map(evaluate_with_preset, ("personal", "team", "enterprise", "regulated")))


## 3) Plan editing sandbox
Edit the plan text to make it more/less proportional and re-run.


In [None]:
import pathlib

# Load the example plan as text so it can be edited and re-evaluated below.
plan_text = pathlib.Path("examples/phone_charge.plan.yaml").read_text()
# Preview only the first 600 characters of the plan.
print(plan_text[:600])


In [None]:
# Example small scope reduction: every "deploy" in the plan text becomes "prototype".
edited_plan = plan_text.replace("deploy", "prototype")

# delete=False keeps the file on disk after the handle is closed so the CLI
# can open it by name; the `with` block closes the handle even if the write fails.
with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as tmp_plan:
    tmp_plan.write(edited_plan)

try:
    # Argument list (no shell) so a temp path containing spaces or shell
    # metacharacters is passed to the CLI intact.
    result = subprocess.check_output(
        [
            "scopebench",
            "run",
            "examples/phone_charge.contract.yaml",
            tmp_plan.name,
            "--json",
        ],
        text=True,
    )
finally:
    # Always remove the temporary plan, whether or not the run succeeded.
    pathlib.Path(tmp_plan.name).unlink()

# Cell result: the decision for the edited plan (None if the field is absent).
json.loads(result).get("decision")


## 4) API usage (`/evaluate`)
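Before running the next cell, start the server in another terminal with `scopebench serve --host 0.0.0.0 --port 8080`. The cell connects to `http://localhost:8080`; because `--host 0.0.0.0` listens on all network interfaces, use `--host 127.0.0.1` instead if you only need access from this machine.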


In [None]:
import requests

# Request body for POST /evaluate: a contract, a three-step plan whose steps
# are chained through "depends_on", and all four include_* flags switched on.
payload = dict(
    contract={"goal": "Fix failing unit test", "preset": "team"},
    plan={
        "task": "Fix failing unit test",
        "steps": [
            {"id": "1", "description": "Read failing test", "tool": "git_read"},
            {"id": "2", "description": "Apply minimal patch", "tool": "git_patch", "depends_on": ["1"]},
            {"id": "3", "description": "Run targeted test", "tool": "pytest", "depends_on": ["2"]},
        ],
    },
    include_summary=True,
    include_next_steps=True,
    include_patch=True,
    include_telemetry=True,
)
resp = requests.post("http://localhost:8080/evaluate", json=payload, timeout=15)
# Cell result: (HTTP status, decision, summary); fields missing from the
# response body show up as None.
(resp.status_code, *(resp.json().get(field) for field in ("decision", "summary")))


## 5) Integration in Python code


In [None]:
from scopebench.integrations.sdk import evaluate_plan

# Call the SDK directly with a contract and a three-step plan, asking for a
# summary alongside the decision.
integration_result = evaluate_plan(
    contract=dict(goal="Fix failing unit test", preset="team"),
    plan=dict(
        task="Fix failing unit test",
        steps=[
            {"id": "1", "description": "Read failing test", "tool": "git_read"},
            {"id": "2", "description": "Apply minimal patch", "tool": "git_patch", "depends_on": ["1"]},
            {"id": "3", "description": "Run targeted test", "tool": "pytest", "depends_on": ["2"]},
        ],
    ),
    include_summary=True,
)
# Cell result: (decision, summary); a missing field shows up as None.
tuple(integration_result.get(field) for field in ("decision", "summary"))


## 6) Suggested experiments
- Change presets and compare shifts from `ALLOW` to `ASK` or `DENY`.
- Add higher-impact tools to test cumulative scope.
- Compare direct SDK use vs. API mode in your own orchestrator.
