From 9329fba6ed69fd65cd462f1f604126c7fac03ca3 Mon Sep 17 00:00:00 2001 From: NagyVikt Date: Tue, 28 Apr 2026 20:06:08 +0200 Subject: [PATCH] Make agent contract Colony-first across repos The managed AGENTS block needs to be portable across Guardex repos while making Colony the first coordination surface and OMX state the fallback. The template and installed root AGENTS block now match byte-for-byte, with focused setup and prompt assertions updated to lock the new contract shape. Constraint: Keep the marker-managed block reusable while preserving repo-specific content outside it. Rejected: Patch only AGENTS.md | setup refresh would overwrite the installed block from the unchanged template. Confidence: high Scope-risk: narrow Directive: Keep AGENTS.md marker content synchronized with templates/AGENTS.multiagent-safety.md before changing setup or prompt tests. Tested: git diff --check Tested: managed block parity check against templates/AGENTS.multiagent-safety.md Tested: node --test test/prompt.test.js Tested: node --test --test-name-pattern 'setup provisions workflow files|setup refreshes existing managed AGENTS block by default|install configures AGENTS managed policy block|prompt --snippet' test/setup.test.js test/prompt.test.js Tested: openspec validate --specs Not-tested: Full metadata/setup suite; broad run hit unrelated README about_description metadata baseline and OpenSpec worktree reuse failure. --- AGENTS.md | 433 +++++++++++++++++++++++--- templates/AGENTS.multiagent-safety.md | 430 ++++++++++++++++++++++--- test/prompt.test.js | 7 +- test/setup.test.js | 19 +- 4 files changed, 800 insertions(+), 89 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 1744b8e0..862329af 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -345,74 +345,423 @@ scripts/openspec/init-plan-workspace.sh ``` -## Multi-Agent Execution Contract (GX) +## Multi-Agent Execution Contract: Guardex + Colony -**Repo toggle.** Guardex is enabled by default. 
If the repo root `.env` sets `GUARDEX_ON=0`, `false`, `no`, or `off`, treat this entire Guardex contract as disabled for the repo and do not require Guardex worktrees, lock claims, completion flow, or OpenSpec workflow until `GUARDEX_ON` is re-enabled. +### Repo toggle -**Repo toggle examples.** Add one of these lines to the repo-root `.env` file: -`GUARDEX_ON=0` disables Guardex for that repo. -`GUARDEX_ON=1` explicitly enables Guardex for that repo again. +Guardex is enabled by default. -**Task-size routing.** Small tasks stay in direct caveman-only mode. For typos, single-file tweaks, one-liners, version bumps, or similarly bounded asks, solve directly and do not escalate into heavy OMX orchestration just because a keyword appears. Treat `quick:`, `simple:`, `tiny:`, `minor:`, `small:`, `just:`, and `only:` as explicit lightweight escape hatches. -Promote to OMX orchestration only when the task is medium/large: multi-file behavior changes, API/schema work, refactors, migrations, architecture, cross-cutting scope, or long prompts. Heavy OMX modes (`ralph`, `autopilot`, `team`, `ultrawork`, `swarm`, `ralplan`) are for that larger scope. If the task grows while working, upgrade then. +If the repo-root `.env` sets any of these values, treat this entire Guardex contract as disabled for the repo: -## Token / Context Budget +```text +GUARDEX_ON=0 +GUARDEX_ON=false +GUARDEX_ON=no +GUARDEX_ON=off +``` + +When disabled, do not require Guardex worktrees, lock claims, completion flow, or OpenSpec workflow until `GUARDEX_ON` is re-enabled. + +To explicitly enable: + +```text +GUARDEX_ON=1 +``` + +### Core rules + +- Work from an `agent/*` branch and worktree, never directly on the protected base branch. +- Claim files before edits. +- Use Colony for coordination before falling back to OMX state/notepad. +- Use OpenSpec for durable behavior contracts and change-driven work. +- Keep outputs compact: less word, same proof. 
+- Commit, push, and open/update a PR for completed work unless the user explicitly says to keep it local. +- Do not embed stale memory dumps, generated status snapshots, PR transcripts, session history, or long logs in this file. + +### Task-size routing + +Small tasks stay direct and caveman-only. + +For typos, single-file tweaks, one-liners, version bumps, comment-only changes, or similarly bounded asks, solve directly and do not escalate into heavy orchestration just because a keyword appears. + +Treat these prefixes as explicit lightweight escape hatches: + +- `quick:` +- `simple:` +- `tiny:` +- `minor:` +- `small:` +- `just:` +- `only:` + +Promote to full Guardex / OMX orchestration only when scope grows into: + +- multi-file behavior change +- API/schema work +- refactor +- migration +- architecture +- cross-cutting scope +- long prompt +- multi-agent execution + +### Colony coordination loop + +Use Colony as the primary coordination surface. + +On every startup, resume, follow-up, or "continue" request, run this order: + +1. `mcp__colony__hivemind_context` +2. `mcp__colony__attention_inbox` +3. `mcp__colony__task_ready_for_agent` +4. `mcp__colony__search` only when prior decisions, earlier lanes, file history, or error context matter. + +Rules: + +- Use `task_ready_for_agent` to choose work. +- Use `task_list` only for browsing/debugging. Do not use `task_list` as the normal work picker. +- If an agent reaches for `task_list` repeatedly while choosing work, stop and call `task_ready_for_agent` instead. `task_list` is an inventory tool, not a scheduler. +- Before editing files on an active task, call `task_claim_file` for each touched file. +- Use `task_post` for task-thread notes, decisions, blockers, and working-state updates. +- Use `task_message` / `task_messages` for directed agent-to-agent communication. +- Use `get_observations` only after compact Colony tools return IDs worth hydrating. 
+ +Fallback: + +- Colony is considered unavailable only when the MCP namespace is missing, the tool call fails, or the installed Colony server does not expose the required tool. +- If `attention_inbox` or `task_ready_for_agent` is missing, fall back to `hivemind_context`, then `task_list`, then hydrate only the relevant task IDs. +- Do not skip Colony just because OMX state exists. OMX is fallback, not the first coordination source. +- Read `.omx/state` and `.omx/notepad.md` only when Colony is unavailable, missing the needed state, or the task explicitly depends on legacy OMX state. +- Keep `.omx/notepad.md` lean: live handoffs only. + +### Working-state notes + +Colony is preferred over generic notepad state. + +A working-state note should be task-scoped, searchable, and useful to another agent resuming the lane. + +When saving progress, use a task-scoped Colony note when possible: + +```text +task_post kind=note +content="branch=; task=; blocker=; next=; evidence=" +``` + +Use exactly these fields for handoff-style notes: + +- `branch` +- `task` +- `blocker` +- `next` +- `evidence` + +Do not store long proof dumps, stale narrative, or full logs in notepads. Put bulky proof in OpenSpec artifacts, PRs, or command output. + +### Token / context budget Default: less word, same proof. -- For prompts about `token inefficiency`, `reviewer mode`, `minimal token overhead`, or session waste patterns, switch into low-overhead mode: plan in at most 4 bullets, execute by phase, batch related reads/commands, avoid duplicate reads and interactive loops, keep outputs compact, and verify once per phase. -- Low output alone is not a defect. A bounded run that finishes in roughly <=10 steps is usually fine; low output spread across 20+ steps with rising per-turn input is fragmentation and should be treated as context growth first. -- Startup / resume summaries stay tiny: `branch`, `task`, `blocker`, `next step`, and `evidence`. 
-- Memory-driven starts stay ordered: read active `.omx/state` first, then one live `.omx/notepad.md` handoff, then external memory only when the task depends on prior repo decisions, a previous lane, or ambiguous continuity. Stop after the first 1-2 relevant hits. -- Front-load scaffold/path discovery into one grouped inspection pass. Avoid serial `ls` / `find` / `rg` / `cat` retries that only rediscover the same path state. -- Treat repeated `write_stdin`, repeated `sed` / `cat` peeks, and tiny diagnostic follow-up checks as strong negative signals. If they appear alongside climbing input cost, stop the probe loop and batch the next phase. +- For prompts about `token inefficiency`, `reviewer mode`, `minimal token overhead`, or session waste patterns, switch into low-overhead mode. +- Plan in at most 4 bullets. +- Execute by phase. +- Batch related reads and commands. +- Avoid duplicate reads and interactive loops. +- Keep outputs compact. +- Verify once per phase. +- Low output alone is not a defect. A bounded run that finishes in roughly <=10 steps is usually fine. +- Low output spread across 20+ steps with rising per-turn input is fragmentation and should be treated as context growth first. +- Startup / resume summaries stay tiny: `branch`, `task`, `blocker`, `next`, and `evidence`. +- Front-load scaffold/path discovery into one grouped inspection pass. Avoid serial `ls` / `find` / `rg` / `cat` retries that rediscover the same path state. +- Treat repeated `write_stdin`, repeated `sed` / `cat` peeks, and tiny diagnostic follow-up checks as strong negative signals. +- If a session turns fragmented, collapse back to inspect once, patch once, verify once, and summarize once. - Tool / hook summaries stay tiny: command, status, last meaningful lines only. Drop routine hook boilerplate. - Keep raw terminal interaction out of long-lived context. For `write_stdin` or interactive babysitting, retain only process, action sent, current result, and next action. 
- Keep execution log separate from reasoning context: full commands/stdout belong in logs, while prompt context keeps only the latest 1-2 checkpoints plus the newest tool-result summary. -- Treat local edit/commit, remote publish/PR, CI diagnosis, and cleanup as bounded phases. Do not spend fresh narration or approval turns on obvious safe follow-ons inside an already authorized phase unless the risk changes. -- When a session turns fragmented, collapse back to inspect once, patch once, verify once, and summarize once. -- Use a fixed checkpoint shape when compacting: `Task`, `Done`, `Current status`, and `Next`. -- Keep `.omx/notepad.md` lean: live handoffs only. Use exactly `branch`, `task`, `blocker`, `next step`, and `evidence`; move narrative proof into OpenSpec artifacts, PRs, or command output. +- Treat local edit/commit, remote publish/PR, CI diagnosis, and cleanup as bounded phases. +- Do not spend fresh narration or approval turns on obvious safe follow-ons inside an already authorized phase unless the risk changes. + +### Caveman style + +Commentary and progress updates use smart-caveman `ultra` by default: + +- Answer order stays fixed: answer first, cause next, fix or next step last. +- drop filler +- use fragments when clear +- answer first +- cause next +- fix or next step last -## OMX Caveman Style +Keep exact literals unchanged: -- Commentary and progress updates use smart-caveman `ultra` by default: drop articles, filler, pleasantries, and hedging. Fragments are fine when they stay clear. -- Answer order stays fixed: answer first, cause next, fix or next step last. If yes/no fits, say yes/no first. -- Keep literals exact: code, commands, file paths, flags, env vars, URLs, numbers, timestamps, and error text are never caveman-compressed. 
-- Auto-clarity wins: switch back to `lite` or normal wording for security warnings, irreversible actions, privacy/compliance notes, ordered instructions where fragments may confuse, or when the user is confused and needs more detail. -- Boundaries stay normal/exact for code, commits, PR text, specs, logs, and blocker evidence. +- code +- commands +- file paths +- flags +- env vars +- URLs +- numbers +- timestamps +- error text + +Switch back to `lite` or normal wording for: + +- security warnings +- irreversible actions +- privacy/compliance notes +- ordered instructions where fragments may confuse +- confused users +- commits +- PR text +- specs +- logs +- blocker evidence + +Never caveman-compress commands, file paths, specs, logs, or blocker evidence. + +### Isolation + +Every task runs on a dedicated `agent/*` branch and worktree. + +Start with: + +```bash +gx branch start "<task>" "<agent-name>" +``` + +Treat the base branch (`main` / `dev`) as read-only while an agent branch is active. -**Isolation.** Every task runs on a dedicated `agent/*` branch + worktree. Start with `gx branch start "<task>" "<agent-name>"`. Treat the base branch (`main`/`dev`) as read-only while an agent branch is active. The `.githooks/post-checkout` hook auto-reverts primary-branch switches during agent sessions and auto-stashes a dirty tree before reverting - bypass only with `GUARDEX_ALLOW_PRIMARY_BRANCH_SWITCH=1`. For every new task, including follow-up work in the same chat/session, if an assigned agent sub-branch/worktree is already open, continue in that sub-branch instead of creating a fresh lane unless the user explicitly redirects scope. -Never implement directly on the local/base branch checkout; keep it unchanged and perform all edits in the agent sub-branch/worktree. -**Primary-tree lock (blocking).** On the primary checkout, do NOT run any of: `git checkout <ref>`, `git switch <ref>`, `git switch -c ...`, `git checkout -b ...`, or `git worktree add <path> <agent-branch>`.
The only branch-changing commands allowed on primary are `git fetch` and `git pull --ff-only` against the protected branch itself. To work on any `agent/*` branch, run `gx branch start ...` first, then `cd` into the printed `.omc/agent-worktrees/...` path and run every subsequent git command from inside that worktree. If you find yourself typing `git checkout agent/...` or `git switch agent/...` from the primary cwd, stop - that is the mistake that flips primary onto an agent branch. +Never implement directly on the local/base branch checkout. Keep it unchanged and perform all edits in the agent sub-branch/worktree. + +### Primary-tree lock + +On the primary checkout, do not run: -**Dirty-tree rule.** Finish or stash edits inside the worktree they belong to before any branch switch on primary. The post-checkout guard auto-stashes a dirty primary tree as `guardex-auto-revert <from>-><to>` before reverting, but that is a safety net, not a workflow; do not rely on it routinely. Recover stashed changes with `git stash list | grep 'guardex-auto-revert'`. +```bash +git checkout <ref> +git switch <ref> +git switch -c ... +git checkout -b ... +git worktree add <path> <agent-branch> +``` + +Allowed on primary: + +```bash +git fetch +git pull --ff-only +``` -**Ownership.** Before editing, claim files: `gx locks claim --branch "<agent-branch>" <file...>`. Before deleting, confirm the path is in your claim. Don't edit outside your scope unless reassigned. +To work on any `agent/*` branch, run `gx branch start ...` first, then `cd` into the printed worktree path and run every subsequent git command from inside that worktree. -**Handoff gate.** Post a one-line handoff note (plan/change, owned scope, intended action) before editing. Re-read the latest handoffs before replacing others' code. +If you are about to type `git checkout agent/...` or `git switch agent/...` from the primary checkout, stop. That is the mistake that flips primary onto an agent branch.
+ +### Dirty-tree rule + +Finish or stash edits inside the worktree they belong to before any branch switch on primary. + +The post-checkout guard may auto-stash a dirty primary tree as: + +```text +guardex-auto-revert <from>-><to> +``` + +That is a safety net, not a workflow. Do not rely on it routinely. + +Recover stashed changes with: + +```bash +git stash list | grep 'guardex-auto-revert' +``` + +### Ownership + +Before editing, claim files. + +Preferred Colony path when on an active task: + +```text +mcp__colony__task_claim_file +``` + +Guardex lock path: + +```bash +gx locks claim --branch "<agent-branch>" <file...> +``` + +Before deleting, confirm the path is in your claim. + +Do not edit outside your scope unless reassigned. + +If another agent owns or recently touched nearby code: + +1. read latest Colony context +2. post a handoff or question +3. avoid reverting unrelated changes +4. report conflicts instead of overwriting + +### Handoff gate + +Before editing, post a one-line handoff note through Colony `task_post` when a task is active. + +Use `.omx/notepad.md` only when Colony is unavailable or the lane explicitly depends on legacy OMX state. + +Handoff shape: + +```text +branch=<branch>; task=<task>; blocker=<blocker>; next=<next>; evidence=<evidence> +``` + +Re-read latest Colony context before replacing another agent's code. + +### Completion + +Finish with: + +```bash +gx branch finish --branch "<agent-branch>" --via-pr --wait-for-merge --cleanup +``` + +or: + +```bash +gx finish --all +``` + +Task is complete only when: + +1. changes are committed +2. branch is pushed +3. PR URL is recorded +4. PR state is `MERGED` +5. sandbox worktree is pruned +6. final handoff records proof + +If anything blocks, append a `BLOCKED:` note and stop. Do not half-finish. -**Completion.** Finish with `gx branch finish --branch "<agent-branch>" --via-pr --wait-for-merge --cleanup` (or `gx finish --all`). Task is only complete when: commit pushed, PR URL recorded, state = `MERGED`, sandbox worktree pruned.
If anything blocks, append a `BLOCKED:` note and stop - don't half-finish. OMX completion policy: when a task is done, the agent must commit the task changes, push the agent branch, and create/update a PR before considering the branch complete. -**Parallel safety.** Assume other agents edit nearby. Never revert unrelated changes. Report conflicts in the handoff. +### Parallel safety -**Reporting.** Every completion handoff includes: files changed, behavior touched, verification commands + results, risks/follow-ups. +Assume other agents edit nearby. -**Open questions.** If Codex/Claude hits an unresolved question, branching decision, or blocker that should survive chat, record it in `openspec/plan//open-questions.md` as an unchecked `- [ ]` item. Resolve it in-place when answered instead of burying it in chat-only notes. +- Never revert unrelated changes. +- Never simplify or delete critical shared paths without explicit request and regression coverage. +- Report conflicts in the handoff. +- Prefer compatibility-preserving changes over endpoint-specific rewrites when other agents may be changing adjacent systems. -**OpenSpec (when change-driven).** Keep `openspec/changes//tasks.md` checkboxes current during work, not batched at the end. Task scaffolds and manual task edits must include an explicit final completion/cleanup section that ends with PR merge + sandbox cleanup (`gx finish --via-pr --wait-for-merge --cleanup` or `gx branch finish ... --cleanup`) and records PR URL + final `MERGED` evidence. Verify specs with `openspec validate --specs` before archive. Don't archive unverified. +### Reporting -**Version bumps.** If a change bumps a published version, the same PR updates release notes/changelog. 
- +Every completion handoff includes: + +```text +branch +task +files changed +behavior touched +verification commands/results +PR URL +merge state +sandbox cleanup state +risks/follow-ups +``` + +If blocked, use: + +```text +BLOCKED: +branch= +task= +blocker= +next= +evidence= +``` +### Open questions - -# Memory Context +If Codex/Claude hits an unresolved question, branching decision, or blocker that should survive chat, record it in: -# [gitguardex] recent context, 2026-04-27 9:55am GMT+2 +```text +openspec/plan//open-questions.md +``` + +as an unchecked item: + +```md +- [ ] Question or blocker... +``` -No previous sessions found. - \ No newline at end of file +Resolve it in-place when answered instead of burying it in chat-only notes. + +### OpenSpec + +OpenSpec is the source of truth for change-driven repo work. + +For change-driven tasks, keep: + +```text +openspec/changes//tasks.md +``` + +current during work, not batched at the end. + +Task scaffolds and manual task edits must include a final completion/cleanup section that ends with PR merge + sandbox cleanup and records PR URL + final `MERGED` evidence. + +Validate specs before archive: + +```bash +openspec validate --specs +``` + +Never archive unverified work. + +For `T0` / small `T1` lanes, use the compact Colony spec path when available. One Colony handoff plus `colony-spec.md` is enough. Do not create proposal/spec/tasks unless the task grows. + +For `T2` / `T3` lanes, keep proposal, spec, design, and tasks live while implementing. + +### Version bumps + +If a change bumps a published version, the same PR records release notes in the appropriate OpenSpec artifact or release-note mechanism for the repo. + +Do not edit `CHANGELOG.md` directly unless the repo explicitly requires manual changelog edits. + +### Verification gates + +Before claiming completion, run the narrowest meaningful verification for the touched area. 
+ +Examples: + +```bash +pnpm test +pnpm typecheck +pnpm lint +``` + +If a command cannot run, record: + +```text +command +reason it could not run +risk +next +``` + +Do not claim green verification without command output evidence. + +### What not to put in this file + +Do not embed: + +- stale memory dumps +- PR transcripts +- long logs +- generated status snapshots +- session history +- full OpenSpec examples +- repeated copies of long workflow docs + +Keep this section as the hard multi-agent contract. Put long examples and recovery docs in repo-specific workflow docs. + + diff --git a/templates/AGENTS.multiagent-safety.md b/templates/AGENTS.multiagent-safety.md index 26008d58..f97780e7 100644 --- a/templates/AGENTS.multiagent-safety.md +++ b/templates/AGENTS.multiagent-safety.md @@ -1,63 +1,421 @@ -## Multi-Agent Execution Contract (GX) +## Multi-Agent Execution Contract: Guardex + Colony -**Repo toggle.** Guardex is enabled by default. If the repo root `.env` sets `GUARDEX_ON=0`, `false`, `no`, or `off`, treat this entire Guardex contract as disabled for the repo and do not require Guardex worktrees, lock claims, completion flow, or OpenSpec workflow until `GUARDEX_ON` is re-enabled. +### Repo toggle -**Repo toggle examples.** Add one of these lines to the repo-root `.env` file: -`GUARDEX_ON=0` disables Guardex for that repo. -`GUARDEX_ON=1` explicitly enables Guardex for that repo again. +Guardex is enabled by default. -**Task-size routing.** Small tasks stay in direct caveman-only mode. For typos, single-file tweaks, one-liners, version bumps, or similarly bounded asks, solve directly and do not escalate into heavy OMX orchestration just because a keyword appears. Treat `quick:`, `simple:`, `tiny:`, `minor:`, `small:`, `just:`, and `only:` as explicit lightweight escape hatches. 
-Promote to OMX orchestration only when the task is medium/large: multi-file behavior changes, API/schema work, refactors, migrations, architecture, cross-cutting scope, or long prompts. Heavy OMX modes (`ralph`, `autopilot`, `team`, `ultrawork`, `swarm`, `ralplan`) are for that larger scope. If the task grows while working, upgrade then. +If the repo-root `.env` sets any of these values, treat this entire Guardex contract as disabled for the repo: -## Token / Context Budget +```text +GUARDEX_ON=0 +GUARDEX_ON=false +GUARDEX_ON=no +GUARDEX_ON=off +``` + +When disabled, do not require Guardex worktrees, lock claims, completion flow, or OpenSpec workflow until `GUARDEX_ON` is re-enabled. + +To explicitly enable: + +```text +GUARDEX_ON=1 +``` + +### Core rules + +- Work from an `agent/*` branch and worktree, never directly on the protected base branch. +- Claim files before edits. +- Use Colony for coordination before falling back to OMX state/notepad. +- Use OpenSpec for durable behavior contracts and change-driven work. +- Keep outputs compact: less word, same proof. +- Commit, push, and open/update a PR for completed work unless the user explicitly says to keep it local. +- Do not embed stale memory dumps, generated status snapshots, PR transcripts, session history, or long logs in this file. + +### Task-size routing + +Small tasks stay direct and caveman-only. + +For typos, single-file tweaks, one-liners, version bumps, comment-only changes, or similarly bounded asks, solve directly and do not escalate into heavy orchestration just because a keyword appears. 
+ +Treat these prefixes as explicit lightweight escape hatches: + +- `quick:` +- `simple:` +- `tiny:` +- `minor:` +- `small:` +- `just:` +- `only:` + +Promote to full Guardex / OMX orchestration only when scope grows into: + +- multi-file behavior change +- API/schema work +- refactor +- migration +- architecture +- cross-cutting scope +- long prompt +- multi-agent execution + +### Colony coordination loop + +Use Colony as the primary coordination surface. + +On every startup, resume, follow-up, or "continue" request, run this order: + +1. `mcp__colony__hivemind_context` +2. `mcp__colony__attention_inbox` +3. `mcp__colony__task_ready_for_agent` +4. `mcp__colony__search` only when prior decisions, earlier lanes, file history, or error context matter. + +Rules: + +- Use `task_ready_for_agent` to choose work. +- Use `task_list` only for browsing/debugging. Do not use `task_list` as the normal work picker. +- If an agent reaches for `task_list` repeatedly while choosing work, stop and call `task_ready_for_agent` instead. `task_list` is an inventory tool, not a scheduler. +- Before editing files on an active task, call `task_claim_file` for each touched file. +- Use `task_post` for task-thread notes, decisions, blockers, and working-state updates. +- Use `task_message` / `task_messages` for directed agent-to-agent communication. +- Use `get_observations` only after compact Colony tools return IDs worth hydrating. + +Fallback: + +- Colony is considered unavailable only when the MCP namespace is missing, the tool call fails, or the installed Colony server does not expose the required tool. +- If `attention_inbox` or `task_ready_for_agent` is missing, fall back to `hivemind_context`, then `task_list`, then hydrate only the relevant task IDs. +- Do not skip Colony just because OMX state exists. OMX is fallback, not the first coordination source. 
+- Read `.omx/state` and `.omx/notepad.md` only when Colony is unavailable, missing the needed state, or the task explicitly depends on legacy OMX state. +- Keep `.omx/notepad.md` lean: live handoffs only. + +### Working-state notes + +Colony is preferred over generic notepad state. + +A working-state note should be task-scoped, searchable, and useful to another agent resuming the lane. + +When saving progress, use a task-scoped Colony note when possible: + +```text +task_post kind=note +content="branch=; task=; blocker=; next=; evidence=" +``` + +Use exactly these fields for handoff-style notes: + +- `branch` +- `task` +- `blocker` +- `next` +- `evidence` + +Do not store long proof dumps, stale narrative, or full logs in notepads. Put bulky proof in OpenSpec artifacts, PRs, or command output. + +### Token / context budget Default: less word, same proof. -- For prompts about `token inefficiency`, `reviewer mode`, `minimal token overhead`, or session waste patterns, switch into low-overhead mode: plan in at most 4 bullets, execute by phase, batch related reads/commands, avoid duplicate reads and interactive loops, keep outputs compact, and verify once per phase. -- Low output alone is not a defect. A bounded run that finishes in roughly <=10 steps is usually fine; low output spread across 20+ steps with rising per-turn input is fragmentation and should be treated as context growth first. -- Startup / resume summaries stay tiny: `branch`, `task`, `blocker`, `next step`, and `evidence`. -- Memory-driven starts stay ordered: read active `.omx/state` first, then one live `.omx/notepad.md` handoff, then external memory only when the task depends on prior repo decisions, a previous lane, or ambiguous continuity. Stop after the first 1-2 relevant hits. -- Front-load scaffold/path discovery into one grouped inspection pass. Avoid serial `ls` / `find` / `rg` / `cat` retries that only rediscover the same path state. 
-- Treat repeated `write_stdin`, repeated `sed` / `cat` peeks, and tiny diagnostic follow-up checks as strong negative signals. If they appear alongside climbing input cost, stop the probe loop and batch the next phase. +- For prompts about `token inefficiency`, `reviewer mode`, `minimal token overhead`, or session waste patterns, switch into low-overhead mode. +- Plan in at most 4 bullets. +- Execute by phase. +- Batch related reads and commands. +- Avoid duplicate reads and interactive loops. +- Keep outputs compact. +- Verify once per phase. +- Low output alone is not a defect. A bounded run that finishes in roughly <=10 steps is usually fine. +- Low output spread across 20+ steps with rising per-turn input is fragmentation and should be treated as context growth first. +- Startup / resume summaries stay tiny: `branch`, `task`, `blocker`, `next`, and `evidence`. +- Front-load scaffold/path discovery into one grouped inspection pass. Avoid serial `ls` / `find` / `rg` / `cat` retries that rediscover the same path state. +- Treat repeated `write_stdin`, repeated `sed` / `cat` peeks, and tiny diagnostic follow-up checks as strong negative signals. +- If a session turns fragmented, collapse back to inspect once, patch once, verify once, and summarize once. - Tool / hook summaries stay tiny: command, status, last meaningful lines only. Drop routine hook boilerplate. - Keep raw terminal interaction out of long-lived context. For `write_stdin` or interactive babysitting, retain only process, action sent, current result, and next action. - Keep execution log separate from reasoning context: full commands/stdout belong in logs, while prompt context keeps only the latest 1-2 checkpoints plus the newest tool-result summary. -- Treat local edit/commit, remote publish/PR, CI diagnosis, and cleanup as bounded phases. Do not spend fresh narration or approval turns on obvious safe follow-ons inside an already authorized phase unless the risk changes. 
-- When a session turns fragmented, collapse back to inspect once, patch once, verify once, and summarize once. -- Use a fixed checkpoint shape when compacting: `Task`, `Done`, `Current status`, and `Next`. -- Keep `.omx/notepad.md` lean: live handoffs only. Use exactly `branch`, `task`, `blocker`, `next step`, and `evidence`; move narrative proof into OpenSpec artifacts, PRs, or command output. +- Treat local edit/commit, remote publish/PR, CI diagnosis, and cleanup as bounded phases. +- Do not spend fresh narration or approval turns on obvious safe follow-ons inside an already authorized phase unless the risk changes. + +### Caveman style + +Commentary and progress updates use smart-caveman `ultra` by default: + +- Answer order stays fixed: answer first, cause next, fix or next step last. +- drop filler +- use fragments when clear +- answer first +- cause next +- fix or next step last + +Keep exact literals unchanged: + +- code +- commands +- file paths +- flags +- env vars +- URLs +- numbers +- timestamps +- error text + +Switch back to `lite` or normal wording for: -## OMX Caveman Style +- security warnings +- irreversible actions +- privacy/compliance notes +- ordered instructions where fragments may confuse +- confused users +- commits +- PR text +- specs +- logs +- blocker evidence -- Commentary and progress updates use smart-caveman `ultra` by default: drop articles, filler, pleasantries, and hedging. Fragments are fine when they stay clear. -- Answer order stays fixed: answer first, cause next, fix or next step last. If yes/no fits, say yes/no first. -- Keep literals exact: code, commands, file paths, flags, env vars, URLs, numbers, timestamps, and error text are never caveman-compressed. -- Auto-clarity wins: switch back to `lite` or normal wording for security warnings, irreversible actions, privacy/compliance notes, ordered instructions where fragments may confuse, or when the user is confused and needs more detail. 
-- Boundaries stay normal/exact for code, commits, PR text, specs, logs, and blocker evidence. +Never caveman-compress commands, file paths, specs, logs, or blocker evidence. + +### Isolation + +Every task runs on a dedicated `agent/*` branch and worktree. + +Start with: + +```bash +gx branch start "" "" +``` + +Treat the base branch (`main` / `dev`) as read-only while an agent branch is active. -**Isolation.** Every task runs on a dedicated `agent/*` branch + worktree. Start with `gx branch start "" ""`. Treat the base branch (`main`/`dev`) as read-only while an agent branch is active. The `.githooks/post-checkout` hook auto-reverts primary-branch switches during agent sessions and auto-stashes a dirty tree before reverting - bypass only with `GUARDEX_ALLOW_PRIMARY_BRANCH_SWITCH=1`. For every new task, including follow-up work in the same chat/session, if an assigned agent sub-branch/worktree is already open, continue in that sub-branch instead of creating a fresh lane unless the user explicitly redirects scope. -Never implement directly on the local/base branch checkout; keep it unchanged and perform all edits in the agent sub-branch/worktree. -**Primary-tree lock (blocking).** On the primary checkout, do NOT run any of: `git checkout `, `git switch `, `git switch -c ...`, `git checkout -b ...`, or `git worktree add `. The only branch-changing commands allowed on primary are `git fetch` and `git pull --ff-only` against the protected branch itself. To work on any `agent/*` branch, run `gx branch start ...` first, then `cd` into the printed `.omc/agent-worktrees/...` path and run every subsequent git command from inside that worktree. If you find yourself typing `git checkout agent/...` or `git switch agent/...` from the primary cwd, stop - that is the mistake that flips primary onto an agent branch. +Never implement directly on the local/base branch checkout. Keep it unchanged and perform all edits in the agent sub-branch/worktree. 
+ +### Primary-tree lock + +On the primary checkout, do not run: + +```bash +git checkout +git switch +git switch -c ... +git checkout -b ... +git worktree add +``` + +Allowed on primary: + +```bash +git fetch +git pull --ff-only +``` + +To work on any `agent/*` branch, run `gx branch start ...` first, then `cd` into the printed worktree path and run every subsequent git command from inside that worktree. + +If you are about to type `git checkout agent/...` or `git switch agent/...` from the primary checkout, stop. That is the mistake that flips primary onto an agent branch. + +### Dirty-tree rule + +Finish or stash edits inside the worktree they belong to before any branch switch on primary. + +The post-checkout guard may auto-stash a dirty primary tree as: + +```text +guardex-auto-revert -> +``` + +That is a safety net, not a workflow. Do not rely on it routinely. + +Recover stashed changes with: + +```bash +git stash list | grep 'guardex-auto-revert' +``` + +### Ownership -**Dirty-tree rule.** Finish or stash edits inside the worktree they belong to before any branch switch on primary. The post-checkout guard auto-stashes a dirty primary tree as `guardex-auto-revert ->` before reverting, but that is a safety net, not a workflow; do not rely on it routinely. Recover stashed changes with `git stash list | grep 'guardex-auto-revert'`. +Before editing, claim files. -**Ownership.** Before editing, claim files: `gx locks claim --branch "" `. Before deleting, confirm the path is in your claim. Don't edit outside your scope unless reassigned. +Preferred Colony path when on an active task: -**Handoff gate.** Post a one-line handoff note (plan/change, owned scope, intended action) before editing. Re-read the latest handoffs before replacing others' code. +```text +mcp__colony__task_claim_file +``` + +Guardex lock path: + +```bash +gx locks claim --branch "" +``` + +Before deleting, confirm the path is in your claim. + +Do not edit outside your scope unless reassigned. 
+ +If another agent owns or recently touched nearby code: + +1. read latest Colony context +2. post a handoff or question +3. avoid reverting unrelated changes +4. report conflicts instead of overwriting + +### Handoff gate + +Before editing, post a one-line handoff note through Colony `task_post` when a task is active. + +Use `.omx/notepad.md` only when Colony is unavailable or the lane explicitly depends on legacy OMX state. + +Handoff shape: + +```text +branch=; task=; blocker=; next=; evidence= +``` + +Re-read latest Colony context before replacing another agent's code. + +### Completion + +Finish with: + +```bash +gx branch finish --branch "" --via-pr --wait-for-merge --cleanup +``` + +or: + +```bash +gx finish --all +``` + +Task is complete only when: + +1. changes are committed +2. branch is pushed +3. PR URL is recorded +4. PR state is `MERGED` +5. sandbox worktree is pruned +6. final handoff records proof + +If anything blocks, append a `BLOCKED:` note and stop. Do not half-finish. -**Completion.** Finish with `gx branch finish --branch "" --via-pr --wait-for-merge --cleanup` (or `gx finish --all`). Task is only complete when: commit pushed, PR URL recorded, state = `MERGED`, sandbox worktree pruned. If anything blocks, append a `BLOCKED:` note and stop - don't half-finish. OMX completion policy: when a task is done, the agent must commit the task changes, push the agent branch, and create/update a PR before considering the branch complete. -**Parallel safety.** Assume other agents edit nearby. Never revert unrelated changes. Report conflicts in the handoff. +### Parallel safety + +Assume other agents edit nearby. + +- Never revert unrelated changes. +- Never simplify or delete critical shared paths without explicit request and regression coverage. +- Report conflicts in the handoff. +- Prefer compatibility-preserving changes over endpoint-specific rewrites when other agents may be changing adjacent systems. 
+ +### Reporting + +Every completion handoff includes: + +```text +branch +task +files changed +behavior touched +verification commands/results +PR URL +merge state +sandbox cleanup state +risks/follow-ups +``` + +If blocked, use: + +```text +BLOCKED: +branch= +task= +blocker= +next= +evidence= +``` + +### Open questions + +If Codex/Claude hits an unresolved question, branching decision, or blocker that should survive chat, record it in: + +```text +openspec/plan//open-questions.md +``` + +as an unchecked item: + +```md +- [ ] Question or blocker... +``` + +Resolve it in-place when answered instead of burying it in chat-only notes. + +### OpenSpec + +OpenSpec is the source of truth for change-driven repo work. + +For change-driven tasks, keep: + +```text +openspec/changes//tasks.md +``` + +current during work, not batched at the end. + +Task scaffolds and manual task edits must include a final completion/cleanup section that ends with PR merge + sandbox cleanup and records PR URL + final `MERGED` evidence. + +Validate specs before archive: + +```bash +openspec validate --specs +``` + +Never archive unverified work. + +For `T0` / small `T1` lanes, use the compact Colony spec path when available. One Colony handoff plus `colony-spec.md` is enough. Do not create proposal/spec/tasks unless the task grows. + +For `T2` / `T3` lanes, keep proposal, spec, design, and tasks live while implementing. + +### Version bumps + +If a change bumps a published version, the same PR records release notes in the appropriate OpenSpec artifact or release-note mechanism for the repo. + +Do not edit `CHANGELOG.md` directly unless the repo explicitly requires manual changelog edits. + +### Verification gates + +Before claiming completion, run the narrowest meaningful verification for the touched area. 
+ +Examples: + +```bash +pnpm test +pnpm typecheck +pnpm lint +``` + +If a command cannot run, record: + +```text +command +reason it could not run +risk +next +``` + +Do not claim green verification without command output evidence. + +### What not to put in this file -**Reporting.** Every completion handoff includes: files changed, behavior touched, verification commands + results, risks/follow-ups. +Do not embed: -**Open questions.** If Codex/Claude hits an unresolved question, branching decision, or blocker that should survive chat, record it in `openspec/plan//open-questions.md` as an unchecked `- [ ]` item. Resolve it in-place when answered instead of burying it in chat-only notes. +- stale memory dumps +- PR transcripts +- long logs +- generated status snapshots +- session history +- full OpenSpec examples +- repeated copies of long workflow docs -**OpenSpec (when change-driven).** Keep `openspec/changes//tasks.md` checkboxes current during work, not batched at the end. Task scaffolds and manual task edits must include an explicit final completion/cleanup section that ends with PR merge + sandbox cleanup (`gx finish --via-pr --wait-for-merge --cleanup` or `gx branch finish ... --cleanup`) and records PR URL + final `MERGED` evidence. Verify specs with `openspec validate --specs` before archive. Don't archive unverified. +Keep this section as the hard multi-agent contract. Put long examples and recovery docs in repo-specific workflow docs. -**Version bumps.** If a change bumps a published version, the same PR updates release notes/changelog. 
diff --git a/test/prompt.test.js b/test/prompt.test.js index d5612975..fbcf9ee7 100644 --- a/test/prompt.test.js +++ b/test/prompt.test.js @@ -150,12 +150,13 @@ test('prompt --snippet prints the managed AGENTS template with token budget and const result = runNode(['prompt', '--snippet'], repoDir); assert.equal(result.status, 0, result.stderr || result.stdout); assert.match(result.stdout, /<!-- multiagent-safety:START -->/); - assert.match(result.stdout, /## Token \/ Context Budget/); + assert.match(result.stdout, /### Colony coordination loop/); + assert.match(result.stdout, /Use Colony as the primary coordination surface\./); + assert.match(result.stdout, /### Token \/ context budget/); assert.match(result.stdout, /Default: less word, same proof\./); assert.match(result.stdout, /Keep raw terminal interaction out of long-lived context/); assert.match(result.stdout, /Keep execution log separate from reasoning context/); - assert.match(result.stdout, /Use a fixed checkpoint shape when compacting: `Task`, `Done`, `Current status`, and `Next`\./); - assert.match(result.stdout, /## OMX Caveman Style/); + assert.match(result.stdout, /### Caveman style/); assert.match(result.stdout, /Answer order stays fixed: answer first, cause next, fix or next step last\./); }); diff --git a/test/setup.test.js b/test/setup.test.js index 0e6d278e..a63d38cb 100644 --- a/test/setup.test.js +++ b/test/setup.test.js @@ -142,9 +142,11 @@ test('setup provisions workflow files and repo config', () => { agentsContent, /For every new task, including follow-up work in the same chat\/session, if an assigned agent sub-branch\/worktree is already open, continue in that sub-branch/, ); - assert.match(agentsContent, /## Token \/ Context Budget/); + assert.match(agentsContent, /### Colony coordination loop/); + assert.match(agentsContent, /Use Colony as the primary coordination surface\./); + assert.match(agentsContent, /### Token \/ context budget/); assert.match(agentsContent, /Default: less word, same proof\./); -
assert.match(agentsContent, /## OMX Caveman Style/); + assert.match(agentsContent, /### Caveman style/); assert.match(agentsContent, /Answer order stays fixed: answer first, cause next, fix or next step last\./); const gitignoreContent = fs.readFileSync(path.join(repoDir, '.gitignore'), 'utf8'); @@ -390,9 +392,9 @@ test('setup refreshes existing managed AGENTS block by default', () => { assert.match(currentAgents, /Guardex is enabled by default/); assert.match(currentAgents, /GUARDEX_ON=0/); assert.match(currentAgents, /GUARDEX_ON=1/); - assert.match(currentAgents, /Small tasks stay in direct caveman-only mode\./); - assert.match(currentAgents, /Promote to OMX orchestration only when the task is medium\/large/); - assert.match(currentAgents, /explicit final completion\/cleanup section/); + assert.match(currentAgents, /Small tasks stay direct and caveman-only\./); + assert.match(currentAgents, /Promote to full Guardex \/ OMX orchestration only when scope grows into/); + assert.match(currentAgents, /final completion\/cleanup section/); assert.match(currentAgents, /PR URL \+ final `MERGED` evidence/); assert.doesNotMatch(currentAgents, /legacy managed clause/); assert.match(result.stdout, /refreshed gitguardex-managed block/); @@ -862,13 +864,14 @@ test('install configures AGENTS managed policy block with GX contract wording', const agentsContent = fs.readFileSync(path.join(repoDir, 'AGENTS.md'), 'utf8'); assert.match(agentsContent, /<!-- multiagent-safety:START -->/); - assert.match(agentsContent, /## Multi-Agent Execution Contract \(GX\)/); + assert.match(agentsContent, /## Multi-Agent Execution Contract: Guardex \+ Colony/); assert.match( agentsContent, /OMX completion policy: when a task is done, the agent must commit the task changes, push the agent branch, and create\/update a PR/, ); - assert.match(agentsContent, /## Token \/ Context Budget/); - assert.match(agentsContent, /## OMX Caveman Style/); + assert.match(agentsContent, /### Colony coordination loop/); + assert.match(agentsContent,
/### Token \/ context budget/); + assert.match(agentsContent, /### Caveman style/); });