diff --git a/openspec/changes/agent-codex-codex-session-task-mode-decider-2026-04-22-12-16/.openspec.yaml b/openspec/changes/agent-codex-codex-session-task-mode-decider-2026-04-22-12-16/.openspec.yaml new file mode 100644 index 0000000..25345f4 --- /dev/null +++ b/openspec/changes/agent-codex-codex-session-task-mode-decider-2026-04-22-12-16/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-04-22 diff --git a/openspec/changes/agent-codex-codex-session-task-mode-decider-2026-04-22-12-16/proposal.md b/openspec/changes/agent-codex-codex-session-task-mode-decider-2026-04-22-12-16/proposal.md new file mode 100644 index 0000000..35f18b5 --- /dev/null +++ b/openspec/changes/agent-codex-codex-session-task-mode-decider-2026-04-22-12-16/proposal.md @@ -0,0 +1,17 @@ +## Why + +- Guardex currently documents OpenSpec tiers on `gx branch start`, but the start script still treated tier selection as a no-op and always behaved like the full scaffold path once auto-bootstrap was enabled. +- Codex launches also had no lightweight task-size gate, so tiny asks paid the full OMX/T3 setup cost even when the repo contract says those asks should stay caveman-only. +- Active session records did not preserve the routing decision, which made takeover prompts and the Active Agents surface blind to whether a sandbox was intentionally lightweight or OMX-backed. + +## What Changes + +- Wire real `T0`/`T1`/`T2`/`T3` behavior into `scripts/agent-branch-start.sh` and its template so auto-bootstrapped branch starts create the right OpenSpec footprint for the requested tier. +- Add a task-size decider to `scripts/codex-agent.sh` and its template so explicit lightweight asks route to caveman with `T1`, standard behavior changes route to OMX with `T2`, and orchestration-heavy asks escalate to `T3`. +- Persist `taskMode`, `openspecTier`, and `taskRoutingReason` in the active-session record schema and cover the new behavior with focused branch-start, sandbox, and session-state tests. 
+ +## Impact + +- Affected surfaces are the Guardex branch bootstrap path, Codex launcher flow, and the VS Code Active Agents session metadata readers. +- Main risk is routing drift between the branch-start script, the Codex launcher, and the session schema; the targeted regression suite now exercises those surfaces together to keep them aligned. +- Existing default `gx branch start` behavior remains intact when `GUARDEX_OPENSPEC_AUTO_INIT=false`; tiered scaffolding only materializes on the same auto-bootstrap path Guardex already uses when OpenSpec initialization is enabled. diff --git a/openspec/changes/agent-codex-codex-session-task-mode-decider-2026-04-22-12-16/specs/codex-session-task-routing/spec.md b/openspec/changes/agent-codex-codex-session-task-mode-decider-2026-04-22-12-16/specs/codex-session-task-routing/spec.md new file mode 100644 index 0000000..143e56e --- /dev/null +++ b/openspec/changes/agent-codex-codex-session-task-mode-decider-2026-04-22-12-16/specs/codex-session-task-routing/spec.md @@ -0,0 +1,41 @@ +## ADDED Requirements + +### Requirement: Guardex branch start honors OpenSpec tiers +`gx branch start` SHALL apply the requested OpenSpec tier instead of always creating the full T3 scaffold. + +#### Scenario: T1 branch start creates a notes-only change workspace +- **WHEN** an operator runs `gx branch start --tier T1 ...` +- **THEN** Guardex creates the agent branch/worktree +- **AND** it initializes `openspec/changes/<change-slug>/notes.md` plus `.openspec.yaml` +- **AND** it does not create `proposal.md`, `tasks.md`, or an `openspec/plan/<plan-slug>/` workspace. + +#### Scenario: T2 branch start skips the plan workspace +- **WHEN** an operator runs `gx branch start --tier T2 ...` +- **THEN** Guardex creates the full change workspace with `proposal.md`, `tasks.md`, and `specs/.../spec.md` +- **AND** it does not create an `openspec/plan/<plan-slug>/` workspace. 
+ +#### Scenario: T3 branch start keeps the full scaffold +- **WHEN** an operator runs `gx branch start --tier T3 ...` +- **THEN** Guardex creates both the full change workspace and the plan workspace. + +### Requirement: Codex launcher auto-routes task size into mode plus tier +The Codex launcher SHALL classify the requested task before starting the sandbox and choose the lightweight or OMX lane accordingly. + +#### Scenario: explicit lightweight task routes to caveman and T1 +- **WHEN** `scripts/codex-agent.sh` launches a task whose text starts with `quick:`, `simple:`, `tiny:`, `minor:`, `small:`, `just:`, or `only:` +- **THEN** it reports a `caveman` task mode +- **AND** it starts the sandbox with OpenSpec tier `T1` +- **AND** the launched Codex process receives the selected mode/tier in its environment. + +#### Scenario: non-trivial task routes to OMX-backed tiers +- **WHEN** `scripts/codex-agent.sh` launches a broader behavior/refactor/workflow task without a lightweight prefix +- **THEN** it reports an `omx` task mode +- **AND** it selects `T2` by default +- **AND** it upgrades to `T3` for clearly plan-heavy or orchestration-heavy requests. + +### Requirement: active session records capture the routing decision +The active session record written for Codex sandboxes SHALL preserve the selected task mode and OpenSpec tier. + +#### Scenario: active session record stores mode plus tier +- **WHEN** `scripts/agent-session-state.js start ...` is called with task-routing metadata +- **THEN** the written `.omx/state/active-sessions/*.json` record includes the selected task mode, OpenSpec tier, and routing reason. 
diff --git a/openspec/changes/agent-codex-codex-session-task-mode-decider-2026-04-22-12-16/tasks.md b/openspec/changes/agent-codex-codex-session-task-mode-decider-2026-04-22-12-16/tasks.md new file mode 100644 index 0000000..6799b3e --- /dev/null +++ b/openspec/changes/agent-codex-codex-session-task-mode-decider-2026-04-22-12-16/tasks.md @@ -0,0 +1,36 @@ +## Definition of Done + +This change is complete only when **all** of the following are true: + +- Every checkbox below is checked. +- The agent branch reaches `MERGED` state on `origin` and the PR URL + state are recorded in the completion handoff. +- If any step blocks (test failure, conflict, ambiguous result), append a `BLOCKED:` line under section 4 explaining the blocker and **STOP**. Do not tick remaining cleanup boxes; do not silently skip the cleanup pipeline. + +## Handoff + +- Handoff: change=`agent-codex-codex-session-task-mode-decider-2026-04-22-12-16`; branch=`agent/codex/codex-session-task-mode-decider-2026-04-22-12-16`; scope=`scripts/agent-branch-start.sh, scripts/codex-agent.sh, scripts/agent-session-state.js, templates/scripts/agent-branch-start.sh, templates/scripts/codex-agent.sh, vscode/guardex-active-agents/session-schema.js, templates/vscode/guardex-active-agents/session-schema.js, test/branch.test.js, test/sandbox.test.js, test/vscode-active-agents-session-state.test.js, openspec/changes/agent-codex-codex-session-task-mode-decider-2026-04-22-12-16/*`; action=`finish the tier-aware branch-start and task-routing lane, verify the focused coverage, then run the mandatory PR merge + cleanup flow`. +- Copy prompt: Continue `agent-codex-codex-session-task-mode-decider-2026-04-22-12-16` on branch `agent/codex/codex-session-task-mode-decider-2026-04-22-12-16`. 
Work inside the existing sandbox, review `openspec/changes/agent-codex-codex-session-task-mode-decider-2026-04-22-12-16/tasks.md`, continue from the current state instead of creating a new sandbox, and when the work is done run `gx branch finish --branch agent/codex/codex-session-task-mode-decider-2026-04-22-12-16 --base main --via-pr --wait-for-merge --cleanup`. + +## 1. Specification + +- [x] 1.1 Finalize proposal scope and acceptance criteria for `agent-codex-codex-session-task-mode-decider-2026-04-22-12-16`. +- [x] 1.2 Define normative requirements in `specs/codex-session-task-routing/spec.md`. + +## 2. Implementation + +- [x] 2.1 Wire real `T0/T1/T2/T3` behavior into `scripts/agent-branch-start.sh` and `templates/scripts/agent-branch-start.sh`. +- [x] 2.2 Add a Codex-side task-mode decider in `scripts/codex-agent.sh` and `templates/scripts/codex-agent.sh` so simple asks route to caveman/T1 and broader asks route to OMX/T2-or-T3. +- [x] 2.3 Persist the selected task mode/tier in the active-session record surface. +- [x] 2.4 Add/update focused regression coverage for branch-start tier scaffolding, codex-agent routing, and session metadata. + +## 3. Verification + +- [x] 3.1 Run targeted project verification commands. `node --test test/branch.test.js test/sandbox.test.js test/vscode-active-agents-session-state.test.js` passed after aligning the new tiered branch-start tests with the existing `GUARDEX_OPENSPEC_AUTO_INIT=true` bootstrap path; `git diff --check` also passed cleanly. +- [x] 3.2 Run `openspec validate agent-codex-codex-session-task-mode-decider-2026-04-22-12-16 --type change --strict`. Result: `Change 'agent-codex-codex-session-task-mode-decider-2026-04-22-12-16' is valid`. +- [x] 3.3 Run `openspec validate --specs`. Result: command completed successfully and reported `No items found to validate.` + +## 4. 
Cleanup (mandatory; run before claiming completion) + +- [ ] 4.1 Run the cleanup pipeline: `gx branch finish --branch agent/codex/codex-session-task-mode-decider-2026-04-22-12-16 --base main --via-pr --wait-for-merge --cleanup`. This handles commit -> push -> PR create -> merge wait -> worktree prune in one invocation. +- [ ] 4.2 Record the PR URL and final merge state (`MERGED`) in the completion handoff. +- [ ] 4.3 Confirm the sandbox worktree is gone (`git worktree list` no longer shows the agent path; `git branch -a` shows no surviving local/remote refs for the branch). diff --git a/scripts/agent-branch-start.sh b/scripts/agent-branch-start.sh index c871372..9da6d60 100755 --- a/scripts/agent-branch-start.sh +++ b/scripts/agent-branch-start.sh @@ -14,6 +14,7 @@ OPENSPEC_PLAN_SLUG_OVERRIDE="${GUARDEX_OPENSPEC_PLAN_SLUG:-}" OPENSPEC_CHANGE_SLUG_OVERRIDE="${GUARDEX_OPENSPEC_CHANGE_SLUG:-}" OPENSPEC_CAPABILITY_SLUG_OVERRIDE="${GUARDEX_OPENSPEC_CAPABILITY_SLUG:-}" OPENSPEC_MASTERPLAN_LABEL_RAW="${GUARDEX_OPENSPEC_MASTERPLAN_LABEL-masterplan}" +OPENSPEC_TIER_RAW="${GUARDEX_OPENSPEC_TIER:-T3}" PRINT_NAME_ONLY=0 POSITIONAL_ARGS=() @@ -54,8 +55,7 @@ while [[ $# -gt 0 ]]; do shift ;; --tier) - # Accepted for CLAUDE.md compatibility; scaffold size is not yet wired - # through this script. Consume the value so callers can pass it. + OPENSPEC_TIER_RAW="${2:-$OPENSPEC_TIER_RAW}" shift 2 ;; --in-place|--allow-in-place) @@ -246,11 +246,45 @@ normalize_bool() { OPENSPEC_AUTO_INIT="$(normalize_bool "$OPENSPEC_AUTO_INIT_RAW" "1")" +normalize_tier() { + local raw="${1:-}" + local fallback="${2:-T3}" + local upper + upper="$(printf '%s' "$raw" | tr '[:lower:]' '[:upper:]')" + case "$upper" in + T0|T1|T2|T3) printf '%s' "$upper" ;; + '') printf '%s' "$fallback" ;; + *) return 1 ;; + esac +} + +if ! 
OPENSPEC_TIER="$(normalize_tier "$OPENSPEC_TIER_RAW" "T3")"; then + echo "[agent-branch-start] Unsupported OpenSpec tier: ${OPENSPEC_TIER_RAW}" >&2 + exit 1 +fi + +OPENSPEC_SKIP_CHANGE=0 +OPENSPEC_SKIP_PLAN=0 +OPENSPEC_MINIMAL=0 +case "$OPENSPEC_TIER" in + T0) + OPENSPEC_SKIP_CHANGE=1 + OPENSPEC_SKIP_PLAN=1 + ;; + T1) + OPENSPEC_SKIP_PLAN=1 + OPENSPEC_MINIMAL=1 + ;; + T2) + OPENSPEC_SKIP_PLAN=1 + ;; +esac + resolve_openspec_masterplan_label() { local raw="${OPENSPEC_MASTERPLAN_LABEL_RAW:-}" local label - if [[ "$OPENSPEC_AUTO_INIT" -ne 1 ]] || [[ -z "$raw" ]]; then + if [[ "$OPENSPEC_AUTO_INIT" -ne 1 ]] || [[ "$OPENSPEC_SKIP_PLAN" -eq 1 ]] || [[ -z "$raw" ]]; then printf '' return 0 fi @@ -404,7 +438,7 @@ initialize_openspec_plan_workspace() { local worktree="$2" local plan_slug="$3" - if [[ "$OPENSPEC_AUTO_INIT" -ne 1 ]]; then + if [[ "$OPENSPEC_AUTO_INIT" -ne 1 ]] || [[ "$OPENSPEC_SKIP_PLAN" -eq 1 ]]; then return 0 fi @@ -430,14 +464,15 @@ initialize_openspec_change_workspace() { local change_slug="$3" local capability_slug="$4" - if [[ "$OPENSPEC_AUTO_INIT" -ne 1 ]]; then + if [[ "$OPENSPEC_AUTO_INIT" -ne 1 ]] || [[ "$OPENSPEC_SKIP_CHANGE" -eq 1 ]]; then return 0 fi local init_output="" if ! init_output="$( cd "$worktree" - run_guardex_cli internal run-shell changeInit "$change_slug" "$capability_slug" 2>&1 + GUARDEX_OPENSPEC_MINIMAL="$OPENSPEC_MINIMAL" \ + run_guardex_cli internal run-shell changeInit "$change_slug" "$capability_slug" 2>&1 )"; then printf '%s\n' "$init_output" >&2 echo "[agent-branch-start] OpenSpec workspace initialization failed for change '${change_slug}'." 
>&2 @@ -599,8 +634,17 @@ fi echo "[agent-branch-start] Created branch: ${branch_name}" echo "[agent-branch-start] Worktree: ${worktree_path}" -echo "[agent-branch-start] OpenSpec change: openspec/changes/${openspec_change_slug}" -echo "[agent-branch-start] OpenSpec plan: openspec/plan/${openspec_plan_slug}" +echo "[agent-branch-start] OpenSpec tier: ${OPENSPEC_TIER}" +if [[ "$OPENSPEC_SKIP_CHANGE" -eq 1 ]]; then + echo "[agent-branch-start] OpenSpec change: skipped by tier ${OPENSPEC_TIER}" +else + echo "[agent-branch-start] OpenSpec change: openspec/changes/${openspec_change_slug}" +fi +if [[ "$OPENSPEC_SKIP_PLAN" -eq 1 ]]; then + echo "[agent-branch-start] OpenSpec plan: skipped by tier ${OPENSPEC_TIER}" +else + echo "[agent-branch-start] OpenSpec plan: openspec/plan/${openspec_plan_slug}" +fi echo "[agent-branch-start] Next steps:" echo " cd \"${worktree_path}\"" echo " gx locks claim --branch \"${branch_name}\" " diff --git a/scripts/agent-session-state.js b/scripts/agent-session-state.js index ae65c18..2e65554 100755 --- a/scripts/agent-session-state.js +++ b/scripts/agent-session-state.js @@ -23,7 +23,7 @@ const sessionSchema = resolveSessionSchemaModule(); function usage() { return ( 'Usage:\n' + - ' node scripts/agent-session-state.js start --repo --branch --task --agent --worktree --pid --cli \n' + + ' node scripts/agent-session-state.js start --repo --branch --task --agent --worktree --pid --cli [--task-mode ] [--openspec-tier ] [--routing-reason ]\n' + ' node scripts/agent-session-state.js stop --repo --branch \n' ); } @@ -65,6 +65,9 @@ function writeSessionRecord(options) { worktreePath: requireOption(options, 'worktree'), pid: requireOption(options, 'pid'), cliName: requireOption(options, 'cli'), + taskMode: options['task-mode'], + openspecTier: options['openspec-tier'], + taskRoutingReason: options['routing-reason'], }); const targetPath = sessionSchema.sessionFilePathForBranch(repoRoot, branch); diff --git a/scripts/codex-agent.sh 
b/scripts/codex-agent.sh index 6a05817..e287707 100755 --- a/scripts/codex-agent.sh +++ b/scripts/codex-agent.sh @@ -17,6 +17,13 @@ OPENSPEC_PLAN_SLUG_OVERRIDE="${GUARDEX_OPENSPEC_PLAN_SLUG:-}" OPENSPEC_CHANGE_SLUG_OVERRIDE="${GUARDEX_OPENSPEC_CHANGE_SLUG:-}" OPENSPEC_CAPABILITY_SLUG_OVERRIDE="${GUARDEX_OPENSPEC_CAPABILITY_SLUG:-}" OPENSPEC_MASTERPLAN_LABEL_RAW="${GUARDEX_OPENSPEC_MASTERPLAN_LABEL-masterplan}" +OPENSPEC_TIER_RAW="${GUARDEX_OPENSPEC_TIER:-}" +OPENSPEC_TIER="" +OPENSPEC_SKIP_CHANGE=0 +OPENSPEC_SKIP_PLAN=0 +OPENSPEC_MINIMAL=0 +TASK_MODE="" +TASK_ROUTING_REASON="" run_guardex_cli() { if [[ -n "$CLI_ENTRY" ]]; then @@ -48,6 +55,117 @@ normalize_bool() { esac } +normalize_tier() { + local raw="${1:-}" + local fallback="${2:-T2}" + local upper + upper="$(printf '%s' "$raw" | tr '[:lower:]' '[:upper:]')" + case "$upper" in + T0|T1|T2|T3) printf '%s' "$upper" ;; + '') printf '%s' "$fallback" ;; + *) return 1 ;; + esac +} + +string_contains_any() { + local haystack="$1" + shift + local needle + for needle in "$@"; do + if [[ "$haystack" == *"$needle"* ]]; then + return 0 + fi + done + return 1 +} + +string_has_lightweight_prefix() { + local text="$1" + local prefix + for prefix in "quick:" "simple:" "tiny:" "minor:" "small:" "just:" "only:"; do + if [[ "$text" == "$prefix"* ]]; then + return 0 + fi + done + return 1 +} + +derive_task_mode_from_tier() { + case "$1" in + T0|T1) printf 'caveman' ;; + T2|T3) printf 'omx' ;; + *) return 1 ;; + esac +} + +apply_openspec_tier() { + OPENSPEC_SKIP_CHANGE=0 + OPENSPEC_SKIP_PLAN=0 + OPENSPEC_MINIMAL=0 + case "$1" in + T0) + OPENSPEC_SKIP_CHANGE=1 + OPENSPEC_SKIP_PLAN=1 + ;; + T1) + OPENSPEC_SKIP_PLAN=1 + OPENSPEC_MINIMAL=1 + ;; + T2) + OPENSPEC_SKIP_PLAN=1 + ;; + esac +} + +decide_task_routing() { + local task_lower + task_lower="$(printf '%s' "$TASK_NAME" | tr '[:upper:]' '[:lower:]')" + + if [[ -n "$OPENSPEC_TIER_RAW" ]]; then + if ! 
OPENSPEC_TIER="$(normalize_tier "$OPENSPEC_TIER_RAW" "T2")"; then + echo "[codex-agent] Unsupported OpenSpec tier: ${OPENSPEC_TIER_RAW}" >&2 + return 1 + fi + TASK_ROUTING_REASON="explicit tier override" + elif string_has_lightweight_prefix "$task_lower"; then + OPENSPEC_TIER="T1" + TASK_ROUTING_REASON="explicit lightweight prefix" + elif string_contains_any "$task_lower" \ + "ralph" "autopilot" "ultrawork" "ultraqa" "ralplan" "deep interview" "ouroboros" \ + "migration" "refactor" "architecture" "re-architect" "cross-cutting" "multi-agent" \ + "multiagent" "parallel" "orchestr" "release" "zero-copy" "install surface" "workflow" + then + OPENSPEC_TIER="T3" + TASK_ROUTING_REASON="plan-heavy or orchestration-heavy task wording" + elif string_contains_any "$task_lower" \ + "typo" "spelling" "comment-only" "comment only" "format-only" "format only" \ + "whitespace" "one-liner" "one liner" "version bump" "bump version" \ + "single-file" "single file" + then + OPENSPEC_TIER="T1" + TASK_ROUTING_REASON="small bounded maintenance wording" + else + OPENSPEC_TIER="T2" + TASK_ROUTING_REASON="default behavior-change route" + fi + + if ! 
TASK_MODE="$(derive_task_mode_from_tier "$OPENSPEC_TIER")"; then + echo "[codex-agent] Unsupported task mode tier: ${OPENSPEC_TIER}" >&2 + return 1 + fi + apply_openspec_tier "$OPENSPEC_TIER" +} + +describe_task_routing() { + case "$OPENSPEC_TIER" in + T0) printf 'caveman / T0 (no OpenSpec scaffold)' ;; + T1) printf 'caveman / T1 (notes-only OpenSpec)' ;; + T2) printf 'omx / T2 (change workspace only)' ;; + T3) printf 'omx / T3 (change plus plan workspace)' ;; + *) printf 'unknown / %s' "${OPENSPEC_TIER:-unset}" ;; + esac +} + AUTO_FINISH="$(normalize_bool "$AUTO_FINISH_RAW" "1")" AUTO_REVIEW_ON_CONFLICT="$(normalize_bool "$AUTO_REVIEW_ON_CONFLICT_RAW" "1")" AUTO_CLEANUP="$(normalize_bool "$AUTO_CLEANUP_RAW" "1")" @@ -58,7 +176,7 @@ resolve_openspec_masterplan_label() { local raw="${OPENSPEC_MASTERPLAN_LABEL_RAW:-}" local label - if [[ "$OPENSPEC_AUTO_INIT" -ne 1 ]] || [[ -z "$raw" ]]; then + if [[ "$OPENSPEC_AUTO_INIT" -ne 1 ]] || [[ "$OPENSPEC_SKIP_PLAN" -eq 1 ]] || [[ -z "$raw" ]]; then printf '' return 0 fi @@ -86,6 +204,10 @@ while [[ $# -gt 0 ]]; do BASE_BRANCH_EXPLICIT=1 shift 2 ;; + --tier) + OPENSPEC_TIER_RAW="${2:-$OPENSPEC_TIER_RAW}" + shift 2 + ;; --codex-bin) CODEX_BIN="${2:-$CODEX_BIN}" shift 2 @@ -151,6 +273,10 @@ if [[ "$BASE_BRANCH_EXPLICIT" -eq 1 && -z "$BASE_BRANCH" ]]; then exit 1 fi +if ! decide_task_routing; then + exit 1 +fi + if ! command -v "$CODEX_BIN" >/dev/null 2>&1; then echo "[codex-agent] Missing Codex CLI command: $CODEX_BIN" >&2 echo "[codex-agent] Install Codex first, then retry." 
>&2 @@ -393,7 +519,7 @@ start_sandbox_fallback() { printf '[agent-branch-start] Worktree: %s\n' "$worktree_path" } -start_args=("$TASK_NAME" "$AGENT_NAME") +start_args=(--tier "$OPENSPEC_TIER" "$TASK_NAME" "$AGENT_NAME") if [[ "$BASE_BRANCH_EXPLICIT" -eq 1 ]]; then start_args+=("$BASE_BRANCH") fi @@ -487,7 +613,10 @@ record_active_session_state() { --agent "$AGENT_NAME" \ --worktree "$wt" \ --pid "$$" \ - --cli "$CODEX_BIN" + --cli "$CODEX_BIN" \ + --task-mode "$TASK_MODE" \ + --openspec-tier "$OPENSPEC_TIER" \ + --routing-reason "$TASK_ROUTING_REASON" } clear_active_session_state() { @@ -545,6 +674,7 @@ print_takeover_prompt() { finish_cmd="gx branch finish --branch \"${branch}\" --base ${base_branch} --via-pr --wait-for-merge --cleanup" echo "[codex-agent] Takeover sandbox: ${wt}" + echo "[codex-agent] Takeover routing: $(describe_task_routing) (${TASK_ROUTING_REASON})" echo "[codex-agent] Takeover prompt: Continue \`${change_slug}\` on branch \`${branch}\`. Work inside \`${wt}\`, review \`${change_artifact}\`, continue from the current state instead of creating a new sandbox, and when the work is done run \`${finish_cmd}\`." } @@ -594,7 +724,7 @@ ensure_openspec_plan_workspace() { local wt="$1" local branch="$2" - if [[ "$OPENSPEC_AUTO_INIT" -ne 1 ]]; then + if [[ "$OPENSPEC_AUTO_INIT" -ne 1 ]] || [[ "$OPENSPEC_SKIP_PLAN" -eq 1 ]]; then return 0 fi @@ -619,7 +749,7 @@ ensure_openspec_change_workspace() { local wt="$1" local branch="$2" - if [[ "$OPENSPEC_AUTO_INIT" -ne 1 ]]; then + if [[ "$OPENSPEC_AUTO_INIT" -ne 1 ]] || [[ "$OPENSPEC_SKIP_CHANGE" -eq 1 ]]; then return 0 fi @@ -628,7 +758,8 @@ ensure_openspec_change_workspace() { capability_slug="$(resolve_openspec_capability_slug)" if ! 
init_output="$( cd "$wt" - run_guardex_cli internal run-shell changeInit "$change_slug" "$capability_slug" 2>&1 + GUARDEX_OPENSPEC_MINIMAL="$OPENSPEC_MINIMAL" \ + run_guardex_cli internal run-shell changeInit "$change_slug" "$capability_slug" 2>&1 )"; then printf '%s\n' "$init_output" >&2 echo "[codex-agent] OpenSpec workspace initialization failed for change '${change_slug}'." >&2 @@ -878,6 +1009,8 @@ if ! ensure_openspec_plan_workspace "$worktree_path" "$worktree_branch"; then exit 1 fi +echo "[codex-agent] Task routing: $(describe_task_routing) (${TASK_ROUTING_REASON})" + active_session_recorded=0 cleanup_active_session_state_on_exit() { set +e @@ -894,7 +1027,10 @@ trap cleanup_active_session_state_on_exit EXIT INT TERM echo "[codex-agent] Launching ${CODEX_BIN} in sandbox: $worktree_path" cd "$worktree_path" set +e -"$CODEX_BIN" "$@" +GUARDEX_TASK_MODE="$TASK_MODE" \ +GUARDEX_OPENSPEC_TIER="$OPENSPEC_TIER" \ +GUARDEX_TASK_ROUTING_REASON="$TASK_ROUTING_REASON" \ + "$CODEX_BIN" "$@" codex_exit="$?" set -e diff --git a/templates/scripts/agent-branch-start.sh b/templates/scripts/agent-branch-start.sh index c871372..9da6d60 100755 --- a/templates/scripts/agent-branch-start.sh +++ b/templates/scripts/agent-branch-start.sh @@ -14,6 +14,7 @@ OPENSPEC_PLAN_SLUG_OVERRIDE="${GUARDEX_OPENSPEC_PLAN_SLUG:-}" OPENSPEC_CHANGE_SLUG_OVERRIDE="${GUARDEX_OPENSPEC_CHANGE_SLUG:-}" OPENSPEC_CAPABILITY_SLUG_OVERRIDE="${GUARDEX_OPENSPEC_CAPABILITY_SLUG:-}" OPENSPEC_MASTERPLAN_LABEL_RAW="${GUARDEX_OPENSPEC_MASTERPLAN_LABEL-masterplan}" +OPENSPEC_TIER_RAW="${GUARDEX_OPENSPEC_TIER:-T3}" PRINT_NAME_ONLY=0 POSITIONAL_ARGS=() @@ -54,8 +55,7 @@ while [[ $# -gt 0 ]]; do shift ;; --tier) - # Accepted for CLAUDE.md compatibility; scaffold size is not yet wired - # through this script. Consume the value so callers can pass it. 
+ OPENSPEC_TIER_RAW="${2:-$OPENSPEC_TIER_RAW}" shift 2 ;; --in-place|--allow-in-place) @@ -246,11 +246,45 @@ normalize_bool() { OPENSPEC_AUTO_INIT="$(normalize_bool "$OPENSPEC_AUTO_INIT_RAW" "1")" +normalize_tier() { + local raw="${1:-}" + local fallback="${2:-T3}" + local upper + upper="$(printf '%s' "$raw" | tr '[:lower:]' '[:upper:]')" + case "$upper" in + T0|T1|T2|T3) printf '%s' "$upper" ;; + '') printf '%s' "$fallback" ;; + *) return 1 ;; + esac +} + +if ! OPENSPEC_TIER="$(normalize_tier "$OPENSPEC_TIER_RAW" "T3")"; then + echo "[agent-branch-start] Unsupported OpenSpec tier: ${OPENSPEC_TIER_RAW}" >&2 + exit 1 +fi + +OPENSPEC_SKIP_CHANGE=0 +OPENSPEC_SKIP_PLAN=0 +OPENSPEC_MINIMAL=0 +case "$OPENSPEC_TIER" in + T0) + OPENSPEC_SKIP_CHANGE=1 + OPENSPEC_SKIP_PLAN=1 + ;; + T1) + OPENSPEC_SKIP_PLAN=1 + OPENSPEC_MINIMAL=1 + ;; + T2) + OPENSPEC_SKIP_PLAN=1 + ;; +esac + resolve_openspec_masterplan_label() { local raw="${OPENSPEC_MASTERPLAN_LABEL_RAW:-}" local label - if [[ "$OPENSPEC_AUTO_INIT" -ne 1 ]] || [[ -z "$raw" ]]; then + if [[ "$OPENSPEC_AUTO_INIT" -ne 1 ]] || [[ "$OPENSPEC_SKIP_PLAN" -eq 1 ]] || [[ -z "$raw" ]]; then printf '' return 0 fi @@ -404,7 +438,7 @@ initialize_openspec_plan_workspace() { local worktree="$2" local plan_slug="$3" - if [[ "$OPENSPEC_AUTO_INIT" -ne 1 ]]; then + if [[ "$OPENSPEC_AUTO_INIT" -ne 1 ]] || [[ "$OPENSPEC_SKIP_PLAN" -eq 1 ]]; then return 0 fi @@ -430,14 +464,15 @@ initialize_openspec_change_workspace() { local change_slug="$3" local capability_slug="$4" - if [[ "$OPENSPEC_AUTO_INIT" -ne 1 ]]; then + if [[ "$OPENSPEC_AUTO_INIT" -ne 1 ]] || [[ "$OPENSPEC_SKIP_CHANGE" -eq 1 ]]; then return 0 fi local init_output="" if ! 
init_output="$( cd "$worktree" - run_guardex_cli internal run-shell changeInit "$change_slug" "$capability_slug" 2>&1 + GUARDEX_OPENSPEC_MINIMAL="$OPENSPEC_MINIMAL" \ + run_guardex_cli internal run-shell changeInit "$change_slug" "$capability_slug" 2>&1 )"; then printf '%s\n' "$init_output" >&2 echo "[agent-branch-start] OpenSpec workspace initialization failed for change '${change_slug}'." >&2 @@ -599,8 +634,17 @@ fi echo "[agent-branch-start] Created branch: ${branch_name}" echo "[agent-branch-start] Worktree: ${worktree_path}" -echo "[agent-branch-start] OpenSpec change: openspec/changes/${openspec_change_slug}" -echo "[agent-branch-start] OpenSpec plan: openspec/plan/${openspec_plan_slug}" +echo "[agent-branch-start] OpenSpec tier: ${OPENSPEC_TIER}" +if [[ "$OPENSPEC_SKIP_CHANGE" -eq 1 ]]; then + echo "[agent-branch-start] OpenSpec change: skipped by tier ${OPENSPEC_TIER}" +else + echo "[agent-branch-start] OpenSpec change: openspec/changes/${openspec_change_slug}" +fi +if [[ "$OPENSPEC_SKIP_PLAN" -eq 1 ]]; then + echo "[agent-branch-start] OpenSpec plan: skipped by tier ${OPENSPEC_TIER}" +else + echo "[agent-branch-start] OpenSpec plan: openspec/plan/${openspec_plan_slug}" +fi echo "[agent-branch-start] Next steps:" echo " cd \"${worktree_path}\"" echo " gx locks claim --branch \"${branch_name}\" " diff --git a/templates/scripts/codex-agent.sh b/templates/scripts/codex-agent.sh index 6a05817..e287707 100755 --- a/templates/scripts/codex-agent.sh +++ b/templates/scripts/codex-agent.sh @@ -17,6 +17,13 @@ OPENSPEC_PLAN_SLUG_OVERRIDE="${GUARDEX_OPENSPEC_PLAN_SLUG:-}" OPENSPEC_CHANGE_SLUG_OVERRIDE="${GUARDEX_OPENSPEC_CHANGE_SLUG:-}" OPENSPEC_CAPABILITY_SLUG_OVERRIDE="${GUARDEX_OPENSPEC_CAPABILITY_SLUG:-}" OPENSPEC_MASTERPLAN_LABEL_RAW="${GUARDEX_OPENSPEC_MASTERPLAN_LABEL-masterplan}" +OPENSPEC_TIER_RAW="${GUARDEX_OPENSPEC_TIER:-}" +OPENSPEC_TIER="" +OPENSPEC_SKIP_CHANGE=0 +OPENSPEC_SKIP_PLAN=0 +OPENSPEC_MINIMAL=0 +TASK_MODE="" +TASK_ROUTING_REASON="" 
run_guardex_cli() { if [[ -n "$CLI_ENTRY" ]]; then @@ -48,6 +55,117 @@ normalize_bool() { esac } +normalize_tier() { + local raw="${1:-}" + local fallback="${2:-T2}" + local upper + upper="$(printf '%s' "$raw" | tr '[:lower:]' '[:upper:]')" + case "$upper" in + T0|T1|T2|T3) printf '%s' "$upper" ;; + '') printf '%s' "$fallback" ;; + *) return 1 ;; + esac +} + +string_contains_any() { + local haystack="$1" + shift + local needle + for needle in "$@"; do + if [[ "$haystack" == *"$needle"* ]]; then + return 0 + fi + done + return 1 +} + +string_has_lightweight_prefix() { + local text="$1" + local prefix + for prefix in "quick:" "simple:" "tiny:" "minor:" "small:" "just:" "only:"; do + if [[ "$text" == "$prefix"* ]]; then + return 0 + fi + done + return 1 +} + +derive_task_mode_from_tier() { + case "$1" in + T0|T1) printf 'caveman' ;; + T2|T3) printf 'omx' ;; + *) return 1 ;; + esac +} + +apply_openspec_tier() { + OPENSPEC_SKIP_CHANGE=0 + OPENSPEC_SKIP_PLAN=0 + OPENSPEC_MINIMAL=0 + case "$1" in + T0) + OPENSPEC_SKIP_CHANGE=1 + OPENSPEC_SKIP_PLAN=1 + ;; + T1) + OPENSPEC_SKIP_PLAN=1 + OPENSPEC_MINIMAL=1 + ;; + T2) + OPENSPEC_SKIP_PLAN=1 + ;; + esac +} + +decide_task_routing() { + local task_lower + task_lower="$(printf '%s' "$TASK_NAME" | tr '[:upper:]' '[:lower:]')" + + if [[ -n "$OPENSPEC_TIER_RAW" ]]; then + if ! 
OPENSPEC_TIER="$(normalize_tier "$OPENSPEC_TIER_RAW" "T2")"; then + echo "[codex-agent] Unsupported OpenSpec tier: ${OPENSPEC_TIER_RAW}" >&2 + return 1 + fi + TASK_ROUTING_REASON="explicit tier override" + elif string_has_lightweight_prefix "$task_lower"; then + OPENSPEC_TIER="T1" + TASK_ROUTING_REASON="explicit lightweight prefix" + elif string_contains_any "$task_lower" \ + "ralph" "autopilot" "ultrawork" "ultraqa" "ralplan" "deep interview" "ouroboros" \ + "migration" "refactor" "architecture" "re-architect" "cross-cutting" "multi-agent" \ + "multiagent" "parallel" "orchestr" "release" "zero-copy" "install surface" "workflow" + then + OPENSPEC_TIER="T3" + TASK_ROUTING_REASON="plan-heavy or orchestration-heavy task wording" + elif string_contains_any "$task_lower" \ + "typo" "spelling" "comment-only" "comment only" "format-only" "format only" \ + "whitespace" "one-liner" "one liner" "version bump" "bump version" \ + "single-file" "single file" + then + OPENSPEC_TIER="T1" + TASK_ROUTING_REASON="small bounded maintenance wording" + else + OPENSPEC_TIER="T2" + TASK_ROUTING_REASON="default behavior-change route" + fi + + if ! 
TASK_MODE="$(derive_task_mode_from_tier "$OPENSPEC_TIER")"; then + echo "[codex-agent] Unsupported task mode tier: ${OPENSPEC_TIER}" >&2 + return 1 + fi + apply_openspec_tier "$OPENSPEC_TIER" +} + +describe_task_routing() { + case "$OPENSPEC_TIER" in + T0) printf 'caveman / T0 (no OpenSpec scaffold)' ;; + T1) printf 'caveman / T1 (notes-only OpenSpec)' ;; + T2) printf 'omx / T2 (change workspace only)' ;; + T3) printf 'omx / T3 (change plus plan workspace)' ;; + *) printf 'unknown / %s' "${OPENSPEC_TIER:-unset}" ;; + esac +} + AUTO_FINISH="$(normalize_bool "$AUTO_FINISH_RAW" "1")" AUTO_REVIEW_ON_CONFLICT="$(normalize_bool "$AUTO_REVIEW_ON_CONFLICT_RAW" "1")" AUTO_CLEANUP="$(normalize_bool "$AUTO_CLEANUP_RAW" "1")" @@ -58,7 +176,7 @@ resolve_openspec_masterplan_label() { local raw="${OPENSPEC_MASTERPLAN_LABEL_RAW:-}" local label - if [[ "$OPENSPEC_AUTO_INIT" -ne 1 ]] || [[ -z "$raw" ]]; then + if [[ "$OPENSPEC_AUTO_INIT" -ne 1 ]] || [[ "$OPENSPEC_SKIP_PLAN" -eq 1 ]] || [[ -z "$raw" ]]; then printf '' return 0 fi @@ -86,6 +204,10 @@ while [[ $# -gt 0 ]]; do BASE_BRANCH_EXPLICIT=1 shift 2 ;; + --tier) + OPENSPEC_TIER_RAW="${2:-$OPENSPEC_TIER_RAW}" + shift 2 + ;; --codex-bin) CODEX_BIN="${2:-$CODEX_BIN}" shift 2 @@ -151,6 +273,10 @@ if [[ "$BASE_BRANCH_EXPLICIT" -eq 1 && -z "$BASE_BRANCH" ]]; then exit 1 fi +if ! decide_task_routing; then + exit 1 +fi + if ! command -v "$CODEX_BIN" >/dev/null 2>&1; then echo "[codex-agent] Missing Codex CLI command: $CODEX_BIN" >&2 echo "[codex-agent] Install Codex first, then retry." 
>&2 @@ -393,7 +519,7 @@ start_sandbox_fallback() { printf '[agent-branch-start] Worktree: %s\n' "$worktree_path" } -start_args=("$TASK_NAME" "$AGENT_NAME") +start_args=(--tier "$OPENSPEC_TIER" "$TASK_NAME" "$AGENT_NAME") if [[ "$BASE_BRANCH_EXPLICIT" -eq 1 ]]; then start_args+=("$BASE_BRANCH") fi @@ -487,7 +613,10 @@ record_active_session_state() { --agent "$AGENT_NAME" \ --worktree "$wt" \ --pid "$$" \ - --cli "$CODEX_BIN" + --cli "$CODEX_BIN" \ + --task-mode "$TASK_MODE" \ + --openspec-tier "$OPENSPEC_TIER" \ + --routing-reason "$TASK_ROUTING_REASON" } clear_active_session_state() { @@ -545,6 +674,7 @@ print_takeover_prompt() { finish_cmd="gx branch finish --branch \"${branch}\" --base ${base_branch} --via-pr --wait-for-merge --cleanup" echo "[codex-agent] Takeover sandbox: ${wt}" + echo "[codex-agent] Takeover routing: $(describe_task_routing) (${TASK_ROUTING_REASON})" echo "[codex-agent] Takeover prompt: Continue \`${change_slug}\` on branch \`${branch}\`. Work inside \`${wt}\`, review \`${change_artifact}\`, continue from the current state instead of creating a new sandbox, and when the work is done run \`${finish_cmd}\`." } @@ -594,7 +724,7 @@ ensure_openspec_plan_workspace() { local wt="$1" local branch="$2" - if [[ "$OPENSPEC_AUTO_INIT" -ne 1 ]]; then + if [[ "$OPENSPEC_AUTO_INIT" -ne 1 ]] || [[ "$OPENSPEC_SKIP_PLAN" -eq 1 ]]; then return 0 fi @@ -619,7 +749,7 @@ ensure_openspec_change_workspace() { local wt="$1" local branch="$2" - if [[ "$OPENSPEC_AUTO_INIT" -ne 1 ]]; then + if [[ "$OPENSPEC_AUTO_INIT" -ne 1 ]] || [[ "$OPENSPEC_SKIP_CHANGE" -eq 1 ]]; then return 0 fi @@ -628,7 +758,8 @@ ensure_openspec_change_workspace() { capability_slug="$(resolve_openspec_capability_slug)" if ! 
init_output="$( cd "$wt" - run_guardex_cli internal run-shell changeInit "$change_slug" "$capability_slug" 2>&1 + GUARDEX_OPENSPEC_MINIMAL="$OPENSPEC_MINIMAL" \ + run_guardex_cli internal run-shell changeInit "$change_slug" "$capability_slug" 2>&1 )"; then printf '%s\n' "$init_output" >&2 echo "[codex-agent] OpenSpec workspace initialization failed for change '${change_slug}'." >&2 @@ -878,6 +1009,8 @@ if ! ensure_openspec_plan_workspace "$worktree_path" "$worktree_branch"; then exit 1 fi +echo "[codex-agent] Task routing: $(describe_task_routing) (${TASK_ROUTING_REASON})" + active_session_recorded=0 cleanup_active_session_state_on_exit() { set +e @@ -894,7 +1027,10 @@ trap cleanup_active_session_state_on_exit EXIT INT TERM echo "[codex-agent] Launching ${CODEX_BIN} in sandbox: $worktree_path" cd "$worktree_path" set +e -"$CODEX_BIN" "$@" +GUARDEX_TASK_MODE="$TASK_MODE" \ +GUARDEX_OPENSPEC_TIER="$OPENSPEC_TIER" \ +GUARDEX_TASK_ROUTING_REASON="$TASK_ROUTING_REASON" \ + "$CODEX_BIN" "$@" codex_exit="$?" set -e diff --git a/templates/vscode/guardex-active-agents/session-schema.js b/templates/vscode/guardex-active-agents/session-schema.js index e9282a0..fd4feca 100644 --- a/templates/vscode/guardex-active-agents/session-schema.js +++ b/templates/vscode/guardex-active-agents/session-schema.js @@ -35,6 +35,16 @@ function toPositiveInteger(value) { return Number.isInteger(normalized) && normalized > 0 ? normalized : null; } +function normalizeTaskMode(value) { + const normalized = toNonEmptyString(value).toLowerCase(); + return normalized === 'caveman' || normalized === 'omx' ? normalized : ''; +} + +function normalizeOpenSpecTier(value) { + const normalized = toNonEmptyString(value).toUpperCase(); + return ['T0', 'T1', 'T2', 'T3'].includes(normalized) ? 
normalized : ''; +} + function sanitizeBranchForFile(branch) { const normalized = toNonEmptyString(branch, 'session'); return normalized.replace(/[^a-zA-Z0-9._-]+/g, '__').replace(/^_+|_+$/g, '') || 'session'; @@ -420,6 +430,9 @@ function buildSessionRecord(input) { worktreePath, pid, cliName: toNonEmptyString(input.cliName, 'codex'), + taskMode: normalizeTaskMode(input.taskMode), + openspecTier: normalizeOpenSpecTier(input.openspecTier), + taskRoutingReason: toNonEmptyString(input.taskRoutingReason), startedAt: startedAt.toISOString(), }; } @@ -456,6 +469,9 @@ function normalizeSessionRecord(input, options = {}) { worktreePath: path.resolve(worktreePath), pid, cliName: toNonEmptyString(input.cliName, 'codex'), + taskMode: normalizeTaskMode(input.taskMode), + openspecTier: normalizeOpenSpecTier(input.openspecTier), + taskRoutingReason: toNonEmptyString(input.taskRoutingReason), startedAt: startedAt.toISOString(), filePath: toNonEmptyString(options.filePath), label: deriveSessionLabel(branch, worktreePath), diff --git a/test/branch.test.js b/test/branch.test.js index a0d9280..08ad465 100644 --- a/test/branch.test.js +++ b/test/branch.test.js @@ -233,6 +233,86 @@ test('agent-branch-start links dependency node_modules directories into new work }); +test('agent-branch-start honors T1 notes-only OpenSpec scaffolding', () => { + const repoDir = initRepo(); + seedCommit(repoDir); + + let result = runNode(['setup', '--target', repoDir, '--no-global-install'], repoDir); + assert.equal(result.status, 0, result.stderr || result.stdout); + + result = runCmd('git', ['add', '.'], repoDir); + assert.equal(result.status, 0, result.stderr || result.stdout); + result = runCmd('git', ['commit', '-m', 'apply gx setup'], repoDir, { + ALLOW_COMMIT_ON_PROTECTED_BRANCH: '1', + }); + assert.equal(result.status, 0, result.stderr || result.stdout); + + result = runBranchStart(['--tier', 'T1', 'simple: tighten copy', 'bot'], repoDir, { + GUARDEX_OPENSPEC_AUTO_INIT: 'true', + }); + 
assert.equal(result.status, 0, result.stderr || result.stdout); + assert.match(result.stdout, /\[agent-branch-start\] OpenSpec tier: T1/); + assert.match(result.stdout, /\[agent-branch-start\] OpenSpec plan: skipped by tier T1/); + + const createdWorktree = extractCreatedWorktree(result.stdout); + const changeSlug = extractOpenSpecChangeSlug(result.stdout); + const changeDir = path.join(createdWorktree, 'openspec', 'changes', changeSlug); + + assert.doesNotMatch(createdWorktree, /masterplan/); + assert.equal(fs.existsSync(path.join(changeDir, '.openspec.yaml')), true, '.openspec.yaml missing'); + assert.equal(fs.existsSync(path.join(changeDir, 'notes.md')), true, 'notes.md missing'); + assert.equal(fs.existsSync(path.join(changeDir, 'proposal.md')), false, 'proposal.md should be absent for T1'); + assert.equal(fs.existsSync(path.join(changeDir, 'tasks.md')), false, 'tasks.md should be absent for T1'); + assert.equal( + fs.existsSync(path.join(createdWorktree, 'openspec', 'plan', changeSlug)), + false, + 'T1 branch start should not create a plan workspace', + ); +}); + + +test('agent-branch-start honors T2 full change scaffolding without a plan workspace', () => { + const repoDir = initRepo(); + seedCommit(repoDir); + + let result = runNode(['setup', '--target', repoDir, '--no-global-install'], repoDir); + assert.equal(result.status, 0, result.stderr || result.stdout); + + result = runCmd('git', ['add', '.'], repoDir); + assert.equal(result.status, 0, result.stderr || result.stdout); + result = runCmd('git', ['commit', '-m', 'apply gx setup'], repoDir, { + ALLOW_COMMIT_ON_PROTECTED_BRANCH: '1', + }); + assert.equal(result.status, 0, result.stderr || result.stdout); + + result = runBranchStart(['--tier', 'T2', 'improve-routing-decider', 'bot'], repoDir, { + GUARDEX_OPENSPEC_AUTO_INIT: 'true', + }); + assert.equal(result.status, 0, result.stderr || result.stdout); + assert.match(result.stdout, /\[agent-branch-start\] OpenSpec tier: T2/); + assert.match(result.stdout, 
/\[agent-branch-start\] OpenSpec plan: skipped by tier T2/); + + const createdWorktree = extractCreatedWorktree(result.stdout); + const changeSlug = extractOpenSpecChangeSlug(result.stdout); + const changeDir = path.join(createdWorktree, 'openspec', 'changes', changeSlug); + + assert.doesNotMatch(createdWorktree, /masterplan/); + assert.equal(fs.existsSync(path.join(changeDir, '.openspec.yaml')), true, '.openspec.yaml missing'); + assert.equal(fs.existsSync(path.join(changeDir, 'proposal.md')), true, 'proposal.md missing'); + assert.equal(fs.existsSync(path.join(changeDir, 'tasks.md')), true, 'tasks.md missing'); + assert.equal( + fs.existsSync(path.join(changeDir, 'specs', 'improve-routing-decider', 'spec.md')), + true, + 'spec.md missing', + ); + assert.equal( + fs.existsSync(path.join(createdWorktree, 'openspec', 'plan', changeSlug)), + false, + 'T2 branch start should not create a plan workspace', + ); +}); + + test('protect command manages configured protected branches', () => { const repoDir = initRepo(); seedCommit(repoDir); diff --git a/test/sandbox.test.js b/test/sandbox.test.js index f031369..b85bc13 100644 --- a/test/sandbox.test.js +++ b/test/sandbox.test.js @@ -89,7 +89,7 @@ test('codex-agent launches codex inside a fresh sandbox worktree and keeps branc const cwdMarker = path.join(repoDir, '.codex-agent-cwd'); const argsMarker = path.join(repoDir, '.codex-agent-args'); - const launch = runCodexAgent(['launch-task', 'planner', 'dev', '--model', 'gpt-5.4-mini'], repoDir, { + const launch = runCodexAgent(['--tier', 'T3', 'launch-task', 'planner', 'dev', '--model', 'gpt-5.4-mini'], repoDir, { PATH: `${fakeBin}:${process.env.PATH}`, GUARDEX_TEST_CODEX_CWD: cwdMarker, GUARDEX_TEST_CODEX_ARGS: argsMarker, @@ -141,6 +141,73 @@ test('codex-agent launches codex inside a fresh sandbox worktree and keeps branc }); +test('codex-agent routes lightweight tasks to caveman T1 with notes-only OpenSpec', () => { + const repoDir = initRepo(); + seedCommit(repoDir); + + 
const setupResult = runNode(['setup', '--target', repoDir, '--no-global-install'], repoDir); + assert.equal(setupResult.status, 0, setupResult.stderr || setupResult.stdout); + let result = runCmd('git', ['add', '.'], repoDir); + assert.equal(result.status, 0, result.stderr); + result = runCmd('git', ['commit', '-m', 'apply gx setup'], repoDir, { + ALLOW_COMMIT_ON_PROTECTED_BRANCH: '1', + }); + assert.equal(result.status, 0, result.stderr || result.stdout); + + const fakeBin = fs.mkdtempSync(path.join(os.tmpdir(), 'guardex-fake-codex-lightweight-')); + const fakeCodexPath = path.join(fakeBin, 'codex'); + fs.writeFileSync( + fakeCodexPath, + `#!/usr/bin/env bash\n` + + `pwd > "${'${GUARDEX_TEST_CODEX_CWD}'}"\n` + + `echo "$@" > "${'${GUARDEX_TEST_CODEX_ARGS}'}"\n` + + `printf '%s' "${'${GUARDEX_TASK_MODE}'}" > "${'${GUARDEX_TEST_TASK_MODE}'}"\n` + + `printf '%s' "${'${GUARDEX_OPENSPEC_TIER}'}" > "${'${GUARDEX_TEST_TASK_TIER}'}"\n` + + `printf '%s' "${'${GUARDEX_TASK_ROUTING_REASON}'}" > "${'${GUARDEX_TEST_TASK_REASON}'}"\n`, + 'utf8', + ); + fs.chmodSync(fakeCodexPath, 0o755); + + const cwdMarker = path.join(repoDir, '.codex-agent-cwd-lightweight'); + const argsMarker = path.join(repoDir, '.codex-agent-args-lightweight'); + const modeMarker = path.join(repoDir, '.codex-agent-mode-lightweight'); + const tierMarker = path.join(repoDir, '.codex-agent-tier-lightweight'); + const reasonMarker = path.join(repoDir, '.codex-agent-reason-lightweight'); + const launch = runCodexAgent(['simple: tighten copy', 'planner', 'dev', '--model', 'gpt-5.4-mini'], repoDir, { + PATH: `${fakeBin}:${process.env.PATH}`, + GUARDEX_TEST_CODEX_CWD: cwdMarker, + GUARDEX_TEST_CODEX_ARGS: argsMarker, + GUARDEX_TEST_TASK_MODE: modeMarker, + GUARDEX_TEST_TASK_TIER: tierMarker, + GUARDEX_TEST_TASK_REASON: reasonMarker, + }); + assert.equal(launch.status, 0, launch.stderr || launch.stdout); + assert.match(launch.stdout, /\[codex-agent\] Task routing: caveman \/ T1 \(notes-only OpenSpec\) \(explicit 
lightweight prefix\)/); + assert.doesNotMatch(launch.stdout, /\[codex-agent\] OpenSpec plan workspace:/); + + const launchedCwd = fs.readFileSync(cwdMarker, 'utf8').trim(); + const launchedBranch = extractCreatedBranch(launch.stdout); + const changeSlug = sanitizeSlug(launchedBranch, 'simple-tighten-copy'); + const changeDir = path.join(launchedCwd, 'openspec', 'changes', changeSlug); + const launchedArgs = fs.readFileSync(argsMarker, 'utf8').trim(); + + assert.doesNotMatch(launchedCwd, /masterplan/); + assert.match(launchedArgs, /--model gpt-5\.4-mini/); + assert.equal(fs.readFileSync(modeMarker, 'utf8'), 'caveman'); + assert.equal(fs.readFileSync(tierMarker, 'utf8'), 'T1'); + assert.match(fs.readFileSync(reasonMarker, 'utf8'), /explicit lightweight prefix/); + assert.equal(fs.existsSync(path.join(changeDir, '.openspec.yaml')), true, '.openspec.yaml missing'); + assert.equal(fs.existsSync(path.join(changeDir, 'notes.md')), true, 'notes.md missing'); + assert.equal(fs.existsSync(path.join(changeDir, 'proposal.md')), false, 'proposal.md should be absent for T1'); + assert.equal(fs.existsSync(path.join(changeDir, 'tasks.md')), false, 'tasks.md should be absent for T1'); + assert.equal( + fs.existsSync(path.join(launchedCwd, 'openspec', 'plan', changeSlug)), + false, + 'T1 codex-agent launch should not create a plan workspace', + ); +}); + + test('codex-agent ignores stale repo-local starter shims and keeps the visible checkout stable', () => { const repoDir = initRepo(); seedCommit(repoDir); @@ -179,7 +246,7 @@ test('codex-agent ignores stale repo-local starter shims and keeps the visible c const cwdMarker = path.join(repoDir, '.codex-agent-cwd-fallback'); const argsMarker = path.join(repoDir, '.codex-agent-args-fallback'); - const launch = runCodexAgent(['fallback-task', 'planner', 'dev', '--model', 'gpt-5.4-mini'], repoDir, { + const launch = runCodexAgent(['--tier', 'T3', 'fallback-task', 'planner', 'dev', '--model', 'gpt-5.4-mini'], repoDir, { PATH: 
`${fakeBin}:${process.env.PATH}`, GUARDEX_TEST_CODEX_CWD: cwdMarker, GUARDEX_TEST_CODEX_ARGS: argsMarker, diff --git a/test/vscode-active-agents-session-state.test.js b/test/vscode-active-agents-session-state.test.js index 24da325..2d4c517 100644 --- a/test/vscode-active-agents-session-state.test.js +++ b/test/vscode-active-agents-session-state.test.js @@ -383,6 +383,12 @@ test('agent-session-state writes and removes active session records', () => { String(process.pid), '--cli', 'codex', + '--task-mode', + 'caveman', + '--openspec-tier', + 'T1', + '--routing-reason', + 'explicit lightweight prefix', ]); assert.equal(start.status, 0, start.stderr); @@ -395,10 +401,15 @@ test('agent-session-state writes and removes active session records', () => { assert.equal(parsed.taskName, 'demo-task'); assert.equal(parsed.agentName, 'codex'); assert.equal(parsed.worktreePath, worktreePath); + assert.equal(parsed.taskMode, 'caveman'); + assert.equal(parsed.openspecTier, 'T1'); + assert.equal(parsed.taskRoutingReason, 'explicit lightweight prefix'); const sessions = sessionSchema.readActiveSessions(tempRoot); assert.equal(sessions.length, 1); assert.equal(sessions[0].label, 'agent__codex__demo-task'); + assert.equal(sessions[0].taskMode, 'caveman'); + assert.equal(sessions[0].openspecTier, 'T1'); const stop = runNode(sessionScript, [ 'stop', diff --git a/vscode/guardex-active-agents/session-schema.js b/vscode/guardex-active-agents/session-schema.js index e9282a0..fd4feca 100644 --- a/vscode/guardex-active-agents/session-schema.js +++ b/vscode/guardex-active-agents/session-schema.js @@ -35,6 +35,16 @@ function toPositiveInteger(value) { return Number.isInteger(normalized) && normalized > 0 ? normalized : null; } +function normalizeTaskMode(value) { + const normalized = toNonEmptyString(value).toLowerCase(); + return normalized === 'caveman' || normalized === 'omx' ? 
normalized : ''; +} + +function normalizeOpenSpecTier(value) { + const normalized = toNonEmptyString(value).toUpperCase(); + return ['T0', 'T1', 'T2', 'T3'].includes(normalized) ? normalized : ''; +} + function sanitizeBranchForFile(branch) { const normalized = toNonEmptyString(branch, 'session'); return normalized.replace(/[^a-zA-Z0-9._-]+/g, '__').replace(/^_+|_+$/g, '') || 'session'; @@ -420,6 +430,9 @@ function buildSessionRecord(input) { worktreePath, pid, cliName: toNonEmptyString(input.cliName, 'codex'), + taskMode: normalizeTaskMode(input.taskMode), + openspecTier: normalizeOpenSpecTier(input.openspecTier), + taskRoutingReason: toNonEmptyString(input.taskRoutingReason), startedAt: startedAt.toISOString(), }; } @@ -456,6 +469,9 @@ function normalizeSessionRecord(input, options = {}) { worktreePath: path.resolve(worktreePath), pid, cliName: toNonEmptyString(input.cliName, 'codex'), + taskMode: normalizeTaskMode(input.taskMode), + openspecTier: normalizeOpenSpecTier(input.openspecTier), + taskRoutingReason: toNonEmptyString(input.taskRoutingReason), startedAt: startedAt.toISOString(), filePath: toNonEmptyString(options.filePath), label: deriveSessionLabel(branch, worktreePath),