diff --git a/scripts/codex-fleet/auto-reviewer.sh b/scripts/codex-fleet/auto-reviewer.sh index 345a8bb..0e5b60a 100755 --- a/scripts/codex-fleet/auto-reviewer.sh +++ b/scripts/codex-fleet/auto-reviewer.sh @@ -30,6 +30,9 @@ REPO_ROOT="${CODEX_FLEET_REPO_ROOT:-$(cd "$SCRIPT_DIR/../.." && pwd)}" PLAN_SLUG="" DRY_RUN=0 +LOOP=0 +INTERVAL=300 +ONCE=0 DIFF_LINE_LIMIT="${AUTO_REVIEW_DIFF_LINES:-200}" OUTPUT_DIR="${AUTO_REVIEW_OUTPUT_DIR:-/tmp/claude-viz}" @@ -46,13 +49,18 @@ usage() { auto-reviewer.sh - end-of-plan auto-reviewer for codex-fleet. Usage: - auto-reviewer.sh [--plan-slug ] [--dry-run] [-h|--help] + auto-reviewer.sh [--plan-slug ] [--dry-run] [--once|--loop [--interval=]] [-h|--help] Options: --plan-slug Plan slug to review. Defaults to contents of .codex-fleet/active-plan. --dry-run Build and print the prompt to stdout; do not invoke claude and do not write the review artifact. + --once Run main() once and exit (the default). Mutually + exclusive with --loop. + --loop Run main() repeatedly with --interval between runs. + Used by full-bringup.sh's ticker window. + --interval= Seconds between iterations in --loop mode (default 300). -h, --help Show this help and exit. USAGE } @@ -62,6 +70,10 @@ while [[ $# -gt 0 ]]; do --plan-slug) PLAN_SLUG="${2:-}"; shift 2 ;; --plan-slug=*) PLAN_SLUG="${1#*=}"; shift ;; --dry-run) DRY_RUN=1; shift ;; + --once) ONCE=1; LOOP=0; shift ;; + --loop) LOOP=1; ONCE=0; shift ;; + --interval) INTERVAL="${2:-300}"; shift 2 ;; + --interval=*) INTERVAL="${1#*=}"; shift ;; -h|--help) usage; exit 0 ;; *) die "unknown arg: $1" ;; esac @@ -309,4 +321,12 @@ main() { log "review written plan=$slug file=$output_file" } -main "$@" +if [[ "$LOOP" -eq 1 ]]; then + log "loop mode interval=${INTERVAL}s" + while true; do + main || warn "iteration failed rc=$?; continuing" + sleep "$INTERVAL" + done +else + main +fi diff --git a/scripts/codex-fleet/supervisor.sh b/scripts/codex-fleet/supervisor.sh index b550d89..e22a27c 100755 --- a/scripts/codex-fleet/supervisor.sh +++ b/scripts/codex-fleet/supervisor.sh @@ -10,9 +10,11 @@ # This file is the canonical home for the codex-fleet supervisor *classifier # prompt*. The classifier runs once per supervisor tick per pane: a captured # pane snapshot (last ~80 lines of tmux output) is sent to Claude, and Claude -# returns one of four labels — working / asking / blocked / done — so the +# returns one of four labels — busy / asking / blocked / quiet — so the # fleet can decide whether to leave the pane alone, answer its question, -# escalate, or harvest results. +# escalate, or harvest results. These labels match the live classifier +# library at scripts/codex-fleet/lib/claude-supervisor-classifier.sh so +# the supervisor daemon and any replay/test harness agree on naming. # # Sister daemons (claude-supervisor.sh, plan-watcher.sh, cap-swap-daemon.sh, # auto-reviewer.sh) SOURCE this file to pull the canonical prompt + tiering @@ -55,20 +57,21 @@ # counter keyed by (pane_id, classification, iso8601_timestamp). The metric # that trips the guard is exactly: # -# pane_id= classification= +# pane_id= classification= # timestamp= # # Rule: after 3 consecutive identical classifications on the same pane, # STOP re-running the classifier on that pane and escalate to a different # action instead: -# - working x3 -> no-op (healthy steady state, just reset the streak +# - busy x3 -> no-op (healthy steady state, just reset the streak # and skip the next classifier call to save tokens) # - asking x3 -> page the operator + post a Colony note; the question # was not answered after 3 ticks, human attention needed # - blocked x3 -> escalate to Opus 4.7 once; if Opus also returns # "blocked" then poke the pane (Ctrl-C + retry hint) # and post a Colony note with the captured snapshot -# - done x3 -> harvest results, mark the lane complete, and respawn +# - quiet x3 -> harvest any completed results (PR URL, MERGED line), +# mark the lane complete if appropriate, then respawn # the worker into a fresh task # # Reset rules: the streak counter resets to zero whenever the @@ -107,13 +110,13 @@ snapshot (the last ~80 lines of terminal output for a single Codex/Claude worker) and you must return EXACTLY ONE of the following lowercase labels, with no surrounding prose, no punctuation, and no whitespace: - working | asking | blocked | done | uncertain + busy | asking | blocked | quiet | uncertain Category definitions and one canonical example per category follow. Match on intent and current state, not on stray keywords that may appear in earlier scrollback. -[CATEGORY: working] +[CATEGORY: busy] The pane shows an agent actively making progress: editing files, running commands, streaming model output, or printing tool results. No prompt is currently waiting on human input and no fatal error is in the last few @@ -150,10 +153,13 @@ Example: error: You've hit your usage limit. Try again in 4h 12m. [agent] giving up; awaiting supervisor swap. -[CATEGORY: done] -The pane has finished its current task cleanly: a PR URL is printed, a -"merged" / "completed" / "all green" line appears in the last few rows, -or the agent is idle at a shell prompt after announcing completion. +[CATEGORY: quiet] +The pane is idle: no recent activity in the on-screen window, the cursor +sits at a shell prompt waiting for the next task, or the agent has just +finished cleanly (PR URL printed, "merged" / "completed" / "all green" +line in the last few rows). Quiet covers both "task done" and "between +tasks" — the supervisor distinguishes them by checking for a PR URL in +the same snapshot. Example: [agent] gx branch finish ... --via-pr --cleanup https://github.com/NagyVikt/codex-fleet/pull/72 @@ -171,7 +177,7 @@ Example: [some bytes that look like a partial ANSI escape sequence] ESC[?25l Output contract: -- Return exactly one of: working asking blocked done uncertain +- Return exactly one of: busy asking blocked quiet uncertain - No quotes, no markdown, no trailing newline beyond a single \n. - Do not explain. The supervisor parses your reply with a strict regex.