From 0337377e61f09889264e5ab974eaa0a2fdd7cd9e Mon Sep 17 00:00:00 2001 From: Dan Pupius Date: Fri, 27 Feb 2026 15:29:24 -0800 Subject: [PATCH 01/28] refactor(cli): remove moat attach command Simplifying to two modes: interactive (attached) and non-interactive (monitor via moat logs). The attach command allowed reconnecting to running containers, which added complexity without sufficient value. --- cmd/moat/cli/attach.go | 375 ----------------------------------------- cmd/moat/cli/exec.go | 11 ++ 2 files changed, 11 insertions(+), 375 deletions(-) delete mode 100644 cmd/moat/cli/attach.go diff --git a/cmd/moat/cli/attach.go b/cmd/moat/cli/attach.go deleted file mode 100644 index e8a941b8..00000000 --- a/cmd/moat/cli/attach.go +++ /dev/null @@ -1,375 +0,0 @@ -package cli - -import ( - "context" - "fmt" - "io" - "os" - "os/signal" - "syscall" - "time" - - "github.com/majorcontext/moat/internal/log" - "github.com/majorcontext/moat/internal/run" - "github.com/majorcontext/moat/internal/term" - "github.com/majorcontext/moat/internal/trace" - "github.com/spf13/cobra" -) - -// Timing constants for attach behavior -const ( - // doublePressWindow is how quickly Ctrl+C must be pressed twice to stop a run - doublePressWindow = 500 * time.Millisecond - // containerExitCheckDelay is how long to wait for container exit detection - containerExitCheckDelay = 200 * time.Millisecond - // ttyStartupDelay is how long to wait before resizing TTY after container starts - // This allows the container process to initialize before we resize. - ttyStartupDelay = 200 * time.Millisecond -) - -var ( - attachInteractive bool - attachTTYTrace string -) - -var attachCmd = &cobra.Command{ - Use: "attach ", - Short: "Attach to a running agent", - Long: `Attach local stdin, stdout, and stderr to a running agent. - -Accepts a run ID or name. If a name matches multiple runs, you must -specify the run ID. 
- -By default, attaches in the same mode the run was started with: - - If the run was started with -i, attach will use interactive mode - - Otherwise, only stdout/stderr are connected - -Use -i to force interactive mode, or -i=false to force output-only mode. - -Non-interactive mode: - Ctrl+C Detach (run continues) - Ctrl+C Ctrl+C Stop the run (within 500ms) - -Interactive mode (-i): - Ctrl-/ d Detach (run continues) - Ctrl-/ k Stop the run - Ctrl+C Sent to container process - -Examples: - # Attach by name or ID - moat attach my-agent - moat attach run_a1b2c3d4e5f6 - - # Force interactive mode - moat attach -i run_a1b2c3d4e5f6 - - # Force output-only mode even if run was started interactively - moat attach -i=false run_a1b2c3d4e5f6`, - Args: cobra.ExactArgs(1), - RunE: attachToRun, -} - -func init() { - rootCmd.AddCommand(attachCmd) - attachCmd.Flags().BoolVarP(&attachInteractive, "interactive", "i", false, "interactive mode (use -i=false to force output-only)") - attachCmd.Flags().StringVar(&attachTTYTrace, "tty-trace", "", "capture terminal I/O to file for debugging (e.g., session.json)") -} - -func attachToRun(cmd *cobra.Command, args []string) error { - // Create manager - manager, err := run.NewManager() - if err != nil { - return fmt.Errorf("creating run manager: %w", err) - } - defer manager.Close() - - // Resolve argument to a single run - runID, err := resolveRunArgSingle(manager, args[0]) - if err != nil { - return err - } - - // Verify run exists and is running - r, err := manager.Get(runID) - if err != nil { - return fmt.Errorf("run not found: %w", err) - } - - if state := r.GetState(); state != run.StateRunning { - return fmt.Errorf("run %s is not running (state: %s)", runID, state) - } - - // Determine interactive mode: - // - If -i flag was explicitly set, use that value - // - Otherwise, use the run's original mode - interactive := r.Interactive // Default to run's mode - if cmd.Flags().Changed("interactive") { - interactive = attachInteractive - } 
- - fmt.Printf("Attaching to run %s (%s)...\n", r.ID, r.Name) - if interactive { - fmt.Printf("Escape: Ctrl-/ d (detach), Ctrl-/ k (stop). Ctrl+C goes to container.\n") - } else { - fmt.Println("Press Ctrl+C to detach (run continues), Ctrl+C twice to stop") - } - fmt.Println() - - ctx := context.Background() - - if interactive { - return attachInteractiveMode(ctx, manager, r, attachTTYTrace) - } - - return attachOutputMode(ctx, manager, r) -} - -// attachOutputMode attaches in output-only mode (no stdin). -// Uses container logs with follow mode for reliable output streaming. -func attachOutputMode(ctx context.Context, manager *run.Manager, r *run.Run) error { - // Set up signal handling - sigCh := make(chan os.Signal, 1) - signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) - defer signal.Stop(sigCh) - - var lastSigTime time.Time - - // Create cancellable context for logs - logsCtx, logsCancel := context.WithCancel(ctx) - defer logsCancel() - - // Use logs with follow mode for output-only attach - // This is more reliable than attach for containers already running - logsDone := make(chan error, 1) - go func() { - logsDone <- manager.FollowLogs(logsCtx, r.ID, os.Stdout) - }() - - // Also monitor if container exits - waitDone := make(chan error, 1) - go func() { - waitDone <- manager.Wait(ctx, r.ID) - }() - - for { - select { - case sig := <-sigCh: - now := time.Now() - if now.Sub(lastSigTime) < doublePressWindow { - // Double Ctrl+C - stop the run - fmt.Printf("\nStopping run %s...\n", r.ID) - logsCancel() - if err := manager.Stop(context.Background(), r.ID); err != nil { - log.Error("failed to stop run", "id", r.ID, "error", err) - } - fmt.Printf("Run %s stopped\n", r.ID) - return nil - } - - // First Ctrl+C - detach - log.Debug("received signal, detaching", "signal", sig) - fmt.Printf("\nDetaching from run %s (still running)\n", r.ID) - fmt.Printf("Press Ctrl+C again within 500ms to stop\n") - return nil - - case <-logsDone: - // Logs ended - wait a moment for 
container exit to be detected - select { - case err := <-waitDone: - if err != nil { - return err - } - fmt.Printf("Run %s completed\n", r.ID) - return nil - case <-time.After(containerExitCheckDelay): - // Container didn't exit quickly - check run state - currentRun, getErr := manager.Get(r.ID) - if getErr != nil || currentRun.State != run.StateRunning { - // Run ended or was cleaned up - fmt.Printf("Run %s completed\n", r.ID) - return nil - } - // Container still running, logs stream ended - fmt.Printf("\nDetached from run %s\n", r.ID) - return nil - } - - case err := <-waitDone: - logsCancel() - if err != nil { - return err - } - fmt.Printf("Run %s completed\n", r.ID) - return nil - } - } -} - -// attachInteractiveMode attaches with stdin connected. -func attachInteractiveMode(ctx context.Context, manager *run.Manager, r *run.Run, tracePath string) error { - // Set up TTY tracing if requested - // Note: We don't have the original command for attach, use a placeholder - command := []string{"(attach to " + r.Name + ")"} - tracer := setupTTYTracer(tracePath, r, command) - defer tracer.save() - - // Show recent logs before attaching so user has context - if logs, err := manager.RecentLogs(r.ID, 50); err == nil && len(logs) > 0 { - fmt.Print(logs) - // Add a newline if logs don't end with one - if len(logs) > 0 && logs[len(logs)-1] != '\n' { - fmt.Println() - } - } - - // Put terminal in raw mode to capture escape sequences without echo - if term.IsTerminal(os.Stdin) { - rawState, err := term.EnableRawMode(os.Stdin) - if err != nil { - log.Debug("failed to enable raw mode", "error", err) - // Continue without raw mode - escapes may echo - } else { - defer func() { - if err := term.RestoreTerminal(rawState); err != nil { - log.Debug("failed to restore terminal", "error", err) - } - }() - } - } - - // Set up status bar for interactive session - statusWriter, statusCleanup, stdout := setupStatusBar(manager, r) - defer statusCleanup() - - // Wrap stdout with tracer if 
tracing is enabled - if tracer != nil { - stdout = trace.NewRecordingWriter(stdout, tracer.recorder, trace.EventStdout) - } - - // Wrap stdin with escape proxy to detect detach/stop sequences - escapeProxy := term.NewEscapeProxy(os.Stdin) - - // Set up callback to update footer when escape sequence is in progress - if statusWriter != nil { - statusWriter.SetupEscapeHints(escapeProxy) - } - - // Wrap stdin with tracer if tracing is enabled - stdin := io.Reader(escapeProxy) - if tracer != nil { - stdin = trace.NewRecordingReader(escapeProxy, tracer.recorder, trace.EventStdin) - } - - // Set up signal handling - sigCh := make(chan os.Signal, 1) - signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM, syscall.SIGWINCH) - defer signal.Stop(sigCh) - - // Channel to receive escape actions from the attach goroutine - escapeCh := make(chan term.EscapeAction, 1) - - attachCtx, attachCancel := context.WithCancel(ctx) - defer attachCancel() - - attachDone := make(chan error, 1) - go func() { - err := manager.Attach(attachCtx, r.ID, stdin, stdout, os.Stderr) - // Check if the error is an escape sequence - if term.IsEscapeError(err) { - escapeCh <- term.GetEscapeAction(err) - attachDone <- nil - } else { - attachDone <- err - } - }() - - waitDone := make(chan error, 1) - go func() { - waitDone <- manager.Wait(ctx, r.ID) - }() - - for { - select { - case sig := <-sigCh: - if sig == syscall.SIGWINCH { - // Handle terminal resize - if statusWriter != nil && term.IsTerminal(os.Stdout) { - width, height := term.GetSize(os.Stdout) - if width > 0 && height > 0 { - // Record resize event for tracing - if tracer != nil { - tracer.recorder.AddResize(width, height) - } - _ = statusWriter.Resize(width, height) - // Also resize container TTY - // #nosec G115 -- width/height are validated positive above - _ = manager.ResizeTTY(ctx, r.ID, uint(height), uint(width)) - } - } - continue // Don't break out of loop - } - if sig == syscall.SIGTERM { - fmt.Printf("\nStopping run %s...\n", r.ID) - 
attachCancel() - if err := manager.Stop(context.Background(), r.ID); err != nil { - log.Error("failed to stop run", "id", r.ID, "error", err) - } - return nil - } - // SIGINT is forwarded to container via stdin/tty - - case action := <-escapeCh: - // Handle escape sequence - switch action { - case term.EscapeDetach: - attachCancel() - fmt.Printf("\r\nDetached from run %s (still running)\r\n", r.ID) - fmt.Printf("Use 'moat attach %s' to reattach\r\n", r.ID) - return nil - - case term.EscapeStop: - fmt.Printf("\r\nStopping run %s...\r\n", r.ID) - attachCancel() - if err := manager.Stop(context.Background(), r.ID); err != nil { - log.Error("failed to stop run", "id", r.ID, "error", err) - } - fmt.Printf("Run %s stopped\r\n", r.ID) - return nil - } - - case err := <-attachDone: - // Attach ended - wait a moment for container exit to be detected - if err != nil && ctx.Err() == nil && !term.IsEscapeError(err) { - log.Error("attach failed", "id", r.ID, "error", err) - } - // Give the wait goroutine time to detect container exit - select { - case waitErr := <-waitDone: - if waitErr != nil { - return waitErr - } - fmt.Printf("Run %s completed\n", r.ID) - return nil - case <-time.After(containerExitCheckDelay): - // Container didn't exit quickly - check run state - currentRun, getErr := manager.Get(r.ID) - if getErr != nil || currentRun.State != run.StateRunning { - // Run ended or was cleaned up - fmt.Printf("Run %s completed\n", r.ID) - return nil - } - // Container still running, we just got disconnected - fmt.Printf("\nDetached from run %s\n", r.ID) - return nil - } - - case err := <-waitDone: - attachCancel() - if err != nil { - return err - } - fmt.Printf("Run %s completed\n", r.ID) - return nil - } - } -} diff --git a/cmd/moat/cli/exec.go b/cmd/moat/cli/exec.go index 22f09b04..bf9c8c91 100644 --- a/cmd/moat/cli/exec.go +++ b/cmd/moat/cli/exec.go @@ -20,6 +20,17 @@ import ( "github.com/spf13/cobra" ) +// Timing constants for attached execution behavior +const ( + // 
doublePressWindow is how quickly Ctrl+C must be pressed twice to stop a run + doublePressWindow = 500 * time.Millisecond + // containerExitCheckDelay is how long to wait for container exit detection + containerExitCheckDelay = 200 * time.Millisecond + // ttyStartupDelay is how long to wait before resizing TTY after container starts. + // This allows the container process to initialize before we resize. + ttyStartupDelay = 200 * time.Millisecond +) + // Re-export types from internal/cli for backward compatibility // with code in cmd/moat/cli that uses these types. type ExecFlags = intcli.ExecFlags From 587802a18118c908bffad172ea5eaf08414d7dda Mon Sep 17 00:00:00 2001 From: Dan Pupius Date: Fri, 27 Feb 2026 15:34:30 -0800 Subject: [PATCH 02/28] refactor(cli): remove --detach flag and simplify execution modes Non-interactive runs now always start in background. Interactive runs always own the terminal. Removes RunAttached and RunInteractive in favor of the single RunInteractiveAttached path. --- cmd/moat/cli/exec.go | 230 ++----------------------------- cmd/moat/cli/run.go | 14 +- cmd/moat/cli/wt.go | 10 +- internal/cli/types.go | 2 - internal/providers/claude/cli.go | 5 +- internal/providers/codex/cli.go | 5 +- internal/providers/gemini/cli.go | 5 +- 7 files changed, 18 insertions(+), 253 deletions(-) diff --git a/cmd/moat/cli/exec.go b/cmd/moat/cli/exec.go index bf9c8c91..e0796dc4 100644 --- a/cmd/moat/cli/exec.go +++ b/cmd/moat/cli/exec.go @@ -20,12 +20,8 @@ import ( "github.com/spf13/cobra" ) -// Timing constants for attached execution behavior +// Timing constants for interactive execution behavior const ( - // doublePressWindow is how quickly Ctrl+C must be pressed twice to stop a run - doublePressWindow = 500 * time.Millisecond - // containerExitCheckDelay is how long to wait for container exit detection - containerExitCheckDelay = 200 * time.Millisecond // ttyStartupDelay is how long to wait before resizing TTY after container starts. 
// This allows the container process to initialize before we resize. ttyStartupDelay = 200 * time.Millisecond @@ -242,15 +238,15 @@ func ExecuteRun(ctx context.Context, opts intcli.ExecOptions) (*run.Run, error) }) } - // Interactive mode: use StartAttached to ensure TTY is connected before process starts + // Interactive mode: use StartAttached to ensure TTY is connected before process starts. // This is required for TUI applications like Codex CLI that need to detect terminal // capabilities immediately on startup. - if opts.Interactive && !opts.Flags.Detach { + if opts.Interactive { return r, RunInteractiveAttached(ctx, manager, r, opts.Command, opts.Flags.TTYTrace) } - // Start run (non-interactive or detached) - startOpts := run.StartOptions{StreamLogs: !opts.Interactive} + // Non-interactive: start in background + startOpts := run.StartOptions{StreamLogs: false} if err := manager.Start(ctx, r.ID, startOpts); err != nil { log.Error("failed to start run", "id", r.ID, "error", err) return r, fmt.Errorf("starting run: %w", err) @@ -270,218 +266,10 @@ func ExecuteRun(ctx context.Context, opts intcli.ExecOptions) (*run.Run, error) } } - // If detached, return immediately - if opts.Flags.Detach { - fmt.Printf("\nRun %s started in background\n", r.ID) - fmt.Printf("Use 'moat logs %s' to view output\n", r.ID) - fmt.Printf("Use 'moat attach %s' to attach\n", r.ID) - fmt.Printf("Use 'moat stop %s' to stop\n", r.ID) - return r, nil - } - - // Non-interactive attached mode: stream output, Ctrl+C detaches - return r, RunAttached(ctx, manager, r) -} - -// RunAttached runs in attached mode where output is streamed but stdin is not connected. -// Ctrl+C detaches (leaves run active), double Ctrl+C stops the run. 
-func RunAttached(ctx context.Context, manager *run.Manager, r *run.Run) error { - // Set up signal handling for detach behavior - sigCh := make(chan os.Signal, 1) - signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) - defer signal.Stop(sigCh) - - // Track Ctrl+C timing for double-press detection - lastSigTime := time.Time{} - - // Create a cancellable context for the wait - waitCtx, waitCancel := context.WithCancel(ctx) - defer waitCancel() - - // Channel to receive wait result - waitDone := make(chan error, 1) - go func() { - waitDone <- manager.Wait(waitCtx, r.ID) - }() - - for { - select { - case sig := <-sigCh: - now := time.Now() - if now.Sub(lastSigTime) < doublePressWindow { - // Double Ctrl+C - stop the run - fmt.Printf("\nStopping run %s...\n", r.ID) - waitCancel() - if err := manager.Stop(context.Background(), r.ID); err != nil { - log.Error("failed to stop run", "id", r.ID, "error", err) - } - fmt.Printf("Run %s stopped\n", r.ID) - return nil - } - // First Ctrl+C - detach (double-press detection happens before this point) - log.Debug("received signal, detaching", "signal", sig) - fmt.Printf("\nDetaching from run %s (still running)\n", r.ID) - fmt.Printf("Press Ctrl+C again within 500ms to stop, or use 'moat stop %s'\n", r.ID) - fmt.Printf("Use 'moat attach %s' to reattach\n", r.ID) - // Don't cancel - let the run continue - return nil - - case err := <-waitDone: - if err != nil { - log.Error("run failed", "id", r.ID, "error", err) - return err - } - log.Info("run completed", "id", r.ID) - fmt.Printf("Run %s completed\n", r.ID) - return nil - } - } -} - -// RunInteractive runs in interactive mode with stdin connected and TTY allocated. 
-func RunInteractive(ctx context.Context, manager *run.Manager, r *run.Run) error { - fmt.Printf("%s\n\n", term.EscapeHelpText()) - - // Resize container TTY to match terminal size - if term.IsTerminal(os.Stdout) { - width, height := term.GetSize(os.Stdout) - if width > 0 && height > 0 { - // #nosec G115 -- width/height are validated positive above - if err := manager.ResizeTTY(ctx, r.ID, uint(height), uint(width)); err != nil { - log.Debug("failed to resize TTY", "error", err) - } - } - } - - // Put terminal in raw mode to capture escape sequences without echo - if term.IsTerminal(os.Stdin) { - rawState, err := term.EnableRawMode(os.Stdin) - if err != nil { - log.Debug("failed to enable raw mode", "error", err) - // Continue without raw mode - escapes may echo - } else { - defer func() { - if err := term.RestoreTerminal(rawState); err != nil { - log.Debug("failed to restore terminal", "error", err) - } - }() - } - } - - // Set up signal handling - sigCh := make(chan os.Signal, 1) - signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) - defer signal.Stop(sigCh) - - // Wrap stdin with escape proxy to detect detach/stop sequences - escapeProxy := term.NewEscapeProxy(os.Stdin) - - // Channel to receive escape actions from the attach goroutine - escapeCh := make(chan term.EscapeAction, 1) - - // Attach to container with escape-proxied stdin - attachCtx, attachCancel := context.WithCancel(ctx) - defer attachCancel() - - attachDone := make(chan error, 1) - go func() { - err := manager.Attach(attachCtx, r.ID, escapeProxy, os.Stdout, os.Stderr) - // Check if the error is an escape sequence - if term.IsEscapeError(err) { - escapeCh <- term.GetEscapeAction(err) - attachDone <- nil - } else { - attachDone <- err - } - }() - - // Also wait for container to exit - waitDone := make(chan error, 1) - go func() { - waitDone <- manager.Wait(ctx, r.ID) - }() - - for { - select { - case sig := <-sigCh: - // In interactive mode, forward SIGINT to container (it will handle it) - // 
Only SIGTERM causes us to stop - if sig == syscall.SIGTERM { - fmt.Printf("\nStopping run %s...\n", r.ID) - attachCancel() - if err := manager.Stop(context.Background(), r.ID); err != nil { - log.Error("failed to stop run", "id", r.ID, "error", err) - } - return nil - } - // SIGINT is forwarded to container via attached stdin/tty - - case action := <-escapeCh: - // Handle escape sequence - switch action { - case term.EscapeDetach: - attachCancel() - // Wait for attach goroutine to complete before checking state - select { - case <-attachDone: - case <-time.After(500 * time.Millisecond): - } - currentRun, _ := manager.Get(r.ID) - if currentRun != nil && currentRun.State == run.StateRunning { - fmt.Printf("\r\nDetached from run %s (still running)\r\n", r.ID) - fmt.Printf("Use 'moat attach %s' to reattach\r\n", r.ID) - } else { - fmt.Printf("\r\nRun %s stopped\r\n", r.ID) - } - return nil - - case term.EscapeStop: - fmt.Printf("\r\nStopping run %s...\r\n", r.ID) - attachCancel() - if err := manager.Stop(context.Background(), r.ID); err != nil { - log.Error("failed to stop run", "id", r.ID, "error", err) - } - fmt.Printf("Run %s stopped\r\n", r.ID) - return nil - } - - case err := <-attachDone: - // Attach ended - wait a moment for container exit to be detected - if err != nil && ctx.Err() == nil && !term.IsEscapeError(err) { - log.Error("attach failed", "id", r.ID, "error", err) - } - // Give the wait goroutine time to detect container exit - select { - case waitErr := <-waitDone: - if waitErr != nil { - return waitErr - } - fmt.Printf("Run %s completed\n", r.ID) - return nil - case <-time.After(containerExitCheckDelay): - // Container didn't exit quickly - check run state - currentRun, getErr := manager.Get(r.ID) - if getErr != nil || currentRun.State != run.StateRunning { - // Run ended or was cleaned up - fmt.Printf("Run %s completed\n", r.ID) - return nil - } - // Container still running, we just got disconnected - fmt.Printf("\nDetached from run %s\n", r.ID) - 
return nil - } - - case err := <-waitDone: - // Container exited - attachCancel() // Stop the attach goroutine - if err != nil { - log.Error("run failed", "id", r.ID, "error", err) - return err - } - fmt.Printf("Run %s completed\n", r.ID) - return nil - } - } + fmt.Printf("\nRun %s started in background\n", r.ID) + fmt.Printf("Use 'moat logs %s -f' to follow output\n", r.ID) + fmt.Printf("Use 'moat stop %s' to stop\n", r.ID) + return r, nil } // RunInteractiveAttached runs in interactive mode using StartAttached to ensure diff --git a/cmd/moat/cli/run.go b/cmd/moat/cli/run.go index e35b52d1..d7f6baf5 100644 --- a/cmd/moat/cli/run.go +++ b/cmd/moat/cli/run.go @@ -20,7 +20,7 @@ var runCmd = &cobra.Command{ Long: `Run an agent in an isolated container with workspace mounting, credential injection, and full observability. -The agent runs in a Docker container with your workspace mounted at /workspace. +The agent runs in a container with your workspace mounted at /workspace. If an agent.yaml exists in the workspace, its settings are used as defaults. Arguments: @@ -28,11 +28,10 @@ Arguments: [-- cmd] Optional command to run instead of agent's default Non-interactive mode (default): - Ctrl+C Detach (run continues) - Ctrl+C Ctrl+C Stop the run (within 500ms) + Run starts in background. Monitor with 'moat logs' and 'moat trace'. + Stop with 'moat stop'. 
Interactive mode (-i): - Ctrl-/ d Detach (run continues) Ctrl-/ k Stop the run Ctrl+C Sent to container process @@ -58,9 +57,6 @@ Examples: # Run multiple commands moat run -- sh -c "npm install && npm test" - # Run detached (in background) - moat run -d ./my-project - # Run interactive shell moat run -i -- bash`, Args: cobra.ArbitraryArgs, @@ -159,7 +155,6 @@ func runAgent(cmd *cobra.Command, args []string) error { "workspace", absPath, "grants", runFlags.Grants, "cmd", containerCmd, - "detach", runFlags.Detach, "interactive", interactive, ) @@ -187,8 +182,7 @@ func runAgent(cmd *cobra.Command, args []string) error { return err } - // Print started message if not already printed by Execute - if !runFlags.Detach && r != nil { + if r != nil { fmt.Printf("Started agent %q (run %s)\n", r.Name, r.ID) } diff --git a/cmd/moat/cli/wt.go b/cmd/moat/cli/wt.go index b5a9376e..aadd944d 100644 --- a/cmd/moat/cli/wt.go +++ b/cmd/moat/cli/wt.go @@ -36,9 +36,6 @@ Examples: # Start a run on a new feature branch moat wt dark-mode - # Run in background - moat wt dark-mode -d - # Run a specific command in the worktree moat wt dark-mode -- make test @@ -165,11 +162,8 @@ func runWorktree(cmd *cobra.Command, args []string) error { } } - // Determine interactive mode: CLI flags > config > default - interactive := !wtFlags.Detach - if !interactive && cfg != nil && cfg.Interactive { - interactive = true - } + // Determine interactive mode from config + interactive := cfg != nil && cfg.Interactive log.Debug("starting worktree run", "branch", branch, diff --git a/internal/cli/types.go b/internal/cli/types.go index 02e095a7..c364aee8 100644 --- a/internal/cli/types.go +++ b/internal/cli/types.go @@ -14,7 +14,6 @@ type ExecFlags struct { Runtime string Rebuild bool KeepContainer bool - Detach bool Interactive bool NoSandbox bool TTYTrace string // Path to save terminal I/O trace for debugging @@ -27,7 +26,6 @@ func AddExecFlags(cmd *cobra.Command, flags *ExecFlags) { 
cmd.Flags().StringVarP(&flags.Name, "name", "n", "", "name for this run (default: from agent.yaml or random)") cmd.Flags().BoolVar(&flags.Rebuild, "rebuild", false, "force rebuild of container image") cmd.Flags().BoolVar(&flags.KeepContainer, "keep", false, "keep container after run completes (for debugging)") - cmd.Flags().BoolVarP(&flags.Detach, "detach", "d", false, "run in background and return immediately") cmd.Flags().StringVar(&flags.Runtime, "runtime", "", "container runtime to use (apple, docker)") cmd.Flags().BoolVar(&flags.NoSandbox, "no-sandbox", false, "disable gVisor sandbox (reduced isolation, Docker only)") cmd.Flags().StringVar(&flags.TTYTrace, "tty-trace", "", "capture terminal I/O to file for debugging (e.g., session.json)") diff --git a/internal/providers/claude/cli.go b/internal/providers/claude/cli.go index 26b1684d..fa50826e 100644 --- a/internal/providers/claude/cli.go +++ b/internal/providers/claude/cli.go @@ -61,9 +61,6 @@ Examples: # Name the session for easy reference moat claude --name my-feature - # Run in background - moat claude -d - # Force rebuild of container image moat claude --rebuild @@ -275,7 +272,7 @@ func runClaudeCode(cmd *cobra.Command, args []string) error { return err } - if result != nil && !claudeFlags.Detach { + if result != nil { fmt.Printf("Starting Claude Code in %s\n", absPath) fmt.Printf("Run: %s (%s)\n", result.Name, result.ID) } diff --git a/internal/providers/codex/cli.go b/internal/providers/codex/cli.go index 2e8c60d1..2f72634b 100644 --- a/internal/providers/codex/cli.go +++ b/internal/providers/codex/cli.go @@ -80,9 +80,6 @@ Examples: # Name the session for easy reference moat codex --name my-feature - # Run in background - moat codex -d - # Force rebuild of container image moat codex --rebuild @@ -272,7 +269,7 @@ func runCodex(cmd *cobra.Command, args []string) error { return err } - if result != nil && !codexFlags.Detach { + if result != nil { fmt.Printf("Starting Codex CLI in %s\n", absPath) 
fmt.Printf("Run: %s (%s)\n", result.Name, result.ID) } diff --git a/internal/providers/gemini/cli.go b/internal/providers/gemini/cli.go index 2f1e4f96..aefcf95d 100644 --- a/internal/providers/gemini/cli.go +++ b/internal/providers/gemini/cli.go @@ -67,9 +67,6 @@ Examples: # Name the session for easy reference moat gemini --name my-feature - # Run in background - moat gemini -d - # Force rebuild of container image moat gemini --rebuild @@ -235,7 +232,7 @@ func runGemini(cmd *cobra.Command, args []string) error { return err } - if result != nil && !geminiFlags.Detach { + if result != nil { fmt.Printf("Starting Gemini in %s\n", absPath) fmt.Printf("Run: %s (%s)\n", result.Name, result.ID) } From b07af8917a55d46d4f235669a6cba2d8a1d3b0da Mon Sep 17 00:00:00 2001 From: Dan Pupius Date: Fri, 27 Feb 2026 15:37:56 -0800 Subject: [PATCH 03/28] refactor(term): remove detach escape sequence Only Ctrl-/ k (stop) remains. Ctrl-/ d is no longer an escape sequence and passes through to the container. --- internal/term/escape.go | 26 ++------------- internal/term/escape_test.go | 63 ++++++++++++------------------------ 2 files changed, 24 insertions(+), 65 deletions(-) diff --git a/internal/term/escape.go b/internal/term/escape.go index 20fb9630..47ede5d5 100644 --- a/internal/term/escape.go +++ b/internal/term/escape.go @@ -12,8 +12,6 @@ type EscapeAction int const ( // EscapeNone means no escape action was triggered. EscapeNone EscapeAction = iota - // EscapeDetach means the user wants to detach from the session. - EscapeDetach // EscapeStop means the user wants to stop the run. 
EscapeStop ) @@ -25,8 +23,6 @@ type EscapeError struct { func (e EscapeError) Error() string { switch e.Action { - case EscapeDetach: - return "escape: detach" case EscapeStop: return "escape: stop" default: @@ -54,14 +50,12 @@ const ( EscapePrefix byte = 0x1f // Command keys (after the prefix) - escapeKeyDetach byte = 'd' - escapeKeyStop byte = 'k' + escapeKeyStop byte = 'k' ) // EscapeProxy wraps a reader and watches for escape sequences. // // Escape sequences are: Ctrl-/ followed by: -// - d: detach from session (run continues) // - k: stop the run // // When an escape sequence is detected, Read returns an EscapeError. @@ -87,7 +81,7 @@ func NewEscapeProxy(r io.Reader) *EscapeProxy { } // OnPrefixChange sets a callback that fires when the escape prefix state changes. -// The callback receives true when Ctrl-/ is pressed (waiting for d/k), and false +// The callback receives true when Ctrl-/ is pressed (waiting for command key), and false // when the sequence completes or is canceled. This can be used to update UI state, // such as showing escape key hints in a status bar. // @@ -151,18 +145,6 @@ func (e *EscapeProxy) Read(p []byte) (int, error) { // Check for escape commands switch b { - case escapeKeyDetach: - // Buffer any remaining bytes for next Read - if i+1 < n { - e.buf = append(e.buf, buf[i+1:n]...) - } - // If we have output to return first, defer the escape - if len(out) > 0 { - pendingEscape = &EscapeError{Action: EscapeDetach} - break - } - return 0, EscapeError{Action: EscapeDetach} - case escapeKeyStop: if i+1 < n { e.buf = append(e.buf, buf[i+1:n]...) 
@@ -237,8 +219,6 @@ func (e *EscapeProxy) Read(p []byte) (int, error) { b := oneByte[0] e.setPrefixState(false) switch b { - case escapeKeyDetach: - return 0, EscapeError{Action: EscapeDetach} case escapeKeyStop: return 0, EscapeError{Action: EscapeStop} case EscapePrefix: @@ -279,5 +259,5 @@ func (e *EscapeProxy) Read(p []byte) (int, error) { // EscapeHelpText returns help text explaining the escape sequences. func EscapeHelpText() string { - return "Escape sequences: Ctrl-/ d (detach), Ctrl-/ k (stop)" + return "Escape: Ctrl-/ k (stop)" } diff --git a/internal/term/escape_test.go b/internal/term/escape_test.go index 18ac4592..13c7eef0 100644 --- a/internal/term/escape_test.go +++ b/internal/term/escape_test.go @@ -20,31 +20,19 @@ func TestEscapeProxy_PassThrough(t *testing.T) { } } -func TestEscapeProxy_Detach(t *testing.T) { - // Ctrl-/ d should trigger detach +func TestEscapeProxy_DPassesThrough(t *testing.T) { + // Ctrl-/ d is not an escape sequence; both bytes should pass through input := []byte{EscapePrefix, 'd', 'x', 'y', 'z'} r := NewEscapeProxy(bytes.NewReader(input)) - buf := make([]byte, 10) - n, err := r.Read(buf) - - if !IsEscapeError(err) { - t.Fatalf("expected EscapeError, got: %v", err) - } - if GetEscapeAction(err) != EscapeDetach { - t.Errorf("expected EscapeDetach, got: %v", GetEscapeAction(err)) - } - if n != 0 { - t.Errorf("expected 0 bytes, got %d", n) - } - - // Remaining bytes should be available - n, err = r.Read(buf) + out, err := io.ReadAll(r) if err != nil { - t.Fatalf("unexpected error reading remaining: %v", err) + t.Fatalf("unexpected error: %v", err) } - if string(buf[:n]) != "xyz" { - t.Errorf("remaining bytes: got %q, want %q", buf[:n], "xyz") + + expected := []byte{EscapePrefix, 'd', 'x', 'y', 'z'} + if !bytes.Equal(out, expected) { + t.Errorf("got %v, want %v", out, expected) } } @@ -97,27 +85,19 @@ func TestEscapeProxy_UnrecognizedEscape(t *testing.T) { } func TestEscapeProxy_MixedContent(t *testing.T) { - // Normal content 
with escape in the middle + // Normal content with unrecognized escape in the middle - both bytes pass through input := []byte{'a', 'b', EscapePrefix, 'd', 'c'} r := NewEscapeProxy(bytes.NewReader(input)) - buf := make([]byte, 10) - n, err := r.Read(buf) - - // First read should return "ab" then the escape - // Behavior depends on read size - may get "ab" then escape on next read - // or escape immediately - - if IsEscapeError(err) { - // Escape detected - "ab" should be buffered or returned - if n > 0 { - if string(buf[:n]) != "ab" { - t.Errorf("before escape: got %q, want %q", buf[:n], "ab") - } - } - } else if err != nil { + out, err := io.ReadAll(r) + if err != nil { t.Fatalf("unexpected error: %v", err) } + + expected := []byte{'a', 'b', EscapePrefix, 'd', 'c'} + if !bytes.Equal(out, expected) { + t.Errorf("got %v, want %v", out, expected) + } } func TestEscapeProxy_EscapeAtEnd(t *testing.T) { @@ -138,8 +118,8 @@ func TestEscapeProxy_EscapeAtEnd(t *testing.T) { } func TestEscapeProxy_SmallReads(t *testing.T) { - // Read one byte at a time - input := []byte{'a', EscapePrefix, 'd', 'b'} + // Read one byte at a time with a stop escape + input := []byte{'a', EscapePrefix, 'k', 'b'} r := NewEscapeProxy(bytes.NewReader(input)) buf := make([]byte, 1) @@ -158,8 +138,8 @@ func TestEscapeProxy_SmallReads(t *testing.T) { if !IsEscapeError(err) { t.Fatalf("read 2: expected EscapeError, got: %v", err) } - if GetEscapeAction(err) != EscapeDetach { - t.Errorf("read 2: expected EscapeDetach, got: %v", GetEscapeAction(err)) + if GetEscapeAction(err) != EscapeStop { + t.Errorf("read 2: expected EscapeStop, got: %v", GetEscapeAction(err)) } // Read 'b' @@ -177,7 +157,6 @@ func TestEscapeError_Error(t *testing.T) { action EscapeAction want string }{ - {EscapeDetach, "escape: detach"}, {EscapeStop, "escape: stop"}, {EscapeNone, "escape: unknown"}, } @@ -205,7 +184,7 @@ func TestEscapeProxy_OnPrefixChange(t *testing.T) { wantFinalState bool }{ { - name: "prefix detected then 
completed with detach", + name: "prefix detected then canceled with unrecognized d", input: []byte{EscapePrefix, 'd'}, wantCallbacks: []bool{true, false}, wantFinalState: false, From f8d3928da3fc5728f3c5e328244bcef1cc6f93d9 Mon Sep 17 00:00:00 2001 From: Dan Pupius Date: Fri, 27 Feb 2026 15:38:57 -0800 Subject: [PATCH 04/28] refactor(cli): remove detach handling from interactive mode Interactive sessions now run until the process exits or user sends Ctrl-/ k to stop. No detach path exists. --- cmd/moat/cli/exec.go | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/cmd/moat/cli/exec.go b/cmd/moat/cli/exec.go index e0796dc4..f444c4e9 100644 --- a/cmd/moat/cli/exec.go +++ b/cmd/moat/cli/exec.go @@ -312,7 +312,7 @@ func RunInteractiveAttached(ctx context.Context, manager *run.Manager, r *run.Ru stdout = trace.NewRecordingWriter(stdout, tracer.recorder, trace.EventStdout) } - // Wrap stdin with escape proxy to detect detach/stop sequences + // Wrap stdin with escape proxy to detect stop sequences escapeProxy := term.NewEscapeProxy(os.Stdin) // Set up callback to update footer when escape sequence is in progress @@ -402,25 +402,7 @@ func RunInteractiveAttached(ctx context.Context, manager *run.Manager, r *run.Ru // SIGINT is forwarded to container via attached stdin/tty case action := <-escapeCh: - // Handle escape sequence - switch action { - case term.EscapeDetach: - attachCancel() - // Wait for attach goroutine to complete before checking state - select { - case <-attachDone: - case <-time.After(500 * time.Millisecond): - } - currentRun, _ := manager.Get(r.ID) - if currentRun != nil && currentRun.State == run.StateRunning { - fmt.Printf("\r\nDetached from run %s (still running)\r\n", r.ID) - fmt.Printf("Use 'moat attach %s' to reattach\r\n", r.ID) - } else { - fmt.Printf("\r\nRun %s stopped\r\n", r.ID) - } - return nil - - case term.EscapeStop: + if action == term.EscapeStop { fmt.Printf("\r\nStopping run %s...\r\n", r.ID) 
attachCancel() if err := manager.Stop(context.Background(), r.ID); err != nil { From 8182d553f680e9bcf52c57d407daf6003c4b5e66 Mon Sep 17 00:00:00 2001 From: Dan Pupius Date: Fri, 27 Feb 2026 15:44:15 -0800 Subject: [PATCH 05/28] refactor(container): remove Attach from runtime interface Only StartAttached remains for interactive sessions. The separate Attach method was only needed for the removed moat attach command. --- cmd/moat/cli/list_clean_test.go | 3 -- internal/container/apple.go | 32 ----------------- internal/container/docker.go | 61 +-------------------------------- internal/container/runtime.go | 4 --- internal/run/manager.go | 19 ---------- 5 files changed, 1 insertion(+), 118 deletions(-) diff --git a/cmd/moat/cli/list_clean_test.go b/cmd/moat/cli/list_clean_test.go index d74f17ad..cef767a2 100644 --- a/cmd/moat/cli/list_clean_test.go +++ b/cmd/moat/cli/list_clean_test.go @@ -81,9 +81,6 @@ func (s *listCleanStubRuntime) ContainerState(ctx context.Context, id string) (s func (s *listCleanStubRuntime) RemoveImage(ctx context.Context, id string) error { panic("unexpected call to RemoveImage") } -func (s *listCleanStubRuntime) Attach(ctx context.Context, id string, opts container.AttachOptions) error { - panic("unexpected call to Attach") -} func (s *listCleanStubRuntime) StartAttached(ctx context.Context, id string, opts container.AttachOptions) error { panic("unexpected call to StartAttached") } diff --git a/internal/container/apple.go b/internal/container/apple.go index bcb0c439..e4ab3c99 100644 --- a/internal/container/apple.go +++ b/internal/container/apple.go @@ -1137,38 +1137,6 @@ func (r *AppleRuntime) ContainerState(ctx context.Context, containerID string) ( return info[0].Status, nil } -// Attach connects stdin/stdout/stderr to a running container. 
-func (r *AppleRuntime) Attach(ctx context.Context, containerID string, opts AttachOptions) error { - // Build attach command arguments - args := []string{"attach"} - if opts.Stdin != nil { - args = append(args, "--stdin") - } - args = append(args, containerID) - - cmd := exec.CommandContext(ctx, r.containerBin, args...) - - // Connect stdin/stdout/stderr - if opts.Stdin != nil { - cmd.Stdin = opts.Stdin - } - if opts.Stdout != nil { - cmd.Stdout = opts.Stdout - } - if opts.Stderr != nil { - cmd.Stderr = opts.Stderr - } - - // Run the attach command - if err := cmd.Run(); err != nil { - if ctx.Err() != nil { - return ctx.Err() - } - return fmt.Errorf("attaching to container: %w", err) - } - return nil -} - // ResizeTTY resizes the container's TTY to the given dimensions. // For Apple containers, this resizes the PTY master created during StartAttached. func (r *AppleRuntime) ResizeTTY(ctx context.Context, containerID string, height, width uint) error { diff --git a/internal/container/docker.go b/internal/container/docker.go index 8080742f..32ae3dcf 100644 --- a/internal/container/docker.go +++ b/internal/container/docker.go @@ -618,65 +618,6 @@ func (r *DockerRuntime) ContainerState(ctx context.Context, containerID string) return inspect.State.Status, nil } -// Attach connects stdin/stdout/stderr to a running container. -func (r *DockerRuntime) Attach(ctx context.Context, containerID string, opts AttachOptions) error { - resp, err := r.cli.ContainerAttach(ctx, containerID, container.AttachOptions{ - Stream: true, - Stdin: opts.Stdin != nil, - Stdout: opts.Stdout != nil, - Stderr: opts.Stderr != nil, - }) - if err != nil { - return fmt.Errorf("attaching to container: %w", err) - } - defer resp.Close() - - // Set up bidirectional copy - outputDone := make(chan error, 1) - stdinDone := make(chan error, 1) - - // Copy container output to stdout/stderr. 
- // Since containers are always created with Tty: true (see CreateContainer), - // output is always raw (not multiplexed). Use io.Copy unconditionally. - go func() { - _, err := io.Copy(opts.Stdout, resp.Reader) - outputDone <- err - }() - - // Copy stdin to container (if provided) - if opts.Stdin != nil { - go func() { - _, err := io.Copy(resp.Conn, opts.Stdin) - // Close write side when stdin ends - if closeWriter, ok := resp.Conn.(interface{ CloseWrite() error }); ok { - if closeErr := closeWriter.CloseWrite(); closeErr != nil && err == nil { - err = closeErr - } - } - stdinDone <- err - }() - } - - // Wait for context cancellation, stdin error (e.g., escape sequence), or output completion - for { - select { - case <-ctx.Done(): - return ctx.Err() - case err := <-stdinDone: - // Stdin error - could be escape sequence or EOF - if err != nil && err != io.EOF { - return err - } - // Normal stdin EOF - continue waiting for output - case err := <-outputDone: - if err != nil && err != io.EOF { - return err - } - return nil - } - } -} - // ResizeTTY resizes the container's TTY to the given dimensions. func (r *DockerRuntime) ResizeTTY(ctx context.Context, containerID string, height, width uint) error { return r.cli.ContainerResize(ctx, containerID, container.ResizeOptions{ @@ -690,7 +631,7 @@ func (r *DockerRuntime) ResizeTTY(ctx context.Context, containerID string, heigh // before the process starts. The attach happens first, then start, ensuring // the I/O streams are ready when the container's process begins. 
func (r *DockerRuntime) StartAttached(ctx context.Context, containerID string, opts AttachOptions) error { - // Attach first (before starting) - this is the key difference from Attach() + // Attach before starting so I/O streams are ready when the process begins resp, err := r.cli.ContainerAttach(ctx, containerID, container.AttachOptions{ Stream: true, Stdin: opts.Stdin != nil, diff --git a/internal/container/runtime.go b/internal/container/runtime.go index 7566e157..b09c0f8c 100644 --- a/internal/container/runtime.go +++ b/internal/container/runtime.go @@ -109,10 +109,6 @@ type Runtime interface { // RemoveImage removes an image by ID or tag. RemoveImage(ctx context.Context, id string) error - // Attach connects stdin/stdout/stderr to a running container. - // Returns when the attachment ends (container exits or context canceled). - Attach(ctx context.Context, id string, opts AttachOptions) error - // StartAttached starts a container with stdin/stdout/stderr already attached. // This is required for TUI applications that need the terminal connected // before the process starts (e.g., to read cursor position). diff --git a/internal/run/manager.go b/internal/run/manager.go index b1f761d5..eafff1b2 100644 --- a/internal/run/manager.go +++ b/internal/run/manager.go @@ -3122,25 +3122,6 @@ func (m *Manager) Destroy(ctx context.Context, runID string) error { return nil } -// Attach connects stdin/stdout/stderr to a running container. -func (m *Manager) Attach(ctx context.Context, runID string, stdin io.Reader, stdout, stderr io.Writer) error { - m.mu.RLock() - r, ok := m.runs[runID] - if !ok { - m.mu.RUnlock() - return fmt.Errorf("run %s not found", runID) - } - containerID := r.ContainerID - m.mu.RUnlock() - - return m.runtime.Attach(ctx, containerID, container.AttachOptions{ - Stdin: stdin, - Stdout: stdout, - Stderr: stderr, - TTY: true, // Default to TTY mode for now - }) -} - // ResizeTTY resizes the container's TTY to the given dimensions. 
func (m *Manager) ResizeTTY(ctx context.Context, runID string, height, width uint) error { m.mu.RLock() From 22b1cb784e921f35b6ed88330daf556f145eaa96 Mon Sep 17 00:00:00 2001 From: Dan Pupius Date: Fri, 27 Feb 2026 15:49:29 -0800 Subject: [PATCH 06/28] fix(cli): update messages to remove moat attach references Point users to moat logs and moat stop instead of the removed moat attach command. --- cmd/moat/cli/resolve.go | 2 +- cmd/moat/cli/wt.go | 4 ++-- internal/cli/worktree.go | 2 +- internal/e2e/logs_capture_test.go | 4 ++-- internal/providers/claude/cli.go | 4 ++-- internal/providers/claude/cli_test.go | 4 ++-- internal/run/manager.go | 2 +- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/cmd/moat/cli/resolve.go b/cmd/moat/cli/resolve.go index 7fa483ea..28a34566 100644 --- a/cmd/moat/cli/resolve.go +++ b/cmd/moat/cli/resolve.go @@ -36,7 +36,7 @@ func resolveRunArg(manager *run.Manager, arg string, action string) ([]string, e // resolveRunArgSingle resolves a user-provided argument to exactly one run ID. // If multiple runs match, it prints them and returns an error telling the user // to specify a run ID. This is used by commands that only operate on a single -// run (e.g., logs, trace, audit, attach). +// run (e.g., logs, trace, audit). func resolveRunArgSingle(manager *run.Manager, arg string) (string, error) { matches, err := manager.Resolve(arg) if err != nil { diff --git a/cmd/moat/cli/wt.go b/cmd/moat/cli/wt.go index aadd944d..2c02473e 100644 --- a/cmd/moat/cli/wt.go +++ b/cmd/moat/cli/wt.go @@ -25,7 +25,7 @@ var wtCmd = &cobra.Command{ If the branch doesn't exist, it's created from HEAD. If the worktree doesn't exist, it's created. If a run is already active in the worktree, returns an -error with instructions to attach or stop it. +error with instructions to view logs or stop it. Configuration is read from agent.yaml in the repository root. 
@@ -140,7 +140,7 @@ func runWorktree(cmd *cobra.Command, args []string) error { for _, r := range manager.List() { if r.WorktreePath == result.WorkspacePath && r.GetState() == run.StateRunning { - return fmt.Errorf("a run is already active in worktree for branch %q: %s (%s)\nAttach with 'moat attach %s' or stop with 'moat stop %s'", branch, r.Name, r.ID, r.ID, r.ID) + return fmt.Errorf("a run is already active in worktree for branch %q: %s (%s)\nView logs with 'moat logs %s' or stop with 'moat stop %s'", branch, r.Name, r.ID, r.ID, r.ID) } } diff --git a/internal/cli/worktree.go b/internal/cli/worktree.go index 79da7ddf..51ce2a2c 100644 --- a/internal/cli/worktree.go +++ b/internal/cli/worktree.go @@ -64,7 +64,7 @@ func ResolveWorktreeWorkspace(wtBranch, workspace string, flags *ExecFlags, cfg // Check for active run in this worktree (only possible if reused) if result.Reused && CheckWorktreeActive != nil { if name, id := CheckWorktreeActive(result.WorkspacePath); name != "" { - return nil, fmt.Errorf("a run is already active in worktree for branch %q: %s (%s)\nStop it first with 'moat stop %s' or attach with 'moat attach %s'", wtBranch, name, id, id, id) + return nil, fmt.Errorf("a run is already active in worktree for branch %q: %s (%s)\nStop it first with 'moat stop %s' or view logs with 'moat logs %s'", wtBranch, name, id, id, id) } } diff --git a/internal/e2e/logs_capture_test.go b/internal/e2e/logs_capture_test.go index 6cf4d73a..cecd84c9 100644 --- a/internal/e2e/logs_capture_test.go +++ b/internal/e2e/logs_capture_test.go @@ -16,7 +16,7 @@ import ( // TestLogsCapturedInAttachedMode verifies that logs are captured to logs.jsonl // when a run completes in attached (non-interactive) mode. 
-// This is the standard flow: moat run (without -d) +// This is the standard flow: moat run func TestLogsCapturedInAttachedMode(t *testing.T) { testOnAllRuntimes(t, func(t *testing.T, rt container.Runtime) { ctx, cancel := context.WithTimeout(context.Background(), testTimeout) @@ -89,7 +89,7 @@ func TestLogsCapturedInAttachedMode(t *testing.T) { // TestLogsCapturedInDetachedMode verifies that logs are captured to logs.jsonl // when a run is started in detached mode and later completes. -// This is the critical bug: moat run -d doesn't capture logs. +// This verifies logs are captured when a container runs without attached I/O. func TestLogsCapturedInDetachedMode(t *testing.T) { testOnAllRuntimes(t, func(t *testing.T, rt container.Runtime) { ctx, cancel := context.WithTimeout(context.Background(), testTimeout) diff --git a/internal/providers/claude/cli.go b/internal/providers/claude/cli.go index fa50826e..478ccf7f 100644 --- a/internal/providers/claude/cli.go +++ b/internal/providers/claude/cli.go @@ -416,9 +416,9 @@ func resolveResumeSessionInDir(arg, baseDir string) (string, error) { } // If the run is still active, the session ID hasn't been captured yet. - // Tell the user to attach instead of starting a new container. + // Tell the user to view logs or stop instead of starting a new container. 
if match.State == "running" || match.State == "starting" { - return "", fmt.Errorf("run %s (%s) is still running\n\nUse 'moat attach %s' to reconnect.", matchID, match.Name, arg) + return "", fmt.Errorf("run %s (%s) is still running\n\nUse 'moat logs %s' to view output or 'moat stop %s' to stop.", matchID, match.Name, matchID, matchID) } sessionID := match.ProviderMeta["claude_session_id"] diff --git a/internal/providers/claude/cli_test.go b/internal/providers/claude/cli_test.go index bcf25a1f..2070a7fe 100644 --- a/internal/providers/claude/cli_test.go +++ b/internal/providers/claude/cli_test.go @@ -353,8 +353,8 @@ func TestResolveResumeSession_StillRunning(t *testing.T) { if !strings.Contains(err.Error(), "still running") { t.Errorf("error = %q, want mention of 'still running'", err) } - if !strings.Contains(err.Error(), "moat attach") { - t.Errorf("error = %q, want mention of 'moat attach'", err) + if !strings.Contains(err.Error(), "moat logs") { + t.Errorf("error = %q, want mention of 'moat logs'", err) } } diff --git a/internal/run/manager.go b/internal/run/manager.go index eafff1b2..a0a31fea 100644 --- a/internal/run/manager.go +++ b/internal/run/manager.go @@ -3159,7 +3159,7 @@ func (m *Manager) FollowLogs(ctx context.Context, runID string, w io.Writer) err } // RecentLogs returns the last n lines of container logs. -// Used to show context when re-attaching to a running container. +// Used to show recent output context for a running container. func (m *Manager) RecentLogs(runID string, lines int) (string, error) { m.mu.RLock() r, ok := m.runs[runID] From 8926e73756183c354bea0c27f6b88553de937b04 Mon Sep 17 00:00:00 2001 From: Dan Pupius Date: Fri, 27 Feb 2026 15:53:18 -0800 Subject: [PATCH 07/28] docs: update for interactive-only execution model Remove documentation for moat attach command, --detach flag, and Ctrl-/ d escape sequence. Document the simplified two-mode model. 
--- docs/content/guides/01-claude-code.md | 10 +- docs/content/guides/02-codex.md | 10 +- docs/content/guides/03-gemini.md | 10 +- docs/content/guides/11-observability.md | 10 +- docs/content/guides/12-worktrees.md | 12 +- docs/content/reference/01-cli.md | 110 +++++------------- docs/content/reference/02-agent-yaml.md | 6 +- .../2026-01-19-interactive-attach-model.md | 2 +- 8 files changed, 61 insertions(+), 109 deletions(-) diff --git a/docs/content/guides/01-claude-code.md b/docs/content/guides/01-claude-code.md index 9592bf2b..53d02284 100644 --- a/docs/content/guides/01-claude-code.md +++ b/docs/content/guides/01-claude-code.md @@ -171,22 +171,22 @@ moat claude --name feature-auth ./my-project The name appears in `moat list` and makes it easier to manage multiple runs. -### Background runs +### Non-interactive runs -Run Claude Code in the background: +Run Claude Code non-interactively with a prompt: ```bash -moat claude -d ./my-project +moat claude -p "fix the failing tests" ./my-project ``` -Reattach later: +Monitor progress: ```bash $ moat list NAME RUN ID STATE AGE feature-auth run_a1b2c3d4e5f6 running 5m -$ moat attach run_a1b2c3d4e5f6 +$ moat logs -f run_a1b2c3d4e5f6 ``` ## Adding GitHub access diff --git a/docs/content/guides/02-codex.md b/docs/content/guides/02-codex.md index 6d72420f..4e5e1292 100644 --- a/docs/content/guides/02-codex.md +++ b/docs/content/guides/02-codex.md @@ -102,22 +102,22 @@ moat codex --name feature-auth ./my-project The name appears in `moat list` and makes it easier to manage multiple runs. 
-### Background runs +### Non-interactive runs -Run Codex in the background: +Run Codex non-interactively with a prompt: ```bash -moat codex -d ./my-project +moat codex -p "fix the failing tests" ./my-project ``` -Reattach later: +Monitor progress: ```bash $ moat list NAME RUN ID STATE AGE feature-auth run_a1b2c3d4e5f6 running 5m -$ moat attach run_a1b2c3d4e5f6 +$ moat logs -f run_a1b2c3d4e5f6 ``` ## Adding GitHub access diff --git a/docs/content/guides/03-gemini.md b/docs/content/guides/03-gemini.md index f19e46be..d2d5dcf1 100644 --- a/docs/content/guides/03-gemini.md +++ b/docs/content/guides/03-gemini.md @@ -138,22 +138,22 @@ moat gemini --name feature-auth ./my-project The name appears in `moat list` and makes it easier to manage multiple runs. -### Background runs +### Non-interactive runs -Run Gemini in the background: +Run Gemini non-interactively with a prompt: ```bash -moat gemini -d ./my-project +moat gemini -p "fix the failing tests" ./my-project ``` -Reattach later: +Monitor progress: ```bash $ moat list NAME RUN ID STATE AGE feature-auth run_a1b2c3d4e5f6 running 5m -$ moat attach run_a1b2c3d4e5f6 +$ moat logs -f run_a1b2c3d4e5f6 ``` ## Adding GitHub access diff --git a/docs/content/guides/11-observability.md b/docs/content/guides/11-observability.md index affcc610..a89bac93 100644 --- a/docs/content/guides/11-observability.md +++ b/docs/content/guides/11-observability.md @@ -42,7 +42,11 @@ Show the last N lines: $ moat logs -n 50 ``` -> **Note:** The `--follow` flag for `moat logs` is not yet implemented. Use `moat attach` to see live output from a running container. +Follow logs in real time with `--follow`: + +```bash +$ moat logs -f my-agent +``` See [CLI reference](../reference/01-cli.md) for the complete list of `moat logs` flags. @@ -265,10 +269,10 @@ $ jq 'select(.status == 401)' ~/.moat/runs/run_*/network.jsonl ### No output from `moat logs` -The run may not have produced any output. Verify the run exists with `moat list`. 
If the run is still active, use `moat attach` to see live output:
+The run may not have produced any output. Verify the run exists with `moat list`. If the run is still active, use `moat logs -f` to follow live output:
 
 ```bash
-$ moat attach run_a1b2c3d4e5f6
+$ moat logs -f run_a1b2c3d4e5f6
 ```
 
 ### No network traces
diff --git a/docs/content/guides/12-worktrees.md b/docs/content/guides/12-worktrees.md
index 69fa141a..5e9ca488 100644
--- a/docs/content/guides/12-worktrees.md
+++ b/docs/content/guides/12-worktrees.md
@@ -114,7 +114,7 @@ If a run is already active in a worktree, `moat wt` returns an error:
 
 ```text
 Error: a run is already active in worktree for branch "dark-mode": my-agent-dark-mode (run_a1b2c3d4e5f6)
-Attach with 'moat attach run_a1b2c3d4e5f6' or stop with 'moat stop run_a1b2c3d4e5f6'
+View logs with 'moat logs run_a1b2c3d4e5f6' or stop with 'moat stop run_a1b2c3d4e5f6'
 ```
 
 The `--worktree` flag on agent commands behaves the same way.
@@ -182,15 +182,15 @@ Branch-specific configuration works as follows:
 
 2. Start runs on multiple branches:
    ```bash
-   moat wt feature/auth -d
-   moat wt feature/dark-mode -d
+   moat wt feature/auth
+   moat wt feature/dark-mode
    ```
 
 3. Monitor progress:
    ```bash
    moat wt list
    moat logs run_a1b2c3d4e5f6
-   moat attach run_a1b2c3d4e5f6
+   moat logs -f run_a1b2c3d4e5f6
    ```
 
 4. When finished, clean up worktree directories:
@@ -216,10 +216,10 @@ Run `moat wt` from within a git repository. Worktrees are a git feature and requ
 
 ### "a run is already active in worktree"
 
-Another run is already active on that branch. Either attach to it or stop it first:
+Another run is already active on that branch. 
Follow its logs or stop it first: ```bash -moat attach +moat logs -f moat stop ``` diff --git a/docs/content/reference/01-cli.md b/docs/content/reference/01-cli.md index 6de964ff..93bc3896 100644 --- a/docs/content/reference/01-cli.md +++ b/docs/content/reference/01-cli.md @@ -23,7 +23,7 @@ These flags apply to all commands: ## Run identification -Commands that operate on a run (`stop`, `destroy`, `attach`, `logs`, `trace`, `audit`, `snapshot`) accept a run ID or a run name: +Commands that operate on a run (`stop`, `destroy`, `logs`, `trace`, `audit`, `snapshot`) accept a run ID or a run name: ```bash moat stop run_a1b2c3d4e5f6 # by full ID @@ -33,7 +33,7 @@ moat stop my-agent # by name Resolution priority: exact ID > ID prefix > exact name. -If a name matches multiple runs, batch commands (`stop`, `destroy`) prompt for confirmation while single-target commands (`logs`, `attach`) list the matches and ask you to specify a run ID. +If a name matches multiple runs, batch commands (`stop`, `destroy`) prompt for confirmation while single-target commands (`logs`) list the matches and ask you to specify a run ID. ## Common agent flags @@ -44,7 +44,6 @@ The agent commands (`moat claude`, `moat codex`, `moat gemini`) share the follow | `-g`, `--grant PROVIDER` | Inject credential (repeatable). See [Grants reference](./04-grants.md) for available providers. 
| | `-e KEY=VALUE` | Set environment variable (repeatable) | | `-n`, `--name NAME` | Run name (default: from `agent.yaml` or random) | -| `-d`, `--detach` | Run in background | | `--rebuild` | Force rebuild of container image | | `--allow-host HOST` | Additional hosts to allow network access to (repeatable) | | `--runtime RUNTIME` | Container runtime to use (`apple`, `docker`) | @@ -52,9 +51,9 @@ The agent commands (`moat claude`, `moat codex`, `moat gemini`) share the follow | `--no-sandbox` | Disable gVisor sandbox (Docker only) | | `--worktree BRANCH` | Run in a git worktree for this branch (alias: `--wt`) | -Each agent command also accepts `-p`/`--prompt` for non-interactive mode, plus command-specific flags documented in their own sections. +Agent commands run interactively by default, owning the terminal with stdin/stdout/stderr connected. Use `-p`/`--prompt` for non-interactive mode (runs in background, exits when done). Each agent command also accepts command-specific flags documented in their own sections. -All agent commands support passing an initial prompt after `--`. Unlike `-p`, which runs non-interactively and exits when done, arguments after `--` start an interactive session with the prompt pre-filled: +All agent commands support passing an initial prompt after `--`. Unlike `-p`, which runs non-interactively in the background and exits when done, arguments after `--` start an interactive session with the prompt pre-filled: ```bash moat claude -- "is this thing on?" 
@@ -85,7 +84,6 @@ moat run [flags] [path] [-- command] | `--name NAME` | Set run name (used for hostname routing) | | `--grant PROVIDER` | Inject credential (repeatable) | | `-e KEY=VALUE` | Set environment variable (repeatable) | -| `-d`, `--detach` | Run in background | | `-i`, `--interactive` | Enable interactive mode (stdin + TTY) | | `--rebuild` | Force rebuild of container image | | `--runtime RUNTIME` | Container runtime to use (apple, docker) | @@ -94,10 +92,25 @@ moat run [flags] [path] [-- command] | `--no-snapshots` | Disable snapshots for this run | | `--no-sandbox` | Disable gVisor sandboxing (Docker only) | +### Execution modes + +**Non-interactive (default):** The run starts in the background. Use `moat logs -f` to follow output and `moat stop ` to stop. + +```bash +moat run ./my-project +moat logs -f +``` + +**Interactive (`-i`):** The run owns the terminal with stdin/stdout/stderr connected and a TTY allocated. Press `Ctrl-/ k` to stop the run. `Ctrl+C` is forwarded to the container process. 
+ +```bash +moat run -i -- bash +``` + ### Examples ```bash -# Run in current directory +# Run in current directory (non-interactive, background) moat run # Run in specific directory @@ -115,9 +128,6 @@ moat run -- sh -c "npm install && npm test" # Interactive shell moat run -i -- bash -# Background run -moat run -d ./my-project - # Multiple credentials moat run --grant github --grant anthropic ./my-project @@ -206,11 +216,8 @@ moat claude --grant github # Named run moat claude --name feature-auth ./my-project -# Run in a git worktree -moat claude --worktree=dark-mode --prompt "implement dark mode" --detach - -# Background run -moat claude -d ./my-project +# Run in a git worktree (non-interactive with prompt) +moat claude --worktree=dark-mode --prompt "implement dark mode" # Require manual approval for each tool use moat claude --noyolo @@ -273,11 +280,8 @@ moat codex --grant github # Named run moat codex --name my-feature -# Run in a git worktree -moat codex --worktree=dark-mode --prompt "implement dark mode" --detach - -# Run in background -moat codex -d +# Run in a git worktree (non-interactive with prompt) +moat codex --worktree=dark-mode --prompt "implement dark mode" # Force rebuild moat codex --rebuild @@ -331,11 +335,8 @@ moat gemini --grant github # Named run moat gemini --name my-feature -# Run in a git worktree -moat gemini --worktree=dark-mode --prompt "implement dark mode" --detach - -# Run in background -moat gemini -d +# Run in a git worktree (non-interactive with prompt) +moat gemini --worktree=dark-mode --prompt "implement dark mode" # Force rebuild moat gemini --rebuild @@ -353,7 +354,7 @@ moat wt [-- command] The branch is created from HEAD if it doesn't exist. The worktree is created at `~/.moat/worktrees//`. -Configuration is read from `agent.yaml` in the repository root. If a run is already active in the worktree, returns an error with instructions to attach or stop it. +Configuration is read from `agent.yaml` in the repository root. 
If a run is already active in the worktree, returns an error with instructions to stop it. ### Arguments @@ -366,7 +367,6 @@ Configuration is read from `agent.yaml` in the repository root. If a run is alre | Flag | Description | |------|-------------| -| `-d`, `--detach` | Run in background | | `-n`, `--name NAME` | Override auto-generated run name | | `-g`, `--grant PROVIDER` | Inject credential (repeatable) | | `-e KEY=VALUE` | Set environment variable (repeatable) | @@ -390,9 +390,6 @@ Override the default worktree base path (`~/.moat/worktrees/`) with the `MOAT_WO # Start a run in a worktree for the dark-mode branch moat wt dark-mode -# Run in background -moat wt dark-mode -d - # Run a specific command in the worktree moat wt dark-mode -- make test @@ -438,59 +435,6 @@ moat wt clean dark-mode --- -## moat attach - -Attach to a running container. - -``` -moat attach [flags] -``` - -### Arguments - -| Argument | Description | -|----------|-------------| -| `run` | Run ID or name | - -### Flags - -| Flag | Description | -|------|-------------| -| `-i`, `--interactive` | Force interactive mode | -| `-i=false` | Force output-only mode | - -By default, uses the run's original mode (interactive or not). - -On attach, Moat sends a TTY resize signal to the container to force a screen repaint. This ensures the terminal displays the current state immediately, rather than waiting for new output. - -### Examples - -```bash -# Attach by name -moat attach my-agent - -# Attach by ID -moat attach run_a1b2c3d4e5f6 - -# Force interactive -moat attach -i run_a1b2c3d4e5f6 - -# Force output-only -moat attach -i=false run_a1b2c3d4e5f6 -``` - -### Detach sequences - -**Non-interactive mode:** -- `Ctrl+C` -- Detach (run continues) -- `Ctrl+C Ctrl+C` (within 500ms) -- Stop the run - -**Interactive mode:** -- `Ctrl-/ d` -- Detach (run continues) -- `Ctrl-/ k` -- Stop the run - ---- - ## moat grant Store credentials for injection into runs. 
See [Grants reference](./04-grants.md) for details on each provider, host matching rules, and credential sources. diff --git a/docs/content/reference/02-agent-yaml.md b/docs/content/reference/02-agent-yaml.md index 217d6c97..23deb3b7 100644 --- a/docs/content/reference/02-agent-yaml.md +++ b/docs/content/reference/02-agent-yaml.md @@ -577,7 +577,7 @@ command: ["sh", "-c", "npm install && npm start"] ### interactive -Enable interactive mode (stdin + TTY). +Enable interactive mode. ```yaml interactive: true @@ -587,6 +587,10 @@ interactive: true - Default: `false` - CLI override: `-i` +When `true`, allocates a TTY and connects stdin. The session owns the terminal. Press `Ctrl-/ k` to stop the run; `Ctrl+C` is forwarded to the container process. + +When `false` (default), the run starts in the background. Use `moat logs -f` to follow output and `moat stop ` to stop. + Required for shells, REPLs, and interactive tools. --- diff --git a/docs/plans/2026-01-19-interactive-attach-model.md b/docs/plans/2026-01-19-interactive-attach-model.md index 9d2c73f0..ba3fcd76 100644 --- a/docs/plans/2026-01-19-interactive-attach-model.md +++ b/docs/plans/2026-01-19-interactive-attach-model.md @@ -1,7 +1,7 @@ # Interactive and Attach Model for Moat **Date:** 2026-01-19 -**Status:** Implemented +**Status:** Superseded by [Interactive-Only Simplification](2026-02-27-interactive-only-simplification.md) ## Problem Statement From d62efa4c1972c6077a3aaed9b7151e9a4f7bfc4e Mon Sep 17 00:00:00 2001 From: Dan Pupius Date: Fri, 27 Feb 2026 16:10:15 -0800 Subject: [PATCH 08/28] refactor: remove unused TTY field from ExecOptions and run.Options TTY was always set identically to Interactive by every caller and was never read inside internal/run/. The actual TTY decision is made at runtime via term.IsTerminal(os.Stdin) in manager.StartAttached(). 
--- cmd/moat/cli/exec.go | 1 - cmd/moat/cli/run.go | 1 - cmd/moat/cli/wt.go | 1 - internal/cli/types.go | 1 - internal/providers/claude/cli.go | 1 - internal/providers/codex/cli.go | 1 - internal/providers/gemini/cli.go | 1 - internal/run/run.go | 1 - 8 files changed, 8 deletions(-) diff --git a/cmd/moat/cli/exec.go b/cmd/moat/cli/exec.go index f444c4e9..49d7ebb2 100644 --- a/cmd/moat/cli/exec.go +++ b/cmd/moat/cli/exec.go @@ -209,7 +209,6 @@ func ExecuteRun(ctx context.Context, opts intcli.ExecOptions) (*run.Run, error) Rebuild: opts.Flags.Rebuild, KeepContainer: opts.Flags.KeepContainer, Interactive: opts.Interactive, - TTY: opts.TTY, } // Create run diff --git a/cmd/moat/cli/run.go b/cmd/moat/cli/run.go index d7f6baf5..0fed22de 100644 --- a/cmd/moat/cli/run.go +++ b/cmd/moat/cli/run.go @@ -174,7 +174,6 @@ func runAgent(cmd *cobra.Command, args []string) error { Command: containerCmd, Config: cfg, Interactive: interactive, - TTY: interactive, } r, err := ExecuteRun(ctx, opts) diff --git a/cmd/moat/cli/wt.go b/cmd/moat/cli/wt.go index 2c02473e..8572a0c3 100644 --- a/cmd/moat/cli/wt.go +++ b/cmd/moat/cli/wt.go @@ -189,7 +189,6 @@ func runWorktree(cmd *cobra.Command, args []string) error { Command: containerCmd, Config: cfg, Interactive: interactive, - TTY: interactive, } intcli.SetWorktreeFields(&opts, result) diff --git a/internal/cli/types.go b/internal/cli/types.go index c364aee8..81a33aae 100644 --- a/internal/cli/types.go +++ b/internal/cli/types.go @@ -48,7 +48,6 @@ type ExecOptions struct { Command []string Config *config.Config Interactive bool // Can be set by flags or command logic - TTY bool // Worktree tracking (set by moat wt or --wt flag) WorktreeBranch string diff --git a/internal/providers/claude/cli.go b/internal/providers/claude/cli.go index 478ccf7f..e4d7e478 100644 --- a/internal/providers/claude/cli.go +++ b/internal/providers/claude/cli.go @@ -262,7 +262,6 @@ func runClaudeCode(cmd *cobra.Command, args []string) error { Command: containerCmd, 
Config: cfg, Interactive: interactive, - TTY: interactive, } cli.SetWorktreeFields(&opts, wtOut.Result) diff --git a/internal/providers/codex/cli.go b/internal/providers/codex/cli.go index 2f72634b..90432d3a 100644 --- a/internal/providers/codex/cli.go +++ b/internal/providers/codex/cli.go @@ -259,7 +259,6 @@ func runCodex(cmd *cobra.Command, args []string) error { Command: containerCmd, Config: cfg, Interactive: interactive, - TTY: interactive, } cli.SetWorktreeFields(&opts, wtOut.Result) diff --git a/internal/providers/gemini/cli.go b/internal/providers/gemini/cli.go index aefcf95d..04fa1556 100644 --- a/internal/providers/gemini/cli.go +++ b/internal/providers/gemini/cli.go @@ -222,7 +222,6 @@ func runGemini(cmd *cobra.Command, args []string) error { Command: containerCmd, Config: cfg, Interactive: interactive, - TTY: interactive, } cli.SetWorktreeFields(&opts, wtOut.Result) diff --git a/internal/run/run.go b/internal/run/run.go index 51e6f0a4..7dae47f0 100644 --- a/internal/run/run.go +++ b/internal/run/run.go @@ -123,7 +123,6 @@ type Options struct { Rebuild bool // Force rebuild of container image (ignores cache) KeepContainer bool // If true, don't auto-remove container after run Interactive bool // Keep stdin open for interactive input - TTY bool // Allocate a pseudo-TTY } // generateID creates a unique run identifier. From ccd92175900db405df4d0690d26a80a7044bf297 Mon Sep 17 00:00:00 2001 From: Dan Pupius Date: Fri, 27 Feb 2026 16:12:21 -0800 Subject: [PATCH 09/28] refactor(run): remove unused StreamLogs feature from Start StreamLogs was never true in production - all callers passed false. Non-interactive runs tell users to use 'moat logs -f' instead of streaming to stdout. Removes the streamLogs method and its stdcopy import. 
--- cmd/moat/cli/doctor_claude.go | 2 +- cmd/moat/cli/exec.go | 3 +-- internal/e2e/logs_capture_test.go | 8 +++--- internal/run/manager.go | 41 +------------------------------ 4 files changed, 7 insertions(+), 47 deletions(-) diff --git a/cmd/moat/cli/doctor_claude.go b/cmd/moat/cli/doctor_claude.go index ab607065..6b105ab6 100644 --- a/cmd/moat/cli/doctor_claude.go +++ b/cmd/moat/cli/doctor_claude.go @@ -945,7 +945,7 @@ func testContainerAuth(ctx context.Context, diag *claudeDiagnostic) error { diag.ContainerTest = result // Start container (don't stream logs to avoid cluttering output) - if startErr := mgr.Start(ctx, r.ID, run.StartOptions{StreamLogs: false}); startErr != nil { + if startErr := mgr.Start(ctx, r.ID, run.StartOptions{}); startErr != nil { return fmt.Errorf("starting container: %w", startErr) } diff --git a/cmd/moat/cli/exec.go b/cmd/moat/cli/exec.go index 49d7ebb2..5626cfd5 100644 --- a/cmd/moat/cli/exec.go +++ b/cmd/moat/cli/exec.go @@ -245,8 +245,7 @@ func ExecuteRun(ctx context.Context, opts intcli.ExecOptions) (*run.Run, error) } // Non-interactive: start in background - startOpts := run.StartOptions{StreamLogs: false} - if err := manager.Start(ctx, r.ID, startOpts); err != nil { + if err := manager.Start(ctx, r.ID, run.StartOptions{}); err != nil { log.Error("failed to start run", "id", r.ID, "error", err) return r, fmt.Errorf("starting run: %w", err) } diff --git a/internal/e2e/logs_capture_test.go b/internal/e2e/logs_capture_test.go index cecd84c9..16729852 100644 --- a/internal/e2e/logs_capture_test.go +++ b/internal/e2e/logs_capture_test.go @@ -42,7 +42,7 @@ func TestLogsCapturedInAttachedMode(t *testing.T) { defer mgr.Destroy(context.Background(), r.ID) // Start and wait for completion (simulating attached mode) - if err := mgr.Start(ctx, r.ID, run.StartOptions{StreamLogs: true}); err != nil { + if err := mgr.Start(ctx, r.ID, run.StartOptions{}); err != nil { t.Fatalf("Start: %v", err) } @@ -115,7 +115,7 @@ func 
TestLogsCapturedInDetachedMode(t *testing.T) { defer mgr.Destroy(context.Background(), r.ID) // Start without waiting (detached mode) - if err := mgr.Start(ctx, r.ID, run.StartOptions{StreamLogs: false}); err != nil { + if err := mgr.Start(ctx, r.ID, run.StartOptions{}); err != nil { t.Fatalf("Start: %v", err) } @@ -252,7 +252,7 @@ func TestLogsCapturedAfterStop(t *testing.T) { defer mgr.Destroy(context.Background(), r.ID) // Start the run - if err := mgr.Start(ctx, r.ID, run.StartOptions{StreamLogs: false}); err != nil { + if err := mgr.Start(ctx, r.ID, run.StartOptions{}); err != nil { t.Fatalf("Start: %v", err) } @@ -327,7 +327,7 @@ func TestLogsAlwaysExistForAudit(t *testing.T) { } defer mgr.Destroy(context.Background(), r.ID) - if err := mgr.Start(ctx, r.ID, run.StartOptions{StreamLogs: false}); err != nil { + if err := mgr.Start(ctx, r.ID, run.StartOptions{}); err != nil { t.Fatalf("Start: %v", err) } diff --git a/internal/run/manager.go b/internal/run/manager.go index a0a31fea..974d30b9 100644 --- a/internal/run/manager.go +++ b/internal/run/manager.go @@ -20,7 +20,6 @@ import ( "syscall" "time" - "github.com/docker/docker/pkg/stdcopy" "github.com/majorcontext/moat/internal/audit" "github.com/majorcontext/moat/internal/config" "github.com/majorcontext/moat/internal/container" @@ -2158,11 +2157,7 @@ region = %s } // StartOptions configures how a run is started. -type StartOptions struct { - // StreamLogs controls whether container logs are streamed to stdout. - // Set to false for interactive mode where attach handles I/O. - StreamLogs bool -} +type StartOptions struct{} // Start begins execution of a run. 
func (m *Manager) Start(ctx context.Context, runID string, opts StartOptions) error { @@ -2255,13 +2250,7 @@ func (m *Manager) Start(ctx context.Context, runID string, opts StartOptions) er } } - // Stream logs to stdout (unless disabled for interactive mode) - if opts.StreamLogs { - go m.streamLogs(context.Background(), r) - } - // Start background monitor to capture logs when container exits. - // This is critical for detached runs where Wait() is never called. // Use m.ctx so the goroutine is canceled when the Manager closes. monitorCtx := m.ctx if monitorCtx == nil { @@ -2442,34 +2431,6 @@ func (m *Manager) StartAttached(ctx context.Context, runID string, stdin io.Read return attachErr } -// streamLogs streams container logs to stdout for real-time feedback. -// Note: Final log capture to storage is handled by captureLogs() which is called -// from all container exit paths (Wait, StartAttached, Stop) to ensure complete -// logs are captured even for fast-exiting containers. -func (m *Manager) streamLogs(ctx context.Context, r *Run) { - logs, err := m.runtime.ContainerLogs(ctx, r.ContainerID) - if err != nil { - ui.Errorf("Getting logs: %v", err) - return - } - defer logs.Close() - - // Stream to stdout only for real-time feedback - // Storage is handled by Wait() after container exits - // - // Note: streamLogs is only called for non-interactive runs (see exec.go). - // Interactive runs use StartAttached which handles I/O directly. - // Non-interactive Docker containers use multiplexed streams (no TTY), - // so we must demultiplex to avoid 8-byte headers leaking into output. - if m.runtime.Type() == container.RuntimeDocker { - // Docker non-interactive container: demultiplex the stream - _, _ = stdcopy.StdCopy(os.Stdout, os.Stderr, logs) - } else { - // Apple container: output is already raw - _, _ = io.Copy(os.Stdout, logs) - } -} - // Stop terminates a running run. 
func (m *Manager) Stop(ctx context.Context, runID string) error { m.mu.Lock() From e9bf974eb34333169e6de029a70a246720caf19e Mon Sep 17 00:00:00 2001 From: Dan Pupius Date: Fri, 27 Feb 2026 16:15:41 -0800 Subject: [PATCH 10/28] refactor(run): extract postStart helpers from Start/StartAttached Extract three shared helpers that were duplicated between Start() and StartAttached(): - setLogContext: configures structured logger with run fields - setupPortBindings: resolves host ports and registers routes - setupFirewall: configures iptables isolation for strict network policy Reduces ~80 lines of duplication while preserving method-specific ordering (Start does firewall before ports, StartAttached the reverse). --- internal/run/manager.go | 197 +++++++++++++++++----------------------- 1 file changed, 82 insertions(+), 115 deletions(-) diff --git a/internal/run/manager.go b/internal/run/manager.go index 974d30b9..46d9139b 100644 --- a/internal/run/manager.go +++ b/internal/run/manager.go @@ -2159,6 +2159,80 @@ region = %s // StartOptions configures how a run is started. type StartOptions struct{} +// setLogContext configures the structured logger with run-specific fields +// so all subsequent log entries in this goroutine are correlated to the run. +func setLogContext(r *Run) { + log.SetRunContext(log.RunContext{ + RunID: r.ID, + RunName: r.Name, + Agent: r.Agent, + Workspace: filepath.Base(r.Workspace), + Image: r.Image, + Grants: r.Grants, + }) +} + +// setupPortBindings retrieves the host-side port mappings for a container's +// exposed ports and registers them as routes with both the local route table +// and the proxy daemon. Port binding lookup is retried because the container +// runtime may not have mappings ready immediately after start. 
+func (m *Manager) setupPortBindings(ctx context.Context, r *Run) { + if len(r.Ports) == 0 { + return + } + + var bindings map[int]int + var err error + for i := 0; i < 5; i++ { + bindings, err = m.runtime.GetPortBindings(ctx, r.ContainerID) + if err != nil || len(bindings) >= len(r.Ports) { + break + } + time.Sleep(50 * time.Millisecond) + } + if err != nil { + ui.Warnf("Getting port bindings: %v", err) + return + } + + r.HostPorts = make(map[string]int) + services := make(map[string]string) + for serviceName, containerPort := range r.Ports { + if hostPort, ok := bindings[containerPort]; ok { + r.HostPorts[serviceName] = hostPort + services[serviceName] = fmt.Sprintf("127.0.0.1:%d", hostPort) + } + } + if len(services) > 0 { + if err := m.routes.Add(r.Name, services); err != nil { + ui.Warnf("Registering routes: %v", err) + } + if m.daemonClient != nil { + if err := m.daemonClient.RegisterRoutes(ctx, r.Name, services); err != nil { + log.Debug("failed to register routes via daemon", "error", err) + } + } + } +} + +// setupFirewall configures iptables-based network isolation inside the +// container so that only traffic through the credential-injecting proxy is +// allowed. Returns an error if firewall setup fails, since a strict network +// policy without a working firewall would leave the container unprotected. +func (m *Manager) setupFirewall(ctx context.Context, r *Run) error { + if !r.FirewallEnabled || r.ProxyPort <= 0 { + return nil + } + if err := m.runtime.SetupFirewall(ctx, r.ContainerID, r.ProxyHost, r.ProxyPort); err != nil { + r.SetStateFailedAt(fmt.Sprintf("firewall setup failed: %v", err), time.Now()) + if stopErr := m.runtime.StopContainer(ctx, r.ContainerID); stopErr != nil { + ui.Warnf("Failed to stop container after firewall error: %v", stopErr) + } + return fmt.Errorf("firewall setup failed (required for strict network policy): %w", err) + } + return nil +} + // Start begins execution of a run. 
func (m *Manager) Start(ctx context.Context, runID string, opts StartOptions) error { m.mu.Lock() @@ -2169,74 +2243,18 @@ func (m *Manager) Start(ctx context.Context, runID string, opts StartOptions) er } m.mu.Unlock() r.SetState(StateStarting) - - // Set run context in logger for correlation - log.SetRunContext(log.RunContext{ - RunID: runID, - RunName: r.Name, - Agent: r.Agent, - Workspace: filepath.Base(r.Workspace), - Image: r.Image, - Grants: r.Grants, - }) + setLogContext(r) if err := m.runtime.StartContainer(ctx, r.ContainerID); err != nil { r.SetStateFailedAt(err.Error(), time.Now()) return err } - // Set up firewall if enabled (strict network policy) - // This blocks all outbound traffic except to the proxy - if r.FirewallEnabled && r.ProxyPort > 0 { - if err := m.runtime.SetupFirewall(ctx, r.ContainerID, r.ProxyHost, r.ProxyPort); err != nil { - // Firewall setup failed - this is fatal for strict policy since the user - // explicitly requested network isolation. Without iptables, only proxy-level - // filtering applies, which can be bypassed by tools that ignore HTTP_PROXY. 
- r.SetStateFailedAt(fmt.Sprintf("firewall setup failed: %v", err), time.Now()) - if stopErr := m.runtime.StopContainer(ctx, r.ContainerID); stopErr != nil { - ui.Warnf("Failed to stop container after firewall error: %v", stopErr) - } - return fmt.Errorf("firewall setup failed (required for strict network policy): %w", err) - } + if err := m.setupFirewall(ctx, r); err != nil { + return err } - // Get actual port bindings after container starts - if len(r.Ports) > 0 { - // Retry a few times - Docker may need a moment to set up port bindings - var bindings map[int]int - var err error - for i := 0; i < 5; i++ { - bindings, err = m.runtime.GetPortBindings(ctx, r.ContainerID) - if err != nil || len(bindings) >= len(r.Ports) { - break - } - time.Sleep(50 * time.Millisecond) - } - if err != nil { - // Log but don't fail - container is running - ui.Warnf("Getting port bindings: %v", err) - } else { - r.HostPorts = make(map[string]int) - services := make(map[string]string) - for serviceName, containerPort := range r.Ports { - if hostPort, ok := bindings[containerPort]; ok { - r.HostPorts[serviceName] = hostPort - services[serviceName] = fmt.Sprintf("127.0.0.1:%d", hostPort) - } - } - // Register routes - if len(services) > 0 { - if err := m.routes.Add(r.Name, services); err != nil { - ui.Warnf("Registering routes: %v", err) - } - if m.daemonClient != nil { - if err := m.daemonClient.RegisterRoutes(ctx, r.Name, services); err != nil { - log.Debug("failed to register routes via daemon", "error", err) - } - } - } - } - } + m.setupPortBindings(ctx, r) r.SetStateWithTime(StateRunning, time.Now()) @@ -2275,16 +2293,7 @@ func (m *Manager) StartAttached(ctx context.Context, runID string, stdin io.Read containerID := r.ContainerID m.mu.Unlock() r.SetState(StateStarting) - - // Set run context in logger for correlation - log.SetRunContext(log.RunContext{ - RunID: runID, - RunName: r.Name, - Agent: r.Agent, - Workspace: filepath.Base(r.Workspace), - Image: r.Image, - Grants: r.Grants, 
- }) + setLogContext(r) // Start with attachment - this ensures TTY is connected before process starts. // TTY mode must match how the container was created (see CreateContainer in @@ -2346,54 +2355,12 @@ func (m *Manager) StartAttached(ctx context.Context, runID string, stdin io.Read r.SetStateWithTime(StateRunning, time.Now()) } - // Get actual port bindings after container starts - if len(r.Ports) > 0 { - var bindings map[int]int - var bindErr error - for i := 0; i < 5; i++ { - bindings, bindErr = m.runtime.GetPortBindings(ctx, r.ContainerID) - if bindErr != nil || len(bindings) >= len(r.Ports) { - break - } - time.Sleep(50 * time.Millisecond) - } - if bindErr != nil { - ui.Warnf("Getting port bindings: %v", bindErr) - } else { - r.HostPorts = make(map[string]int) - services := make(map[string]string) - for serviceName, containerPort := range r.Ports { - if hostPort, ok := bindings[containerPort]; ok { - r.HostPorts[serviceName] = hostPort - services[serviceName] = fmt.Sprintf("127.0.0.1:%d", hostPort) - } - } - if len(services) > 0 { - if routeErr := m.routes.Add(r.Name, services); routeErr != nil { - ui.Warnf("Registering routes: %v", routeErr) - } - if m.daemonClient != nil { - if routeErr := m.daemonClient.RegisterRoutes(ctx, r.Name, services); routeErr != nil { - log.Debug("failed to register routes via daemon", "error", routeErr) - } - } - } - } - } + m.setupPortBindings(ctx, r) - // Save state to disk _ = r.SaveMetadata() - // Set up firewall if enabled (do this after container starts) - if r.FirewallEnabled && r.ProxyPort > 0 { - if err := m.runtime.SetupFirewall(ctx, r.ContainerID, r.ProxyHost, r.ProxyPort); err != nil { - // Firewall setup failed - this is fatal for strict policy - r.SetStateFailedAt(fmt.Sprintf("firewall setup failed: %v", err), time.Now()) - if stopErr := m.runtime.StopContainer(ctx, r.ContainerID); stopErr != nil { - ui.Warnf("Failed to stop container after firewall error: %v", stopErr) - } - return fmt.Errorf("firewall setup failed 
(required for strict network policy): %w", err) - } + if err := m.setupFirewall(ctx, r); err != nil { + return err } // Wait for the attachment to complete (container exits or context canceled) From 26d397ddeea3e8013916265e7ec9431dd8b76407 Mon Sep 17 00:00:00 2001 From: Dan Pupius Date: Fri, 27 Feb 2026 16:17:18 -0800 Subject: [PATCH 11/28] refactor: simplify interactive event loop by removing escapeCh channel With only one escape action (EscapeStop), the separate escapeCh channel was unnecessary. Escape errors now flow through attachDone directly and are handled inline, eliminating a channel and simplifying the goroutine to a one-liner. --- cmd/moat/cli/exec.go | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/cmd/moat/cli/exec.go b/cmd/moat/cli/exec.go index 5626cfd5..33949609 100644 --- a/cmd/moat/cli/exec.go +++ b/cmd/moat/cli/exec.go @@ -329,9 +329,6 @@ func RunInteractiveAttached(ctx context.Context, manager *run.Manager, r *run.Ru signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM, syscall.SIGWINCH) defer signal.Stop(sigCh) - // Channel to receive escape actions from the attach goroutine - escapeCh := make(chan term.EscapeAction, 1) - // Create cancellable context for the attach attachCtx, attachCancel := context.WithCancel(ctx) defer attachCancel() @@ -339,14 +336,7 @@ func RunInteractiveAttached(ctx context.Context, manager *run.Manager, r *run.Ru // Start with attachment - this ensures TTY is connected before process starts attachDone := make(chan error, 1) go func() { - err := manager.StartAttached(attachCtx, r.ID, stdin, stdout, os.Stderr) - // Check if the error is an escape sequence - if term.IsEscapeError(err) { - escapeCh <- term.GetEscapeAction(err) - attachDone <- nil - } else { - attachDone <- err - } + attachDone <- manager.StartAttached(attachCtx, r.ID, stdin, stdout, os.Stderr) }() // Give container a moment to start, then resize TTY to match terminal. 
@@ -399,20 +389,16 @@ func RunInteractiveAttached(ctx context.Context, manager *run.Manager, r *run.Ru } // SIGINT is forwarded to container via attached stdin/tty - case action := <-escapeCh: - if action == term.EscapeStop { + case err := <-attachDone: + if term.IsEscapeError(err) { fmt.Printf("\r\nStopping run %s...\r\n", r.ID) - attachCancel() - if err := manager.Stop(context.Background(), r.ID); err != nil { - log.Error("failed to stop run", "id", r.ID, "error", err) + if stopErr := manager.Stop(context.Background(), r.ID); stopErr != nil { + log.Error("failed to stop run", "id", r.ID, "error", stopErr) } fmt.Printf("Run %s stopped\r\n", r.ID) return nil } - - case err := <-attachDone: - // Attachment ended (container exited or error) - if err != nil && ctx.Err() == nil && !term.IsEscapeError(err) { + if err != nil && ctx.Err() == nil { log.Error("run failed", "id", r.ID, "error", err) return fmt.Errorf("run failed: %w", err) } From 7c735d2a7be0b4fec9d5736d5dbf652d377d0ded Mon Sep 17 00:00:00 2001 From: Dan Pupius Date: Fri, 27 Feb 2026 16:23:34 -0800 Subject: [PATCH 12/28] refactor(run): extract idempotent cleanupResources for all exit paths Consolidate resource cleanup duplicated across Stop(), Wait(), monitorContainerExit(), and Destroy() into a single cleanupResources() method guarded by sync.Once. This ensures proxy unregistration, SSH agent stop, service/BuildKit teardown, container removal, network cleanup, route unregistration, and temp dir removal happen exactly once regardless of which code path runs first. Reduces ~300 lines of duplicated cleanup code across four methods. 
--- internal/run/manager.go | 465 ++++++++++------------------------------ internal/run/run.go | 1 + 2 files changed, 113 insertions(+), 353 deletions(-) diff --git a/internal/run/manager.go b/internal/run/manager.go index 46d9139b..3564b248 100644 --- a/internal/run/manager.go +++ b/internal/run/manager.go @@ -2415,134 +2415,23 @@ func (m *Manager) Stop(ctx context.Context, runID string) error { } r.SetState(StateStopping) - buildkitContainerID := r.BuildkitContainerID - networkID := r.NetworkID - serviceContainers := r.ServiceContainers m.mu.Unlock() - // Stop service containers - if len(serviceContainers) > 0 { - svcMgr := m.runtime.ServiceManager() - if svcMgr != nil { - for svcName, containerID := range serviceContainers { - log.Debug("stopping service", "service", svcName, "container_id", containerID) - if err := svcMgr.StopService(ctx, container.ServiceInfo{ID: containerID}); err != nil { - log.Debug("failed to stop service", "service", svcName, "error", err) - } - } - } - } - - // Stop and remove BuildKit sidecar if present (before main container). - // Must remove before network cleanup — Docker refuses to remove networks - // with connected containers (#131). - if buildkitContainerID != "" { - log.Debug("stopping buildkit sidecar", "container_id", buildkitContainerID) - if err := m.runtime.StopContainer(ctx, buildkitContainerID); err != nil { - log.Debug("failed to stop buildkit sidecar", "error", err) - } - if err := m.runtime.RemoveContainer(ctx, buildkitContainerID); err != nil { - log.Debug("failed to remove buildkit sidecar", "error", err) - } - } - + // Stop the main container if err := m.runtime.StopContainer(ctx, r.ContainerID); err != nil { - // Log but don't fail - container might already be stopped ui.Warnf("%v", err) log.Debug("failed to stop container", "container_id", r.ContainerID, "error", err) } - // Capture logs after container stops (defense-in-depth). 
- // monitorContainerExit should also capture these when exitCh is signaled, - // but capturing here provides a safety net if moat crashes before that. - // captureLogs is idempotent so multiple calls are safe. + // Capture logs and run provider hooks (both idempotent) m.captureLogs(r) - - // Run provider stopped hooks before saving metadata. runProviderStoppedHooks(r) - // Cancel token refresh and unregister run from proxy daemon - if err := r.stopProxyServer(ctx); err != nil { - ui.Warnf("Stopping proxy: %v", err) - } - if r.ProxyAuthToken != "" && m.daemonClient != nil { - if err := m.daemonClient.UnregisterRun(ctx, r.ProxyAuthToken); err != nil { - ui.Warnf("Unregistering from proxy daemon: %v", err) - } - } - - // Stop the SSH agent server if one was created - if err := r.stopSSHAgentServer(); err != nil { - ui.Warnf("Stopping SSH agent proxy: %v", err) - } - - // Unregister routes for this agent - if r.Name != "" { - _ = m.routes.Remove(r.Name) - if m.daemonClient != nil { - if err := m.daemonClient.UnregisterRoutes(ctx, r.Name); err != nil { - log.Debug("failed to unregister routes via daemon", "error", err) - } - } - } - r.SetStateWithTime(StateStopped, time.Now()) - m.mu.Lock() - keepContainer := r.KeepContainer - containerID := r.ContainerID - providerCleanupPaths := r.ProviderCleanupPaths - m.mu.Unlock() - - // Save state to disk _ = r.SaveMetadata() - // Auto-remove container unless --keep was specified - if !keepContainer { - if rmErr := m.runtime.RemoveContainer(ctx, containerID); rmErr != nil { - ui.Warnf("Removing container: %v", rmErr) - } - } - - // Clean up provider resources - for providerName, cleanupPath := range providerCleanupPaths { - if prov := provider.Get(providerName); prov != nil { - prov.Cleanup(cleanupPath) - } - } - - // Clean up Claude config temp directory - m.mu.RLock() - claudeConfigTempDir := r.ClaudeConfigTempDir - m.mu.RUnlock() - if claudeConfigTempDir != "" { - if err := os.RemoveAll(claudeConfigTempDir); err != nil { - 
log.Debug("failed to remove Claude config temp dir", "path", claudeConfigTempDir, "error", err) - } - } - - // Clean up Gemini config temp directory - m.mu.RLock() - geminiConfigTempDir := r.GeminiConfigTempDir - m.mu.RUnlock() - if geminiConfigTempDir != "" { - if err := os.RemoveAll(geminiConfigTempDir); err != nil { - log.Debug("failed to remove Gemini config temp dir", "path", geminiConfigTempDir, "error", err) - } - } - - // Remove network if present (with force-disconnect fallback) - if networkID != "" { - log.Debug("removing docker network", "network_id", networkID) - netMgr := m.runtime.NetworkManager() - if netMgr != nil { - if err := netMgr.RemoveNetwork(ctx, networkID); err != nil { - log.Debug("network removal failed, trying force removal", "network_id", networkID, "error", err) - if forceErr := netMgr.ForceRemoveNetwork(ctx, networkID); forceErr != nil { - log.Debug("force network removal also failed", "network_id", networkID, "error", forceErr) - } - } - } - } + // Clean up all resources + m.cleanupResources(ctx, r) return nil } @@ -2555,16 +2444,14 @@ func (m *Manager) Wait(ctx context.Context, runID string) error { m.mu.RUnlock() return fmt.Errorf("run %s not found", runID) } - containerID := r.ContainerID m.mu.RUnlock() - // Wait for container to exit (signaled by monitorContainerExit) or context cancellation - // NOTE: We don't call WaitContainer here to avoid race condition - monitorContainerExit + // Wait for container to exit (signaled by monitorContainerExit) or context cancellation. + // We don't call WaitContainer here to avoid race conditions — monitorContainerExit // is the only goroutine that waits on the container and will close exitCh when done. 
select { case <-r.exitCh: // Container has exited (monitorContainerExit already captured logs and updated state) - // Capture logs again here (idempotent) for defense-in-depth m.captureLogs(r) // Get final error (thread-safe read) @@ -2575,74 +2462,8 @@ func (m *Manager) Wait(ctx context.Context, runID string) error { } r.stateMu.Unlock() - // NOTE: stopProxyServer, UnregisterRun, and stopSSHAgentServer are - // handled by monitorContainerExit (which runs for ALL runs, including - // detached). We don't repeat them here to avoid duplicate 404 warnings - // and races when monitorContainerExit's cleanup has already completed. - - // Unregister routes for this agent - if r.Name != "" { - _ = m.routes.Remove(r.Name) - if m.daemonClient != nil { - if stopErr := m.daemonClient.UnregisterRoutes(context.Background(), r.Name); stopErr != nil { - log.Debug("failed to unregister routes via daemon", "error", stopErr) - } - } - } - - // NOTE: We don't update r.State, r.StoppedAt, or r.Error here because - // monitorContainerExit already set them when the container exited. - // Overwriting would lose StateFailed state and error details. - - // Get values needed for cleanup - m.mu.RLock() - keepContainer := r.KeepContainer - providerCleanupPaths := r.ProviderCleanupPaths - m.mu.RUnlock() - - // Auto-remove container unless --keep was specified. - // Use log.Debug for the error — Destroy() may also try to remove - // the same container, and Docker can report "removal already in progress". 
- if !keepContainer { - if rmErr := m.runtime.RemoveContainer(context.Background(), containerID); rmErr != nil { - log.Debug("removing container after wait", "container_id", containerID, "error", rmErr) - } - } - - // Clean up provider resources - for providerName, cleanupPath := range providerCleanupPaths { - if prov := provider.Get(providerName); prov != nil { - prov.Cleanup(cleanupPath) - } - } - - // Clean up AWS temp directory - if r.awsTempDir != "" { - if rmErr := os.RemoveAll(r.awsTempDir); rmErr != nil { - ui.Warnf("Removing AWS temp dir: %v", rmErr) - } - } - - // Clean up Claude config temp directory - if r.ClaudeConfigTempDir != "" { - if rmErr := os.RemoveAll(r.ClaudeConfigTempDir); rmErr != nil { - log.Debug("failed to remove Claude config temp dir", "path", r.ClaudeConfigTempDir, "error", rmErr) - } - } - - // Clean up Codex config temp directory - if r.CodexConfigTempDir != "" { - if rmErr := os.RemoveAll(r.CodexConfigTempDir); rmErr != nil { - log.Debug("failed to remove Codex config temp dir", "path", r.CodexConfigTempDir, "error", rmErr) - } - } - - // Clean up Gemini config temp directory - if r.GeminiConfigTempDir != "" { - if rmErr := os.RemoveAll(r.GeminiConfigTempDir); rmErr != nil { - log.Debug("failed to remove Gemini config temp dir", "path", r.GeminiConfigTempDir, "error", rmErr) - } - } + // Clean up resources (usually no-op because monitorContainerExit already did it) + m.cleanupResources(context.Background(), r) return err case <-ctx.Done(): @@ -2776,6 +2597,106 @@ func runProviderStoppedHooks(r *Run) { } } +// cleanupResources tears down all resources associated with a run. It is +// idempotent — only the first call does work, subsequent calls are no-ops. +// This is safe to call from Stop, Wait, monitorContainerExit, or Destroy. 
+// +// It does NOT: +// - stop the main container (caller handles that) +// - update run state (path-specific) +// - capture logs (has its own idempotency guard) +// - run provider stopped hooks (has its own idempotency guard) +// - close audit store or remove storage (Destroy-only) +func (m *Manager) cleanupResources(ctx context.Context, r *Run) { + r.cleanupOnce.Do(func() { + // Cancel token refresh and unregister run from proxy daemon + if err := r.stopProxyServer(ctx); err != nil { + log.Debug("cleanup: stopping proxy", "error", err) + } + if r.ProxyAuthToken != "" && m.daemonClient != nil { + if err := m.daemonClient.UnregisterRun(ctx, r.ProxyAuthToken); err != nil { + log.Debug("cleanup: unregistering from proxy daemon", "error", err) + } + } + + // Stop the SSH agent server + if err := r.stopSSHAgentServer(); err != nil { + log.Debug("cleanup: stopping SSH agent", "error", err) + } + + // Stop service containers + if len(r.ServiceContainers) > 0 { + svcMgr := m.runtime.ServiceManager() + if svcMgr != nil { + for svcName, svcContainerID := range r.ServiceContainers { + log.Debug("cleanup: stopping service", "service", svcName, "container_id", svcContainerID) + if err := svcMgr.StopService(ctx, container.ServiceInfo{ID: svcContainerID}); err != nil { + log.Debug("cleanup: failed to stop service", "service", svcName, "error", err) + } + } + } + } + + // Remove BuildKit sidecar before network (Docker requires containers + // disconnected before network removal, see #131). 
+ if r.BuildkitContainerID != "" { + log.Debug("cleanup: removing buildkit sidecar", "container_id", r.BuildkitContainerID) + if err := m.runtime.StopContainer(ctx, r.BuildkitContainerID); err != nil { + log.Debug("cleanup: failed to stop buildkit sidecar", "error", err) + } + if err := m.runtime.RemoveContainer(ctx, r.BuildkitContainerID); err != nil { + log.Debug("cleanup: failed to remove buildkit sidecar", "error", err) + } + } + + // Remove main container unless --keep was specified + if !r.KeepContainer { + if err := m.runtime.RemoveContainer(ctx, r.ContainerID); err != nil { + log.Debug("cleanup: failed to remove container", "error", err) + } + } + + // Remove network (with force-disconnect fallback) + if r.NetworkID != "" { + netMgr := m.runtime.NetworkManager() + if netMgr != nil { + if err := netMgr.RemoveNetwork(ctx, r.NetworkID); err != nil { + log.Debug("cleanup: network removal failed, trying force", "network", r.NetworkID, "error", err) + if forceErr := netMgr.ForceRemoveNetwork(ctx, r.NetworkID); forceErr != nil { + log.Debug("cleanup: force network removal failed", "network", r.NetworkID, "error", forceErr) + } + } + } + } + + // Unregister routes + if r.Name != "" { + _ = m.routes.Remove(r.Name) + if m.daemonClient != nil { + if err := m.daemonClient.UnregisterRoutes(ctx, r.Name); err != nil { + log.Debug("cleanup: failed to unregister routes", "error", err) + } + } + } + + // Clean up provider resources + for providerName, cleanupPath := range r.ProviderCleanupPaths { + if prov := provider.Get(providerName); prov != nil { + prov.Cleanup(cleanupPath) + } + } + + // Clean up temp directories + for _, dir := range []string{r.awsTempDir, r.ClaudeConfigTempDir, r.CodexConfigTempDir, r.GeminiConfigTempDir} { + if dir != "" { + if err := os.RemoveAll(dir); err != nil { + log.Debug("cleanup: failed to remove temp dir", "path", dir, "error", err) + } + } + } + }) +} + // monitorContainerExit watches for container exit and captures logs. 
// This runs in the background for ALL runs to ensure logs are captured // even in detached mode where Wait() is never called. @@ -2825,78 +2746,10 @@ func (m *Manager) monitorContainerExit(ctx context.Context, r *Run) { // Save updated state _ = r.SaveMetadata() - // Use a single 30-second timeout for all cleanup operations so a hung - // container operation can't block this goroutine indefinitely. + // Clean up all resources (30-second timeout for cleanup operations) cleanupCtx, cleanupCancel := context.WithTimeout(context.Background(), 30*time.Second) defer cleanupCancel() - - // Cancel token refresh and unregister run from proxy daemon - if stopErr := r.stopProxyServer(cleanupCtx); stopErr != nil { - log.Debug("stopping proxy after container exit", "error", stopErr) - } - if r.ProxyAuthToken != "" && m.daemonClient != nil { - if stopErr := m.daemonClient.UnregisterRun(cleanupCtx, r.ProxyAuthToken); stopErr != nil { - log.Debug("unregistering from proxy daemon after container exit", "error", stopErr) - } - } - - // Stop the SSH agent server if one was created - if stopErr := r.stopSSHAgentServer(); stopErr != nil { - log.Debug("stopping SSH agent proxy after container exit", "error", stopErr) - } - - // Stop service containers - if len(r.ServiceContainers) > 0 { - svcMgr := m.runtime.ServiceManager() - if svcMgr != nil { - for svcName, svcContainerID := range r.ServiceContainers { - log.Debug("stopping service after container exit", "service", svcName, "container_id", svcContainerID) - if stopErr := svcMgr.StopService(cleanupCtx, container.ServiceInfo{ID: svcContainerID}); stopErr != nil { - log.Debug("failed to stop service", "service", svcName, "error", stopErr) - } - } - } - } - - // Remove containers before network — Docker refuses to remove networks - // with connected containers, causing network leaks (#131). 
- if r.BuildkitContainerID != "" { - log.Debug("removing buildkit sidecar after container exit", "container_id", r.BuildkitContainerID) - if stopErr := m.runtime.StopContainer(cleanupCtx, r.BuildkitContainerID); stopErr != nil { - log.Debug("failed to stop buildkit sidecar", "error", stopErr) - } - if rmErr := m.runtime.RemoveContainer(cleanupCtx, r.BuildkitContainerID); rmErr != nil { - log.Debug("failed to remove buildkit sidecar", "error", rmErr) - } - } - if !r.KeepContainer { - if rmErr := m.runtime.RemoveContainer(cleanupCtx, r.ContainerID); rmErr != nil { - log.Debug("failed to remove main container after exit", "error", rmErr) - } - } - - // Remove network (with force-disconnect fallback) - if r.NetworkID != "" { - netMgr := m.runtime.NetworkManager() - if netMgr != nil { - if removeErr := netMgr.RemoveNetwork(cleanupCtx, r.NetworkID); removeErr != nil { - log.Debug("network removal failed, trying force removal", "network", r.NetworkID, "error", removeErr) - if forceErr := netMgr.ForceRemoveNetwork(cleanupCtx, r.NetworkID); forceErr != nil { - log.Debug("force network removal also failed", "network", r.NetworkID, "error", forceErr) - } - } - } - } - - // Unregister routes - if r.Name != "" { - _ = m.routes.Remove(r.Name) - if m.daemonClient != nil { - if err := m.daemonClient.UnregisterRoutes(cleanupCtx, r.Name); err != nil { - log.Debug("failed to unregister routes via daemon", "error", err) - } - } - } + m.cleanupResources(cleanupCtx, r) } // List returns all runs. 
@@ -2919,73 +2772,14 @@ func (m *Manager) Destroy(ctx context.Context, runID string) error { m.mu.Unlock() return fmt.Errorf("run %s not found", runID) } - m.mu.Unlock() if r.GetState() == StateRunning { return fmt.Errorf("cannot destroy running run %s; stop it first", runID) } - // Remove container (may already be removed by Wait or monitorContainerExit) - if err := m.runtime.RemoveContainer(ctx, r.ContainerID); err != nil { - log.Debug("removing container in destroy", "container_id", r.ContainerID, "error", err) - } - - // Remove service containers - for svcName, svcContainerID := range r.ServiceContainers { - if err := m.runtime.RemoveContainer(ctx, svcContainerID); err != nil { - log.Debug("removing service container in destroy", "service", svcName, "error", err) - } - } - - // Remove BuildKit sidecar container if present - if r.BuildkitContainerID != "" { - if err := m.runtime.RemoveContainer(ctx, r.BuildkitContainerID); err != nil { - log.Debug("removing buildkit sidecar in destroy", "error", err) - } - } - - // Remove network if present (with force-disconnect fallback) - if r.NetworkID != "" { - netMgr := m.runtime.NetworkManager() - if netMgr != nil { - if err := netMgr.RemoveNetwork(ctx, r.NetworkID); err != nil { - log.Debug("network removal failed, trying force removal", "network", r.NetworkID, "error", err) - if forceErr := netMgr.ForceRemoveNetwork(ctx, r.NetworkID); forceErr != nil { - log.Debug("force network removal also failed", "network", r.NetworkID, "error", forceErr) - } - } - } - } - - // Cancel token refresh and unregister run from proxy daemon. - // These may already have been handled by monitorContainerExit — that's fine, - // Destroy is a best-effort cleanup for anything that was missed. 
- if err := r.stopProxyServer(ctx); err != nil { - log.Debug("stopping proxy in destroy", "error", err) - } - if r.ProxyAuthToken != "" && m.daemonClient != nil { - if err := m.daemonClient.UnregisterRun(ctx, r.ProxyAuthToken); err != nil { - log.Debug("unregistering from daemon in destroy", "error", err) - } - } - - // Stop the SSH agent server if one was created and still running - if err := r.stopSSHAgentServer(); err != nil { - log.Debug("stopping SSH agent in destroy", "error", err) - } - - // Unregister routes for this agent - if r.Name != "" { - if err := m.routes.Remove(r.Name); err != nil { - ui.Warnf("Removing routes: %v", err) - } - if m.daemonClient != nil { - if err := m.daemonClient.UnregisterRoutes(ctx, r.Name); err != nil { - log.Debug("failed to unregister routes via daemon", "error", err) - } - } - } + // Clean up all run resources (idempotent - may already be done by Stop/monitorContainerExit) + m.cleanupResources(ctx, r) // Check if we should stop the routing proxy (no more agents with ports) if m.proxyLifecycle.ShouldStop() { @@ -3001,41 +2795,6 @@ func (m *Manager) Destroy(ctx context.Context, runID string) error { } } - // Clean up provider resources - for providerName, cleanupPath := range r.ProviderCleanupPaths { - if prov := provider.Get(providerName); prov != nil { - prov.Cleanup(cleanupPath) - } - } - - // Clean up AWS temp directory - if r.awsTempDir != "" { - if err := os.RemoveAll(r.awsTempDir); err != nil { - ui.Warnf("Removing AWS temp dir: %v", err) - } - } - - // Clean up Claude config temp directory - if r.ClaudeConfigTempDir != "" { - if err := os.RemoveAll(r.ClaudeConfigTempDir); err != nil { - log.Debug("failed to remove Claude config temp dir", "path", r.ClaudeConfigTempDir, "error", err) - } - } - - // Clean up Codex config temp directory - if r.CodexConfigTempDir != "" { - if err := os.RemoveAll(r.CodexConfigTempDir); err != nil { - log.Debug("failed to remove Codex config temp dir", "path", r.CodexConfigTempDir, "error", 
err) - } - } - - // Clean up Gemini config temp directory - if r.GeminiConfigTempDir != "" { - if err := os.RemoveAll(r.GeminiConfigTempDir); err != nil { - log.Debug("failed to remove Gemini config temp dir", "path", r.GeminiConfigTempDir, "error", err) - } - } - // Remove run storage directory (logs, traces, metadata) if r.Store != nil { if err := r.Store.Remove(); err != nil { diff --git a/internal/run/run.go b/internal/run/run.go index 7dae47f0..ad7e307f 100644 --- a/internal/run/run.go +++ b/internal/run/run.go @@ -64,6 +64,7 @@ type Run struct { // Shutdown coordination to prevent race conditions sshAgentStopOnce sync.Once // Ensures SSHAgentServer.Stop() called only once + cleanupOnce sync.Once // Ensures resource cleanup runs only once // State protection - guards State, Error, StartedAt, StoppedAt fields // Use this lock when reading or modifying these fields to prevent races From 2f04e12310e416d8ade9949d430db54f4ebfceb1 Mon Sep 17 00:00:00 2001 From: Dan Pupius Date: Fri, 27 Feb 2026 16:34:50 -0800 Subject: [PATCH 13/28] refactor: extract ProviderRunner helper for provider CLI dedup All three provider CLIs (claude, codex, gemini) followed an identical ~170 line pattern for workspace resolution, grant dedup, config setup, dry run, and execution. Extract a RunProvider() helper in internal/cli that handles the shared boilerplate. Each provider is now ~30-50 lines of config + flag registration, with provider-specific logic isolated in BuildCommand and ConfigureAgent callbacks. The ProviderRunConfig struct makes the differences between providers explicit and declarative. 
--- internal/cli/provider.go | 229 +++++++++++++++++++++++++++++++ internal/providers/claude/cli.go | 220 ++++++----------------------- internal/providers/codex/cli.go | 207 +++++----------------------- internal/providers/gemini/cli.go | 177 ++++-------------------- 4 files changed, 331 insertions(+), 502 deletions(-) create mode 100644 internal/cli/provider.go diff --git a/internal/cli/provider.go b/internal/cli/provider.go new file mode 100644 index 00000000..1c3f9318 --- /dev/null +++ b/internal/cli/provider.go @@ -0,0 +1,229 @@ +package cli + +import ( + "context" + "fmt" + "strings" + + "github.com/majorcontext/moat/internal/config" + "github.com/majorcontext/moat/internal/log" + "github.com/spf13/cobra" +) + +// ProviderRunConfig describes a provider's run configuration. +// Each provider supplies a ProviderRunConfig to RunProvider +// to eliminate repeated boilerplate for workspace resolution, +// grant dedup, config setup, dry run, and execution. +type ProviderRunConfig struct { + // Name is the provider name (e.g., "claude", "codex", "gemini"). + // Used in log messages and dry run output. + Name string + + // Flags is a pointer to the provider's ExecFlags. + Flags *ExecFlags + + // PromptFlag is the value of the -p/--prompt flag. + PromptFlag string + + // AllowedHosts are additional hosts from the --allow-host flag. + AllowedHosts []string + + // WtFlag is the value of the --worktree flag. + WtFlag string + + // GetCredentialGrant returns the grant name for the provider's credential. + // Returns empty string if no credential exists. + GetCredentialGrant func() string + + // Dependencies are the required dependencies (e.g., ["node@20", "git", "claude-code"]). + Dependencies []string + + // NetworkHosts are hosts the provider needs network access to. + NetworkHosts []string + + // BuildCommand builds the container command from the prompt flag value and + // initial prompt (from -- args). Called after grants and interactive mode + // are resolved. 
+ BuildCommand func(promptFlag, initialPrompt string) ([]string, error) + + // ConfigureAgent applies provider-specific config tweaks (e.g., syncLogs). + // Called after dependencies and network hosts are added. + // cfg is guaranteed non-nil. + ConfigureAgent func(cfg *config.Config) + + // SupportsInitialPrompt indicates whether this provider supports the + // -- "prompt" syntax for passing an initial prompt via cobra's ArgsLenAtDash. + // If false, args are treated as a simple workspace path. + SupportsInitialPrompt bool + + // DryRunNote is an optional extra line to print during dry run + // (e.g., "Note: No API key configured. Claude will prompt for login."). + // Only printed when grants is empty. + DryRunNote string +} + +// RunProvider executes the shared boilerplate for provider CLI commands. +// It handles workspace resolution, config loading, worktree support, +// grant dedup, dependency injection, network hosts, dry run, and execution. +func RunProvider(cmd *cobra.Command, args []string, rc ProviderRunConfig) error { + // Guard: if a subcommand was invoked, skip the parent run function + if cmd.CalledAs() != rc.Name { + return nil + } + + // Parse workspace and optional initial prompt from args + workspace := "." 
+ var initialPrompt string + + if rc.SupportsInitialPrompt { + dashIdx := cmd.ArgsLenAtDash() + if dashIdx >= 0 { + if dashIdx > 0 { + workspace = args[0] + } + passthroughArgs := args[dashIdx:] + if len(passthroughArgs) > 0 { + initialPrompt = strings.Join(passthroughArgs, " ") + } + } else if len(args) > 0 { + workspace = args[0] + } + } else { + if len(args) > 0 { + workspace = args[0] + } + } + + absPath, err := ResolveWorkspacePath(workspace) + if err != nil { + return err + } + + cfg, err := config.Load(absPath) + if err != nil { + return fmt.Errorf("loading config: %w", err) + } + + // Handle --wt/--worktree flag + wtOut, err := ResolveWorktreeWorkspace(rc.WtFlag, absPath, rc.Flags, cfg) + if err != nil { + return err + } + absPath = wtOut.Workspace + cfg = wtOut.Config + + // Build grants list with deduplication: credential grant first, + // then config grants, then flag grants. + grantSet := make(map[string]bool) + var grants []string + addGrant := func(g string) { + if !grantSet[g] { + grantSet[g] = true + grants = append(grants, g) + } + } + + if rc.GetCredentialGrant != nil { + if credName := rc.GetCredentialGrant(); credName != "" { + addGrant(credName) + } + } + if cfg != nil { + for _, g := range cfg.Grants { + addGrant(g) + } + } + for _, g := range rc.Flags.Grants { + addGrant(g) + } + rc.Flags.Grants = grants + + interactive := rc.PromptFlag == "" + + // Build container command (provider-specific logic) + containerCmd, err := rc.BuildCommand(rc.PromptFlag, initialPrompt) + if err != nil { + return err + } + + // Name from flag, or config, or let manager generate one + if rc.Flags.Name == "" && cfg != nil && cfg.Name != "" { + rc.Flags.Name = cfg.Name + } + + // Ensure config is non-nil before modifying dependencies/network + if cfg == nil { + cfg = &config.Config{} + } + + // Add required dependencies, skipping any already present + for _, dep := range rc.Dependencies { + prefix := dep + for i := range dep { + if dep[i] == '@' { + prefix = dep[:i] 
+ break + } + } + if !HasDependency(cfg.Dependencies, prefix) { + cfg.Dependencies = append(cfg.Dependencies, dep) + } + } + + // Network: provider hosts first, then user-specified allowed hosts + cfg.Network.Allow = append(cfg.Network.Allow, rc.NetworkHosts...) + cfg.Network.Allow = append(cfg.Network.Allow, rc.AllowedHosts...) + + // Provider-specific config tweaks (e.g., enabling log sync) + if rc.ConfigureAgent != nil { + rc.ConfigureAgent(cfg) + } + + if envErr := ParseEnvFlags(rc.Flags.Env, cfg); envErr != nil { + return envErr + } + + log.Debug(fmt.Sprintf("starting %s", rc.Name), + "workspace", absPath, + "grants", grants, + "interactive", interactive, + "prompt", rc.PromptFlag, + "rebuild", rc.Flags.Rebuild, + ) + + if DryRun { + fmt.Printf("Dry run - would start %s\n", rc.Name) + fmt.Printf("Workspace: %s\n", absPath) + fmt.Printf("Grants: %v\n", grants) + fmt.Printf("Interactive: %v\n", interactive) + fmt.Printf("Rebuild: %v\n", rc.Flags.Rebuild) + if len(grants) == 0 && rc.DryRunNote != "" { + fmt.Println(rc.DryRunNote) + } + return nil + } + + ctx := context.Background() + + opts := ExecOptions{ + Flags: *rc.Flags, + Workspace: absPath, + Command: containerCmd, + Config: cfg, + Interactive: interactive, + } + + SetWorktreeFields(&opts, wtOut.Result) + + result, err := ExecuteRun(ctx, opts) + if err != nil { + return err + } + + if result != nil { + fmt.Printf("Starting %s in %s\n", rc.Name, absPath) + fmt.Printf("Run: %s (%s)\n", result.Name, result.ID) + } + + return nil +} diff --git a/internal/providers/claude/cli.go b/internal/providers/claude/cli.go index e4d7e478..8afd1b73 100644 --- a/internal/providers/claude/cli.go +++ b/internal/providers/claude/cli.go @@ -1,7 +1,6 @@ package claude import ( - "context" "fmt" "strings" @@ -96,187 +95,52 @@ Use 'moat list' to see running and recent runs.`, } func runClaudeCode(cmd *cobra.Command, args []string) error { - // If subcommand is being run, don't execute this - if cmd.CalledAs() != "claude" { - 
return nil - } - - // Separate workspace arg from initial prompt args. - // Everything after "--" is passed as an initial prompt to Claude. - // moat claude -- "is this thing on?" → workspace=".", prompt="is this thing on?" - // moat claude ./project -- "explain this" → workspace="./project", prompt="explain this" - // moat claude ./project → workspace="./project", no prompt - var initialPrompt string - workspace := "." - dashIdx := cmd.ArgsLenAtDash() - if dashIdx >= 0 { - // Args before "--" are moat args (workspace), after are passthrough - if dashIdx > 0 { - workspace = args[0] - } - passthroughArgs := args[dashIdx:] - if len(passthroughArgs) > 0 { - initialPrompt = strings.Join(passthroughArgs, " ") - } - } else if len(args) > 0 { - workspace = args[0] - } - - absPath, err := cli.ResolveWorkspacePath(workspace) - if err != nil { - return err - } - - // Load agent.yaml if present, otherwise use defaults - cfg, err := config.Load(absPath) - if err != nil { - return fmt.Errorf("loading config: %w", err) - } - - // Handle --wt flag - wtOut, err := cli.ResolveWorktreeWorkspace(claudeWtFlag, absPath, &claudeFlags, cfg) - if err != nil { - return err - } - absPath = wtOut.Workspace - cfg = wtOut.Config - - // Build grants list using a set for deduplication - grantSet := make(map[string]bool) - var grants []string - addGrant := func(g string) { - if !grantSet[g] { - grantSet[g] = true - grants = append(grants, g) - } - } - - if credName := getClaudeCredentialName(); credName != "" { - addGrant(credName) // Use the actual name the credential is stored under - } - if cfg != nil { - for _, g := range cfg.Grants { - addGrant(g) - } - } - for _, g := range claudeFlags.Grants { - addGrant(g) - } - claudeFlags.Grants = grants - - // Determine interactive mode - interactive := claudePromptFlag == "" - - // Validate mutually exclusive flags + // Validate mutually exclusive flags before delegating to shared runner if claudeContinue && claudeResume != "" { return 
fmt.Errorf("--continue and --resume are mutually exclusive") } - // Build container command - containerCmd := []string{"claude"} - - // By default, skip permission prompts since Moat provides isolation. - if !claudeNoYolo { - containerCmd = append(containerCmd, "--dangerously-skip-permissions") - } - - if claudeContinue { - containerCmd = append(containerCmd, "--continue") - } - - if claudeResume != "" { - sessionID, resolveErr := resolveResumeSession(claudeResume) - if resolveErr != nil { - return resolveErr - } - containerCmd = append(containerCmd, "--resume", sessionID) - } - - if claudePromptFlag != "" { - containerCmd = append(containerCmd, "-p", claudePromptFlag) - } - - if initialPrompt != "" { - containerCmd = append(containerCmd, initialPrompt) - } - - // Use name from flag, or config, or let manager generate one - if claudeFlags.Name == "" && cfg != nil && cfg.Name != "" { - claudeFlags.Name = cfg.Name - } - - // Ensure dependencies for Claude Code - if cfg == nil { - cfg = &config.Config{} - } - if !cli.HasDependency(cfg.Dependencies, "node") { - cfg.Dependencies = append(cfg.Dependencies, "node@20") - } - if !cli.HasDependency(cfg.Dependencies, "git") { - cfg.Dependencies = append(cfg.Dependencies, "git") - } - if !cli.HasDependency(cfg.Dependencies, "claude-code") { - cfg.Dependencies = append(cfg.Dependencies, "claude-code") - } - - // Always sync Claude logs - syncLogs := true - cfg.Claude.SyncLogs = &syncLogs - - // Allow network access to claude.ai for OAuth login - cfg.Network.Allow = append(cfg.Network.Allow, "claude.ai", "*.claude.ai") - - // Add allowed hosts if specified - cfg.Network.Allow = append(cfg.Network.Allow, claudeAllowedHosts...) 
- - // Add environment variables from flags - if envErr := cli.ParseEnvFlags(claudeFlags.Env, cfg); envErr != nil { - return envErr - } - - log.Debug("starting claude code", - "workspace", absPath, - "grants", grants, - "interactive", interactive, - "prompt", claudePromptFlag, - "rebuild", claudeFlags.Rebuild, - ) - - if cli.DryRun { - fmt.Println("Dry run - would start Claude Code") - fmt.Printf("Workspace: %s\n", absPath) - fmt.Printf("Grants: %v\n", grants) - fmt.Printf("Interactive: %v\n", interactive) - fmt.Printf("Rebuild: %v\n", claudeFlags.Rebuild) - if len(grants) == 0 { - fmt.Println("Note: No API key configured. Claude will prompt for login.") - } - return nil - } - - ctx := context.Background() - - opts := cli.ExecOptions{ - Flags: claudeFlags, - Workspace: absPath, - Command: containerCmd, - Config: cfg, - Interactive: interactive, - } - - cli.SetWorktreeFields(&opts, wtOut.Result) - - result, err := cli.ExecuteRun(ctx, opts) - if err != nil { - return err - } - - if result != nil { - fmt.Printf("Starting Claude Code in %s\n", absPath) - fmt.Printf("Run: %s (%s)\n", result.Name, result.ID) - } - - return nil + return cli.RunProvider(cmd, args, cli.ProviderRunConfig{ + Name: "claude", + Flags: &claudeFlags, + PromptFlag: claudePromptFlag, + AllowedHosts: claudeAllowedHosts, + WtFlag: claudeWtFlag, + GetCredentialGrant: getClaudeCredentialName, + Dependencies: []string{"node@20", "git", "claude-code"}, + NetworkHosts: []string{"claude.ai", "*.claude.ai"}, + SupportsInitialPrompt: true, + DryRunNote: "Note: No API key configured. Claude will prompt for login.", + BuildCommand: func(promptFlag, initialPrompt string) ([]string, error) { + containerCmd := []string{"claude"} + + // By default, skip permission prompts since Moat provides isolation. 
+ if !claudeNoYolo { + containerCmd = append(containerCmd, "--dangerously-skip-permissions") + } + if claudeContinue { + containerCmd = append(containerCmd, "--continue") + } + if claudeResume != "" { + sessionID, err := resolveResumeSession(claudeResume) + if err != nil { + return nil, err + } + containerCmd = append(containerCmd, "--resume", sessionID) + } + if promptFlag != "" { + containerCmd = append(containerCmd, "-p", promptFlag) + } + if initialPrompt != "" { + containerCmd = append(containerCmd, initialPrompt) + } + return containerCmd, nil + }, + ConfigureAgent: func(cfg *config.Config) { + syncLogs := true + cfg.Claude.SyncLogs = &syncLogs + }, + }) } // getClaudeCredentialName returns the grant name to use for moat claude. diff --git a/internal/providers/codex/cli.go b/internal/providers/codex/cli.go index 90432d3a..4f511e6d 100644 --- a/internal/providers/codex/cli.go +++ b/internal/providers/codex/cli.go @@ -1,16 +1,11 @@ package codex import ( - "context" - "fmt" - "strings" - "github.com/spf13/cobra" "github.com/majorcontext/moat/internal/cli" "github.com/majorcontext/moat/internal/config" "github.com/majorcontext/moat/internal/credential" - "github.com/majorcontext/moat/internal/log" ) var ( @@ -106,174 +101,40 @@ Use 'moat list' to see running and recent runs.`, } func runCodex(cmd *cobra.Command, args []string) error { - // If subcommand is being run, don't execute this - if cmd.CalledAs() != "codex" { - return nil - } - - // Separate workspace arg from initial prompt args. - // Everything after "--" is passed as an initial prompt to Codex. - // moat codex -- "testing" → workspace=".", prompt="testing" - // moat codex ./project -- "explain this" → workspace="./project", prompt="explain this" - // moat codex ./project → workspace="./project", no prompt - var initialPrompt string - workspace := "." 
- dashIdx := cmd.ArgsLenAtDash() - if dashIdx >= 0 { - // Args before "--" are moat args (workspace), after are passthrough - if dashIdx > 0 { - workspace = args[0] - } - passthroughArgs := args[dashIdx:] - if len(passthroughArgs) > 0 { - initialPrompt = strings.Join(passthroughArgs, " ") - } - } else if len(args) > 0 { - workspace = args[0] - } - - absPath, err := cli.ResolveWorkspacePath(workspace) - if err != nil { - return err - } - - // Load agent.yaml if present, otherwise use defaults - cfg, err := config.Load(absPath) - if err != nil { - return fmt.Errorf("loading config: %w", err) - } - - // Handle --wt flag - wtOut, err := cli.ResolveWorktreeWorkspace(codexWtFlag, absPath, &codexFlags, cfg) - if err != nil { - return err - } - absPath = wtOut.Workspace - cfg = wtOut.Config - - // Build grants list using a set for deduplication - grantSet := make(map[string]bool) - var grants []string - addGrant := func(g string) { - if !grantSet[g] { - grantSet[g] = true - grants = append(grants, g) - } - } - - if credName := getCodexCredentialName(); credName != "" { - addGrant(credName) // Use the actual name the credential is stored under - } - if cfg != nil { - for _, g := range cfg.Grants { - addGrant(g) - } - } - for _, g := range codexFlags.Grants { - addGrant(g) - } - codexFlags.Grants = grants - - // Determine interactive mode - interactive := codexPromptFlag == "" - - // Build container command - // codex is installed globally via the dependency system - var containerCmd []string - - if codexPromptFlag != "" { - // Non-interactive mode: use `codex exec` with the prompt - // --full-auto allows edits during execution (safe since we're in a container) - containerCmd = []string{"codex", "exec"} - if codexFullAuto { - containerCmd = append(containerCmd, "--full-auto") - } - containerCmd = append(containerCmd, codexPromptFlag) - } else { - // Interactive mode: just run `codex` for the TUI - containerCmd = []string{"codex"} - if initialPrompt != "" { - containerCmd = 
append(containerCmd, initialPrompt) - } - } - - // Use name from flag, or config, or let manager generate one - if codexFlags.Name == "" && cfg != nil && cfg.Name != "" { - codexFlags.Name = cfg.Name - } - - // Ensure dependencies for Codex CLI - if cfg == nil { - cfg = &config.Config{} - } - if !cli.HasDependency(cfg.Dependencies, "node") { - cfg.Dependencies = append(cfg.Dependencies, "node@20") - } - if !cli.HasDependency(cfg.Dependencies, "git") { - cfg.Dependencies = append(cfg.Dependencies, "git") - } - if !cli.HasDependency(cfg.Dependencies, "codex-cli") { - cfg.Dependencies = append(cfg.Dependencies, "codex-cli") - } - - // Allow network access to OpenAI - cfg.Network.Allow = append(cfg.Network.Allow, NetworkHosts()...) - - // Add allowed hosts if specified - cfg.Network.Allow = append(cfg.Network.Allow, codexAllowedHosts...) - - // Always sync Codex logs - syncLogs := true - cfg.Codex.SyncLogs = &syncLogs - - // Add environment variables from flags - if envErr := cli.ParseEnvFlags(codexFlags.Env, cfg); envErr != nil { - return envErr - } - - log.Debug("starting codex cli", - "workspace", absPath, - "grants", grants, - "interactive", interactive, - "prompt", codexPromptFlag, - "rebuild", codexFlags.Rebuild, - ) - - if cli.DryRun { - fmt.Println("Dry run - would start Codex CLI") - fmt.Printf("Workspace: %s\n", absPath) - fmt.Printf("Grants: %v\n", grants) - fmt.Printf("Interactive: %v\n", interactive) - fmt.Printf("Rebuild: %v\n", codexFlags.Rebuild) - if len(grants) == 0 { - fmt.Println("Note: No API key configured. 
Codex will prompt for login.") - } - return nil - } - - ctx := context.Background() - - opts := cli.ExecOptions{ - Flags: codexFlags, - Workspace: absPath, - Command: containerCmd, - Config: cfg, - Interactive: interactive, - } - - cli.SetWorktreeFields(&opts, wtOut.Result) - - result, err := cli.ExecuteRun(ctx, opts) - if err != nil { - return err - } - - if result != nil { - fmt.Printf("Starting Codex CLI in %s\n", absPath) - fmt.Printf("Run: %s (%s)\n", result.Name, result.ID) - } - - return nil + return cli.RunProvider(cmd, args, cli.ProviderRunConfig{ + Name: "codex", + Flags: &codexFlags, + PromptFlag: codexPromptFlag, + AllowedHosts: codexAllowedHosts, + WtFlag: codexWtFlag, + GetCredentialGrant: getCodexCredentialName, + Dependencies: DefaultDependencies(), + NetworkHosts: NetworkHosts(), + SupportsInitialPrompt: true, + DryRunNote: "Note: No API key configured. Codex will prompt for login.", + BuildCommand: func(promptFlag, initialPrompt string) ([]string, error) { + if promptFlag != "" { + // Non-interactive: use `codex exec` with the prompt. + // --full-auto allows edits during execution (safe in a container). + containerCmd := []string{"codex", "exec"} + if codexFullAuto { + containerCmd = append(containerCmd, "--full-auto") + } + containerCmd = append(containerCmd, promptFlag) + return containerCmd, nil + } + // Interactive: run `codex` for the TUI + containerCmd := []string{"codex"} + if initialPrompt != "" { + containerCmd = append(containerCmd, initialPrompt) + } + return containerCmd, nil + }, + ConfigureAgent: func(cfg *config.Config) { + syncLogs := true + cfg.Codex.SyncLogs = &syncLogs + }, + }) } // getCodexCredentialName returns the name under which the Codex credential is stored. 
diff --git a/internal/providers/gemini/cli.go b/internal/providers/gemini/cli.go index 04fa1556..ab7ad404 100644 --- a/internal/providers/gemini/cli.go +++ b/internal/providers/gemini/cli.go @@ -1,14 +1,10 @@ package gemini import ( - "context" - "fmt" - "github.com/spf13/cobra" "github.com/majorcontext/moat/internal/cli" "github.com/majorcontext/moat/internal/config" - "github.com/majorcontext/moat/internal/log" ) var ( @@ -89,152 +85,31 @@ Use 'moat list' to see running and recent runs.`, } func runGemini(cmd *cobra.Command, args []string) error { - // If subcommand is being run, don't execute this - if cmd.CalledAs() != "gemini" { - return nil - } - - // Determine workspace - workspace := "." - if len(args) > 0 { - workspace = args[0] - } - - absPath, err := cli.ResolveWorkspacePath(workspace) - if err != nil { - return err - } - - // Load agent.yaml if present, otherwise use defaults - cfg, err := config.Load(absPath) - if err != nil { - return fmt.Errorf("loading config: %w", err) - } - - // Handle --wt flag - wtOut, err := cli.ResolveWorktreeWorkspace(geminiWtFlag, absPath, &geminiFlags, cfg) - if err != nil { - return err - } - absPath = wtOut.Workspace - cfg = wtOut.Config - - // Build grants list using a set for deduplication - grantSet := make(map[string]bool) - var grants []string - addGrant := func(g string) { - if !grantSet[g] { - grantSet[g] = true - grants = append(grants, g) - } - } - - // If user has an API key stored via `moat grant gemini`, use proxy injection - if HasCredential() { - addGrant("gemini") - } - if cfg != nil { - for _, g := range cfg.Grants { - addGrant(g) - } - } - for _, g := range geminiFlags.Grants { - addGrant(g) - } - geminiFlags.Grants = grants - - // Determine interactive mode - interactive := geminiPromptFlag == "" - - // Build container command - var containerCmd []string - if geminiPromptFlag != "" { - containerCmd = []string{"gemini", "-p", geminiPromptFlag} - } else { - containerCmd = []string{"gemini"} - } - - // Use 
name from flag, or config, or let manager generate one - if geminiFlags.Name == "" && cfg != nil && cfg.Name != "" { - geminiFlags.Name = cfg.Name - } - - // Ensure dependencies for Gemini CLI - if cfg == nil { - cfg = &config.Config{} - } - for _, dep := range DefaultDependencies() { - prefix := dep - if idx := len(dep) - 1; idx >= 0 { - // Extract prefix before @version - for i := range dep { - if dep[i] == '@' { - prefix = dep[:i] - break - } + return cli.RunProvider(cmd, args, cli.ProviderRunConfig{ + Name: "gemini", + Flags: &geminiFlags, + PromptFlag: geminiPromptFlag, + AllowedHosts: geminiAllowedHosts, + WtFlag: geminiWtFlag, + GetCredentialGrant: func() string { + if HasCredential() { + return "gemini" } - } - if !cli.HasDependency(cfg.Dependencies, prefix) { - cfg.Dependencies = append(cfg.Dependencies, dep) - } - } - - // Allow network access to Google APIs - cfg.Network.Allow = append(cfg.Network.Allow, NetworkHosts()...) - - // Add allowed hosts if specified - cfg.Network.Allow = append(cfg.Network.Allow, geminiAllowedHosts...) - - // Always sync Gemini logs for moat gemini command - syncLogs := true - cfg.Gemini.SyncLogs = &syncLogs - - // Add environment variables from flags - if envErr := cli.ParseEnvFlags(geminiFlags.Env, cfg); envErr != nil { - return envErr - } - - log.Debug("starting gemini", - "workspace", absPath, - "grants", grants, - "interactive", interactive, - "prompt", geminiPromptFlag, - "rebuild", geminiFlags.Rebuild, - ) - - if cli.DryRun { - fmt.Println("Dry run - would start Gemini") - fmt.Printf("Workspace: %s\n", absPath) - fmt.Printf("Grants: %v\n", grants) - fmt.Printf("Interactive: %v\n", interactive) - fmt.Printf("Rebuild: %v\n", geminiFlags.Rebuild) - if len(grants) == 0 { - fmt.Println("Note: No API key configured. 
Gemini will prompt for login.") - } - return nil - } - - ctx := context.Background() - - opts := cli.ExecOptions{ - Flags: geminiFlags, - Workspace: absPath, - Command: containerCmd, - Config: cfg, - Interactive: interactive, - } - - cli.SetWorktreeFields(&opts, wtOut.Result) - - result, err := cli.ExecuteRun(ctx, opts) - if err != nil { - return err - } - - if result != nil { - fmt.Printf("Starting Gemini in %s\n", absPath) - fmt.Printf("Run: %s (%s)\n", result.Name, result.ID) - } - - return nil + return "" + }, + Dependencies: DefaultDependencies(), + NetworkHosts: NetworkHosts(), + SupportsInitialPrompt: false, + DryRunNote: "Note: No API key configured. Gemini will prompt for login.", + BuildCommand: func(promptFlag, _ string) ([]string, error) { + if promptFlag != "" { + return []string{"gemini", "-p", promptFlag}, nil + } + return []string{"gemini"}, nil + }, + ConfigureAgent: func(cfg *config.Config) { + syncLogs := true + cfg.Gemini.SyncLogs = &syncLogs + }, + }) } From ab5ab36ee05f0ec3c3b00578db8cf06f687aaa77 Mon Sep 17 00:00:00 2001 From: Dan Pupius Date: Fri, 27 Feb 2026 16:37:06 -0800 Subject: [PATCH 14/28] docs: add runtime flow analysis with completed simplifications --- docs/runtime-flow.md | 366 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 366 insertions(+) create mode 100644 docs/runtime-flow.md diff --git a/docs/runtime-flow.md b/docs/runtime-flow.md new file mode 100644 index 00000000..dfa176ac --- /dev/null +++ b/docs/runtime-flow.md @@ -0,0 +1,366 @@ +# Runtime Flow: Interactive vs Non-Interactive + +Here's the summary: + + Created docs/runtime-flow.md with 7 mermaid diagrams tracing both paths from CLI entry through container lifecycle: + + 1. Entry points → ExecuteRun() + 2. The interactive/non-interactive fork + 3. manager.Create() (shared heavyweight setup) + 4. Non-interactive: manager.Start() → background + 5. Interactive: RunInteractiveAttached() → event loop + 6. manager.StartAttached() internals + 7. 
monitorContainerExit() cleanup + + 7 simplification opportunities identified, ranked by impact/effort: + ┌──────────┬──────────────────────────────────────────────┬────────────────────────────────────────────────────────────────────────────┐ + │ Priority │ What │ Why │ + ├──────────┼──────────────────────────────────────────────┼────────────────────────────────────────────────────────────────────────────┤ + │ 1 │ Remove TTY field from ExecOptions │ Always equals Interactive — dead weight │ + ├──────────┼──────────────────────────────────────────────┼────────────────────────────────────────────────────────────────────────────┤ + │ 2 │ Remove streamLogs from non-interactive Start │ Users use moat logs -f now; goroutine lifecycle is unclear │ + ├──────────┼──────────────────────────────────────────────┼────────────────────────────────────────────────────────────────────────────┤ + │ 3 │ Extract postStart() helper │ Start() and StartAttached() share ~60% of code │ + ├──────────┼──────────────────────────────────────────────┼────────────────────────────────────────────────────────────────────────────┤ + │ 4 │ Simplify interactive event loop │ Only one escape action now; the channel dance is overkill │ + ├──────────┼──────────────────────────────────────────────┼────────────────────────────────────────────────────────────────────────────┤ + │ 5 │ Extract idempotent cleanupRun() │ Cleanup is duplicated across 3 paths with inconsistent guards │ + ├──────────┼──────────────────────────────────────────────┼────────────────────────────────────────────────────────────────────────────┤ + │ 6 │ ProviderRunner helper │ claude/codex/gemini CLIs are ~200 lines each of near-identical boilerplate │ + ├──────────┼──────────────────────────────────────────────┼────────────────────────────────────────────────────────────────────────────┤ + │ 7 │ Skip: TTY trace builder │ Future-proofing, not needed now │ + 
└──────────┴──────────────────────────────────────────────┴────────────────────────────────────────────────────────────────────────────┘ + Want me to proceed with any of these? + +Current state after the attach/detach simplification. + +## Entry Points + +All commands converge on `ExecuteRun()`: + +```mermaid +flowchart TD + A["moat run"] + B["moat claude"] + C["moat codex"] + D["moat gemini"] + E["moat wt"] + + A -->|"parse args, load config,
resolve interactive mode"| F["ExecuteRun()"] + B -->|"interactive = (no -p flag)"| F + C -->|"interactive = (no -p flag)"| F + D -->|"interactive = (no -p flag)"| F + E -->|"interactive = config.Interactive"| F +``` + +Each entry point builds an `ExecOptions` struct with `Interactive` and `TTY` set identically (both true or both false), then calls `ExecuteRun()`. + +## ExecuteRun: The Fork + +```mermaid +flowchart TD + F["ExecuteRun()"] + F --> G["Set runtime env"] + G --> H["NewManagerWithOptions()"] + H --> I["manager.Create(ctx, opts)"] + + I --> WT{"worktree
metadata?"} + WT -->|yes| WTS["Save worktree metadata"] + WT -->|no| CB + WTS --> CB + + CB{"OnRunCreated
callback?"} + CB -->|yes| CBS["Call callback"] + CB -->|no| MODE + CBS --> MODE + + MODE{"opts.Interactive?"} + MODE -->|yes| INT["RunInteractiveAttached()"] + MODE -->|no| NONINT["manager.Start()"] + + NONINT --> PORTS{"ports?"} + PORTS -->|yes| PRINTP["Print endpoints"] + PORTS -->|no| BG + PRINTP --> BG["Print background instructions
(moat logs, moat stop)"] + BG --> RET["return"] + + INT --> DONE["Blocks until process exits
or Ctrl-/ k"] +``` + +## manager.Create(): Shared Setup (Both Modes) + +This is the heavyweight function (~500 lines). Both modes go through the same path. + +```mermaid +flowchart TD + C["manager.Create()"] + C --> NAME["Resolve name
(flag > config > random)"] + NAME --> VGRANT["Validate grants
(credential store check)"] + VGRANT --> RUN["Create Run struct
(ID, state=Created)"] + + RUN --> MOUNTS["Build mounts
(workspace + config + volumes + worktree .git)"] + + MOUNTS --> PROXY{"needs proxy?
(grants or strict network)"} + PROXY -->|yes| DAEMON["EnsureRunning(daemon)"] + DAEMON --> CREDS["Load credentials per grant"] + CREDS --> RUNCTX["Build RunContext
(headers, network rules, MCP)"] + RUNCTX --> REGISTER["daemon.RegisterRun()"] + REGISTER --> PROXYENV["Set proxy env vars
(HTTP_PROXY, CA cert mount)"] + PROXY -->|no| NOPROXY["No proxy setup"] + + PROXYENV --> SSH + NOPROXY --> SSH + + SSH{"ssh grant?"} + SSH -->|yes| SSHA["Start SSH agent server
(mount socket)"] + SSH -->|no| AGENTCFG + + SSHA --> AGENTCFG + + AGENTCFG{"agent provider?
(claude/codex/gemini)"} + AGENTCFG -->|yes| PCFG["Provider.ContainerConfig()
(config files, env, mounts)"] + AGENTCFG -->|no| IMG + + PCFG --> IMG["image.Resolve()
(deps → base image)"] + IMG --> BUILD{"image exists?"} + BUILD -->|no| BUILDIMG["Build image
(Dockerfile generation + build)"] + BUILD -->|yes| SNAP + BUILDIMG --> SNAP + + SNAP{"snapshots
configured?"} + SNAP -->|yes| SNAPENG["Create snapshot engine"] + SNAP -->|no| CONT + + SNAPENG --> CONT + CONT --> LANGSERV{"language
servers?"} + LANGSERV -->|yes| LS["Configure language server
mounts and env"] + LANGSERV -->|no| SVC + LS --> SVC + + SVC{"services?
(postgres, redis, etc)"} + SVC -->|yes| SVCSTART["Create network
Start service containers
Wait for readiness"] + SVC -->|no| DEPS + SVCSTART --> DEPS + + DEPS{"build deps?
(BuildKit)"} + DEPS -->|yes| BK["Start BuildKit sidecar"] + DEPS -->|no| AUDIT + BK --> AUDIT + + AUDIT["Create audit store"] + AUDIT --> CCFG["Build container.Config"] + CCFG --> CREATE["runtime.CreateContainer()"] + CREATE --> FW{"strict
network?"} + FW -->|yes| FWSET["Record firewall settings"] + FW -->|no| SAVE + FWSET --> SAVE + SAVE["SaveMetadata()"] + SAVE --> RRET["return Run"] +``` + +## Non-Interactive Path: manager.Start() + +```mermaid +flowchart TD + S["manager.Start()"] + S --> SS["state = Starting"] + SS --> LOG["Set log context"] + LOG --> SC["runtime.StartContainer()"] + SC --> FW{"firewall
enabled?"} + FW -->|yes| FWS["runtime.SetupFirewall()"] + FW -->|no| PB + FWS --> PB + + PB{"ports?"} + PB -->|yes| PBG["GetPortBindings()
Register routes"] + PB -->|no| RUN + PBG --> RUN + + RUN["state = Running"] + RUN --> SAVE["SaveMetadata()"] + SAVE --> SNAP{"snapshot
engine?"} + SNAP -->|yes| SNPC["Create pre-run snapshot"] + SNAP -->|no| STREAM + SNPC --> STREAM + + STREAM["go streamLogs()"] + STREAM --> MON["go monitorContainerExit()"] + MON --> RET["return"] +``` + +After `Start()` returns, `ExecuteRun()` prints the background instructions and the CLI exits. The container runs independently. `monitorContainerExit()` runs in a background goroutine that captures logs, runs provider hooks, and cleans up when the container eventually exits. + +## Interactive Path: RunInteractiveAttached() + +```mermaid +flowchart TD + R["RunInteractiveAttached()"] + R --> HELP["Print escape help
(Ctrl-/ k)"] + HELP --> TRACE{"tty-trace
path?"} + TRACE -->|yes| TT["Setup TTY tracer"] + TRACE -->|no| RAW + TT --> RAW + + RAW{"stdin is
terminal?"} + RAW -->|yes| RAWM["EnableRawMode()"] + RAW -->|no| SB + RAWM --> SB + + SB{"stdout is
terminal?"} + SB -->|yes| SBS["Setup status bar
(scroll region, grants)"] + SB -->|no| WRAP + SBS --> WRAP + + WRAP["Wrap stdin: EscapeProxy"] + WRAP --> EHINTS{"status
writer?"} + EHINTS -->|yes| EH["SetupEscapeHints()"] + EHINTS -->|no| TWRAP + EH --> TWRAP + + TWRAP{"tracer?"} + TWRAP -->|yes| TW["Wrap stdin/stdout
with RecordingReader/Writer"] + TWRAP -->|no| SIG + TW --> SIG + + SIG["signal.Notify
(SIGINT, SIGTERM, SIGWINCH)"] + + SIG --> SA["go manager.StartAttached()
(blocks until container exits)"] + SA --> RESIZE["go sleep(200ms) then
ResizeTTY()"] + + RESIZE --> LOOP["Event loop"] + + LOOP --> |SIGWINCH| WINCH["Resize status bar + TTY"] + WINCH --> LOOP + + LOOP --> |SIGTERM| TERM["Stop run"] + TERM --> EXIT["return"] + + LOOP --> |SIGINT| FWDI["Forwarded to container
via TTY"] + FWDI --> LOOP + + LOOP --> |EscapeStop| STOP["Cancel attach
manager.Stop()"] + STOP --> EXIT + + LOOP --> |attachDone err| ERR{"error?"} + ERR -->|yes| ERRET["return error"] + ERR -->|no| COMP["Print completed"] + COMP --> EXIT +``` + +## manager.StartAttached() + +```mermaid +flowchart TD + SA["manager.StartAttached()"] + SA --> SS["state = Starting"] + SS --> LOG["Set log context"] + LOG --> TTY{"stdin is terminal?"} + TTY -->|yes| USTY["useTTY = true"] + TTY -->|no| USTN["useTTY = false"] + + USTY --> TEE + USTN --> TEE + + TEE{"interactive +
has store?"} + TEE -->|yes| TEEW["Tee stdout/stderr
to logBuffer"] + TEE -->|no| AOPTS + TEEW --> AOPTS + + AOPTS["Build AttachOptions
(stdin, stdout, stderr, TTY,
initial terminal size)"] + AOPTS --> GORTN["go runtime.StartAttached()"] + GORTN --> DELAY["sleep(100ms)"] + DELAY --> RUN["state = Running"] + RUN --> PB{"ports?"} + PB -->|yes| PBG["GetPortBindings()
Register routes"] + PB -->|no| SAVE + PBG --> SAVE + SAVE["SaveMetadata()"] + SAVE --> FW{"firewall?"} + FW -->|yes| FWS["SetupFirewall()"] + FW -->|no| WAIT + FWS --> WAIT + + WAIT["await attachDone"] + WAIT --> LOGS{"Apple +
interactive?"} + LOGS -->|yes| WLOGS["Write logBuffer
to logs.jsonl"] + LOGS -->|no| CAP + WLOGS --> CAP + + CAP["captureLogs()"] + CAP --> HOOKS["runProviderStoppedHooks()"] + HOOKS --> SMETA["SaveMetadata()"] + SMETA --> RET["return attachErr"] +``` + +## Cleanup: monitorContainerExit() + +Runs as a background goroutine for **non-interactive** runs (started by `manager.Start()`). For **interactive** runs, cleanup is done inline in `manager.StartAttached()`. + +```mermaid +flowchart TD + M["monitorContainerExit()"] + M --> WAIT["runtime.WaitContainer()
(blocks)"] + WAIT --> CTX{"context
canceled?"} + CTX -->|yes| BAIL["return (container still running,
next Manager instance picks it up)"] + CTX -->|no| LOGS["captureLogs()"] + + LOGS --> HOOKS["runProviderStoppedHooks()"] + HOOKS --> CLOSE["close(exitCh)"] + CLOSE --> STATE{"exit code?"} + STATE -->|0| STOPPED["state = Stopped"] + STATE -->|non-zero| FAILED["state = Failed"] + STOPPED --> SAVE + FAILED --> SAVE + + SAVE["SaveMetadata()"] + SAVE --> CLEANUP["cleanupResources()
(sync.Once guarded)"] +``` + +--- + +## Completed Simplifications + +The following simplifications were identified during analysis and have been implemented: + +### 1. Removed `TTY` field from ExecOptions ✅ +`ExecOptions.TTY` was always set identically to `Interactive`. Removed the field — TTY is now determined at runtime via `term.IsTerminal(os.Stdin)` in `manager.StartAttached()`. + +### 2. Removed `streamLogs` from non-interactive Start ✅ +`StreamLogs` was never `true` in production. Non-interactive runs tell users to use `moat logs -f`. Removed the `streamLogs` method and its `stdcopy` import. + +### 3. Extracted postStart helpers ✅ +Extracted `setLogContext()`, `setupPortBindings()`, and `setupFirewall()` from the duplicated code in `Start()` and `StartAttached()`. Reduced ~80 lines of duplication. + +### 4. Simplified interactive event loop ✅ +With only one escape action (`EscapeStop`), eliminated the `escapeCh` channel. Escape errors now flow through `attachDone` and are handled inline. The attach goroutine is a one-liner. + +### 5. Extracted idempotent `cleanupResources()` ✅ +Consolidated resource cleanup from `Stop()`, `Wait()`, `monitorContainerExit()`, and `Destroy()` into a single method guarded by `sync.Once`. Reduced ~300 lines of duplicated cleanup code. + +### 6. Extracted `ProviderRunner` helper ✅ +`moat claude`, `moat codex`, and `moat gemini` now use a shared `RunProvider()` helper. Each provider is ~30-50 lines of declarative config instead of ~170 lines of boilerplate. Provider-specific logic is isolated in `BuildCommand` and `ConfigureAgent` callbacks. + +### 7. TTY trace builder pattern — Skipped +The conditional wrapping is clear as-is. Not needed until more layers are added. + +The duplication is significant (~200 lines each with minor variations). + +**Opportunity:** Extract a `ProviderRunner` helper that takes a provider-specific config (dependencies, command builder, flag names) and handles the boilerplate. 
Each provider CLI becomes ~50 lines of config + flag registration. + +### 7. Non-interactive Start() still streams logs to stdout + +`manager.Start()` calls `go m.streamLogs()` which copies container logs to stdout. But `ExecuteRun()` returns immediately after Start() for non-interactive mode, and the manager is deferred-closed. The `streamLogs` goroutine's lifecycle is unclear — it may be killed when the manager closes. + +**Opportunity:** Since non-interactive runs print "use moat logs -f", consider removing the `streamLogs` call entirely. The user explicitly monitors via `moat logs`. This simplifies the non-interactive path and eliminates the goroutine lifecycle question. + +### Priority ranking + +| # | Opportunity | Impact | Effort | +|---|------------|--------|--------| +| 5 | Remove `TTY` field (always = Interactive) | Low risk, cleaner API | Small | +| 7 | Remove streamLogs from non-interactive Start | Simpler lifecycle | Small | +| 1 | Extract postStart() helper | Less duplication in manager | Medium | +| 3 | Simplify interactive event loop | Fewer channels/goroutines | Medium | +| 2 | Idempotent cleanupRun() | Correctness + less duplication | Medium | +| 6 | ProviderRunner helper | Major dedup across 3 files | Large | +| 4 | TTY trace builder pattern | Future-proofing only | Skip | From 6c315cb399943df33ac74cb19e46c23cab8a97a3 Mon Sep 17 00:00:00 2001 From: Dan Pupius Date: Fri, 27 Feb 2026 18:30:46 -0800 Subject: [PATCH 15/28] fix(run): block non-interactive runs until exit, update interactive metadata Three fixes for the simplified execution model: 1. Non-interactive runs now block until the container exits instead of returning immediately. This ensures monitorContainerExit has time to capture logs and update state before manager.Close() cancels its context. Signal handling (SIGINT/SIGTERM) gracefully stops the run. 2. Interactive runs now update state to stopped/failed after the container exits normally. 
Previously state stayed "running" and stopped_at was never set. 3. Docker ContainerLogsAll now waits for the container to reach not-running state before reading logs. This prevents empty log capture for fast-exiting containers where Docker's log driver hasn't flushed yet. --- cmd/moat/cli/exec.go | 36 +++++++++++++++++++++++++++++++----- cmd/moat/cli/run.go | 3 +-- internal/container/docker.go | 12 ++++++++++++ internal/run/manager.go | 11 +++++++++++ 4 files changed, 55 insertions(+), 7 deletions(-) diff --git a/cmd/moat/cli/exec.go b/cmd/moat/cli/exec.go index 33949609..f2cbc98e 100644 --- a/cmd/moat/cli/exec.go +++ b/cmd/moat/cli/exec.go @@ -244,7 +244,9 @@ func ExecuteRun(ctx context.Context, opts intcli.ExecOptions) (*run.Run, error) return r, RunInteractiveAttached(ctx, manager, r, opts.Command, opts.Flags.TTYTrace) } - // Non-interactive: start in background + // Non-interactive: start and block until the container exits. + // We must wait so that monitorContainerExit has time to capture logs and + // update state before manager.Close() cancels its context. if err := manager.Start(ctx, r.ID, run.StartOptions{}); err != nil { log.Error("failed to start run", "id", r.ID, "error", err) return r, fmt.Errorf("starting run: %w", err) @@ -264,10 +266,34 @@ func ExecuteRun(ctx context.Context, opts intcli.ExecOptions) (*run.Run, error) } } - fmt.Printf("\nRun %s started in background\n", r.ID) - fmt.Printf("Use 'moat logs %s -f' to follow output\n", r.ID) - fmt.Printf("Use 'moat stop %s' to stop\n", r.ID) - return r, nil + fmt.Printf("Run %s started. 
Stop with Ctrl+C.\n", r.ID) + + // Wait for container exit or signal + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) + defer signal.Stop(sigCh) + + waitDone := make(chan error, 1) + go func() { + waitDone <- manager.Wait(ctx, r.ID) + }() + + select { + case sig := <-sigCh: + log.Info("received signal, stopping run", "signal", sig, "id", r.ID) + fmt.Printf("\nStopping run %s...\n", r.ID) + if err := manager.Stop(ctx, r.ID); err != nil { + log.Error("failed to stop run", "id", r.ID, "error", err) + } + // Wait for monitorContainerExit to finish cleanup + <-waitDone + return r, nil + case err := <-waitDone: + if err != nil { + return r, fmt.Errorf("run failed: %w", err) + } + return r, nil + } } // RunInteractiveAttached runs in interactive mode using StartAttached to ensure diff --git a/cmd/moat/cli/run.go b/cmd/moat/cli/run.go index 0fed22de..13d49bc6 100644 --- a/cmd/moat/cli/run.go +++ b/cmd/moat/cli/run.go @@ -28,8 +28,7 @@ Arguments: [-- cmd] Optional command to run instead of agent's default Non-interactive mode (default): - Run starts in background. Monitor with 'moat logs' and 'moat trace'. - Stop with 'moat stop'. + Run blocks until the command completes. Stop with Ctrl+C. Interactive mode (-i): Ctrl-/ k Stop the run diff --git a/internal/container/docker.go b/internal/container/docker.go index 32ae3dcf..9a00d68c 100644 --- a/internal/container/docker.go +++ b/internal/container/docker.go @@ -343,6 +343,18 @@ func (r *DockerRuntime) ContainerLogs(ctx context.Context, containerID string) ( // ContainerLogsAll returns all logs from a container (does not follow). // The logs are demultiplexed from Docker's format (removes 8-byte headers). func (r *DockerRuntime) ContainerLogsAll(ctx context.Context, containerID string) ([]byte, error) { + // Wait for the container to stop so Docker's log driver has flushed. + // For already-stopped containers this returns immediately. 
For fast-exiting + // containers it prevents reading logs before they're written. + waitCtx, waitCancel := context.WithTimeout(ctx, 10*time.Second) + defer waitCancel() + statusCh, errCh := r.cli.ContainerWait(waitCtx, containerID, container.WaitConditionNotRunning) + select { + case <-errCh: + // Ignore errors — container may already be removed + case <-statusCh: + } + reader, err := r.cli.ContainerLogs(ctx, containerID, container.LogsOptions{ ShowStdout: true, ShowStderr: true, diff --git a/internal/run/manager.go b/internal/run/manager.go index 3564b248..b8564230 100644 --- a/internal/run/manager.go +++ b/internal/run/manager.go @@ -2366,6 +2366,17 @@ func (m *Manager) StartAttached(ctx context.Context, runID string, stdin io.Read // Wait for the attachment to complete (container exits or context canceled) attachErr := <-attachDone + // Update run state based on how the attachment ended. + // Skip if context was canceled (e.g. escape-stop or SIGTERM) — the caller's + // Stop() call handles that state transition. + if ctx.Err() == nil { + if attachErr != nil { + r.SetStateFailedAt(attachErr.Error(), time.Now()) + } else { + r.SetStateWithTime(StateStopped, time.Now()) + } + } + // For Apple containers in interactive mode, write captured output directly to logs.jsonl. // (Apple TTY output doesn't go through container runtime logs) // For Docker, captureLogs will handle it via ContainerLogsAll (works even in TTY mode). From e1011342facadfca1826d50f51849fdc8614cc74 Mon Sep 17 00:00:00 2001 From: Dan Pupius Date: Fri, 27 Feb 2026 20:07:59 -0800 Subject: [PATCH 16/28] docs: update for blocking non-interactive runs, remove -d flag Non-interactive runs block until exit, not background. Remove references to the removed -d/--detach flag from worktree examples. 
--- docs/content/guides/12-worktrees.md | 11 ++++------- docs/content/reference/01-cli.md | 9 ++++----- docs/content/reference/02-agent-yaml.md | 2 +- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/docs/content/guides/12-worktrees.md b/docs/content/guides/12-worktrees.md index 5e9ca488..d79526e1 100644 --- a/docs/content/guides/12-worktrees.md +++ b/docs/content/guides/12-worktrees.md @@ -46,9 +46,6 @@ If the branch and worktree already exist, they are reused. # Start the command defined in agent.yaml on the dark-mode branch moat wt dark-mode -# Run in background -moat wt dark-mode -d - # Run a specific command instead of the agent.yaml default moat wt dark-mode -- make test ``` @@ -64,7 +61,7 @@ The `--worktree` flag works on `moat claude`, `moat codex`, and `moat gemini`: moat claude --worktree dark-mode # Start Codex on a feature branch with a prompt -moat codex --worktree feature/auth -p "implement OAuth login" -d +moat codex --worktree feature/auth -p "implement OAuth login" # Start Gemini on a refactor branch moat gemini --worktree cleanup @@ -90,9 +87,9 @@ moat wt dark-mode --name my-custom-name Start runs on multiple branches simultaneously: ```bash -moat wt feature/auth -d -moat wt feature/dark-mode -d -moat wt fix/login-bug -d +moat wt feature/auth & +moat wt feature/dark-mode & +moat wt fix/login-bug & ``` Each run gets its own worktree, its own container, and its own branch. Branch names with slashes (like `feature/auth`) are supported. 
diff --git a/docs/content/reference/01-cli.md b/docs/content/reference/01-cli.md index 93bc3896..044472e1 100644 --- a/docs/content/reference/01-cli.md +++ b/docs/content/reference/01-cli.md @@ -51,9 +51,9 @@ The agent commands (`moat claude`, `moat codex`, `moat gemini`) share the follow | `--no-sandbox` | Disable gVisor sandbox (Docker only) | | `--worktree BRANCH` | Run in a git worktree for this branch (alias: `--wt`) | -Agent commands run interactively by default, owning the terminal with stdin/stdout/stderr connected. Use `-p`/`--prompt` for non-interactive mode (runs in background, exits when done). Each agent command also accepts command-specific flags documented in their own sections. +Agent commands run interactively by default, owning the terminal with stdin/stdout/stderr connected. Use `-p`/`--prompt` for non-interactive mode (blocks until the agent finishes). Each agent command also accepts command-specific flags documented in their own sections. -All agent commands support passing an initial prompt after `--`. Unlike `-p`, which runs non-interactively in the background and exits when done, arguments after `--` start an interactive session with the prompt pre-filled: +All agent commands support passing an initial prompt after `--`. Unlike `-p`, which runs non-interactively and exits when done, arguments after `--` start an interactive session with the prompt pre-filled: ```bash moat claude -- "is this thing on?" @@ -94,11 +94,10 @@ moat run [flags] [path] [-- command] ### Execution modes -**Non-interactive (default):** The run starts in the background. Use `moat logs -f` to follow output and `moat stop ` to stop. +**Non-interactive (default):** The CLI blocks until the command completes. Press `Ctrl+C` to stop. ```bash moat run ./my-project -moat logs -f ``` **Interactive (`-i`):** The run owns the terminal with stdin/stdout/stderr connected and a TTY allocated. Press `Ctrl-/ k` to stop the run. `Ctrl+C` is forwarded to the container process. 
@@ -110,7 +109,7 @@ moat run -i -- bash ### Examples ```bash -# Run in current directory (non-interactive, background) +# Run in current directory moat run # Run in specific directory diff --git a/docs/content/reference/02-agent-yaml.md b/docs/content/reference/02-agent-yaml.md index 23deb3b7..549ee3e9 100644 --- a/docs/content/reference/02-agent-yaml.md +++ b/docs/content/reference/02-agent-yaml.md @@ -589,7 +589,7 @@ interactive: true When `true`, allocates a TTY and connects stdin. The session owns the terminal. Press `Ctrl-/ k` to stop the run; `Ctrl+C` is forwarded to the container process. -When `false` (default), the run starts in the background. Use `moat logs -f` to follow output and `moat stop ` to stop. +When `false` (default), the CLI blocks until the command completes. Press `Ctrl+C` to stop. Use `moat logs ` to review output after the run. Required for shells, REPLs, and interactive tools. From f01c79e05c409f4dc4a75f4d3bf6dd6709eb72ee Mon Sep 17 00:00:00 2001 From: Dan Pupius Date: Fri, 27 Feb 2026 20:10:50 -0800 Subject: [PATCH 17/28] docs(worktrees): show parallel branches in separate terminals --- docs/content/guides/12-worktrees.md | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/docs/content/guides/12-worktrees.md b/docs/content/guides/12-worktrees.md index d79526e1..1c20bf26 100644 --- a/docs/content/guides/12-worktrees.md +++ b/docs/content/guides/12-worktrees.md @@ -84,12 +84,17 @@ moat wt dark-mode --name my-custom-name ## Parallel branches -Start runs on multiple branches simultaneously: +Start runs on multiple branches simultaneously, each in its own terminal: ```bash -moat wt feature/auth & -moat wt feature/dark-mode & -moat wt fix/login-bug & +# Terminal 1 +moat wt feature/auth + +# Terminal 2 +moat wt feature/dark-mode + +# Terminal 3 +moat wt fix/login-bug ``` Each run gets its own worktree, its own container, and its own branch. Branch names with slashes (like `feature/auth`) are supported. 
From 35e5d8e7fb944a2002151ffad170faf2d6c997a9 Mon Sep 17 00:00:00 2001 From: Dan Pupius Date: Fri, 27 Feb 2026 20:32:17 -0800 Subject: [PATCH 18/28] fix(run): handle escape-stop state correctly, fix message ordering Address code review feedback: 1. Escape-stop (Ctrl-/ k) no longer sets state to "failed". The state update and log capture in StartAttached are skipped when the attach ended due to an escape or context cancellation, letting the caller's Stop() handle state and monitorContainerExit capture complete logs. 2. captureLogs no longer fires while the container is still running on escape-stop. Previously this caused a 10s stall (ContainerWait timeout) and captured incomplete logs. 3. Provider "Started..." message now prints via OnRunCreated callback before ExecuteRun blocks, not after the run completes. --- internal/cli/provider.go | 14 +++++--------- internal/run/manager.go | 28 ++++++++++++++++++---------- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/internal/cli/provider.go b/internal/cli/provider.go index 1c3f9318..0fd3458c 100644 --- a/internal/cli/provider.go +++ b/internal/cli/provider.go @@ -215,15 +215,11 @@ func RunProvider(cmd *cobra.Command, args []string, rc ProviderRunConfig) error SetWorktreeFields(&opts, wtOut.Result) - result, err := ExecuteRun(ctx, opts) - if err != nil { - return err - } - - if result != nil { - fmt.Printf("Starting %s in %s\n", rc.Name, absPath) - fmt.Printf("Run: %s (%s)\n", result.Name, result.ID) + // Print run info after creation but before blocking on execution + opts.OnRunCreated = func(info RunInfo) { + fmt.Printf("Started %s in %s (run %s)\n", rc.Name, absPath, info.ID) } - return nil + _, err = ExecuteRun(ctx, opts) + return err } diff --git a/internal/run/manager.go b/internal/run/manager.go index b8564230..c8915153 100644 --- a/internal/run/manager.go +++ b/internal/run/manager.go @@ -2366,10 +2366,14 @@ func (m *Manager) StartAttached(ctx context.Context, runID string, stdin io.Read // Wait for 
the attachment to complete (container exits or context canceled) attachErr := <-attachDone - // Update run state based on how the attachment ended. - // Skip if context was canceled (e.g. escape-stop or SIGTERM) — the caller's - // Stop() call handles that state transition. - if ctx.Err() == nil { + // Determine whether the caller will stop the container (escape-stop or context + // cancellation). In those cases, skip state updates and log capture here — the + // caller's Stop() and monitorContainerExit handle them after the container + // actually exits. + callerWillStop := ctx.Err() != nil || term.IsEscapeError(attachErr) + + if !callerWillStop { + // Container exited on its own — update state now. if attachErr != nil { r.SetStateFailedAt(attachErr.Error(), time.Now()) } else { @@ -2381,7 +2385,7 @@ func (m *Manager) StartAttached(ctx context.Context, runID string, stdin io.Read // (Apple TTY output doesn't go through container runtime logs) // For Docker, captureLogs will handle it via ContainerLogsAll (works even in TTY mode). // Always create the file for audit completeness, even if empty. - if r.Interactive && r.Store != nil && m.runtime.Type() == container.RuntimeApple { + if !callerWillStop && r.Interactive && r.Store != nil && m.runtime.Type() == container.RuntimeApple { // Use CompareAndSwap to ensure single write if r.logsCaptured.CompareAndSwap(false, true) { if lw, err := r.Store.LogWriter(); err == nil { @@ -2396,14 +2400,18 @@ func (m *Manager) StartAttached(ctx context.Context, runID string, stdin io.Read } } - // Capture logs after container exits (critical for audit/observability) - // For non-interactive mode, this fetches from container runtime logs - // For interactive mode with tee, this is a no-op (logsCaptured flag is already set) - m.captureLogs(r) + // Capture logs after container exits (critical for audit/observability). 
+ // Skip when caller will stop the container — it's still running and + // monitorContainerExit will capture complete logs after it actually exits. + if !callerWillStop { + m.captureLogs(r) + } // Run provider stopped hooks (e.g., Claude session ID extraction). // Must happen after the container has exited so session files are flushed. - runProviderStoppedHooks(r) + if !callerWillStop { + runProviderStoppedHooks(r) + } _ = r.SaveMetadata() return attachErr From b0e13fc65f4ae56df7b5081966f24a60652dd5fc Mon Sep 17 00:00:00 2001 From: Dan Pupius Date: Fri, 27 Feb 2026 20:39:51 -0800 Subject: [PATCH 19/28] fix(run): preserve Apple TTY logs on escape-stop, fix daemonClient race Address second round of code review feedback: 1. Apple TTY log buffer is now written unconditionally in StartAttached, not gated on callerWillStop. The buffer holds all output up to the escape point, and captureLogs() returns early for Apple interactive runs so this is the only path that writes logs. 2. cleanupResources snapshots m.daemonClient under m.mu.RLock before the cleanupOnce closure, preventing a data race with Create() which writes daemonClient under m.mu.Lock. 3. Stale "detach" comment in Wait() updated. --- internal/run/manager.go | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/internal/run/manager.go b/internal/run/manager.go index c8915153..1afcae5e 100644 --- a/internal/run/manager.go +++ b/internal/run/manager.go @@ -2382,10 +2382,10 @@ func (m *Manager) StartAttached(ctx context.Context, runID string, stdin io.Read } // For Apple containers in interactive mode, write captured output directly to logs.jsonl. - // (Apple TTY output doesn't go through container runtime logs) - // For Docker, captureLogs will handle it via ContainerLogsAll (works even in TTY mode). - // Always create the file for audit completeness, even if empty. 
- if !callerWillStop && r.Interactive && r.Store != nil && m.runtime.Type() == container.RuntimeApple { + // (Apple TTY output doesn't go through container runtime logs — captureLogs() returns + // early for Apple interactive runs, so this is the only path that writes logs.) + // Runs unconditionally: even on escape-stop the buffer holds all output up to that point. + if r.Interactive && r.Store != nil && m.runtime.Type() == container.RuntimeApple { // Use CompareAndSwap to ensure single write if r.logsCaptured.CompareAndSwap(false, true) { if lw, err := r.Store.LogWriter(); err == nil { @@ -2486,7 +2486,7 @@ func (m *Manager) Wait(ctx context.Context, runID string) error { return err case <-ctx.Done(): - // Context canceled - caller chose to detach, don't stop the run + // Context canceled — caller is responsible for stopping the run return ctx.Err() } } @@ -2627,13 +2627,20 @@ func runProviderStoppedHooks(r *Run) { // - run provider stopped hooks (has its own idempotency guard) // - close audit store or remove storage (Destroy-only) func (m *Manager) cleanupResources(ctx context.Context, r *Run) { + // Snapshot daemonClient under lock to avoid racing with Create() which + // sets it under m.mu.Lock(). cleanupResources is called from + // monitorContainerExit goroutines that run concurrently with Create(). 
+ m.mu.RLock() + dc := m.daemonClient + m.mu.RUnlock() + r.cleanupOnce.Do(func() { // Cancel token refresh and unregister run from proxy daemon if err := r.stopProxyServer(ctx); err != nil { log.Debug("cleanup: stopping proxy", "error", err) } - if r.ProxyAuthToken != "" && m.daemonClient != nil { - if err := m.daemonClient.UnregisterRun(ctx, r.ProxyAuthToken); err != nil { + if r.ProxyAuthToken != "" && dc != nil { + if err := dc.UnregisterRun(ctx, r.ProxyAuthToken); err != nil { log.Debug("cleanup: unregistering from proxy daemon", "error", err) } } From 050590fb050c9a774fa74453443dde8ae38bb671 Mon Sep 17 00:00:00 2001 From: Dan Pupius Date: Fri, 27 Feb 2026 20:46:13 -0800 Subject: [PATCH 20/28] fix(run): use daemonClient snapshot for UnregisterRoutes, fix stale comment Complete the daemonClient race fix: the UnregisterRoutes call inside cleanupOnce.Do was still accessing m.daemonClient directly instead of the snapshot taken under m.mu.RLock. Also update monitorContainerExit comment to remove stale "detached mode" reference. --- internal/run/manager.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/internal/run/manager.go b/internal/run/manager.go index 1afcae5e..53788d19 100644 --- a/internal/run/manager.go +++ b/internal/run/manager.go @@ -2698,8 +2698,8 @@ func (m *Manager) cleanupResources(ctx context.Context, r *Run) { // Unregister routes if r.Name != "" { _ = m.routes.Remove(r.Name) - if m.daemonClient != nil { - if err := m.daemonClient.UnregisterRoutes(ctx, r.Name); err != nil { + if dc != nil { + if err := dc.UnregisterRoutes(ctx, r.Name); err != nil { log.Debug("cleanup: failed to unregister routes", "error", err) } } @@ -2724,8 +2724,9 @@ func (m *Manager) cleanupResources(ctx context.Context, r *Run) { } // monitorContainerExit watches for container exit and captures logs. -// This runs in the background for ALL runs to ensure logs are captured -// even in detached mode where Wait() is never called. 
+// This runs in the background for ALL runs to ensure logs are captured, +// exitCh is closed, and resources are cleaned up regardless of which path +// (interactive, non-interactive, Stop) caused the container to exit. // It's safe to call multiple times - captureLogs is idempotent. func (m *Manager) monitorContainerExit(ctx context.Context, r *Run) { // Wait for container to exit (no timeout - let it run as long as needed) From 654161461aa85c170c259620a8e25a01d57ac157 Mon Sep 17 00:00:00 2001 From: Dan Pupius Date: Fri, 27 Feb 2026 22:38:15 -0800 Subject: [PATCH 21/28] fix(run): update state before closing exitCh, align setup ordering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Move state update (SetStateFailedAt/SetStateWithTime) before close(exitCh) in monitorContainerExit so Wait() reads the final state when it unblocks. Previously Wait() could race and return nil for a failed container. 2. Align StartAttached to use same firewall→portBindings ordering as Start, preventing future divergence. 3. Update stale "detached completion" comment in captureLogs. 
--- internal/run/manager.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/internal/run/manager.go b/internal/run/manager.go index 53788d19..4ca393e6 100644 --- a/internal/run/manager.go +++ b/internal/run/manager.go @@ -2355,14 +2355,14 @@ func (m *Manager) StartAttached(ctx context.Context, runID string, stdin io.Read r.SetStateWithTime(StateRunning, time.Now()) } - m.setupPortBindings(ctx, r) - - _ = r.SaveMetadata() - if err := m.setupFirewall(ctx, r); err != nil { return err } + m.setupPortBindings(ctx, r) + + _ = r.SaveMetadata() + // Wait for the attachment to complete (container exits or context canceled) attachErr := <-attachDone @@ -2512,7 +2512,7 @@ func (m *Manager) Get(runID string) (*Run, error) { // - Normal exit (Wait) // - Interactive exit (StartAttached) // - Explicit stop (Stop) -// - Detached completion (background monitor) +// - Background monitor (monitorContainerExit) func (m *Manager) captureLogs(r *Run) { if r.Store == nil { return @@ -2751,10 +2751,8 @@ func (m *Manager) monitorContainerExit(ctx context.Context, r *Run) { // Must happen after captureLogs and before SaveMetadata. runProviderStoppedHooks(r) - // Now signal that container has exited (and logs are captured) - close(r.exitCh) - - // Update run state (use state lock to prevent races with concurrent access) + // Update run state BEFORE signaling exitCh so that Wait() reads + // the final state (including r.Error) when it unblocks. 
currentState := r.GetState() if currentState == StateRunning || currentState == StateStarting { if err != nil || exitCode != 0 { @@ -2770,9 +2768,11 @@ func (m *Manager) monitorContainerExit(ctx context.Context, r *Run) { } } - // Save updated state _ = r.SaveMetadata() + // Signal that container has exited (logs captured, state updated) + close(r.exitCh) + // Clean up all resources (30-second timeout for cleanup operations) cleanupCtx, cleanupCancel := context.WithTimeout(context.Background(), 30*time.Second) defer cleanupCancel() From be1c50e9dc94a65781889bdf64cf34f2084c8ecb Mon Sep 17 00:00:00 2001 From: Dan Pupius Date: Fri, 27 Feb 2026 22:59:49 -0800 Subject: [PATCH 22/28] fix(run): protect daemonClient read in setupPortBindings Snapshot m.daemonClient under m.mu.RLock before calling RegisterRoutes, same pattern as cleanupResources. Prevents data race with Create() which writes daemonClient under lock. --- internal/run/manager.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/internal/run/manager.go b/internal/run/manager.go index 4ca393e6..949a6855 100644 --- a/internal/run/manager.go +++ b/internal/run/manager.go @@ -2207,8 +2207,12 @@ func (m *Manager) setupPortBindings(ctx context.Context, r *Run) { if err := m.routes.Add(r.Name, services); err != nil { ui.Warnf("Registering routes: %v", err) } - if m.daemonClient != nil { - if err := m.daemonClient.RegisterRoutes(ctx, r.Name, services); err != nil { + // Snapshot daemonClient under lock to avoid racing with Create() + m.mu.RLock() + dc := m.daemonClient + m.mu.RUnlock() + if dc != nil { + if err := dc.RegisterRoutes(ctx, r.Name, services); err != nil { log.Debug("failed to register routes via daemon", "error", err) } } From a599001fcb97ed166fc4c1433787b051498ca2f8 Mon Sep 17 00:00:00 2001 From: Dan Pupius Date: Mon, 2 Mar 2026 16:22:26 -0800 Subject: [PATCH 23/28] fix(cli): clean up non-interactive run output - Remove duplicate run ID: one "Started" line from OnRunCreated 
(provider path) or exec.go (moat run path), not both - Remove stale "Stop with Ctrl+C" hint - Add post-completion hint: 'moat logs ' to view output - Remove post-ExecuteRun print in run.go (ExecuteRun blocks now) --- cmd/moat/cli/exec.go | 6 +++++- cmd/moat/cli/run.go | 12 ++---------- internal/cli/provider.go | 2 +- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/cmd/moat/cli/exec.go b/cmd/moat/cli/exec.go index f2cbc98e..1569b4bd 100644 --- a/cmd/moat/cli/exec.go +++ b/cmd/moat/cli/exec.go @@ -266,7 +266,10 @@ func ExecuteRun(ctx context.Context, opts intcli.ExecOptions) (*run.Run, error) } } - fmt.Printf("Run %s started. Stop with Ctrl+C.\n", r.ID) + // If no callback printed a started message, print one here (moat run path) + if opts.OnRunCreated == nil { + fmt.Printf("Started %s (%s)\n", r.Name, r.ID) + } // Wait for container exit or signal sigCh := make(chan os.Signal, 1) @@ -292,6 +295,7 @@ func ExecuteRun(ctx context.Context, opts intcli.ExecOptions) (*run.Run, error) if err != nil { return r, fmt.Errorf("run failed: %w", err) } + fmt.Printf("Use 'moat logs %s' to view output\n", r.ID) return r, nil } } diff --git a/cmd/moat/cli/run.go b/cmd/moat/cli/run.go index 13d49bc6..88dbb764 100644 --- a/cmd/moat/cli/run.go +++ b/cmd/moat/cli/run.go @@ -175,14 +175,6 @@ func runAgent(cmd *cobra.Command, args []string) error { Interactive: interactive, } - r, err := ExecuteRun(ctx, opts) - if err != nil { - return err - } - - if r != nil { - fmt.Printf("Started agent %q (run %s)\n", r.Name, r.ID) - } - - return nil + _, err = ExecuteRun(ctx, opts) + return err } diff --git a/internal/cli/provider.go b/internal/cli/provider.go index 0fd3458c..85261049 100644 --- a/internal/cli/provider.go +++ b/internal/cli/provider.go @@ -217,7 +217,7 @@ func RunProvider(cmd *cobra.Command, args []string, rc ProviderRunConfig) error // Print run info after creation but before blocking on execution opts.OnRunCreated = func(info RunInfo) { - fmt.Printf("Started %s in %s 
(run %s)\n", rc.Name, absPath, info.ID) + fmt.Printf("Started agent %q (%s)\n", info.Name, info.ID) } _, err = ExecuteRun(ctx, opts) From 813dfb13df9c819c28184f57a3ab9b7a2451a7e6 Mon Sep 17 00:00:00 2001 From: Dan Pupius Date: Mon, 2 Mar 2026 17:32:02 -0800 Subject: [PATCH 24/28] fix(run): stream logs for non-interactive runs, fix monitor lifecycle Non-interactive runs now stream container output to the terminal in real-time instead of blocking silently. This gives docker-run-like UX where users see output as it happens. Key changes: - monitorContainerExit uses context.Background() so it survives Close() - Manager.Close() waits for tracked monitors via sync.WaitGroup before closing the runtime, ensuring logs are always captured - Only monitors started via Start() are tracked; inherited monitors from loadPersistedRuns are not, so Close() doesn't hang on old containers - Docker ContainerLogs demuxes multiplexed format for non-TTY containers - Non-interactive path streams logs via FollowLogs while blocking on Wait - Added Ctrl+C hint on start and moat logs hint on completion (dimmed) - Updated help text and docs to describe streaming behavior --- cmd/moat/cli/exec.go | 23 ++++++++-- cmd/moat/cli/run.go | 2 +- docs/content/reference/01-cli.md | 4 +- docs/content/reference/02-agent-yaml.md | 2 +- internal/container/docker.go | 23 +++++++++- internal/run/edge_cases_test.go | 8 ++-- internal/run/manager.go | 61 +++++++++++++------------ 7 files changed, 80 insertions(+), 43 deletions(-) diff --git a/cmd/moat/cli/exec.go b/cmd/moat/cli/exec.go index 1569b4bd..7f66d624 100644 --- a/cmd/moat/cli/exec.go +++ b/cmd/moat/cli/exec.go @@ -244,9 +244,7 @@ func ExecuteRun(ctx context.Context, opts intcli.ExecOptions) (*run.Run, error) return r, RunInteractiveAttached(ctx, manager, r, opts.Command, opts.Flags.TTYTrace) } - // Non-interactive: start and block until the container exits. 
- // We must wait so that monitorContainerExit has time to capture logs and - // update state before manager.Close() cancels its context. + // Non-interactive: start the container, stream its output, and wait for exit. if err := manager.Start(ctx, r.ID, run.StartOptions{}); err != nil { log.Error("failed to start run", "id", r.ID, "error", err) return r, fmt.Errorf("starting run: %w", err) @@ -270,6 +268,18 @@ func ExecuteRun(ctx context.Context, opts intcli.ExecOptions) (*run.Run, error) if opts.OnRunCreated == nil { fmt.Printf("Started %s (%s)\n", r.Name, r.ID) } + fmt.Println(ui.Dim("Press Ctrl+C to stop")) + fmt.Println() + + // Stream container logs to stdout in a goroutine. + // FollowLogs blocks until the container exits or context is canceled. + logCtx, logCancel := context.WithCancel(ctx) + defer logCancel() + go func() { + if err := manager.FollowLogs(logCtx, r.ID, os.Stdout); err != nil && logCtx.Err() == nil { + log.Debug("log streaming ended", "error", err) + } + }() // Wait for container exit or signal sigCh := make(chan os.Signal, 1) @@ -284,18 +294,23 @@ func ExecuteRun(ctx context.Context, opts intcli.ExecOptions) (*run.Run, error) select { case sig := <-sigCh: log.Info("received signal, stopping run", "signal", sig, "id", r.ID) + logCancel() fmt.Printf("\nStopping run %s...\n", r.ID) if err := manager.Stop(ctx, r.ID); err != nil { log.Error("failed to stop run", "id", r.ID, "error", err) } // Wait for monitorContainerExit to finish cleanup <-waitDone + fmt.Println() + fmt.Println(ui.Dim(fmt.Sprintf("View output: moat logs %s", r.ID))) return r, nil case err := <-waitDone: + logCancel() if err != nil { return r, fmt.Errorf("run failed: %w", err) } - fmt.Printf("Use 'moat logs %s' to view output\n", r.ID) + fmt.Println() + fmt.Println(ui.Dim(fmt.Sprintf("View output: moat logs %s", r.ID))) return r, nil } } diff --git a/cmd/moat/cli/run.go b/cmd/moat/cli/run.go index 88dbb764..5f367c45 100644 --- a/cmd/moat/cli/run.go +++ b/cmd/moat/cli/run.go @@ 
-28,7 +28,7 @@ Arguments: [-- cmd] Optional command to run instead of agent's default Non-interactive mode (default): - Run blocks until the command completes. Stop with Ctrl+C. + Output streams to the terminal. Press Ctrl+C to stop. Interactive mode (-i): Ctrl-/ k Stop the run diff --git a/docs/content/reference/01-cli.md b/docs/content/reference/01-cli.md index 044472e1..82204da9 100644 --- a/docs/content/reference/01-cli.md +++ b/docs/content/reference/01-cli.md @@ -51,7 +51,7 @@ The agent commands (`moat claude`, `moat codex`, `moat gemini`) share the follow | `--no-sandbox` | Disable gVisor sandbox (Docker only) | | `--worktree BRANCH` | Run in a git worktree for this branch (alias: `--wt`) | -Agent commands run interactively by default, owning the terminal with stdin/stdout/stderr connected. Use `-p`/`--prompt` for non-interactive mode (blocks until the agent finishes). Each agent command also accepts command-specific flags documented in their own sections. +Agent commands run interactively by default, owning the terminal with stdin/stdout/stderr connected. Use `-p`/`--prompt` for non-interactive mode (output streams to the terminal). Each agent command also accepts command-specific flags documented in their own sections. All agent commands support passing an initial prompt after `--`. Unlike `-p`, which runs non-interactively and exits when done, arguments after `--` start an interactive session with the prompt pre-filled: @@ -94,7 +94,7 @@ moat run [flags] [path] [-- command] ### Execution modes -**Non-interactive (default):** The CLI blocks until the command completes. Press `Ctrl+C` to stop. +**Non-interactive (default):** Output streams to the terminal. Press `Ctrl+C` to stop. 
```bash moat run ./my-project diff --git a/docs/content/reference/02-agent-yaml.md b/docs/content/reference/02-agent-yaml.md index 549ee3e9..e7eb2247 100644 --- a/docs/content/reference/02-agent-yaml.md +++ b/docs/content/reference/02-agent-yaml.md @@ -589,7 +589,7 @@ interactive: true When `true`, allocates a TTY and connects stdin. The session owns the terminal. Press `Ctrl-/ k` to stop the run; `Ctrl+C` is forwarded to the container process. -When `false` (default), the CLI blocks until the command completes. Press `Ctrl+C` to stop. Use `moat logs ` to review output after the run. +When `false` (default), output streams to the terminal. Press `Ctrl+C` to stop. Use `moat logs ` to review output after the run. Required for shells, REPLs, and interactive tools. diff --git a/internal/container/docker.go b/internal/container/docker.go index 9a00d68c..604a2ab6 100644 --- a/internal/container/docker.go +++ b/internal/container/docker.go @@ -332,12 +332,33 @@ func (r *DockerRuntime) RemoveContainer(ctx context.Context, containerID string) } // ContainerLogs returns the logs from a container (follows output). +// For non-TTY containers, the Docker multiplexed format is demuxed +// so the returned reader produces clean text. 
func (r *DockerRuntime) ContainerLogs(ctx context.Context, containerID string) (io.ReadCloser, error) { - return r.cli.ContainerLogs(ctx, containerID, container.LogsOptions{ + raw, err := r.cli.ContainerLogs(ctx, containerID, container.LogsOptions{ ShowStdout: true, ShowStderr: true, Follow: true, }) + if err != nil { + return nil, err + } + + // Check if container was created with TTY + inspect, inspectErr := r.cli.ContainerInspect(ctx, containerID) + if inspectErr != nil || inspect.Config.Tty { + // TTY mode or inspect failed: logs are plain text + return raw, nil + } + + // Non-TTY: demux Docker's multiplexed stdout/stderr format + pr, pw := io.Pipe() + go func() { + _, copyErr := stdcopy.StdCopy(pw, pw, raw) + raw.Close() + pw.CloseWithError(copyErr) + }() + return pr, nil } // ContainerLogsAll returns all logs from a container (does not follow). diff --git a/internal/run/edge_cases_test.go b/internal/run/edge_cases_test.go index a7edd787..b7fc6745 100644 --- a/internal/run/edge_cases_test.go +++ b/internal/run/edge_cases_test.go @@ -917,7 +917,7 @@ func TestMonitorContainerExitSetsStateFailed(t *testing.T) { // Run monitor in goroutine and wait for it to complete done := make(chan struct{}) go func() { - m.monitorContainerExit(context.Background(), r) + m.monitorContainerExit(r) close(done) }() @@ -976,7 +976,7 @@ func TestMonitorContainerExitSetsStateStopped(t *testing.T) { done := make(chan struct{}) go func() { - m.monitorContainerExit(context.Background(), r) + m.monitorContainerExit(r) close(done) }() @@ -1056,7 +1056,7 @@ func TestMonitorContainerExitWaitError(t *testing.T) { done := make(chan struct{}) go func() { - m.monitorContainerExit(context.Background(), r) + m.monitorContainerExit(r) close(done) }() @@ -1223,7 +1223,7 @@ func TestMonitorContainerExitAlreadyStopped(t *testing.T) { done := make(chan struct{}) go func() { - m.monitorContainerExit(context.Background(), r) + m.monitorContainerExit(r) close(done) }() diff --git 
a/internal/run/manager.go b/internal/run/manager.go index 949a6855..56152135 100644 --- a/internal/run/manager.go +++ b/internal/run/manager.go @@ -4,7 +4,6 @@ import ( "bytes" "context" "crypto/sha256" - "errors" "fmt" "io" "net" @@ -82,10 +81,14 @@ type Manager struct { daemonClient *daemon.Client mu sync.RWMutex - // ctx/cancel control background goroutines (e.g. monitorContainerExit). - // Canceled in Close() so lingering WaitContainer calls don't leak. + // ctx/cancel for general manager lifecycle. ctx context.Context cancel context.CancelFunc + + // monitorWg tracks active monitorContainerExit goroutines. + // Close() waits on this before closing the runtime so that + // monitors can finish capturing logs and updating state. + monitorWg sync.WaitGroup } // ManagerOptions configures the run manager. @@ -330,13 +333,11 @@ func (m *Manager) registerPersistedRun(runState State, meta storage.Metadata, st m.mu.Unlock() // For running containers, start background monitor to capture logs when they exit. - // Use m.ctx so these goroutines are canceled when the Manager closes. + // These inherited monitors are NOT tracked by monitorWg — they may block + // indefinitely on long-running containers from previous CLI invocations. + // Only monitors started via Start() are tracked so Close() doesn't hang. if runState == StateRunning { - monitorCtx := m.ctx - if monitorCtx == nil { - monitorCtx = context.Background() - } - go m.monitorContainerExit(monitorCtx, r) + go m.monitorContainerExit(r) } log.Debug("loaded persisted run", "id", runID, "name", meta.Name, "state", runState) @@ -2273,12 +2274,12 @@ func (m *Manager) Start(ctx context.Context, runID string, opts StartOptions) er } // Start background monitor to capture logs when container exits. - // Use m.ctx so the goroutine is canceled when the Manager closes. 
- monitorCtx := m.ctx - if monitorCtx == nil { - monitorCtx = context.Background() - } - go m.monitorContainerExit(monitorCtx, r) + // Tracked by monitorWg so Close() waits for completion. + m.monitorWg.Add(1) + go func() { + defer m.monitorWg.Done() + m.monitorContainerExit(r) + }() return nil } @@ -2732,19 +2733,12 @@ func (m *Manager) cleanupResources(ctx context.Context, r *Run) { // exitCh is closed, and resources are cleaned up regardless of which path // (interactive, non-interactive, Stop) caused the container to exit. // It's safe to call multiple times - captureLogs is idempotent. -func (m *Manager) monitorContainerExit(ctx context.Context, r *Run) { - // Wait for container to exit (no timeout - let it run as long as needed) - // This is the ONLY place that calls WaitContainer to avoid race conditions - exitCode, err := m.runtime.WaitContainer(ctx, r.ContainerID) - - // If WaitContainer failed due to context cancellation (e.g. Manager.Close()), - // bail out without touching run state. The container is likely still running - // and a future Manager instance will pick it up via loadPersistedRuns. - // We check err (not ctx.Err()) so that a natural container exit that races - // with context cancellation still gets its logs captured and state updated. - if err != nil && (errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded)) { - return - } +func (m *Manager) monitorContainerExit(r *Run) { + // Wait for container to exit (no timeout - let it run as long as needed). + // This is the ONLY place that calls WaitContainer to avoid race conditions. + // Uses context.Background() so this goroutine is not canceled by Close(). + // Close() waits on monitorWg before closing the runtime. + exitCode, err := m.runtime.WaitContainer(context.Background(), r.ContainerID) // CRITICAL: Capture logs IMMEDIATELY after container exits, BEFORE signaling. // Docker may start removing/cleaning the container at any moment after exit. 
@@ -2925,13 +2919,20 @@ func (m *Manager) RuntimeType() string { } // Close releases manager resources. +// It waits for active monitorContainerExit goroutines to finish so that +// logs are captured and state is updated before the runtime is closed. func (m *Manager) Close() error { - // Cancel background goroutines (monitorContainerExit) so lingering - // WaitContainer calls don't keep connections to Docker open. + // Cancel the manager context. if m.cancel != nil { m.cancel() } + // Wait for all monitor goroutines to finish. This ensures log capture + // and state updates complete before we close the runtime connection. + // monitorContainerExit uses context.Background() so it is not + // affected by the cancel above. + m.monitorWg.Wait() + // Stop all proxy/SSH servers and unregister runs from daemon, // with a 10-second overall timeout. closeCtx, closeCancel := context.WithTimeout(context.Background(), 10*time.Second) From 838cc7d83186c5e15eb548a0e0a5653296efddfa Mon Sep 17 00:00:00 2001 From: Dan Pupius Date: Mon, 2 Mar 2026 18:01:46 -0800 Subject: [PATCH 25/28] fix(run): cleanup interactive resources, fix Docker log inspect error Address code review feedback: - cleanupResources now called on normal interactive exit (natural container exit path). Without this, Docker containers, networks, sidecars, and temp dirs leaked until moat destroy. - Docker ContainerLogs returns error on inspect failure instead of silently producing garbled multiplexed output. - Run ID printed before both interactive and non-interactive sessions so moat run -i shows the run ID before the session starts. 
--- cmd/moat/cli/exec.go | 9 ++++----- internal/container/docker.go | 8 ++++++-- internal/run/manager.go | 9 +++++++++ 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/cmd/moat/cli/exec.go b/cmd/moat/cli/exec.go index 7f66d624..40d886e6 100644 --- a/cmd/moat/cli/exec.go +++ b/cmd/moat/cli/exec.go @@ -229,12 +229,15 @@ func ExecuteRun(ctx context.Context, opts intcli.ExecOptions) (*run.Run, error) } } - // Call the OnRunCreated callback if provided + // Call the OnRunCreated callback if provided (provider commands set this). + // For moat run, print the run info here so it appears before the session starts. if opts.OnRunCreated != nil { opts.OnRunCreated(intcli.RunInfo{ ID: r.ID, Name: r.Name, }) + } else { + fmt.Printf("Started %s (%s)\n", r.Name, r.ID) } // Interactive mode: use StartAttached to ensure TTY is connected before process starts. @@ -264,10 +267,6 @@ func ExecuteRun(ctx context.Context, opts intcli.ExecOptions) (*run.Run, error) } } - // If no callback printed a started message, print one here (moat run path) - if opts.OnRunCreated == nil { - fmt.Printf("Started %s (%s)\n", r.Name, r.ID) - } fmt.Println(ui.Dim("Press Ctrl+C to stop")) fmt.Println() diff --git a/internal/container/docker.go b/internal/container/docker.go index 604a2ab6..155e1f18 100644 --- a/internal/container/docker.go +++ b/internal/container/docker.go @@ -346,8 +346,12 @@ func (r *DockerRuntime) ContainerLogs(ctx context.Context, containerID string) ( // Check if container was created with TTY inspect, inspectErr := r.cli.ContainerInspect(ctx, containerID) - if inspectErr != nil || inspect.Config.Tty { - // TTY mode or inspect failed: logs are plain text + if inspectErr != nil { + raw.Close() + return nil, fmt.Errorf("inspecting container for log format: %w", inspectErr) + } + if inspect.Config.Tty { + // TTY mode: logs are plain text return raw, nil } diff --git a/internal/run/manager.go b/internal/run/manager.go index 56152135..072f26f1 100644 --- 
a/internal/run/manager.go +++ b/internal/run/manager.go @@ -2419,6 +2419,15 @@ func (m *Manager) StartAttached(ctx context.Context, runID string, stdin io.Read } _ = r.SaveMetadata() + // Clean up resources (network, sidecars, temp dirs) on natural exit. + // monitorContainerExit is not running for interactive runs, so this is + // the only cleanup path. Idempotent via cleanupOnce. + if !callerWillStop { + cleanupCtx, cleanupCancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cleanupCancel() + m.cleanupResources(cleanupCtx, r) + } + return attachErr } From 519eaf47ae3449e1625242d2551ea7bfb4c4db54 Mon Sep 17 00:00:00 2001 From: Daniel Pupius Date: Mon, 2 Mar 2026 18:19:36 -0800 Subject: [PATCH 26/28] fix(container): use polling for Apple container wait to avoid hang Apple's "container wait" CLI command hangs indefinitely when a container is stopped or removed externally (e.g., via "moat stop" from another process). This blocked monitorContainerExit forever, causing the original "moat run" process to hang. Switch to always using the polling-based wait, which checks container status via inspect every 500ms. If inspect fails (container removed), treat it as a clean exit instead of returning an error. --- internal/container/apple.go | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/internal/container/apple.go b/internal/container/apple.go index e4ab3c99..6fe1f977 100644 --- a/internal/container/apple.go +++ b/internal/container/apple.go @@ -353,26 +353,11 @@ func (r *AppleRuntime) StopContainer(ctx context.Context, containerID string) er // WaitContainer blocks until the container exits and returns the exit code. 
func (r *AppleRuntime) WaitContainer(ctx context.Context, containerID string) (int64, error) { - // Apple's container CLI may have a "wait" command, or we poll with inspect - // Try "container wait" first - cmd := exec.CommandContext(ctx, r.containerBin, "wait", containerID) - var stdout, stderr bytes.Buffer - cmd.Stdout = &stdout - cmd.Stderr = &stderr - - if err := cmd.Run(); err != nil { - // If wait is not available, fall back to polling - return r.waitByPolling(ctx, containerID) - } - - // Parse exit code from output - exitStr := strings.TrimSpace(stdout.String()) - exitCode, err := strconv.ParseInt(exitStr, 10, 64) - if err != nil { - return -1, fmt.Errorf("parsing exit code %q: %w", exitStr, err) - } - - return exitCode, nil + // Always poll with inspect rather than using "container wait". + // Apple's "container wait" hangs indefinitely when a container is + // stopped or removed externally (e.g., via "moat stop" from another + // process), which blocks monitorContainerExit forever. + return r.waitByPolling(ctx, containerID) } // waitByPolling polls the container status until it exits. @@ -391,7 +376,8 @@ func (r *AppleRuntime) waitByPolling(ctx context.Context, containerID string) (i cmd.Stdout = &stdout if err := cmd.Run(); err != nil { - return -1, fmt.Errorf("inspecting container: %w", err) + // Container no longer exists (removed externally) — treat as exited + return 0, nil } // Apple's inspect returns an array of container info objects From bd0a9cc78292a1a772b1587c8f65d121c7da3a09 Mon Sep 17 00:00:00 2001 From: Dan Pupius Date: Mon, 2 Mar 2026 18:19:20 -0800 Subject: [PATCH 27/28] docs: add interactive-only simplification plan Implementation plan for the fix/interactive-only branch. Updated to reflect the final design: non-interactive runs block with log streaming instead of fire-and-forget, plus additional fixes for monitor lifecycle, Docker log demuxing, and interactive resource cleanup. 
--- ...6-02-27-interactive-only-simplification.md | 451 ++++++++++++++++++ 1 file changed, 451 insertions(+) create mode 100644 docs/plans/2026-02-27-interactive-only-simplification.md diff --git a/docs/plans/2026-02-27-interactive-only-simplification.md b/docs/plans/2026-02-27-interactive-only-simplification.md new file mode 100644 index 00000000..17cb43ab --- /dev/null +++ b/docs/plans/2026-02-27-interactive-only-simplification.md @@ -0,0 +1,451 @@ +# Interactive-Only Simplification + +> **Status:** Implemented. Merged via PR #196 on the `fix/interactive-only` branch. + +**Goal:** Simplify moat's execution model to two clean modes: interactive (always attached, no detach) and non-interactive (blocking with log streaming to the terminal). + +**Architecture:** Remove the `moat attach` command, the `--detach` flag, escape proxy detach sequences, and all "output-only attached" code paths. Interactive runs own the terminal for their lifetime. Non-interactive runs block while streaming container output, with Ctrl+C for graceful shutdown. + +**Tech Stack:** Go, Cobra CLI, existing container runtime interface + +--- + +## Context + +The current model has accumulated complexity around attach/detach: + +- 4 execution functions: `ExecuteRun`, `RunAttached`, `RunInteractive`, `RunInteractiveAttached` +- Separate `moat attach` command with its own interactive/non-interactive modes +- Escape proxy with Ctrl-/ d (detach) and Ctrl-/ k (stop) +- Double Ctrl+C detection for "detach vs stop" in non-interactive attached mode +- `--detach` flag that makes non-interactive runs print advice about `moat attach` + +The new model: + +| Mode | Behavior | Terminal | Exit | +|------|----------|----------|------| +| **Interactive** (`-i`) | Stdin/stdout/stderr connected, TTY allocated | Owns terminal | Ctrl-/ k to stop. Ctrl+C goes to container. Process exit ends session. | +| **Non-interactive** (default) | Output streamed to terminal. Blocks until exit. | Stdout only | Ctrl+C to stop. 
Process exit ends session. |
+
+## What Gets Removed
+
+- `cmd/moat/cli/attach.go` — entire file deleted
+- `RunAttached()` in `exec.go` — non-interactive attached mode
+- `RunInteractive()` in `exec.go` — old interactive path (superseded by `RunInteractiveAttached`)
+- `--detach` / `-d` flag from `ExecFlags`
+- `EscapeDetach` action from escape proxy (keep `EscapeStop`)
+- Double Ctrl+C detection logic
+- All "Use 'moat attach ...'" messages
+- `attachOutputMode()` and `attachInteractiveMode()` from attach.go
+
+## What Changes
+
+- `ExecuteRun()` simplified: interactive → `RunInteractiveAttached()`, non-interactive → start + stream logs + wait for exit
+- Escape proxy simplified: only Ctrl-/ k (stop) remains. Ctrl-/ d removed.
+- `EscapeHelpText()` updated to only show stop sequence
+- `moat wt` defaults to non-interactive (remove the `interactive := !wtFlags.Detach` hack)
+- Provider CLIs (claude, codex, gemini): remove references to `moat attach` in messages
+- Help text for `moat run` updated to remove detach/attach documentation
+
+## What Stays
+
+- `RunInteractiveAttached()` — this is the only attached execution path
+- `StartAttached()` in container runtime — still needed for interactive
+- `Attach()` in container runtime — exception: this does NOT stay. It is removed from the runtime interface in Task 5, since its only caller was the deleted attach command; `StartAttached()` is what interactive mode keeps.
+- `FollowLogs()` / `RecentLogs()` in manager — still used by `moat logs`
+- Status bar / TUI — still used for interactive sessions
+- TTY tracing — still used for interactive sessions
+- `EscapeStop` action — Ctrl-/ k still stops the run
+
+---
+
+### Task 1: Remove `moat attach` command
+
+**Files:**
+- Delete: `cmd/moat/cli/attach.go`
+
+**Step 1: Delete the attach command file**
+
+Delete `cmd/moat/cli/attach.go` entirely.
This removes:
+- `attachCmd` cobra command and its `init()` registration
+- `attachToRun()`, `attachOutputMode()`, `attachInteractiveMode()`
+- `attachInteractive` and `attachTTYTrace` variables
+- Timing constants (`doublePressWindow`, `containerExitCheckDelay`, `ttyStartupDelay`)
+
+**Step 2: Verify it compiles**
+
+Run: `go build ./...`
+Expected: BUILD SUCCESS (the attach command's functions and variables are file-local — but see the note below about `ttyStartupDelay`, which is the one exception)
+
+Note: `ttyStartupDelay` is also used in `exec.go`, so deleting attach.go will break the build unless that constant is first moved to `exec.go`. Move it before deleting.
+
+**Step 3: Commit**
+
+```bash
+git add -A
+git commit -m "refactor(cli): remove moat attach command
+
+Simplifying to two modes: interactive (attached) and non-interactive
+(monitor via moat logs). The attach command allowed reconnecting to
+running containers, which added complexity without sufficient value."
+```
+
+---
+
+### Task 2: Remove `--detach` flag and simplify `ExecFlags`
+
+**Files:**
+- Modify: `internal/cli/types.go`
+- Modify: `cmd/moat/cli/run.go`
+- Modify: `cmd/moat/cli/exec.go`
+- Modify: `cmd/moat/cli/wt.go`
+- Modify: `internal/providers/claude/cli.go`
+- Modify: `internal/providers/codex/cli.go`
+- Modify: `internal/providers/gemini/cli.go`
+
+**Step 1: Remove `Detach` from `ExecFlags`**
+
+In `internal/cli/types.go`, remove the `Detach bool` field from `ExecFlags` struct and the corresponding `cmd.Flags().BoolVarP(&flags.Detach, "detach", "d", ...)` line from `AddExecFlags`.
+
+**Step 2: Remove `TTYTrace` from `ExecFlags`**
+
+The `TTYTrace` field on `ExecFlags` was only used by the attach command's `--tty-trace` flag. (Note: `RunInteractiveAttached` takes `tracePath` as a parameter, and the `moat run` command doesn't expose this flag — it was attach-only.)
+
+Before removing it, search for any remaining use of `opts.Flags.TTYTrace` (e.g. in `moat run`). If a usage exists, keep the field; otherwise remove it.
+ +**Step 3: Update `ExecuteRun` in `exec.go`** + +Remove the `opts.Flags.Detach` check and the detached-mode message block. The function should now: + +1. If interactive: call `RunInteractiveAttached()` (same as now) +2. If non-interactive: `manager.Start()` → stream logs via `FollowLogs` → block on `manager.Wait()` with signal handling + +Replace the current detach block and `RunAttached` call with a blocking loop that streams container output to stdout while waiting for exit or Ctrl+C. See `cmd/moat/cli/exec.go` for the final implementation. + +**Step 4: Remove `RunAttached` and `RunInteractive` from `exec.go`** + +Delete the `RunAttached()` function entirely (non-interactive attached mode). +Delete the `RunInteractive()` function entirely (old interactive path, superseded by `RunInteractiveAttached`). + +**Step 5: Update `moat run` help text** + +In `cmd/moat/cli/run.go`, update the `Long` description to remove references to detach, attach. The new help should describe: + +``` +Non-interactive mode (default): + Output streams to the terminal. Press Ctrl+C to stop. + +Interactive mode (-i): + Ctrl-/ k Stop the run + Ctrl+C Sent to container process +``` + +Remove `-d` from examples. + +**Step 6: Fix `moat wt` interactive logic** + +In `cmd/moat/cli/wt.go`, the current code does `interactive := !wtFlags.Detach` which made worktree runs interactive by default (unless detached). With the removal of detach, change this to respect the config: + +```go +// Determine interactive mode: CLI flags > config > default (non-interactive) +interactive := false +if cfg != nil && cfg.Interactive { + interactive = true +} +``` + +Note: `moat wt` doesn't have its own `-i` flag. If the agent.yaml says `interactive: true`, it'll be interactive. Otherwise non-interactive. This is the right default for worktree runs. 
+ +**Step 7: Update provider CLIs** + +In `internal/providers/claude/cli.go`, `internal/providers/codex/cli.go`, and `internal/providers/gemini/cli.go`: +- Remove references to `Detach` flag (e.g., `!claudeFlags.Detach` checks) +- Remove any "Use moat attach" messages +- The providers already determine interactivity correctly (prompt flag = non-interactive, no prompt = interactive) + +**Step 8: Verify build** + +Run: `go build ./...` + +**Step 9: Run tests** + +Run: `make test-unit` + +**Step 10: Commit** + +```bash +git add -A +git commit -m "refactor(cli): remove --detach flag and simplify execution modes + +Non-interactive runs now always start in background. Interactive runs +always own the terminal. Removes RunAttached and RunInteractive in +favor of the single RunInteractiveAttached path." +``` + +--- + +### Task 3: Simplify escape proxy (remove detach action) + +**Files:** +- Modify: `internal/term/escape.go` +- Modify: `internal/term/escape_test.go` + +**Step 1: Remove `EscapeDetach` from escape proxy** + +In `internal/term/escape.go`: + +1. Remove `EscapeDetach` from the `EscapeAction` constants (keep `EscapeNone` and `EscapeStop`) +2. Remove the `case EscapeDetach:` from `EscapeError.Error()` +3. Remove `escapeKeyDetach` constant (`byte = 'd'`) +4. In `EscapeProxy.Read()`, remove the `case escapeKeyDetach:` handling — when 'd' follows Ctrl-/, treat it as an unrecognized key (pass both bytes through) + +**Step 2: Update `EscapeHelpText()`** + +Change to: `"Escape: Ctrl-/ k (stop)"` + +**Step 3: Update tests** + +In `internal/term/escape_test.go`, remove or update tests for the detach escape sequence. 
Tests for: +- Ctrl-/ d should now pass both bytes through (not trigger escape) +- Ctrl-/ k should still trigger `EscapeStop` +- Ctrl-/ Ctrl-/ should still pass single Ctrl-/ through + +**Step 4: Run tests** + +Run: `go test ./internal/term/ -v` + +**Step 5: Commit** + +```bash +git add -A +git commit -m "refactor(term): remove detach escape sequence + +Only Ctrl-/ k (stop) remains. Ctrl-/ d is no longer an escape +sequence and passes through to the container." +``` + +--- + +### Task 4: Clean up `RunInteractiveAttached` (remove detach handling) + +**Files:** +- Modify: `cmd/moat/cli/exec.go` + +**Step 1: Remove detach handling from `RunInteractiveAttached`** + +In `RunInteractiveAttached()`: + +1. Remove the `case term.EscapeDetach:` from the escape action switch. Only `EscapeStop` remains. +2. In the `case err := <-attachDone:` handler, remove the "detached" message path. If attachment ends unexpectedly (container still running), treat it as an error or a clean exit, not a detach. + +The simplified `attachDone` handler: + +```go +case err := <-attachDone: + if err != nil && ctx.Err() == nil && !term.IsEscapeError(err) { + log.Error("run failed", "id", r.ID, "error", err) + return fmt.Errorf("run failed: %w", err) + } + fmt.Printf("Run %s completed\n", r.ID) + return nil +``` + +**Step 2: Update escape help text in the function** + +The `fmt.Printf("%s\n\n", term.EscapeHelpText())` call will now print the updated text. + +**Step 3: Remove detach-related messages** + +Remove all `"Use 'moat attach %s' to reattach"` messages from exec.go. + +**Step 4: Verify build and run tests** + +Run: `go build ./... && make test-unit` + +**Step 5: Commit** + +```bash +git add -A +git commit -m "refactor(cli): remove detach handling from interactive mode + +Interactive sessions now run until the process exits or user sends +Ctrl-/ k to stop. No detach path exists." 
+``` + +--- + +### Task 5: Remove `Attach()` from container runtime interface + +**Files:** +- Modify: `internal/container/runtime.go` +- Modify: `internal/container/docker.go` +- Modify: `internal/container/apple.go` +- Modify: `internal/run/manager.go` + +**Step 1: Check if `Attach()` is still used** + +Search for calls to `runtime.Attach(` and `manager.Attach(`. After removing `attach.go` and `RunInteractive()`, the only remaining call should be in `manager.Attach()`. If nothing calls `manager.Attach()` anymore, we can remove it from both the manager and the runtime interface. + +**Step 2: Remove `Attach` from runtime interface** + +In `internal/container/runtime.go`, remove: +```go +Attach(ctx context.Context, id string, opts AttachOptions) error +``` + +**Step 3: Remove `Attach` implementations** + +- In `internal/container/docker.go`, remove the `Attach()` method +- In `internal/container/apple.go`, remove the `Attach()` method + +**Step 4: Remove `Manager.Attach()` from run manager** + +In `internal/run/manager.go`, remove the `Attach()` method. + +**Step 5: Verify build** + +Run: `go build ./...` + +**Step 6: Run tests** + +Run: `make test-unit` + +**Step 7: Commit** + +```bash +git add -A +git commit -m "refactor(container): remove Attach from runtime interface + +Only StartAttached remains for interactive sessions. The separate +Attach method was only needed for the removed moat attach command." +``` + +--- + +### Task 6: Clean up references and messages + +**Files:** +- Modify: `internal/providers/claude/cli.go` +- Modify: `internal/providers/claude/cli_test.go` +- Modify: `cmd/moat/cli/wt.go` +- Modify: `internal/cli/worktree.go` + +**Step 1: Update error messages that reference `moat attach`** + +Search for all remaining "moat attach" references: + +1. `internal/providers/claude/cli.go:424` — Change from "Use 'moat attach %s' to reconnect" to "Use 'moat logs %s' to view output or 'moat stop %s' to stop" +2. 
`internal/providers/claude/cli_test.go:356-357` — Update test to check for new message +3. `cmd/moat/cli/wt.go:146` — Change from "Attach with 'moat attach'" to "View logs with 'moat logs %s' or stop with 'moat stop %s'" +4. `internal/cli/worktree.go:67` — Same pattern as above + +**Step 2: Update help text** + +Review `moat run --help` and `moat wt --help` to ensure no attach/detach references remain. + +**Step 3: Verify build and tests** + +Run: `go build ./... && make test-unit` + +**Step 4: Commit** + +```bash +git add -A +git commit -m "fix(cli): update messages to remove moat attach references + +Point users to moat logs and moat stop instead of the removed +moat attach command." +``` + +--- + +### Task 7: Update documentation + +**Files:** +- Modify: `docs/content/reference/01-cli.md` (if it exists) +- Modify: `docs/content/reference/02-agent-yaml.md` (if `interactive` field docs need updating) +- Modify: `docs/plans/2026-01-19-interactive-attach-model.md` (mark as superseded) + +**Step 1: Update CLI reference docs** + +Remove documentation for: +- `moat attach` command +- `--detach` / `-d` flag +- Ctrl-/ d escape sequence + +Update documentation for: +- `moat run` — describe the two modes (interactive and non-interactive) +- Escape sequences — only Ctrl-/ k + +**Step 2: Update agent.yaml reference** + +If the `interactive` field is documented, update to clarify its new meaning: +- `interactive: true` — allocates TTY, connects stdin, session owns terminal +- `interactive: false` (default) — runs in background, monitor via `moat logs` + +**Step 3: Mark old plan as superseded** + +Add a note at the top of `docs/plans/2026-01-19-interactive-attach-model.md`: + +```markdown +> **Status:** Superseded by [Interactive-Only Simplification](2026-02-27-interactive-only-simplification.md) +``` + +**Step 4: Commit** + +```bash +git add -A +git commit -m "docs: update for interactive-only execution model + +Remove documentation for moat attach command, --detach flag, and 
+Ctrl-/ d escape sequence. Document the simplified two-mode model." +``` + +--- + +### Task 8: Run full test suite and lint + +**Step 1: Run linter** + +Run: `make lint` + +Fix any issues. + +**Step 2: Run full unit tests** + +Run: `make test-unit` + +Fix any failures. + +**Step 3: Commit any fixes** + +```bash +git add -A +git commit -m "fix: address lint and test issues from simplification" +``` + +--- + +## Implementation Notes + +The following additional changes were made during implementation beyond the original plan: + +### Non-interactive mode: blocking with log streaming (not fire-and-forget) + +The original plan called for fire-and-forget non-interactive runs. During implementation, this was found to be problematic because `manager.Close()` cancels the monitor goroutine's context, killing log capture before it completes. The solution: non-interactive runs block while streaming container output to stdout (similar to `docker run`). This ensures the monitor goroutine finishes before the process exits. True fire-and-forget requires daemon-level monitoring (tracked in issue #197). + +### Monitor goroutine lifecycle fix + +`monitorContainerExit` was changed to use `context.Background()` instead of the manager's context, so `Close()` doesn't kill it. A `sync.WaitGroup` in `Close()` ensures the runtime stays alive while monitors finish. Only monitors started via `Start()` are tracked — inherited monitors from `loadPersistedRuns` are fire-and-forget since they belong to other CLI invocations. + +### Docker log demuxing + +`ContainerLogs` for non-TTY containers returns Docker's multiplexed stdout/stderr format. Added demuxing via `stdcopy.StdCopy` with `io.Pipe` to produce clean output. Also fixed inspect failure to return an error instead of silently producing garbled output. + +### Interactive resource cleanup + +`cleanupResources` was not called on natural interactive exit (only on escape-stop and signal paths). 
Added cleanup in the `StartAttached` non-error path to prevent resource leaks. + +### Interactive metadata updates + +After an interactive container exits normally, `state` and `stopped_at` are now updated via state transition in `StartAttached`. From e94f254751d06f6d6b4a37f238f8aafa4359f561 Mon Sep 17 00:00:00 2001 From: Daniel Pupius Date: Mon, 2 Mar 2026 19:39:09 -0800 Subject: [PATCH 28/28] fix(container): harden Apple container wait polling against transient errors Address code review feedback on the polling-based wait: - Distinguish "not found" errors (container removed) from transient XPC errors by checking stderr, matching RemoveContainer's pattern - Handle Apple's inspect returning an empty JSON array for removed containers (exits 0 with "[]" rather than erroring) - Log and retry on transient errors instead of treating all failures as container removal --- internal/container/apple.go | 41 +++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/internal/container/apple.go b/internal/container/apple.go index 6fe1f977..013b739e 100644 --- a/internal/container/apple.go +++ b/internal/container/apple.go @@ -372,26 +372,37 @@ func (r *AppleRuntime) waitByPolling(ctx context.Context, containerID string) (i // Check container status // Apple's container inspect outputs JSON directly (no --format flag) cmd := exec.CommandContext(ctx, r.containerBin, "inspect", containerID) - var stdout bytes.Buffer + var stdout, stderr bytes.Buffer cmd.Stdout = &stdout + cmd.Stderr = &stderr if err := cmd.Run(); err != nil { - // Container no longer exists (removed externally) — treat as exited - return 0, nil - } + stderrStr := stderr.String() + if strings.Contains(stderrStr, "notFound") || strings.Contains(stderrStr, "not found") { + // Container no longer exists (removed externally) — treat as exited + return 0, nil + } + // Transient error (XPC timeout, etc.) 
— log and keep polling + log.Debug("transient inspect error, retrying", "error", err, "stderr", stderrStr) + } else { + // Apple's inspect returns an array of container info objects + var info []struct { + Status string `json:"status"` + } + if err := json.Unmarshal(stdout.Bytes(), &info); err != nil { + return -1, fmt.Errorf("parsing container info: %w", err) + } - // Apple's inspect returns an array of container info objects - var info []struct { - Status string `json:"status"` - } - if err := json.Unmarshal(stdout.Bytes(), &info); err != nil { - return -1, fmt.Errorf("parsing container info: %w", err) - } + if len(info) == 0 { + // Empty result — container no longer exists (removed externally) + return 0, nil + } - if len(info) > 0 && (info[0].Status == "exited" || info[0].Status == "stopped") { - // Apple's container CLI doesn't provide exit code in inspect output - // Return 0 for stopped containers (best we can do) - return 0, nil + if info[0].Status == "exited" || info[0].Status == "stopped" { + // Apple's container CLI doesn't provide exit code in inspect output + // Return 0 for stopped containers (best we can do) + return 0, nil + } } // Sleep before next poll to avoid hammering the CLI