diff --git a/cmd/moat/cli/attach.go b/cmd/moat/cli/attach.go deleted file mode 100644 index e8a941b8..00000000 --- a/cmd/moat/cli/attach.go +++ /dev/null @@ -1,375 +0,0 @@ -package cli - -import ( - "context" - "fmt" - "io" - "os" - "os/signal" - "syscall" - "time" - - "github.com/majorcontext/moat/internal/log" - "github.com/majorcontext/moat/internal/run" - "github.com/majorcontext/moat/internal/term" - "github.com/majorcontext/moat/internal/trace" - "github.com/spf13/cobra" -) - -// Timing constants for attach behavior -const ( - // doublePressWindow is how quickly Ctrl+C must be pressed twice to stop a run - doublePressWindow = 500 * time.Millisecond - // containerExitCheckDelay is how long to wait for container exit detection - containerExitCheckDelay = 200 * time.Millisecond - // ttyStartupDelay is how long to wait before resizing TTY after container starts - // This allows the container process to initialize before we resize. - ttyStartupDelay = 200 * time.Millisecond -) - -var ( - attachInteractive bool - attachTTYTrace string -) - -var attachCmd = &cobra.Command{ - Use: "attach ", - Short: "Attach to a running agent", - Long: `Attach local stdin, stdout, and stderr to a running agent. - -Accepts a run ID or name. If a name matches multiple runs, you must -specify the run ID. - -By default, attaches in the same mode the run was started with: - - If the run was started with -i, attach will use interactive mode - - Otherwise, only stdout/stderr are connected - -Use -i to force interactive mode, or -i=false to force output-only mode. 
- -Non-interactive mode: - Ctrl+C Detach (run continues) - Ctrl+C Ctrl+C Stop the run (within 500ms) - -Interactive mode (-i): - Ctrl-/ d Detach (run continues) - Ctrl-/ k Stop the run - Ctrl+C Sent to container process - -Examples: - # Attach by name or ID - moat attach my-agent - moat attach run_a1b2c3d4e5f6 - - # Force interactive mode - moat attach -i run_a1b2c3d4e5f6 - - # Force output-only mode even if run was started interactively - moat attach -i=false run_a1b2c3d4e5f6`, - Args: cobra.ExactArgs(1), - RunE: attachToRun, -} - -func init() { - rootCmd.AddCommand(attachCmd) - attachCmd.Flags().BoolVarP(&attachInteractive, "interactive", "i", false, "interactive mode (use -i=false to force output-only)") - attachCmd.Flags().StringVar(&attachTTYTrace, "tty-trace", "", "capture terminal I/O to file for debugging (e.g., session.json)") -} - -func attachToRun(cmd *cobra.Command, args []string) error { - // Create manager - manager, err := run.NewManager() - if err != nil { - return fmt.Errorf("creating run manager: %w", err) - } - defer manager.Close() - - // Resolve argument to a single run - runID, err := resolveRunArgSingle(manager, args[0]) - if err != nil { - return err - } - - // Verify run exists and is running - r, err := manager.Get(runID) - if err != nil { - return fmt.Errorf("run not found: %w", err) - } - - if state := r.GetState(); state != run.StateRunning { - return fmt.Errorf("run %s is not running (state: %s)", runID, state) - } - - // Determine interactive mode: - // - If -i flag was explicitly set, use that value - // - Otherwise, use the run's original mode - interactive := r.Interactive // Default to run's mode - if cmd.Flags().Changed("interactive") { - interactive = attachInteractive - } - - fmt.Printf("Attaching to run %s (%s)...\n", r.ID, r.Name) - if interactive { - fmt.Printf("Escape: Ctrl-/ d (detach), Ctrl-/ k (stop). 
Ctrl+C goes to container.\n") - } else { - fmt.Println("Press Ctrl+C to detach (run continues), Ctrl+C twice to stop") - } - fmt.Println() - - ctx := context.Background() - - if interactive { - return attachInteractiveMode(ctx, manager, r, attachTTYTrace) - } - - return attachOutputMode(ctx, manager, r) -} - -// attachOutputMode attaches in output-only mode (no stdin). -// Uses container logs with follow mode for reliable output streaming. -func attachOutputMode(ctx context.Context, manager *run.Manager, r *run.Run) error { - // Set up signal handling - sigCh := make(chan os.Signal, 1) - signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) - defer signal.Stop(sigCh) - - var lastSigTime time.Time - - // Create cancellable context for logs - logsCtx, logsCancel := context.WithCancel(ctx) - defer logsCancel() - - // Use logs with follow mode for output-only attach - // This is more reliable than attach for containers already running - logsDone := make(chan error, 1) - go func() { - logsDone <- manager.FollowLogs(logsCtx, r.ID, os.Stdout) - }() - - // Also monitor if container exits - waitDone := make(chan error, 1) - go func() { - waitDone <- manager.Wait(ctx, r.ID) - }() - - for { - select { - case sig := <-sigCh: - now := time.Now() - if now.Sub(lastSigTime) < doublePressWindow { - // Double Ctrl+C - stop the run - fmt.Printf("\nStopping run %s...\n", r.ID) - logsCancel() - if err := manager.Stop(context.Background(), r.ID); err != nil { - log.Error("failed to stop run", "id", r.ID, "error", err) - } - fmt.Printf("Run %s stopped\n", r.ID) - return nil - } - - // First Ctrl+C - detach - log.Debug("received signal, detaching", "signal", sig) - fmt.Printf("\nDetaching from run %s (still running)\n", r.ID) - fmt.Printf("Press Ctrl+C again within 500ms to stop\n") - return nil - - case <-logsDone: - // Logs ended - wait a moment for container exit to be detected - select { - case err := <-waitDone: - if err != nil { - return err - } - fmt.Printf("Run %s completed\n", 
r.ID) - return nil - case <-time.After(containerExitCheckDelay): - // Container didn't exit quickly - check run state - currentRun, getErr := manager.Get(r.ID) - if getErr != nil || currentRun.State != run.StateRunning { - // Run ended or was cleaned up - fmt.Printf("Run %s completed\n", r.ID) - return nil - } - // Container still running, logs stream ended - fmt.Printf("\nDetached from run %s\n", r.ID) - return nil - } - - case err := <-waitDone: - logsCancel() - if err != nil { - return err - } - fmt.Printf("Run %s completed\n", r.ID) - return nil - } - } -} - -// attachInteractiveMode attaches with stdin connected. -func attachInteractiveMode(ctx context.Context, manager *run.Manager, r *run.Run, tracePath string) error { - // Set up TTY tracing if requested - // Note: We don't have the original command for attach, use a placeholder - command := []string{"(attach to " + r.Name + ")"} - tracer := setupTTYTracer(tracePath, r, command) - defer tracer.save() - - // Show recent logs before attaching so user has context - if logs, err := manager.RecentLogs(r.ID, 50); err == nil && len(logs) > 0 { - fmt.Print(logs) - // Add a newline if logs don't end with one - if len(logs) > 0 && logs[len(logs)-1] != '\n' { - fmt.Println() - } - } - - // Put terminal in raw mode to capture escape sequences without echo - if term.IsTerminal(os.Stdin) { - rawState, err := term.EnableRawMode(os.Stdin) - if err != nil { - log.Debug("failed to enable raw mode", "error", err) - // Continue without raw mode - escapes may echo - } else { - defer func() { - if err := term.RestoreTerminal(rawState); err != nil { - log.Debug("failed to restore terminal", "error", err) - } - }() - } - } - - // Set up status bar for interactive session - statusWriter, statusCleanup, stdout := setupStatusBar(manager, r) - defer statusCleanup() - - // Wrap stdout with tracer if tracing is enabled - if tracer != nil { - stdout = trace.NewRecordingWriter(stdout, tracer.recorder, trace.EventStdout) - } - - // Wrap 
stdin with escape proxy to detect detach/stop sequences - escapeProxy := term.NewEscapeProxy(os.Stdin) - - // Set up callback to update footer when escape sequence is in progress - if statusWriter != nil { - statusWriter.SetupEscapeHints(escapeProxy) - } - - // Wrap stdin with tracer if tracing is enabled - stdin := io.Reader(escapeProxy) - if tracer != nil { - stdin = trace.NewRecordingReader(escapeProxy, tracer.recorder, trace.EventStdin) - } - - // Set up signal handling - sigCh := make(chan os.Signal, 1) - signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM, syscall.SIGWINCH) - defer signal.Stop(sigCh) - - // Channel to receive escape actions from the attach goroutine - escapeCh := make(chan term.EscapeAction, 1) - - attachCtx, attachCancel := context.WithCancel(ctx) - defer attachCancel() - - attachDone := make(chan error, 1) - go func() { - err := manager.Attach(attachCtx, r.ID, stdin, stdout, os.Stderr) - // Check if the error is an escape sequence - if term.IsEscapeError(err) { - escapeCh <- term.GetEscapeAction(err) - attachDone <- nil - } else { - attachDone <- err - } - }() - - waitDone := make(chan error, 1) - go func() { - waitDone <- manager.Wait(ctx, r.ID) - }() - - for { - select { - case sig := <-sigCh: - if sig == syscall.SIGWINCH { - // Handle terminal resize - if statusWriter != nil && term.IsTerminal(os.Stdout) { - width, height := term.GetSize(os.Stdout) - if width > 0 && height > 0 { - // Record resize event for tracing - if tracer != nil { - tracer.recorder.AddResize(width, height) - } - _ = statusWriter.Resize(width, height) - // Also resize container TTY - // #nosec G115 -- width/height are validated positive above - _ = manager.ResizeTTY(ctx, r.ID, uint(height), uint(width)) - } - } - continue // Don't break out of loop - } - if sig == syscall.SIGTERM { - fmt.Printf("\nStopping run %s...\n", r.ID) - attachCancel() - if err := manager.Stop(context.Background(), r.ID); err != nil { - log.Error("failed to stop run", "id", r.ID, "error", 
err) - } - return nil - } - // SIGINT is forwarded to container via stdin/tty - - case action := <-escapeCh: - // Handle escape sequence - switch action { - case term.EscapeDetach: - attachCancel() - fmt.Printf("\r\nDetached from run %s (still running)\r\n", r.ID) - fmt.Printf("Use 'moat attach %s' to reattach\r\n", r.ID) - return nil - - case term.EscapeStop: - fmt.Printf("\r\nStopping run %s...\r\n", r.ID) - attachCancel() - if err := manager.Stop(context.Background(), r.ID); err != nil { - log.Error("failed to stop run", "id", r.ID, "error", err) - } - fmt.Printf("Run %s stopped\r\n", r.ID) - return nil - } - - case err := <-attachDone: - // Attach ended - wait a moment for container exit to be detected - if err != nil && ctx.Err() == nil && !term.IsEscapeError(err) { - log.Error("attach failed", "id", r.ID, "error", err) - } - // Give the wait goroutine time to detect container exit - select { - case waitErr := <-waitDone: - if waitErr != nil { - return waitErr - } - fmt.Printf("Run %s completed\n", r.ID) - return nil - case <-time.After(containerExitCheckDelay): - // Container didn't exit quickly - check run state - currentRun, getErr := manager.Get(r.ID) - if getErr != nil || currentRun.State != run.StateRunning { - // Run ended or was cleaned up - fmt.Printf("Run %s completed\n", r.ID) - return nil - } - // Container still running, we just got disconnected - fmt.Printf("\nDetached from run %s\n", r.ID) - return nil - } - - case err := <-waitDone: - attachCancel() - if err != nil { - return err - } - fmt.Printf("Run %s completed\n", r.ID) - return nil - } - } -} diff --git a/cmd/moat/cli/doctor_claude.go b/cmd/moat/cli/doctor_claude.go index ab607065..6b105ab6 100644 --- a/cmd/moat/cli/doctor_claude.go +++ b/cmd/moat/cli/doctor_claude.go @@ -945,7 +945,7 @@ func testContainerAuth(ctx context.Context, diag *claudeDiagnostic) error { diag.ContainerTest = result // Start container (don't stream logs to avoid cluttering output) - if startErr := mgr.Start(ctx, 
r.ID, run.StartOptions{StreamLogs: false}); startErr != nil { + if startErr := mgr.Start(ctx, r.ID, run.StartOptions{}); startErr != nil { return fmt.Errorf("starting container: %w", startErr) } diff --git a/cmd/moat/cli/exec.go b/cmd/moat/cli/exec.go index 22f09b04..40d886e6 100644 --- a/cmd/moat/cli/exec.go +++ b/cmd/moat/cli/exec.go @@ -20,6 +20,13 @@ import ( "github.com/spf13/cobra" ) +// Timing constants for interactive execution behavior +const ( + // ttyStartupDelay is how long to wait before resizing TTY after container starts. + // This allows the container process to initialize before we resize. + ttyStartupDelay = 200 * time.Millisecond +) + // Re-export types from internal/cli for backward compatibility // with code in cmd/moat/cli that uses these types. type ExecFlags = intcli.ExecFlags @@ -202,7 +209,6 @@ func ExecuteRun(ctx context.Context, opts intcli.ExecOptions) (*run.Run, error) Rebuild: opts.Flags.Rebuild, KeepContainer: opts.Flags.KeepContainer, Interactive: opts.Interactive, - TTY: opts.TTY, } // Create run @@ -223,24 +229,26 @@ func ExecuteRun(ctx context.Context, opts intcli.ExecOptions) (*run.Run, error) } } - // Call the OnRunCreated callback if provided + // Call the OnRunCreated callback if provided (provider commands set this). + // For moat run, print the run info here so it appears before the session starts. if opts.OnRunCreated != nil { opts.OnRunCreated(intcli.RunInfo{ ID: r.ID, Name: r.Name, }) + } else { + fmt.Printf("Started %s (%s)\n", r.Name, r.ID) } - // Interactive mode: use StartAttached to ensure TTY is connected before process starts + // Interactive mode: use StartAttached to ensure TTY is connected before process starts. // This is required for TUI applications like Codex CLI that need to detect terminal // capabilities immediately on startup. 
- if opts.Interactive && !opts.Flags.Detach { + if opts.Interactive { return r, RunInteractiveAttached(ctx, manager, r, opts.Command, opts.Flags.TTYTrace) } - // Start run (non-interactive or detached) - startOpts := run.StartOptions{StreamLogs: !opts.Interactive} - if err := manager.Start(ctx, r.ID, startOpts); err != nil { + // Non-interactive: start the container, stream its output, and wait for exit. + if err := manager.Start(ctx, r.ID, run.StartOptions{}); err != nil { log.Error("failed to start run", "id", r.ID, "error", err) return r, fmt.Errorf("starting run: %w", err) } @@ -259,217 +267,50 @@ func ExecuteRun(ctx context.Context, opts intcli.ExecOptions) (*run.Run, error) } } - // If detached, return immediately - if opts.Flags.Detach { - fmt.Printf("\nRun %s started in background\n", r.ID) - fmt.Printf("Use 'moat logs %s' to view output\n", r.ID) - fmt.Printf("Use 'moat attach %s' to attach\n", r.ID) - fmt.Printf("Use 'moat stop %s' to stop\n", r.ID) - return r, nil - } - - // Non-interactive attached mode: stream output, Ctrl+C detaches - return r, RunAttached(ctx, manager, r) -} + fmt.Println(ui.Dim("Press Ctrl+C to stop")) + fmt.Println() -// RunAttached runs in attached mode where output is streamed but stdin is not connected. -// Ctrl+C detaches (leaves run active), double Ctrl+C stops the run. -func RunAttached(ctx context.Context, manager *run.Manager, r *run.Run) error { - // Set up signal handling for detach behavior - sigCh := make(chan os.Signal, 1) - signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) - defer signal.Stop(sigCh) - - // Track Ctrl+C timing for double-press detection - lastSigTime := time.Time{} - - // Create a cancellable context for the wait - waitCtx, waitCancel := context.WithCancel(ctx) - defer waitCancel() - - // Channel to receive wait result - waitDone := make(chan error, 1) + // Stream container logs to stdout in a goroutine. + // FollowLogs blocks until the container exits or context is canceled. 
+ logCtx, logCancel := context.WithCancel(ctx) + defer logCancel() go func() { - waitDone <- manager.Wait(waitCtx, r.ID) - }() - - for { - select { - case sig := <-sigCh: - now := time.Now() - if now.Sub(lastSigTime) < doublePressWindow { - // Double Ctrl+C - stop the run - fmt.Printf("\nStopping run %s...\n", r.ID) - waitCancel() - if err := manager.Stop(context.Background(), r.ID); err != nil { - log.Error("failed to stop run", "id", r.ID, "error", err) - } - fmt.Printf("Run %s stopped\n", r.ID) - return nil - } - // First Ctrl+C - detach (double-press detection happens before this point) - log.Debug("received signal, detaching", "signal", sig) - fmt.Printf("\nDetaching from run %s (still running)\n", r.ID) - fmt.Printf("Press Ctrl+C again within 500ms to stop, or use 'moat stop %s'\n", r.ID) - fmt.Printf("Use 'moat attach %s' to reattach\n", r.ID) - // Don't cancel - let the run continue - return nil - - case err := <-waitDone: - if err != nil { - log.Error("run failed", "id", r.ID, "error", err) - return err - } - log.Info("run completed", "id", r.ID) - fmt.Printf("Run %s completed\n", r.ID) - return nil - } - } -} - -// RunInteractive runs in interactive mode with stdin connected and TTY allocated. 
-func RunInteractive(ctx context.Context, manager *run.Manager, r *run.Run) error { - fmt.Printf("%s\n\n", term.EscapeHelpText()) - - // Resize container TTY to match terminal size - if term.IsTerminal(os.Stdout) { - width, height := term.GetSize(os.Stdout) - if width > 0 && height > 0 { - // #nosec G115 -- width/height are validated positive above - if err := manager.ResizeTTY(ctx, r.ID, uint(height), uint(width)); err != nil { - log.Debug("failed to resize TTY", "error", err) - } - } - } - - // Put terminal in raw mode to capture escape sequences without echo - if term.IsTerminal(os.Stdin) { - rawState, err := term.EnableRawMode(os.Stdin) - if err != nil { - log.Debug("failed to enable raw mode", "error", err) - // Continue without raw mode - escapes may echo - } else { - defer func() { - if err := term.RestoreTerminal(rawState); err != nil { - log.Debug("failed to restore terminal", "error", err) - } - }() + if err := manager.FollowLogs(logCtx, r.ID, os.Stdout); err != nil && logCtx.Err() == nil { + log.Debug("log streaming ended", "error", err) } - } + }() - // Set up signal handling + // Wait for container exit or signal sigCh := make(chan os.Signal, 1) signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) defer signal.Stop(sigCh) - // Wrap stdin with escape proxy to detect detach/stop sequences - escapeProxy := term.NewEscapeProxy(os.Stdin) - - // Channel to receive escape actions from the attach goroutine - escapeCh := make(chan term.EscapeAction, 1) - - // Attach to container with escape-proxied stdin - attachCtx, attachCancel := context.WithCancel(ctx) - defer attachCancel() - - attachDone := make(chan error, 1) - go func() { - err := manager.Attach(attachCtx, r.ID, escapeProxy, os.Stdout, os.Stderr) - // Check if the error is an escape sequence - if term.IsEscapeError(err) { - escapeCh <- term.GetEscapeAction(err) - attachDone <- nil - } else { - attachDone <- err - } - }() - - // Also wait for container to exit waitDone := make(chan error, 1) go func() { 
waitDone <- manager.Wait(ctx, r.ID) }() - for { - select { - case sig := <-sigCh: - // In interactive mode, forward SIGINT to container (it will handle it) - // Only SIGTERM causes us to stop - if sig == syscall.SIGTERM { - fmt.Printf("\nStopping run %s...\n", r.ID) - attachCancel() - if err := manager.Stop(context.Background(), r.ID); err != nil { - log.Error("failed to stop run", "id", r.ID, "error", err) - } - return nil - } - // SIGINT is forwarded to container via attached stdin/tty - - case action := <-escapeCh: - // Handle escape sequence - switch action { - case term.EscapeDetach: - attachCancel() - // Wait for attach goroutine to complete before checking state - select { - case <-attachDone: - case <-time.After(500 * time.Millisecond): - } - currentRun, _ := manager.Get(r.ID) - if currentRun != nil && currentRun.State == run.StateRunning { - fmt.Printf("\r\nDetached from run %s (still running)\r\n", r.ID) - fmt.Printf("Use 'moat attach %s' to reattach\r\n", r.ID) - } else { - fmt.Printf("\r\nRun %s stopped\r\n", r.ID) - } - return nil - - case term.EscapeStop: - fmt.Printf("\r\nStopping run %s...\r\n", r.ID) - attachCancel() - if err := manager.Stop(context.Background(), r.ID); err != nil { - log.Error("failed to stop run", "id", r.ID, "error", err) - } - fmt.Printf("Run %s stopped\r\n", r.ID) - return nil - } - - case err := <-attachDone: - // Attach ended - wait a moment for container exit to be detected - if err != nil && ctx.Err() == nil && !term.IsEscapeError(err) { - log.Error("attach failed", "id", r.ID, "error", err) - } - // Give the wait goroutine time to detect container exit - select { - case waitErr := <-waitDone: - if waitErr != nil { - return waitErr - } - fmt.Printf("Run %s completed\n", r.ID) - return nil - case <-time.After(containerExitCheckDelay): - // Container didn't exit quickly - check run state - currentRun, getErr := manager.Get(r.ID) - if getErr != nil || currentRun.State != run.StateRunning { - // Run ended or was cleaned up - 
fmt.Printf("Run %s completed\n", r.ID) - return nil - } - // Container still running, we just got disconnected - fmt.Printf("\nDetached from run %s\n", r.ID) - return nil - } - - case err := <-waitDone: - // Container exited - attachCancel() // Stop the attach goroutine - if err != nil { - log.Error("run failed", "id", r.ID, "error", err) - return err - } - fmt.Printf("Run %s completed\n", r.ID) - return nil + select { + case sig := <-sigCh: + log.Info("received signal, stopping run", "signal", sig, "id", r.ID) + logCancel() + fmt.Printf("\nStopping run %s...\n", r.ID) + if err := manager.Stop(ctx, r.ID); err != nil { + log.Error("failed to stop run", "id", r.ID, "error", err) + } + // Wait for monitorContainerExit to finish cleanup + <-waitDone + fmt.Println() + fmt.Println(ui.Dim(fmt.Sprintf("View output: moat logs %s", r.ID))) + return r, nil + case err := <-waitDone: + logCancel() + if err != nil { + return r, fmt.Errorf("run failed: %w", err) } + fmt.Println() + fmt.Println(ui.Dim(fmt.Sprintf("View output: moat logs %s", r.ID))) + return r, nil } } @@ -513,7 +354,7 @@ func RunInteractiveAttached(ctx context.Context, manager *run.Manager, r *run.Ru stdout = trace.NewRecordingWriter(stdout, tracer.recorder, trace.EventStdout) } - // Wrap stdin with escape proxy to detect detach/stop sequences + // Wrap stdin with escape proxy to detect stop sequences escapeProxy := term.NewEscapeProxy(os.Stdin) // Set up callback to update footer when escape sequence is in progress @@ -532,9 +373,6 @@ func RunInteractiveAttached(ctx context.Context, manager *run.Manager, r *run.Ru signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM, syscall.SIGWINCH) defer signal.Stop(sigCh) - // Channel to receive escape actions from the attach goroutine - escapeCh := make(chan term.EscapeAction, 1) - // Create cancellable context for the attach attachCtx, attachCancel := context.WithCancel(ctx) defer attachCancel() @@ -542,14 +380,7 @@ func RunInteractiveAttached(ctx context.Context, manager 
*run.Manager, r *run.Ru // Start with attachment - this ensures TTY is connected before process starts attachDone := make(chan error, 1) go func() { - err := manager.StartAttached(attachCtx, r.ID, stdin, stdout, os.Stderr) - // Check if the error is an escape sequence - if term.IsEscapeError(err) { - escapeCh <- term.GetEscapeAction(err) - attachDone <- nil - } else { - attachDone <- err - } + attachDone <- manager.StartAttached(attachCtx, r.ID, stdin, stdout, os.Stderr) }() // Give container a moment to start, then resize TTY to match terminal. @@ -602,38 +433,16 @@ func RunInteractiveAttached(ctx context.Context, manager *run.Manager, r *run.Ru } // SIGINT is forwarded to container via attached stdin/tty - case action := <-escapeCh: - // Handle escape sequence - switch action { - case term.EscapeDetach: - attachCancel() - // Wait for attach goroutine to complete before checking state - select { - case <-attachDone: - case <-time.After(500 * time.Millisecond): - } - currentRun, _ := manager.Get(r.ID) - if currentRun != nil && currentRun.State == run.StateRunning { - fmt.Printf("\r\nDetached from run %s (still running)\r\n", r.ID) - fmt.Printf("Use 'moat attach %s' to reattach\r\n", r.ID) - } else { - fmt.Printf("\r\nRun %s stopped\r\n", r.ID) - } - return nil - - case term.EscapeStop: + case err := <-attachDone: + if term.IsEscapeError(err) { fmt.Printf("\r\nStopping run %s...\r\n", r.ID) - attachCancel() - if err := manager.Stop(context.Background(), r.ID); err != nil { - log.Error("failed to stop run", "id", r.ID, "error", err) + if stopErr := manager.Stop(context.Background(), r.ID); stopErr != nil { + log.Error("failed to stop run", "id", r.ID, "error", stopErr) } fmt.Printf("Run %s stopped\r\n", r.ID) return nil } - - case err := <-attachDone: - // Attachment ended (container exited or error) - if err != nil && ctx.Err() == nil && !term.IsEscapeError(err) { + if err != nil && ctx.Err() == nil { log.Error("run failed", "id", r.ID, "error", err) return 
fmt.Errorf("run failed: %w", err) } diff --git a/cmd/moat/cli/list_clean_test.go b/cmd/moat/cli/list_clean_test.go index d74f17ad..cef767a2 100644 --- a/cmd/moat/cli/list_clean_test.go +++ b/cmd/moat/cli/list_clean_test.go @@ -81,9 +81,6 @@ func (s *listCleanStubRuntime) ContainerState(ctx context.Context, id string) (s func (s *listCleanStubRuntime) RemoveImage(ctx context.Context, id string) error { panic("unexpected call to RemoveImage") } -func (s *listCleanStubRuntime) Attach(ctx context.Context, id string, opts container.AttachOptions) error { - panic("unexpected call to Attach") -} func (s *listCleanStubRuntime) StartAttached(ctx context.Context, id string, opts container.AttachOptions) error { panic("unexpected call to StartAttached") } diff --git a/cmd/moat/cli/resolve.go b/cmd/moat/cli/resolve.go index 7fa483ea..28a34566 100644 --- a/cmd/moat/cli/resolve.go +++ b/cmd/moat/cli/resolve.go @@ -36,7 +36,7 @@ func resolveRunArg(manager *run.Manager, arg string, action string) ([]string, e // resolveRunArgSingle resolves a user-provided argument to exactly one run ID. // If multiple runs match, it prints them and returns an error telling the user // to specify a run ID. This is used by commands that only operate on a single -// run (e.g., logs, trace, audit, attach). +// run (e.g., logs, trace, audit). func resolveRunArgSingle(manager *run.Manager, arg string) (string, error) { matches, err := manager.Resolve(arg) if err != nil { diff --git a/cmd/moat/cli/run.go b/cmd/moat/cli/run.go index e35b52d1..5f367c45 100644 --- a/cmd/moat/cli/run.go +++ b/cmd/moat/cli/run.go @@ -20,7 +20,7 @@ var runCmd = &cobra.Command{ Long: `Run an agent in an isolated container with workspace mounting, credential injection, and full observability. -The agent runs in a Docker container with your workspace mounted at /workspace. +The agent runs in a container with your workspace mounted at /workspace. If an agent.yaml exists in the workspace, its settings are used as defaults. 
Arguments: @@ -28,11 +28,9 @@ Arguments: [-- cmd] Optional command to run instead of agent's default Non-interactive mode (default): - Ctrl+C Detach (run continues) - Ctrl+C Ctrl+C Stop the run (within 500ms) + Output streams to the terminal. Press Ctrl+C to stop. Interactive mode (-i): - Ctrl-/ d Detach (run continues) Ctrl-/ k Stop the run Ctrl+C Sent to container process @@ -58,9 +56,6 @@ Examples: # Run multiple commands moat run -- sh -c "npm install && npm test" - # Run detached (in background) - moat run -d ./my-project - # Run interactive shell moat run -i -- bash`, Args: cobra.ArbitraryArgs, @@ -159,7 +154,6 @@ func runAgent(cmd *cobra.Command, args []string) error { "workspace", absPath, "grants", runFlags.Grants, "cmd", containerCmd, - "detach", runFlags.Detach, "interactive", interactive, ) @@ -179,18 +173,8 @@ func runAgent(cmd *cobra.Command, args []string) error { Command: containerCmd, Config: cfg, Interactive: interactive, - TTY: interactive, - } - - r, err := ExecuteRun(ctx, opts) - if err != nil { - return err - } - - // Print started message if not already printed by Execute - if !runFlags.Detach && r != nil { - fmt.Printf("Started agent %q (run %s)\n", r.Name, r.ID) } - return nil + _, err = ExecuteRun(ctx, opts) + return err } diff --git a/cmd/moat/cli/wt.go b/cmd/moat/cli/wt.go index b5a9376e..8572a0c3 100644 --- a/cmd/moat/cli/wt.go +++ b/cmd/moat/cli/wt.go @@ -25,7 +25,7 @@ var wtCmd = &cobra.Command{ If the branch doesn't exist, it's created from HEAD. If the worktree doesn't exist, it's created. If a run is already active in the worktree, returns an -error with instructions to attach or stop it. +error with instructions to view logs or stop it. Configuration is read from agent.yaml in the repository root. 
@@ -36,9 +36,6 @@ Examples: # Start a run on a new feature branch moat wt dark-mode - # Run in background - moat wt dark-mode -d - # Run a specific command in the worktree moat wt dark-mode -- make test @@ -143,7 +140,7 @@ func runWorktree(cmd *cobra.Command, args []string) error { for _, r := range manager.List() { if r.WorktreePath == result.WorkspacePath && r.GetState() == run.StateRunning { - return fmt.Errorf("a run is already active in worktree for branch %q: %s (%s)\nAttach with 'moat attach %s' or stop with 'moat stop %s'", branch, r.Name, r.ID, r.ID, r.ID) + return fmt.Errorf("a run is already active in worktree for branch %q: %s (%s)\nView logs with 'moat logs %s' or stop with 'moat stop %s'", branch, r.Name, r.ID, r.ID, r.ID) } } @@ -165,11 +162,8 @@ func runWorktree(cmd *cobra.Command, args []string) error { } } - // Determine interactive mode: CLI flags > config > default - interactive := !wtFlags.Detach - if !interactive && cfg != nil && cfg.Interactive { - interactive = true - } + // Determine interactive mode from config + interactive := cfg != nil && cfg.Interactive log.Debug("starting worktree run", "branch", branch, @@ -195,7 +189,6 @@ func runWorktree(cmd *cobra.Command, args []string) error { Command: containerCmd, Config: cfg, Interactive: interactive, - TTY: interactive, } intcli.SetWorktreeFields(&opts, result) diff --git a/docs/content/guides/01-claude-code.md b/docs/content/guides/01-claude-code.md index 9592bf2b..53d02284 100644 --- a/docs/content/guides/01-claude-code.md +++ b/docs/content/guides/01-claude-code.md @@ -171,22 +171,22 @@ moat claude --name feature-auth ./my-project The name appears in `moat list` and makes it easier to manage multiple runs. 
-### Background runs +### Non-interactive runs -Run Claude Code in the background: +Run Claude Code non-interactively with a prompt: ```bash -moat claude -d ./my-project +moat claude -p "fix the failing tests" ./my-project ``` -Reattach later: +Monitor progress: ```bash $ moat list NAME RUN ID STATE AGE feature-auth run_a1b2c3d4e5f6 running 5m -$ moat attach run_a1b2c3d4e5f6 +$ moat logs -f run_a1b2c3d4e5f6 ``` ## Adding GitHub access diff --git a/docs/content/guides/02-codex.md b/docs/content/guides/02-codex.md index 6d72420f..4e5e1292 100644 --- a/docs/content/guides/02-codex.md +++ b/docs/content/guides/02-codex.md @@ -102,22 +102,22 @@ moat codex --name feature-auth ./my-project The name appears in `moat list` and makes it easier to manage multiple runs. -### Background runs +### Non-interactive runs -Run Codex in the background: +Run Codex non-interactively with a prompt: ```bash -moat codex -d ./my-project +moat codex -p "fix the failing tests" ./my-project ``` -Reattach later: +Monitor progress: ```bash $ moat list NAME RUN ID STATE AGE feature-auth run_a1b2c3d4e5f6 running 5m -$ moat attach run_a1b2c3d4e5f6 +$ moat logs -f run_a1b2c3d4e5f6 ``` ## Adding GitHub access diff --git a/docs/content/guides/03-gemini.md b/docs/content/guides/03-gemini.md index f19e46be..d2d5dcf1 100644 --- a/docs/content/guides/03-gemini.md +++ b/docs/content/guides/03-gemini.md @@ -138,22 +138,22 @@ moat gemini --name feature-auth ./my-project The name appears in `moat list` and makes it easier to manage multiple runs. 
-### Background runs +### Non-interactive runs -Run Gemini in the background: +Run Gemini non-interactively with a prompt: ```bash -moat gemini -d ./my-project +moat gemini -p "fix the failing tests" ./my-project ``` -Reattach later: +Monitor progress: ```bash $ moat list NAME RUN ID STATE AGE feature-auth run_a1b2c3d4e5f6 running 5m -$ moat attach run_a1b2c3d4e5f6 +$ moat logs -f run_a1b2c3d4e5f6 ``` ## Adding GitHub access diff --git a/docs/content/guides/11-observability.md b/docs/content/guides/11-observability.md index affcc610..a89bac93 100644 --- a/docs/content/guides/11-observability.md +++ b/docs/content/guides/11-observability.md @@ -42,7 +42,11 @@ Show the last N lines: $ moat logs -n 50 ``` -> **Note:** The `--follow` flag for `moat logs` is not yet implemented. Use `moat attach` to see live output from a running container. +Follow logs in real time with `--follow`: + +```bash +$ moat logs -f my-agent +``` See [CLI reference](../reference/01-cli.md) for the complete list of `moat logs` flags. @@ -265,10 +269,10 @@ $ jq 'select(.status == 401)' ~/.moat/runs/run_*/network.jsonl ### No output from `moat logs` -The run may not have produced any output. Verify the run exists with `moat list`. If the run is still active, use `moat attach` to see live output: +The run may not have produced any output. Verify the run exists with `moat list`. If the run is still active, use `moat logs -f` to follow live output: ```bash -$ moat attach run_a1b2c3d4e5f6 +$ moat logs -f run_a1b2c3d4e5f6 ``` ### No network traces diff --git a/docs/content/guides/12-worktrees.md b/docs/content/guides/12-worktrees.md index 69fa141a..1c20bf26 100644 --- a/docs/content/guides/12-worktrees.md +++ b/docs/content/guides/12-worktrees.md @@ -46,9 +46,6 @@ If the branch and worktree already exist, they are reused. 
# Start the command defined in agent.yaml on the dark-mode branch moat wt dark-mode -# Run in background -moat wt dark-mode -d - # Run a specific command instead of the agent.yaml default moat wt dark-mode -- make test ``` @@ -64,7 +61,7 @@ The `--worktree` flag works on `moat claude`, `moat codex`, and `moat gemini`: moat claude --worktree dark-mode # Start Codex on a feature branch with a prompt -moat codex --worktree feature/auth -p "implement OAuth login" -d +moat codex --worktree feature/auth -p "implement OAuth login" # Start Gemini on a refactor branch moat gemini --worktree cleanup @@ -87,12 +84,17 @@ moat wt dark-mode --name my-custom-name ## Parallel branches -Start runs on multiple branches simultaneously: +Start runs on multiple branches simultaneously, each in its own terminal: ```bash -moat wt feature/auth -d -moat wt feature/dark-mode -d -moat wt fix/login-bug -d +# Terminal 1 +moat wt feature/auth + +# Terminal 2 +moat wt feature/dark-mode + +# Terminal 3 +moat wt fix/login-bug ``` Each run gets its own worktree, its own container, and its own branch. Branch names with slashes (like `feature/auth`) are supported. @@ -114,7 +116,7 @@ If a run is already active in a worktree, `moat wt` returns an error: ```text Error: a run is already active in worktree for branch "dark-mode": my-agent-dark-mode (run_a1b2c3d4e5f6) -Attach with 'moat attach run_a1b2c3d4e5f6' or stop with 'moat stop run_a1b2c3d4e5f6' +View logs with 'moat logs run_a1b2c3d4e5f6' or stop with 'moat stop run_a1b2c3d4e5f6' ``` The `--worktree` flag on agent commands behaves the same way. @@ -182,15 +184,15 @@ Branch-specific configuration works as follows: 2. Start runs on multiple branches: ```bash - moat wt feature/auth -d - moat wt feature/dark-mode -d + moat wt feature/auth + moat wt feature/dark-mode ``` 3. Monitor progress: ```bash moat wt list moat logs run_a1b2c3d4e5f6 - moat attach run_a1b2c3d4e5f6 + moat logs -f run_a1b2c3d4e5f6 ``` 4. 
When finished, clean up worktree directories: @@ -216,10 +218,10 @@ Run `moat wt` from within a git repository. Worktrees are a git feature and requ ### "a run is already active in worktree" -Another run is already active on that branch. Either attach to it or stop it first: +Another run is already active on that branch. Follow its logs or stop it first: ```bash -moat attach +moat logs -f moat stop ``` diff --git a/docs/content/reference/01-cli.md b/docs/content/reference/01-cli.md index 6de964ff..82204da9 100644 --- a/docs/content/reference/01-cli.md +++ b/docs/content/reference/01-cli.md @@ -23,7 +23,7 @@ These flags apply to all commands: ## Run identification -Commands that operate on a run (`stop`, `destroy`, `attach`, `logs`, `trace`, `audit`, `snapshot`) accept a run ID or a run name: +Commands that operate on a run (`stop`, `destroy`, `logs`, `trace`, `audit`, `snapshot`) accept a run ID or a run name: ```bash moat stop run_a1b2c3d4e5f6 # by full ID @@ -33,7 +33,7 @@ moat stop my-agent # by name Resolution priority: exact ID > ID prefix > exact name. -If a name matches multiple runs, batch commands (`stop`, `destroy`) prompt for confirmation while single-target commands (`logs`, `attach`) list the matches and ask you to specify a run ID. +If a name matches multiple runs, batch commands (`stop`, `destroy`) prompt for confirmation while single-target commands (`logs`) list the matches and ask you to specify a run ID. ## Common agent flags @@ -44,7 +44,6 @@ The agent commands (`moat claude`, `moat codex`, `moat gemini`) share the follow | `-g`, `--grant PROVIDER` | Inject credential (repeatable). See [Grants reference](./04-grants.md) for available providers. 
| | `-e KEY=VALUE` | Set environment variable (repeatable) | | `-n`, `--name NAME` | Run name (default: from `agent.yaml` or random) | -| `-d`, `--detach` | Run in background | | `--rebuild` | Force rebuild of container image | | `--allow-host HOST` | Additional hosts to allow network access to (repeatable) | | `--runtime RUNTIME` | Container runtime to use (`apple`, `docker`) | @@ -52,7 +51,7 @@ The agent commands (`moat claude`, `moat codex`, `moat gemini`) share the follow | `--no-sandbox` | Disable gVisor sandbox (Docker only) | | `--worktree BRANCH` | Run in a git worktree for this branch (alias: `--wt`) | -Each agent command also accepts `-p`/`--prompt` for non-interactive mode, plus command-specific flags documented in their own sections. +Agent commands run interactively by default, owning the terminal with stdin/stdout/stderr connected. Use `-p`/`--prompt` for non-interactive mode (output streams to the terminal). Each agent command also accepts command-specific flags documented in their own sections. All agent commands support passing an initial prompt after `--`. Unlike `-p`, which runs non-interactively and exits when done, arguments after `--` start an interactive session with the prompt pre-filled: @@ -85,7 +84,6 @@ moat run [flags] [path] [-- command] | `--name NAME` | Set run name (used for hostname routing) | | `--grant PROVIDER` | Inject credential (repeatable) | | `-e KEY=VALUE` | Set environment variable (repeatable) | -| `-d`, `--detach` | Run in background | | `-i`, `--interactive` | Enable interactive mode (stdin + TTY) | | `--rebuild` | Force rebuild of container image | | `--runtime RUNTIME` | Container runtime to use (apple, docker) | @@ -94,6 +92,20 @@ moat run [flags] [path] [-- command] | `--no-snapshots` | Disable snapshots for this run | | `--no-sandbox` | Disable gVisor sandboxing (Docker only) | +### Execution modes + +**Non-interactive (default):** Output streams to the terminal. Press `Ctrl+C` to stop. 
+ +```bash +moat run ./my-project +``` + +**Interactive (`-i`):** The run owns the terminal with stdin/stdout/stderr connected and a TTY allocated. Press `Ctrl-/ k` to stop the run. `Ctrl+C` is forwarded to the container process. + +```bash +moat run -i -- bash +``` + ### Examples ```bash @@ -115,9 +127,6 @@ moat run -- sh -c "npm install && npm test" # Interactive shell moat run -i -- bash -# Background run -moat run -d ./my-project - # Multiple credentials moat run --grant github --grant anthropic ./my-project @@ -206,11 +215,8 @@ moat claude --grant github # Named run moat claude --name feature-auth ./my-project -# Run in a git worktree -moat claude --worktree=dark-mode --prompt "implement dark mode" --detach - -# Background run -moat claude -d ./my-project +# Run in a git worktree (non-interactive with prompt) +moat claude --worktree=dark-mode --prompt "implement dark mode" # Require manual approval for each tool use moat claude --noyolo @@ -273,11 +279,8 @@ moat codex --grant github # Named run moat codex --name my-feature -# Run in a git worktree -moat codex --worktree=dark-mode --prompt "implement dark mode" --detach - -# Run in background -moat codex -d +# Run in a git worktree (non-interactive with prompt) +moat codex --worktree=dark-mode --prompt "implement dark mode" # Force rebuild moat codex --rebuild @@ -331,11 +334,8 @@ moat gemini --grant github # Named run moat gemini --name my-feature -# Run in a git worktree -moat gemini --worktree=dark-mode --prompt "implement dark mode" --detach - -# Run in background -moat gemini -d +# Run in a git worktree (non-interactive with prompt) +moat gemini --worktree=dark-mode --prompt "implement dark mode" # Force rebuild moat gemini --rebuild @@ -353,7 +353,7 @@ moat wt [-- command] The branch is created from HEAD if it doesn't exist. The worktree is created at `~/.moat/worktrees//`. -Configuration is read from `agent.yaml` in the repository root. 
If a run is already active in the worktree, returns an error with instructions to attach or stop it. +Configuration is read from `agent.yaml` in the repository root. If a run is already active in the worktree, returns an error with instructions to stop it. ### Arguments @@ -366,7 +366,6 @@ Configuration is read from `agent.yaml` in the repository root. If a run is alre | Flag | Description | |------|-------------| -| `-d`, `--detach` | Run in background | | `-n`, `--name NAME` | Override auto-generated run name | | `-g`, `--grant PROVIDER` | Inject credential (repeatable) | | `-e KEY=VALUE` | Set environment variable (repeatable) | @@ -390,9 +389,6 @@ Override the default worktree base path (`~/.moat/worktrees/`) with the `MOAT_WO # Start a run in a worktree for the dark-mode branch moat wt dark-mode -# Run in background -moat wt dark-mode -d - # Run a specific command in the worktree moat wt dark-mode -- make test @@ -438,59 +434,6 @@ moat wt clean dark-mode --- -## moat attach - -Attach to a running container. - -``` -moat attach [flags] -``` - -### Arguments - -| Argument | Description | -|----------|-------------| -| `run` | Run ID or name | - -### Flags - -| Flag | Description | -|------|-------------| -| `-i`, `--interactive` | Force interactive mode | -| `-i=false` | Force output-only mode | - -By default, uses the run's original mode (interactive or not). - -On attach, Moat sends a TTY resize signal to the container to force a screen repaint. This ensures the terminal displays the current state immediately, rather than waiting for new output. 
- -### Examples - -```bash -# Attach by name -moat attach my-agent - -# Attach by ID -moat attach run_a1b2c3d4e5f6 - -# Force interactive -moat attach -i run_a1b2c3d4e5f6 - -# Force output-only -moat attach -i=false run_a1b2c3d4e5f6 -``` - -### Detach sequences - -**Non-interactive mode:** -- `Ctrl+C` -- Detach (run continues) -- `Ctrl+C Ctrl+C` (within 500ms) -- Stop the run - -**Interactive mode:** -- `Ctrl-/ d` -- Detach (run continues) -- `Ctrl-/ k` -- Stop the run - ---- - ## moat grant Store credentials for injection into runs. See [Grants reference](./04-grants.md) for details on each provider, host matching rules, and credential sources. diff --git a/docs/content/reference/02-agent-yaml.md b/docs/content/reference/02-agent-yaml.md index 217d6c97..e7eb2247 100644 --- a/docs/content/reference/02-agent-yaml.md +++ b/docs/content/reference/02-agent-yaml.md @@ -577,7 +577,7 @@ command: ["sh", "-c", "npm install && npm start"] ### interactive -Enable interactive mode (stdin + TTY). +Enable interactive mode. ```yaml interactive: true @@ -587,6 +587,10 @@ interactive: true - Default: `false` - CLI override: `-i` +When `true`, allocates a TTY and connects stdin. The session owns the terminal. Press `Ctrl-/ k` to stop the run; `Ctrl+C` is forwarded to the container process. + +When `false` (default), output streams to the terminal. Press `Ctrl+C` to stop. Use `moat logs ` to review output after the run. + Required for shells, REPLs, and interactive tools. 
--- diff --git a/docs/plans/2026-01-19-interactive-attach-model.md b/docs/plans/2026-01-19-interactive-attach-model.md index 9d2c73f0..ba3fcd76 100644 --- a/docs/plans/2026-01-19-interactive-attach-model.md +++ b/docs/plans/2026-01-19-interactive-attach-model.md @@ -1,7 +1,7 @@ # Interactive and Attach Model for Moat **Date:** 2026-01-19 -**Status:** Implemented +**Status:** Superseded by [Interactive-Only Simplification](2026-02-27-interactive-only-simplification.md) ## Problem Statement diff --git a/docs/plans/2026-02-27-interactive-only-simplification.md b/docs/plans/2026-02-27-interactive-only-simplification.md new file mode 100644 index 00000000..17cb43ab --- /dev/null +++ b/docs/plans/2026-02-27-interactive-only-simplification.md @@ -0,0 +1,451 @@ +# Interactive-Only Simplification + +> **Status:** Implemented. Merged via PR #196 on the `fix/interactive-only` branch. + +**Goal:** Simplify moat's execution model to two clean modes: interactive (always attached, no detach) and non-interactive (blocking with log streaming to the terminal). + +**Architecture:** Remove the `moat attach` command, the `--detach` flag, escape proxy detach sequences, and all "output-only attached" code paths. Interactive runs own the terminal for their lifetime. Non-interactive runs block while streaming container output, with Ctrl+C for graceful shutdown. 
+ +**Tech Stack:** Go, Cobra CLI, existing container runtime interface + +--- + +## Context + +The current model has accumulated complexity around attach/detach: + +- 4 execution functions: `ExecuteRun`, `RunAttached`, `RunInteractive`, `RunInteractiveAttached` +- Separate `moat attach` command with its own interactive/non-interactive modes +- Escape proxy with Ctrl-/ d (detach) and Ctrl-/ k (stop) +- Double Ctrl+C detection for "detach vs stop" in non-interactive attached mode +- `--detach` flag that makes non-interactive runs print advice about `moat attach` + +The new model: + +| Mode | Behavior | Terminal | Exit | +|------|----------|----------|------| +| **Interactive** (`-i`) | Stdin/stdout/stderr connected, TTY allocated | Owns terminal | Ctrl-/ k to stop. Ctrl+C goes to container. Process exit ends session. | +| **Non-interactive** (default) | Output streamed to terminal. Blocks until exit. | Stdout only | Ctrl+C to stop. Process exit ends session. | + +## What Gets Removed + +- `cmd/moat/cli/attach.go` — entire file deleted +- `RunAttached()` in `exec.go` — non-interactive attached mode +- `RunInteractive()` in `exec.go` — old interactive path (superseded by `RunInteractiveAttached`) +- `--detach` / `-d` flag from `ExecFlags` +- `EscapeDetach` action from escape proxy (keep `EscapeStop`) +- Double Ctrl+C detection logic +- All "Use 'moat attach ...'" messages +- `attachOutputMode()` and `attachInteractiveMode()` from attach.go + +## What Changes + +- `ExecuteRun()` simplified: interactive → `RunInteractiveAttached()`, non-interactive → start + stream logs + wait for exit +- Escape proxy simplified: only Ctrl-/ k (stop) remains. Ctrl-/ d removed. 
+- `EscapeHelpText()` updated to only show stop sequence +- `moat wt` defaults to non-interactive (remove the `interactive := !wtFlags.Detach` hack) +- Provider CLIs (claude, codex, gemini): remove references to `moat attach` in messages +- Help text for `moat run` updated to remove detach/attach documentation + +## What Stays + +- `RunInteractiveAttached()` — this is the only attached execution path +- `StartAttached()` in container runtime — still needed for interactive +- `Attach()` in container runtime — can be removed from the interface (only used by attach command) +- `FollowLogs()` / `RecentLogs()` in manager — still used by `moat logs` +- Status bar / TUI — still used for interactive sessions +- TTY tracing — still used for interactive sessions +- `EscapeStop` action — Ctrl-/ k still stops the run + +--- + +### Task 1: Remove `moat attach` command + +**Files:** +- Delete: `cmd/moat/cli/attach.go` + +**Step 1: Delete the attach command file** + +Delete `cmd/moat/cli/attach.go` entirely. This removes: +- `attachCmd` cobra command and its `init()` registration +- `attachToRun()`, `attachOutputMode()`, `attachInteractiveMode()` +- `attachInteractive` and `attachTTYTrace` variables +- Timing constants (`doublePressWindow`, `containerExitCheckDelay`, `ttyStartupDelay`) + +**Step 2: Verify it compiles** + +Run: `go build ./...` +Expected: BUILD SUCCESS (no other files import from attach.go — the constants and functions are file-local) + +Note: `ttyStartupDelay` is also used in `exec.go`. If the build fails because of this, move that constant to `exec.go` before deleting. + +**Step 3: Commit** + +```bash +git add -A +git commit -m "refactor(cli): remove moat attach command + +Simplifying to two modes: interactive (attached) and non-interactive +(monitor via moat logs). The attach command allowed reconnecting to +running containers, which added complexity without sufficient value." 
+``` + +--- + +### Task 2: Remove `--detach` flag and simplify `ExecFlags` + +**Files:** +- Modify: `internal/cli/types.go` +- Modify: `cmd/moat/cli/run.go` +- Modify: `cmd/moat/cli/exec.go` +- Modify: `cmd/moat/cli/wt.go` +- Modify: `internal/providers/claude/cli.go` +- Modify: `internal/providers/codex/cli.go` +- Modify: `internal/providers/gemini/cli.go` + +**Step 1: Remove `Detach` from `ExecFlags`** + +In `internal/cli/types.go`, remove the `Detach bool` field from `ExecFlags` struct and the corresponding `cmd.Flags().BoolVarP(&flags.Detach, "detach", "d", ...)` line from `AddExecFlags`. + +**Step 2: Remove `TTYTrace` from `ExecFlags`** + +The `TTYTrace` field on `ExecFlags` was only used by the attach command's `--tty-trace` flag. Remove it. (Note: `RunInteractiveAttached` takes `tracePath` as a parameter, and the `moat run` command doesn't expose this flag — it was attach-only.) + +Actually, check if `moat run` passes `opts.Flags.TTYTrace` anywhere. If it does, keep the field. If not, remove it. + +**Step 3: Update `ExecuteRun` in `exec.go`** + +Remove the `opts.Flags.Detach` check and the detached-mode message block. The function should now: + +1. If interactive: call `RunInteractiveAttached()` (same as now) +2. If non-interactive: `manager.Start()` → stream logs via `FollowLogs` → block on `manager.Wait()` with signal handling + +Replace the current detach block and `RunAttached` call with a blocking loop that streams container output to stdout while waiting for exit or Ctrl+C. See `cmd/moat/cli/exec.go` for the final implementation. + +**Step 4: Remove `RunAttached` and `RunInteractive` from `exec.go`** + +Delete the `RunAttached()` function entirely (non-interactive attached mode). +Delete the `RunInteractive()` function entirely (old interactive path, superseded by `RunInteractiveAttached`). + +**Step 5: Update `moat run` help text** + +In `cmd/moat/cli/run.go`, update the `Long` description to remove references to detach, attach. 
The new help should describe: + +``` +Non-interactive mode (default): + Output streams to the terminal. Press Ctrl+C to stop. + +Interactive mode (-i): + Ctrl-/ k Stop the run + Ctrl+C Sent to container process +``` + +Remove `-d` from examples. + +**Step 6: Fix `moat wt` interactive logic** + +In `cmd/moat/cli/wt.go`, the current code does `interactive := !wtFlags.Detach` which made worktree runs interactive by default (unless detached). With the removal of detach, change this to respect the config: + +```go +// Determine interactive mode: CLI flags > config > default (non-interactive) +interactive := false +if cfg != nil && cfg.Interactive { + interactive = true +} +``` + +Note: `moat wt` doesn't have its own `-i` flag. If the agent.yaml says `interactive: true`, it'll be interactive. Otherwise non-interactive. This is the right default for worktree runs. + +**Step 7: Update provider CLIs** + +In `internal/providers/claude/cli.go`, `internal/providers/codex/cli.go`, and `internal/providers/gemini/cli.go`: +- Remove references to `Detach` flag (e.g., `!claudeFlags.Detach` checks) +- Remove any "Use moat attach" messages +- The providers already determine interactivity correctly (prompt flag = non-interactive, no prompt = interactive) + +**Step 8: Verify build** + +Run: `go build ./...` + +**Step 9: Run tests** + +Run: `make test-unit` + +**Step 10: Commit** + +```bash +git add -A +git commit -m "refactor(cli): remove --detach flag and simplify execution modes + +Non-interactive runs now always start in background. Interactive runs +always own the terminal. Removes RunAttached and RunInteractive in +favor of the single RunInteractiveAttached path." +``` + +--- + +### Task 3: Simplify escape proxy (remove detach action) + +**Files:** +- Modify: `internal/term/escape.go` +- Modify: `internal/term/escape_test.go` + +**Step 1: Remove `EscapeDetach` from escape proxy** + +In `internal/term/escape.go`: + +1. 
Remove `EscapeDetach` from the `EscapeAction` constants (keep `EscapeNone` and `EscapeStop`) +2. Remove the `case EscapeDetach:` from `EscapeError.Error()` +3. Remove `escapeKeyDetach` constant (`byte = 'd'`) +4. In `EscapeProxy.Read()`, remove the `case escapeKeyDetach:` handling — when 'd' follows Ctrl-/, treat it as an unrecognized key (pass both bytes through) + +**Step 2: Update `EscapeHelpText()`** + +Change to: `"Escape: Ctrl-/ k (stop)"` + +**Step 3: Update tests** + +In `internal/term/escape_test.go`, remove or update tests for the detach escape sequence. Tests for: +- Ctrl-/ d should now pass both bytes through (not trigger escape) +- Ctrl-/ k should still trigger `EscapeStop` +- Ctrl-/ Ctrl-/ should still pass single Ctrl-/ through + +**Step 4: Run tests** + +Run: `go test ./internal/term/ -v` + +**Step 5: Commit** + +```bash +git add -A +git commit -m "refactor(term): remove detach escape sequence + +Only Ctrl-/ k (stop) remains. Ctrl-/ d is no longer an escape +sequence and passes through to the container." +``` + +--- + +### Task 4: Clean up `RunInteractiveAttached` (remove detach handling) + +**Files:** +- Modify: `cmd/moat/cli/exec.go` + +**Step 1: Remove detach handling from `RunInteractiveAttached`** + +In `RunInteractiveAttached()`: + +1. Remove the `case term.EscapeDetach:` from the escape action switch. Only `EscapeStop` remains. +2. In the `case err := <-attachDone:` handler, remove the "detached" message path. If attachment ends unexpectedly (container still running), treat it as an error or a clean exit, not a detach. 
+ +The simplified `attachDone` handler: + +```go +case err := <-attachDone: + if err != nil && ctx.Err() == nil && !term.IsEscapeError(err) { + log.Error("run failed", "id", r.ID, "error", err) + return fmt.Errorf("run failed: %w", err) + } + fmt.Printf("Run %s completed\n", r.ID) + return nil +``` + +**Step 2: Update escape help text in the function** + +The `fmt.Printf("%s\n\n", term.EscapeHelpText())` call will now print the updated text. + +**Step 3: Remove detach-related messages** + +Remove all `"Use 'moat attach %s' to reattach"` messages from exec.go. + +**Step 4: Verify build and run tests** + +Run: `go build ./... && make test-unit` + +**Step 5: Commit** + +```bash +git add -A +git commit -m "refactor(cli): remove detach handling from interactive mode + +Interactive sessions now run until the process exits or user sends +Ctrl-/ k to stop. No detach path exists." +``` + +--- + +### Task 5: Remove `Attach()` from container runtime interface + +**Files:** +- Modify: `internal/container/runtime.go` +- Modify: `internal/container/docker.go` +- Modify: `internal/container/apple.go` +- Modify: `internal/run/manager.go` + +**Step 1: Check if `Attach()` is still used** + +Search for calls to `runtime.Attach(` and `manager.Attach(`. After removing `attach.go` and `RunInteractive()`, the only remaining call should be in `manager.Attach()`. If nothing calls `manager.Attach()` anymore, we can remove it from both the manager and the runtime interface. + +**Step 2: Remove `Attach` from runtime interface** + +In `internal/container/runtime.go`, remove: +```go +Attach(ctx context.Context, id string, opts AttachOptions) error +``` + +**Step 3: Remove `Attach` implementations** + +- In `internal/container/docker.go`, remove the `Attach()` method +- In `internal/container/apple.go`, remove the `Attach()` method + +**Step 4: Remove `Manager.Attach()` from run manager** + +In `internal/run/manager.go`, remove the `Attach()` method. 
+ +**Step 5: Verify build** + +Run: `go build ./...` + +**Step 6: Run tests** + +Run: `make test-unit` + +**Step 7: Commit** + +```bash +git add -A +git commit -m "refactor(container): remove Attach from runtime interface + +Only StartAttached remains for interactive sessions. The separate +Attach method was only needed for the removed moat attach command." +``` + +--- + +### Task 6: Clean up references and messages + +**Files:** +- Modify: `internal/providers/claude/cli.go` +- Modify: `internal/providers/claude/cli_test.go` +- Modify: `cmd/moat/cli/wt.go` +- Modify: `internal/cli/worktree.go` + +**Step 1: Update error messages that reference `moat attach`** + +Search for all remaining "moat attach" references: + +1. `internal/providers/claude/cli.go:424` — Change from "Use 'moat attach %s' to reconnect" to "Use 'moat logs %s' to view output or 'moat stop %s' to stop" +2. `internal/providers/claude/cli_test.go:356-357` — Update test to check for new message +3. `cmd/moat/cli/wt.go:146` — Change from "Attach with 'moat attach'" to "View logs with 'moat logs %s' or stop with 'moat stop %s'" +4. `internal/cli/worktree.go:67` — Same pattern as above + +**Step 2: Update help text** + +Review `moat run --help` and `moat wt --help` to ensure no attach/detach references remain. + +**Step 3: Verify build and tests** + +Run: `go build ./... && make test-unit` + +**Step 4: Commit** + +```bash +git add -A +git commit -m "fix(cli): update messages to remove moat attach references + +Point users to moat logs and moat stop instead of the removed +moat attach command." 
+``` + +--- + +### Task 7: Update documentation + +**Files:** +- Modify: `docs/content/reference/01-cli.md` (if it exists) +- Modify: `docs/content/reference/02-agent-yaml.md` (if `interactive` field docs need updating) +- Modify: `docs/plans/2026-01-19-interactive-attach-model.md` (mark as superseded) + +**Step 1: Update CLI reference docs** + +Remove documentation for: +- `moat attach` command +- `--detach` / `-d` flag +- Ctrl-/ d escape sequence + +Update documentation for: +- `moat run` — describe the two modes (interactive and non-interactive) +- Escape sequences — only Ctrl-/ k + +**Step 2: Update agent.yaml reference** + +If the `interactive` field is documented, update to clarify its new meaning: +- `interactive: true` — allocates TTY, connects stdin, session owns terminal +- `interactive: false` (default) — runs in background, monitor via `moat logs` + +**Step 3: Mark old plan as superseded** + +Add a note at the top of `docs/plans/2026-01-19-interactive-attach-model.md`: + +```markdown +> **Status:** Superseded by [Interactive-Only Simplification](2026-02-27-interactive-only-simplification.md) +``` + +**Step 4: Commit** + +```bash +git add -A +git commit -m "docs: update for interactive-only execution model + +Remove documentation for moat attach command, --detach flag, and +Ctrl-/ d escape sequence. Document the simplified two-mode model." +``` + +--- + +### Task 8: Run full test suite and lint + +**Step 1: Run linter** + +Run: `make lint` + +Fix any issues. + +**Step 2: Run full unit tests** + +Run: `make test-unit` + +Fix any failures. + +**Step 3: Commit any fixes** + +```bash +git add -A +git commit -m "fix: address lint and test issues from simplification" +``` + +--- + +## Implementation Notes + +The following additional changes were made during implementation beyond the original plan: + +### Non-interactive mode: blocking with log streaming (not fire-and-forget) + +The original plan called for fire-and-forget non-interactive runs. 
During implementation, this was found to be problematic because `manager.Close()` cancels the monitor goroutine's context, killing log capture before it completes. The solution: non-interactive runs block while streaming container output to stdout (similar to `docker run`). This ensures the monitor goroutine finishes before the process exits. True fire-and-forget requires daemon-level monitoring (tracked in issue #197). + +### Monitor goroutine lifecycle fix + +`monitorContainerExit` was changed to use `context.Background()` instead of the manager's context, so `Close()` doesn't kill it. A `sync.WaitGroup` in `Close()` ensures the runtime stays alive while monitors finish. Only monitors started via `Start()` are tracked — inherited monitors from `loadPersistedRuns` are fire-and-forget since they belong to other CLI invocations. + +### Docker log demuxing + +`ContainerLogs` for non-TTY containers returns Docker's multiplexed stdout/stderr format. Added demuxing via `stdcopy.StdCopy` with `io.Pipe` to produce clean output. Also fixed inspect failure to return an error instead of silently producing garbled output. + +### Interactive resource cleanup + +`cleanupResources` was not called on natural interactive exit (only on escape-stop and signal paths). Added cleanup in the `StartAttached` non-error path to prevent resource leaks. + +### Interactive metadata updates + +After an interactive container exits normally, `state` and `stopped_at` are now updated via state transition in `StartAttached`. diff --git a/docs/runtime-flow.md b/docs/runtime-flow.md new file mode 100644 index 00000000..dfa176ac --- /dev/null +++ b/docs/runtime-flow.md @@ -0,0 +1,366 @@ +# Runtime Flow: Interactive vs Non-Interactive + +Here's the summary: + + Created docs/runtime-flow.md with 7 mermaid diagrams tracing both paths from CLI entry through container lifecycle: + + 1. Entry points → ExecuteRun() + 2. The interactive/non-interactive fork + 3. 
manager.Create() (shared heavyweight setup) + 4. Non-interactive: manager.Start() → background + 5. Interactive: RunInteractiveAttached() → event loop + 6. manager.StartAttached() internals + 7. monitorContainerExit() cleanup + + 7 simplification opportunities identified, ranked by impact/effort: + ┌──────────┬──────────────────────────────────────────────┬────────────────────────────────────────────────────────────────────────────┐ + │ Priority │ What │ Why │ + ├──────────┼──────────────────────────────────────────────┼────────────────────────────────────────────────────────────────────────────┤ + │ 1 │ Remove TTY field from ExecOptions │ Always equals Interactive — dead weight │ + ├──────────┼──────────────────────────────────────────────┼────────────────────────────────────────────────────────────────────────────┤ + │ 2 │ Remove streamLogs from non-interactive Start │ Users use moat logs -f now; goroutine lifecycle is unclear │ + ├──────────┼──────────────────────────────────────────────┼────────────────────────────────────────────────────────────────────────────┤ + │ 3 │ Extract postStart() helper │ Start() and StartAttached() share ~60% of code │ + ├──────────┼──────────────────────────────────────────────┼────────────────────────────────────────────────────────────────────────────┤ + │ 4 │ Simplify interactive event loop │ Only one escape action now; the channel dance is overkill │ + ├──────────┼──────────────────────────────────────────────┼────────────────────────────────────────────────────────────────────────────┤ + │ 5 │ Extract idempotent cleanupRun() │ Cleanup is duplicated across 3 paths with inconsistent guards │ + ├──────────┼──────────────────────────────────────────────┼────────────────────────────────────────────────────────────────────────────┤ + │ 6 │ ProviderRunner helper │ claude/codex/gemini CLIs are ~200 lines each of near-identical boilerplate │ + 
├──────────┼──────────────────────────────────────────────┼────────────────────────────────────────────────────────────────────────────┤ + │ 7 │ Skip: TTY trace builder │ Future-proofing, not needed now │ + └──────────┴──────────────────────────────────────────────┴────────────────────────────────────────────────────────────────────────────┘ + +This document describes the current state of the runtime flow after the attach/detach simplification. + +## Entry Points + +All commands converge on `ExecuteRun()`: + +```mermaid +flowchart TD + A["moat run"] + B["moat claude"] + C["moat codex"] + D["moat gemini"] + E["moat wt"] + + A -->|"parse args, load config,
resolve interactive mode"| F["ExecuteRun()"] + B -->|"interactive = (no -p flag)"| F + C -->|"interactive = (no -p flag)"| F + D -->|"interactive = (no -p flag)"| F + E -->|"interactive = config.Interactive"| F +``` + +Each entry point builds an `ExecOptions` struct with `Interactive` set, then calls `ExecuteRun()`. (The former `TTY` field was removed — it always mirrored `Interactive`; TTY allocation is now determined at runtime via `term.IsTerminal(os.Stdin)` in `manager.StartAttached()`.) + +## ExecuteRun: The Fork + +```mermaid +flowchart TD + F["ExecuteRun()"] + F --> G["Set runtime env"] + G --> H["NewManagerWithOptions()"] + H --> I["manager.Create(ctx, opts)"] + + I --> WT{"worktree
metadata?"} + WT -->|yes| WTS["Save worktree metadata"] + WT -->|no| CB + WTS --> CB + + CB{"OnRunCreated
callback?"} + CB -->|yes| CBS["Call callback"] + CB -->|no| MODE + CBS --> MODE + + MODE{"opts.Interactive?"} + MODE -->|yes| INT["RunInteractiveAttached()"] + MODE -->|no| NONINT["manager.Start()"] + + NONINT --> PORTS{"ports?"} + PORTS -->|yes| PRINTP["Print endpoints"] + PORTS -->|no| BG + PRINTP --> BG["Print background instructions
(moat logs, moat stop)"] + BG --> RET["return"] + + INT --> DONE["Blocks until process exits
or Ctrl-/ k"] +``` + +## manager.Create(): Shared Setup (Both Modes) + +This is the heavyweight function (~500 lines). Both modes go through the same path. + +```mermaid +flowchart TD + C["manager.Create()"] + C --> NAME["Resolve name
(flag > config > random)"] + NAME --> VGRANT["Validate grants
(credential store check)"] + VGRANT --> RUN["Create Run struct
(ID, state=Created)"] + + RUN --> MOUNTS["Build mounts
(workspace + config + volumes + worktree .git)"] + + MOUNTS --> PROXY{"needs proxy?
(grants or strict network)"} + PROXY -->|yes| DAEMON["EnsureRunning(daemon)"] + DAEMON --> CREDS["Load credentials per grant"] + CREDS --> RUNCTX["Build RunContext
(headers, network rules, MCP)"] + RUNCTX --> REGISTER["daemon.RegisterRun()"] + REGISTER --> PROXYENV["Set proxy env vars
(HTTP_PROXY, CA cert mount)"] + PROXY -->|no| NOPROXY["No proxy setup"] + + PROXYENV --> SSH + NOPROXY --> SSH + + SSH{"ssh grant?"} + SSH -->|yes| SSHA["Start SSH agent server
(mount socket)"] + SSH -->|no| AGENTCFG + + SSHA --> AGENTCFG + + AGENTCFG{"agent provider?
(claude/codex/gemini)"} + AGENTCFG -->|yes| PCFG["Provider.ContainerConfig()
(config files, env, mounts)"] + AGENTCFG -->|no| IMG + + PCFG --> IMG["image.Resolve()
(deps → base image)"] + IMG --> BUILD{"image exists?"} + BUILD -->|no| BUILDIMG["Build image
(Dockerfile generation + build)"] + BUILD -->|yes| SNAP + BUILDIMG --> SNAP + + SNAP{"snapshots
configured?"} + SNAP -->|yes| SNAPENG["Create snapshot engine"] + SNAP -->|no| CONT + + SNAPENG --> CONT + CONT --> LANGSERV{"language
servers?"} + LANGSERV -->|yes| LS["Configure language server
mounts and env"] + LANGSERV -->|no| SVC + LS --> SVC + + SVC{"services?
(postgres, redis, etc)"} + SVC -->|yes| SVCSTART["Create network
Start service containers
Wait for readiness"] + SVC -->|no| DEPS + SVCSTART --> DEPS + + DEPS{"build deps?
(BuildKit)"} + DEPS -->|yes| BK["Start BuildKit sidecar"] + DEPS -->|no| AUDIT + BK --> AUDIT + + AUDIT["Create audit store"] + AUDIT --> CCFG["Build container.Config"] + CCFG --> CREATE["runtime.CreateContainer()"] + CREATE --> FW{"strict
network?"} + FW -->|yes| FWSET["Record firewall settings"] + FW -->|no| SAVE + FWSET --> SAVE + SAVE["SaveMetadata()"] + SAVE --> RRET["return Run"] +``` + +## Non-Interactive Path: manager.Start() + +```mermaid +flowchart TD + S["manager.Start()"] + S --> SS["state = Starting"] + SS --> LOG["Set log context"] + LOG --> SC["runtime.StartContainer()"] + SC --> FW{"firewall
enabled?"} + FW -->|yes| FWS["runtime.SetupFirewall()"] + FW -->|no| PB + FWS --> PB + + PB{"ports?"} + PB -->|yes| PBG["GetPortBindings()
Register routes"] + PB -->|no| RUN + PBG --> RUN + + RUN["state = Running"] + RUN --> SAVE["SaveMetadata()"] + SAVE --> SNAP{"snapshot
engine?"} + SNAP -->|yes| SNPC["Create pre-run snapshot"] + SNAP -->|no| STREAM + SNPC --> STREAM + + STREAM["go streamLogs()"] + STREAM --> MON["go monitorContainerExit()"] + MON --> RET["return"] +``` + +After `Start()` returns, `ExecuteRun()` prints the background instructions and the CLI exits. The container runs independently. `monitorContainerExit()` runs in a background goroutine that captures logs, runs provider hooks, and cleans up when the container eventually exits. + +## Interactive Path: RunInteractiveAttached() + +```mermaid +flowchart TD + R["RunInteractiveAttached()"] + R --> HELP["Print escape help
(Ctrl-/ k)"] + HELP --> TRACE{"tty-trace
path?"} + TRACE -->|yes| TT["Setup TTY tracer"] + TRACE -->|no| RAW + TT --> RAW + + RAW{"stdin is
terminal?"} + RAW -->|yes| RAWM["EnableRawMode()"] + RAW -->|no| SB + RAWM --> SB + + SB{"stdout is
terminal?"} + SB -->|yes| SBS["Setup status bar
(scroll region, grants)"] + SB -->|no| WRAP + SBS --> WRAP + + WRAP["Wrap stdin: EscapeProxy"] + WRAP --> EHINTS{"status
writer?"} + EHINTS -->|yes| EH["SetupEscapeHints()"] + EHINTS -->|no| TWRAP + EH --> TWRAP + + TWRAP{"tracer?"} + TWRAP -->|yes| TW["Wrap stdin/stdout
with RecordingReader/Writer"] + TWRAP -->|no| SIG + TW --> SIG + + SIG["signal.Notify
(SIGINT, SIGTERM, SIGWINCH)"] + + SIG --> SA["go manager.StartAttached()
(blocks until container exits)"] + SA --> RESIZE["go sleep(200ms) then
ResizeTTY()"] + + RESIZE --> LOOP["Event loop"] + + LOOP --> |SIGWINCH| WINCH["Resize status bar + TTY"] + WINCH --> LOOP + + LOOP --> |SIGTERM| TERM["Stop run"] + TERM --> EXIT["return"] + + LOOP --> |SIGINT| FWDI["Forwarded to container
via TTY"] + FWDI --> LOOP + + LOOP --> |EscapeStop| STOP["Cancel attach
manager.Stop()"] + STOP --> EXIT + + LOOP --> |attachDone err| ERR{"error?"} + ERR -->|yes| ERRET["return error"] + ERR -->|no| COMP["Print completed"] + COMP --> EXIT +``` + +## manager.StartAttached() + +```mermaid +flowchart TD + SA["manager.StartAttached()"] + SA --> SS["state = Starting"] + SS --> LOG["Set log context"] + LOG --> TTY{"stdin is terminal?"} + TTY -->|yes| USTY["useTTY = true"] + TTY -->|no| USTN["useTTY = false"] + + USTY --> TEE + USTN --> TEE + + TEE{"interactive +
has store?"} + TEE -->|yes| TEEW["Tee stdout/stderr
to logBuffer"] + TEE -->|no| AOPTS + TEEW --> AOPTS + + AOPTS["Build AttachOptions
(stdin, stdout, stderr, TTY,
initial terminal size)"] + AOPTS --> GORTN["go runtime.StartAttached()"] + GORTN --> DELAY["sleep(100ms)"] + DELAY --> RUN["state = Running"] + RUN --> PB{"ports?"} + PB -->|yes| PBG["GetPortBindings()
Register routes"] + PB -->|no| SAVE + PBG --> SAVE + SAVE["SaveMetadata()"] + SAVE --> FW{"firewall?"} + FW -->|yes| FWS["SetupFirewall()"] + FW -->|no| WAIT + FWS --> WAIT + + WAIT["await attachDone"] + WAIT --> LOGS{"Apple +
interactive?"} + LOGS -->|yes| WLOGS["Write logBuffer
to logs.jsonl"] + LOGS -->|no| CAP + WLOGS --> CAP + + CAP["captureLogs()"] + CAP --> HOOKS["runProviderStoppedHooks()"] + HOOKS --> SMETA["SaveMetadata()"] + SMETA --> RET["return attachErr"] +``` + +## Cleanup: monitorContainerExit() + +Runs as a background goroutine for **non-interactive** runs (started by `manager.Start()`). For **interactive** runs, cleanup is done inline in `manager.StartAttached()`. + +```mermaid +flowchart TD + M["monitorContainerExit()"] + M --> WAIT["runtime.WaitContainer()
(blocks)"] + WAIT --> CTX{"context
canceled?"} + CTX -->|yes| BAIL["return (container still running,
next Manager instance picks it up)"] + CTX -->|no| LOGS["captureLogs()"] + + LOGS --> HOOKS["runProviderStoppedHooks()"] + HOOKS --> CLOSE["close(exitCh)"] + CLOSE --> STATE{"exit code?"} + STATE -->|0| STOPPED["state = Stopped"] + STATE -->|non-zero| FAILED["state = Failed"] + STOPPED --> SAVE + FAILED --> SAVE + + SAVE["SaveMetadata()"] + SAVE --> CLEANUP["cleanupResources()
(sync.Once guarded)"] +``` + +--- + +## Completed Simplifications + +The following simplifications were identified during analysis and have been implemented: + +### 1. Removed `TTY` field from ExecOptions ✅ +`ExecOptions.TTY` was always set identically to `Interactive`. Removed the field — TTY is now determined at runtime via `term.IsTerminal(os.Stdin)` in `manager.StartAttached()`. + +### 2. Removed `streamLogs` from non-interactive Start ✅ +`StreamLogs` was never `true` in production. Non-interactive runs tell users to use `moat logs -f`. Removed the `streamLogs` method and its `stdcopy` import. + +### 3. Extracted postStart helpers ✅ +Extracted `setLogContext()`, `setupPortBindings()`, and `setupFirewall()` from the duplicated code in `Start()` and `StartAttached()`. Reduced ~80 lines of duplication. + +### 4. Simplified interactive event loop ✅ +With only one escape action (`EscapeStop`), eliminated the `escapeCh` channel. Escape errors now flow through `attachDone` and are handled inline. The attach goroutine is a one-liner. + +### 5. Extracted idempotent `cleanupResources()` ✅ +Consolidated resource cleanup from `Stop()`, `Wait()`, `monitorContainerExit()`, and `Destroy()` into a single method guarded by `sync.Once`. Reduced ~300 lines of duplicated cleanup code. + +### 6. Extracted `ProviderRunner` helper ✅ +`moat claude`, `moat codex`, and `moat gemini` now use a shared `RunProvider()` helper. Each provider is ~30-50 lines of declarative config instead of ~170 lines of boilerplate. Provider-specific logic is isolated in `BuildCommand` and `ConfigureAgent` callbacks. + +### 7. TTY trace builder pattern — Skipped +The conditional wrapping is clear as-is. Not needed until more layers are added. + +The duplication is significant (~200 lines each with minor variations). + +**Opportunity:** Extract a `ProviderRunner` helper that takes a provider-specific config (dependencies, command builder, flag names) and handles the boilerplate. 
Each provider CLI becomes ~50 lines of config + flag registration. + +### 7. Non-interactive Start() still streams logs to stdout + +`manager.Start()` calls `go m.streamLogs()` which copies container logs to stdout. But `ExecuteRun()` returns immediately after Start() for non-interactive mode, and the manager is deferred-closed. The `streamLogs` goroutine's lifecycle is unclear — it may be killed when the manager closes. + +**Opportunity:** Since non-interactive runs print "use moat logs -f", consider removing the `streamLogs` call entirely. The user explicitly monitors via `moat logs`. This simplifies the non-interactive path and eliminates the goroutine lifecycle question. + +### Priority ranking + +| # | Opportunity | Impact | Effort | +|---|------------|--------|--------| +| 5 | Remove `TTY` field (always = Interactive) | Low risk, cleaner API | Small | +| 7 | Remove streamLogs from non-interactive Start | Simpler lifecycle | Small | +| 1 | Extract postStart() helper | Less duplication in manager | Medium | +| 3 | Simplify interactive event loop | Fewer channels/goroutines | Medium | +| 2 | Idempotent cleanupRun() | Correctness + less duplication | Medium | +| 6 | ProviderRunner helper | Major dedup across 3 files | Large | +| 4 | TTY trace builder pattern | Future-proofing only | Skip | diff --git a/internal/cli/provider.go b/internal/cli/provider.go new file mode 100644 index 00000000..85261049 --- /dev/null +++ b/internal/cli/provider.go @@ -0,0 +1,225 @@ +package cli + +import ( + "context" + "fmt" + "strings" + + "github.com/majorcontext/moat/internal/config" + "github.com/majorcontext/moat/internal/log" + "github.com/spf13/cobra" +) + +// ProviderRunConfig describes a provider's run configuration. +// Each provider supplies a ProviderRunConfig to RunProvider +// to eliminate repeated boilerplate for workspace resolution, +// grant dedup, config setup, dry run, and execution. 
+type ProviderRunConfig struct { + // Name is the provider name (e.g., "claude", "codex", "gemini"). + // Used in log messages and dry run output. + Name string + + // Flags is a pointer to the provider's ExecFlags. + Flags *ExecFlags + + // PromptFlag is the value of the -p/--prompt flag. + PromptFlag string + + // AllowedHosts are additional hosts from the --allow-host flag. + AllowedHosts []string + + // WtFlag is the value of the --worktree flag. + WtFlag string + + // GetCredentialGrant returns the grant name for the provider's credential. + // Returns empty string if no credential exists. + GetCredentialGrant func() string + + // Dependencies are the required dependencies (e.g., ["node@20", "git", "claude-code"]). + Dependencies []string + + // NetworkHosts are hosts the provider needs network access to. + NetworkHosts []string + + // BuildCommand builds the container command from the prompt flag value and + // initial prompt (from -- args). Called after grants and interactive mode + // are resolved. + BuildCommand func(promptFlag, initialPrompt string) ([]string, error) + + // ConfigureAgent applies provider-specific config tweaks (e.g., syncLogs). + // Called after dependencies and network hosts are added. + // cfg is guaranteed non-nil. + ConfigureAgent func(cfg *config.Config) + + // SupportsInitialPrompt indicates whether this provider supports the + // -- "prompt" syntax for passing an initial prompt via cobra's ArgsLenAtDash. + // If false, args are treated as a simple workspace path. + SupportsInitialPrompt bool + + // DryRunNote is an optional extra line to print during dry run + // (e.g., "Note: No API key configured. Claude will prompt for login."). + // Only printed when grants is empty. + DryRunNote string +} + +// RunProvider executes the shared boilerplate for provider CLI commands. +// It handles workspace resolution, config loading, worktree support, +// grant dedup, dependency injection, network hosts, dry run, and execution. 
+func RunProvider(cmd *cobra.Command, args []string, rc ProviderRunConfig) error { + // Guard: if a subcommand was invoked, skip the parent run function + if cmd.CalledAs() != rc.Name { + return nil + } + + // Parse workspace and optional initial prompt from args + workspace := "." + var initialPrompt string + + if rc.SupportsInitialPrompt { + dashIdx := cmd.ArgsLenAtDash() + if dashIdx >= 0 { + if dashIdx > 0 { + workspace = args[0] + } + passthroughArgs := args[dashIdx:] + if len(passthroughArgs) > 0 { + initialPrompt = strings.Join(passthroughArgs, " ") + } + } else if len(args) > 0 { + workspace = args[0] + } + } else { + if len(args) > 0 { + workspace = args[0] + } + } + + absPath, err := ResolveWorkspacePath(workspace) + if err != nil { + return err + } + + cfg, err := config.Load(absPath) + if err != nil { + return fmt.Errorf("loading config: %w", err) + } + + // Handle --wt/--worktree flag + wtOut, err := ResolveWorktreeWorkspace(rc.WtFlag, absPath, rc.Flags, cfg) + if err != nil { + return err + } + absPath = wtOut.Workspace + cfg = wtOut.Config + + // Build grants list with deduplication: credential grant first, + // then config grants, then flag grants. 
+ grantSet := make(map[string]bool) + var grants []string + addGrant := func(g string) { + if !grantSet[g] { + grantSet[g] = true + grants = append(grants, g) + } + } + + if rc.GetCredentialGrant != nil { + if credName := rc.GetCredentialGrant(); credName != "" { + addGrant(credName) + } + } + if cfg != nil { + for _, g := range cfg.Grants { + addGrant(g) + } + } + for _, g := range rc.Flags.Grants { + addGrant(g) + } + rc.Flags.Grants = grants + + interactive := rc.PromptFlag == "" + + // Build container command (provider-specific logic) + containerCmd, err := rc.BuildCommand(rc.PromptFlag, initialPrompt) + if err != nil { + return err + } + + // Name from flag, or config, or let manager generate one + if rc.Flags.Name == "" && cfg != nil && cfg.Name != "" { + rc.Flags.Name = cfg.Name + } + + // Ensure config is non-nil before modifying dependencies/network + if cfg == nil { + cfg = &config.Config{} + } + + // Add required dependencies, skipping any already present + for _, dep := range rc.Dependencies { + prefix := dep + for i := range dep { + if dep[i] == '@' { + prefix = dep[:i] + break + } + } + if !HasDependency(cfg.Dependencies, prefix) { + cfg.Dependencies = append(cfg.Dependencies, dep) + } + } + + // Network: provider hosts first, then user-specified allowed hosts + cfg.Network.Allow = append(cfg.Network.Allow, rc.NetworkHosts...) + cfg.Network.Allow = append(cfg.Network.Allow, rc.AllowedHosts...) 
+ + // Provider-specific config tweaks (e.g., enabling log sync) + if rc.ConfigureAgent != nil { + rc.ConfigureAgent(cfg) + } + + if envErr := ParseEnvFlags(rc.Flags.Env, cfg); envErr != nil { + return envErr + } + + log.Debug(fmt.Sprintf("starting %s", rc.Name), + "workspace", absPath, + "grants", grants, + "interactive", interactive, + "prompt", rc.PromptFlag, + "rebuild", rc.Flags.Rebuild, + ) + + if DryRun { + fmt.Printf("Dry run - would start %s\n", rc.Name) + fmt.Printf("Workspace: %s\n", absPath) + fmt.Printf("Grants: %v\n", grants) + fmt.Printf("Interactive: %v\n", interactive) + fmt.Printf("Rebuild: %v\n", rc.Flags.Rebuild) + if len(grants) == 0 && rc.DryRunNote != "" { + fmt.Println(rc.DryRunNote) + } + return nil + } + + ctx := context.Background() + + opts := ExecOptions{ + Flags: *rc.Flags, + Workspace: absPath, + Command: containerCmd, + Config: cfg, + Interactive: interactive, + } + + SetWorktreeFields(&opts, wtOut.Result) + + // Print run info after creation but before blocking on execution + opts.OnRunCreated = func(info RunInfo) { + fmt.Printf("Started agent %q (%s)\n", info.Name, info.ID) + } + + _, err = ExecuteRun(ctx, opts) + return err +} diff --git a/internal/cli/types.go b/internal/cli/types.go index 02e095a7..81a33aae 100644 --- a/internal/cli/types.go +++ b/internal/cli/types.go @@ -14,7 +14,6 @@ type ExecFlags struct { Runtime string Rebuild bool KeepContainer bool - Detach bool Interactive bool NoSandbox bool TTYTrace string // Path to save terminal I/O trace for debugging @@ -27,7 +26,6 @@ func AddExecFlags(cmd *cobra.Command, flags *ExecFlags) { cmd.Flags().StringVarP(&flags.Name, "name", "n", "", "name for this run (default: from agent.yaml or random)") cmd.Flags().BoolVar(&flags.Rebuild, "rebuild", false, "force rebuild of container image") cmd.Flags().BoolVar(&flags.KeepContainer, "keep", false, "keep container after run completes (for debugging)") - cmd.Flags().BoolVarP(&flags.Detach, "detach", "d", false, "run in background and 
return immediately") cmd.Flags().StringVar(&flags.Runtime, "runtime", "", "container runtime to use (apple, docker)") cmd.Flags().BoolVar(&flags.NoSandbox, "no-sandbox", false, "disable gVisor sandbox (reduced isolation, Docker only)") cmd.Flags().StringVar(&flags.TTYTrace, "tty-trace", "", "capture terminal I/O to file for debugging (e.g., session.json)") @@ -50,7 +48,6 @@ type ExecOptions struct { Command []string Config *config.Config Interactive bool // Can be set by flags or command logic - TTY bool // Worktree tracking (set by moat wt or --wt flag) WorktreeBranch string diff --git a/internal/cli/worktree.go b/internal/cli/worktree.go index 79da7ddf..51ce2a2c 100644 --- a/internal/cli/worktree.go +++ b/internal/cli/worktree.go @@ -64,7 +64,7 @@ func ResolveWorktreeWorkspace(wtBranch, workspace string, flags *ExecFlags, cfg // Check for active run in this worktree (only possible if reused) if result.Reused && CheckWorktreeActive != nil { if name, id := CheckWorktreeActive(result.WorkspacePath); name != "" { - return nil, fmt.Errorf("a run is already active in worktree for branch %q: %s (%s)\nStop it first with 'moat stop %s' or attach with 'moat attach %s'", wtBranch, name, id, id, id) + return nil, fmt.Errorf("a run is already active in worktree for branch %q: %s (%s)\nStop it first with 'moat stop %s' or view logs with 'moat logs %s'", wtBranch, name, id, id, id) } } diff --git a/internal/container/apple.go b/internal/container/apple.go index bcb0c439..013b739e 100644 --- a/internal/container/apple.go +++ b/internal/container/apple.go @@ -353,26 +353,11 @@ func (r *AppleRuntime) StopContainer(ctx context.Context, containerID string) er // WaitContainer blocks until the container exits and returns the exit code. 
func (r *AppleRuntime) WaitContainer(ctx context.Context, containerID string) (int64, error) { - // Apple's container CLI may have a "wait" command, or we poll with inspect - // Try "container wait" first - cmd := exec.CommandContext(ctx, r.containerBin, "wait", containerID) - var stdout, stderr bytes.Buffer - cmd.Stdout = &stdout - cmd.Stderr = &stderr - - if err := cmd.Run(); err != nil { - // If wait is not available, fall back to polling - return r.waitByPolling(ctx, containerID) - } - - // Parse exit code from output - exitStr := strings.TrimSpace(stdout.String()) - exitCode, err := strconv.ParseInt(exitStr, 10, 64) - if err != nil { - return -1, fmt.Errorf("parsing exit code %q: %w", exitStr, err) - } - - return exitCode, nil + // Always poll with inspect rather than using "container wait". + // Apple's "container wait" hangs indefinitely when a container is + // stopped or removed externally (e.g., via "moat stop" from another + // process), which blocks monitorContainerExit forever. + return r.waitByPolling(ctx, containerID) } // waitByPolling polls the container status until it exits. @@ -387,25 +372,37 @@ func (r *AppleRuntime) waitByPolling(ctx context.Context, containerID string) (i // Check container status // Apple's container inspect outputs JSON directly (no --format flag) cmd := exec.CommandContext(ctx, r.containerBin, "inspect", containerID) - var stdout bytes.Buffer + var stdout, stderr bytes.Buffer cmd.Stdout = &stdout + cmd.Stderr = &stderr if err := cmd.Run(); err != nil { - return -1, fmt.Errorf("inspecting container: %w", err) - } + stderrStr := stderr.String() + if strings.Contains(stderrStr, "notFound") || strings.Contains(stderrStr, "not found") { + // Container no longer exists (removed externally) — treat as exited + return 0, nil + } + // Transient error (XPC timeout, etc.) 
— log and keep polling + log.Debug("transient inspect error, retrying", "error", err, "stderr", stderrStr) + } else { + // Apple's inspect returns an array of container info objects + var info []struct { + Status string `json:"status"` + } + if err := json.Unmarshal(stdout.Bytes(), &info); err != nil { + return -1, fmt.Errorf("parsing container info: %w", err) + } - // Apple's inspect returns an array of container info objects - var info []struct { - Status string `json:"status"` - } - if err := json.Unmarshal(stdout.Bytes(), &info); err != nil { - return -1, fmt.Errorf("parsing container info: %w", err) - } + if len(info) == 0 { + // Empty result — container no longer exists (removed externally) + return 0, nil + } - if len(info) > 0 && (info[0].Status == "exited" || info[0].Status == "stopped") { - // Apple's container CLI doesn't provide exit code in inspect output - // Return 0 for stopped containers (best we can do) - return 0, nil + if info[0].Status == "exited" || info[0].Status == "stopped" { + // Apple's container CLI doesn't provide exit code in inspect output + // Return 0 for stopped containers (best we can do) + return 0, nil + } } // Sleep before next poll to avoid hammering the CLI @@ -1137,38 +1134,6 @@ func (r *AppleRuntime) ContainerState(ctx context.Context, containerID string) ( return info[0].Status, nil } -// Attach connects stdin/stdout/stderr to a running container. -func (r *AppleRuntime) Attach(ctx context.Context, containerID string, opts AttachOptions) error { - // Build attach command arguments - args := []string{"attach"} - if opts.Stdin != nil { - args = append(args, "--stdin") - } - args = append(args, containerID) - - cmd := exec.CommandContext(ctx, r.containerBin, args...) 
- - // Connect stdin/stdout/stderr - if opts.Stdin != nil { - cmd.Stdin = opts.Stdin - } - if opts.Stdout != nil { - cmd.Stdout = opts.Stdout - } - if opts.Stderr != nil { - cmd.Stderr = opts.Stderr - } - - // Run the attach command - if err := cmd.Run(); err != nil { - if ctx.Err() != nil { - return ctx.Err() - } - return fmt.Errorf("attaching to container: %w", err) - } - return nil -} - // ResizeTTY resizes the container's TTY to the given dimensions. // For Apple containers, this resizes the PTY master created during StartAttached. func (r *AppleRuntime) ResizeTTY(ctx context.Context, containerID string, height, width uint) error { diff --git a/internal/container/docker.go b/internal/container/docker.go index 8080742f..155e1f18 100644 --- a/internal/container/docker.go +++ b/internal/container/docker.go @@ -332,17 +332,54 @@ func (r *DockerRuntime) RemoveContainer(ctx context.Context, containerID string) } // ContainerLogs returns the logs from a container (follows output). +// For non-TTY containers, the Docker multiplexed format is demuxed +// so the returned reader produces clean text. 
func (r *DockerRuntime) ContainerLogs(ctx context.Context, containerID string) (io.ReadCloser, error) { - return r.cli.ContainerLogs(ctx, containerID, container.LogsOptions{ + raw, err := r.cli.ContainerLogs(ctx, containerID, container.LogsOptions{ ShowStdout: true, ShowStderr: true, Follow: true, }) + if err != nil { + return nil, err + } + + // Check if container was created with TTY + inspect, inspectErr := r.cli.ContainerInspect(ctx, containerID) + if inspectErr != nil { + raw.Close() + return nil, fmt.Errorf("inspecting container for log format: %w", inspectErr) + } + if inspect.Config.Tty { + // TTY mode: logs are plain text + return raw, nil + } + + // Non-TTY: demux Docker's multiplexed stdout/stderr format + pr, pw := io.Pipe() + go func() { + _, copyErr := stdcopy.StdCopy(pw, pw, raw) + raw.Close() + pw.CloseWithError(copyErr) + }() + return pr, nil } // ContainerLogsAll returns all logs from a container (does not follow). // The logs are demultiplexed from Docker's format (removes 8-byte headers). func (r *DockerRuntime) ContainerLogsAll(ctx context.Context, containerID string) ([]byte, error) { + // Wait for the container to stop so Docker's log driver has flushed. + // For already-stopped containers this returns immediately. For fast-exiting + // containers it prevents reading logs before they're written. + waitCtx, waitCancel := context.WithTimeout(ctx, 10*time.Second) + defer waitCancel() + statusCh, errCh := r.cli.ContainerWait(waitCtx, containerID, container.WaitConditionNotRunning) + select { + case <-errCh: + // Ignore errors — container may already be removed + case <-statusCh: + } + reader, err := r.cli.ContainerLogs(ctx, containerID, container.LogsOptions{ ShowStdout: true, ShowStderr: true, @@ -618,65 +655,6 @@ func (r *DockerRuntime) ContainerState(ctx context.Context, containerID string) return inspect.State.Status, nil } -// Attach connects stdin/stdout/stderr to a running container. 
-func (r *DockerRuntime) Attach(ctx context.Context, containerID string, opts AttachOptions) error { - resp, err := r.cli.ContainerAttach(ctx, containerID, container.AttachOptions{ - Stream: true, - Stdin: opts.Stdin != nil, - Stdout: opts.Stdout != nil, - Stderr: opts.Stderr != nil, - }) - if err != nil { - return fmt.Errorf("attaching to container: %w", err) - } - defer resp.Close() - - // Set up bidirectional copy - outputDone := make(chan error, 1) - stdinDone := make(chan error, 1) - - // Copy container output to stdout/stderr. - // Since containers are always created with Tty: true (see CreateContainer), - // output is always raw (not multiplexed). Use io.Copy unconditionally. - go func() { - _, err := io.Copy(opts.Stdout, resp.Reader) - outputDone <- err - }() - - // Copy stdin to container (if provided) - if opts.Stdin != nil { - go func() { - _, err := io.Copy(resp.Conn, opts.Stdin) - // Close write side when stdin ends - if closeWriter, ok := resp.Conn.(interface{ CloseWrite() error }); ok { - if closeErr := closeWriter.CloseWrite(); closeErr != nil && err == nil { - err = closeErr - } - } - stdinDone <- err - }() - } - - // Wait for context cancellation, stdin error (e.g., escape sequence), or output completion - for { - select { - case <-ctx.Done(): - return ctx.Err() - case err := <-stdinDone: - // Stdin error - could be escape sequence or EOF - if err != nil && err != io.EOF { - return err - } - // Normal stdin EOF - continue waiting for output - case err := <-outputDone: - if err != nil && err != io.EOF { - return err - } - return nil - } - } -} - // ResizeTTY resizes the container's TTY to the given dimensions. func (r *DockerRuntime) ResizeTTY(ctx context.Context, containerID string, height, width uint) error { return r.cli.ContainerResize(ctx, containerID, container.ResizeOptions{ @@ -690,7 +668,7 @@ func (r *DockerRuntime) ResizeTTY(ctx context.Context, containerID string, heigh // before the process starts. 
The attach happens first, then start, ensuring // the I/O streams are ready when the container's process begins. func (r *DockerRuntime) StartAttached(ctx context.Context, containerID string, opts AttachOptions) error { - // Attach first (before starting) - this is the key difference from Attach() + // Attach before starting so I/O streams are ready when the process begins resp, err := r.cli.ContainerAttach(ctx, containerID, container.AttachOptions{ Stream: true, Stdin: opts.Stdin != nil, diff --git a/internal/container/runtime.go b/internal/container/runtime.go index 7566e157..b09c0f8c 100644 --- a/internal/container/runtime.go +++ b/internal/container/runtime.go @@ -109,10 +109,6 @@ type Runtime interface { // RemoveImage removes an image by ID or tag. RemoveImage(ctx context.Context, id string) error - // Attach connects stdin/stdout/stderr to a running container. - // Returns when the attachment ends (container exits or context canceled). - Attach(ctx context.Context, id string, opts AttachOptions) error - // StartAttached starts a container with stdin/stdout/stderr already attached. // This is required for TUI applications that need the terminal connected // before the process starts (e.g., to read cursor position). diff --git a/internal/e2e/logs_capture_test.go b/internal/e2e/logs_capture_test.go index 6cf4d73a..16729852 100644 --- a/internal/e2e/logs_capture_test.go +++ b/internal/e2e/logs_capture_test.go @@ -16,7 +16,7 @@ import ( // TestLogsCapturedInAttachedMode verifies that logs are captured to logs.jsonl // when a run completes in attached (non-interactive) mode. 
-// This is the standard flow: moat run (without -d) +// This is the standard flow: moat run func TestLogsCapturedInAttachedMode(t *testing.T) { testOnAllRuntimes(t, func(t *testing.T, rt container.Runtime) { ctx, cancel := context.WithTimeout(context.Background(), testTimeout) @@ -42,7 +42,7 @@ func TestLogsCapturedInAttachedMode(t *testing.T) { defer mgr.Destroy(context.Background(), r.ID) // Start and wait for completion (simulating attached mode) - if err := mgr.Start(ctx, r.ID, run.StartOptions{StreamLogs: true}); err != nil { + if err := mgr.Start(ctx, r.ID, run.StartOptions{}); err != nil { t.Fatalf("Start: %v", err) } @@ -89,7 +89,7 @@ func TestLogsCapturedInAttachedMode(t *testing.T) { // TestLogsCapturedInDetachedMode verifies that logs are captured to logs.jsonl // when a run is started in detached mode and later completes. -// This is the critical bug: moat run -d doesn't capture logs. +// This verifies logs are captured when a container runs without attached I/O. func TestLogsCapturedInDetachedMode(t *testing.T) { testOnAllRuntimes(t, func(t *testing.T, rt container.Runtime) { ctx, cancel := context.WithTimeout(context.Background(), testTimeout) @@ -115,7 +115,7 @@ func TestLogsCapturedInDetachedMode(t *testing.T) { defer mgr.Destroy(context.Background(), r.ID) // Start without waiting (detached mode) - if err := mgr.Start(ctx, r.ID, run.StartOptions{StreamLogs: false}); err != nil { + if err := mgr.Start(ctx, r.ID, run.StartOptions{}); err != nil { t.Fatalf("Start: %v", err) } @@ -252,7 +252,7 @@ func TestLogsCapturedAfterStop(t *testing.T) { defer mgr.Destroy(context.Background(), r.ID) // Start the run - if err := mgr.Start(ctx, r.ID, run.StartOptions{StreamLogs: false}); err != nil { + if err := mgr.Start(ctx, r.ID, run.StartOptions{}); err != nil { t.Fatalf("Start: %v", err) } @@ -327,7 +327,7 @@ func TestLogsAlwaysExistForAudit(t *testing.T) { } defer mgr.Destroy(context.Background(), r.ID) - if err := mgr.Start(ctx, r.ID, 
run.StartOptions{StreamLogs: false}); err != nil { + if err := mgr.Start(ctx, r.ID, run.StartOptions{}); err != nil { t.Fatalf("Start: %v", err) } diff --git a/internal/providers/claude/cli.go b/internal/providers/claude/cli.go index 26b1684d..8afd1b73 100644 --- a/internal/providers/claude/cli.go +++ b/internal/providers/claude/cli.go @@ -1,7 +1,6 @@ package claude import ( - "context" "fmt" "strings" @@ -61,9 +60,6 @@ Examples: # Name the session for easy reference moat claude --name my-feature - # Run in background - moat claude -d - # Force rebuild of container image moat claude --rebuild @@ -99,188 +95,52 @@ Use 'moat list' to see running and recent runs.`, } func runClaudeCode(cmd *cobra.Command, args []string) error { - // If subcommand is being run, don't execute this - if cmd.CalledAs() != "claude" { - return nil - } - - // Separate workspace arg from initial prompt args. - // Everything after "--" is passed as an initial prompt to Claude. - // moat claude -- "is this thing on?" → workspace=".", prompt="is this thing on?" - // moat claude ./project -- "explain this" → workspace="./project", prompt="explain this" - // moat claude ./project → workspace="./project", no prompt - var initialPrompt string - workspace := "." 
- dashIdx := cmd.ArgsLenAtDash() - if dashIdx >= 0 { - // Args before "--" are moat args (workspace), after are passthrough - if dashIdx > 0 { - workspace = args[0] - } - passthroughArgs := args[dashIdx:] - if len(passthroughArgs) > 0 { - initialPrompt = strings.Join(passthroughArgs, " ") - } - } else if len(args) > 0 { - workspace = args[0] - } - - absPath, err := cli.ResolveWorkspacePath(workspace) - if err != nil { - return err - } - - // Load agent.yaml if present, otherwise use defaults - cfg, err := config.Load(absPath) - if err != nil { - return fmt.Errorf("loading config: %w", err) - } - - // Handle --wt flag - wtOut, err := cli.ResolveWorktreeWorkspace(claudeWtFlag, absPath, &claudeFlags, cfg) - if err != nil { - return err - } - absPath = wtOut.Workspace - cfg = wtOut.Config - - // Build grants list using a set for deduplication - grantSet := make(map[string]bool) - var grants []string - addGrant := func(g string) { - if !grantSet[g] { - grantSet[g] = true - grants = append(grants, g) - } - } - - if credName := getClaudeCredentialName(); credName != "" { - addGrant(credName) // Use the actual name the credential is stored under - } - if cfg != nil { - for _, g := range cfg.Grants { - addGrant(g) - } - } - for _, g := range claudeFlags.Grants { - addGrant(g) - } - claudeFlags.Grants = grants - - // Determine interactive mode - interactive := claudePromptFlag == "" - - // Validate mutually exclusive flags + // Validate mutually exclusive flags before delegating to shared runner if claudeContinue && claudeResume != "" { return fmt.Errorf("--continue and --resume are mutually exclusive") } - // Build container command - containerCmd := []string{"claude"} - - // By default, skip permission prompts since Moat provides isolation. 
- if !claudeNoYolo { - containerCmd = append(containerCmd, "--dangerously-skip-permissions") - } - - if claudeContinue { - containerCmd = append(containerCmd, "--continue") - } - - if claudeResume != "" { - sessionID, resolveErr := resolveResumeSession(claudeResume) - if resolveErr != nil { - return resolveErr - } - containerCmd = append(containerCmd, "--resume", sessionID) - } - - if claudePromptFlag != "" { - containerCmd = append(containerCmd, "-p", claudePromptFlag) - } - - if initialPrompt != "" { - containerCmd = append(containerCmd, initialPrompt) - } - - // Use name from flag, or config, or let manager generate one - if claudeFlags.Name == "" && cfg != nil && cfg.Name != "" { - claudeFlags.Name = cfg.Name - } - - // Ensure dependencies for Claude Code - if cfg == nil { - cfg = &config.Config{} - } - if !cli.HasDependency(cfg.Dependencies, "node") { - cfg.Dependencies = append(cfg.Dependencies, "node@20") - } - if !cli.HasDependency(cfg.Dependencies, "git") { - cfg.Dependencies = append(cfg.Dependencies, "git") - } - if !cli.HasDependency(cfg.Dependencies, "claude-code") { - cfg.Dependencies = append(cfg.Dependencies, "claude-code") - } - - // Always sync Claude logs - syncLogs := true - cfg.Claude.SyncLogs = &syncLogs - - // Allow network access to claude.ai for OAuth login - cfg.Network.Allow = append(cfg.Network.Allow, "claude.ai", "*.claude.ai") - - // Add allowed hosts if specified - cfg.Network.Allow = append(cfg.Network.Allow, claudeAllowedHosts...) 
- - // Add environment variables from flags - if envErr := cli.ParseEnvFlags(claudeFlags.Env, cfg); envErr != nil { - return envErr - } - - log.Debug("starting claude code", - "workspace", absPath, - "grants", grants, - "interactive", interactive, - "prompt", claudePromptFlag, - "rebuild", claudeFlags.Rebuild, - ) - - if cli.DryRun { - fmt.Println("Dry run - would start Claude Code") - fmt.Printf("Workspace: %s\n", absPath) - fmt.Printf("Grants: %v\n", grants) - fmt.Printf("Interactive: %v\n", interactive) - fmt.Printf("Rebuild: %v\n", claudeFlags.Rebuild) - if len(grants) == 0 { - fmt.Println("Note: No API key configured. Claude will prompt for login.") - } - return nil - } - - ctx := context.Background() - - opts := cli.ExecOptions{ - Flags: claudeFlags, - Workspace: absPath, - Command: containerCmd, - Config: cfg, - Interactive: interactive, - TTY: interactive, - } - - cli.SetWorktreeFields(&opts, wtOut.Result) - - result, err := cli.ExecuteRun(ctx, opts) - if err != nil { - return err - } - - if result != nil && !claudeFlags.Detach { - fmt.Printf("Starting Claude Code in %s\n", absPath) - fmt.Printf("Run: %s (%s)\n", result.Name, result.ID) - } - - return nil + return cli.RunProvider(cmd, args, cli.ProviderRunConfig{ + Name: "claude", + Flags: &claudeFlags, + PromptFlag: claudePromptFlag, + AllowedHosts: claudeAllowedHosts, + WtFlag: claudeWtFlag, + GetCredentialGrant: getClaudeCredentialName, + Dependencies: []string{"node@20", "git", "claude-code"}, + NetworkHosts: []string{"claude.ai", "*.claude.ai"}, + SupportsInitialPrompt: true, + DryRunNote: "Note: No API key configured. Claude will prompt for login.", + BuildCommand: func(promptFlag, initialPrompt string) ([]string, error) { + containerCmd := []string{"claude"} + + // By default, skip permission prompts since Moat provides isolation. 
+ if !claudeNoYolo { + containerCmd = append(containerCmd, "--dangerously-skip-permissions") + } + if claudeContinue { + containerCmd = append(containerCmd, "--continue") + } + if claudeResume != "" { + sessionID, err := resolveResumeSession(claudeResume) + if err != nil { + return nil, err + } + containerCmd = append(containerCmd, "--resume", sessionID) + } + if promptFlag != "" { + containerCmd = append(containerCmd, "-p", promptFlag) + } + if initialPrompt != "" { + containerCmd = append(containerCmd, initialPrompt) + } + return containerCmd, nil + }, + ConfigureAgent: func(cfg *config.Config) { + syncLogs := true + cfg.Claude.SyncLogs = &syncLogs + }, + }) } // getClaudeCredentialName returns the grant name to use for moat claude. @@ -419,9 +279,9 @@ func resolveResumeSessionInDir(arg, baseDir string) (string, error) { } // If the run is still active, the session ID hasn't been captured yet. - // Tell the user to attach instead of starting a new container. + // Tell the user to view logs or stop instead of starting a new container. 
if match.State == "running" || match.State == "starting" { - return "", fmt.Errorf("run %s (%s) is still running\n\nUse 'moat attach %s' to reconnect.", matchID, match.Name, arg) + return "", fmt.Errorf("run %s (%s) is still running\n\nUse 'moat logs %s' to view output or 'moat stop %s' to stop.", matchID, match.Name, matchID, matchID) } sessionID := match.ProviderMeta["claude_session_id"] diff --git a/internal/providers/claude/cli_test.go b/internal/providers/claude/cli_test.go index bcf25a1f..2070a7fe 100644 --- a/internal/providers/claude/cli_test.go +++ b/internal/providers/claude/cli_test.go @@ -353,8 +353,8 @@ func TestResolveResumeSession_StillRunning(t *testing.T) { if !strings.Contains(err.Error(), "still running") { t.Errorf("error = %q, want mention of 'still running'", err) } - if !strings.Contains(err.Error(), "moat attach") { - t.Errorf("error = %q, want mention of 'moat attach'", err) + if !strings.Contains(err.Error(), "moat logs") { + t.Errorf("error = %q, want mention of 'moat logs'", err) } } diff --git a/internal/providers/codex/cli.go b/internal/providers/codex/cli.go index 2e8c60d1..4f511e6d 100644 --- a/internal/providers/codex/cli.go +++ b/internal/providers/codex/cli.go @@ -1,16 +1,11 @@ package codex import ( - "context" - "fmt" - "strings" - "github.com/spf13/cobra" "github.com/majorcontext/moat/internal/cli" "github.com/majorcontext/moat/internal/config" "github.com/majorcontext/moat/internal/credential" - "github.com/majorcontext/moat/internal/log" ) var ( @@ -80,9 +75,6 @@ Examples: # Name the session for easy reference moat codex --name my-feature - # Run in background - moat codex -d - # Force rebuild of container image moat codex --rebuild @@ -109,175 +101,40 @@ Use 'moat list' to see running and recent runs.`, } func runCodex(cmd *cobra.Command, args []string) error { - // If subcommand is being run, don't execute this - if cmd.CalledAs() != "codex" { - return nil - } - - // Separate workspace arg from initial prompt args. 
- // Everything after "--" is passed as an initial prompt to Codex. - // moat codex -- "testing" → workspace=".", prompt="testing" - // moat codex ./project -- "explain this" → workspace="./project", prompt="explain this" - // moat codex ./project → workspace="./project", no prompt - var initialPrompt string - workspace := "." - dashIdx := cmd.ArgsLenAtDash() - if dashIdx >= 0 { - // Args before "--" are moat args (workspace), after are passthrough - if dashIdx > 0 { - workspace = args[0] - } - passthroughArgs := args[dashIdx:] - if len(passthroughArgs) > 0 { - initialPrompt = strings.Join(passthroughArgs, " ") - } - } else if len(args) > 0 { - workspace = args[0] - } - - absPath, err := cli.ResolveWorkspacePath(workspace) - if err != nil { - return err - } - - // Load agent.yaml if present, otherwise use defaults - cfg, err := config.Load(absPath) - if err != nil { - return fmt.Errorf("loading config: %w", err) - } - - // Handle --wt flag - wtOut, err := cli.ResolveWorktreeWorkspace(codexWtFlag, absPath, &codexFlags, cfg) - if err != nil { - return err - } - absPath = wtOut.Workspace - cfg = wtOut.Config - - // Build grants list using a set for deduplication - grantSet := make(map[string]bool) - var grants []string - addGrant := func(g string) { - if !grantSet[g] { - grantSet[g] = true - grants = append(grants, g) - } - } - - if credName := getCodexCredentialName(); credName != "" { - addGrant(credName) // Use the actual name the credential is stored under - } - if cfg != nil { - for _, g := range cfg.Grants { - addGrant(g) - } - } - for _, g := range codexFlags.Grants { - addGrant(g) - } - codexFlags.Grants = grants - - // Determine interactive mode - interactive := codexPromptFlag == "" - - // Build container command - // codex is installed globally via the dependency system - var containerCmd []string - - if codexPromptFlag != "" { - // Non-interactive mode: use `codex exec` with the prompt - // --full-auto allows edits during execution (safe since we're in a 
container) - containerCmd = []string{"codex", "exec"} - if codexFullAuto { - containerCmd = append(containerCmd, "--full-auto") - } - containerCmd = append(containerCmd, codexPromptFlag) - } else { - // Interactive mode: just run `codex` for the TUI - containerCmd = []string{"codex"} - if initialPrompt != "" { - containerCmd = append(containerCmd, initialPrompt) - } - } - - // Use name from flag, or config, or let manager generate one - if codexFlags.Name == "" && cfg != nil && cfg.Name != "" { - codexFlags.Name = cfg.Name - } - - // Ensure dependencies for Codex CLI - if cfg == nil { - cfg = &config.Config{} - } - if !cli.HasDependency(cfg.Dependencies, "node") { - cfg.Dependencies = append(cfg.Dependencies, "node@20") - } - if !cli.HasDependency(cfg.Dependencies, "git") { - cfg.Dependencies = append(cfg.Dependencies, "git") - } - if !cli.HasDependency(cfg.Dependencies, "codex-cli") { - cfg.Dependencies = append(cfg.Dependencies, "codex-cli") - } - - // Allow network access to OpenAI - cfg.Network.Allow = append(cfg.Network.Allow, NetworkHosts()...) - - // Add allowed hosts if specified - cfg.Network.Allow = append(cfg.Network.Allow, codexAllowedHosts...) - - // Always sync Codex logs - syncLogs := true - cfg.Codex.SyncLogs = &syncLogs - - // Add environment variables from flags - if envErr := cli.ParseEnvFlags(codexFlags.Env, cfg); envErr != nil { - return envErr - } - - log.Debug("starting codex cli", - "workspace", absPath, - "grants", grants, - "interactive", interactive, - "prompt", codexPromptFlag, - "rebuild", codexFlags.Rebuild, - ) - - if cli.DryRun { - fmt.Println("Dry run - would start Codex CLI") - fmt.Printf("Workspace: %s\n", absPath) - fmt.Printf("Grants: %v\n", grants) - fmt.Printf("Interactive: %v\n", interactive) - fmt.Printf("Rebuild: %v\n", codexFlags.Rebuild) - if len(grants) == 0 { - fmt.Println("Note: No API key configured. 
Codex will prompt for login.") - } - return nil - } - - ctx := context.Background() - - opts := cli.ExecOptions{ - Flags: codexFlags, - Workspace: absPath, - Command: containerCmd, - Config: cfg, - Interactive: interactive, - TTY: interactive, - } - - cli.SetWorktreeFields(&opts, wtOut.Result) - - result, err := cli.ExecuteRun(ctx, opts) - if err != nil { - return err - } - - if result != nil && !codexFlags.Detach { - fmt.Printf("Starting Codex CLI in %s\n", absPath) - fmt.Printf("Run: %s (%s)\n", result.Name, result.ID) - } - - return nil + return cli.RunProvider(cmd, args, cli.ProviderRunConfig{ + Name: "codex", + Flags: &codexFlags, + PromptFlag: codexPromptFlag, + AllowedHosts: codexAllowedHosts, + WtFlag: codexWtFlag, + GetCredentialGrant: getCodexCredentialName, + Dependencies: DefaultDependencies(), + NetworkHosts: NetworkHosts(), + SupportsInitialPrompt: true, + DryRunNote: "Note: No API key configured. Codex will prompt for login.", + BuildCommand: func(promptFlag, initialPrompt string) ([]string, error) { + if promptFlag != "" { + // Non-interactive: use `codex exec` with the prompt. + // --full-auto allows edits during execution (safe in a container). + containerCmd := []string{"codex", "exec"} + if codexFullAuto { + containerCmd = append(containerCmd, "--full-auto") + } + containerCmd = append(containerCmd, promptFlag) + return containerCmd, nil + } + // Interactive: run `codex` for the TUI + containerCmd := []string{"codex"} + if initialPrompt != "" { + containerCmd = append(containerCmd, initialPrompt) + } + return containerCmd, nil + }, + ConfigureAgent: func(cfg *config.Config) { + syncLogs := true + cfg.Codex.SyncLogs = &syncLogs + }, + }) } // getCodexCredentialName returns the name under which the Codex credential is stored. 
diff --git a/internal/providers/gemini/cli.go b/internal/providers/gemini/cli.go index 2f1e4f96..ab7ad404 100644 --- a/internal/providers/gemini/cli.go +++ b/internal/providers/gemini/cli.go @@ -1,14 +1,10 @@ package gemini import ( - "context" - "fmt" - "github.com/spf13/cobra" "github.com/majorcontext/moat/internal/cli" "github.com/majorcontext/moat/internal/config" - "github.com/majorcontext/moat/internal/log" ) var ( @@ -67,9 +63,6 @@ Examples: # Name the session for easy reference moat gemini --name my-feature - # Run in background - moat gemini -d - # Force rebuild of container image moat gemini --rebuild @@ -92,153 +85,31 @@ Use 'moat list' to see running and recent runs.`, } func runGemini(cmd *cobra.Command, args []string) error { - // If subcommand is being run, don't execute this - if cmd.CalledAs() != "gemini" { - return nil - } - - // Determine workspace - workspace := "." - if len(args) > 0 { - workspace = args[0] - } - - absPath, err := cli.ResolveWorkspacePath(workspace) - if err != nil { - return err - } - - // Load agent.yaml if present, otherwise use defaults - cfg, err := config.Load(absPath) - if err != nil { - return fmt.Errorf("loading config: %w", err) - } - - // Handle --wt flag - wtOut, err := cli.ResolveWorktreeWorkspace(geminiWtFlag, absPath, &geminiFlags, cfg) - if err != nil { - return err - } - absPath = wtOut.Workspace - cfg = wtOut.Config - - // Build grants list using a set for deduplication - grantSet := make(map[string]bool) - var grants []string - addGrant := func(g string) { - if !grantSet[g] { - grantSet[g] = true - grants = append(grants, g) - } - } - - // If user has an API key stored via `moat grant gemini`, use proxy injection - if HasCredential() { - addGrant("gemini") - } - if cfg != nil { - for _, g := range cfg.Grants { - addGrant(g) - } - } - for _, g := range geminiFlags.Grants { - addGrant(g) - } - geminiFlags.Grants = grants - - // Determine interactive mode - interactive := geminiPromptFlag == "" - - // Build 
container command - var containerCmd []string - if geminiPromptFlag != "" { - containerCmd = []string{"gemini", "-p", geminiPromptFlag} - } else { - containerCmd = []string{"gemini"} - } - - // Use name from flag, or config, or let manager generate one - if geminiFlags.Name == "" && cfg != nil && cfg.Name != "" { - geminiFlags.Name = cfg.Name - } - - // Ensure dependencies for Gemini CLI - if cfg == nil { - cfg = &config.Config{} - } - for _, dep := range DefaultDependencies() { - prefix := dep - if idx := len(dep) - 1; idx >= 0 { - // Extract prefix before @version - for i := range dep { - if dep[i] == '@' { - prefix = dep[:i] - break - } + return cli.RunProvider(cmd, args, cli.ProviderRunConfig{ + Name: "gemini", + Flags: &geminiFlags, + PromptFlag: geminiPromptFlag, + AllowedHosts: geminiAllowedHosts, + WtFlag: geminiWtFlag, + GetCredentialGrant: func() string { + if HasCredential() { + return "gemini" } - } - if !cli.HasDependency(cfg.Dependencies, prefix) { - cfg.Dependencies = append(cfg.Dependencies, dep) - } - } - - // Allow network access to Google APIs - cfg.Network.Allow = append(cfg.Network.Allow, NetworkHosts()...) - - // Add allowed hosts if specified - cfg.Network.Allow = append(cfg.Network.Allow, geminiAllowedHosts...) - - // Always sync Gemini logs for moat gemini command - syncLogs := true - cfg.Gemini.SyncLogs = &syncLogs - - // Add environment variables from flags - if envErr := cli.ParseEnvFlags(geminiFlags.Env, cfg); envErr != nil { - return envErr - } - - log.Debug("starting gemini", - "workspace", absPath, - "grants", grants, - "interactive", interactive, - "prompt", geminiPromptFlag, - "rebuild", geminiFlags.Rebuild, - ) - - if cli.DryRun { - fmt.Println("Dry run - would start Gemini") - fmt.Printf("Workspace: %s\n", absPath) - fmt.Printf("Grants: %v\n", grants) - fmt.Printf("Interactive: %v\n", interactive) - fmt.Printf("Rebuild: %v\n", geminiFlags.Rebuild) - if len(grants) == 0 { - fmt.Println("Note: No API key configured. 
Gemini will prompt for login.") - } - return nil - } - - ctx := context.Background() - - opts := cli.ExecOptions{ - Flags: geminiFlags, - Workspace: absPath, - Command: containerCmd, - Config: cfg, - Interactive: interactive, - TTY: interactive, - } - - cli.SetWorktreeFields(&opts, wtOut.Result) - - result, err := cli.ExecuteRun(ctx, opts) - if err != nil { - return err - } - - if result != nil && !geminiFlags.Detach { - fmt.Printf("Starting Gemini in %s\n", absPath) - fmt.Printf("Run: %s (%s)\n", result.Name, result.ID) - } - - return nil + return "" + }, + Dependencies: DefaultDependencies(), + NetworkHosts: NetworkHosts(), + SupportsInitialPrompt: false, + DryRunNote: "Note: No API key configured. Gemini will prompt for login.", + BuildCommand: func(promptFlag, _ string) ([]string, error) { + if promptFlag != "" { + return []string{"gemini", "-p", promptFlag}, nil + } + return []string{"gemini"}, nil + }, + ConfigureAgent: func(cfg *config.Config) { + syncLogs := true + cfg.Gemini.SyncLogs = &syncLogs + }, + }) } diff --git a/internal/run/edge_cases_test.go b/internal/run/edge_cases_test.go index a7edd787..b7fc6745 100644 --- a/internal/run/edge_cases_test.go +++ b/internal/run/edge_cases_test.go @@ -917,7 +917,7 @@ func TestMonitorContainerExitSetsStateFailed(t *testing.T) { // Run monitor in goroutine and wait for it to complete done := make(chan struct{}) go func() { - m.monitorContainerExit(context.Background(), r) + m.monitorContainerExit(r) close(done) }() @@ -976,7 +976,7 @@ func TestMonitorContainerExitSetsStateStopped(t *testing.T) { done := make(chan struct{}) go func() { - m.monitorContainerExit(context.Background(), r) + m.monitorContainerExit(r) close(done) }() @@ -1056,7 +1056,7 @@ func TestMonitorContainerExitWaitError(t *testing.T) { done := make(chan struct{}) go func() { - m.monitorContainerExit(context.Background(), r) + m.monitorContainerExit(r) close(done) }() @@ -1223,7 +1223,7 @@ func TestMonitorContainerExitAlreadyStopped(t *testing.T) { 
done := make(chan struct{}) go func() { - m.monitorContainerExit(context.Background(), r) + m.monitorContainerExit(r) close(done) }() diff --git a/internal/run/manager.go b/internal/run/manager.go index b1f761d5..072f26f1 100644 --- a/internal/run/manager.go +++ b/internal/run/manager.go @@ -4,7 +4,6 @@ import ( "bytes" "context" "crypto/sha256" - "errors" "fmt" "io" "net" @@ -20,7 +19,6 @@ import ( "syscall" "time" - "github.com/docker/docker/pkg/stdcopy" "github.com/majorcontext/moat/internal/audit" "github.com/majorcontext/moat/internal/config" "github.com/majorcontext/moat/internal/container" @@ -83,10 +81,14 @@ type Manager struct { daemonClient *daemon.Client mu sync.RWMutex - // ctx/cancel control background goroutines (e.g. monitorContainerExit). - // Canceled in Close() so lingering WaitContainer calls don't leak. + // ctx/cancel for general manager lifecycle. ctx context.Context cancel context.CancelFunc + + // monitorWg tracks active monitorContainerExit goroutines. + // Close() waits on this before closing the runtime so that + // monitors can finish capturing logs and updating state. + monitorWg sync.WaitGroup } // ManagerOptions configures the run manager. @@ -331,13 +333,11 @@ func (m *Manager) registerPersistedRun(runState State, meta storage.Metadata, st m.mu.Unlock() // For running containers, start background monitor to capture logs when they exit. - // Use m.ctx so these goroutines are canceled when the Manager closes. + // These inherited monitors are NOT tracked by monitorWg — they may block + // indefinitely on long-running containers from previous CLI invocations. + // Only monitors started via Start() are tracked so Close() doesn't hang. 
if runState == StateRunning { - monitorCtx := m.ctx - if monitorCtx == nil { - monitorCtx = context.Background() - } - go m.monitorContainerExit(monitorCtx, r) + go m.monitorContainerExit(r) } log.Debug("loaded persisted run", "id", runID, "name", meta.Name, "state", runState) @@ -2158,10 +2158,84 @@ region = %s } // StartOptions configures how a run is started. -type StartOptions struct { - // StreamLogs controls whether container logs are streamed to stdout. - // Set to false for interactive mode where attach handles I/O. - StreamLogs bool +type StartOptions struct{} + +// setLogContext configures the structured logger with run-specific fields +// so all subsequent log entries in this goroutine are correlated to the run. +func setLogContext(r *Run) { + log.SetRunContext(log.RunContext{ + RunID: r.ID, + RunName: r.Name, + Agent: r.Agent, + Workspace: filepath.Base(r.Workspace), + Image: r.Image, + Grants: r.Grants, + }) +} + +// setupPortBindings retrieves the host-side port mappings for a container's +// exposed ports and registers them as routes with both the local route table +// and the proxy daemon. Port binding lookup is retried because the container +// runtime may not have mappings ready immediately after start. 
+func (m *Manager) setupPortBindings(ctx context.Context, r *Run) { + if len(r.Ports) == 0 { + return + } + + var bindings map[int]int + var err error + for i := 0; i < 5; i++ { + bindings, err = m.runtime.GetPortBindings(ctx, r.ContainerID) + if err != nil || len(bindings) >= len(r.Ports) { + break + } + time.Sleep(50 * time.Millisecond) + } + if err != nil { + ui.Warnf("Getting port bindings: %v", err) + return + } + + r.HostPorts = make(map[string]int) + services := make(map[string]string) + for serviceName, containerPort := range r.Ports { + if hostPort, ok := bindings[containerPort]; ok { + r.HostPorts[serviceName] = hostPort + services[serviceName] = fmt.Sprintf("127.0.0.1:%d", hostPort) + } + } + if len(services) > 0 { + if err := m.routes.Add(r.Name, services); err != nil { + ui.Warnf("Registering routes: %v", err) + } + // Snapshot daemonClient under lock to avoid racing with Create() + m.mu.RLock() + dc := m.daemonClient + m.mu.RUnlock() + if dc != nil { + if err := dc.RegisterRoutes(ctx, r.Name, services); err != nil { + log.Debug("failed to register routes via daemon", "error", err) + } + } + } +} + +// setupFirewall configures iptables-based network isolation inside the +// container so that only traffic through the credential-injecting proxy is +// allowed. Returns an error if firewall setup fails, since a strict network +// policy without a working firewall would leave the container unprotected. 
+func (m *Manager) setupFirewall(ctx context.Context, r *Run) error { + if !r.FirewallEnabled || r.ProxyPort <= 0 { + return nil + } + if err := m.runtime.SetupFirewall(ctx, r.ContainerID, r.ProxyHost, r.ProxyPort); err != nil { + r.SetStateFailedAt(fmt.Sprintf("firewall setup failed: %v", err), time.Now()) + if stopErr := m.runtime.StopContainer(ctx, r.ContainerID); stopErr != nil { + ui.Warnf("Failed to stop container after firewall error: %v", stopErr) + } + return fmt.Errorf("firewall setup failed (required for strict network policy): %w", err) + } + return nil } // Start begins execution of a run. @@ -2174,74 +2248,18 @@ func (m *Manager) Start(ctx context.Context, runID string, opts StartOptions) er } m.mu.Unlock() r.SetState(StateStarting) - - // Set run context in logger for correlation - log.SetRunContext(log.RunContext{ - RunID: runID, - RunName: r.Name, - Agent: r.Agent, - Workspace: filepath.Base(r.Workspace), - Image: r.Image, - Grants: r.Grants, - }) + setLogContext(r) if err := m.runtime.StartContainer(ctx, r.ContainerID); err != nil { r.SetStateFailedAt(err.Error(), time.Now()) return err } - // Set up firewall if enabled (strict network policy) - // This blocks all outbound traffic except to the proxy - if r.FirewallEnabled && r.ProxyPort > 0 { - if err := m.runtime.SetupFirewall(ctx, r.ContainerID, r.ProxyHost, r.ProxyPort); err != nil { - // Firewall setup failed - this is fatal for strict policy since the user - // explicitly requested network isolation. Without iptables, only proxy-level - // filtering applies, which can be bypassed by tools that ignore HTTP_PROXY. 
- r.SetStateFailedAt(fmt.Sprintf("firewall setup failed: %v", err), time.Now()) - if stopErr := m.runtime.StopContainer(ctx, r.ContainerID); stopErr != nil { - ui.Warnf("Failed to stop container after firewall error: %v", stopErr) - } - return fmt.Errorf("firewall setup failed (required for strict network policy): %w", err) - } + if err := m.setupFirewall(ctx, r); err != nil { + return err } - // Get actual port bindings after container starts - if len(r.Ports) > 0 { - // Retry a few times - Docker may need a moment to set up port bindings - var bindings map[int]int - var err error - for i := 0; i < 5; i++ { - bindings, err = m.runtime.GetPortBindings(ctx, r.ContainerID) - if err != nil || len(bindings) >= len(r.Ports) { - break - } - time.Sleep(50 * time.Millisecond) - } - if err != nil { - // Log but don't fail - container is running - ui.Warnf("Getting port bindings: %v", err) - } else { - r.HostPorts = make(map[string]int) - services := make(map[string]string) - for serviceName, containerPort := range r.Ports { - if hostPort, ok := bindings[containerPort]; ok { - r.HostPorts[serviceName] = hostPort - services[serviceName] = fmt.Sprintf("127.0.0.1:%d", hostPort) - } - } - // Register routes - if len(services) > 0 { - if err := m.routes.Add(r.Name, services); err != nil { - ui.Warnf("Registering routes: %v", err) - } - if m.daemonClient != nil { - if err := m.daemonClient.RegisterRoutes(ctx, r.Name, services); err != nil { - log.Debug("failed to register routes via daemon", "error", err) - } - } - } - } - } + m.setupPortBindings(ctx, r) r.SetStateWithTime(StateRunning, time.Now()) @@ -2255,19 +2273,13 @@ func (m *Manager) Start(ctx context.Context, runID string, opts StartOptions) er } } - // Stream logs to stdout (unless disabled for interactive mode) - if opts.StreamLogs { - go m.streamLogs(context.Background(), r) - } - // Start background monitor to capture logs when container exits. - // This is critical for detached runs where Wait() is never called. 
- // Use m.ctx so the goroutine is canceled when the Manager closes. - monitorCtx := m.ctx - if monitorCtx == nil { - monitorCtx = context.Background() - } - go m.monitorContainerExit(monitorCtx, r) + // Tracked by monitorWg so Close() waits for completion. + m.monitorWg.Add(1) + go func() { + defer m.monitorWg.Done() + m.monitorContainerExit(r) + }() return nil } @@ -2286,16 +2298,7 @@ func (m *Manager) StartAttached(ctx context.Context, runID string, stdin io.Read containerID := r.ContainerID m.mu.Unlock() r.SetState(StateStarting) - - // Set run context in logger for correlation - log.SetRunContext(log.RunContext{ - RunID: runID, - RunName: r.Name, - Agent: r.Agent, - Workspace: filepath.Base(r.Workspace), - Image: r.Image, - Grants: r.Grants, - }) + setLogContext(r) // Start with attachment - this ensures TTY is connected before process starts. // TTY mode must match how the container was created (see CreateContainer in @@ -2357,63 +2360,36 @@ func (m *Manager) StartAttached(ctx context.Context, runID string, stdin io.Read r.SetStateWithTime(StateRunning, time.Now()) } - // Get actual port bindings after container starts - if len(r.Ports) > 0 { - var bindings map[int]int - var bindErr error - for i := 0; i < 5; i++ { - bindings, bindErr = m.runtime.GetPortBindings(ctx, r.ContainerID) - if bindErr != nil || len(bindings) >= len(r.Ports) { - break - } - time.Sleep(50 * time.Millisecond) - } - if bindErr != nil { - ui.Warnf("Getting port bindings: %v", bindErr) - } else { - r.HostPorts = make(map[string]int) - services := make(map[string]string) - for serviceName, containerPort := range r.Ports { - if hostPort, ok := bindings[containerPort]; ok { - r.HostPorts[serviceName] = hostPort - services[serviceName] = fmt.Sprintf("127.0.0.1:%d", hostPort) - } - } - if len(services) > 0 { - if routeErr := m.routes.Add(r.Name, services); routeErr != nil { - ui.Warnf("Registering routes: %v", routeErr) - } - if m.daemonClient != nil { - if routeErr := 
m.daemonClient.RegisterRoutes(ctx, r.Name, services); routeErr != nil { - log.Debug("failed to register routes via daemon", "error", routeErr) - } - } - } - } + if err := m.setupFirewall(ctx, r); err != nil { + return err } - // Save state to disk - _ = r.SaveMetadata() + m.setupPortBindings(ctx, r) - // Set up firewall if enabled (do this after container starts) - if r.FirewallEnabled && r.ProxyPort > 0 { - if err := m.runtime.SetupFirewall(ctx, r.ContainerID, r.ProxyHost, r.ProxyPort); err != nil { - // Firewall setup failed - this is fatal for strict policy - r.SetStateFailedAt(fmt.Sprintf("firewall setup failed: %v", err), time.Now()) - if stopErr := m.runtime.StopContainer(ctx, r.ContainerID); stopErr != nil { - ui.Warnf("Failed to stop container after firewall error: %v", stopErr) - } - return fmt.Errorf("firewall setup failed (required for strict network policy): %w", err) - } - } + _ = r.SaveMetadata() // Wait for the attachment to complete (container exits or context canceled) attachErr := <-attachDone + // Determine whether the caller will stop the container (escape-stop or context + // cancellation). In those cases, skip state updates and log capture here — the + // caller's Stop() and monitorContainerExit handle them after the container + // actually exits. + callerWillStop := ctx.Err() != nil || term.IsEscapeError(attachErr) + + if !callerWillStop { + // Container exited on its own — update state now. + if attachErr != nil { + r.SetStateFailedAt(attachErr.Error(), time.Now()) + } else { + r.SetStateWithTime(StateStopped, time.Now()) + } + } + // For Apple containers in interactive mode, write captured output directly to logs.jsonl. - // (Apple TTY output doesn't go through container runtime logs) - // For Docker, captureLogs will handle it via ContainerLogsAll (works even in TTY mode). - // Always create the file for audit completeness, even if empty. 
+ // (Apple TTY output doesn't go through container runtime logs — captureLogs() returns + // early for Apple interactive runs, so this is the only path that writes logs.) + // Runs unconditionally: even on escape-stop the buffer holds all output up to that point. if r.Interactive && r.Store != nil && m.runtime.Type() == container.RuntimeApple { // Use CompareAndSwap to ensure single write if r.logsCaptured.CompareAndSwap(false, true) { @@ -2429,45 +2405,30 @@ func (m *Manager) StartAttached(ctx context.Context, runID string, stdin io.Read } } - // Capture logs after container exits (critical for audit/observability) - // For non-interactive mode, this fetches from container runtime logs - // For interactive mode with tee, this is a no-op (logsCaptured flag is already set) - m.captureLogs(r) + // Capture logs after container exits (critical for audit/observability). + // Skip when caller will stop the container — it's still running and + // monitorContainerExit will capture complete logs after it actually exits. + if !callerWillStop { + m.captureLogs(r) + } // Run provider stopped hooks (e.g., Claude session ID extraction). // Must happen after the container has exited so session files are flushed. - runProviderStoppedHooks(r) + if !callerWillStop { + runProviderStoppedHooks(r) + } _ = r.SaveMetadata() - return attachErr -} - -// streamLogs streams container logs to stdout for real-time feedback. -// Note: Final log capture to storage is handled by captureLogs() which is called -// from all container exit paths (Wait, StartAttached, Stop) to ensure complete -// logs are captured even for fast-exiting containers. -func (m *Manager) streamLogs(ctx context.Context, r *Run) { - logs, err := m.runtime.ContainerLogs(ctx, r.ContainerID) - if err != nil { - ui.Errorf("Getting logs: %v", err) - return + // Clean up resources (network, sidecars, temp dirs) on natural exit. + // monitorContainerExit is not running for interactive runs, so this is + // the only cleanup path. 
Idempotent via cleanupOnce. + if !callerWillStop { + cleanupCtx, cleanupCancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cleanupCancel() + m.cleanupResources(cleanupCtx, r) } - defer logs.Close() - // Stream to stdout only for real-time feedback - // Storage is handled by Wait() after container exits - // - // Note: streamLogs is only called for non-interactive runs (see exec.go). - // Interactive runs use StartAttached which handles I/O directly. - // Non-interactive Docker containers use multiplexed streams (no TTY), - // so we must demultiplex to avoid 8-byte headers leaking into output. - if m.runtime.Type() == container.RuntimeDocker { - // Docker non-interactive container: demultiplex the stream - _, _ = stdcopy.StdCopy(os.Stdout, os.Stderr, logs) - } else { - // Apple container: output is already raw - _, _ = io.Copy(os.Stdout, logs) - } + return attachErr } // Stop terminates a running run. @@ -2487,134 +2448,23 @@ func (m *Manager) Stop(ctx context.Context, runID string) error { } r.SetState(StateStopping) - buildkitContainerID := r.BuildkitContainerID - networkID := r.NetworkID - serviceContainers := r.ServiceContainers m.mu.Unlock() - // Stop service containers - if len(serviceContainers) > 0 { - svcMgr := m.runtime.ServiceManager() - if svcMgr != nil { - for svcName, containerID := range serviceContainers { - log.Debug("stopping service", "service", svcName, "container_id", containerID) - if err := svcMgr.StopService(ctx, container.ServiceInfo{ID: containerID}); err != nil { - log.Debug("failed to stop service", "service", svcName, "error", err) - } - } - } - } - - // Stop and remove BuildKit sidecar if present (before main container). - // Must remove before network cleanup — Docker refuses to remove networks - // with connected containers (#131). 
- if buildkitContainerID != "" { - log.Debug("stopping buildkit sidecar", "container_id", buildkitContainerID) - if err := m.runtime.StopContainer(ctx, buildkitContainerID); err != nil { - log.Debug("failed to stop buildkit sidecar", "error", err) - } - if err := m.runtime.RemoveContainer(ctx, buildkitContainerID); err != nil { - log.Debug("failed to remove buildkit sidecar", "error", err) - } - } - + // Stop the main container if err := m.runtime.StopContainer(ctx, r.ContainerID); err != nil { - // Log but don't fail - container might already be stopped ui.Warnf("%v", err) log.Debug("failed to stop container", "container_id", r.ContainerID, "error", err) } - // Capture logs after container stops (defense-in-depth). - // monitorContainerExit should also capture these when exitCh is signaled, - // but capturing here provides a safety net if moat crashes before that. - // captureLogs is idempotent so multiple calls are safe. + // Capture logs and run provider hooks (both idempotent) m.captureLogs(r) - - // Run provider stopped hooks before saving metadata. 
runProviderStoppedHooks(r) - // Cancel token refresh and unregister run from proxy daemon - if err := r.stopProxyServer(ctx); err != nil { - ui.Warnf("Stopping proxy: %v", err) - } - if r.ProxyAuthToken != "" && m.daemonClient != nil { - if err := m.daemonClient.UnregisterRun(ctx, r.ProxyAuthToken); err != nil { - ui.Warnf("Unregistering from proxy daemon: %v", err) - } - } - - // Stop the SSH agent server if one was created - if err := r.stopSSHAgentServer(); err != nil { - ui.Warnf("Stopping SSH agent proxy: %v", err) - } - - // Unregister routes for this agent - if r.Name != "" { - _ = m.routes.Remove(r.Name) - if m.daemonClient != nil { - if err := m.daemonClient.UnregisterRoutes(ctx, r.Name); err != nil { - log.Debug("failed to unregister routes via daemon", "error", err) - } - } - } - r.SetStateWithTime(StateStopped, time.Now()) - m.mu.Lock() - keepContainer := r.KeepContainer - containerID := r.ContainerID - providerCleanupPaths := r.ProviderCleanupPaths - m.mu.Unlock() - - // Save state to disk _ = r.SaveMetadata() - // Auto-remove container unless --keep was specified - if !keepContainer { - if rmErr := m.runtime.RemoveContainer(ctx, containerID); rmErr != nil { - ui.Warnf("Removing container: %v", rmErr) - } - } - - // Clean up provider resources - for providerName, cleanupPath := range providerCleanupPaths { - if prov := provider.Get(providerName); prov != nil { - prov.Cleanup(cleanupPath) - } - } - - // Clean up Claude config temp directory - m.mu.RLock() - claudeConfigTempDir := r.ClaudeConfigTempDir - m.mu.RUnlock() - if claudeConfigTempDir != "" { - if err := os.RemoveAll(claudeConfigTempDir); err != nil { - log.Debug("failed to remove Claude config temp dir", "path", claudeConfigTempDir, "error", err) - } - } - - // Clean up Gemini config temp directory - m.mu.RLock() - geminiConfigTempDir := r.GeminiConfigTempDir - m.mu.RUnlock() - if geminiConfigTempDir != "" { - if err := os.RemoveAll(geminiConfigTempDir); err != nil { - log.Debug("failed to 
remove Gemini config temp dir", "path", geminiConfigTempDir, "error", err) - } - } - - // Remove network if present (with force-disconnect fallback) - if networkID != "" { - log.Debug("removing docker network", "network_id", networkID) - netMgr := m.runtime.NetworkManager() - if netMgr != nil { - if err := netMgr.RemoveNetwork(ctx, networkID); err != nil { - log.Debug("network removal failed, trying force removal", "network_id", networkID, "error", err) - if forceErr := netMgr.ForceRemoveNetwork(ctx, networkID); forceErr != nil { - log.Debug("force network removal also failed", "network_id", networkID, "error", forceErr) - } - } - } - } + // Clean up all resources + m.cleanupResources(ctx, r) return nil } @@ -2627,16 +2477,14 @@ func (m *Manager) Wait(ctx context.Context, runID string) error { m.mu.RUnlock() return fmt.Errorf("run %s not found", runID) } - containerID := r.ContainerID m.mu.RUnlock() - // Wait for container to exit (signaled by monitorContainerExit) or context cancellation - // NOTE: We don't call WaitContainer here to avoid race condition - monitorContainerExit + // Wait for container to exit (signaled by monitorContainerExit) or context cancellation. + // We don't call WaitContainer here to avoid race conditions — monitorContainerExit // is the only goroutine that waits on the container and will close exitCh when done. select { case <-r.exitCh: // Container has exited (monitorContainerExit already captured logs and updated state) - // Capture logs again here (idempotent) for defense-in-depth m.captureLogs(r) // Get final error (thread-safe read) @@ -2647,78 +2495,12 @@ func (m *Manager) Wait(ctx context.Context, runID string) error { } r.stateMu.Unlock() - // NOTE: stopProxyServer, UnregisterRun, and stopSSHAgentServer are - // handled by monitorContainerExit (which runs for ALL runs, including - // detached). We don't repeat them here to avoid duplicate 404 warnings - // and races when monitorContainerExit's cleanup has already completed. 
- - // Unregister routes for this agent - if r.Name != "" { - _ = m.routes.Remove(r.Name) - if m.daemonClient != nil { - if stopErr := m.daemonClient.UnregisterRoutes(context.Background(), r.Name); stopErr != nil { - log.Debug("failed to unregister routes via daemon", "error", stopErr) - } - } - } - - // NOTE: We don't update r.State, r.StoppedAt, or r.Error here because - // monitorContainerExit already set them when the container exited. - // Overwriting would lose StateFailed state and error details. - - // Get values needed for cleanup - m.mu.RLock() - keepContainer := r.KeepContainer - providerCleanupPaths := r.ProviderCleanupPaths - m.mu.RUnlock() - - // Auto-remove container unless --keep was specified. - // Use log.Debug for the error — Destroy() may also try to remove - // the same container, and Docker can report "removal already in progress". - if !keepContainer { - if rmErr := m.runtime.RemoveContainer(context.Background(), containerID); rmErr != nil { - log.Debug("removing container after wait", "container_id", containerID, "error", rmErr) - } - } - - // Clean up provider resources - for providerName, cleanupPath := range providerCleanupPaths { - if prov := provider.Get(providerName); prov != nil { - prov.Cleanup(cleanupPath) - } - } - - // Clean up AWS temp directory - if r.awsTempDir != "" { - if rmErr := os.RemoveAll(r.awsTempDir); rmErr != nil { - ui.Warnf("Removing AWS temp dir: %v", rmErr) - } - } - - // Clean up Claude config temp directory - if r.ClaudeConfigTempDir != "" { - if rmErr := os.RemoveAll(r.ClaudeConfigTempDir); rmErr != nil { - log.Debug("failed to remove Claude config temp dir", "path", r.ClaudeConfigTempDir, "error", rmErr) - } - } - - // Clean up Codex config temp directory - if r.CodexConfigTempDir != "" { - if rmErr := os.RemoveAll(r.CodexConfigTempDir); rmErr != nil { - log.Debug("failed to remove Codex config temp dir", "path", r.CodexConfigTempDir, "error", rmErr) - } - } - - // Clean up Gemini config temp directory - if 
r.GeminiConfigTempDir != "" { - if rmErr := os.RemoveAll(r.GeminiConfigTempDir); rmErr != nil { - log.Debug("failed to remove Gemini config temp dir", "path", r.GeminiConfigTempDir, "error", rmErr) - } - } + // Clean up resources (usually no-op because monitorContainerExit already did it) + m.cleanupResources(context.Background(), r) return err case <-ctx.Done(): - // Context canceled - caller chose to detach, don't stop the run + // Context canceled — caller is responsible for stopping the run return ctx.Err() } } @@ -2744,7 +2526,7 @@ func (m *Manager) Get(runID string) (*Run, error) { // - Normal exit (Wait) // - Interactive exit (StartAttached) // - Explicit stop (Stop) -// - Detached completion (background monitor) +// - Background monitor (monitorContainerExit) func (m *Manager) captureLogs(r *Run) { if r.Store == nil { return @@ -2848,23 +2630,124 @@ func runProviderStoppedHooks(r *Run) { } } +// cleanupResources tears down all resources associated with a run. It is +// idempotent — only the first call does work, subsequent calls are no-ops. +// This is safe to call from Stop, Wait, monitorContainerExit, or Destroy. +// +// It does NOT: +// - stop the main container (caller handles that) +// - update run state (path-specific) +// - capture logs (has its own idempotency guard) +// - run provider stopped hooks (has its own idempotency guard) +// - close audit store or remove storage (Destroy-only) +func (m *Manager) cleanupResources(ctx context.Context, r *Run) { + // Snapshot daemonClient under lock to avoid racing with Create() which + // sets it under m.mu.Lock(). cleanupResources is called from + // monitorContainerExit goroutines that run concurrently with Create(). 
+ m.mu.RLock() + dc := m.daemonClient + m.mu.RUnlock() + + r.cleanupOnce.Do(func() { + // Cancel token refresh and unregister run from proxy daemon + if err := r.stopProxyServer(ctx); err != nil { + log.Debug("cleanup: stopping proxy", "error", err) + } + if r.ProxyAuthToken != "" && dc != nil { + if err := dc.UnregisterRun(ctx, r.ProxyAuthToken); err != nil { + log.Debug("cleanup: unregistering from proxy daemon", "error", err) + } + } + + // Stop the SSH agent server + if err := r.stopSSHAgentServer(); err != nil { + log.Debug("cleanup: stopping SSH agent", "error", err) + } + + // Stop service containers + if len(r.ServiceContainers) > 0 { + svcMgr := m.runtime.ServiceManager() + if svcMgr != nil { + for svcName, svcContainerID := range r.ServiceContainers { + log.Debug("cleanup: stopping service", "service", svcName, "container_id", svcContainerID) + if err := svcMgr.StopService(ctx, container.ServiceInfo{ID: svcContainerID}); err != nil { + log.Debug("cleanup: failed to stop service", "service", svcName, "error", err) + } + } + } + } + + // Remove BuildKit sidecar before network (Docker requires containers + // disconnected before network removal, see #131). 
+ if r.BuildkitContainerID != "" { + log.Debug("cleanup: removing buildkit sidecar", "container_id", r.BuildkitContainerID) + if err := m.runtime.StopContainer(ctx, r.BuildkitContainerID); err != nil { + log.Debug("cleanup: failed to stop buildkit sidecar", "error", err) + } + if err := m.runtime.RemoveContainer(ctx, r.BuildkitContainerID); err != nil { + log.Debug("cleanup: failed to remove buildkit sidecar", "error", err) + } + } + + // Remove main container unless --keep was specified + if !r.KeepContainer { + if err := m.runtime.RemoveContainer(ctx, r.ContainerID); err != nil { + log.Debug("cleanup: failed to remove container", "error", err) + } + } + + // Remove network (with force-disconnect fallback) + if r.NetworkID != "" { + netMgr := m.runtime.NetworkManager() + if netMgr != nil { + if err := netMgr.RemoveNetwork(ctx, r.NetworkID); err != nil { + log.Debug("cleanup: network removal failed, trying force", "network", r.NetworkID, "error", err) + if forceErr := netMgr.ForceRemoveNetwork(ctx, r.NetworkID); forceErr != nil { + log.Debug("cleanup: force network removal failed", "network", r.NetworkID, "error", forceErr) + } + } + } + } + + // Unregister routes + if r.Name != "" { + _ = m.routes.Remove(r.Name) + if dc != nil { + if err := dc.UnregisterRoutes(ctx, r.Name); err != nil { + log.Debug("cleanup: failed to unregister routes", "error", err) + } + } + } + + // Clean up provider resources + for providerName, cleanupPath := range r.ProviderCleanupPaths { + if prov := provider.Get(providerName); prov != nil { + prov.Cleanup(cleanupPath) + } + } + + // Clean up temp directories + for _, dir := range []string{r.awsTempDir, r.ClaudeConfigTempDir, r.CodexConfigTempDir, r.GeminiConfigTempDir} { + if dir != "" { + if err := os.RemoveAll(dir); err != nil { + log.Debug("cleanup: failed to remove temp dir", "path", dir, "error", err) + } + } + } + }) +} + // monitorContainerExit watches for container exit and captures logs. 
-// This runs in the background for ALL runs to ensure logs are captured -// even in detached mode where Wait() is never called. +// This runs in the background for ALL runs to ensure logs are captured, +// exitCh is closed, and resources are cleaned up regardless of which path +// (interactive, non-interactive, Stop) caused the container to exit. // It's safe to call multiple times - captureLogs is idempotent. -func (m *Manager) monitorContainerExit(ctx context.Context, r *Run) { - // Wait for container to exit (no timeout - let it run as long as needed) - // This is the ONLY place that calls WaitContainer to avoid race conditions - exitCode, err := m.runtime.WaitContainer(ctx, r.ContainerID) - - // If WaitContainer failed due to context cancellation (e.g. Manager.Close()), - // bail out without touching run state. The container is likely still running - // and a future Manager instance will pick it up via loadPersistedRuns. - // We check err (not ctx.Err()) so that a natural container exit that races - // with context cancellation still gets its logs captured and state updated. - if err != nil && (errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded)) { - return - } +func (m *Manager) monitorContainerExit(r *Run) { + // Wait for container to exit (no timeout - let it run as long as needed). + // This is the ONLY place that calls WaitContainer to avoid race conditions. + // Uses context.Background() so this goroutine is not canceled by Close(). + // Close() waits on monitorWg before closing the runtime. + exitCode, err := m.runtime.WaitContainer(context.Background(), r.ContainerID) // CRITICAL: Capture logs IMMEDIATELY after container exits, BEFORE signaling. // Docker may start removing/cleaning the container at any moment after exit. @@ -2875,10 +2758,8 @@ func (m *Manager) monitorContainerExit(ctx context.Context, r *Run) { // Must happen after captureLogs and before SaveMetadata. 
runProviderStoppedHooks(r) - // Now signal that container has exited (and logs are captured) - close(r.exitCh) - - // Update run state (use state lock to prevent races with concurrent access) + // Update run state BEFORE signaling exitCh so that Wait() reads + // the final state (including r.Error) when it unblocks. currentState := r.GetState() if currentState == StateRunning || currentState == StateStarting { if err != nil || exitCode != 0 { @@ -2894,81 +2775,15 @@ func (m *Manager) monitorContainerExit(ctx context.Context, r *Run) { } } - // Save updated state _ = r.SaveMetadata() - // Use a single 30-second timeout for all cleanup operations so a hung - // container operation can't block this goroutine indefinitely. + // Signal that container has exited (logs captured, state updated) + close(r.exitCh) + + // Clean up all resources (30-second timeout for cleanup operations) cleanupCtx, cleanupCancel := context.WithTimeout(context.Background(), 30*time.Second) defer cleanupCancel() - - // Cancel token refresh and unregister run from proxy daemon - if stopErr := r.stopProxyServer(cleanupCtx); stopErr != nil { - log.Debug("stopping proxy after container exit", "error", stopErr) - } - if r.ProxyAuthToken != "" && m.daemonClient != nil { - if stopErr := m.daemonClient.UnregisterRun(cleanupCtx, r.ProxyAuthToken); stopErr != nil { - log.Debug("unregistering from proxy daemon after container exit", "error", stopErr) - } - } - - // Stop the SSH agent server if one was created - if stopErr := r.stopSSHAgentServer(); stopErr != nil { - log.Debug("stopping SSH agent proxy after container exit", "error", stopErr) - } - - // Stop service containers - if len(r.ServiceContainers) > 0 { - svcMgr := m.runtime.ServiceManager() - if svcMgr != nil { - for svcName, svcContainerID := range r.ServiceContainers { - log.Debug("stopping service after container exit", "service", svcName, "container_id", svcContainerID) - if stopErr := svcMgr.StopService(cleanupCtx, container.ServiceInfo{ID: 
svcContainerID}); stopErr != nil { - log.Debug("failed to stop service", "service", svcName, "error", stopErr) - } - } - } - } - - // Remove containers before network — Docker refuses to remove networks - // with connected containers, causing network leaks (#131). - if r.BuildkitContainerID != "" { - log.Debug("removing buildkit sidecar after container exit", "container_id", r.BuildkitContainerID) - if stopErr := m.runtime.StopContainer(cleanupCtx, r.BuildkitContainerID); stopErr != nil { - log.Debug("failed to stop buildkit sidecar", "error", stopErr) - } - if rmErr := m.runtime.RemoveContainer(cleanupCtx, r.BuildkitContainerID); rmErr != nil { - log.Debug("failed to remove buildkit sidecar", "error", rmErr) - } - } - if !r.KeepContainer { - if rmErr := m.runtime.RemoveContainer(cleanupCtx, r.ContainerID); rmErr != nil { - log.Debug("failed to remove main container after exit", "error", rmErr) - } - } - - // Remove network (with force-disconnect fallback) - if r.NetworkID != "" { - netMgr := m.runtime.NetworkManager() - if netMgr != nil { - if removeErr := netMgr.RemoveNetwork(cleanupCtx, r.NetworkID); removeErr != nil { - log.Debug("network removal failed, trying force removal", "network", r.NetworkID, "error", removeErr) - if forceErr := netMgr.ForceRemoveNetwork(cleanupCtx, r.NetworkID); forceErr != nil { - log.Debug("force network removal also failed", "network", r.NetworkID, "error", forceErr) - } - } - } - } - - // Unregister routes - if r.Name != "" { - _ = m.routes.Remove(r.Name) - if m.daemonClient != nil { - if err := m.daemonClient.UnregisterRoutes(cleanupCtx, r.Name); err != nil { - log.Debug("failed to unregister routes via daemon", "error", err) - } - } - } + m.cleanupResources(cleanupCtx, r) } // List returns all runs. 
@@ -2991,73 +2806,14 @@ func (m *Manager) Destroy(ctx context.Context, runID string) error { m.mu.Unlock() return fmt.Errorf("run %s not found", runID) } - m.mu.Unlock() if r.GetState() == StateRunning { return fmt.Errorf("cannot destroy running run %s; stop it first", runID) } - // Remove container (may already be removed by Wait or monitorContainerExit) - if err := m.runtime.RemoveContainer(ctx, r.ContainerID); err != nil { - log.Debug("removing container in destroy", "container_id", r.ContainerID, "error", err) - } - - // Remove service containers - for svcName, svcContainerID := range r.ServiceContainers { - if err := m.runtime.RemoveContainer(ctx, svcContainerID); err != nil { - log.Debug("removing service container in destroy", "service", svcName, "error", err) - } - } - - // Remove BuildKit sidecar container if present - if r.BuildkitContainerID != "" { - if err := m.runtime.RemoveContainer(ctx, r.BuildkitContainerID); err != nil { - log.Debug("removing buildkit sidecar in destroy", "error", err) - } - } - - // Remove network if present (with force-disconnect fallback) - if r.NetworkID != "" { - netMgr := m.runtime.NetworkManager() - if netMgr != nil { - if err := netMgr.RemoveNetwork(ctx, r.NetworkID); err != nil { - log.Debug("network removal failed, trying force removal", "network", r.NetworkID, "error", err) - if forceErr := netMgr.ForceRemoveNetwork(ctx, r.NetworkID); forceErr != nil { - log.Debug("force network removal also failed", "network", r.NetworkID, "error", forceErr) - } - } - } - } - - // Cancel token refresh and unregister run from proxy daemon. - // These may already have been handled by monitorContainerExit — that's fine, - // Destroy is a best-effort cleanup for anything that was missed. 
- if err := r.stopProxyServer(ctx); err != nil { - log.Debug("stopping proxy in destroy", "error", err) - } - if r.ProxyAuthToken != "" && m.daemonClient != nil { - if err := m.daemonClient.UnregisterRun(ctx, r.ProxyAuthToken); err != nil { - log.Debug("unregistering from daemon in destroy", "error", err) - } - } - - // Stop the SSH agent server if one was created and still running - if err := r.stopSSHAgentServer(); err != nil { - log.Debug("stopping SSH agent in destroy", "error", err) - } - - // Unregister routes for this agent - if r.Name != "" { - if err := m.routes.Remove(r.Name); err != nil { - ui.Warnf("Removing routes: %v", err) - } - if m.daemonClient != nil { - if err := m.daemonClient.UnregisterRoutes(ctx, r.Name); err != nil { - log.Debug("failed to unregister routes via daemon", "error", err) - } - } - } + // Clean up all run resources (idempotent - may already be done by Stop/monitorContainerExit) + m.cleanupResources(ctx, r) // Check if we should stop the routing proxy (no more agents with ports) if m.proxyLifecycle.ShouldStop() { @@ -3073,41 +2829,6 @@ func (m *Manager) Destroy(ctx context.Context, runID string) error { } } - // Clean up provider resources - for providerName, cleanupPath := range r.ProviderCleanupPaths { - if prov := provider.Get(providerName); prov != nil { - prov.Cleanup(cleanupPath) - } - } - - // Clean up AWS temp directory - if r.awsTempDir != "" { - if err := os.RemoveAll(r.awsTempDir); err != nil { - ui.Warnf("Removing AWS temp dir: %v", err) - } - } - - // Clean up Claude config temp directory - if r.ClaudeConfigTempDir != "" { - if err := os.RemoveAll(r.ClaudeConfigTempDir); err != nil { - log.Debug("failed to remove Claude config temp dir", "path", r.ClaudeConfigTempDir, "error", err) - } - } - - // Clean up Codex config temp directory - if r.CodexConfigTempDir != "" { - if err := os.RemoveAll(r.CodexConfigTempDir); err != nil { - log.Debug("failed to remove Codex config temp dir", "path", r.CodexConfigTempDir, "error", 
err) - } - } - - // Clean up Gemini config temp directory - if r.GeminiConfigTempDir != "" { - if err := os.RemoveAll(r.GeminiConfigTempDir); err != nil { - log.Debug("failed to remove Gemini config temp dir", "path", r.GeminiConfigTempDir, "error", err) - } - } - // Remove run storage directory (logs, traces, metadata) if r.Store != nil { if err := r.Store.Remove(); err != nil { @@ -3122,25 +2843,6 @@ func (m *Manager) Destroy(ctx context.Context, runID string) error { return nil } -// Attach connects stdin/stdout/stderr to a running container. -func (m *Manager) Attach(ctx context.Context, runID string, stdin io.Reader, stdout, stderr io.Writer) error { - m.mu.RLock() - r, ok := m.runs[runID] - if !ok { - m.mu.RUnlock() - return fmt.Errorf("run %s not found", runID) - } - containerID := r.ContainerID - m.mu.RUnlock() - - return m.runtime.Attach(ctx, containerID, container.AttachOptions{ - Stdin: stdin, - Stdout: stdout, - Stderr: stderr, - TTY: true, // Default to TTY mode for now - }) -} - // ResizeTTY resizes the container's TTY to the given dimensions. func (m *Manager) ResizeTTY(ctx context.Context, runID string, height, width uint) error { m.mu.RLock() @@ -3178,7 +2880,7 @@ func (m *Manager) FollowLogs(ctx context.Context, runID string, w io.Writer) err } // RecentLogs returns the last n lines of container logs. -// Used to show context when re-attaching to a running container. +// Used to show recent output context for a running container. func (m *Manager) RecentLogs(runID string, lines int) (string, error) { m.mu.RLock() r, ok := m.runs[runID] @@ -3226,13 +2928,20 @@ func (m *Manager) RuntimeType() string { } // Close releases manager resources. +// It waits for active monitorContainerExit goroutines to finish so that +// logs are captured and state is updated before the runtime is closed. func (m *Manager) Close() error { - // Cancel background goroutines (monitorContainerExit) so lingering - // WaitContainer calls don't keep connections to Docker open. 
+ // Cancel the manager context. if m.cancel != nil { m.cancel() } + // Wait for all monitor goroutines to finish. This ensures log capture + // and state updates complete before we close the runtime connection. + // monitorContainerExit uses context.Background() so it is not + // affected by the cancel above. + m.monitorWg.Wait() + // Stop all proxy/SSH servers and unregister runs from daemon, // with a 10-second overall timeout. closeCtx, closeCancel := context.WithTimeout(context.Background(), 10*time.Second) diff --git a/internal/run/run.go b/internal/run/run.go index 51e6f0a4..ad7e307f 100644 --- a/internal/run/run.go +++ b/internal/run/run.go @@ -64,6 +64,7 @@ type Run struct { // Shutdown coordination to prevent race conditions sshAgentStopOnce sync.Once // Ensures SSHAgentServer.Stop() called only once + cleanupOnce sync.Once // Ensures resource cleanup runs only once // State protection - guards State, Error, StartedAt, StoppedAt fields // Use this lock when reading or modifying these fields to prevent races @@ -123,7 +124,6 @@ type Options struct { Rebuild bool // Force rebuild of container image (ignores cache) KeepContainer bool // If true, don't auto-remove container after run Interactive bool // Keep stdin open for interactive input - TTY bool // Allocate a pseudo-TTY } // generateID creates a unique run identifier. diff --git a/internal/term/escape.go b/internal/term/escape.go index 20fb9630..47ede5d5 100644 --- a/internal/term/escape.go +++ b/internal/term/escape.go @@ -12,8 +12,6 @@ type EscapeAction int const ( // EscapeNone means no escape action was triggered. EscapeNone EscapeAction = iota - // EscapeDetach means the user wants to detach from the session. - EscapeDetach // EscapeStop means the user wants to stop the run. 
EscapeStop ) @@ -25,8 +23,6 @@ type EscapeError struct { func (e EscapeError) Error() string { switch e.Action { - case EscapeDetach: - return "escape: detach" case EscapeStop: return "escape: stop" default: @@ -54,14 +50,12 @@ const ( EscapePrefix byte = 0x1f // Command keys (after the prefix) - escapeKeyDetach byte = 'd' - escapeKeyStop byte = 'k' + escapeKeyStop byte = 'k' ) // EscapeProxy wraps a reader and watches for escape sequences. // // Escape sequences are: Ctrl-/ followed by: -// - d: detach from session (run continues) // - k: stop the run // // When an escape sequence is detected, Read returns an EscapeError. @@ -87,7 +81,7 @@ func NewEscapeProxy(r io.Reader) *EscapeProxy { } // OnPrefixChange sets a callback that fires when the escape prefix state changes. -// The callback receives true when Ctrl-/ is pressed (waiting for d/k), and false +// The callback receives true when Ctrl-/ is pressed (waiting for command key), and false // when the sequence completes or is canceled. This can be used to update UI state, // such as showing escape key hints in a status bar. // @@ -151,18 +145,6 @@ func (e *EscapeProxy) Read(p []byte) (int, error) { // Check for escape commands switch b { - case escapeKeyDetach: - // Buffer any remaining bytes for next Read - if i+1 < n { - e.buf = append(e.buf, buf[i+1:n]...) - } - // If we have output to return first, defer the escape - if len(out) > 0 { - pendingEscape = &EscapeError{Action: EscapeDetach} - break - } - return 0, EscapeError{Action: EscapeDetach} - case escapeKeyStop: if i+1 < n { e.buf = append(e.buf, buf[i+1:n]...) 
@@ -237,8 +219,6 @@ func (e *EscapeProxy) Read(p []byte) (int, error) { b := oneByte[0] e.setPrefixState(false) switch b { - case escapeKeyDetach: - return 0, EscapeError{Action: EscapeDetach} case escapeKeyStop: return 0, EscapeError{Action: EscapeStop} case EscapePrefix: @@ -279,5 +259,5 @@ func (e *EscapeProxy) Read(p []byte) (int, error) { // EscapeHelpText returns help text explaining the escape sequences. func EscapeHelpText() string { - return "Escape sequences: Ctrl-/ d (detach), Ctrl-/ k (stop)" + return "Escape: Ctrl-/ k (stop)" } diff --git a/internal/term/escape_test.go b/internal/term/escape_test.go index 18ac4592..13c7eef0 100644 --- a/internal/term/escape_test.go +++ b/internal/term/escape_test.go @@ -20,31 +20,19 @@ func TestEscapeProxy_PassThrough(t *testing.T) { } } -func TestEscapeProxy_Detach(t *testing.T) { - // Ctrl-/ d should trigger detach +func TestEscapeProxy_DPassesThrough(t *testing.T) { + // Ctrl-/ d is not an escape sequence; both bytes should pass through input := []byte{EscapePrefix, 'd', 'x', 'y', 'z'} r := NewEscapeProxy(bytes.NewReader(input)) - buf := make([]byte, 10) - n, err := r.Read(buf) - - if !IsEscapeError(err) { - t.Fatalf("expected EscapeError, got: %v", err) - } - if GetEscapeAction(err) != EscapeDetach { - t.Errorf("expected EscapeDetach, got: %v", GetEscapeAction(err)) - } - if n != 0 { - t.Errorf("expected 0 bytes, got %d", n) - } - - // Remaining bytes should be available - n, err = r.Read(buf) + out, err := io.ReadAll(r) if err != nil { - t.Fatalf("unexpected error reading remaining: %v", err) + t.Fatalf("unexpected error: %v", err) } - if string(buf[:n]) != "xyz" { - t.Errorf("remaining bytes: got %q, want %q", buf[:n], "xyz") + + expected := []byte{EscapePrefix, 'd', 'x', 'y', 'z'} + if !bytes.Equal(out, expected) { + t.Errorf("got %v, want %v", out, expected) } } @@ -97,27 +85,19 @@ func TestEscapeProxy_UnrecognizedEscape(t *testing.T) { } func TestEscapeProxy_MixedContent(t *testing.T) { - // Normal content 
with escape in the middle + // Normal content with unrecognized escape in the middle - both bytes pass through input := []byte{'a', 'b', EscapePrefix, 'd', 'c'} r := NewEscapeProxy(bytes.NewReader(input)) - buf := make([]byte, 10) - n, err := r.Read(buf) - - // First read should return "ab" then the escape - // Behavior depends on read size - may get "ab" then escape on next read - // or escape immediately - - if IsEscapeError(err) { - // Escape detected - "ab" should be buffered or returned - if n > 0 { - if string(buf[:n]) != "ab" { - t.Errorf("before escape: got %q, want %q", buf[:n], "ab") - } - } - } else if err != nil { + out, err := io.ReadAll(r) + if err != nil { t.Fatalf("unexpected error: %v", err) } + + expected := []byte{'a', 'b', EscapePrefix, 'd', 'c'} + if !bytes.Equal(out, expected) { + t.Errorf("got %v, want %v", out, expected) + } } func TestEscapeProxy_EscapeAtEnd(t *testing.T) { @@ -138,8 +118,8 @@ func TestEscapeProxy_EscapeAtEnd(t *testing.T) { } func TestEscapeProxy_SmallReads(t *testing.T) { - // Read one byte at a time - input := []byte{'a', EscapePrefix, 'd', 'b'} + // Read one byte at a time with a stop escape + input := []byte{'a', EscapePrefix, 'k', 'b'} r := NewEscapeProxy(bytes.NewReader(input)) buf := make([]byte, 1) @@ -158,8 +138,8 @@ func TestEscapeProxy_SmallReads(t *testing.T) { if !IsEscapeError(err) { t.Fatalf("read 2: expected EscapeError, got: %v", err) } - if GetEscapeAction(err) != EscapeDetach { - t.Errorf("read 2: expected EscapeDetach, got: %v", GetEscapeAction(err)) + if GetEscapeAction(err) != EscapeStop { + t.Errorf("read 2: expected EscapeStop, got: %v", GetEscapeAction(err)) } // Read 'b' @@ -177,7 +157,6 @@ func TestEscapeError_Error(t *testing.T) { action EscapeAction want string }{ - {EscapeDetach, "escape: detach"}, {EscapeStop, "escape: stop"}, {EscapeNone, "escape: unknown"}, } @@ -205,7 +184,7 @@ func TestEscapeProxy_OnPrefixChange(t *testing.T) { wantFinalState bool }{ { - name: "prefix detected then 
completed with detach", + name: "prefix detected then canceled with unrecognized d", input: []byte{EscapePrefix, 'd'}, wantCallbacks: []bool{true, false}, wantFinalState: false,