diff --git a/src/Netclaw.Actors.Tests/Tools/ShellToolTests.cs b/src/Netclaw.Actors.Tests/Tools/ShellToolTests.cs index 64c07e0e8..7119e5f7f 100644 --- a/src/Netclaw.Actors.Tests/Tools/ShellToolTests.cs +++ b/src/Netclaw.Actors.Tests/Tools/ShellToolTests.cs @@ -73,17 +73,20 @@ public async Task Requested_timeout_overrides_default_timeout() } [Fact] - public async Task Caller_cancellation_returns_gracefully() + public async Task Caller_cancellation_kills_child_process_tree_and_returns_gracefully() { // Reproduces the session-pipeline path: ShellTool's own timeout is long, // and cancellation instead arrives via the *outer* ct (the pipeline's // per-tool deadline). ShellTool must catch that, kill the process, and // return a message rather than letting the cancellation escape as an - // exception. If the kill failed, draining the pipes would hang and this - // test would never complete — so a passing run also proves the process - // was terminated. + // exception. On Unix the command also spawns a background child that + // inherits stdout/stderr; if the tree kill regresses, that child keeps + // the pipe write-ends open and the test never completes. var tool = new ShellTool(new ToolConfig { ShellTimeoutSeconds = 100 }); - var args = ToolInput.Create("Command", "sleep 120"); + var command = OperatingSystem.IsWindows() + ? "ping 127.0.0.1 -n 120 > nul" + : "sleep 120 & wait"; + var args = ToolInput.Create("Command", command); var context = new ToolExecutionContext("test/thread", Path.GetTempPath()) { Audience = TrustAudience.Personal, diff --git a/src/Netclaw.Actors/Tools/ShellTool.cs b/src/Netclaw.Actors/Tools/ShellTool.cs index a99eea1d9..ed7b5f0cd 100644 --- a/src/Netclaw.Actors/Tools/ShellTool.cs +++ b/src/Netclaw.Actors/Tools/ShellTool.cs @@ -134,18 +134,36 @@ protected override async Task ExecuteAsync(Params args, ToolExecutionCon } catch (OperationCanceledException) { + var killClosedPipes = true; + try { process.Kill(entireProcessTree: true); } - catch (Exception ex) when (ex is InvalidOperationException or Win32Exception) + catch (InvalidOperationException ex) { - // Already exited (TOCTOU race), or the OS refused the kill. + // Already exited between cancellation detection and kill. Debug.WriteLine($"shell_execute: process kill skipped — {ex.Message}"); } + catch (Win32Exception ex) + { + // If the OS refuses the kill, the child may keep stdout/stderr + // open forever. Close our read ends so cancellation still + // returns promptly instead of hanging in ReadToEndAsync. + killClosedPipes = false; + Debug.WriteLine($"shell_execute: process kill skipped — {ex.Message}"); + process.StandardOutput.Dispose(); + process.StandardError.Dispose(); + } - // The kill closes the pipes, so the reads now drain to EOF. - await Task.WhenAll(stdoutTask, stderrTask); + try + { + await Task.WhenAll(stdoutTask, stderrTask); + } + catch (Exception ex) when (!killClosedPipes && ex is IOException or ObjectDisposedException) + { + Debug.WriteLine($"shell_execute: pipe drain aborted — {ex.Message}"); + } return timeoutCts.IsCancellationRequested ? $"Error: Command timed out after {effectiveTimeoutSeconds} seconds."