mostlydev · mostlydev · Apr 28, 2026 · Apr 28, 2026
diff --git a/cllama b/cllama
diff --git a/cmd/claw-mcp-stdio/main.go b/cmd/claw-mcp-stdio/main.go
@@ -31,6 +31,7 @@ const (
 	defaultRestartBackoffMS = 1000
 	defaultRestartMaxMS     = 15000
 	defaultMaxBodyBytes     = 1 << 20
+	lateResponseTTL         = 5 * time.Minute
 )
 
 type config struct {
@@ -59,6 +60,11 @@ type pendingCall struct {
 	ch         chan []byte
 }
 
+type lateResponse struct {
+	reason string
+	at     time.Time
+}
+
 type initCache struct {
 	response []byte
 }
@@ -74,6 +80,7 @@ type stdioBridge struct {
 	available   bool
 	generation  int64
 	pending     map[string]pendingCall
+	late        map[string]lateResponse
 	sessions    map[string]int64
 	initialized bool
 	init        *initCache
@@ -248,6 +255,7 @@ func newStdioBridge(ctx context.Context, cfg config) *stdioBridge {
 		ctx:      childCtx,
 		cancel:   cancel,
 		pending:  make(map[string]pendingCall),
+		late:     make(map[string]lateResponse),
 		sessions: make(map[string]int64),
 	}
 }
@@ -331,6 +339,7 @@ func (b *stdioBridge) markAvailable(stdin io.WriteCloser) int64 {
 	b.stdin = stdin
 	b.available = true
 	b.pending = make(map[string]pendingCall)
+	b.late = make(map[string]lateResponse)
 	b.sessions = make(map[string]int64)
 	b.initialized = false
 	b.init = nil
@@ -344,6 +353,7 @@ func (b *stdioBridge) markUnavailable(message string) {
 		pending.ch <- rpcErrorResponse(pending.originalID, -32000, message)
 	}
 	b.pending = make(map[string]pendingCall)
+	b.late = make(map[string]lateResponse)
 	b.sessions = make(map[string]int64)
 	b.initialized = false
 	b.init = nil
@@ -374,8 +384,16 @@ func (b *stdioBridge) readStdout(generation int64, r io.Reader) {
 		if ok {
 			delete(b.pending, key)
 		}
+		late, wasLate := b.late[key]
+		if wasLate {
+			delete(b.late, key)
+		}
 		b.mu.Unlock()
 		if !ok {
+			if wasLate {
+				fmt.Fprintf(os.Stderr, "claw-mcp-stdio received late response for canceled id %s (%s)\n", key, late.reason)
+				continue
+			}
 			fmt.Fprintf(os.Stderr, "claw-mcp-stdio ignored response for unknown id %s\n", key)
 			continue
 		}
@@ -397,6 +415,7 @@ func (b *stdioBridge) failGeneration(generation int64, message string) {
 		pending.ch <- rpcErrorResponse(pending.originalID, -32000, message)
 	}
 	b.pending = make(map[string]pendingCall)
+	b.late = make(map[string]lateResponse)
 	b.available = false
 	b.stdin = nil
 	b.sessions = make(map[string]int64)
@@ -576,10 +595,10 @@ func (b *stdioBridge) forwardRequest(ctx context.Context, body []byte, originalI
 	case resp := <-ch:
 		return resp, nil
 	case <-ctx.Done():
-		b.removePending(key)
+		b.removePending(key, "request context canceled")
 		return nil, ctx.Err()
 	case <-timer.C:
-		b.removePending(key)
+		b.removePending(key, "request timeout")
 		return nil, fmt.Errorf("stdio child request timed out after %s", timeout)
 	}
 }
@@ -614,15 +633,35 @@ func (b *stdioBridge) writeToChild(body []byte, pending *pendingCall) (string, e
 	return key, nil
 }
 
-func (b *stdioBridge) removePending(key string) {
+func (b *stdioBridge) removePending(key string, reason string) {
 	if key == "" {
 		return
 	}
 	b.mu.Lock()
-	delete(b.pending, key)
+	if _, ok := b.pending[key]; ok {
+		delete(b.pending, key)
+		b.rememberLateResponseLocked(key, reason)
+	}
 	b.mu.Unlock()
 }
 
+func (b *stdioBridge) rememberLateResponseLocked(key string, reason string) {
+	if b.late == nil {
+		b.late = make(map[string]lateResponse)
+	}
+	now := time.Now()
+	for existing, late := range b.late {
+		if now.Sub(late.at) > lateResponseTTL {
+			delete(b.late, existing)
+		}
+	}
+	reason = strings.TrimSpace(reason)
+	if reason == "" {
+		reason = "request canceled"
+	}
+	b.late[key] = lateResponse{reason: reason, at: now}
+}
+
 func rewriteRequestID(body []byte, id json.RawMessage) ([]byte, error) {
 	var obj map[string]json.RawMessage
 	if err := json.Unmarshal(body, &obj); err != nil {

diff --git a/cmd/claw-mcp-stdio/main_test.go b/cmd/claw-mcp-stdio/main_test.go
@@ -65,6 +65,29 @@ func TestBridgeRoutesConcurrentDuplicateClientIDs(t *testing.T) {
 	}
 }
 
+func TestBridgeLogsCanceledLateResponseSeparately(t *testing.T) {
+	bridge := newStdioBridge(testContext(t), config{})
+	bridge.mu.Lock()
+	bridge.generation = 1
+	bridge.pending["7"] = pendingCall{
+		originalID: json.RawMessage(`7`),
+		ch:         make(chan []byte, 1),
+	}
+	bridge.mu.Unlock()
+
+	bridge.removePending("7", "request context canceled")
+	logged := captureStderr(t, func() {
+		bridge.readStdout(2, strings.NewReader(`{"jsonrpc":"2.0","id":7,"result":{"ok":true}}`+"\n"))
+	})
+
+	if !strings.Contains(logged, "late response for canceled id 7") || !strings.Contains(logged, "request context canceled") {
+		t.Fatalf("expected canceled late-response log, got %q", logged)
+	}
+	if strings.Contains(logged, "unknown id") {
+		t.Fatalf("late canceled response should not be logged as unknown: %q", logged)
+	}
+}
+
 func TestBridgeRejectsUnknownSession(t *testing.T) {
 	server, cleanup := newTestWrapper(t, "")
 	defer cleanup()
@@ -233,6 +256,27 @@ func jsonPath(data []byte, path string) any {
 	return current
 }
 
+func captureStderr(t *testing.T, fn func()) string {
+	t.Helper()
+	old := os.Stderr
+	r, w, err := os.Pipe()
+	if err != nil {
+		t.Fatal(err)
+	}
+	os.Stderr = w
+	defer func() {
+		os.Stderr = old
+	}()
+	fn()
+	_ = w.Close()
+	raw, err := io.ReadAll(r)
+	if err != nil {
+		t.Fatal(err)
+	}
+	_ = r.Close()
+	return string(raw)
+}
+
 func TestHelperProcess(t *testing.T) {
 	if os.Getenv("GO_WANT_HELPER_PROCESS") != "1" {
 		return

diff --git a/cmd/claw/skill_data/SKILL.md b/cmd/claw/skill_data/SKILL.md
@@ -309,6 +309,7 @@ When a service subscribes to tools via `x-claw.tools`, cllama performs bounded t
 
 - Tools are injected into the LLM request as available tool definitions
 - When the model calls a tool, cllama executes against the providing service. The execution path depends on the descriptor: HTTP-native services use the per-tool `http` metadata; MCP sidecars (descriptor declares a top-level `mcp` block, v0.11.0+) are reached via the Streamable HTTP `tools/call` endpoint with cached `initialize` sessions.
+- Repeated identical managed tool calls in one mediated turn are not re-executed; cllama returns a structured `duplicate_tool_call` result and records duplicate metadata in `tool_trace`
 - Tool results are fed back to the model for up to 8 rounds (configurable)
 - `tool_trace` entries appear in session history for auditability
 - Works with both OpenAI-compatible and Anthropic-format requests

diff --git a/internal/audit/event.go b/internal/audit/event.go
@@ -26,6 +26,9 @@ type Event struct {
 	ToolName           string    `json:"tool_name,omitempty"`
 	ToolService        string    `json:"tool_service,omitempty"`
 	ToolRound          *int      `json:"tool_round,omitempty"`
+	ToolDuplicate      bool      `json:"tool_duplicate,omitempty"`
+	ToolDuplicateRound *int      `json:"tool_duplicate_of_round,omitempty"`
+	ToolDuplicateCount *int      `json:"tool_duplicate_count,omitempty"`
 	// provider_pool event fields
 	Provider      string `json:"provider,omitempty"`
 	KeyID         string `json:"key_id,omitempty"`

diff --git a/internal/audit/normalize.go b/internal/audit/normalize.go
@@ -140,6 +140,17 @@ func NormalizeSessionHistoryLine(line []byte) ([]Event, error) {
 				value := roundNumber
 				event.ToolRound = &value
 			}
+			if duplicate, _ := boolField(callMap, "duplicate"); duplicate {
+				event.ToolDuplicate = true
+			}
+			if duplicateRound, ok := intField(callMap, "duplicate_of_round"); ok {
+				value := duplicateRound
+				event.ToolDuplicateRound = &value
+			}
+			if duplicateCount, ok := intField(callMap, "duplicate_count"); ok {
+				value := duplicateCount
+				event.ToolDuplicateCount = &value
+			}
 			if hasTotalRounds {
 				value := totalRounds
 				event.TotalRounds = &value

diff --git a/internal/audit/normalize_test.go b/internal/audit/normalize_test.go
@@ -149,6 +149,30 @@ func TestNormalizeSessionHistoryLineOpenAIManagedTool(t *testing.T) {
 	}
 }
 
+func TestNormalizeSessionHistoryLineManagedToolDuplicateMetadata(t *testing.T) {
+	line := `{"version":1,"id":"hist1_dup","status":"ok","ts":"2026-04-03T12:00:00Z","claw_id":"agent","path":"/v1/chat/completions","requested_model":"xai/grok","effective_provider":"xai","effective_model":"grok","status_code":200,"usage":{"total_rounds":3},"tool_trace":[{"round":2,"tool_calls":[{"name":"search.query","service":"search","status_code":409,"duplicate":true,"duplicate_of_round":1,"duplicate_count":2,"result":{"ok":false,"error":{"code":"duplicate_tool_call","message":"already ran"}}}]}]}`
+	events, err := NormalizeSessionHistoryLine([]byte(line))
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(events) != 1 {
+		t.Fatalf("expected 1 tool event, got %+v", events)
+	}
+	event := events[0]
+	if !event.ToolDuplicate {
+		t.Fatalf("expected duplicate flag, got %+v", event)
+	}
+	if event.ToolDuplicateRound == nil || *event.ToolDuplicateRound != 1 {
+		t.Fatalf("expected duplicate source round 1, got %+v", event)
+	}
+	if event.ToolDuplicateCount == nil || *event.ToolDuplicateCount != 2 {
+		t.Fatalf("expected duplicate count 2, got %+v", event)
+	}
+	if event.Error != "already ran" {
+		t.Fatalf("expected extracted duplicate error message, got %+v", event)
+	}
+}
+
 func TestNormalizeSessionHistoryLineAnthropicManagedToolFailure(t *testing.T) {
 	line := `{"version":1,"id":"hist1_def","status":"error","ts":"2026-04-03T12:05:00Z","claw_id":"nano-bot","path":"/v1/messages","requested_model":"anthropic/claude-sonnet-4","effective_provider":"anthropic","effective_model":"anthropic/claude-sonnet-4","status_code":502,"response":{"format":"json","json":{"error":{"message":"tool result rejected"}}},"usage":{"prompt_tokens":140,"completion_tokens":30,"total_rounds":2},"tool_trace":[{"round":1,"tool_calls":[{"name":"trading-api.get_market_context","service":"trading-api","latency_ms":145,"status_code":504,"result":{"ok":false,"error":"backend timeout"}}]}]}`
 	events, err := NormalizeSessionHistoryLine([]byte(line))

diff --git a/site/changelog.md b/site/changelog.md
@@ -29,6 +29,8 @@ outline: deep
 
 ## Unreleased
 
+- Fix: managed tool mediation now skips repeated identical managed tool calls within a single turn and feeds the model a structured `duplicate_tool_call` result instead of re-executing the sidecar ([#191](https://github.com/mostlydev/clawdapus/issues/191)). Session history records duplicate metadata in `tool_trace`, cllama logs a duplicate intervention with the tool name, and the stdio MCP wrapper now distinguishes late responses after cancellation or timeout from truly unknown JSON-RPC IDs.
+
 ## v0.13.3 <Badge type="tip" text="Latest" /> {#v0-13-3}
 
 *2026-04-28*

diff --git a/site/guide/tools.md b/site/guide/tools.md
@@ -240,6 +240,8 @@ When the LLM returns a response containing tool calls, cllama dispatches based o
 
 Unknown managed tool names are rejected at validation time.
 
+Within a single mediated turn, cllama also detects repeated managed tool calls with the same canonical tool name and arguments. The first call executes normally; later duplicates are skipped and returned to the model as a structured `duplicate_tool_call` tool result, with `tool_trace` metadata showing the original round and duplicate count. This keeps retry loops from repeatedly burning MCP sidecar time for the same query.
+
 **Budget limits** (configurable in pod YAML, compiled into `tools.json`):
 
 | Limit | Default |
@@ -257,7 +259,7 @@ Hidden tool rounds are preserved across turns. When the runner sends its next re
 
 ### Error Handling
 
-Tool execution errors are fed back to the LLM as structured results inside the mediated loop — the LLM decides how to communicate the failure to the runner. If cllama itself encounters a fatal error (budget exhaustion, internal failure), it returns `502` to the runner.
+Tool execution errors are fed back to the LLM as structured results inside the mediated loop — the LLM decides how to communicate the failure to the runner. Duplicate managed tool calls are treated as structured tool errors rather than sidecar executions. If cllama itself encounters a fatal error (budget exhaustion, internal failure), it returns `502` to the runner.
 
 ## Telemetry and Audit
 

diff --git a/skills/clawdapus/SKILL.md b/skills/clawdapus/SKILL.md
@@ -309,6 +309,7 @@ When a service subscribes to tools via `x-claw.tools`, cllama performs bounded t
 
 - Tools are injected into the LLM request as available tool definitions
 - When the model calls a tool, cllama executes against the providing service. The execution path depends on the descriptor: HTTP-native services use the per-tool `http` metadata; MCP sidecars (descriptor declares a top-level `mcp` block, v0.11.0+) are reached via the Streamable HTTP `tools/call` endpoint with cached `initialize` sessions.
+- Repeated identical managed tool calls in one mediated turn are not re-executed; cllama returns a structured `duplicate_tool_call` result and records duplicate metadata in `tool_trace`
 - Tool results are fed back to the model for up to 8 rounds (configurable)
 - `tool_trace` entries appear in session history for auditability
 - Works with both OpenAI-compatible and Anthropic-format requests
+126 −0		internal/proxy/handler_test.go
+132 −0		internal/proxy/toolmediation.go
+40 −0		internal/proxy/toolmediation_test.go
+9 −6		internal/sessionhistory/recorder.go