From 79aee58945f5c6680fed81eddb5b1da8d78672b4 Mon Sep 17 00:00:00 2001
From: Layne Penney <dev@layne.pro>
Date: Thu, 15 Jan 2026 06:40:49 -0600
Subject: [PATCH 01/17] feat: add semantic tool fallback and print_tree tool
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements semantic fallback system for tool calls:
- Auto-corrects high-similarity tool name typos (≥0.85 threshold)
- Suggests similar tools for medium-similarity matches (≥0.6)
- Maps common parameter aliases (query→pattern, max→head_limit, etc.)
- Configurable via .codi.json toolFallback settings

New tools and features:
- print_tree: Display directory structure as a tree
- grep: Now accepts query/max_results aliases for pattern/head_limit

Files:
- src/tools/tool-fallback.ts: Core fallback logic with Levenshtein matching
- src/tools/print-tree.ts: Tree visualization tool
- tests/tool-fallback.test.ts: 45 unit tests
- tests/print-tree.test.ts: 15 unit tests
- ROADMAP.md: Future enhancement notes

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 ROADMAP.md                  |  30 ++
 src/config.ts               |  14 +
 src/tools/grep.ts           |  14 +-
 src/tools/index.ts          |  16 +
 src/tools/print-tree.ts     | 198 +++++++++++++
 src/tools/registry.ts       |  71 ++++-
 src/tools/tool-fallback.ts  | 291 +++++++++++++++++++
 tests/print-tree.test.ts    | 205 +++++++++++++
 tests/tool-fallback.test.ts | 563 ++++++++++++++++++++++++++++++++++++
 9 files changed, 1396 insertions(+), 6 deletions(-)
 create mode 100644 ROADMAP.md
 create mode 100644 src/tools/print-tree.ts
 create mode 100644 src/tools/tool-fallback.ts
 create mode 100644 tests/print-tree.test.ts
 create mode 100644 tests/tool-fallback.test.ts

diff --git a/ROADMAP.md b/ROADMAP.md
new file mode 100644
index 0000000..df7732d
--- /dev/null
+++ b/ROADMAP.md
@@ -0,0 +1,30 @@
+# Codi Roadmap
+
+This document tracks planned features and improvements for Codi.
+
+## Planned Features
+
+### Semantic Fallback for Tool Calls
+
+When a model attempts to call a tool that doesn't exist or uses incorrect parameter names, implement a semantic fallback system that:
+
+1. **Tool Name Matching**: If a requested tool doesn't exist, find the closest matching tool by name similarity (e.g., `print_tree` -> `list_directory`, `search` -> `grep`)
+
+2. **Parameter Mapping**: When a tool is called with unrecognized parameters, attempt to map them to the correct parameter names based on:
+   - Common aliases (e.g., `query` -> `pattern`, `max_results` -> `head_limit`)
+   - Semantic similarity (e.g., `search_term` -> `pattern`)
+   - Parameter descriptions
+
+3. **Graceful Degradation**: Instead of failing on invalid tool calls, provide helpful feedback to the model about what tools/parameters are available
+
+This would help bridge the gap between different model training data and Codi's actual tool definitions, improving compatibility with various LLMs.
+
+**Current Mitigations**:
+- Added parameter aliases to `grep` tool (`query` -> `pattern`, `max_results`/`max`/`limit` -> `head_limit`)
+- Added `print_tree` tool (commonly expected by models)
+
+---
+
+## Completed Features
+
+See [CLAUDE.md](./CLAUDE.md) for documentation on implemented features.
diff --git a/src/config.ts b/src/config.ts
index 4de04e3..66a2ad9 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -155,6 +155,20 @@ export interface WorkspaceConfig {
       [toolName: string]: Record<string, unknown>;
     };
   };
+
+  /** Tool fallback settings for handling unknown tools and parameter aliases */
+  toolFallback?: {
+    /** Enable semantic tool fallback (default: true) */
+    enabled?: boolean;
+    /** Threshold for auto-correcting tool names (0-1, default: 0.85) */
+    autoCorrectThreshold?: number;
+    /** Threshold for suggesting similar tools (0-1, default: 0.6) */
+    suggestionThreshold?: number;
+    /** Auto-execute corrected tools without confirmation (default: false) */
+    autoExecute?: boolean;
+    /** Enable parameter aliasing (default: true) */
+    parameterAliasing?: boolean;
+  };
 }
 
 /**
diff --git a/src/tools/grep.ts b/src/tools/grep.ts
index 4dce6e1..2b0dfa2 100644
--- a/src/tools/grep.ts
+++ b/src/tools/grep.ts
@@ -44,19 +44,27 @@ export class GrepTool extends BaseTool {
   }
 
   async execute(input: Record<string, unknown>): Promise<string> {
-    const pattern = input.pattern as string;
+    // Support parameter aliases for model compatibility:
+    // - query -> pattern (common model assumption)
+    // - max_results, max, limit -> head_limit (various naming conventions)
+    const pattern = (input.pattern as string) || (input.query as string);
     const path = (input.path as string) || '.';
     const filePattern = (input.file_pattern as string) || '**/*';
     const ignoreCase = (input.ignore_case as boolean) || false;
+    const headLimit = (input.head_limit as number) ||
+                      (input.max_results as number) ||
+                      (input.max as number) ||
+                      (input.limit as number) ||
+                      100;
 
     if (!pattern) {
-      throw new Error('Pattern is required');
+      throw new Error('Pattern is required (or use "query" alias)');
     }
 
     const resolvedPath = resolve(process.cwd(), path);
     const regex = new RegExp(pattern, ignoreCase ? 'gi' : 'g');
     const matches: Match[] = [];
-    const MAX_MATCHES = 100;
+    const MAX_MATCHES = headLimit;
 
     // Get list of files to search
     const files: string[] = [];
diff --git a/src/tools/index.ts b/src/tools/index.ts
index 97b4cd9..fd871ba 100644
--- a/src/tools/index.ts
+++ b/src/tools/index.ts
@@ -20,6 +20,20 @@ export { RefactorTool } from './refactor.js';
 export { ShellInfoTool } from './shell-info.js';
 export { PipelineTool } from './pipeline.js';
 export { GenerateDocsTool } from './generate-docs.js';
+export { PrintTreeTool } from './print-tree.js';
+
+// Tool fallback utilities
+export {
+  findBestToolMatch,
+  mapParameters,
+  formatFallbackError,
+  formatMappingInfo,
+  GLOBAL_PARAMETER_ALIASES,
+  DEFAULT_FALLBACK_CONFIG,
+  type ToolFallbackConfig,
+  type ToolMatchResult,
+  type ParameterMapResult,
+} from './tool-fallback.js';
 
 // Symbol index tools
 export {
@@ -52,6 +66,7 @@ import { RefactorTool } from './refactor.js';
 import { ShellInfoTool } from './shell-info.js';
 import { PipelineTool } from './pipeline.js';
 import { GenerateDocsTool } from './generate-docs.js';
+import { PrintTreeTool } from './print-tree.js';
 import type { Retriever } from '../rag/retriever.js';
 import type { SymbolIndexService } from '../symbol-index/service.js';
 import {
@@ -81,6 +96,7 @@ export function registerDefaultTools(): void {
   globalRegistry.register(new GlobTool());
   globalRegistry.register(new GrepTool());
   globalRegistry.register(new ListDirectoryTool());
+  globalRegistry.register(new PrintTreeTool());
 
   // Shell
   globalRegistry.register(new BashTool());
diff --git a/src/tools/print-tree.ts b/src/tools/print-tree.ts
new file mode 100644
index 0000000..c264501
--- /dev/null
+++ b/src/tools/print-tree.ts
@@ -0,0 +1,198 @@
+// Copyright 2026 Layne Penney
+// SPDX-License-Identifier: Apache-2.0
+
+import { readdir, stat } from 'fs/promises';
+import { resolve, join, basename } from 'path';
+import { BaseTool } from './base.js';
+import type { ToolDefinition } from '../types.js';
+
+interface TreeNode {
+  name: string;
+  type: 'file' | 'directory';
+  children?: TreeNode[];
+}
+
+export class PrintTreeTool extends BaseTool {
+  getDefinition(): ToolDefinition {
+    return {
+      name: 'print_tree',
+      description: 'Print a tree-like directory structure. Useful for understanding project layout. Respects .gitignore patterns and skips common non-essential directories.',
+      input_schema: {
+        type: 'object',
+        properties: {
+          path: {
+            type: 'string',
+            description: 'Root directory path (optional, defaults to current directory)',
+          },
+          depth: {
+            type: 'number',
+            description: 'Maximum depth to traverse (optional, defaults to 3)',
+          },
+          show_hidden: {
+            type: 'boolean',
+            description: 'Include hidden files and directories (default: false)',
+          },
+          show_files: {
+            type: 'boolean',
+            description: 'Include files in output, not just directories (default: true)',
+          },
+        },
+        required: [],
+      },
+    };
+  }
+
+  // Directories to always skip (common non-essential directories)
+  private readonly SKIP_DIRS = new Set([
+    'node_modules',
+    '.git',
+    '.svn',
+    '.hg',
+    '__pycache__',
+    '.pytest_cache',
+    '.mypy_cache',
+    '.tox',
+    '.nox',
+    '.eggs',
+    '*.egg-info',
+    'dist',
+    'build',
+    '.next',
+    '.nuxt',
+    '.output',
+    '.cache',
+    '.parcel-cache',
+    '.turbo',
+    'coverage',
+    '.nyc_output',
+    'vendor',
+    'target', // Rust/Java
+    '.gradle',
+    '.idea',
+    '.vscode',
+  ]);
+
+  async execute(input: Record<string, unknown>): Promise<string> {
+    const path = (input.path as string) || '.';
+    const maxDepth = (input.depth as number) || 3;
+    const showHidden = (input.show_hidden as boolean) || false;
+    const showFiles = input.show_files !== false; // Default to true
+
+    const resolvedPath = resolve(process.cwd(), path);
+    const rootName = basename(resolvedPath) || resolvedPath;
+
+    try {
+      const tree = await this.buildTree(resolvedPath, 0, maxDepth, showHidden, showFiles);
+      if (!tree) {
+        return `Directory not found or empty: ${path}`;
+      }
+
+      const lines: string[] = [rootName + '/'];
+      this.renderTree(tree.children || [], '', lines, showFiles);
+
+      return lines.join('\n');
+    } catch (error) {
+      const msg = error instanceof Error ? error.message : String(error);
+      return `Error reading directory: ${msg}`;
+    }
+  }
+
+  private async buildTree(
+    dirPath: string,
+    currentDepth: number,
+    maxDepth: number,
+    showHidden: boolean,
+    showFiles: boolean
+  ): Promise<TreeNode | null> {
+    try {
+      const stats = await stat(dirPath);
+      if (!stats.isDirectory()) {
+        return null;
+      }
+    } catch {
+      return null;
+    }
+
+    const name = basename(dirPath) || dirPath;
+    const node: TreeNode = { name, type: 'directory', children: [] };
+
+    if (currentDepth >= maxDepth) {
+      return node;
+    }
+
+    try {
+      const entries = await readdir(dirPath);
+      const children: TreeNode[] = [];
+
+      for (const entry of entries) {
+        // Skip hidden files unless requested
+        if (!showHidden && entry.startsWith('.')) {
+          continue;
+        }
+
+        // Skip non-essential directories
+        if (this.SKIP_DIRS.has(entry)) {
+          continue;
+        }
+
+        const fullPath = join(dirPath, entry);
+
+        try {
+          const entryStats = await stat(fullPath);
+
+          if (entryStats.isDirectory()) {
+            const childTree = await this.buildTree(
+              fullPath,
+              currentDepth + 1,
+              maxDepth,
+              showHidden,
+              showFiles
+            );
+            if (childTree) {
+              children.push(childTree);
+            }
+          } else if (showFiles) {
+            children.push({ name: entry, type: 'file' });
+          }
+        } catch {
+          // Skip entries we can't stat
+          continue;
+        }
+      }
+
+      // Sort: directories first, then files, alphabetically
+      children.sort((a, b) => {
+        if (a.type !== b.type) {
+          return a.type === 'directory' ? -1 : 1;
+        }
+        return a.name.localeCompare(b.name);
+      });
+
+      node.children = children;
+    } catch {
+      // Can't read directory
+    }
+
+    return node;
+  }
+
+  private renderTree(nodes: TreeNode[], prefix: string, lines: string[], showFiles: boolean): void {
+    const filteredNodes = showFiles ? nodes : nodes.filter(n => n.type === 'directory');
+
+    for (let i = 0; i < filteredNodes.length; i++) {
+      const node = filteredNodes[i];
+      const isLast = i === filteredNodes.length - 1;
+      const connector = isLast ? '└── ' : '├── ';
+      const childPrefix = isLast ? '    ' : '│   ';
+
+      if (node.type === 'directory') {
+        lines.push(prefix + connector + node.name + '/');
+        if (node.children && node.children.length > 0) {
+          this.renderTree(node.children, prefix + childPrefix, lines, showFiles);
+        }
+      } else {
+        lines.push(prefix + connector + node.name);
+      }
+    }
+  }
+}
diff --git a/src/tools/registry.ts b/src/tools/registry.ts
index 67f9283..f12a1e7 100644
--- a/src/tools/registry.ts
+++ b/src/tools/registry.ts
@@ -3,6 +3,14 @@
 
 import type { ToolDefinition, ToolCall, ToolResult } from '../types.js';
 import { BaseTool } from './base.js';
+import {
+  findBestToolMatch,
+  mapParameters,
+  formatFallbackError,
+  formatMappingInfo,
+  type ToolFallbackConfig,
+  DEFAULT_FALLBACK_CONFIG,
+} from './tool-fallback.js';
 
 /**
  * Registry for managing available tools.
@@ -10,6 +18,21 @@ import { BaseTool } from './base.js';
  */
 export class ToolRegistry {
   private tools: Map<string, BaseTool> = new Map();
+  private fallbackConfig: ToolFallbackConfig = DEFAULT_FALLBACK_CONFIG;
+
+  /**
+   * Set fallback configuration.
+   */
+  setFallbackConfig(config: Partial<ToolFallbackConfig>): void {
+    this.fallbackConfig = { ...DEFAULT_FALLBACK_CONFIG, ...config };
+  }
+
+  /**
+   * Get current fallback configuration.
+   */
+  getFallbackConfig(): ToolFallbackConfig {
+    return { ...this.fallbackConfig };
+  }
 
   /**
    * Register a tool with the registry.
@@ -53,10 +76,32 @@ export class ToolRegistry {
   }
 
   /**
-   * Execute a single tool call.
+   * Execute a single tool call with semantic fallback support.
    */
   async execute(toolCall: ToolCall): Promise<ToolResult> {
-    const tool = this.tools.get(toolCall.name);
+    let tool = this.tools.get(toolCall.name);
+    let mappedInput = toolCall.input;
+    let toolCorrection: { from: string; to: string } | null = null;
+    let paramMappings: Array<{ from: string; to: string }> = [];
+
+    // If tool not found, try fallback matching
+    if (!tool && this.fallbackConfig.enabled) {
+      const definitions = this.getDefinitions();
+      const matchResult = findBestToolMatch(toolCall.name, definitions, this.fallbackConfig);
+
+      if (matchResult.shouldAutoCorrect && matchResult.matchedName) {
+        // Auto-correct to matched tool
+        tool = this.tools.get(matchResult.matchedName);
+        toolCorrection = { from: toolCall.name, to: matchResult.matchedName };
+      } else if (!matchResult.exactMatch) {
+        // Return error with suggestions
+        return {
+          tool_use_id: toolCall.id,
+          content: formatFallbackError(toolCall.name, matchResult),
+          is_error: true,
+        };
+      }
+    }
 
     if (!tool) {
       return {
@@ -66,7 +111,27 @@ export class ToolRegistry {
       };
     }
 
-    return tool.run(toolCall.id, toolCall.input);
+    // Apply parameter mapping
+    if (this.fallbackConfig.parameterAliasing) {
+      const mapResult = mapParameters(
+        toolCall.input,
+        tool.getDefinition().input_schema,
+        this.fallbackConfig
+      );
+      mappedInput = mapResult.mappedInput;
+      paramMappings = mapResult.mappings;
+    }
+
+    // Execute the tool
+    const result = await tool.run(toolCall.id, mappedInput);
+
+    // Prepend mapping info to result if any corrections were made
+    const mappingInfo = formatMappingInfo(toolCorrection, paramMappings);
+    if (mappingInfo && !result.is_error) {
+      result.content = `${mappingInfo}\n\n${result.content}`;
+    }
+
+    return result;
   }
 
   /**
diff --git a/src/tools/tool-fallback.ts b/src/tools/tool-fallback.ts
new file mode 100644
index 0000000..e3b3ef6
--- /dev/null
+++ b/src/tools/tool-fallback.ts
@@ -0,0 +1,291 @@
+// Copyright 2026 Layne Penney
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * Semantic Tool Fallback System
+ *
+ * Handles tool name matching and parameter mapping when exact matches fail.
+ * - Suggests similar tools for typos/misnamed tools
+ * - Auto-corrects high-similarity matches (configurable)
+ * - Maps common parameter aliases to canonical forms
+ */
+
+import { stringSimilarity } from '../entity-normalization.js';
+import type { ToolDefinition } from '../types.js';
+
+/**
+ * Configuration for tool fallback behavior.
+ */
+export interface ToolFallbackConfig {
+  /** Enable/disable fallback system */
+  enabled: boolean;
+  /** Threshold above which tool is auto-corrected (0-1) */
+  autoCorrectThreshold: number;
+  /** Threshold above which tool is suggested (0-1) */
+  suggestionThreshold: number;
+  /** Auto-execute corrected tools without confirmation */
+  autoExecute: boolean;
+  /** Enable parameter aliasing */
+  parameterAliasing: boolean;
+}
+
+/**
+ * Default fallback configuration.
+ */
+export const DEFAULT_FALLBACK_CONFIG: ToolFallbackConfig = {
+  enabled: true,
+  autoCorrectThreshold: 0.85,
+  suggestionThreshold: 0.6,
+  autoExecute: false,
+  parameterAliasing: true,
+};
+
+/**
+ * Result of a tool name match attempt.
+ */
+export interface ToolMatchResult {
+  /** Whether an exact match was found */
+  exactMatch: boolean;
+  /** The matched tool name (may differ from requested) */
+  matchedName: string | null;
+  /** Similarity score (1.0 for exact match) */
+  score: number;
+  /** All candidates above suggestion threshold */
+  suggestions: Array<{ name: string; score: number; description: string }>;
+  /** Whether auto-correction should be applied */
+  shouldAutoCorrect: boolean;
+}
+
+/**
+ * Find the best matching tool for a given name.
+ */
+export function findBestToolMatch(
+  requestedName: string,
+  availableTools: ToolDefinition[],
+  config: ToolFallbackConfig = DEFAULT_FALLBACK_CONFIG
+): ToolMatchResult {
+  // Check for exact match first
+  const exactMatch = availableTools.find((t) => t.name === requestedName);
+  if (exactMatch) {
+    return {
+      exactMatch: true,
+      matchedName: requestedName,
+      score: 1.0,
+      suggestions: [],
+      shouldAutoCorrect: false,
+    };
+  }
+
+  if (!config.enabled) {
+    return {
+      exactMatch: false,
+      matchedName: null,
+      score: 0,
+      suggestions: [],
+      shouldAutoCorrect: false,
+    };
+  }
+
+  // Calculate similarity scores for all tools
+  const scores = availableTools.map((tool) => ({
+    name: tool.name,
+    score: stringSimilarity(requestedName.toLowerCase(), tool.name.toLowerCase()),
+    description: tool.description.slice(0, 80) + (tool.description.length > 80 ? '...' : ''),
+  }));
+
+  // Sort by score descending
+  scores.sort((a, b) => b.score - a.score);
+
+  // Filter to suggestions above threshold
+  const suggestions = scores.filter((s) => s.score >= config.suggestionThreshold);
+  const bestMatch = scores[0];
+
+  // Only auto-correct if:
+  // 1. Best match is above auto-correct threshold
+  // 2. There's a clear winner (no other match within 0.05 of the best)
+  let shouldAutoCorrect = false;
+  if (bestMatch && bestMatch.score >= config.autoCorrectThreshold) {
+    const closeMatches = scores.filter((s) => s.score >= bestMatch.score - 0.05);
+    // Only auto-correct if there's exactly one clear winner
+    shouldAutoCorrect = closeMatches.length === 1;
+  }
+
+  return {
+    exactMatch: false,
+    matchedName: shouldAutoCorrect ? bestMatch.name : null,
+    score: bestMatch?.score ?? 0,
+    suggestions,
+    shouldAutoCorrect,
+  };
+}
+
+/**
+ * Global parameter aliases.
+ * Maps canonical parameter names to their common aliases.
+ */
+export const GLOBAL_PARAMETER_ALIASES: Map<string, string[]> = new Map([
+  // Search/query related
+  ['pattern', ['query', 'search', 'search_term', 'search_query', 'regex', 'expression', 'search_pattern']],
+  ['path', ['file', 'file_path', 'filepath', 'directory', 'dir', 'folder', 'location']],
+
+  // Result limiting
+  ['head_limit', ['max_results', 'max', 'limit', 'count', 'num_results', 'top_k', 'k', 'n']],
+  ['depth', ['max_depth', 'level', 'levels']],
+
+  // Flags
+  ['ignore_case', ['case_insensitive', 'i', 'insensitive', 'no_case']],
+  ['recursive', ['recurse', 'r']],
+  ['show_hidden', ['hidden', 'all', 'include_hidden', 'show_all']],
+  ['show_files', ['include_files', 'files']],
+
+  // Content
+  ['content', ['text', 'body', 'data', 'value']],
+  ['new_content', ['replacement', 'replace_with', 'new_text', 'new_value']],
+  ['old_content', ['original', 'old_text', 'find', 'search']],
+
+  // File operations
+  ['file_pattern', ['glob', 'include', 'glob_pattern', 'filter']],
+
+  // Bash specific
+  ['command', ['cmd', 'script', 'shell_command', 'exec']],
+]);
+
+/**
+ * Result of parameter mapping.
+ */
+export interface ParameterMapResult {
+  /** The mapped parameters */
+  mappedInput: Record<string, unknown>;
+  /** Any parameters that couldn't be mapped */
+  unmappedParams: string[];
+  /** Mapping details for logging */
+  mappings: Array<{ from: string; to: string }>;
+}
+
+/**
+ * Map parameters using global aliases and tool-specific schema.
+ */
+export function mapParameters(
+  input: Record<string, unknown>,
+  toolSchema: ToolDefinition['input_schema'],
+  config: ToolFallbackConfig = DEFAULT_FALLBACK_CONFIG
+): ParameterMapResult {
+  if (!config.parameterAliasing) {
+    return {
+      mappedInput: input,
+      unmappedParams: [],
+      mappings: [],
+    };
+  }
+
+  const mappedInput: Record<string, unknown> = {};
+  const unmappedParams: string[] = [];
+  const mappings: Array<{ from: string; to: string }> = [];
+  const schemaProps = Object.keys(toolSchema.properties || {});
+
+  for (const [key, value] of Object.entries(input)) {
+    // If key exists in schema, use it directly
+    if (schemaProps.includes(key)) {
+      mappedInput[key] = value;
+      continue;
+    }
+
+    // Try to find a mapping from global aliases
+    let mapped = false;
+    for (const [canonical, aliases] of GLOBAL_PARAMETER_ALIASES) {
+      if (schemaProps.includes(canonical) && aliases.includes(key.toLowerCase())) {
+        // Only map if we haven't already set this canonical parameter
+        if (!(canonical in mappedInput)) {
+          mappedInput[canonical] = value;
+          mappings.push({ from: key, to: canonical });
+          mapped = true;
+        }
+        break;
+      }
+    }
+
+    // Try semantic similarity as fallback
+    if (!mapped) {
+      const bestMatch = findBestParameterMatch(key, schemaProps);
+      if (bestMatch && bestMatch.score >= 0.7) {
+        // Only map if we haven't already set this parameter
+        if (!(bestMatch.name in mappedInput)) {
+          mappedInput[bestMatch.name] = value;
+          mappings.push({ from: key, to: bestMatch.name });
+          mapped = true;
+        }
+      }
+    }
+
+    if (!mapped) {
+      unmappedParams.push(key);
+      // Still include unmapped params - the tool might handle them
+      mappedInput[key] = value;
+    }
+  }
+
+  return { mappedInput, unmappedParams, mappings };
+}
+
+/**
+ * Find best matching parameter name using similarity.
+ */
+function findBestParameterMatch(
+  paramName: string,
+  schemaProps: string[]
+): { name: string; score: number } | null {
+  let bestMatch: { name: string; score: number } | null = null;
+
+  for (const prop of schemaProps) {
+    const score = stringSimilarity(paramName.toLowerCase(), prop.toLowerCase());
+    if (!bestMatch || score > bestMatch.score) {
+      bestMatch = { name: prop, score };
+    }
+  }
+
+  return bestMatch;
+}
+
+/**
+ * Format an error message with suggestions.
+ */
+export function formatFallbackError(requestedTool: string, matchResult: ToolMatchResult): string {
+  const lines: string[] = [`Error: Unknown tool "${requestedTool}"`];
+
+  if (matchResult.suggestions.length > 0) {
+    lines.push('');
+    lines.push('Did you mean:');
+    // Show up to 3 suggestions
+    for (const suggestion of matchResult.suggestions.slice(0, 3)) {
+      const percent = Math.round(suggestion.score * 100);
+      lines.push(`  - ${suggestion.name} (${percent}% match): ${suggestion.description}`);
+    }
+  }
+
+  return lines.join('\n');
+}
+
+/**
+ * Format parameter mapping info for prepending to tool result.
+ */
+export function formatMappingInfo(
+  toolCorrection: { from: string; to: string } | null,
+  paramMappings: Array<{ from: string; to: string }>
+): string | null {
+  const parts: string[] = [];
+
+  if (toolCorrection) {
+    parts.push(`Tool: "${toolCorrection.from}" → "${toolCorrection.to}"`);
+  }
+
+  if (paramMappings.length > 0) {
+    const mappingStr = paramMappings.map((m) => `${m.from}→${m.to}`).join(', ');
+    parts.push(`Params: ${mappingStr}`);
+  }
+
+  if (parts.length === 0) {
+    return null;
+  }
+
+  return `(Mapped: ${parts.join('; ')})`;
+}
diff --git a/tests/print-tree.test.ts b/tests/print-tree.test.ts
new file mode 100644
index 0000000..07f1529
--- /dev/null
+++ b/tests/print-tree.test.ts
@@ -0,0 +1,205 @@
+// Copyright 2026 Layne Penney
+// SPDX-License-Identifier: Apache-2.0
+
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import { PrintTreeTool } from '../src/tools/print-tree.js';
+import * as fs from 'fs/promises';
+import * as path from 'path';
+import * as os from 'os';
+
+describe('PrintTreeTool', () => {
+  let tool: PrintTreeTool;
+  let tempDir: string;
+
+  beforeEach(async () => {
+    tool = new PrintTreeTool();
+    // Create a temporary directory structure for testing
+    tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'print-tree-test-'));
+
+    // Create test directory structure:
+    // tempDir/
+    //   src/
+    //     index.ts
+    //     utils/
+    //       helpers.ts
+    //   tests/
+    //     test.ts
+    //   package.json
+    //   .hidden
+    //   node_modules/  (should be skipped)
+    //     somelib/
+    //       index.js
+
+    await fs.mkdir(path.join(tempDir, 'src'));
+    await fs.mkdir(path.join(tempDir, 'src', 'utils'));
+    await fs.mkdir(path.join(tempDir, 'tests'));
+    await fs.mkdir(path.join(tempDir, 'node_modules'));
+    await fs.mkdir(path.join(tempDir, 'node_modules', 'somelib'));
+
+    await fs.writeFile(path.join(tempDir, 'src', 'index.ts'), 'export {};');
+    await fs.writeFile(path.join(tempDir, 'src', 'utils', 'helpers.ts'), 'export {};');
+    await fs.writeFile(path.join(tempDir, 'tests', 'test.ts'), 'test();');
+    await fs.writeFile(path.join(tempDir, 'package.json'), '{}');
+    await fs.writeFile(path.join(tempDir, '.hidden'), 'hidden file');
+    await fs.writeFile(path.join(tempDir, 'node_modules', 'somelib', 'index.js'), '');
+  });
+
+  afterEach(async () => {
+    // Clean up temp directory
+    await fs.rm(tempDir, { recursive: true, force: true });
+  });
+
+  describe('getDefinition', () => {
+    it('returns correct tool definition', () => {
+      const def = tool.getDefinition();
+      expect(def.name).toBe('print_tree');
+      expect(def.description).toContain('tree');
+      expect(def.input_schema.properties).toHaveProperty('path');
+      expect(def.input_schema.properties).toHaveProperty('depth');
+      expect(def.input_schema.properties).toHaveProperty('show_hidden');
+      expect(def.input_schema.properties).toHaveProperty('show_files');
+    });
+  });
+
+  describe('execute', () => {
+    it('prints directory tree with default options', async () => {
+      const result = await tool.execute({ path: tempDir });
+
+      // Should show directories
+      expect(result).toContain('src/');
+      expect(result).toContain('tests/');
+
+      // Should show files
+      expect(result).toContain('package.json');
+
+      // Should NOT show hidden files by default
+      expect(result).not.toContain('.hidden');
+
+      // Should NOT show node_modules (skipped directory)
+      expect(result).not.toContain('node_modules');
+    });
+
+    it('shows tree connectors', async () => {
+      const result = await tool.execute({ path: tempDir });
+
+      // Should have tree connectors
+      expect(result).toMatch(/[├└]──/);
+    });
+
+    it('respects depth parameter', async () => {
+      const result = await tool.execute({ path: tempDir, depth: 1 });
+
+      // Should show top-level directories
+      expect(result).toContain('src/');
+      expect(result).toContain('tests/');
+
+      // Should NOT show nested files/dirs at depth 1
+      expect(result).not.toContain('utils/');
+      expect(result).not.toContain('helpers.ts');
+    });
+
+    it('shows hidden files when show_hidden is true', async () => {
+      const result = await tool.execute({ path: tempDir, show_hidden: true });
+
+      expect(result).toContain('.hidden');
+    });
+
+    it('hides files when show_files is false', async () => {
+      const result = await tool.execute({ path: tempDir, show_files: false });
+
+      // Should show directories
+      expect(result).toContain('src/');
+      expect(result).toContain('tests/');
+
+      // Should NOT show files
+      expect(result).not.toContain('package.json');
+      expect(result).not.toContain('index.ts');
+    });
+
+    it('skips common non-essential directories', async () => {
+      const result = await tool.execute({ path: tempDir });
+
+      // node_modules should be skipped
+      expect(result).not.toContain('node_modules');
+      expect(result).not.toContain('somelib');
+    });
+
+    it('sorts directories before files', async () => {
+      const result = await tool.execute({ path: tempDir });
+
+      // Get the position of directories and files
+      const srcPos = result.indexOf('src/');
+      const testsPos = result.indexOf('tests/');
+      const packagePos = result.indexOf('package.json');
+
+      // Directories should come before files
+      expect(srcPos).toBeLessThan(packagePos);
+      expect(testsPos).toBeLessThan(packagePos);
+    });
+
+    it('handles empty directory', async () => {
+      const emptyDir = path.join(tempDir, 'empty');
+      await fs.mkdir(emptyDir);
+
+      const result = await tool.execute({ path: emptyDir });
+
+      // Should show the directory name
+      expect(result).toContain('empty/');
+    });
+
+    it('handles non-existent directory', async () => {
+      const result = await tool.execute({ path: '/nonexistent/path/xyz' });
+
+      expect(result).toContain('not found');
+    });
+
+    it('uses current directory when path not specified', async () => {
+      // Save current dir
+      const originalCwd = process.cwd();
+
+      try {
+        process.chdir(tempDir);
+        const result = await tool.execute({});
+
+        // Should show contents of tempDir
+        expect(result).toContain('src/');
+        expect(result).toContain('tests/');
+      } finally {
+        // Restore original cwd
+        process.chdir(originalCwd);
+      }
+    });
+
+    it('shows nested structure correctly', async () => {
+      const result = await tool.execute({ path: tempDir, depth: 3 });
+
+      // Should show nested utils directory
+      expect(result).toContain('utils/');
+      expect(result).toContain('helpers.ts');
+    });
+  });
+
+  describe('tree formatting', () => {
+    it('uses correct tree characters for last items', async () => {
+      const result = await tool.execute({ path: tempDir });
+
+      // Should have └── for last items in a directory
+      expect(result).toContain('└──');
+    });
+
+    it('uses correct tree characters for non-last items', async () => {
+      const result = await tool.execute({ path: tempDir });
+
+      // Should have ├── for non-last items
+      expect(result).toContain('├──');
+    });
+
+    it('uses vertical lines for nested items', async () => {
+      const result = await tool.execute({ path: tempDir, depth: 3 });
+
+      // Nested items should have │ for indentation
+      // This appears when there are siblings after the parent
+      expect(result).toMatch(/│\s+[├└]──/);
+    });
+  });
+});
diff --git a/tests/tool-fallback.test.ts b/tests/tool-fallback.test.ts
new file mode 100644
index 0000000..7e2ced9
--- /dev/null
+++ b/tests/tool-fallback.test.ts
@@ -0,0 +1,563 @@
+// Copyright 2026 Layne Penney
+// SPDX-License-Identifier: Apache-2.0
+
+import { describe, it, expect, beforeEach } from 'vitest';
+import {
+  findBestToolMatch,
+  mapParameters,
+  formatFallbackError,
+  formatMappingInfo,
+  GLOBAL_PARAMETER_ALIASES,
+  DEFAULT_FALLBACK_CONFIG,
+  type ToolFallbackConfig,
+} from '../src/tools/tool-fallback.js';
+import { ToolRegistry } from '../src/tools/registry.js';
+import type { ToolDefinition } from '../src/types.js';
+
+// Mock tool definitions for testing
+const mockTools: ToolDefinition[] = [
+  {
+    name: 'grep',
+    description: 'Search for patterns in file contents. Returns matching lines with file paths.',
+    input_schema: {
+      type: 'object',
+      properties: {
+        pattern: { type: 'string', description: 'Search pattern' },
+        path: { type: 'string', description: 'Path to search in' },
+        head_limit: { type: 'number', description: 'Max results' },
+        ignore_case: { type: 'boolean', description: 'Case insensitive' },
+      },
+      required: ['pattern'],
+    },
+  },
+  {
+    name: 'glob',
+    description: 'Find files matching a glob pattern. Returns file paths.',
+    input_schema: {
+      type: 'object',
+      properties: {
+        pattern: { type: 'string', description: 'Glob pattern' },
+        path: { type: 'string', description: 'Base path' },
+      },
+      required: ['pattern'],
+    },
+  },
+  {
+    name: 'read_file',
+    description: 'Read the contents of a file.',
+    input_schema: {
+      type: 'object',
+      properties: {
+        path: { type: 'string', description: 'File path' },
+      },
+      required: ['path'],
+    },
+  },
+  {
+    name: 'write_file',
+    description: 'Write content to a file.',
+    input_schema: {
+      type: 'object',
+      properties: {
+        path: { type: 'string', description: 'File path' },
+        content: { type: 'string', description: 'Content to write' },
+      },
+      required: ['path', 'content'],
+    },
+  },
+  {
+    name: 'list_directory',
+    description: 'List files and directories in a given path.',
+    input_schema: {
+      type: 'object',
+      properties: {
+        path: { type: 'string', description: 'Directory path' },
+        show_hidden: { type: 'boolean', description: 'Show hidden files' },
+      },
+      required: [],
+    },
+  },
+  {
+    name: 'print_tree',
+    description: 'Print a tree-like directory structure.',
+    input_schema: {
+      type: 'object',
+      properties: {
+        path: { type: 'string', description: 'Root path' },
+        depth: { type: 'number', description: 'Max depth' },
+      },
+      required: [],
+    },
+  },
+  {
+    name: 'bash',
+    description: 'Execute a bash command.',
+    input_schema: {
+      type: 'object',
+      properties: {
+        command: { type: 'string', description: 'Command to execute' },
+      },
+      required: ['command'],
+    },
+  },
+];
+
+describe('findBestToolMatch', () => {
+  it('returns exact match when tool exists', () => {
+    const result = findBestToolMatch('grep', mockTools);
+    expect(result.exactMatch).toBe(true);
+    expect(result.matchedName).toBe('grep');
+    expect(result.score).toBe(1.0);
+    expect(result.suggestions).toHaveLength(0);
+    expect(result.shouldAutoCorrect).toBe(false);
+  });
+
+  it('is case-sensitive for exact matches', () => {
+    const result = findBestToolMatch('Grep', mockTools);
+    expect(result.exactMatch).toBe(false);
+    // But should suggest grep with high similarity
+    expect(result.suggestions.length).toBeGreaterThan(0);
+    expect(result.suggestions[0].name).toBe('grep');
+  });
+
+  it('suggests similar tools for typos', () => {
+    const result = findBestToolMatch('gre', mockTools);
+    expect(result.exactMatch).toBe(false);
+    expect(result.suggestions.length).toBeGreaterThan(0);
+    expect(result.suggestions[0].name).toBe('grep');
+  });
+
+  it('auto-corrects high-similarity case typos', () => {
+    const result = findBestToolMatch('GREP', mockTools);
+    // GREP vs grep should have high similarity due to case-insensitive comparison
+    expect(result.shouldAutoCorrect).toBe(true);
+    expect(result.matchedName).toBe('grep');
+  });
+
+  it('suggests print_tree for print_tre typo', () => {
+    // "print_tre" vs "print_tree" has high similarity
+    const result = findBestToolMatch('print_tre', mockTools);
+    expect(result.exactMatch).toBe(false);
+    expect(result.suggestions.some((s) => s.name === 'print_tree')).toBe(true);
+  });
+
+  it('suggests list_directory for list_directo typo', () => {
+    // "list_directo" vs "list_directory" has high similarity (only missing 'ry')
+    const result = findBestToolMatch('list_directo', mockTools);
+    expect(result.exactMatch).toBe(false);
+    expect(result.suggestions.length).toBeGreaterThan(0);
+    expect(result.suggestions[0].name).toBe('list_directory');
+  });
+
+  it('returns no suggestions when disabled', () => {
+    const config: ToolFallbackConfig = { ...DEFAULT_FALLBACK_CONFIG, enabled: false };
+    const result = findBestToolMatch('unknown', mockTools, config);
+    expect(result.suggestions).toHaveLength(0);
+    expect(result.matchedName).toBeNull();
+  });
+
+  it('does not auto-correct when multiple close matches exist', () => {
+    // Create tools with close names to test ambiguity
+    // Both "test_a" and "test_b" are equally similar to "test_x"
+    const ambiguousTools: ToolDefinition[] = [
+      { name: 'test_a', description: 'Test A', input_schema: { type: 'object', properties: {} } },
+      { name: 'test_b', description: 'Test B', input_schema: { type: 'object', properties: {} } },
+    ];
+    // "test_x" has equal similarity to both test_a and test_b (both differ by 1 char)
+    const result = findBestToolMatch('test_x', ambiguousTools);
+    expect(result.suggestions.length).toBeGreaterThan(0);
+    // Should not auto-correct because both are equally close matches
+    expect(result.shouldAutoCorrect).toBe(false);
+  });
+
+  it('respects custom thresholds', () => {
+    const config: ToolFallbackConfig = {
+      ...DEFAULT_FALLBACK_CONFIG,
+      suggestionThreshold: 0.9, // Very high threshold
+    };
+    const result = findBestToolMatch('gre', mockTools, config);
+    // 'gre' vs 'grep' is about 0.75 similarity, below 0.9 threshold
+    expect(result.suggestions).toHaveLength(0);
+  });
+
+  it('includes truncated descriptions in suggestions', () => {
+    const result = findBestToolMatch('search', mockTools);
+    for (const suggestion of result.suggestions) {
+      expect(suggestion.description).toBeDefined();
+      expect(suggestion.description.length).toBeLessThanOrEqual(83); // 80 + '...'
+    }
+  });
+});
+
+describe('mapParameters', () => {
+  const grepSchema = mockTools.find((t) => t.name === 'grep')!.input_schema;
+  const writeFileSchema = mockTools.find((t) => t.name === 'write_file')!.input_schema;
+  const bashSchema = mockTools.find((t) => t.name === 'bash')!.input_schema;
+
+  it('passes through valid parameters unchanged', () => {
+    const result = mapParameters({ pattern: 'test', path: '.' }, grepSchema);
+    expect(result.mappedInput.pattern).toBe('test');
+    expect(result.mappedInput.path).toBe('.');
+    expect(result.mappings).toHaveLength(0);
+    expect(result.unmappedParams).toHaveLength(0);
+  });
+
+  it('maps query to pattern', () => {
+    const result = mapParameters({ query: 'test' }, grepSchema);
+    expect(result.mappedInput.pattern).toBe('test');
+    expect(result.mappings).toContainEqual({ from: 'query', to: 'pattern' });
+  });
+
+  it('maps search to pattern', () => {
+    const result = mapParameters({ search: 'test' }, grepSchema);
+    expect(result.mappedInput.pattern).toBe('test');
+    expect(result.mappings).toContainEqual({ from: 'search', to: 'pattern' });
+  });
+
+  it('maps max_results to head_limit', () => {
+    const result = mapParameters({ max_results: 10 }, grepSchema);
+    expect(result.mappedInput.head_limit).toBe(10);
+    expect(result.mappings).toContainEqual({ from: 'max_results', to: 'head_limit' });
+  });
+
+  it('maps max to head_limit', () => {
+    const result = mapParameters({ max: 5 }, grepSchema);
+    expect(result.mappedInput.head_limit).toBe(5);
+  });
+
+  it('maps limit to head_limit', () => {
+    const result = mapParameters({ limit: 20 }, grepSchema);
+    expect(result.mappedInput.head_limit).toBe(20);
+  });
+
+  it('maps file_path to path', () => {
+    const result = mapParameters({ file_path: '/test.ts' }, grepSchema);
+    expect(result.mappedInput.path).toBe('/test.ts');
+  });
+
+  it('maps case_insensitive to ignore_case', () => {
+    const result = mapParameters({ case_insensitive: true }, grepSchema);
+    expect(result.mappedInput.ignore_case).toBe(true);
+  });
+
+  it('maps text to content for write_file', () => {
+    const result = mapParameters({ path: '/test.txt', text: 'hello' }, writeFileSchema);
+    expect(result.mappedInput.content).toBe('hello');
+    expect(result.mappings).toContainEqual({ from: 'text', to: 'content' });
+  });
+
+  it('maps cmd to command for bash', () => {
+    const result = mapParameters({ cmd: 'ls -la' }, bashSchema);
+    expect(result.mappedInput.command).toBe('ls -la');
+  });
+
+  it('preserves unmapped parameters', () => {
+    const result = mapParameters({ unknown_param: 'value', pattern: 'test' }, grepSchema);
+    expect(result.mappedInput.unknown_param).toBe('value');
+    expect(result.unmappedParams).toContain('unknown_param');
+  });
+
+  it('explicit parameters take precedence over aliases', () => {
+    const result = mapParameters({ pattern: 'explicit', query: 'alias' }, grepSchema);
+    expect(result.mappedInput.pattern).toBe('explicit');
+    // query should be ignored since pattern is already set
+    expect(result.mappings).toHaveLength(0);
+  });
+
+  it('does not map when aliasing is disabled', () => {
+    const config: ToolFallbackConfig = { ...DEFAULT_FALLBACK_CONFIG, parameterAliasing: false };
+    const result = mapParameters({ query: 'test' }, grepSchema, config);
+    expect(result.mappedInput.query).toBe('test');
+    expect(result.mappedInput.pattern).toBeUndefined();
+    expect(result.mappings).toHaveLength(0);
+  });
+
+  it('handles multiple alias mappings', () => {
+    const result = mapParameters(
+      { query: 'test', max: 10, file_path: '/src' },
+      grepSchema
+    );
+    expect(result.mappedInput.pattern).toBe('test');
+    expect(result.mappedInput.head_limit).toBe(10);
+    expect(result.mappedInput.path).toBe('/src');
+    expect(result.mappings).toHaveLength(3);
+  });
+});
+
+describe('formatFallbackError', () => {
+  it('includes tool name in error', () => {
+    const matchResult = {
+      exactMatch: false,
+      matchedName: null,
+      score: 0.5,
+      suggestions: [],
+      shouldAutoCorrect: false,
+    };
+    const error = formatFallbackError('unknown_tool', matchResult);
+    expect(error).toContain('unknown_tool');
+    expect(error).toContain('Error');
+  });
+
+  it('includes suggestions when available', () => {
+    const matchResult = {
+      exactMatch: false,
+      matchedName: null,
+      score: 0.75,
+      suggestions: [
+        { name: 'grep', score: 0.75, description: 'Search for patterns' },
+        { name: 'glob', score: 0.6, description: 'Find files' },
+      ],
+      shouldAutoCorrect: false,
+    };
+    const error = formatFallbackError('gre', matchResult);
+    expect(error).toContain('Did you mean');
+    expect(error).toContain('grep');
+    expect(error).toContain('75%');
+    expect(error).toContain('glob');
+    expect(error).toContain('60%');
+  });
+
+  it('limits suggestions to 3', () => {
+    const matchResult = {
+      exactMatch: false,
+      matchedName: null,
+      score: 0.5,
+      suggestions: [
+        { name: 'tool1', score: 0.8, description: 'Desc 1' },
+        { name: 'tool2', score: 0.7, description: 'Desc 2' },
+        { name: 'tool3', score: 0.65, description: 'Desc 3' },
+        { name: 'tool4', score: 0.6, description: 'Desc 4' },
+        { name: 'tool5', score: 0.55, description: 'Desc 5' },
+      ],
+      shouldAutoCorrect: false,
+    };
+    const error = formatFallbackError('unknown', matchResult);
+    expect(error).toContain('tool1');
+    expect(error).toContain('tool2');
+    expect(error).toContain('tool3');
+    expect(error).not.toContain('tool4');
+    expect(error).not.toContain('tool5');
+  });
+});
+
+describe('formatMappingInfo', () => {
+  it('returns null when no mappings', () => {
+    const result = formatMappingInfo(null, []);
+    expect(result).toBeNull();
+  });
+
+  it('formats tool correction', () => {
+    const result = formatMappingInfo({ from: 'GREP', to: 'grep' }, []);
+    expect(result).toContain('Tool');
+    expect(result).toContain('GREP');
+    expect(result).toContain('grep');
+  });
+
+  it('formats parameter mappings', () => {
+    const result = formatMappingInfo(null, [
+      { from: 'query', to: 'pattern' },
+      { from: 'max', to: 'head_limit' },
+    ]);
+    expect(result).toContain('Params');
+    expect(result).toContain('query→pattern');
+    expect(result).toContain('max→head_limit');
+  });
+
+  it('formats both tool and parameter mappings', () => {
+    const result = formatMappingInfo(
+      { from: 'GREP', to: 'grep' },
+      [{ from: 'query', to: 'pattern' }]
+    );
+    expect(result).toContain('Tool');
+    expect(result).toContain('Params');
+  });
+});
+
+describe('GLOBAL_PARAMETER_ALIASES', () => {
+  it('has common query aliases for pattern', () => {
+    const aliases = GLOBAL_PARAMETER_ALIASES.get('pattern');
+    expect(aliases).toContain('query');
+    expect(aliases).toContain('search');
+    expect(aliases).toContain('search_term');
+  });
+
+  it('has common path aliases', () => {
+    const aliases = GLOBAL_PARAMETER_ALIASES.get('path');
+    expect(aliases).toContain('file');
+    expect(aliases).toContain('file_path');
+    expect(aliases).toContain('directory');
+  });
+
+  it('has limit aliases for head_limit', () => {
+    const aliases = GLOBAL_PARAMETER_ALIASES.get('head_limit');
+    expect(aliases).toContain('max_results');
+    expect(aliases).toContain('max');
+    expect(aliases).toContain('limit');
+  });
+
+  it('has command aliases for bash', () => {
+    const aliases = GLOBAL_PARAMETER_ALIASES.get('command');
+    expect(aliases).toContain('cmd');
+    expect(aliases).toContain('script');
+  });
+});
+
+describe('ToolRegistry integration', () => {
+  let registry: ToolRegistry;
+
+  // Simple mock tool for testing
+  class MockTool {
+    private name: string;
+    private definition: ToolDefinition;
+    private response: string;
+
+    constructor(name: string, definition: ToolDefinition, response: string = 'Success') {
+      this.name = name;
+      this.definition = definition;
+      this.response = response;
+    }
+
+    getName(): string {
+      return this.name;
+    }
+
+    getDefinition(): ToolDefinition {
+      return this.definition;
+    }
+
+    async run(toolUseId: string, input: Record<string, unknown>) {
+      // Validate required params
+      const required = this.definition.input_schema.required || [];
+      for (const param of required) {
+        if (!(param in input)) {
+          return {
+            tool_use_id: toolUseId,
+            content: `Error: Missing required parameter: ${param}`,
+            is_error: true,
+          };
+        }
+      }
+      return {
+        tool_use_id: toolUseId,
+        content: `${this.response}: ${JSON.stringify(input)}`,
+        is_error: false,
+      };
+    }
+  }
+
+  beforeEach(() => {
+    registry = new ToolRegistry();
+    // Register mock tools
+    for (const def of mockTools) {
+      registry.register(new MockTool(def.name, def) as any);
+    }
+  });
+
+  it('executes exact tool match', async () => {
+    const result = await registry.execute({
+      id: 'test-1',
+      name: 'grep',
+      input: { pattern: 'test' },
+    });
+    expect(result.is_error).toBe(false);
+    expect(result.content).toContain('Success');
+  });
+
+  it('provides suggestions for unknown tool with similar name', async () => {
+    const result = await registry.execute({
+      id: 'test-2',
+      name: 'greb', // Typo of 'grep' - should get suggestions
+      input: { pattern: 'test' },
+    });
+    expect(result.is_error).toBe(true);
+    expect(result.content).toContain('Unknown tool');
+    expect(result.content).toContain('Did you mean');
+    expect(result.content).toContain('grep');
+  });
+
+  it('returns error without suggestions for completely unknown tool', async () => {
+    const result = await registry.execute({
+      id: 'test-2b',
+      name: 'xyzabc123', // Completely unknown - no similar tools
+      input: { pattern: 'test' },
+    });
+    expect(result.is_error).toBe(true);
+    expect(result.content).toContain('Unknown tool');
+    // No suggestions because nothing is similar enough
+  });
+
+  it('auto-corrects high-similarity tool name', async () => {
+    const result = await registry.execute({
+      id: 'test-3',
+      name: 'GREP', // Case typo
+      input: { pattern: 'test' },
+    });
+    // Should auto-correct and succeed
+    expect(result.is_error).toBe(false);
+    expect(result.content).toContain('Mapped');
+    expect(result.content).toContain('GREP');
+    expect(result.content).toContain('grep');
+  });
+
+  it('maps query parameter to pattern', async () => {
+    const result = await registry.execute({
+      id: 'test-4',
+      name: 'grep',
+      input: { query: 'test' },
+    });
+    expect(result.is_error).toBe(false);
+    expect(result.content).toContain('Mapped');
+    expect(result.content).toContain('query→pattern');
+  });
+
+  it('maps multiple parameters', async () => {
+    const result = await registry.execute({
+      id: 'test-5',
+      name: 'grep',
+      input: { query: 'test', max_results: 10, file_path: '/src' },
+    });
+    expect(result.is_error).toBe(false);
+    expect(result.content).toContain('query→pattern');
+    expect(result.content).toContain('max_results→head_limit');
+    expect(result.content).toContain('file_path→path');
+  });
+
+  it('respects disabled fallback', async () => {
+    registry.setFallbackConfig({ enabled: false });
+    const result = await registry.execute({
+      id: 'test-6',
+      name: 'search',
+      input: { pattern: 'test' },
+    });
+    expect(result.is_error).toBe(true);
+    expect(result.content).toBe('Error: Unknown tool "search"');
+    expect(result.content).not.toContain('Did you mean');
+  });
+
+  it('respects disabled parameter aliasing', async () => {
+    registry.setFallbackConfig({ parameterAliasing: false });
+    const result = await registry.execute({
+      id: 'test-7',
+      name: 'grep',
+      input: { query: 'test' }, // query won't be mapped to pattern
+    });
+    // Should fail because 'pattern' is required but 'query' wasn't mapped
+    expect(result.is_error).toBe(true);
+    expect(result.content).toContain('Missing required parameter');
+  });
+
+  it('getFallbackConfig returns current config', () => {
+    const config = registry.getFallbackConfig();
+    expect(config.enabled).toBe(true);
+    expect(config.autoCorrectThreshold).toBe(0.85);
+    expect(config.suggestionThreshold).toBe(0.6);
+  });
+
+  it('setFallbackConfig updates config', () => {
+    registry.setFallbackConfig({ suggestionThreshold: 0.8 });
+    const config = registry.getFallbackConfig();
+    expect(config.suggestionThreshold).toBe(0.8);
+    // Other values should still have defaults
+    expect(config.enabled).toBe(true);
+  });
+});

From bf78b6a70f40d8ae228ee5b9660e686cb7da31f7 Mon Sep 17 00:00:00 2001
From: Layne Penney <dev@layne.pro>
Date: Thu, 15 Jan 2026 07:09:40 -0600
Subject: [PATCH 02/17] clean content?

---
 src/providers/ollama-cloud.ts | 62 ++++++++++++++++++++++++++++++++---
 1 file changed, 58 insertions(+), 4 deletions(-)

diff --git a/src/providers/ollama-cloud.ts b/src/providers/ollama-cloud.ts
index edd0f80..2bc2541 100644
--- a/src/providers/ollama-cloud.ts
+++ b/src/providers/ollama-cloud.ts
@@ -217,15 +217,20 @@ export class OllamaCloudProvider extends BaseProvider {
           }
 
           // Extract thinking content from <think> tags
-          const { content: cleanedContent, thinking } = this.extractThinkingContent(
+          const { content: thinkingCleanedContent, thinking } = this.extractThinkingContent(
             responseData.message.content
           );
 
           // Fall back to extracting tool calls from text if no native calls
           if (toolCalls.length === 0 && tools && tools.length > 0) {
-            toolCalls = this.extractToolCalls(cleanedContent, tools);
+            toolCalls = this.extractToolCalls(thinkingCleanedContent, tools);
           }
 
+          // Clean hallucinated traces from content (after tool extraction)
+          const cleanedContent = toolCalls.length > 0
+            ? this.cleanHallucinatedTraces(thinkingCleanedContent)
+            : thinkingCleanedContent;
+
           return createProviderResponse({
             content: cleanedContent,
             toolCalls,
@@ -332,14 +337,19 @@ export class OllamaCloudProvider extends BaseProvider {
           }
 
           // Extract thinking content from <think> tags (used by qwen3:thinking and similar models)
-          const { content: cleanedContent, thinking } = this.extractThinkingContent(fullText);
+          const { content: thinkingCleanedContent, thinking } = this.extractThinkingContent(fullText);
 
           // Use native tool calls if available, otherwise extract from text
           let toolCalls: ToolCall[] = nativeToolCalls;
           if (toolCalls.length === 0 && tools && tools.length > 0) {
-            toolCalls = this.extractToolCalls(cleanedContent, tools);
+            toolCalls = this.extractToolCalls(thinkingCleanedContent, tools);
           }
 
+          // Clean hallucinated traces from content (after tool extraction)
+          const cleanedContent = toolCalls.length > 0
+            ? this.cleanHallucinatedTraces(thinkingCleanedContent)
+            : thinkingCleanedContent;
+
           return createProviderResponse({
             content: cleanedContent,
             toolCalls,
@@ -489,6 +499,34 @@ export class OllamaCloudProvider extends BaseProvider {
       return toolCalls;
     }
 
+    // Pattern 3: [Calling tool_name]: {json} format
+    // Used by some models that simulate agent traces. We extract the call but ignore
+    // any "[Result from ...]" which are hallucinated results.
+    const callingPattern = /\[Calling\s+([a-z_][a-z0-9_]*)\]\s*:\s*(\{[^}]*\})/gi;
+
+    while ((match = callingPattern.exec(content)) !== null) {
+      const rawToolName = match[1];
+      const normalizedName = this.normalizeToolName(rawToolName);
+      const jsonArgs = match[2];
+
+      if (toolNames.has(normalizedName)) {
+        try {
+          const args = JSON.parse(jsonArgs);
+          toolCalls.push({
+            id: `extracted_${Date.now()}_${Math.random().toString(36).substring(2, 9)}`,
+            name: normalizedName,
+            input: args,
+          });
+        } catch {
+          // Invalid JSON, skip
+        }
+      }
+    }
+
+    if (toolCalls.length > 0) {
+      return toolCalls;
+    }
+
     // Pattern 3: Look for JSON objects with "name" field
     // This pattern handles nested braces properly
     const jsonPattern = /\{(?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*\}/g;
@@ -583,6 +621,22 @@ export class OllamaCloudProvider extends BaseProvider {
     return { content: cleanedContent, thinking };
   }
 
+  /**
+   * Clean hallucinated agent trace patterns from content.
+   * Some models output fake "[Calling tool]: {json}[Result from tool]: result" traces.
+   * This should be called AFTER extractToolCalls to clean up the display content.
+   */
+  private cleanHallucinatedTraces(content: string): string {
+    // Pattern: [Calling tool_name]: {json}[Result from tool_name]: any text until next [ or end
+    const hallucinatedTracePattern = /\[Calling\s+[a-z_][a-z0-9_]*\]\s*:\s*\{[^}]*\}\s*(?:\[Result from\s+[a-z_][a-z0-9_]*\]\s*:\s*[^\[]*)?/gi;
+    let cleanedContent = content.replace(hallucinatedTracePattern, '').trim();
+
+    // Clean up multiple newlines
+    cleanedContent = cleanedContent.replace(/\n{3,}/g, '\n\n').trim();
+
+    return cleanedContent;
+  }
+
   /**
    * Pull a model if it's not already available.
    */

From 2677bedb64535aa6ae40dc2d7685e68c662d7805 Mon Sep 17 00:00:00 2001
From: Layne Penney <dev@layne.pro>
Date: Thu, 15 Jan 2026 09:09:23 -0600
Subject: [PATCH 03/17] fix: surface thinking output and audit raw responses

---
 src/agent.ts                       | 29 ++++++++++++++--
 src/providers/anthropic.ts         |  4 ++-
 src/providers/message-converter.ts | 10 +++---
 src/providers/ollama-cloud.ts      | 55 +++++++++++++++++++++++-------
 src/providers/openai-compatible.ts |  5 +++
 src/providers/response-parser.ts   |  3 ++
 src/types.ts                       |  2 ++
 7 files changed, 87 insertions(+), 21 deletions(-)

diff --git a/src/agent.ts b/src/agent.ts
index d4c0125..8062820 100644
--- a/src/agent.ts
+++ b/src/agent.ts
@@ -512,10 +512,17 @@ Always use tools to interact with the filesystem rather than asking the user to
 
       // Call the model with streaming (using native system prompt support)
       const apiStartTime = Date.now();
+      let streamedChars = 0;
+      const onChunk = (chunk: string): void => {
+        if (chunk) {
+          streamedChars += chunk.length;
+        }
+        this.callbacks.onText?.(chunk);
+      };
       const response = await chatProvider.streamChat(
         messagesToSend,
         tools,
-        this.callbacks.onText,
+        onChunk,
         systemContext
       );
       const apiDuration = (Date.now() - apiStartTime) / 1000;
@@ -541,7 +548,8 @@ Always use tools to interact with the filesystem rather than asking the user to
         response.content,
         response.toolCalls,
         response.usage,
-        Date.now() - apiStartTime
+        Date.now() - apiStartTime,
+        response.rawResponse
       );
 
       // Record usage for cost tracking
@@ -574,7 +582,22 @@ Always use tools to interact with the filesystem rather than asking the user to
         finalResponse = response.content;
       }
 
-      if (isExtractedToolCall) {
+      const shouldEmitFallback = !response.content &&
+        response.toolCalls.length === 0 &&
+        streamedChars === 0;
+
+      if (shouldEmitFallback) {
+        const fallbackMessage = response.reasoningContent
+          ? 'Model returned reasoning without a final answer. Try again or check --audit for the raw response.'
+          : 'Model returned an empty response. Try again or check --audit for the raw response.';
+
+        finalResponse = fallbackMessage;
+        this.messages.push({
+          role: 'assistant',
+          content: fallbackMessage,
+        });
+        this.callbacks.onText?.(fallbackMessage);
+      } else if (isExtractedToolCall) {
         // For extracted tool calls, store as plain text (model doesn't understand tool_use blocks)
         this.messages.push({
           role: 'assistant',
diff --git a/src/providers/anthropic.ts b/src/providers/anthropic.ts
index b5f8d1f..667ef0e 100644
--- a/src/providers/anthropic.ts
+++ b/src/providers/anthropic.ts
@@ -126,6 +126,7 @@ export class AnthropicProvider extends BaseProvider {
         cacheCreationInputTokens: usage.cache_creation_input_tokens,
         cacheReadInputTokens: usage.cache_read_input_tokens,
       },
+      rawResponse: finalMessage,
     };
   }
 
@@ -209,7 +210,7 @@ export class AnthropicProvider extends BaseProvider {
             text: b.text || '',
           }),
           // Unknown block types become empty text blocks (logged by mapContentBlocks)
-          unknown: () => ({ type: 'text' as const, text: '' }),
+          unknown: (b) => ({ type: 'text' as const, text: b.text || b.content || '' }),
         });
 
         return { role, content };
@@ -258,6 +259,7 @@ export class AnthropicProvider extends BaseProvider {
       outputTokens: usage.output_tokens,
       cacheCreationInputTokens: usage.cache_creation_input_tokens,
       cacheReadInputTokens: usage.cache_read_input_tokens,
+      rawResponse: response,
     });
   }
 }
diff --git a/src/providers/message-converter.ts b/src/providers/message-converter.ts
index 4be7413..aa5b984 100644
--- a/src/providers/message-converter.ts
+++ b/src/providers/message-converter.ts
@@ -14,8 +14,8 @@
  * - Prevents silent bugs like tool_result blocks being dropped
  */
 
-import type { Message, ContentBlock } from '../types.js';
-import { logger } from '../logger.js';
+import type {ContentBlock, Message} from '../types.js';
+import {logger} from '../logger.js';
 
 /**
  * Typed block interfaces for type-safe extraction.
@@ -229,7 +229,7 @@ export interface BlockConverters<T> {
   image: (block: ContentBlock) => T;
   thinking: (block: ContentBlock) => T;
   /** Called for unknown block types - can return null to skip */
-  unknown?: (block: ContentBlock) => T | null;
+  unknown: (block: ContentBlock) => T;
 }
 
 /**
@@ -244,7 +244,7 @@ export interface BlockConverters<T> {
 export function mapContentBlock<T>(
   block: ContentBlock,
   converters: BlockConverters<T>
-): T | null {
+): T {
   switch (block.type) {
     case 'text':
       return converters.text(block);
@@ -258,7 +258,7 @@ export function mapContentBlock<T>(
       return converters.thinking(block);
     default:
       logger.warn(`Unknown content block type: ${(block as ContentBlock).type}`);
-      return converters.unknown ? converters.unknown(block) : null;
+      return converters.unknown(block);
   }
 }
 
diff --git a/src/providers/ollama-cloud.ts b/src/providers/ollama-cloud.ts
index 2bc2541..978a134 100644
--- a/src/providers/ollama-cloud.ts
+++ b/src/providers/ollama-cloud.ts
@@ -56,6 +56,7 @@ interface OllamaChatResponse {
   message: {
     role: string;
     content: string;
+    thinking?: string;
     tool_calls?: OllamaToolCall[];
   };
   done: boolean;
@@ -216,28 +217,37 @@ export class OllamaCloudProvider extends BaseProvider {
             }));
           }
 
+          const rawContent = responseData.message.content || '';
+          const thinkingField = responseData.message.thinking || '';
+
           // Extract thinking content from <think> tags
-          const { content: thinkingCleanedContent, thinking } = this.extractThinkingContent(
-            responseData.message.content
+          const { content: thinkingCleanedContent, thinking: tagThinking } = this.extractThinkingContent(
+            rawContent
           );
+          const combinedThinking = [thinkingField, tagThinking].filter(Boolean).join('\n');
+          const hasContent = thinkingCleanedContent.trim().length > 0;
+          const useFallbackContent = !hasContent && combinedThinking.length > 0;
+          const finalContent = useFallbackContent ? combinedThinking : thinkingCleanedContent;
+          const reasoningContent = useFallbackContent ? undefined : (combinedThinking || undefined);
 
           // Fall back to extracting tool calls from text if no native calls
           if (toolCalls.length === 0 && tools && tools.length > 0) {
-            toolCalls = this.extractToolCalls(thinkingCleanedContent, tools);
+            toolCalls = this.extractToolCalls(finalContent, tools);
           }
 
           // Clean hallucinated traces from content (after tool extraction)
           const cleanedContent = toolCalls.length > 0
-            ? this.cleanHallucinatedTraces(thinkingCleanedContent)
-            : thinkingCleanedContent;
+            ? this.cleanHallucinatedTraces(finalContent)
+            : finalContent;
 
           return createProviderResponse({
             content: cleanedContent,
             toolCalls,
             stopReason: responseData.done_reason,
-            reasoningContent: thinking || undefined,
+            reasoningContent,
             inputTokens: responseData.prompt_eval_count,
             outputTokens: responseData.eval_count,
+            rawResponse: responseData,
           });
         },
         {
@@ -289,10 +299,13 @@ export class OllamaCloudProvider extends BaseProvider {
           const reader = response.body.getReader();
           const decoder = new TextDecoder();
           let fullText = '';
+          let thinkingText = '';
+          let streamedContentChars = 0;
           let inputTokens: number | undefined;
           let outputTokens: number | undefined;
           let stopReason: string | undefined;
           const nativeToolCalls: ToolCall[] = [];
+          const rawChunks: OllamaChatResponse[] = [];
 
           // Process streamed chunks
           while (true) {
@@ -305,11 +318,19 @@ export class OllamaCloudProvider extends BaseProvider {
             for (const line of lines) {
               try {
                 const data: OllamaChatResponse = JSON.parse(line);
+                rawChunks.push(data);
 
                 if (data.message?.content) {
                   const content = data.message.content;
                   fullText += content;
-                  if (onChunk) onChunk(content);
+                  if (content) {
+                    streamedContentChars += content.length;
+                    if (onChunk) onChunk(content);
+                  }
+                }
+
+                if (data.message?.thinking) {
+                  thinkingText += data.message.thinking;
                 }
 
                 // Capture native tool calls from Ollama API
@@ -337,26 +358,36 @@ export class OllamaCloudProvider extends BaseProvider {
           }
 
           // Extract thinking content from <think> tags (used by qwen3:thinking and similar models)
-          const { content: thinkingCleanedContent, thinking } = this.extractThinkingContent(fullText);
+          const { content: thinkingCleanedContent, thinking: tagThinking } = this.extractThinkingContent(fullText);
+          const combinedThinking = [thinkingText, tagThinking].filter(Boolean).join('\n');
+          const hasContent = thinkingCleanedContent.trim().length > 0;
+          const useFallbackContent = !hasContent && combinedThinking.length > 0;
+          const finalContent = useFallbackContent ? combinedThinking : thinkingCleanedContent;
+          const reasoningContent = useFallbackContent ? undefined : (combinedThinking || undefined);
+
+          if (streamedContentChars === 0 && finalContent && onChunk) {
+            onChunk(finalContent);
+          }
 
           // Use native tool calls if available, otherwise extract from text
           let toolCalls: ToolCall[] = nativeToolCalls;
           if (toolCalls.length === 0 && tools && tools.length > 0) {
-            toolCalls = this.extractToolCalls(thinkingCleanedContent, tools);
+            toolCalls = this.extractToolCalls(finalContent, tools);
           }
 
           // Clean hallucinated traces from content (after tool extraction)
           const cleanedContent = toolCalls.length > 0
-            ? this.cleanHallucinatedTraces(thinkingCleanedContent)
-            : thinkingCleanedContent;
+            ? this.cleanHallucinatedTraces(finalContent)
+            : finalContent;
 
           return createProviderResponse({
             content: cleanedContent,
             toolCalls,
             stopReason: stopReason || 'stop',
-            reasoningContent: thinking || undefined,
+            reasoningContent,
             inputTokens,
             outputTokens,
+            rawResponse: { stream: true, chunks: rawChunks },
           });
         },
         {
diff --git a/src/providers/openai-compatible.ts b/src/providers/openai-compatible.ts
index e45d924..f952e6f 100644
--- a/src/providers/openai-compatible.ts
+++ b/src/providers/openai-compatible.ts
@@ -118,8 +118,10 @@ export class OpenAICompatibleProvider extends BaseProvider {
     let reasoningContent = '';
     const toolCallAccumulator = new StreamingToolCallAccumulator();
     let streamUsage: { prompt_tokens: number; completion_tokens: number; cached_tokens?: number } | null = null;
+    const rawChunks: OpenAI.ChatCompletionChunk[] = [];
 
     for await (const chunk of stream) {
+      rawChunks.push(chunk);
       const delta = chunk.choices[0]?.delta;
 
       // Handle reasoning content from reasoning models (e.g., DeepSeek-R1)
@@ -168,6 +170,7 @@ export class OpenAICompatibleProvider extends BaseProvider {
       inputTokens,
       outputTokens,
       cachedInputTokens: streamUsage?.cached_tokens,
+      rawResponse: { stream: true, chunks: rawChunks },
     });
   }
 
@@ -300,6 +303,7 @@ export class OpenAICompatibleProvider extends BaseProvider {
         } : null,
         // Thinking blocks are converted to text for OpenAI (it doesn't have native thinking input)
         thinking: (b) => ({ kind: 'text', text: b.text || '' }),
+        unknown: (b) => ({ kind: 'text', text: b.text || b.content || '' }),
       };
 
       // Process blocks and collect by type
@@ -479,6 +483,7 @@ export class OpenAICompatibleProvider extends BaseProvider {
       inputTokens,
       outputTokens,
       cachedInputTokens: cachedTokens,
+      rawResponse: response,
     });
   }
 }
diff --git a/src/providers/response-parser.ts b/src/providers/response-parser.ts
index d900c18..7151178 100644
--- a/src/providers/response-parser.ts
+++ b/src/providers/response-parser.ts
@@ -43,6 +43,7 @@ export function createProviderResponse(params: {
   inputTokens?: number;
   outputTokens?: number;
   reasoningContent?: string;
+  rawResponse?: unknown;
   // Cache metrics
   cacheCreationInputTokens?: number;
   cacheReadInputTokens?: number;
@@ -55,6 +56,7 @@ export function createProviderResponse(params: {
     inputTokens,
     outputTokens,
     reasoningContent,
+    rawResponse,
     cacheCreationInputTokens,
     cacheReadInputTokens,
     cachedInputTokens,
@@ -65,6 +67,7 @@ export function createProviderResponse(params: {
     toolCalls,
     stopReason: mapStopReason(stopReason, toolCalls.length > 0),
     ...(reasoningContent && { reasoningContent }),
+    ...(rawResponse !== undefined && { rawResponse }),
     ...(inputTokens !== undefined && outputTokens !== undefined && {
       usage: {
         inputTokens,
diff --git a/src/types.ts b/src/types.ts
index 018a5ed..97a389c 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -150,6 +150,7 @@ export interface TokenUsage {
  * @property {{'end_turn' | 'tool_use' | 'max_tokens'}} stopReason - Reason for stopping the response generation.
  * @property {string} [reasoningContent] - Optional reasoning/thinking content from reasoning models.
  * @property {TokenUsage} [usage] - Token usage information if available.
+ * @property {unknown} [rawResponse] - Raw provider response payload for audit/debugging.
  */
 export interface ProviderResponse {
   content: string;
@@ -157,6 +158,7 @@ export interface ProviderResponse {
   stopReason: 'end_turn' | 'tool_use' | 'max_tokens';
   reasoningContent?: string;
   usage?: TokenUsage;
+  rawResponse?: unknown;
 }
 
 // Provider configuration

From 438fbd9301736500fee44bbc7aafd40a65f4bc75 Mon Sep 17 00:00:00 2001
From: Layne Penney <dev@layne.pro>
Date: Thu, 15 Jan 2026 09:12:59 -0600
Subject: [PATCH 04/17] docs: add roadmap item for test sandbox compatibility

---
 ROADMAP.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/ROADMAP.md b/ROADMAP.md
index df7732d..eee379f 100644
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -4,6 +4,13 @@ This document tracks planned features and improvements for Codi.
 
 ## Planned Features
 
+### Test Sandbox Compatibility
+
+Update tests that write to `~/.codi` or bind to `127.0.0.1` so they use local temporary
+directories and ephemeral ports by default, avoiding sandbox permission errors.
+
+---
+
 ### Semantic Fallback for Tool Calls
 
 When a model attempts to call a tool that doesn't exist or uses incorrect parameter names, implement a semantic fallback system that:

From a7005531eb200efbcdfa8a48dd800c606ee2e2a0 Mon Sep 17 00:00:00 2001
From: Layne Penney <dev@layne.pro>
Date: Thu, 15 Jan 2026 09:35:04 -0600
Subject: [PATCH 05/17] feat: stream reasoning and include it in messages

---
 src/agent.ts                       | 28 +++++++++++++++++++++++++---
 src/index.ts                       | 17 +++++++++++++++++
 src/providers/anthropic.ts         |  3 ++-
 src/providers/base.ts              |  3 ++-
 src/providers/mock.ts              |  3 ++-
 src/providers/ollama-cloud.ts      | 24 ++++++++++++++++++------
 src/providers/openai-compatible.ts |  4 +++-
 tests/providers.test.ts            |  8 +++++++-
 8 files changed, 76 insertions(+), 14 deletions(-)

diff --git a/src/agent.ts b/src/agent.ts
index 8062820..884ef08 100644
--- a/src/agent.ts
+++ b/src/agent.ts
@@ -96,6 +96,7 @@ export interface AgentOptions {
   auditLogger?: AuditLogger | null; // Optional audit logger for session debugging
   onText?: (text: string) => void;
   onReasoning?: (reasoning: string) => void; // Called with reasoning trace from reasoning models
+  onReasoningChunk?: (chunk: string) => void; // Streaming reasoning output
   onToolCall?: (name: string, input: Record<string, unknown>) => void;
   onToolResult?: (name: string, result: string, isError: boolean) => void;
   onConfirm?: (confirmation: ToolConfirmation) => Promise<ConfirmationResult>; // Confirm destructive tools
@@ -130,6 +131,7 @@ export class Agent {
   private callbacks: {
     onText?: (text: string) => void;
     onReasoning?: (reasoning: string) => void;
+    onReasoningChunk?: (chunk: string) => void;
     onToolCall?: (name: string, input: Record<string, unknown>) => void;
     onToolResult?: (name: string, result: string, isError: boolean) => void;
     onConfirm?: (confirmation: ToolConfirmation) => Promise<ConfirmationResult>;
@@ -168,6 +170,7 @@ export class Agent {
     this.callbacks = {
       onText: options.onText,
       onReasoning: options.onReasoning,
+      onReasoningChunk: options.onReasoningChunk,
       onToolCall: options.onToolCall,
       onToolResult: options.onToolResult,
       onConfirm: options.onConfirm,
@@ -519,11 +522,19 @@ Always use tools to interact with the filesystem rather than asking the user to
         }
         this.callbacks.onText?.(chunk);
       };
+      let streamedReasoningChars = 0;
+      const onReasoningChunk = (chunk: string): void => {
+        if (chunk) {
+          streamedReasoningChars += chunk.length;
+        }
+        this.callbacks.onReasoningChunk?.(chunk);
+      };
       const response = await chatProvider.streamChat(
         messagesToSend,
         tools,
         onChunk,
-        systemContext
+        systemContext,
+        onReasoningChunk
       );
       const apiDuration = (Date.now() - apiStartTime) / 1000;
 
@@ -558,7 +569,7 @@ Always use tools to interact with the filesystem rather than asking the user to
       }
 
       // Call reasoning callback if reasoning content is present (e.g., from DeepSeek-R1)
-      if (response.reasoningContent && this.callbacks.onReasoning) {
+      if (response.reasoningContent && this.callbacks.onReasoning && streamedReasoningChars === 0) {
         this.callbacks.onReasoning(response.reasoningContent);
       }
 
@@ -582,6 +593,10 @@ Always use tools to interact with the filesystem rather than asking the user to
         finalResponse = response.content;
       }
 
+      const thinkingText = response.reasoningContent?.trim();
+      const shouldAddThinkingBlock = !!thinkingText &&
+        (!response.content || response.content.trim() !== thinkingText);
+
       const shouldEmitFallback = !response.content &&
         response.toolCalls.length === 0 &&
         streamedChars === 0;
@@ -599,14 +614,21 @@ Always use tools to interact with the filesystem rather than asking the user to
         this.callbacks.onText?.(fallbackMessage);
       } else if (isExtractedToolCall) {
         // For extracted tool calls, store as plain text (model doesn't understand tool_use blocks)
+        const combinedContent = thinkingText
+          ? `${response.content || ''}${response.content ? '\n\n' : ''}[Thinking]:\n${thinkingText}`
+          : (response.content || '');
         this.messages.push({
           role: 'assistant',
-          content: response.content || '',
+          content: combinedContent,
         });
       } else if (response.content || response.toolCalls.length > 0) {
         // For native tool calls, use content blocks
         const contentBlocks: ContentBlock[] = [];
 
+        if (shouldAddThinkingBlock && thinkingText) {
+          contentBlocks.push({ type: 'thinking', text: thinkingText });
+        }
+
         if (response.content) {
           contentBlocks.push({ type: 'text', text: response.content });
         }
diff --git a/src/index.ts b/src/index.ts
index 82d7acb..220a912 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -2442,6 +2442,7 @@ async function main() {
 
   // Track if we've received streaming output (to manage spinner)
   let isStreaming = false;
+  let isReasoningStreaming = false;
 
   // Track tool start times for duration logging
   const toolStartTimes = new Map<string, number>();
@@ -2478,6 +2479,14 @@ async function main() {
       console.log(chalk.dim(reasoning));
       console.log(chalk.dim.italic('---\n'));
     },
+    onReasoningChunk: (chunk) => {
+      if (!isReasoningStreaming) {
+        isReasoningStreaming = true;
+        spinner.stop();
+        console.log(chalk.dim.italic('\n💭 Thinking...'));
+      }
+      process.stdout.write(chalk.dim(chunk));
+    },
     onToolCall: (name, input) => {
       // Stop any spinner and record start time
       spinner.stop();
@@ -3315,6 +3324,10 @@ async function main() {
               spinner.thinking();
               const startTime = Date.now();
               await agent.chat(result, { taskType: command.taskType });
+              if (isReasoningStreaming) {
+                console.log(chalk.dim.italic('\n---\n'));
+                isReasoningStreaming = false;
+              }
               const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
               console.log(chalk.dim(`\n(${elapsed}s)`));
             }
@@ -3340,6 +3353,10 @@ async function main() {
     try {
       const startTime = Date.now();
       await agent.chat(trimmed);
+      if (isReasoningStreaming) {
+        console.log(chalk.dim.italic('\n---\n'));
+        isReasoningStreaming = false;
+      }
       const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
       console.log(chalk.dim(`\n(${elapsed}s)`));
     } catch (error) {
diff --git a/src/providers/anthropic.ts b/src/providers/anthropic.ts
index 667ef0e..0bc2efb 100644
--- a/src/providers/anthropic.ts
+++ b/src/providers/anthropic.ts
@@ -74,7 +74,8 @@ export class AnthropicProvider extends BaseProvider {
     messages: Message[],
     tools?: ToolDefinition[],
     onChunk?: (chunk: string) => void,
-    systemPrompt?: string
+    systemPrompt?: string,
+    _onReasoningChunk?: (chunk: string) => void
   ): Promise<ProviderResponse> {
     const stream = this.client.messages.stream({
       model: this.model,
diff --git a/src/providers/base.ts b/src/providers/base.ts
index 6e95a31..9371ca4 100644
--- a/src/providers/base.ts
+++ b/src/providers/base.ts
@@ -65,7 +65,8 @@ export abstract class BaseProvider {
     messages: Message[],
     tools?: ToolDefinition[],
     onChunk?: (chunk: string) => void,
-    systemPrompt?: string
+    systemPrompt?: string,
+    onReasoningChunk?: (chunk: string) => void
   ): Promise<ProviderResponse>;
 
   /**
diff --git a/src/providers/mock.ts b/src/providers/mock.ts
index 06da603..e8099dc 100644
--- a/src/providers/mock.ts
+++ b/src/providers/mock.ts
@@ -289,7 +289,8 @@ export class MockProvider extends BaseProvider {
     messages: Message[],
     tools?: ToolDefinition[],
     onChunk?: (chunk: string) => void,
-    systemPrompt?: string
+    systemPrompt?: string,
+    _onReasoningChunk?: (chunk: string) => void
   ): Promise<ProviderResponse> {
     this.recordCall('streamChat', messages, tools, systemPrompt);
 
diff --git a/src/providers/ollama-cloud.ts b/src/providers/ollama-cloud.ts
index 978a134..b9e99cc 100644
--- a/src/providers/ollama-cloud.ts
+++ b/src/providers/ollama-cloud.ts
@@ -228,11 +228,14 @@ export class OllamaCloudProvider extends BaseProvider {
           const hasContent = thinkingCleanedContent.trim().length > 0;
           const useFallbackContent = !hasContent && combinedThinking.length > 0;
           const finalContent = useFallbackContent ? combinedThinking : thinkingCleanedContent;
-          const reasoningContent = useFallbackContent ? undefined : (combinedThinking || undefined);
+          const reasoningContent = combinedThinking || undefined;
+          const toolExtractionText = combinedThinking && !finalContent.includes(combinedThinking)
+            ? `${finalContent}\n${combinedThinking}`
+            : finalContent;
 
           // Fall back to extracting tool calls from text if no native calls
           if (toolCalls.length === 0 && tools && tools.length > 0) {
-            toolCalls = this.extractToolCalls(finalContent, tools);
+            toolCalls = this.extractToolCalls(toolExtractionText, tools);
           }
 
           // Clean hallucinated traces from content (after tool extraction)
@@ -262,7 +265,8 @@ export class OllamaCloudProvider extends BaseProvider {
     messages: Message[],
     tools?: ToolDefinition[],
     onChunk?: (chunk: string) => void,
-    systemPrompt?: string
+    systemPrompt?: string,
+    onReasoningChunk?: (chunk: string) => void
   ): Promise<ProviderResponse> {
     const ollamaMessages = this.convertMessages(messages, systemPrompt);
 
@@ -301,6 +305,7 @@ export class OllamaCloudProvider extends BaseProvider {
           let fullText = '';
           let thinkingText = '';
           let streamedContentChars = 0;
+          let streamedThinkingChars = 0;
           let inputTokens: number | undefined;
           let outputTokens: number | undefined;
           let stopReason: string | undefined;
@@ -331,6 +336,10 @@ export class OllamaCloudProvider extends BaseProvider {
 
                 if (data.message?.thinking) {
                   thinkingText += data.message.thinking;
+                  if (onReasoningChunk) {
+                    streamedThinkingChars += data.message.thinking.length;
+                    onReasoningChunk(data.message.thinking);
+                  }
                 }
 
                 // Capture native tool calls from Ollama API
@@ -363,16 +372,19 @@ export class OllamaCloudProvider extends BaseProvider {
           const hasContent = thinkingCleanedContent.trim().length > 0;
           const useFallbackContent = !hasContent && combinedThinking.length > 0;
           const finalContent = useFallbackContent ? combinedThinking : thinkingCleanedContent;
-          const reasoningContent = useFallbackContent ? undefined : (combinedThinking || undefined);
+          const reasoningContent = combinedThinking || undefined;
+          const toolExtractionText = combinedThinking && !finalContent.includes(combinedThinking)
+            ? `${finalContent}\n${combinedThinking}`
+            : finalContent;
 
-          if (streamedContentChars === 0 && finalContent && onChunk) {
+          if (streamedContentChars === 0 && finalContent && onChunk && streamedThinkingChars === 0) {
             onChunk(finalContent);
           }
 
           // Use native tool calls if available, otherwise extract from text
           let toolCalls: ToolCall[] = nativeToolCalls;
           if (toolCalls.length === 0 && tools && tools.length > 0) {
-            toolCalls = this.extractToolCalls(finalContent, tools);
+            toolCalls = this.extractToolCalls(toolExtractionText, tools);
           }
 
           // Clean hallucinated traces from content (after tool extraction)
diff --git a/src/providers/openai-compatible.ts b/src/providers/openai-compatible.ts
index f952e6f..9994df3 100644
--- a/src/providers/openai-compatible.ts
+++ b/src/providers/openai-compatible.ts
@@ -95,7 +95,8 @@ export class OpenAICompatibleProvider extends BaseProvider {
     messages: Message[],
     tools?: ToolDefinition[],
     onChunk?: (chunk: string) => void,
-    systemPrompt?: string
+    systemPrompt?: string,
+    onReasoningChunk?: (chunk: string) => void
   ): Promise<ProviderResponse> {
     const convertedMessages = this.convertMessages(messages);
     const messagesWithSystem: OpenAI.ChatCompletionMessageParam[] = systemPrompt
@@ -128,6 +129,7 @@ export class OpenAICompatibleProvider extends BaseProvider {
       const reasoningDelta = (delta as any)?.reasoning_content;
       if (reasoningDelta) {
         reasoningContent += reasoningDelta;
+        onReasoningChunk?.(reasoningDelta);
       }
 
       if (delta?.content) {
diff --git a/tests/providers.test.ts b/tests/providers.test.ts
index b18c26e..f94c67e 100644
--- a/tests/providers.test.ts
+++ b/tests/providers.test.ts
@@ -39,7 +39,13 @@ describe('BaseProvider', () => {
     async chat() {
       return { content: '', toolCalls: [], stopReason: 'end_turn' as const };
     }
-    async streamChat() {
+    async streamChat(
+      _messages: Message[] = [],
+      _tools?: unknown,
+      _onChunk?: (chunk: string) => void,
+      _systemPrompt?: string,
+      _onReasoningChunk?: (chunk: string) => void
+    ) {
       return { content: '', toolCalls: [], stopReason: 'end_turn' as const };
     }
     supportsToolUse() { return true; }

From 7eab210e2b075b1933c7fe9839ff629dc5a7eaf4 Mon Sep 17 00:00:00 2001
From: Layne Penney <dev@layne.pro>
Date: Thu, 15 Jan 2026 09:43:28 -0600
Subject: [PATCH 06/17] fix: parse tool traces from text output

---
 src/agent.ts              | 13 +++++---
 src/utils/json-parser.ts  | 70 ++++++++++++++++++++++++++++++++++++++-
 tests/json-parser.test.ts | 25 +++++++++++++-
 3 files changed, 101 insertions(+), 7 deletions(-)

diff --git a/src/agent.ts b/src/agent.ts
index 884ef08..e99808b 100644
--- a/src/agent.ts
+++ b/src/agent.ts
@@ -575,12 +575,15 @@ Always use tools to interact with the filesystem rather than asking the user to
 
       // If no tool calls were detected via API but tools are enabled,
       // try to extract tool calls from the text (for models that output JSON as text)
-      if (response.toolCalls.length === 0 && this.useTools && this.extractToolsFromText && response.content) {
+      if (response.toolCalls.length === 0 && this.useTools && this.extractToolsFromText) {
         const availableTools = this.toolRegistry.listTools();
-        const extractedCalls = extractToolCallsFromText(response.content, availableTools);
-        if (extractedCalls.length > 0) {
-          response.toolCalls = extractedCalls;
-          response.stopReason = 'tool_use';
+        const extractionText = [response.content, response.reasoningContent].filter(Boolean).join('\n');
+        if (extractionText) {
+          const extractedCalls = extractToolCallsFromText(extractionText, availableTools);
+          if (extractedCalls.length > 0) {
+            response.toolCalls = extractedCalls;
+            response.stopReason = 'tool_use';
+          }
         }
       }
 
diff --git a/src/utils/json-parser.ts b/src/utils/json-parser.ts
index 8082ba2..d5f3086 100644
--- a/src/utils/json-parser.ts
+++ b/src/utils/json-parser.ts
@@ -38,6 +38,50 @@ export function tryParseJson(jsonStr: string): unknown | null {
   }
 }
 
+function extractJsonObjectFromIndex(
+  text: string,
+  startIndex: number
+): { json: string; endIndex: number } | null {
+  const start = text.indexOf('{', startIndex);
+  if (start === -1) return null;
+
+  let depth = 0;
+  let inString = false;
+  let isEscaped = false;
+
+  for (let i = start; i < text.length; i++) {
+    const char = text[i];
+
+    if (isEscaped) {
+      isEscaped = false;
+      continue;
+    }
+
+    if (char === '\\') {
+      isEscaped = true;
+      continue;
+    }
+
+    if (char === '"') {
+      inString = !inString;
+      continue;
+    }
+
+    if (inString) continue;
+
+    if (char === '{') {
+      depth += 1;
+    } else if (char === '}') {
+      depth -= 1;
+      if (depth === 0) {
+        return { json: text.slice(start, i + 1), endIndex: i + 1 };
+      }
+    }
+  }
+
+  return null;
+}
+
 /**
  * Try to extract tool calls from text when models output JSON instead of using
  * proper function calling (common with Ollama models).
@@ -63,7 +107,31 @@ export function extractToolCallsFromText(text: string, availableTools: string[])
     }
   }
 
-  // Pattern 2: Look for JSON in code blocks (objects or arrays)
+  // Pattern 2: [Calling tool_name]: {json} format
+  if (toolCalls.length === 0) {
+    const callingPattern = /\[Calling\s+([a-z_][a-z0-9_]*)\]\s*:\s*/gi;
+
+    while ((match = callingPattern.exec(text)) !== null) {
+      const toolName = match[1];
+      if (!availableTools.includes(toolName)) continue;
+
+      const extracted = extractJsonObjectFromIndex(text, match.index + match[0].length);
+      if (!extracted) continue;
+
+      const args = tryParseJson(extracted.json);
+      if (args && typeof args === 'object') {
+        toolCalls.push({
+          id: `extracted_${Date.now()}_${toolCalls.length}`,
+          name: toolName,
+          input: args as Record<string, unknown>,
+        });
+      }
+
+      callingPattern.lastIndex = extracted.endIndex;
+    }
+  }
+
+  // Pattern 3: Look for JSON in code blocks (objects or arrays)
   if (toolCalls.length === 0) {
     const codeBlockPattern = /```(?:json)?\s*([\s\S]*?)\s*```/g;
     while ((match = codeBlockPattern.exec(text)) !== null) {
diff --git a/tests/json-parser.test.ts b/tests/json-parser.test.ts
index 3dd47ab..66f9371 100644
--- a/tests/json-parser.test.ts
+++ b/tests/json-parser.test.ts
@@ -151,7 +151,30 @@ describe('json-parser', () => {
       });
     });
 
-    describe('pattern 2: JSON in code blocks', () => {
+    describe('pattern 2: [Calling tool_name]: {json} traces', () => {
+      it('extracts tool calls from calling trace format', () => {
+        const text = '[Calling write_file]: {"path": "notes.txt", "content": "hello"}';
+        const calls = extractToolCallsFromText(text, availableTools);
+
+        expect(calls).toHaveLength(1);
+        expect(calls[0].name).toBe('write_file');
+        expect(calls[0].input).toEqual({ path: 'notes.txt', content: 'hello' });
+      });
+
+      it('extracts multiple calling trace tool calls', () => {
+        const text = `
+[Calling read_file]: {"path": "a.txt"}
+[Calling read_file]: {"path": "b.txt"}
+        `;
+        const calls = extractToolCallsFromText(text, availableTools);
+
+        expect(calls).toHaveLength(2);
+        expect(calls[0].input).toEqual({ path: 'a.txt' });
+        expect(calls[1].input).toEqual({ path: 'b.txt' });
+      });
+    });
+
+    describe('pattern 3: JSON in code blocks', () => {
       it('extracts from json code block', () => {
         const text = `
 Here's the tool call:

From b6e38a5e6b8c289b339e0e80f5f2dd61abbdcc56 Mon Sep 17 00:00:00 2001
From: Layne Penney <dev@layne.pro>
Date: Thu, 15 Jan 2026 09:58:41 -0600
Subject: [PATCH 07/17] feat: apply tool fallback during text extraction

---
 src/agent.ts              |  5 ++--
 src/utils/json-parser.ts  | 43 ++++++++++++++++++++++++----------
 tests/json-parser.test.ts | 49 +++++++++++++++++++++++++--------------
 3 files changed, 65 insertions(+), 32 deletions(-)

diff --git a/src/agent.ts b/src/agent.ts
index e99808b..2e5bcb0 100644
--- a/src/agent.ts
+++ b/src/agent.ts
@@ -576,10 +576,11 @@ Always use tools to interact with the filesystem rather than asking the user to
       // If no tool calls were detected via API but tools are enabled,
       // try to extract tool calls from the text (for models that output JSON as text)
       if (response.toolCalls.length === 0 && this.useTools && this.extractToolsFromText) {
-        const availableTools = this.toolRegistry.listTools();
+        const toolDefinitions = this.toolRegistry.getDefinitions();
+        const fallbackConfig = this.toolRegistry.getFallbackConfig();
         const extractionText = [response.content, response.reasoningContent].filter(Boolean).join('\n');
         if (extractionText) {
-          const extractedCalls = extractToolCallsFromText(extractionText, availableTools);
+          const extractedCalls = extractToolCallsFromText(extractionText, toolDefinitions, fallbackConfig);
           if (extractedCalls.length > 0) {
             response.toolCalls = extractedCalls;
             response.stopReason = 'tool_use';
diff --git a/src/utils/json-parser.ts b/src/utils/json-parser.ts
index d5f3086..399534f 100644
--- a/src/utils/json-parser.ts
+++ b/src/utils/json-parser.ts
@@ -6,7 +6,12 @@
  * Extracted from agent.ts for reusability.
  */
 
-import type { ToolCall } from '../types.js';
+import type { ToolCall, ToolDefinition } from '../types.js';
+import {
+  DEFAULT_FALLBACK_CONFIG,
+  findBestToolMatch,
+  type ToolFallbackConfig,
+} from '../tools/tool-fallback.js';
 
 /**
  * Attempt to fix common JSON issues from LLM output:
@@ -86,21 +91,31 @@ function extractJsonObjectFromIndex(
  * Try to extract tool calls from text when models output JSON instead of using
  * proper function calling (common with Ollama models).
  */
-export function extractToolCallsFromText(text: string, availableTools: string[]): ToolCall[] {
+export function extractToolCallsFromText(
+  text: string,
+  toolDefinitions: ToolDefinition[],
+  fallbackConfig: ToolFallbackConfig = DEFAULT_FALLBACK_CONFIG
+): ToolCall[] {
   const toolCalls: ToolCall[] = [];
+  const resolveToolName = (requestedName: string): string | null => {
+    const match = findBestToolMatch(requestedName, toolDefinitions, fallbackConfig);
+    if (match.exactMatch) return requestedName;
+    if (match.shouldAutoCorrect && match.matchedName) return match.matchedName;
+    return null;
+  };
 
   // Pattern 1: {"name": "tool_name", "arguments": {...}} or {"name": "tool_name", "parameters": {...}}
   const jsonPattern = /\{[\s\S]*?"name"\s*:\s*"(\w+)"[\s\S]*?(?:"arguments"|"parameters"|"input")\s*:\s*(\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\})[\s\S]*?\}/g;
 
   let match;
   while ((match = jsonPattern.exec(text)) !== null) {
-    const toolName = match[1];
-    if (availableTools.includes(toolName)) {
+    const resolvedName = resolveToolName(match[1]);
+    if (resolvedName) {
       const args = tryParseJson(match[2]);
       if (args && typeof args === 'object') {
         toolCalls.push({
           id: `extracted_${Date.now()}_${toolCalls.length}`,
-          name: toolName,
+          name: resolvedName,
           input: args as Record<string, unknown>,
         });
       }
@@ -112,8 +127,8 @@ export function extractToolCallsFromText(text: string, availableTools: string[])
     const callingPattern = /\[Calling\s+([a-z_][a-z0-9_]*)\]\s*:\s*/gi;
 
     while ((match = callingPattern.exec(text)) !== null) {
-      const toolName = match[1];
-      if (!availableTools.includes(toolName)) continue;
+      const resolvedName = resolveToolName(match[1]);
+      if (!resolvedName) continue;
 
       const extracted = extractJsonObjectFromIndex(text, match.index + match[0].length);
       if (!extracted) continue;
@@ -122,7 +137,7 @@ export function extractToolCallsFromText(text: string, availableTools: string[])
       if (args && typeof args === 'object') {
         toolCalls.push({
           id: `extracted_${Date.now()}_${toolCalls.length}`,
-          name: toolName,
+          name: resolvedName,
           input: args as Record<string, unknown>,
         });
       }
@@ -144,10 +159,12 @@ export function extractToolCallsFromText(text: string, availableTools: string[])
       // Handle array of tool calls
       if (Array.isArray(parsed)) {
         for (const item of parsed) {
-          if (item?.name && availableTools.includes(item.name as string)) {
+          if (item?.name) {
+            const resolvedName = resolveToolName(item.name as string);
+            if (!resolvedName) continue;
             toolCalls.push({
               id: `extracted_${Date.now()}_${toolCalls.length}`,
-              name: item.name as string,
+              name: resolvedName,
               input: (item.arguments || item.parameters || item.input || {}) as Record<string, unknown>,
             });
           }
@@ -156,10 +173,12 @@ export function extractToolCallsFromText(text: string, availableTools: string[])
       // Handle single object
       else {
         const obj = parsed as Record<string, unknown>;
-        if (obj.name && availableTools.includes(obj.name as string)) {
+        if (obj.name) {
+          const resolvedName = resolveToolName(obj.name as string);
+          if (!resolvedName) continue;
           toolCalls.push({
             id: `extracted_${Date.now()}_${toolCalls.length}`,
-            name: obj.name as string,
+            name: resolvedName,
             input: (obj.arguments || obj.parameters || obj.input || {}) as Record<string, unknown>,
           });
         }
diff --git a/tests/json-parser.test.ts b/tests/json-parser.test.ts
index 66f9371..b7a4510 100644
--- a/tests/json-parser.test.ts
+++ b/tests/json-parser.test.ts
@@ -90,12 +90,17 @@ describe('json-parser', () => {
   });
 
   describe('extractToolCallsFromText', () => {
-    const availableTools = ['read_file', 'write_file', 'bash', 'glob'];
+    const toolNames = ['read_file', 'write_file', 'bash', 'glob'];
+    const toolDefinitions = toolNames.map((name) => ({
+      name,
+      description: `${name} tool`,
+      input_schema: { type: 'object', properties: {} },
+    }));
 
     describe('pattern 1: inline JSON with name and arguments', () => {
       it('extracts tool call with "arguments" key', () => {
         const text = 'I will read the file: {"name": "read_file", "arguments": {"path": "test.txt"}}';
-        const calls = extractToolCallsFromText(text, availableTools);
+        const calls = extractToolCallsFromText(text, toolDefinitions);
 
         expect(calls).toHaveLength(1);
         expect(calls[0].name).toBe('read_file');
@@ -104,7 +109,7 @@ describe('json-parser', () => {
 
       it('extracts tool call with "parameters" key', () => {
         const text = '{"name": "bash", "parameters": {"command": "ls -la"}}';
-        const calls = extractToolCallsFromText(text, availableTools);
+        const calls = extractToolCallsFromText(text, toolDefinitions);
 
         expect(calls).toHaveLength(1);
         expect(calls[0].name).toBe('bash');
@@ -113,7 +118,7 @@ describe('json-parser', () => {
 
       it('extracts tool call with "input" key', () => {
         const text = '{"name": "glob", "input": {"pattern": "*.ts"}}';
-        const calls = extractToolCallsFromText(text, availableTools);
+        const calls = extractToolCallsFromText(text, toolDefinitions);
 
         expect(calls).toHaveLength(1);
         expect(calls[0].name).toBe('glob');
@@ -125,7 +130,7 @@ describe('json-parser', () => {
           {"name": "read_file", "arguments": {"path": "a.txt"}}
           {"name": "read_file", "arguments": {"path": "b.txt"}}
         `;
-        const calls = extractToolCallsFromText(text, availableTools);
+        const calls = extractToolCallsFromText(text, toolDefinitions);
 
         expect(calls).toHaveLength(2);
         expect(calls[0].input).toEqual({ path: 'a.txt' });
@@ -134,7 +139,7 @@ describe('json-parser', () => {
 
       it('ignores unknown tools', () => {
         const text = '{"name": "unknown_tool", "arguments": {"foo": "bar"}}';
-        const calls = extractToolCallsFromText(text, availableTools);
+        const calls = extractToolCallsFromText(text, toolDefinitions);
 
         expect(calls).toHaveLength(0);
       });
@@ -144,7 +149,7 @@ describe('json-parser', () => {
           {"name": "bash", "arguments": {"command": "ls"}}
           {"name": "bash", "arguments": {"command": "pwd"}}
         `;
-        const calls = extractToolCallsFromText(text, availableTools);
+        const calls = extractToolCallsFromText(text, toolDefinitions);
 
         expect(calls[0].id).not.toBe(calls[1].id);
         expect(calls[0].id).toMatch(/^extracted_/);
@@ -154,7 +159,7 @@ describe('json-parser', () => {
     describe('pattern 2: [Calling tool_name]: {json} traces', () => {
       it('extracts tool calls from calling trace format', () => {
         const text = '[Calling write_file]: {"path": "notes.txt", "content": "hello"}';
-        const calls = extractToolCallsFromText(text, availableTools);
+        const calls = extractToolCallsFromText(text, toolDefinitions);
 
         expect(calls).toHaveLength(1);
         expect(calls[0].name).toBe('write_file');
@@ -166,7 +171,7 @@ describe('json-parser', () => {
 [Calling read_file]: {"path": "a.txt"}
 [Calling read_file]: {"path": "b.txt"}
         `;
-        const calls = extractToolCallsFromText(text, availableTools);
+        const calls = extractToolCallsFromText(text, toolDefinitions);
 
         expect(calls).toHaveLength(2);
         expect(calls[0].input).toEqual({ path: 'a.txt' });
@@ -182,7 +187,7 @@ Here's the tool call:
 {"name": "read_file", "arguments": {"path": "config.json"}}
 \`\`\`
         `;
-        const calls = extractToolCallsFromText(text, availableTools);
+        const calls = extractToolCallsFromText(text, toolDefinitions);
 
         expect(calls).toHaveLength(1);
         expect(calls[0].name).toBe('read_file');
@@ -194,7 +199,7 @@ Here's the tool call:
 {"name": "bash", "arguments": {"command": "echo hello"}}
 \`\`\`
         `;
-        const calls = extractToolCallsFromText(text, availableTools);
+        const calls = extractToolCallsFromText(text, toolDefinitions);
 
         expect(calls).toHaveLength(1);
         expect(calls[0].name).toBe('bash');
@@ -209,7 +214,7 @@ Here's the tool call:
 ]
 \`\`\`
         `;
-        const calls = extractToolCallsFromText(text, availableTools);
+        const calls = extractToolCallsFromText(text, toolDefinitions);
 
         expect(calls).toHaveLength(2);
         expect(calls[0].name).toBe('read_file');
@@ -222,7 +227,7 @@ Here's the tool call:
 just some text
 \`\`\`
         `;
-        const calls = extractToolCallsFromText(text, availableTools);
+        const calls = extractToolCallsFromText(text, toolDefinitions);
 
         expect(calls).toHaveLength(0);
       });
@@ -233,16 +238,24 @@ just some text
 {"foo": "bar"}
 \`\`\`
         `;
-        const calls = extractToolCallsFromText(text, availableTools);
+        const calls = extractToolCallsFromText(text, toolDefinitions);
 
         expect(calls).toHaveLength(0);
       });
     });
 
     describe('edge cases', () => {
+      it('auto-corrects close tool name matches', () => {
+        const text = '{"name": "readfile", "arguments": {"path": "test.txt"}}';
+        const calls = extractToolCallsFromText(text, toolDefinitions);
+
+        expect(calls).toHaveLength(1);
+        expect(calls[0].name).toBe('read_file');
+      });
+
       it('returns empty array for text without tool calls', () => {
         const text = 'Just a regular response without any tools';
-        const calls = extractToolCallsFromText(text, availableTools);
+        const calls = extractToolCallsFromText(text, toolDefinitions);
 
         expect(calls).toEqual([]);
       });
@@ -256,7 +269,7 @@ just some text
 
       it('handles nested objects in arguments', () => {
         const text = '{"name": "write_file", "arguments": {"path": "test.json", "content": "{\\"key\\": \\"value\\"}"}}';
-        const calls = extractToolCallsFromText(text, availableTools);
+        const calls = extractToolCallsFromText(text, toolDefinitions);
 
         expect(calls).toHaveLength(1);
         expect(calls[0].input).toHaveProperty('path');
@@ -264,7 +277,7 @@ just some text
 
       it('handles whitespace variations', () => {
         const text = '{  "name"  :  "bash"  ,  "arguments"  :  {  "command"  :  "ls"  }  }';
-        const calls = extractToolCallsFromText(text, availableTools);
+        const calls = extractToolCallsFromText(text, toolDefinitions);
 
         expect(calls).toHaveLength(1);
       });
@@ -272,7 +285,7 @@ just some text
       it('prefers inline pattern over code block pattern', () => {
         // When inline pattern matches, code block pattern should not run
         const text = '{"name": "bash", "arguments": {"command": "ls"}}';
-        const calls = extractToolCallsFromText(text, availableTools);
+        const calls = extractToolCallsFromText(text, toolDefinitions);
 
         expect(calls).toHaveLength(1);
       });

From df6f3babafaa6d858f712c930d61b2c14a43f0d6 Mon Sep 17 00:00:00 2001
From: Layne Penney <dev@layne.pro>
Date: Thu, 15 Jan 2026 10:14:03 -0600
Subject: [PATCH 08/17] test: isolate history storage in test runs

---
 src/history.ts        | 29 +++++++++++++++++++++--------
 tests/history.test.ts | 35 ++++++++++++++++++++++++-----------
 tests/index.test.ts   | 24 ++++++++++--------------
 3 files changed, 55 insertions(+), 33 deletions(-)

diff --git a/src/history.ts b/src/history.ts
index d662a45..80f7ae0 100644
--- a/src/history.ts
+++ b/src/history.ts
@@ -7,13 +7,26 @@
  */
 import * as fs from 'fs';
 import * as path from 'path';
-import { homedir } from 'os';
+import { homedir, tmpdir } from 'os';
 
 /** Maximum number of history entries to keep */
 const MAX_HISTORY_SIZE = 50;
 
-/** Directory where history is stored */
-const HISTORY_DIR = path.join(homedir(), '.codi', 'history');
+/** Directory where history is stored (allow test override). */
+const DEFAULT_HISTORY_DIR = path.join(homedir(), '.codi', 'history');
+const TEST_HISTORY_DIR = path.join(tmpdir(), `.codi-history-${process.pid}`);
+
+function resolveHistoryDir(): string {
+  if (process.env.CODI_HISTORY_DIR) {
+    return process.env.CODI_HISTORY_DIR;
+  }
+
+  if (process.env.VITEST || process.env.NODE_ENV === 'test') {
+    return TEST_HISTORY_DIR;
+  }
+
+  return DEFAULT_HISTORY_DIR;
+}
 
 /**
  * Types of file operations that can be undone.
@@ -54,21 +67,21 @@ interface HistoryIndex {
  * Get the path to the history index file.
  */
 function getIndexPath(): string {
-  return path.join(HISTORY_DIR, 'index.json');
+  return path.join(resolveHistoryDir(), 'index.json');
 }
 
 /**
  * Get the path to a backup file.
  */
 function getBackupPath(id: string): string {
-  return path.join(HISTORY_DIR, 'backups', `${id}.backup`);
+  return path.join(resolveHistoryDir(), 'backups', `${id}.backup`);
 }
 
 /**
  * Ensure the history directory exists.
  */
 function ensureHistoryDir(): void {
-  const backupsDir = path.join(HISTORY_DIR, 'backups');
+  const backupsDir = path.join(resolveHistoryDir(), 'backups');
   if (!fs.existsSync(backupsDir)) {
     fs.mkdirSync(backupsDir, { recursive: true });
   }
@@ -331,7 +344,7 @@ export function clearHistory(): number {
   const count = index.entries.length;
 
   // Delete all backup files
-  const backupsDir = path.join(HISTORY_DIR, 'backups');
+  const backupsDir = path.join(resolveHistoryDir(), 'backups');
   if (fs.existsSync(backupsDir)) {
     try {
       fs.rmSync(backupsDir, { recursive: true });
@@ -379,5 +392,5 @@ export function formatHistoryEntry(entry: HistoryEntry): string {
  * Get the history directory path.
  */
 export function getHistoryDir(): string {
-  return HISTORY_DIR;
+  return resolveHistoryDir();
 }
diff --git a/tests/history.test.ts b/tests/history.test.ts
index b23f4e5..08d7db3 100644
--- a/tests/history.test.ts
+++ b/tests/history.test.ts
@@ -171,10 +171,10 @@ describe('History System', () => {
 
       fs.writeFileSync('mark.txt', 'new');
 
-      undoChange();
+      const undoneEntry = undoChange();
 
-      const history = getHistory(10, true);
-      expect(history[0].undone).toBe(true);
+      expect(undoneEntry).not.toBeNull();
+      expect(undoneEntry!.undone).toBe(true);
     });
 
     it('undoes most recent non-undone entry', () => {
@@ -341,10 +341,18 @@ describe('History System', () => {
 
   describe('formatHistoryEntry', () => {
     it('formats entry for display', () => {
-      recordChange({ operation: 'write', filePath: 'format.txt', newContent: 'x', description: 'Test format' });
+      const entryId = recordChange({
+        operation: 'write',
+        filePath: 'format.txt',
+        newContent: 'x',
+        description: 'Test format',
+      });
 
-      const history = getHistory();
-      const formatted = formatHistoryEntry(history[0]);
+      const history = getHistory(50, true);
+      const entry = history.find((item) => item.id === entryId);
+
+      expect(entry).toBeDefined();
+      const formatted = formatHistoryEntry(entry!);
 
       expect(formatted).toContain('write');
       expect(formatted).toContain('format.txt');
@@ -357,10 +365,10 @@ describe('History System', () => {
       recordChange({ operation: 'write', filePath: 'undone-format.txt', newContent: 'x', description: 'Will undo' });
       fs.writeFileSync('undone-format.txt', 'x');
 
-      undoChange();
+      const undoneEntry = undoChange();
 
-      const history = getHistory(10, true);
-      const formatted = formatHistoryEntry(history[0]);
+      expect(undoneEntry).not.toBeNull();
+      const formatted = formatHistoryEntry(undoneEntry!);
 
       expect(formatted).toContain('(undone)');
     });
@@ -369,8 +377,13 @@ describe('History System', () => {
   describe('getHistoryDir', () => {
     it('returns the history directory path', () => {
       const dir = getHistoryDir();
-      expect(dir).toContain('.codi');
-      expect(dir).toContain('history');
+      if (process.env.VITEST || process.env.NODE_ENV === 'test') {
+        expect(dir).toContain('codi-history-');
+        expect(dir).toContain(os.tmpdir());
+      } else {
+        expect(dir).toContain('.codi');
+        expect(dir).toContain('history');
+      }
     });
   });
 });
diff --git a/tests/index.test.ts b/tests/index.test.ts
index c11c837..1aca111 100644
--- a/tests/index.test.ts
+++ b/tests/index.test.ts
@@ -163,7 +163,6 @@ describe('BaseTool', () => {
 
 describe('Tool implementations (filesystem / process tools)', () => {
   let root: string;
-  let prevCwd: string;
 
   beforeEach(async () => {
     root = tmpDir();
@@ -171,29 +170,26 @@ describe('Tool implementations (filesystem / process tools)', () => {
     await fs.writeFile(path.join(root, 'a.txt'), 'hello');
     await fs.mkdir(path.join(root, 'sub'), { recursive: true });
     await fs.writeFile(path.join(root, 'sub', 'b.txt'), 'world');
-    prevCwd = process.cwd();
-    process.chdir(root);
   });
 
   afterEach(async () => {
-    process.chdir(prevCwd);
     await fs.rm(root, { recursive: true, force: true });
   });
 
   it('ReadFileTool reads file contents', async () => {
     const tool = new ReadFileTool();
-    const out = await tool.execute({ path: 'a.txt' });
+    const out = await tool.execute({ path: path.join(root, 'a.txt') });
     expect(out).toContain('hello');
   });
 
   it('ReadFileTool errors on missing file', async () => {
     const tool = new ReadFileTool();
-    await expect(tool.execute({ path: 'nope.txt' })).rejects.toThrow(/not found/i);
+    await expect(tool.execute({ path: path.join(root, 'nope.txt') })).rejects.toThrow(/not found/i);
   });
 
   it('WriteFileTool writes file and returns success message', async () => {
     const tool = new WriteFileTool();
-    const out = await tool.execute({ path: 'new.txt', content: 'x' });
+    const out = await tool.execute({ path: path.join(root, 'new.txt'), content: 'x' });
     expect(out).toMatch(/wrote/i);
     await expect(fs.readFile(path.join(root, 'new.txt'), 'utf8')).resolves.toBe('x');
   });
@@ -202,7 +198,7 @@ describe('Tool implementations (filesystem / process tools)', () => {
     await fs.writeFile(path.join(root, 'edit.txt'), 'one two three');
     const tool = new EditFileTool();
     const out = await tool.execute({
-      path: 'edit.txt',
+      path: path.join(root, 'edit.txt'),
       old_string: 'two',
       new_string: 'TWO',
     });
@@ -214,42 +210,42 @@ describe('Tool implementations (filesystem / process tools)', () => {
     await fs.writeFile(path.join(root, 'edit2.txt'), 'abc');
     const tool = new EditFileTool();
     await expect(
-      tool.execute({ path: 'edit2.txt', old_string: 'zzz', new_string: 'x' }),
+      tool.execute({ path: path.join(root, 'edit2.txt'), old_string: 'zzz', new_string: 'x' }),
     ).rejects.toThrow(/not found/i);
   });
 
   it('InsertLineTool inserts at given line', async () => {
     await fs.writeFile(path.join(root, 'i.txt'), '1\n2\n3\n');
     const tool = new InsertLineTool();
-    const out = await tool.execute({ path: 'i.txt', line: 2, content: 'X' });
+    const out = await tool.execute({ path: path.join(root, 'i.txt'), line: 2, content: 'X' });
     expect(out).toMatch(/inserted/i);
     await expect(fs.readFile(path.join(root, 'i.txt'), 'utf8')).resolves.toBe('1\nX\n2\n3\n');
   });
 
   it('GlobTool returns matching paths', async () => {
     const tool = new GlobTool();
-    const out = await tool.execute({ pattern: '**/*.txt' });
+    const out = await tool.execute({ pattern: '**/*.txt', cwd: root });
     expect(out).toContain('a.txt');
     expect(out).toContain('sub/b.txt');
   });
 
   it('GrepTool finds matches in files', async () => {
     const tool = new GrepTool();
-    const out = await tool.execute({ pattern: 'world', path: '.' });
+    const out = await tool.execute({ pattern: 'world', path: root });
     expect(out).toContain('sub/b.txt');
     expect(out).toContain('world');
   });
 
   it('ListDirectoryTool lists directory contents', async () => {
     const tool = new ListDirectoryTool();
-    const out = await tool.execute({ path: '.' });
+    const out = await tool.execute({ path: root });
     expect(out).toContain('a.txt');
     expect(out).toContain('sub');
   });
 
   it('BashTool runs command', async () => {
     const tool = new BashTool();
-    const out = await tool.execute({ command: 'echo hello' });
+    const out = await tool.execute({ command: 'echo hello', cwd: root });
     expect(out).toContain('hello');
   });
 });

From f82c1b3e82452bde28a18f1b9cd429760e322c8d Mon Sep 17 00:00:00 2001
From: Layne Penney <dev@layne.pro>
Date: Thu, 15 Jan 2026 10:32:07 -0600
Subject: [PATCH 09/17] fix: harden tool extraction and symbol-index tests

---
 src/agent.ts                  | 10 ++++-
 src/providers/ollama-cloud.ts | 40 +++++++++++--------
 tests/symbol-index.test.ts    | 73 +++++++++++++++++++++++++++++------
 3 files changed, 94 insertions(+), 29 deletions(-)

diff --git a/src/agent.ts b/src/agent.ts
index 2e5bcb0..cb2b8ff 100644
--- a/src/agent.ts
+++ b/src/agent.ts
@@ -578,8 +578,14 @@ Always use tools to interact with the filesystem rather than asking the user to
       if (response.toolCalls.length === 0 && this.useTools && this.extractToolsFromText) {
         const toolDefinitions = this.toolRegistry.getDefinitions();
         const fallbackConfig = this.toolRegistry.getFallbackConfig();
-        const extractionText = [response.content, response.reasoningContent].filter(Boolean).join('\n');
-        if (extractionText) {
+        const extractionText = response.content;
+        const contentMatchesReasoning = Boolean(
+          response.content &&
+          response.reasoningContent &&
+          response.content.trim() === response.reasoningContent.trim()
+        );
+
+        if (extractionText && !contentMatchesReasoning) {
           const extractedCalls = extractToolCallsFromText(extractionText, toolDefinitions, fallbackConfig);
           if (extractedCalls.length > 0) {
             response.toolCalls = extractedCalls;
diff --git a/src/providers/ollama-cloud.ts b/src/providers/ollama-cloud.ts
index b9e99cc..fa57e6f 100644
--- a/src/providers/ollama-cloud.ts
+++ b/src/providers/ollama-cloud.ts
@@ -13,6 +13,7 @@ import { withRetry, type RetryOptions } from './retry.js';
 import { getProviderRateLimiter, type RateLimiter } from './rate-limiter.js';
 import { messageToText } from './message-converter.js';
 import type { Message, ToolDefinition, ProviderResponse, ProviderConfig, ToolCall } from '../types.js';
+import { DEFAULT_FALLBACK_CONFIG, findBestToolMatch } from '../tools/tool-fallback.js';
 
 /** Ollama message format */
 interface OllamaMessage {
@@ -229,9 +230,7 @@ export class OllamaCloudProvider extends BaseProvider {
           const useFallbackContent = !hasContent && combinedThinking.length > 0;
           const finalContent = useFallbackContent ? combinedThinking : thinkingCleanedContent;
           const reasoningContent = combinedThinking || undefined;
-          const toolExtractionText = combinedThinking && !finalContent.includes(combinedThinking)
-            ? `${finalContent}\n${combinedThinking}`
-            : finalContent;
+          const toolExtractionText = thinkingCleanedContent;
 
           // Fall back to extracting tool calls from text if no native calls
           if (toolCalls.length === 0 && tools && tools.length > 0) {
@@ -373,9 +372,7 @@ export class OllamaCloudProvider extends BaseProvider {
           const useFallbackContent = !hasContent && combinedThinking.length > 0;
           const finalContent = useFallbackContent ? combinedThinking : thinkingCleanedContent;
           const reasoningContent = combinedThinking || undefined;
-          const toolExtractionText = combinedThinking && !finalContent.includes(combinedThinking)
-            ? `${finalContent}\n${combinedThinking}`
-            : finalContent;
+          const toolExtractionText = thinkingCleanedContent;
 
           if (streamedContentChars === 0 && finalContent && onChunk && streamedThinkingChars === 0) {
             onChunk(finalContent);
@@ -500,7 +497,12 @@ export class OllamaCloudProvider extends BaseProvider {
    */
   private extractToolCalls(content: string, tools: ToolDefinition[]): ToolCall[] {
     const toolCalls: ToolCall[] = [];
-    const toolNames = new Set(tools.map(t => t.name));
+    const resolveToolName = (requestedName: string): string | null => {
+      const match = findBestToolMatch(requestedName, tools, DEFAULT_FALLBACK_CONFIG);
+      if (match.exactMatch) return requestedName;
+      if (match.shouldAutoCorrect && match.matchedName) return match.matchedName;
+      return null;
+    };
 
     // Pattern 1: JSON in code blocks - most reliable
     const codeBlockPattern = /```(?:json)?\s*([\s\S]*?)```/g;
@@ -508,7 +510,7 @@ export class OllamaCloudProvider extends BaseProvider {
 
     while ((match = codeBlockPattern.exec(content)) !== null) {
       const jsonContent = match[1].trim();
-      const extracted = this.tryParseToolCall(jsonContent, toolNames);
+      const extracted = this.tryParseToolCall(jsonContent, resolveToolName);
       if (extracted) {
         toolCalls.push(extracted);
       }
@@ -528,11 +530,12 @@ export class OllamaCloudProvider extends BaseProvider {
       const normalizedName = this.normalizeToolName(rawToolName);
       const argsString = match[2];
 
-      if (toolNames.has(normalizedName)) {
+      const resolvedName = resolveToolName(normalizedName);
+      if (resolvedName) {
         const args = this.parseFunctionCallArgs(argsString);
         toolCalls.push({
           id: `extracted_${Date.now()}_${Math.random().toString(36).substring(2, 9)}`,
-          name: normalizedName,
+          name: resolvedName,
           input: args,
         });
       }
@@ -552,12 +555,13 @@ export class OllamaCloudProvider extends BaseProvider {
       const normalizedName = this.normalizeToolName(rawToolName);
       const jsonArgs = match[2];
 
-      if (toolNames.has(normalizedName)) {
+      const resolvedName = resolveToolName(normalizedName);
+      if (resolvedName) {
         try {
           const args = JSON.parse(jsonArgs);
           toolCalls.push({
             id: `extracted_${Date.now()}_${Math.random().toString(36).substring(2, 9)}`,
-            name: normalizedName,
+            name: resolvedName,
             input: args,
           });
         } catch {
@@ -575,7 +579,7 @@ export class OllamaCloudProvider extends BaseProvider {
     const jsonPattern = /\{(?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*\}/g;
 
     while ((match = jsonPattern.exec(content)) !== null) {
-      const extracted = this.tryParseToolCall(match[0], toolNames);
+      const extracted = this.tryParseToolCall(match[0], resolveToolName);
       if (extracted) {
         toolCalls.push(extracted);
       }
@@ -620,17 +624,21 @@ export class OllamaCloudProvider extends BaseProvider {
   /**
    * Try to parse a JSON string as a tool call.
    */
-  private tryParseToolCall(jsonString: string, validToolNames: Set<string>): ToolCall | null {
+  private tryParseToolCall(
+    jsonString: string,
+    resolveToolName: (requestedName: string) => string | null
+  ): ToolCall | null {
     try {
       const parsed = JSON.parse(jsonString);
 
       // Check if it has a valid tool name (normalize to strip prefixes)
       if (parsed.name) {
         const normalizedName = this.normalizeToolName(parsed.name);
-        if (validToolNames.has(normalizedName)) {
+        const resolvedName = resolveToolName(normalizedName);
+        if (resolvedName) {
           return {
             id: `extracted_${Date.now()}_${Math.random().toString(36).substring(2, 9)}`,
-            name: normalizedName,
+            name: resolvedName,
             input: parsed.arguments || parsed.input || parsed.parameters || {},
           };
         }
diff --git a/tests/symbol-index.test.ts b/tests/symbol-index.test.ts
index 9468bf0..445267a 100644
--- a/tests/symbol-index.test.ts
+++ b/tests/symbol-index.test.ts
@@ -9,11 +9,39 @@
  */
 
 import { describe, it, expect, beforeAll, afterAll } from 'vitest';
+import { createRequire } from 'module';
 import * as fs from 'fs';
 import * as path from 'path';
 import * as os from 'os';
-import { SymbolIndexService } from '../src/symbol-index/service.js';
-import { SymbolDatabase, getIndexDirectory } from '../src/symbol-index/database.js';
+
+type SymbolIndexServiceCtor = typeof import('../src/symbol-index/service.js').SymbolIndexService;
+type SymbolIndexServiceInstance = InstanceType<SymbolIndexServiceCtor>;
+type GetIndexDirectoryFn = typeof import('../src/symbol-index/database.js').getIndexDirectory;
+
+const require = createRequire(import.meta.url);
+let sqliteLoadError: string | null = null;
+
+try {
+  const Database = require('better-sqlite3');
+  const db = new Database(':memory:');
+  db.close();
+} catch (error) {
+  sqliteLoadError = error instanceof Error ? error.message : String(error);
+}
+
+if (sqliteLoadError) {
+  console.warn(`[tests] Skipping symbol-index tests: ${sqliteLoadError}`);
+}
+
+const describeSymbolIndex = sqliteLoadError ? describe.skip : describe;
+let SymbolIndexService: SymbolIndexServiceCtor | null = null;
+let getIndexDirectory: GetIndexDirectoryFn | null = null;
+const getSymbolIndexServiceCtor = (): SymbolIndexServiceCtor => {
+  if (!SymbolIndexService) {
+    throw new Error('SymbolIndexService not loaded.');
+  }
+  return SymbolIndexService;
+};
 
 // Test fixtures directory
 const TEST_DIR = path.join(os.tmpdir(), 'symbol-index-test-' + Date.now());
@@ -264,27 +292,47 @@ class MainActivity : ComponentActivity() {
   }, null, 2));
 }
 
-describe('Symbol Index Validation Suite', () => {
-  let service: SymbolIndexService;
+describeSymbolIndex('Symbol Index Validation Suite', () => {
+  let service: SymbolIndexServiceInstance | null = null;
 
   beforeAll(async () => {
+    if (sqliteLoadError) {
+      return;
+    }
+
+    const serviceModule = await import('../src/symbol-index/service.js');
+    const databaseModule = await import('../src/symbol-index/database.js');
+    SymbolIndexService = serviceModule.SymbolIndexService;
+    getIndexDirectory = databaseModule.getIndexDirectory;
+
+    if (!SymbolIndexService || !getIndexDirectory) {
+      throw new Error('Symbol index dependencies failed to load.');
+    }
+
     // Create test project
     createTestProject();
 
     // Initialize service and build index with deep indexing enabled
     // (needed for usage-based dependency tests)
-    service = new SymbolIndexService(TEST_DIR);
+    const Service = getSymbolIndexServiceCtor();
+    service = new Service(TEST_DIR);
     await service.initialize();
     await service.rebuild({ deepIndex: true });
   });
 
   afterAll(() => {
+    if (!service) {
+      return;
+    }
+
     service.close();
     // Clean up test directory
     fs.rmSync(TEST_DIR, { recursive: true, force: true });
     // Clean up index directory
-    const indexDir = getIndexDirectory(TEST_DIR);
-    fs.rmSync(indexDir, { recursive: true, force: true });
+    if (getIndexDirectory) {
+      const indexDir = getIndexDirectory(TEST_DIR);
+      fs.rmSync(indexDir, { recursive: true, force: true });
+    }
   });
 
   // =========================================================================
@@ -633,7 +681,8 @@ describe('Symbol Index Validation Suite', () => {
   describe('Parallel Processing', () => {
     it('should accept parallelJobs option in rebuild', async () => {
       // Create a fresh service for this test
-      const testService = new SymbolIndexService(TEST_DIR);
+      const Service = getSymbolIndexServiceCtor();
+      const testService = new Service(TEST_DIR);
       await testService.initialize();
 
       // Rebuild with different job counts should work
@@ -650,7 +699,8 @@ describe('Symbol Index Validation Suite', () => {
 
     it('should produce same dependencies with different parallelJobs values', async () => {
       // Rebuild with 1 job
-      const testService1 = new SymbolIndexService(TEST_DIR);
+      const Service = getSymbolIndexServiceCtor();
+      const testService1 = new Service(TEST_DIR);
       await testService1.initialize();
       await testService1.rebuild({ deepIndex: true, parallelJobs: 1 });
       const deps1 = testService1.getDependencyGraph(
@@ -660,7 +710,7 @@ describe('Symbol Index Validation Suite', () => {
       );
 
       // Rebuild with 4 jobs
-      const testService4 = new SymbolIndexService(TEST_DIR);
+      const testService4 = new Service(TEST_DIR);
       await testService4.initialize();
       await testService4.rebuild({ deepIndex: true, parallelJobs: 4 });
       const deps4 = testService4.getDependencyGraph(
@@ -676,7 +726,8 @@ describe('Symbol Index Validation Suite', () => {
     });
 
     it('should default to 4 parallel jobs when not specified', async () => {
-      const testService = new SymbolIndexService(TEST_DIR);
+      const Service = getSymbolIndexServiceCtor();
+      const testService = new Service(TEST_DIR);
       await testService.initialize();
 
       // Just verify it doesn't throw with default parallelJobs

From 19f312a78ea72cb8e1427673b6359bf93b1f62e2 Mon Sep 17 00:00:00 2001
From: Layne Penney <dev@layne.pro>
Date: Thu, 15 Jan 2026 10:49:00 -0600
Subject: [PATCH 10/17] fix: parse running tool traces

---
 src/tools/bash.ts         | 28 +++++++++++++++++++++++++++-
 src/utils/json-parser.ts  |  4 ++--
 tests/json-parser.test.ts | 12 ++++++++++++
 3 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/src/tools/bash.ts b/src/tools/bash.ts
index 048b7e0..b16adf3 100644
--- a/src/tools/bash.ts
+++ b/src/tools/bash.ts
@@ -35,7 +35,8 @@ export class BashTool extends BaseTool {
   }
 
   async execute(input: Record<string, unknown>): Promise<string> {
-    const command = input.command as string;
+    const rawCommand = input.command;
+    const command = this.normalizeCommandInput(rawCommand);
     const cwd = (input.cwd as string) || process.cwd();
 
     if (!command) {
@@ -112,6 +113,31 @@ export class BashTool extends BaseTool {
     });
   }
 
+  private normalizeCommandInput(command: unknown): string | null {
+    if (typeof command === 'string') {
+      return command;
+    }
+
+    if (Array.isArray(command)) {
+      const parts = command.filter((part): part is string => typeof part === 'string' && part.trim() !== '');
+      if (parts.length === 0) {
+        return null;
+      }
+
+      if (parts[0] === 'bash' && parts[1] === '-lc') {
+        const script = parts.slice(2).join(' ');
+        if (!script) {
+          return null;
+        }
+        return `bash -lc ${JSON.stringify(script)}`;
+      }
+
+      return parts.join(' ');
+    }
+
+    return null;
+  }
+
   /**
    * Format command output with clear structure.
    */
diff --git a/src/utils/json-parser.ts b/src/utils/json-parser.ts
index 399534f..cf592f0 100644
--- a/src/utils/json-parser.ts
+++ b/src/utils/json-parser.ts
@@ -122,9 +122,9 @@ export function extractToolCallsFromText(
     }
   }
 
-  // Pattern 2: [Calling tool_name]: {json} format
+  // Pattern 2: [Calling tool_name]: {json} or [Running tool_name] {json} format
   if (toolCalls.length === 0) {
-    const callingPattern = /\[Calling\s+([a-z_][a-z0-9_]*)\]\s*:\s*/gi;
+    const callingPattern = /\[(?:Calling|Running)\s+([a-z_][a-z0-9_]*)\]\s*:?\s*/gi;
 
     while ((match = callingPattern.exec(text)) !== null) {
       const resolvedName = resolveToolName(match[1]);
diff --git a/tests/json-parser.test.ts b/tests/json-parser.test.ts
index b7a4510..a8977cf 100644
--- a/tests/json-parser.test.ts
+++ b/tests/json-parser.test.ts
@@ -177,6 +177,18 @@ describe('json-parser', () => {
         expect(calls[0].input).toEqual({ path: 'a.txt' });
         expect(calls[1].input).toEqual({ path: 'b.txt' });
       });
+
+      it('extracts tool calls from running trace format without colon', () => {
+        const text = '[Running bash]{"cmd": ["bash", "-lc", "git status --porcelain"], "timeout": 100000}';
+        const calls = extractToolCallsFromText(text, toolDefinitions);
+
+        expect(calls).toHaveLength(1);
+        expect(calls[0].name).toBe('bash');
+        expect(calls[0].input).toEqual({
+          cmd: ['bash', '-lc', 'git status --porcelain'],
+          timeout: 100000,
+        });
+      });
     });
 
     describe('pattern 3: JSON in code blocks', () => {

From 0dab579ad0510bd8f3fa0bd312ad04309af8159c Mon Sep 17 00:00:00 2001
From: Layne Penney <dev@layne.pro>
Date: Thu, 15 Jan 2026 11:29:43 -0600
Subject: [PATCH 11/17] fix: extract tool calls from reasoning

---
 src/agent.ts        | 35 +++++++++++++++++++--------
 tests/index.test.ts | 59 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+), 10 deletions(-)

diff --git a/src/agent.ts b/src/agent.ts
index cb2b8ff..037f2ab 100644
--- a/src/agent.ts
+++ b/src/agent.ts
@@ -578,19 +578,34 @@ Always use tools to interact with the filesystem rather than asking the user to
       if (response.toolCalls.length === 0 && this.useTools && this.extractToolsFromText) {
         const toolDefinitions = this.toolRegistry.getDefinitions();
         const fallbackConfig = this.toolRegistry.getFallbackConfig();
-        const extractionText = response.content;
+        const contentText = response.content?.trim() || '';
+        const reasoningText = response.reasoningContent?.trim() || '';
         const contentMatchesReasoning = Boolean(
-          response.content &&
-          response.reasoningContent &&
-          response.content.trim() === response.reasoningContent.trim()
+          contentText &&
+          reasoningText &&
+          contentText === reasoningText
         );
+        const toolTracePattern = /\[(?:Calling|Running)\s+[a-z_][a-z0-9_]*\]|\{\s*"name"\s*:\s*"[a-z_][a-z0-9_]*"/i;
+        const hasToolTrace = (text: string): boolean => toolTracePattern.test(text);
 
-        if (extractionText && !contentMatchesReasoning) {
-          const extractedCalls = extractToolCallsFromText(extractionText, toolDefinitions, fallbackConfig);
-          if (extractedCalls.length > 0) {
-            response.toolCalls = extractedCalls;
-            response.stopReason = 'tool_use';
-          }
+        let extractedCalls: ToolCall[] = [];
+
+        if (contentText && (!contentMatchesReasoning || hasToolTrace(contentText))) {
+          extractedCalls = extractToolCallsFromText(contentText, toolDefinitions, fallbackConfig);
+        }
+
+        if (
+          extractedCalls.length === 0 &&
+          !contentText &&
+          reasoningText &&
+          hasToolTrace(reasoningText)
+        ) {
+          extractedCalls = extractToolCallsFromText(reasoningText, toolDefinitions, fallbackConfig);
+        }
+
+        if (extractedCalls.length > 0) {
+          response.toolCalls = extractedCalls;
+          response.stopReason = 'tool_use';
         }
       }
 
diff --git a/tests/index.test.ts b/tests/index.test.ts
index 1aca111..a8b67a8 100644
--- a/tests/index.test.ts
+++ b/tests/index.test.ts
@@ -335,6 +335,65 @@ describe('Agent', () => {
     expect(info.messages).toBe(0);
     expect(info.hasSummary).toBe(false);
   });
+
+  it('extracts tool calls from reasoning when content is empty', async () => {
+    const toolRegistry = new ToolRegistry();
+    let receivedInput: Record<string, unknown> | null = null;
+
+    class CaptureTool extends BaseTool {
+      getDefinition() {
+        return {
+          name: 'capture',
+          description: 'capture input',
+          input_schema: {
+            type: 'object' as const,
+            properties: {
+              value: { type: 'number' },
+            },
+            required: ['value'],
+          },
+        };
+      }
+
+      async execute(input: Record<string, unknown>): Promise<string> {
+        receivedInput = input;
+        return 'ok';
+      }
+    }
+
+    toolRegistry.register(new CaptureTool());
+
+    const mockProvider = {
+      streamChat: vi.fn()
+        .mockImplementationOnce(async (_messages, _tools, _onChunk, _systemPrompt, onReasoningChunk) => {
+          const reasoning = '[Calling capture]: {"value": 42}';
+          onReasoningChunk?.(reasoning);
+          return {
+            content: '',
+            toolCalls: [],
+            stopReason: 'end_turn',
+            reasoningContent: reasoning,
+          };
+        })
+        .mockImplementationOnce(async () => ({
+          content: 'done',
+          toolCalls: [],
+          stopReason: 'end_turn',
+        })),
+      supportsToolUse: () => true,
+      getName: () => 'mock',
+      getModel: () => 'mock-model',
+    };
+
+    const agent = new Agent({
+      provider: mockProvider as any,
+      toolRegistry,
+    });
+
+    const result = await agent.chat('continue');
+    expect(result).toBe('done');
+    expect(receivedInput).toEqual({ value: 42 });
+  });
 });
 
 describe('Providers', () => {

From 5960a30033af523f1502f71002ac227678a19932 Mon Sep 17 00:00:00 2001
From: Layne Penney <dev@layne.pro>
Date: Thu, 15 Jan 2026 11:46:38 -0600
Subject: [PATCH 12/17] feat: make context window configurable

---
 src/agent.ts  |  9 +++++++--
 src/config.ts | 20 ++++++++++++++++++++
 src/index.ts  | 14 +++++++++++++-
 3 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/src/agent.ts b/src/agent.ts
index 037f2ab..82d4ec1 100644
--- a/src/agent.ts
+++ b/src/agent.ts
@@ -91,6 +91,7 @@ export interface AgentOptions {
   logLevel?: LogLevel; // Log level for debug output (replaces debug)
   debug?: boolean; // @deprecated Use logLevel instead
   enableCompression?: boolean; // Enable entity-reference compression for context
+  maxContextTokens?: number; // Maximum context tokens before compaction
   secondaryProvider?: BaseProvider | null; // Optional secondary provider for summarization
   modelMap?: ModelMap | null; // Optional model map for multi-model orchestration
   auditLogger?: AuditLogger | null; // Optional audit logger for session debugging
@@ -123,6 +124,7 @@ export class Agent {
   private customDangerousPatterns: Array<{ pattern: RegExp; description: string }>;
   private logLevel: LogLevel;
   private enableCompression: boolean;
+  private maxContextTokens: number;
   private auditLogger: AuditLogger | null = null;
   private messages: Message[] = [];
   private conversationSummary: string | null = null;
@@ -165,6 +167,7 @@ export class Agent {
     // Support both logLevel and deprecated debug option
     this.logLevel = options.logLevel ?? (options.debug ? LogLevel.DEBUG : LogLevel.NORMAL);
     this.enableCompression = options.enableCompression ?? false;
+    this.maxContextTokens = options.maxContextTokens ?? AGENT_CONFIG.MAX_CONTEXT_TOKENS;
     this.auditLogger = options.auditLogger ?? null;
     this.systemPrompt = options.systemPrompt || this.getDefaultSystemPrompt();
     this.callbacks = {
@@ -341,11 +344,11 @@ Always use tools to interact with the filesystem rather than asking the user to
   private async compactContext(): Promise<void> {
     const totalTokens = countMessageTokens(this.messages);
 
-    if (totalTokens <= AGENT_CONFIG.MAX_CONTEXT_TOKENS) {
+    if (totalTokens <= this.maxContextTokens) {
       return; // No compaction needed
     }
 
-    logger.debug(`Compacting: ${totalTokens} tokens exceeds ${AGENT_CONFIG.MAX_CONTEXT_TOKENS} limit`);
+    logger.debug(`Compacting: ${totalTokens} tokens exceeds ${this.maxContextTokens} limit`);
 
     // Score messages by importance
     const scores = scoreMessages(this.messages, CONTEXT_OPTIMIZATION.WEIGHTS);
@@ -1096,6 +1099,7 @@ Always use tools to interact with the filesystem rather than asking the user to
    */
   getContextInfo(): {
     tokens: number;
+    maxTokens: number;
     messages: number;
     hasSummary: boolean;
     compression: CompressionStats | null;
@@ -1104,6 +1108,7 @@ Always use tools to interact with the filesystem rather than asking the user to
   } {
     return {
       tokens: countMessageTokens(this.messages),
+      maxTokens: this.maxContextTokens,
       messages: this.messages.length,
       hasSummary: this.conversationSummary !== null,
       compression: this.lastCompressionStats,
diff --git a/src/config.ts b/src/config.ts
index 66a2ad9..fac9acb 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -3,6 +3,7 @@
 
 import * as fs from 'fs';
 import * as path from 'path';
+import { AGENT_CONFIG } from './constants.js';
 
 /**
  * Workspace configuration for Codi.
@@ -69,6 +70,9 @@ export interface WorkspaceConfig {
   /** Enable context compression (reduces token usage) */
   enableCompression?: boolean;
 
+  /** Maximum context tokens before compaction */
+  maxContextTokens?: number;
+
   /** Context optimization settings */
   contextOptimization?: {
     /** Enable semantic deduplication (merge case variants) */
@@ -215,6 +219,7 @@ export interface ResolvedConfig {
   commandAliases: Record<string, string>;
   projectContext?: string;
   enableCompression: boolean;
+  maxContextTokens: number;
   /** Secondary model for summarization */
   summarizeProvider?: string;
   summarizeModel?: string;
@@ -235,6 +240,7 @@ const DEFAULT_CONFIG: ResolvedConfig = {
   extractToolsFromText: true,
   commandAliases: {},
   enableCompression: true, // Enabled by default for token savings
+  maxContextTokens: AGENT_CONFIG.MAX_CONTEXT_TOKENS,
   toolsConfig: {
     disabled: [],
     defaults: {},
@@ -318,6 +324,12 @@ export function validateConfig(config: WorkspaceConfig): string[] {
     }
   }
 
+  if (config.maxContextTokens !== undefined) {
+    if (!Number.isFinite(config.maxContextTokens) || config.maxContextTokens <= 0) {
+      warnings.push('maxContextTokens must be a positive number');
+    }
+  }
+
   return warnings;
 }
 
@@ -337,6 +349,7 @@ export function mergeConfig(
     session?: string;
     summarizeProvider?: string;
     summarizeModel?: string;
+    maxContextTokens?: number;
   }
 ): ResolvedConfig {
   const config: ResolvedConfig = { ...DEFAULT_CONFIG };
@@ -360,6 +373,9 @@ export function mergeConfig(
     if (workspaceConfig.commandAliases) config.commandAliases = workspaceConfig.commandAliases;
     if (workspaceConfig.projectContext) config.projectContext = workspaceConfig.projectContext;
     if (workspaceConfig.enableCompression !== undefined) config.enableCompression = workspaceConfig.enableCompression;
+    if (workspaceConfig.maxContextTokens !== undefined && Number.isFinite(workspaceConfig.maxContextTokens)) {
+      config.maxContextTokens = workspaceConfig.maxContextTokens;
+    }
     // Summarize model from workspace config
     if (workspaceConfig.models?.summarize?.provider) config.summarizeProvider = workspaceConfig.models.summarize.provider;
     if (workspaceConfig.models?.summarize?.model) config.summarizeModel = workspaceConfig.models.summarize.model;
@@ -376,6 +392,9 @@ export function mergeConfig(
   if (cliOptions.baseUrl) config.baseUrl = cliOptions.baseUrl;
   if (cliOptions.endpointId) config.endpointId = cliOptions.endpointId;
   if (cliOptions.session) config.defaultSession = cliOptions.session;
+  if (cliOptions.maxContextTokens !== undefined && Number.isFinite(cliOptions.maxContextTokens)) {
+    config.maxContextTokens = cliOptions.maxContextTokens;
+  }
 
   // CLI --yes flag adds all tools to autoApprove
   if (cliOptions.yes) {
@@ -462,6 +481,7 @@ export function getExampleConfig(): string {
     },
     projectContext: '',
     enableCompression: true,
+    maxContextTokens: AGENT_CONFIG.MAX_CONTEXT_TOKENS,
     models: {
       summarize: {
         provider: 'ollama',
diff --git a/src/index.ts b/src/index.ts
index 220a912..753d3d7 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -326,6 +326,7 @@ program
   .option('--trace', 'Show full request/response payloads')
   .option('-s, --session <name>', 'Load a saved session on startup')
   .option('-c, --compress', 'Context compression (enabled by default, use --no-compress to disable)')
+  .option('--context-window <tokens>', 'Context window size (tokens) before compaction')
   .option('--summarize-model <name>', 'Model to use for summarization (default: primary model)')
   .option('--summarize-provider <type>', 'Provider for summarization model (default: primary provider)')
   .option('--mcp-server', 'Run as MCP server (stdio transport) - exposes tools to other MCP clients')
@@ -2143,6 +2144,15 @@ async function main() {
   }
 
   // Merge workspace config with CLI options
+  const parsedContextWindow = options.contextWindow ? Number(options.contextWindow) : NaN;
+  const contextWindowTokens = Number.isFinite(parsedContextWindow) && parsedContextWindow > 0
+    ? Math.floor(parsedContextWindow)
+    : undefined;
+
+  if (options.contextWindow && contextWindowTokens === undefined) {
+    console.warn(chalk.yellow('Invalid --context-window value; expected a positive number.'));
+  }
+
   const resolvedConfig = mergeConfig(workspaceConfig, {
     provider: options.provider,
     model: options.model,
@@ -2153,6 +2163,7 @@ async function main() {
     session: options.session,
     summarizeProvider: options.summarizeProvider,
     summarizeModel: options.summarizeModel,
+    maxContextTokens: contextWindowTokens,
   });
 
   // Register tools and commands
@@ -2465,6 +2476,7 @@ async function main() {
     customDangerousPatterns,
     logLevel,
     enableCompression: options.compress ?? resolvedConfig.enableCompression,
+    maxContextTokens: resolvedConfig.maxContextTokens,
     onText: (text) => {
       // Stop spinner when we start receiving text
       if (!isStreaming) {
@@ -2705,7 +2717,7 @@ async function main() {
     if (trimmed === '/status') {
       const info = agent.getContextInfo();
       console.log(chalk.bold('\nContext Status:'));
-      console.log(chalk.dim(`  Tokens: ${info.tokens} / 8000`));
+      console.log(chalk.dim(`  Tokens: ${info.tokens} / ${info.maxTokens}`));
       console.log(chalk.dim(`  Messages: ${info.messages}`));
       console.log(chalk.dim(`  Has summary: ${info.hasSummary ? 'yes' : 'no'}`));
       console.log(chalk.dim(`  Compression: ${info.compressionEnabled ? 'enabled' : 'disabled'}`));

From d3ff2e9076978105ac46ac377ef1d9c4f94fc104 Mon Sep 17 00:00:00 2001
From: Layne Penney <dev@layne.pro>
Date: Thu, 15 Jan 2026 13:54:13 -0600
Subject: [PATCH 13/17] feat: discourage fenced bash in responses

---
 src/index.ts | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/index.ts b/src/index.ts
index 753d3d7..570f7cd 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -368,6 +368,13 @@ function generateSystemPrompt(projectInfo: ProjectInfo | null, useTools: boolean
 5. **Handle errors**: Include appropriate error handling
 6. **Test awareness**: Consider how changes affect tests
 
+## Tool Use Rules
+- The tool list below is authoritative for this run. Use only these tool names and their parameters.
+- When you need a tool, emit a tool call (do not describe tool usage in plain text).
+- Do not put tool-call syntax or commands in your normal response.
+- Do not present shell commands in fenced code blocks like \`\`\`bash\`\`\`; use the bash tool instead.
+- Wait for tool results before continuing; if a tool fails, explain and try a different tool.
+
 ## Available Tools
 
 ### File Operations

From 894432eb3afed2f3782a4b020801606cdb861df8 Mon Sep 17 00:00:00 2001
From: Layne Penney <dev@layne.pro>
Date: Fri, 16 Jan 2026 06:36:18 -0600
Subject: [PATCH 14/17] fix: address PR feedback

---
 ROADMAP.md                    |  1 +
 src/agent.ts                  |  2 +-
 src/config.ts                 |  9 +++++++++
 src/index.ts                  |  1 +
 src/providers/ollama-cloud.ts | 23 +++++++++++++++--------
 src/tools/bash.ts             | 20 +++++++++++++++-----
 src/types.ts                  |  2 ++
 7 files changed, 44 insertions(+), 14 deletions(-)

diff --git a/ROADMAP.md b/ROADMAP.md
index eee379f..e0b208f 100644
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -29,6 +29,7 @@ This would help bridge the gap between different model training data and Codi's
 **Current Mitigations**:
 - Added parameter aliases to `grep` tool (`query` -> `pattern`, `max_results`/`max`/`limit` -> `head_limit`)
 - Added `print_tree` tool (commonly expected by models)
+- Consider a vector-embedding index for tool/parameter semantics (similar to `search_codebase`) to improve matches beyond string similarity.
 
 ---
 
diff --git a/src/agent.ts b/src/agent.ts
index 82d4ec1..6aab93c 100644
--- a/src/agent.ts
+++ b/src/agent.ts
@@ -643,7 +643,7 @@ Always use tools to interact with the filesystem rather than asking the user to
       } else if (isExtractedToolCall) {
         // For extracted tool calls, store as plain text (model doesn't understand tool_use blocks)
         const combinedContent = thinkingText
-          ? `${response.content || ''}${response.content ? '\n\n' : ''}[Thinking]:\n${thinkingText}`
+          ? `[Thinking]:\n${thinkingText}${response.content ? `\n\n${response.content}` : ''}`
           : (response.content || '');
         this.messages.push({
           role: 'assistant',
diff --git a/src/config.ts b/src/config.ts
index fac9acb..df28c12 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -73,6 +73,9 @@ export interface WorkspaceConfig {
   /** Maximum context tokens before compaction */
   maxContextTokens?: number;
 
+  /** Strip hallucinated tool traces from provider content (provider-specific) */
+  cleanHallucinatedTraces?: boolean;
+
   /** Context optimization settings */
   contextOptimization?: {
     /** Enable semantic deduplication (merge case variants) */
@@ -220,6 +223,7 @@ export interface ResolvedConfig {
   projectContext?: string;
   enableCompression: boolean;
   maxContextTokens: number;
+  cleanHallucinatedTraces: boolean;
   /** Secondary model for summarization */
   summarizeProvider?: string;
   summarizeModel?: string;
@@ -241,6 +245,7 @@ const DEFAULT_CONFIG: ResolvedConfig = {
   commandAliases: {},
   enableCompression: true, // Enabled by default for token savings
   maxContextTokens: AGENT_CONFIG.MAX_CONTEXT_TOKENS,
+  cleanHallucinatedTraces: false,
   toolsConfig: {
     disabled: [],
     defaults: {},
@@ -376,6 +381,9 @@ export function mergeConfig(
     if (workspaceConfig.maxContextTokens !== undefined && Number.isFinite(workspaceConfig.maxContextTokens)) {
       config.maxContextTokens = workspaceConfig.maxContextTokens;
     }
+    if (workspaceConfig.cleanHallucinatedTraces !== undefined) {
+      config.cleanHallucinatedTraces = workspaceConfig.cleanHallucinatedTraces;
+    }
     // Summarize model from workspace config
     if (workspaceConfig.models?.summarize?.provider) config.summarizeProvider = workspaceConfig.models.summarize.provider;
     if (workspaceConfig.models?.summarize?.model) config.summarizeModel = workspaceConfig.models.summarize.model;
@@ -482,6 +490,7 @@ export function getExampleConfig(): string {
     projectContext: '',
     enableCompression: true,
     maxContextTokens: AGENT_CONFIG.MAX_CONTEXT_TOKENS,
+    cleanHallucinatedTraces: false,
     models: {
       summarize: {
         provider: 'ollama',
diff --git a/src/index.ts b/src/index.ts
index 570f7cd..1e12242 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -2338,6 +2338,7 @@ async function main() {
       model: resolvedConfig.model,
       baseUrl: resolvedConfig.baseUrl,
       endpointId: resolvedConfig.endpointId,
+      cleanHallucinatedTraces: resolvedConfig.cleanHallucinatedTraces,
     });
   }
 
diff --git a/src/providers/ollama-cloud.ts b/src/providers/ollama-cloud.ts
index fa57e6f..e6b0e0b 100644
--- a/src/providers/ollama-cloud.ts
+++ b/src/providers/ollama-cloud.ts
@@ -237,10 +237,7 @@ export class OllamaCloudProvider extends BaseProvider {
             toolCalls = this.extractToolCalls(toolExtractionText, tools);
           }
 
-          // Clean hallucinated traces from content (after tool extraction)
-          const cleanedContent = toolCalls.length > 0
-            ? this.cleanHallucinatedTraces(finalContent)
-            : finalContent;
+          const cleanedContent = this.maybeCleanHallucinatedTraces(finalContent, toolCalls);
 
           return createProviderResponse({
             content: cleanedContent,
@@ -384,10 +381,7 @@ export class OllamaCloudProvider extends BaseProvider {
             toolCalls = this.extractToolCalls(toolExtractionText, tools);
           }
 
-          // Clean hallucinated traces from content (after tool extraction)
-          const cleanedContent = toolCalls.length > 0
-            ? this.cleanHallucinatedTraces(finalContent)
-            : finalContent;
+          const cleanedContent = this.maybeCleanHallucinatedTraces(finalContent, toolCalls);
 
           return createProviderResponse({
             content: cleanedContent,
@@ -672,6 +666,19 @@ export class OllamaCloudProvider extends BaseProvider {
     return { content: cleanedContent, thinking };
   }
 
+  private maybeCleanHallucinatedTraces(content: string, toolCalls: ToolCall[]): string {
+    if (!this.config.cleanHallucinatedTraces || toolCalls.length === 0) {
+      return content;
+    }
+
+    const cleanedContent = this.cleanHallucinatedTraces(content);
+    if (cleanedContent !== content) {
+      console.warn('[ollama-cloud] Cleaned hallucinated tool traces from model output.');
+    }
+
+    return cleanedContent;
+  }
+
   /**
    * Clean hallucinated agent trace patterns from content.
    * Some models output fake "[Calling tool]: {json}[Result from tool]: result" traces.
diff --git a/src/tools/bash.ts b/src/tools/bash.ts
index b16adf3..e33c77b 100644
--- a/src/tools/bash.ts
+++ b/src/tools/bash.ts
@@ -114,6 +114,10 @@ export class BashTool extends BaseTool {
   }
 
   private normalizeCommandInput(command: unknown): string | null {
+    if (command === null || command === undefined) {
+      return null;
+    }
+
     if (typeof command === 'string') {
       return command;
     }
@@ -121,21 +125,27 @@ export class BashTool extends BaseTool {
     if (Array.isArray(command)) {
       const parts = command.filter((part): part is string => typeof part === 'string' && part.trim() !== '');
       if (parts.length === 0) {
-        return null;
+        return this.stringifyCommand(command);
       }
 
       if (parts[0] === 'bash' && parts[1] === '-lc') {
         const script = parts.slice(2).join(' ');
-        if (!script) {
-          return null;
-        }
         return `bash -lc ${JSON.stringify(script)}`;
       }
 
       return parts.join(' ');
     }
 
-    return null;
+    return this.stringifyCommand(command);
+  }
+
+  private stringifyCommand(command: unknown): string {
+    try {
+      const json = JSON.stringify(command);
+      return json === undefined ? String(command) : json;
+    } catch {
+      return String(command);
+    }
   }
 
   /**
diff --git a/src/types.ts b/src/types.ts
index 97a389c..7c7e249 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -169,6 +169,7 @@ export interface ProviderResponse {
  * @property {string} [model] - The AI model to use, if applicable.
  * @property {number} [temperature] - Sampling temperature for generation.
  * @property {number} [maxTokens] - Maximum number of tokens to generate.
+ * @property {boolean} [cleanHallucinatedTraces] - Strip hallucinated tool traces from provider content (provider-specific).
  */
 export interface ProviderConfig {
   apiKey?: string;
@@ -176,4 +177,5 @@ export interface ProviderConfig {
   model?: string;
   temperature?: number;
   maxTokens?: number;
+  cleanHallucinatedTraces?: boolean;
 }

From 0d1275217d7a2f7ceb5bd507a8c8042ebebdfa20 Mon Sep 17 00:00:00 2001
From: Layne Penney <dev@layne.pro>
Date: Fri, 16 Jan 2026 06:48:06 -0600
Subject: [PATCH 15/17] fix: log hallucinated traces and normalize bash

---
 src/providers/ollama-cloud.ts | 17 +++++++++++++++--
 src/tools/bash.ts             |  3 +++
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/src/providers/ollama-cloud.ts b/src/providers/ollama-cloud.ts
index e6b0e0b..1fa83d7 100644
--- a/src/providers/ollama-cloud.ts
+++ b/src/providers/ollama-cloud.ts
@@ -14,6 +14,7 @@ import { getProviderRateLimiter, type RateLimiter } from './rate-limiter.js';
 import { messageToText } from './message-converter.js';
 import type { Message, ToolDefinition, ProviderResponse, ProviderConfig, ToolCall } from '../types.js';
 import { DEFAULT_FALLBACK_CONFIG, findBestToolMatch } from '../tools/tool-fallback.js';
+import { logger, LogLevel } from '../logger.js';
 
 /** Ollama message format */
 interface OllamaMessage {
@@ -671,9 +672,17 @@ export class OllamaCloudProvider extends BaseProvider {
       return content;
     }
 
+    const matches = content.match(this.getHallucinatedTracePattern()) || [];
     const cleanedContent = this.cleanHallucinatedTraces(content);
     if (cleanedContent !== content) {
-      console.warn('[ollama-cloud] Cleaned hallucinated tool traces from model output.');
+      if (logger.isLevelEnabled(LogLevel.VERBOSE) && matches.length > 0) {
+        const joined = matches.join('\n');
+        const clipped = joined.length > 2000
+          ? `${joined.slice(0, 2000)}\n... [truncated ${joined.length - 2000} chars]`
+          : joined;
+        logger.verbose(`[ollama-cloud] Stripped hallucinated traces:\n${clipped}`);
+      }
+      logger.warn('Ollama Cloud: cleaned hallucinated tool traces from model output.');
     }
 
     return cleanedContent;
@@ -686,7 +695,7 @@ export class OllamaCloudProvider extends BaseProvider {
    */
   private cleanHallucinatedTraces(content: string): string {
     // Pattern: [Calling tool_name]: {json}[Result from tool_name]: any text until next [ or end
-    const hallucinatedTracePattern = /\[Calling\s+[a-z_][a-z0-9_]*\]\s*:\s*\{[^}]*\}\s*(?:\[Result from\s+[a-z_][a-z0-9_]*\]\s*:\s*[^\[]*)?/gi;
+    const hallucinatedTracePattern = this.getHallucinatedTracePattern();
     let cleanedContent = content.replace(hallucinatedTracePattern, '').trim();
 
     // Clean up multiple newlines
@@ -695,6 +704,10 @@ export class OllamaCloudProvider extends BaseProvider {
     return cleanedContent;
   }
 
+  private getHallucinatedTracePattern(): RegExp {
+    return /\[Calling\s+[a-z_][a-z0-9_]*\]\s*:\s*\{[^}]*\}\s*(?:\[Result from\s+[a-z_][a-z0-9_]*\]\s*:\s*[^\[]*)?/gi;
+  }
+
   /**
    * Pull a model if it's not already available.
    */
diff --git a/src/tools/bash.ts b/src/tools/bash.ts
index e33c77b..2046021 100644
--- a/src/tools/bash.ts
+++ b/src/tools/bash.ts
@@ -130,6 +130,9 @@ export class BashTool extends BaseTool {
 
       if (parts[0] === 'bash' && parts[1] === '-lc') {
         const script = parts.slice(2).join(' ');
+        if (!script.trim()) {
+          return this.stringifyCommand(parts);
+        }
         return `bash -lc ${JSON.stringify(script)}`;
       }
 

From 65f75da8c77bd3cf34f0be6062b32bd589204c47 Mon Sep 17 00:00:00 2001
From: Layne Penney <dev@layne.pro>
Date: Fri, 16 Jan 2026 06:59:08 -0600
Subject: [PATCH 16/17] fix: skip continuation prompt

---
 src/agent.ts | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/agent.ts b/src/agent.ts
index 6aab93c..987d220 100644
--- a/src/agent.ts
+++ b/src/agent.ts
@@ -941,7 +941,8 @@ Always use tools to interact with the filesystem rather than asking the user to
             resultText += `Result from ${toolName}:\n${content}\n\n`;
           }
         }
-        resultText += this.buildContinuationPrompt(originalTask);
+        // lp 1/16/26: skip this for now. I believe it is causing issues
+        // resultText += this.buildContinuationPrompt(originalTask);
 
         this.messages.push({
           role: 'user',

From bf575f53fd8910a9b513caa10ccf5a7da29b9f18 Mon Sep 17 00:00:00 2001
From: Layne Penney <dev@layne.pro>
Date: Fri, 16 Jan 2026 07:03:33 -0600
Subject: [PATCH 17/17] chore: bump version to 0.7.9

---
 package.json   | 2 +-
 src/version.ts | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/package.json b/package.json
index 188db50..4817c04 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "codi",
-  "version": "0.7.8",
+  "version": "0.7.9",
   "description": "Your AI coding wingman - a hybrid assistant supporting Claude, OpenAI, and local models",
   "license": "Apache-2.0",
   "type": "module",
diff --git a/src/version.ts b/src/version.ts
index f0c5b83..f55b6aa 100644
--- a/src/version.ts
+++ b/src/version.ts
@@ -10,4 +10,4 @@
  * - MINOR: New features, significant refactoring, non-breaking changes
  * - PATCH: Bug fixes, minor improvements
  */
-export const VERSION = '0.7.8';
+export const VERSION = '0.7.9';