From 79aee58945f5c6680fed81eddb5b1da8d78672b4 Mon Sep 17 00:00:00 2001 From: Layne Penney Date: Thu, 15 Jan 2026 06:40:49 -0600 Subject: [PATCH 01/17] feat: add semantic tool fallback and print_tree tool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements semantic fallback system for tool calls: - Auto-corrects high-similarity tool name typos (≥0.85 threshold) - Suggests similar tools for medium-similarity matches (≥0.6) - Maps common parameter aliases (query→pattern, max→head_limit, etc.) - Configurable via .codi.json toolFallback settings New tools and features: - print_tree: Display directory structure as a tree - grep: Now accepts query/max_results aliases for pattern/head_limit Files: - src/tools/tool-fallback.ts: Core fallback logic with Levenshtein matching - src/tools/print-tree.ts: Tree visualization tool - tests/tool-fallback.test.ts: 45 unit tests - tests/print-tree.test.ts: 15 unit tests - ROADMAP.md: Future enhancement notes Co-Authored-By: Claude Opus 4.5 --- ROADMAP.md | 30 ++ src/config.ts | 14 + src/tools/grep.ts | 14 +- src/tools/index.ts | 16 + src/tools/print-tree.ts | 198 +++++++++++++ src/tools/registry.ts | 71 ++++- src/tools/tool-fallback.ts | 291 +++++++++++++++++++ tests/print-tree.test.ts | 205 +++++++++++++ tests/tool-fallback.test.ts | 563 ++++++++++++++++++++++++++++++++++++ 9 files changed, 1396 insertions(+), 6 deletions(-) create mode 100644 ROADMAP.md create mode 100644 src/tools/print-tree.ts create mode 100644 src/tools/tool-fallback.ts create mode 100644 tests/print-tree.test.ts create mode 100644 tests/tool-fallback.test.ts diff --git a/ROADMAP.md b/ROADMAP.md new file mode 100644 index 0000000..df7732d --- /dev/null +++ b/ROADMAP.md @@ -0,0 +1,30 @@ +# Codi Roadmap + +This document tracks planned features and improvements for Codi. 
+
+## Planned Features
+
+### Semantic Fallback for Tool Calls
+
+When a model calls a tool that doesn't exist, or passes parameter names a tool doesn't recognize, Codi should apply a semantic fallback that:
+
+1. **Tool Name Matching**: If a requested tool doesn't exist, find the closest registered tool by name similarity (e.g., `list_dir` -> `list_directory`, `read_fil` -> `read_file`)
+
+2. **Parameter Mapping**: When a tool is called with unrecognized parameters, attempt to map them to the correct parameter names based on:
+   - Common aliases (e.g., `query` -> `pattern`, `max_results` -> `head_limit`)
+   - Semantic similarity (e.g., `search_term` -> `pattern`)
+   - Parameter descriptions
+
+3. **Graceful Degradation**: Instead of failing on invalid tool calls, provide helpful feedback to the model about which tools and parameters are available.
+
+This would bridge the gap between the tool names and parameters that different models were trained to expect and Codi's actual tool definitions, improving compatibility with various LLMs.
+
+**Current Mitigations**:
+- Added parameter aliases to `grep` tool (`query` -> `pattern`, `max_results`/`max`/`limit` -> `head_limit`)
+- Added `print_tree` tool (commonly expected by models)
+
+---
+
+## Completed Features
+
+See [CLAUDE.md](./CLAUDE.md) for documentation on implemented features.
diff --git a/src/config.ts b/src/config.ts index 4de04e3..66a2ad9 100644 --- a/src/config.ts +++ b/src/config.ts @@ -155,6 +155,20 @@ export interface WorkspaceConfig { [toolName: string]: Record; }; }; + + /** Tool fallback settings for handling unknown tools and parameter aliases */ + toolFallback?: { + /** Enable semantic tool fallback (default: true) */ + enabled?: boolean; + /** Threshold for auto-correcting tool names (0-1, default: 0.85) */ + autoCorrectThreshold?: number; + /** Threshold for suggesting similar tools (0-1, default: 0.6) */ + suggestionThreshold?: number; + /** Auto-execute corrected tools without confirmation (default: false) */ + autoExecute?: boolean; + /** Enable parameter aliasing (default: true) */ + parameterAliasing?: boolean; + }; } /** diff --git a/src/tools/grep.ts b/src/tools/grep.ts index 4dce6e1..2b0dfa2 100644 --- a/src/tools/grep.ts +++ b/src/tools/grep.ts @@ -44,19 +44,27 @@ export class GrepTool extends BaseTool { } async execute(input: Record): Promise { - const pattern = input.pattern as string; + // Support parameter aliases for model compatibility: + // - query -> pattern (common model assumption) + // - max_results, max, limit -> head_limit (various naming conventions) + const pattern = (input.pattern as string) || (input.query as string); const path = (input.path as string) || '.'; const filePattern = (input.file_pattern as string) || '**/*'; const ignoreCase = (input.ignore_case as boolean) || false; + const headLimit = (input.head_limit as number) || + (input.max_results as number) || + (input.max as number) || + (input.limit as number) || + 100; if (!pattern) { - throw new Error('Pattern is required'); + throw new Error('Pattern is required (or use "query" alias)'); } const resolvedPath = resolve(process.cwd(), path); const regex = new RegExp(pattern, ignoreCase ? 
'gi' : 'g'); const matches: Match[] = []; - const MAX_MATCHES = 100; + const MAX_MATCHES = headLimit; // Get list of files to search const files: string[] = []; diff --git a/src/tools/index.ts b/src/tools/index.ts index 97b4cd9..fd871ba 100644 --- a/src/tools/index.ts +++ b/src/tools/index.ts @@ -20,6 +20,20 @@ export { RefactorTool } from './refactor.js'; export { ShellInfoTool } from './shell-info.js'; export { PipelineTool } from './pipeline.js'; export { GenerateDocsTool } from './generate-docs.js'; +export { PrintTreeTool } from './print-tree.js'; + +// Tool fallback utilities +export { + findBestToolMatch, + mapParameters, + formatFallbackError, + formatMappingInfo, + GLOBAL_PARAMETER_ALIASES, + DEFAULT_FALLBACK_CONFIG, + type ToolFallbackConfig, + type ToolMatchResult, + type ParameterMapResult, +} from './tool-fallback.js'; // Symbol index tools export { @@ -52,6 +66,7 @@ import { RefactorTool } from './refactor.js'; import { ShellInfoTool } from './shell-info.js'; import { PipelineTool } from './pipeline.js'; import { GenerateDocsTool } from './generate-docs.js'; +import { PrintTreeTool } from './print-tree.js'; import type { Retriever } from '../rag/retriever.js'; import type { SymbolIndexService } from '../symbol-index/service.js'; import { @@ -81,6 +96,7 @@ export function registerDefaultTools(): void { globalRegistry.register(new GlobTool()); globalRegistry.register(new GrepTool()); globalRegistry.register(new ListDirectoryTool()); + globalRegistry.register(new PrintTreeTool()); // Shell globalRegistry.register(new BashTool()); diff --git a/src/tools/print-tree.ts b/src/tools/print-tree.ts new file mode 100644 index 0000000..c264501 --- /dev/null +++ b/src/tools/print-tree.ts @@ -0,0 +1,198 @@ +// Copyright 2026 Layne Penney +// SPDX-License-Identifier: Apache-2.0 + +import { readdir, stat } from 'fs/promises'; +import { resolve, join, basename } from 'path'; +import { BaseTool } from './base.js'; +import type { ToolDefinition } from '../types.js'; + 
+/** One entry in the tree; `children` is only populated for directories. */
+interface TreeNode {
+  name: string;
+  type: 'file' | 'directory';
+  children?: TreeNode[];
+}
+
+/**
+ * Renders a directory as a text tree using `├──` / `└──` connectors.
+ *
+ * Dot-entries are omitted unless `show_hidden` is set, and directories named
+ * in `SKIP_DIRS` (plus any directory ending in `.egg-info`) are always
+ * omitted. `.gitignore` files are not read.
+ */
+export class PrintTreeTool extends BaseTool {
+  getDefinition(): ToolDefinition {
+    return {
+      name: 'print_tree',
+      description: 'Print a tree-like directory structure. Useful for understanding project layout. Hides dot-files by default and always skips common non-essential directories (node_modules, .git, build output, caches).',
+      input_schema: {
+        type: 'object',
+        properties: {
+          path: {
+            type: 'string',
+            description: 'Root directory path (optional, defaults to current directory)',
+          },
+          depth: {
+            type: 'number',
+            description: 'Maximum depth to traverse (optional, defaults to 3)',
+          },
+          show_hidden: {
+            type: 'boolean',
+            description: 'Include hidden files and directories (default: false)',
+          },
+          show_files: {
+            type: 'boolean',
+            description: 'Include files in output, not just directories (default: true)',
+          },
+        },
+        required: [],
+      },
+    };
+  }
+
+  /** Depth used when `depth` is missing or not a usable number. */
+  private static readonly DEFAULT_DEPTH = 3;
+
+  // Directories to always skip (common non-essential directories).
+  // Entries are matched by exact name; suffix rules live in shouldSkipDirectory().
+  private readonly SKIP_DIRS = new Set([
+    'node_modules',
+    '.git',
+    '.svn',
+    '.hg',
+    '__pycache__',
+    '.pytest_cache',
+    '.mypy_cache',
+    '.tox',
+    '.nox',
+    '.eggs',
+    'dist',
+    'build',
+    '.next',
+    '.nuxt',
+    '.output',
+    '.cache',
+    '.parcel-cache',
+    '.turbo',
+    'coverage',
+    '.nyc_output',
+    'vendor',
+    'target', // Rust/Java
+    '.gradle',
+    '.idea',
+    '.vscode',
+  ]);
+
+  /**
+   * Build and render the tree for `input.path` (default: current directory).
+   *
+   * @param input - Tool arguments: `path`, `depth`, `show_hidden`, `show_files`.
+   * @returns The rendered tree. When the path is not a directory, or an error
+   *   is raised while building the tree, a descriptive message is returned
+   *   instead.
+   */
+  async execute(input: Record<string, unknown>): Promise<string> {
+    const path = (input.path as string) || '.';
+    const maxDepth = this.resolveDepth(input.depth);
+    const showHidden = (input.show_hidden as boolean) || false;
+    const showFiles = input.show_files !== false; // Default to true
+
+    const resolvedPath = resolve(process.cwd(), path);
+    const rootName = basename(resolvedPath) || resolvedPath;
+
+    try {
+      const tree = await this.buildTree(resolvedPath, 0, maxDepth, showHidden, showFiles);
+      if (!tree) {
+        // buildTree returns null only when stat fails or the path is not a
+        // directory; an empty directory still yields a node.
+        return `Directory not found or not a directory: ${path}`;
+      }
+
+      const lines: string[] = [rootName + '/'];
+      this.renderTree(tree.children || [], '', lines, showFiles);
+
+      return lines.join('\n');
+    } catch (error) {
+      const msg = error instanceof Error ? error.message : String(error);
+      return `Error reading directory: ${msg}`;
+    }
+  }
+
+  /**
+   * Normalise the `depth` argument.
+   *
+   * Accepts a number or a numeric string. `0` is honoured (root line only)
+   * rather than being replaced by the default, negative values clamp to 0,
+   * and fractional values round up so they traverse the same levels as an
+   * unrounded comparison would.
+   */
+  private resolveDepth(raw: unknown): number {
+    const value = typeof raw === 'string' && raw.trim() !== '' ? Number(raw) : raw;
+    if (typeof value !== 'number' || !Number.isFinite(value)) {
+      return PrintTreeTool.DEFAULT_DEPTH;
+    }
+    return Math.max(0, Math.ceil(value));
+  }
+
+  /** True for directory names that are never descended into or listed. */
+  private shouldSkipDirectory(name: string): boolean {
+    return this.SKIP_DIRS.has(name) || name.endsWith('.egg-info');
+  }
+
+  /**
+   * Recursively collect the entries below `dirPath`.
+   *
+   * @returns null when `dirPath` cannot be stat-ed or is not a directory;
+   *   otherwise a directory node whose children are sorted directories-first,
+   *   then by name. Children are left empty once `currentDepth` reaches
+   *   `maxDepth` or when the directory cannot be read.
+   */
+  private async buildTree(
+    dirPath: string,
+    currentDepth: number,
+    maxDepth: number,
+    showHidden: boolean,
+    showFiles: boolean
+  ): Promise<TreeNode | null> {
+    try {
+      const stats = await stat(dirPath);
+      if (!stats.isDirectory()) {
+        return null;
+      }
+    } catch {
+      return null;
+    }
+
+    const name = basename(dirPath) || dirPath;
+    const node: TreeNode = { name, type: 'directory', children: [] };
+
+    if (currentDepth >= maxDepth) {
+      return node;
+    }
+
+    try {
+      const entries = await readdir(dirPath);
+      const children: TreeNode[] = [];
+
+      for (const entry of entries) {
+        // Skip hidden files unless requested
+        if (!showHidden && entry.startsWith('.')) {
+          continue;
+        }
+
+        const fullPath = join(dirPath, entry);
+
+        try {
+          // NOTE(review): stat() follows symlinks, so a link that points back
+          // up the tree is re-walked until maxDepth stops it.
+          const entryStats = await stat(fullPath);
+
+          if (entryStats.isDirectory()) {
+            // The skip list applies to directories only, so a regular file
+            // that happens to be called `build` or `target` is still listed.
+            if (this.shouldSkipDirectory(entry)) {
+              continue;
+            }
+
+            const childTree = await this.buildTree(
+              fullPath,
+              currentDepth + 1,
+              maxDepth,
+              showHidden,
+              showFiles
+            );
+            if (childTree) {
+              children.push(childTree);
+            }
+          } else if (showFiles) {
+            children.push({ name: entry, type: 'file' });
+          }
+        } catch {
+          // Skip entries we can't stat
+          continue;
+        }
+      }
+
+      // Sort: directories first, then files, alphabetically
+      children.sort((a, b) => {
+        if (a.type !== b.type) {
+          return a.type === 'directory' ? -1 : 1;
+        }
+        return a.name.localeCompare(b.name);
+      });
+
+      node.children = children;
+    } catch {
+      // Can't read directory: keep the node with no children
+    }
+
+    return node;
+  }
+
+  /**
+   * Append one line per node to `lines`, depth-first.
+   *
+   * @param nodes - Siblings to render, already sorted.
+   * @param prefix - Indentation inherited from the ancestors.
+   * @param lines - Output buffer, mutated in place.
+   * @param showFiles - When false, file nodes are filtered out before rendering.
+   */
+  private renderTree(nodes: TreeNode[], prefix: string, lines: string[], showFiles: boolean): void {
+    const filteredNodes = showFiles ? nodes : nodes.filter(n => n.type === 'directory');
+
+    for (let i = 0; i < filteredNodes.length; i++) {
+      const node = filteredNodes[i];
+      const isLast = i === filteredNodes.length - 1;
+      const connector = isLast ? '└── ' : '├── ';
+      // Same width as the connector so nested entries line up under their parent.
+      const childPrefix = isLast ? '    ' : '│   ';
+
+      if (node.type === 'directory') {
+        lines.push(prefix + connector + node.name + '/');
+        if (node.children && node.children.length > 0) {
+          this.renderTree(node.children, prefix + childPrefix, lines, showFiles);
+        }
+      } else {
+        lines.push(prefix + connector + node.name);
+      }
+    }
+  }
+}
diff --git a/src/tools/registry.ts b/src/tools/registry.ts
index 67f9283..f12a1e7 100644
--- a/src/tools/registry.ts
+++ b/src/tools/registry.ts
@@ -3,6 +3,14 @@
 
 import type { ToolDefinition, ToolCall, ToolResult } from '../types.js';
 import { BaseTool } from './base.js';
+import {
+  findBestToolMatch,
+  mapParameters,
+  formatFallbackError,
+  formatMappingInfo,
+  type ToolFallbackConfig,
+  DEFAULT_FALLBACK_CONFIG,
+} from './tool-fallback.js';
 
 /**
  * Registry for managing available tools.
@@ -10,6 +18,21 @@ import { BaseTool } from './base.js';
  */
 export class ToolRegistry {
   private tools: Map<string, BaseTool> = new Map();
+  private fallbackConfig: ToolFallbackConfig = DEFAULT_FALLBACK_CONFIG;
+
+  /**
+   * Set fallback configuration.
+   */
+  setFallbackConfig(config: Partial<ToolFallbackConfig>): void {
+    this.fallbackConfig = { ...DEFAULT_FALLBACK_CONFIG, ...config };
+  }
+
+  /**
+   * Get current fallback configuration.
+   */
+  getFallbackConfig(): ToolFallbackConfig {
+    return { ...this.fallbackConfig };
+  }
 
   /**
    * Register a tool with the registry.
@@ -53,10 +76,32 @@ export class ToolRegistry {
   }
 
   /**
-   * Execute a single tool call.
+   * Execute a single tool call with semantic fallback support.
*/ async execute(toolCall: ToolCall): Promise { - const tool = this.tools.get(toolCall.name); + let tool = this.tools.get(toolCall.name); + let mappedInput = toolCall.input; + let toolCorrection: { from: string; to: string } | null = null; + let paramMappings: Array<{ from: string; to: string }> = []; + + // If tool not found, try fallback matching + if (!tool && this.fallbackConfig.enabled) { + const definitions = this.getDefinitions(); + const matchResult = findBestToolMatch(toolCall.name, definitions, this.fallbackConfig); + + if (matchResult.shouldAutoCorrect && matchResult.matchedName) { + // Auto-correct to matched tool + tool = this.tools.get(matchResult.matchedName); + toolCorrection = { from: toolCall.name, to: matchResult.matchedName }; + } else if (!matchResult.exactMatch) { + // Return error with suggestions + return { + tool_use_id: toolCall.id, + content: formatFallbackError(toolCall.name, matchResult), + is_error: true, + }; + } + } if (!tool) { return { @@ -66,7 +111,27 @@ export class ToolRegistry { }; } - return tool.run(toolCall.id, toolCall.input); + // Apply parameter mapping + if (this.fallbackConfig.parameterAliasing) { + const mapResult = mapParameters( + toolCall.input, + tool.getDefinition().input_schema, + this.fallbackConfig + ); + mappedInput = mapResult.mappedInput; + paramMappings = mapResult.mappings; + } + + // Execute the tool + const result = await tool.run(toolCall.id, mappedInput); + + // Prepend mapping info to result if any corrections were made + const mappingInfo = formatMappingInfo(toolCorrection, paramMappings); + if (mappingInfo && !result.is_error) { + result.content = `${mappingInfo}\n\n${result.content}`; + } + + return result; } /** diff --git a/src/tools/tool-fallback.ts b/src/tools/tool-fallback.ts new file mode 100644 index 0000000..e3b3ef6 --- /dev/null +++ b/src/tools/tool-fallback.ts @@ -0,0 +1,291 @@ +// Copyright 2026 Layne Penney +// SPDX-License-Identifier: Apache-2.0 + +/** + * Semantic Tool Fallback System 
+ * + * Handles tool name matching and parameter mapping when exact matches fail. + * - Suggests similar tools for typos/misnamed tools + * - Auto-corrects high-similarity matches (configurable) + * - Maps common parameter aliases to canonical forms + */ + +import { stringSimilarity } from '../entity-normalization.js'; +import type { ToolDefinition } from '../types.js'; + +/** + * Configuration for tool fallback behavior. + */ +export interface ToolFallbackConfig { + /** Enable/disable fallback system */ + enabled: boolean; + /** Threshold above which tool is auto-corrected (0-1) */ + autoCorrectThreshold: number; + /** Threshold above which tool is suggested (0-1) */ + suggestionThreshold: number; + /** Auto-execute corrected tools without confirmation */ + autoExecute: boolean; + /** Enable parameter aliasing */ + parameterAliasing: boolean; +} + +/** + * Default fallback configuration. + */ +export const DEFAULT_FALLBACK_CONFIG: ToolFallbackConfig = { + enabled: true, + autoCorrectThreshold: 0.85, + suggestionThreshold: 0.6, + autoExecute: false, + parameterAliasing: true, +}; + +/** + * Result of a tool name match attempt. + */ +export interface ToolMatchResult { + /** Whether an exact match was found */ + exactMatch: boolean; + /** The matched tool name (may differ from requested) */ + matchedName: string | null; + /** Similarity score (1.0 for exact match) */ + score: number; + /** All candidates above suggestion threshold */ + suggestions: Array<{ name: string; score: number; description: string }>; + /** Whether auto-correction should be applied */ + shouldAutoCorrect: boolean; +} + +/** + * Find the best matching tool for a given name. 
+ *
+ * An exact, case-sensitive name match short-circuits with score 1.0.
+ * Otherwise every tool is scored with `stringSimilarity` on lower-cased
+ * names and the candidates are ranked by that score.
+ *
+ * @param requestedName - Tool name as received in the tool call.
+ * @param availableTools - Definitions to match against.
+ * @param config - Thresholds and the `enabled` switch; `autoExecute` and
+ *   `parameterAliasing` are not read here.
+ * @returns `matchedName` is set only for an exact match or when
+ *   `shouldAutoCorrect` is true; `score` is the best score seen even when it
+ *   is below both thresholds; `suggestions` holds every candidate at or above
+ *   `suggestionThreshold`, best first.
+ */
+export function findBestToolMatch(
+  requestedName: string,
+  availableTools: ToolDefinition[],
+  config: ToolFallbackConfig = DEFAULT_FALLBACK_CONFIG
+): ToolMatchResult {
+  // Check for exact match first (this comparison is case-sensitive; the fuzzy pass below is not)
+  const exactMatch = availableTools.find((t) => t.name === requestedName);
+  if (exactMatch) {
+    return {
+      exactMatch: true,
+      matchedName: requestedName,
+      score: 1.0,
+      suggestions: [],
+      shouldAutoCorrect: false,
+    };
+  }
+
+  if (!config.enabled) {
+    return {
+      exactMatch: false,
+      matchedName: null,
+      score: 0,
+      suggestions: [],
+      shouldAutoCorrect: false,
+    };
+  }
+
+  // Calculate similarity scores for all tools; descriptions are cut to 80 characters for display
+  const scores = availableTools.map((tool) => ({
+    name: tool.name,
+    score: stringSimilarity(requestedName.toLowerCase(), tool.name.toLowerCase()),
+    description: tool.description.slice(0, 80) + (tool.description.length > 80 ? '...' : ''),
+  }));
+
+  // Sort by score descending
+  scores.sort((a, b) => b.score - a.score);
+
+  // Filter to suggestions at or above threshold; bestMatch is undefined when no tools were supplied
+  const suggestions = scores.filter((s) => s.score >= config.suggestionThreshold);
+  const bestMatch = scores[0];
+
+  // Only auto-correct if:
+  // 1. Best match is at or above the auto-correct threshold
+  // 2. There's a clear winner (no other match within 0.05 of the best)
+  let shouldAutoCorrect = false;
+  if (bestMatch && bestMatch.score >= config.autoCorrectThreshold) {
+    const closeMatches = scores.filter((s) => s.score >= bestMatch.score - 0.05);
+    // Only auto-correct if there's exactly one clear winner (closeMatches always contains bestMatch itself)
+    shouldAutoCorrect = closeMatches.length === 1;
+  }
+
+  return {
+    exactMatch: false,
+    matchedName: shouldAutoCorrect ? bestMatch.name : null,
+    score: bestMatch?.score ?? 0,
+    suggestions,
+    shouldAutoCorrect,
+  };
+}
+
+/**
+ * Global parameter aliases.
+ * Maps canonical parameter names to their common aliases.
+ *
+ * Entries are consulted in insertion order and only for canonical names the
+ * target tool actually declares. An alias listed under two canonical names
+ * (`search` appears under both `pattern` and `old_content`) therefore
+ * resolves to whichever of them comes first among the tool's own properties.
+ */
+export const GLOBAL_PARAMETER_ALIASES: Map<string, string[]> = new Map([
+  // Search/query related
+  ['pattern', ['query', 'search', 'search_term', 'search_query', 'regex', 'expression', 'search_pattern']],
+  ['path', ['file', 'file_path', 'filepath', 'directory', 'dir', 'folder', 'location']],
+
+  // Result limiting
+  ['head_limit', ['max_results', 'max', 'limit', 'count', 'num_results', 'top_k', 'k', 'n']],
+  ['depth', ['max_depth', 'level', 'levels']],
+
+  // Flags
+  ['ignore_case', ['case_insensitive', 'i', 'insensitive', 'no_case']],
+  ['recursive', ['recurse', 'r']],
+  ['show_hidden', ['hidden', 'all', 'include_hidden', 'show_all']],
+  ['show_files', ['include_files', 'files']],
+
+  // Content
+  ['content', ['text', 'body', 'data', 'value']],
+  ['new_content', ['replacement', 'replace_with', 'new_text', 'new_value']],
+  ['old_content', ['original', 'old_text', 'find', 'search']],
+
+  // File operations
+  ['file_pattern', ['glob', 'include', 'glob_pattern', 'filter']],
+
+  // Bash specific
+  ['command', ['cmd', 'script', 'shell_command', 'exec']],
+]);
+
+/**
+ * Result of parameter mapping.
+ */
+export interface ParameterMapResult {
+  /** The mapped parameters (unmapped keys are passed through unchanged) */
+  mappedInput: Record<string, unknown>;
+  /** Any parameters that couldn't be mapped */
+  unmappedParams: string[];
+  /** Mapping details for logging */
+  mappings: Array<{ from: string; to: string }>;
+}
+
+/**
+ * Map parameters using global aliases and tool-specific schema.
+ *
+ * Resolution order for each input key:
+ *   1. The key is declared in the tool schema: copied as-is.
+ *   2. The key is a known alias (`GLOBAL_PARAMETER_ALIASES`) of a declared
+ *      property: renamed to that property.
+ *   3. The key scores at least 0.7 against a declared property: renamed.
+ *   4. Otherwise: passed through unchanged and reported in `unmappedParams`.
+ *
+ * A key is never renamed onto a property the caller also supplied
+ * explicitly, whatever the key order, so an explicit value always wins over
+ * an alias and no rename is reported for a value that would be overwritten.
+ *
+ * @param input - Raw arguments from the tool call.
+ * @param toolSchema - The target tool's `input_schema`; only its property names are used.
+ * @param config - Fallback settings; when `parameterAliasing` is false, `input` is returned untouched.
+ * @returns The rewritten arguments, the keys left unmapped, and the renames performed.
+ */
+export function mapParameters(
+  input: Record<string, unknown>,
+  toolSchema: ToolDefinition['input_schema'],
+  config: ToolFallbackConfig = DEFAULT_FALLBACK_CONFIG
+): ParameterMapResult {
+  if (!config.parameterAliasing) {
+    return {
+      mappedInput: input,
+      unmappedParams: [],
+      mappings: [],
+    };
+  }
+
+  const mappedInput: Record<string, unknown> = {};
+  const unmappedParams: string[] = [];
+  const mappings: Array<{ from: string; to: string }> = [];
+  const schemaProps = Object.keys(toolSchema.properties || {});
+
+  // Own-property test: `in` would also report inherited names such as `constructor`.
+  const hasOwn = (obj: object, name: string): boolean =>
+    Object.prototype.hasOwnProperty.call(obj, name);
+  // A rename target is free only if neither the caller nor an earlier rename has set it.
+  const isFree = (name: string): boolean => !hasOwn(input, name) && !hasOwn(mappedInput, name);
+
+  for (const [key, value] of Object.entries(input)) {
+    // If key exists in schema, use it directly
+    if (schemaProps.includes(key)) {
+      mappedInput[key] = value;
+      continue;
+    }
+
+    // Try to find a mapping from global aliases
+    let mapped = false;
+    for (const [canonical, aliases] of GLOBAL_PARAMETER_ALIASES) {
+      if (schemaProps.includes(canonical) && aliases.includes(key.toLowerCase())) {
+        if (isFree(canonical)) {
+          mappedInput[canonical] = value;
+          mappings.push({ from: key, to: canonical });
+          mapped = true;
+        }
+        // The first declared canonical name that lists this alias decides the outcome.
+        break;
+      }
+    }
+
+    // Try semantic similarity as fallback
+    if (!mapped) {
+      const bestMatch = findBestParameterMatch(key, schemaProps);
+      if (bestMatch && bestMatch.score >= 0.7 && isFree(bestMatch.name)) {
+        mappedInput[bestMatch.name] = value;
+        mappings.push({ from: key, to: bestMatch.name });
+        mapped = true;
+      }
+    }
+
+    if (!mapped) {
+      unmappedParams.push(key);
+      // Still include unmapped params - the tool might handle them
+      mappedInput[key] = value;
+    }
+  }
+
+  return { mappedInput, unmappedParams, mappings };
+}
+
+/**
+ * Find best matching parameter name using similarity.
+ *
+ * Names are compared lower-cased with `stringSimilarity`.
+ *
+ * @param paramName - Parameter key supplied by the caller.
+ * @param schemaProps - Property names declared by the tool schema.
+ * @returns The highest-scoring property (the first one wins a tie), or null
+ *   when `schemaProps` is empty. No minimum score is applied here; the caller
+ *   decides whether the score is good enough.
+ */
+function findBestParameterMatch(
+  paramName: string,
+  schemaProps: string[]
+): { name: string; score: number } | null {
+  let bestMatch: { name: string; score: number } | null = null;
+
+  for (const prop of schemaProps) {
+    const score = stringSimilarity(paramName.toLowerCase(), prop.toLowerCase());
+    if (!bestMatch || score > bestMatch.score) {
+      bestMatch = { name: prop, score };
+    }
+  }
+
+  return bestMatch;
+}
+
+/**
+ * Format an error message with suggestions.
+ *
+ * The first line always names the unknown tool. When `matchResult.suggestions`
+ * is non-empty, a blank line and a "Did you mean:" list follow, with each
+ * score shown as a rounded percentage.
+ *
+ * @param requestedTool - Tool name that could not be resolved.
+ * @param matchResult - Match result whose `suggestions` are listed.
+ * @returns The message lines joined with newlines.
+ */
+export function formatFallbackError(requestedTool: string, matchResult: ToolMatchResult): string {
+  const lines: string[] = [`Error: Unknown tool "${requestedTool}"`];
+
+  if (matchResult.suggestions.length > 0) {
+    lines.push('');
+    lines.push('Did you mean:');
+    // Show up to 3 suggestions, in the order they appear in matchResult.suggestions
+    for (const suggestion of matchResult.suggestions.slice(0, 3)) {
+      const percent = Math.round(suggestion.score * 100);
+      lines.push(` - ${suggestion.name} (${percent}% match): ${suggestion.description}`);
+    }
+  }
+
+  return lines.join('\n');
+}
+
+/**
+ * Format parameter mapping info for prepending to tool result.
+ */ +export function formatMappingInfo( + toolCorrection: { from: string; to: string } | null, + paramMappings: Array<{ from: string; to: string }> +): string | null { + const parts: string[] = []; + + if (toolCorrection) { + parts.push(`Tool: "${toolCorrection.from}" → "${toolCorrection.to}"`); + } + + if (paramMappings.length > 0) { + const mappingStr = paramMappings.map((m) => `${m.from}→${m.to}`).join(', '); + parts.push(`Params: ${mappingStr}`); + } + + if (parts.length === 0) { + return null; + } + + return `(Mapped: ${parts.join('; ')})`; +} diff --git a/tests/print-tree.test.ts b/tests/print-tree.test.ts new file mode 100644 index 0000000..07f1529 --- /dev/null +++ b/tests/print-tree.test.ts @@ -0,0 +1,205 @@ +// Copyright 2026 Layne Penney +// SPDX-License-Identifier: Apache-2.0 + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { PrintTreeTool } from '../src/tools/print-tree.js'; +import * as fs from 'fs/promises'; +import * as path from 'path'; +import * as os from 'os'; + +describe('PrintTreeTool', () => { + let tool: PrintTreeTool; + let tempDir: string; + + beforeEach(async () => { + tool = new PrintTreeTool(); + // Create a temporary directory structure for testing + tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'print-tree-test-')); + + // Create test directory structure: + // tempDir/ + // src/ + // index.ts + // utils/ + // helpers.ts + // tests/ + // test.ts + // package.json + // .hidden + // node_modules/ (should be skipped) + // somelib/ + // index.js + + await fs.mkdir(path.join(tempDir, 'src')); + await fs.mkdir(path.join(tempDir, 'src', 'utils')); + await fs.mkdir(path.join(tempDir, 'tests')); + await fs.mkdir(path.join(tempDir, 'node_modules')); + await fs.mkdir(path.join(tempDir, 'node_modules', 'somelib')); + + await fs.writeFile(path.join(tempDir, 'src', 'index.ts'), 'export {};'); + await fs.writeFile(path.join(tempDir, 'src', 'utils', 'helpers.ts'), 'export {};'); + await 
fs.writeFile(path.join(tempDir, 'tests', 'test.ts'), 'test();'); + await fs.writeFile(path.join(tempDir, 'package.json'), '{}'); + await fs.writeFile(path.join(tempDir, '.hidden'), 'hidden file'); + await fs.writeFile(path.join(tempDir, 'node_modules', 'somelib', 'index.js'), ''); + }); + + afterEach(async () => { + // Clean up temp directory + await fs.rm(tempDir, { recursive: true, force: true }); + }); + + describe('getDefinition', () => { + it('returns correct tool definition', () => { + const def = tool.getDefinition(); + expect(def.name).toBe('print_tree'); + expect(def.description).toContain('tree'); + expect(def.input_schema.properties).toHaveProperty('path'); + expect(def.input_schema.properties).toHaveProperty('depth'); + expect(def.input_schema.properties).toHaveProperty('show_hidden'); + expect(def.input_schema.properties).toHaveProperty('show_files'); + }); + }); + + describe('execute', () => { + it('prints directory tree with default options', async () => { + const result = await tool.execute({ path: tempDir }); + + // Should show directories + expect(result).toContain('src/'); + expect(result).toContain('tests/'); + + // Should show files + expect(result).toContain('package.json'); + + // Should NOT show hidden files by default + expect(result).not.toContain('.hidden'); + + // Should NOT show node_modules (skipped directory) + expect(result).not.toContain('node_modules'); + }); + + it('shows tree connectors', async () => { + const result = await tool.execute({ path: tempDir }); + + // Should have tree connectors + expect(result).toMatch(/[├└]──/); + }); + + it('respects depth parameter', async () => { + const result = await tool.execute({ path: tempDir, depth: 1 }); + + // Should show top-level directories + expect(result).toContain('src/'); + expect(result).toContain('tests/'); + + // Should NOT show nested files/dirs at depth 1 + expect(result).not.toContain('utils/'); + expect(result).not.toContain('helpers.ts'); + }); + + it('shows hidden files 
when show_hidden is true', async () => { + const result = await tool.execute({ path: tempDir, show_hidden: true }); + + expect(result).toContain('.hidden'); + }); + + it('hides files when show_files is false', async () => { + const result = await tool.execute({ path: tempDir, show_files: false }); + + // Should show directories + expect(result).toContain('src/'); + expect(result).toContain('tests/'); + + // Should NOT show files + expect(result).not.toContain('package.json'); + expect(result).not.toContain('index.ts'); + }); + + it('skips common non-essential directories', async () => { + const result = await tool.execute({ path: tempDir }); + + // node_modules should be skipped + expect(result).not.toContain('node_modules'); + expect(result).not.toContain('somelib'); + }); + + it('sorts directories before files', async () => { + const result = await tool.execute({ path: tempDir }); + + // Get the position of directories and files + const srcPos = result.indexOf('src/'); + const testsPos = result.indexOf('tests/'); + const packagePos = result.indexOf('package.json'); + + // Directories should come before files + expect(srcPos).toBeLessThan(packagePos); + expect(testsPos).toBeLessThan(packagePos); + }); + + it('handles empty directory', async () => { + const emptyDir = path.join(tempDir, 'empty'); + await fs.mkdir(emptyDir); + + const result = await tool.execute({ path: emptyDir }); + + // Should show the directory name + expect(result).toContain('empty/'); + }); + + it('handles non-existent directory', async () => { + const result = await tool.execute({ path: '/nonexistent/path/xyz' }); + + expect(result).toContain('not found'); + }); + + it('uses current directory when path not specified', async () => { + // Save current dir + const originalCwd = process.cwd(); + + try { + process.chdir(tempDir); + const result = await tool.execute({}); + + // Should show contents of tempDir + expect(result).toContain('src/'); + expect(result).toContain('tests/'); + } finally { + 
// Restore original cwd + process.chdir(originalCwd); + } + }); + + it('shows nested structure correctly', async () => { + const result = await tool.execute({ path: tempDir, depth: 3 }); + + // Should show nested utils directory + expect(result).toContain('utils/'); + expect(result).toContain('helpers.ts'); + }); + }); + + describe('tree formatting', () => { + it('uses correct tree characters for last items', async () => { + const result = await tool.execute({ path: tempDir }); + + // Should have └── for last items in a directory + expect(result).toContain('└──'); + }); + + it('uses correct tree characters for non-last items', async () => { + const result = await tool.execute({ path: tempDir }); + + // Should have ├── for non-last items + expect(result).toContain('├──'); + }); + + it('uses vertical lines for nested items', async () => { + const result = await tool.execute({ path: tempDir, depth: 3 }); + + // Nested items should have │ for indentation + // This appears when there are siblings after the parent + expect(result).toMatch(/│\s+[├└]──/); + }); + }); +}); diff --git a/tests/tool-fallback.test.ts b/tests/tool-fallback.test.ts new file mode 100644 index 0000000..7e2ced9 --- /dev/null +++ b/tests/tool-fallback.test.ts @@ -0,0 +1,563 @@ +// Copyright 2026 Layne Penney +// SPDX-License-Identifier: Apache-2.0 + +import { describe, it, expect, beforeEach } from 'vitest'; +import { + findBestToolMatch, + mapParameters, + formatFallbackError, + formatMappingInfo, + GLOBAL_PARAMETER_ALIASES, + DEFAULT_FALLBACK_CONFIG, + type ToolFallbackConfig, +} from '../src/tools/tool-fallback.js'; +import { ToolRegistry } from '../src/tools/registry.js'; +import type { ToolDefinition } from '../src/types.js'; + +// Mock tool definitions for testing +const mockTools: ToolDefinition[] = [ + { + name: 'grep', + description: 'Search for patterns in file contents. 
Returns matching lines with file paths.', + input_schema: { + type: 'object', + properties: { + pattern: { type: 'string', description: 'Search pattern' }, + path: { type: 'string', description: 'Path to search in' }, + head_limit: { type: 'number', description: 'Max results' }, + ignore_case: { type: 'boolean', description: 'Case insensitive' }, + }, + required: ['pattern'], + }, + }, + { + name: 'glob', + description: 'Find files matching a glob pattern. Returns file paths.', + input_schema: { + type: 'object', + properties: { + pattern: { type: 'string', description: 'Glob pattern' }, + path: { type: 'string', description: 'Base path' }, + }, + required: ['pattern'], + }, + }, + { + name: 'read_file', + description: 'Read the contents of a file.', + input_schema: { + type: 'object', + properties: { + path: { type: 'string', description: 'File path' }, + }, + required: ['path'], + }, + }, + { + name: 'write_file', + description: 'Write content to a file.', + input_schema: { + type: 'object', + properties: { + path: { type: 'string', description: 'File path' }, + content: { type: 'string', description: 'Content to write' }, + }, + required: ['path', 'content'], + }, + }, + { + name: 'list_directory', + description: 'List files and directories in a given path.', + input_schema: { + type: 'object', + properties: { + path: { type: 'string', description: 'Directory path' }, + show_hidden: { type: 'boolean', description: 'Show hidden files' }, + }, + required: [], + }, + }, + { + name: 'print_tree', + description: 'Print a tree-like directory structure.', + input_schema: { + type: 'object', + properties: { + path: { type: 'string', description: 'Root path' }, + depth: { type: 'number', description: 'Max depth' }, + }, + required: [], + }, + }, + { + name: 'bash', + description: 'Execute a bash command.', + input_schema: { + type: 'object', + properties: { + command: { type: 'string', description: 'Command to execute' }, + }, + required: ['command'], + }, + }, +]; + 
+describe('findBestToolMatch', () => { + it('returns exact match when tool exists', () => { + const result = findBestToolMatch('grep', mockTools); + expect(result.exactMatch).toBe(true); + expect(result.matchedName).toBe('grep'); + expect(result.score).toBe(1.0); + expect(result.suggestions).toHaveLength(0); + expect(result.shouldAutoCorrect).toBe(false); + }); + + it('is case-sensitive for exact matches', () => { + const result = findBestToolMatch('Grep', mockTools); + expect(result.exactMatch).toBe(false); + // But should suggest grep with high similarity + expect(result.suggestions.length).toBeGreaterThan(0); + expect(result.suggestions[0].name).toBe('grep'); + }); + + it('suggests similar tools for typos', () => { + const result = findBestToolMatch('gre', mockTools); + expect(result.exactMatch).toBe(false); + expect(result.suggestions.length).toBeGreaterThan(0); + expect(result.suggestions[0].name).toBe('grep'); + }); + + it('auto-corrects high-similarity case typos', () => { + const result = findBestToolMatch('GREP', mockTools); + // GREP vs grep should have high similarity due to case-insensitive comparison + expect(result.shouldAutoCorrect).toBe(true); + expect(result.matchedName).toBe('grep'); + }); + + it('suggests print_tree for print_tre typo', () => { + // "print_tre" vs "print_tree" has high similarity + const result = findBestToolMatch('print_tre', mockTools); + expect(result.exactMatch).toBe(false); + expect(result.suggestions.some((s) => s.name === 'print_tree')).toBe(true); + }); + + it('suggests list_directory for list_directo typo', () => { + // "list_directo" vs "list_directory" has high similarity (only missing 'ry') + const result = findBestToolMatch('list_directo', mockTools); + expect(result.exactMatch).toBe(false); + expect(result.suggestions.length).toBeGreaterThan(0); + expect(result.suggestions[0].name).toBe('list_directory'); + }); + + it('returns no suggestions when disabled', () => { + const config: ToolFallbackConfig = { 
...DEFAULT_FALLBACK_CONFIG, enabled: false }; + const result = findBestToolMatch('unknown', mockTools, config); + expect(result.suggestions).toHaveLength(0); + expect(result.matchedName).toBeNull(); + }); + + it('does not auto-correct when multiple close matches exist', () => { + // Create tools with close names to test ambiguity + // Both "test_a" and "test_b" are equally similar to "test_x" + const ambiguousTools: ToolDefinition[] = [ + { name: 'test_a', description: 'Test A', input_schema: { type: 'object', properties: {} } }, + { name: 'test_b', description: 'Test B', input_schema: { type: 'object', properties: {} } }, + ]; + // "test_x" has equal similarity to both test_a and test_b (both differ by 1 char) + const result = findBestToolMatch('test_x', ambiguousTools); + expect(result.suggestions.length).toBeGreaterThan(0); + // Should not auto-correct because both are equally close matches + expect(result.shouldAutoCorrect).toBe(false); + }); + + it('respects custom thresholds', () => { + const config: ToolFallbackConfig = { + ...DEFAULT_FALLBACK_CONFIG, + suggestionThreshold: 0.9, // Very high threshold + }; + const result = findBestToolMatch('gre', mockTools, config); + // 'gre' vs 'grep' is about 0.75 similarity, below 0.9 threshold + expect(result.suggestions).toHaveLength(0); + }); + + it('includes truncated descriptions in suggestions', () => { + const result = findBestToolMatch('search', mockTools); + for (const suggestion of result.suggestions) { + expect(suggestion.description).toBeDefined(); + expect(suggestion.description.length).toBeLessThanOrEqual(83); // 80 + '...' 
+ } + }); +}); + +describe('mapParameters', () => { + const grepSchema = mockTools.find((t) => t.name === 'grep')!.input_schema; + const writeFileSchema = mockTools.find((t) => t.name === 'write_file')!.input_schema; + const bashSchema = mockTools.find((t) => t.name === 'bash')!.input_schema; + + it('passes through valid parameters unchanged', () => { + const result = mapParameters({ pattern: 'test', path: '.' }, grepSchema); + expect(result.mappedInput.pattern).toBe('test'); + expect(result.mappedInput.path).toBe('.'); + expect(result.mappings).toHaveLength(0); + expect(result.unmappedParams).toHaveLength(0); + }); + + it('maps query to pattern', () => { + const result = mapParameters({ query: 'test' }, grepSchema); + expect(result.mappedInput.pattern).toBe('test'); + expect(result.mappings).toContainEqual({ from: 'query', to: 'pattern' }); + }); + + it('maps search to pattern', () => { + const result = mapParameters({ search: 'test' }, grepSchema); + expect(result.mappedInput.pattern).toBe('test'); + expect(result.mappings).toContainEqual({ from: 'search', to: 'pattern' }); + }); + + it('maps max_results to head_limit', () => { + const result = mapParameters({ max_results: 10 }, grepSchema); + expect(result.mappedInput.head_limit).toBe(10); + expect(result.mappings).toContainEqual({ from: 'max_results', to: 'head_limit' }); + }); + + it('maps max to head_limit', () => { + const result = mapParameters({ max: 5 }, grepSchema); + expect(result.mappedInput.head_limit).toBe(5); + }); + + it('maps limit to head_limit', () => { + const result = mapParameters({ limit: 20 }, grepSchema); + expect(result.mappedInput.head_limit).toBe(20); + }); + + it('maps file_path to path', () => { + const result = mapParameters({ file_path: '/test.ts' }, grepSchema); + expect(result.mappedInput.path).toBe('/test.ts'); + }); + + it('maps case_insensitive to ignore_case', () => { + const result = mapParameters({ case_insensitive: true }, grepSchema); + 
expect(result.mappedInput.ignore_case).toBe(true); + }); + + it('maps text to content for write_file', () => { + const result = mapParameters({ path: '/test.txt', text: 'hello' }, writeFileSchema); + expect(result.mappedInput.content).toBe('hello'); + expect(result.mappings).toContainEqual({ from: 'text', to: 'content' }); + }); + + it('maps cmd to command for bash', () => { + const result = mapParameters({ cmd: 'ls -la' }, bashSchema); + expect(result.mappedInput.command).toBe('ls -la'); + }); + + it('preserves unmapped parameters', () => { + const result = mapParameters({ unknown_param: 'value', pattern: 'test' }, grepSchema); + expect(result.mappedInput.unknown_param).toBe('value'); + expect(result.unmappedParams).toContain('unknown_param'); + }); + + it('explicit parameters take precedence over aliases', () => { + const result = mapParameters({ pattern: 'explicit', query: 'alias' }, grepSchema); + expect(result.mappedInput.pattern).toBe('explicit'); + // query should be ignored since pattern is already set + expect(result.mappings).toHaveLength(0); + }); + + it('does not map when aliasing is disabled', () => { + const config: ToolFallbackConfig = { ...DEFAULT_FALLBACK_CONFIG, parameterAliasing: false }; + const result = mapParameters({ query: 'test' }, grepSchema, config); + expect(result.mappedInput.query).toBe('test'); + expect(result.mappedInput.pattern).toBeUndefined(); + expect(result.mappings).toHaveLength(0); + }); + + it('handles multiple alias mappings', () => { + const result = mapParameters( + { query: 'test', max: 10, file_path: '/src' }, + grepSchema + ); + expect(result.mappedInput.pattern).toBe('test'); + expect(result.mappedInput.head_limit).toBe(10); + expect(result.mappedInput.path).toBe('/src'); + expect(result.mappings).toHaveLength(3); + }); +}); + +describe('formatFallbackError', () => { + it('includes tool name in error', () => { + const matchResult = { + exactMatch: false, + matchedName: null, + score: 0.5, + suggestions: [], + 
shouldAutoCorrect: false, + }; + const error = formatFallbackError('unknown_tool', matchResult); + expect(error).toContain('unknown_tool'); + expect(error).toContain('Error'); + }); + + it('includes suggestions when available', () => { + const matchResult = { + exactMatch: false, + matchedName: null, + score: 0.75, + suggestions: [ + { name: 'grep', score: 0.75, description: 'Search for patterns' }, + { name: 'glob', score: 0.6, description: 'Find files' }, + ], + shouldAutoCorrect: false, + }; + const error = formatFallbackError('gre', matchResult); + expect(error).toContain('Did you mean'); + expect(error).toContain('grep'); + expect(error).toContain('75%'); + expect(error).toContain('glob'); + expect(error).toContain('60%'); + }); + + it('limits suggestions to 3', () => { + const matchResult = { + exactMatch: false, + matchedName: null, + score: 0.5, + suggestions: [ + { name: 'tool1', score: 0.8, description: 'Desc 1' }, + { name: 'tool2', score: 0.7, description: 'Desc 2' }, + { name: 'tool3', score: 0.65, description: 'Desc 3' }, + { name: 'tool4', score: 0.6, description: 'Desc 4' }, + { name: 'tool5', score: 0.55, description: 'Desc 5' }, + ], + shouldAutoCorrect: false, + }; + const error = formatFallbackError('unknown', matchResult); + expect(error).toContain('tool1'); + expect(error).toContain('tool2'); + expect(error).toContain('tool3'); + expect(error).not.toContain('tool4'); + expect(error).not.toContain('tool5'); + }); +}); + +describe('formatMappingInfo', () => { + it('returns null when no mappings', () => { + const result = formatMappingInfo(null, []); + expect(result).toBeNull(); + }); + + it('formats tool correction', () => { + const result = formatMappingInfo({ from: 'GREP', to: 'grep' }, []); + expect(result).toContain('Tool'); + expect(result).toContain('GREP'); + expect(result).toContain('grep'); + }); + + it('formats parameter mappings', () => { + const result = formatMappingInfo(null, [ + { from: 'query', to: 'pattern' }, + { from: 'max', 
to: 'head_limit' }, + ]); + expect(result).toContain('Params'); + expect(result).toContain('query→pattern'); + expect(result).toContain('max→head_limit'); + }); + + it('formats both tool and parameter mappings', () => { + const result = formatMappingInfo( + { from: 'GREP', to: 'grep' }, + [{ from: 'query', to: 'pattern' }] + ); + expect(result).toContain('Tool'); + expect(result).toContain('Params'); + }); +}); + +describe('GLOBAL_PARAMETER_ALIASES', () => { + it('has common query aliases for pattern', () => { + const aliases = GLOBAL_PARAMETER_ALIASES.get('pattern'); + expect(aliases).toContain('query'); + expect(aliases).toContain('search'); + expect(aliases).toContain('search_term'); + }); + + it('has common path aliases', () => { + const aliases = GLOBAL_PARAMETER_ALIASES.get('path'); + expect(aliases).toContain('file'); + expect(aliases).toContain('file_path'); + expect(aliases).toContain('directory'); + }); + + it('has limit aliases for head_limit', () => { + const aliases = GLOBAL_PARAMETER_ALIASES.get('head_limit'); + expect(aliases).toContain('max_results'); + expect(aliases).toContain('max'); + expect(aliases).toContain('limit'); + }); + + it('has command aliases for bash', () => { + const aliases = GLOBAL_PARAMETER_ALIASES.get('command'); + expect(aliases).toContain('cmd'); + expect(aliases).toContain('script'); + }); +}); + +describe('ToolRegistry integration', () => { + let registry: ToolRegistry; + + // Simple mock tool for testing + class MockTool { + private name: string; + private definition: ToolDefinition; + private response: string; + + constructor(name: string, definition: ToolDefinition, response: string = 'Success') { + this.name = name; + this.definition = definition; + this.response = response; + } + + getName(): string { + return this.name; + } + + getDefinition(): ToolDefinition { + return this.definition; + } + + async run(toolUseId: string, input: Record) { + // Validate required params + const required = 
this.definition.input_schema.required || []; + for (const param of required) { + if (!(param in input)) { + return { + tool_use_id: toolUseId, + content: `Error: Missing required parameter: ${param}`, + is_error: true, + }; + } + } + return { + tool_use_id: toolUseId, + content: `${this.response}: ${JSON.stringify(input)}`, + is_error: false, + }; + } + } + + beforeEach(() => { + registry = new ToolRegistry(); + // Register mock tools + for (const def of mockTools) { + registry.register(new MockTool(def.name, def) as any); + } + }); + + it('executes exact tool match', async () => { + const result = await registry.execute({ + id: 'test-1', + name: 'grep', + input: { pattern: 'test' }, + }); + expect(result.is_error).toBe(false); + expect(result.content).toContain('Success'); + }); + + it('provides suggestions for unknown tool with similar name', async () => { + const result = await registry.execute({ + id: 'test-2', + name: 'greb', // Typo of 'grep' - should get suggestions + input: { pattern: 'test' }, + }); + expect(result.is_error).toBe(true); + expect(result.content).toContain('Unknown tool'); + expect(result.content).toContain('Did you mean'); + expect(result.content).toContain('grep'); + }); + + it('returns error without suggestions for completely unknown tool', async () => { + const result = await registry.execute({ + id: 'test-2b', + name: 'xyzabc123', // Completely unknown - no similar tools + input: { pattern: 'test' }, + }); + expect(result.is_error).toBe(true); + expect(result.content).toContain('Unknown tool'); + // No suggestions because nothing is similar enough + }); + + it('auto-corrects high-similarity tool name', async () => { + const result = await registry.execute({ + id: 'test-3', + name: 'GREP', // Case typo + input: { pattern: 'test' }, + }); + // Should auto-correct and succeed + expect(result.is_error).toBe(false); + expect(result.content).toContain('Mapped'); + expect(result.content).toContain('GREP'); + 
expect(result.content).toContain('grep'); + }); + + it('maps query parameter to pattern', async () => { + const result = await registry.execute({ + id: 'test-4', + name: 'grep', + input: { query: 'test' }, + }); + expect(result.is_error).toBe(false); + expect(result.content).toContain('Mapped'); + expect(result.content).toContain('query→pattern'); + }); + + it('maps multiple parameters', async () => { + const result = await registry.execute({ + id: 'test-5', + name: 'grep', + input: { query: 'test', max_results: 10, file_path: '/src' }, + }); + expect(result.is_error).toBe(false); + expect(result.content).toContain('query→pattern'); + expect(result.content).toContain('max_results→head_limit'); + expect(result.content).toContain('file_path→path'); + }); + + it('respects disabled fallback', async () => { + registry.setFallbackConfig({ enabled: false }); + const result = await registry.execute({ + id: 'test-6', + name: 'search', + input: { pattern: 'test' }, + }); + expect(result.is_error).toBe(true); + expect(result.content).toBe('Error: Unknown tool "search"'); + expect(result.content).not.toContain('Did you mean'); + }); + + it('respects disabled parameter aliasing', async () => { + registry.setFallbackConfig({ parameterAliasing: false }); + const result = await registry.execute({ + id: 'test-7', + name: 'grep', + input: { query: 'test' }, // query won't be mapped to pattern + }); + // Should fail because 'pattern' is required but 'query' wasn't mapped + expect(result.is_error).toBe(true); + expect(result.content).toContain('Missing required parameter'); + }); + + it('getFallbackConfig returns current config', () => { + const config = registry.getFallbackConfig(); + expect(config.enabled).toBe(true); + expect(config.autoCorrectThreshold).toBe(0.85); + expect(config.suggestionThreshold).toBe(0.6); + }); + + it('setFallbackConfig updates config', () => { + registry.setFallbackConfig({ suggestionThreshold: 0.8 }); + const config = registry.getFallbackConfig(); + 
expect(config.suggestionThreshold).toBe(0.8); + // Other values should still have defaults + expect(config.enabled).toBe(true); + }); +}); From bf78b6a70f40d8ae228ee5b9660e686cb7da31f7 Mon Sep 17 00:00:00 2001 From: Layne Penney Date: Thu, 15 Jan 2026 07:09:40 -0600 Subject: [PATCH 02/17] clean content? --- src/providers/ollama-cloud.ts | 62 ++++++++++++++++++++++++++++++++--- 1 file changed, 58 insertions(+), 4 deletions(-) diff --git a/src/providers/ollama-cloud.ts b/src/providers/ollama-cloud.ts index edd0f80..2bc2541 100644 --- a/src/providers/ollama-cloud.ts +++ b/src/providers/ollama-cloud.ts @@ -217,15 +217,20 @@ export class OllamaCloudProvider extends BaseProvider { } // Extract thinking content from tags - const { content: cleanedContent, thinking } = this.extractThinkingContent( + const { content: thinkingCleanedContent, thinking } = this.extractThinkingContent( responseData.message.content ); // Fall back to extracting tool calls from text if no native calls if (toolCalls.length === 0 && tools && tools.length > 0) { - toolCalls = this.extractToolCalls(cleanedContent, tools); + toolCalls = this.extractToolCalls(thinkingCleanedContent, tools); } + // Clean hallucinated traces from content (after tool extraction) + const cleanedContent = toolCalls.length > 0 + ? 
this.cleanHallucinatedTraces(thinkingCleanedContent) + : thinkingCleanedContent; + return createProviderResponse({ content: cleanedContent, toolCalls, @@ -332,14 +337,19 @@ export class OllamaCloudProvider extends BaseProvider { } // Extract thinking content from tags (used by qwen3:thinking and similar models) - const { content: cleanedContent, thinking } = this.extractThinkingContent(fullText); + const { content: thinkingCleanedContent, thinking } = this.extractThinkingContent(fullText); // Use native tool calls if available, otherwise extract from text let toolCalls: ToolCall[] = nativeToolCalls; if (toolCalls.length === 0 && tools && tools.length > 0) { - toolCalls = this.extractToolCalls(cleanedContent, tools); + toolCalls = this.extractToolCalls(thinkingCleanedContent, tools); } + // Clean hallucinated traces from content (after tool extraction) + const cleanedContent = toolCalls.length > 0 + ? this.cleanHallucinatedTraces(thinkingCleanedContent) + : thinkingCleanedContent; + return createProviderResponse({ content: cleanedContent, toolCalls, @@ -489,6 +499,34 @@ export class OllamaCloudProvider extends BaseProvider { return toolCalls; } + // Pattern 3: [Calling tool_name]: {json} format + // Used by some models that simulate agent traces. We extract the call but ignore + // any "[Result from ...]" which are hallucinated results. 
+ const callingPattern = /\[Calling\s+([a-z_][a-z0-9_]*)\]\s*:\s*(\{[^}]*\})/gi; + + while ((match = callingPattern.exec(content)) !== null) { + const rawToolName = match[1]; + const normalizedName = this.normalizeToolName(rawToolName); + const jsonArgs = match[2]; + + if (toolNames.has(normalizedName)) { + try { + const args = JSON.parse(jsonArgs); + toolCalls.push({ + id: `extracted_${Date.now()}_${Math.random().toString(36).substring(2, 9)}`, + name: normalizedName, + input: args, + }); + } catch { + // Invalid JSON, skip + } + } + } + + if (toolCalls.length > 0) { + return toolCalls; + } + // Pattern 3: Look for JSON objects with "name" field // This pattern handles nested braces properly const jsonPattern = /\{(?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*\}/g; @@ -583,6 +621,22 @@ export class OllamaCloudProvider extends BaseProvider { return { content: cleanedContent, thinking }; } + /** + * Clean hallucinated agent trace patterns from content. + * Some models output fake "[Calling tool]: {json}[Result from tool]: result" traces. + * This should be called AFTER extractToolCalls to clean up the display content. + */ + private cleanHallucinatedTraces(content: string): string { + // Pattern: [Calling tool_name]: {json}[Result from tool_name]: any text until next [ or end + const hallucinatedTracePattern = /\[Calling\s+[a-z_][a-z0-9_]*\]\s*:\s*\{[^}]*\}\s*(?:\[Result from\s+[a-z_][a-z0-9_]*\]\s*:\s*[^\[]*)?/gi; + let cleanedContent = content.replace(hallucinatedTracePattern, '').trim(); + + // Clean up multiple newlines + cleanedContent = cleanedContent.replace(/\n{3,}/g, '\n\n').trim(); + + return cleanedContent; + } + /** * Pull a model if it's not already available. 
*/ From 2677bedb64535aa6ae40dc2d7685e68c662d7805 Mon Sep 17 00:00:00 2001 From: Layne Penney Date: Thu, 15 Jan 2026 09:09:23 -0600 Subject: [PATCH 03/17] fix: surface thinking output and audit raw responses --- src/agent.ts | 29 ++++++++++++++-- src/providers/anthropic.ts | 4 ++- src/providers/message-converter.ts | 10 +++--- src/providers/ollama-cloud.ts | 55 +++++++++++++++++++++++------- src/providers/openai-compatible.ts | 5 +++ src/providers/response-parser.ts | 3 ++ src/types.ts | 2 ++ 7 files changed, 87 insertions(+), 21 deletions(-) diff --git a/src/agent.ts b/src/agent.ts index d4c0125..8062820 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -512,10 +512,17 @@ Always use tools to interact with the filesystem rather than asking the user to // Call the model with streaming (using native system prompt support) const apiStartTime = Date.now(); + let streamedChars = 0; + const onChunk = (chunk: string): void => { + if (chunk) { + streamedChars += chunk.length; + } + this.callbacks.onText?.(chunk); + }; const response = await chatProvider.streamChat( messagesToSend, tools, - this.callbacks.onText, + onChunk, systemContext ); const apiDuration = (Date.now() - apiStartTime) / 1000; @@ -541,7 +548,8 @@ Always use tools to interact with the filesystem rather than asking the user to response.content, response.toolCalls, response.usage, - Date.now() - apiStartTime + Date.now() - apiStartTime, + response.rawResponse ); // Record usage for cost tracking @@ -574,7 +582,22 @@ Always use tools to interact with the filesystem rather than asking the user to finalResponse = response.content; } - if (isExtractedToolCall) { + const shouldEmitFallback = !response.content && + response.toolCalls.length === 0 && + streamedChars === 0; + + if (shouldEmitFallback) { + const fallbackMessage = response.reasoningContent + ? 'Model returned reasoning without a final answer. Try again or check --audit for the raw response.' + : 'Model returned an empty response. 
Try again or check --audit for the raw response.'; + + finalResponse = fallbackMessage; + this.messages.push({ + role: 'assistant', + content: fallbackMessage, + }); + this.callbacks.onText?.(fallbackMessage); + } else if (isExtractedToolCall) { // For extracted tool calls, store as plain text (model doesn't understand tool_use blocks) this.messages.push({ role: 'assistant', diff --git a/src/providers/anthropic.ts b/src/providers/anthropic.ts index b5f8d1f..667ef0e 100644 --- a/src/providers/anthropic.ts +++ b/src/providers/anthropic.ts @@ -126,6 +126,7 @@ export class AnthropicProvider extends BaseProvider { cacheCreationInputTokens: usage.cache_creation_input_tokens, cacheReadInputTokens: usage.cache_read_input_tokens, }, + rawResponse: finalMessage, }; } @@ -209,7 +210,7 @@ export class AnthropicProvider extends BaseProvider { text: b.text || '', }), // Unknown block types become empty text blocks (logged by mapContentBlocks) - unknown: () => ({ type: 'text' as const, text: '' }), + unknown: (b) => ({ type: 'text' as const, text: b.text || b.content || '' }), }); return { role, content }; @@ -258,6 +259,7 @@ export class AnthropicProvider extends BaseProvider { outputTokens: usage.output_tokens, cacheCreationInputTokens: usage.cache_creation_input_tokens, cacheReadInputTokens: usage.cache_read_input_tokens, + rawResponse: response, }); } } diff --git a/src/providers/message-converter.ts b/src/providers/message-converter.ts index 4be7413..aa5b984 100644 --- a/src/providers/message-converter.ts +++ b/src/providers/message-converter.ts @@ -14,8 +14,8 @@ * - Prevents silent bugs like tool_result blocks being dropped */ -import type { Message, ContentBlock } from '../types.js'; -import { logger } from '../logger.js'; +import type {ContentBlock, Message} from '../types.js'; +import {logger} from '../logger.js'; /** * Typed block interfaces for type-safe extraction. 
@@ -229,7 +229,7 @@ export interface BlockConverters { image: (block: ContentBlock) => T; thinking: (block: ContentBlock) => T; /** Called for unknown block types - can return null to skip */ - unknown?: (block: ContentBlock) => T | null; + unknown: (block: ContentBlock) => T; } /** @@ -244,7 +244,7 @@ export interface BlockConverters { export function mapContentBlock( block: ContentBlock, converters: BlockConverters -): T | null { +): T { switch (block.type) { case 'text': return converters.text(block); @@ -258,7 +258,7 @@ export function mapContentBlock( return converters.thinking(block); default: logger.warn(`Unknown content block type: ${(block as ContentBlock).type}`); - return converters.unknown ? converters.unknown(block) : null; + return converters.unknown(block); } } diff --git a/src/providers/ollama-cloud.ts b/src/providers/ollama-cloud.ts index 2bc2541..978a134 100644 --- a/src/providers/ollama-cloud.ts +++ b/src/providers/ollama-cloud.ts @@ -56,6 +56,7 @@ interface OllamaChatResponse { message: { role: string; content: string; + thinking?: string; tool_calls?: OllamaToolCall[]; }; done: boolean; @@ -216,28 +217,37 @@ export class OllamaCloudProvider extends BaseProvider { })); } + const rawContent = responseData.message.content || ''; + const thinkingField = responseData.message.thinking || ''; + // Extract thinking content from tags - const { content: thinkingCleanedContent, thinking } = this.extractThinkingContent( - responseData.message.content + const { content: thinkingCleanedContent, thinking: tagThinking } = this.extractThinkingContent( + rawContent ); + const combinedThinking = [thinkingField, tagThinking].filter(Boolean).join('\n'); + const hasContent = thinkingCleanedContent.trim().length > 0; + const useFallbackContent = !hasContent && combinedThinking.length > 0; + const finalContent = useFallbackContent ? combinedThinking : thinkingCleanedContent; + const reasoningContent = useFallbackContent ? 
undefined : (combinedThinking || undefined); // Fall back to extracting tool calls from text if no native calls if (toolCalls.length === 0 && tools && tools.length > 0) { - toolCalls = this.extractToolCalls(thinkingCleanedContent, tools); + toolCalls = this.extractToolCalls(finalContent, tools); } // Clean hallucinated traces from content (after tool extraction) const cleanedContent = toolCalls.length > 0 - ? this.cleanHallucinatedTraces(thinkingCleanedContent) - : thinkingCleanedContent; + ? this.cleanHallucinatedTraces(finalContent) + : finalContent; return createProviderResponse({ content: cleanedContent, toolCalls, stopReason: responseData.done_reason, - reasoningContent: thinking || undefined, + reasoningContent, inputTokens: responseData.prompt_eval_count, outputTokens: responseData.eval_count, + rawResponse: responseData, }); }, { @@ -289,10 +299,13 @@ export class OllamaCloudProvider extends BaseProvider { const reader = response.body.getReader(); const decoder = new TextDecoder(); let fullText = ''; + let thinkingText = ''; + let streamedContentChars = 0; let inputTokens: number | undefined; let outputTokens: number | undefined; let stopReason: string | undefined; const nativeToolCalls: ToolCall[] = []; + const rawChunks: OllamaChatResponse[] = []; // Process streamed chunks while (true) { @@ -305,11 +318,19 @@ export class OllamaCloudProvider extends BaseProvider { for (const line of lines) { try { const data: OllamaChatResponse = JSON.parse(line); + rawChunks.push(data); if (data.message?.content) { const content = data.message.content; fullText += content; - if (onChunk) onChunk(content); + if (content) { + streamedContentChars += content.length; + if (onChunk) onChunk(content); + } + } + + if (data.message?.thinking) { + thinkingText += data.message.thinking; } // Capture native tool calls from Ollama API @@ -337,26 +358,36 @@ export class OllamaCloudProvider extends BaseProvider { } // Extract thinking content from tags (used by qwen3:thinking and 
similar models) - const { content: thinkingCleanedContent, thinking } = this.extractThinkingContent(fullText); + const { content: thinkingCleanedContent, thinking: tagThinking } = this.extractThinkingContent(fullText); + const combinedThinking = [thinkingText, tagThinking].filter(Boolean).join('\n'); + const hasContent = thinkingCleanedContent.trim().length > 0; + const useFallbackContent = !hasContent && combinedThinking.length > 0; + const finalContent = useFallbackContent ? combinedThinking : thinkingCleanedContent; + const reasoningContent = useFallbackContent ? undefined : (combinedThinking || undefined); + + if (streamedContentChars === 0 && finalContent && onChunk) { + onChunk(finalContent); + } // Use native tool calls if available, otherwise extract from text let toolCalls: ToolCall[] = nativeToolCalls; if (toolCalls.length === 0 && tools && tools.length > 0) { - toolCalls = this.extractToolCalls(thinkingCleanedContent, tools); + toolCalls = this.extractToolCalls(finalContent, tools); } // Clean hallucinated traces from content (after tool extraction) const cleanedContent = toolCalls.length > 0 - ? this.cleanHallucinatedTraces(thinkingCleanedContent) - : thinkingCleanedContent; + ? 
this.cleanHallucinatedTraces(finalContent) + : finalContent; return createProviderResponse({ content: cleanedContent, toolCalls, stopReason: stopReason || 'stop', - reasoningContent: thinking || undefined, + reasoningContent, inputTokens, outputTokens, + rawResponse: { stream: true, chunks: rawChunks }, }); }, { diff --git a/src/providers/openai-compatible.ts b/src/providers/openai-compatible.ts index e45d924..f952e6f 100644 --- a/src/providers/openai-compatible.ts +++ b/src/providers/openai-compatible.ts @@ -118,8 +118,10 @@ export class OpenAICompatibleProvider extends BaseProvider { let reasoningContent = ''; const toolCallAccumulator = new StreamingToolCallAccumulator(); let streamUsage: { prompt_tokens: number; completion_tokens: number; cached_tokens?: number } | null = null; + const rawChunks: OpenAI.ChatCompletionChunk[] = []; for await (const chunk of stream) { + rawChunks.push(chunk); const delta = chunk.choices[0]?.delta; // Handle reasoning content from reasoning models (e.g., DeepSeek-R1) @@ -168,6 +170,7 @@ export class OpenAICompatibleProvider extends BaseProvider { inputTokens, outputTokens, cachedInputTokens: streamUsage?.cached_tokens, + rawResponse: { stream: true, chunks: rawChunks }, }); } @@ -300,6 +303,7 @@ export class OpenAICompatibleProvider extends BaseProvider { } : null, // Thinking blocks are converted to text for OpenAI (it doesn't have native thinking input) thinking: (b) => ({ kind: 'text', text: b.text || '' }), + unknown: (b) => ({ kind: 'text', text: b.text || b.content || '' }), }; // Process blocks and collect by type @@ -479,6 +483,7 @@ export class OpenAICompatibleProvider extends BaseProvider { inputTokens, outputTokens, cachedInputTokens: cachedTokens, + rawResponse: response, }); } } diff --git a/src/providers/response-parser.ts b/src/providers/response-parser.ts index d900c18..7151178 100644 --- a/src/providers/response-parser.ts +++ b/src/providers/response-parser.ts @@ -43,6 +43,7 @@ export function 
createProviderResponse(params: { inputTokens?: number; outputTokens?: number; reasoningContent?: string; + rawResponse?: unknown; // Cache metrics cacheCreationInputTokens?: number; cacheReadInputTokens?: number; @@ -55,6 +56,7 @@ export function createProviderResponse(params: { inputTokens, outputTokens, reasoningContent, + rawResponse, cacheCreationInputTokens, cacheReadInputTokens, cachedInputTokens, @@ -65,6 +67,7 @@ export function createProviderResponse(params: { toolCalls, stopReason: mapStopReason(stopReason, toolCalls.length > 0), ...(reasoningContent && { reasoningContent }), + ...(rawResponse !== undefined && { rawResponse }), ...(inputTokens !== undefined && outputTokens !== undefined && { usage: { inputTokens, diff --git a/src/types.ts b/src/types.ts index 018a5ed..97a389c 100644 --- a/src/types.ts +++ b/src/types.ts @@ -150,6 +150,7 @@ export interface TokenUsage { * @property {{'end_turn' | 'tool_use' | 'max_tokens'}} stopReason - Reason for stopping the response generation. * @property {string} [reasoningContent] - Optional reasoning/thinking content from reasoning models. * @property {TokenUsage} [usage] - Token usage information if available. + * @property {unknown} [rawResponse] - Raw provider response payload for audit/debugging. */ export interface ProviderResponse { content: string; @@ -157,6 +158,7 @@ export interface ProviderResponse { stopReason: 'end_turn' | 'tool_use' | 'max_tokens'; reasoningContent?: string; usage?: TokenUsage; + rawResponse?: unknown; } // Provider configuration From 438fbd9301736500fee44bbc7aafd40a65f4bc75 Mon Sep 17 00:00:00 2001 From: Layne Penney Date: Thu, 15 Jan 2026 09:12:59 -0600 Subject: [PATCH 04/17] docs: add roadmap item for test sandbox compatibility --- ROADMAP.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ROADMAP.md b/ROADMAP.md index df7732d..eee379f 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -4,6 +4,13 @@ This document tracks planned features and improvements for Codi. 
## Planned Features +### Test Sandbox Compatibility + +Update tests that write to `~/.codi` or bind to `127.0.0.1` so they use local temporary +directories and ephemeral ports by default, avoiding sandbox permission errors. + +--- + ### Semantic Fallback for Tool Calls When a model attempts to call a tool that doesn't exist or uses incorrect parameter names, implement a semantic fallback system that: From a7005531eb200efbcdfa8a48dd800c606ee2e2a0 Mon Sep 17 00:00:00 2001 From: Layne Penney Date: Thu, 15 Jan 2026 09:35:04 -0600 Subject: [PATCH 05/17] feat: stream reasoning and include it in messages --- src/agent.ts | 28 +++++++++++++++++++++++++--- src/index.ts | 17 +++++++++++++++++ src/providers/anthropic.ts | 3 ++- src/providers/base.ts | 3 ++- src/providers/mock.ts | 3 ++- src/providers/ollama-cloud.ts | 24 ++++++++++++++++++------ src/providers/openai-compatible.ts | 4 +++- tests/providers.test.ts | 8 +++++++- 8 files changed, 76 insertions(+), 14 deletions(-) diff --git a/src/agent.ts b/src/agent.ts index 8062820..884ef08 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -96,6 +96,7 @@ export interface AgentOptions { auditLogger?: AuditLogger | null; // Optional audit logger for session debugging onText?: (text: string) => void; onReasoning?: (reasoning: string) => void; // Called with reasoning trace from reasoning models + onReasoningChunk?: (chunk: string) => void; // Streaming reasoning output onToolCall?: (name: string, input: Record) => void; onToolResult?: (name: string, result: string, isError: boolean) => void; onConfirm?: (confirmation: ToolConfirmation) => Promise; // Confirm destructive tools @@ -130,6 +131,7 @@ export class Agent { private callbacks: { onText?: (text: string) => void; onReasoning?: (reasoning: string) => void; + onReasoningChunk?: (chunk: string) => void; onToolCall?: (name: string, input: Record) => void; onToolResult?: (name: string, result: string, isError: boolean) => void; onConfirm?: (confirmation: ToolConfirmation) => 
Promise; @@ -168,6 +170,7 @@ export class Agent { this.callbacks = { onText: options.onText, onReasoning: options.onReasoning, + onReasoningChunk: options.onReasoningChunk, onToolCall: options.onToolCall, onToolResult: options.onToolResult, onConfirm: options.onConfirm, @@ -519,11 +522,19 @@ Always use tools to interact with the filesystem rather than asking the user to } this.callbacks.onText?.(chunk); }; + let streamedReasoningChars = 0; + const onReasoningChunk = (chunk: string): void => { + if (chunk) { + streamedReasoningChars += chunk.length; + } + this.callbacks.onReasoningChunk?.(chunk); + }; const response = await chatProvider.streamChat( messagesToSend, tools, onChunk, - systemContext + systemContext, + onReasoningChunk ); const apiDuration = (Date.now() - apiStartTime) / 1000; @@ -558,7 +569,7 @@ Always use tools to interact with the filesystem rather than asking the user to } // Call reasoning callback if reasoning content is present (e.g., from DeepSeek-R1) - if (response.reasoningContent && this.callbacks.onReasoning) { + if (response.reasoningContent && this.callbacks.onReasoning && streamedReasoningChars === 0) { this.callbacks.onReasoning(response.reasoningContent); } @@ -582,6 +593,10 @@ Always use tools to interact with the filesystem rather than asking the user to finalResponse = response.content; } + const thinkingText = response.reasoningContent?.trim(); + const shouldAddThinkingBlock = !!thinkingText && + (!response.content || response.content.trim() !== thinkingText); + const shouldEmitFallback = !response.content && response.toolCalls.length === 0 && streamedChars === 0; @@ -599,14 +614,21 @@ Always use tools to interact with the filesystem rather than asking the user to this.callbacks.onText?.(fallbackMessage); } else if (isExtractedToolCall) { // For extracted tool calls, store as plain text (model doesn't understand tool_use blocks) + const combinedContent = thinkingText + ? `${response.content || ''}${response.content ? 
'\n\n' : ''}[Thinking]:\n${thinkingText}` + : (response.content || ''); this.messages.push({ role: 'assistant', - content: response.content || '', + content: combinedContent, }); } else if (response.content || response.toolCalls.length > 0) { // For native tool calls, use content blocks const contentBlocks: ContentBlock[] = []; + if (shouldAddThinkingBlock && thinkingText) { + contentBlocks.push({ type: 'thinking', text: thinkingText }); + } + if (response.content) { contentBlocks.push({ type: 'text', text: response.content }); } diff --git a/src/index.ts b/src/index.ts index 82d7acb..220a912 100644 --- a/src/index.ts +++ b/src/index.ts @@ -2442,6 +2442,7 @@ async function main() { // Track if we've received streaming output (to manage spinner) let isStreaming = false; + let isReasoningStreaming = false; // Track tool start times for duration logging const toolStartTimes = new Map(); @@ -2478,6 +2479,14 @@ async function main() { console.log(chalk.dim(reasoning)); console.log(chalk.dim.italic('---\n')); }, + onReasoningChunk: (chunk) => { + if (!isReasoningStreaming) { + isReasoningStreaming = true; + spinner.stop(); + console.log(chalk.dim.italic('\n💭 Thinking...')); + } + process.stdout.write(chalk.dim(chunk)); + }, onToolCall: (name, input) => { // Stop any spinner and record start time spinner.stop(); @@ -3315,6 +3324,10 @@ async function main() { spinner.thinking(); const startTime = Date.now(); await agent.chat(result, { taskType: command.taskType }); + if (isReasoningStreaming) { + console.log(chalk.dim.italic('\n---\n')); + isReasoningStreaming = false; + } const elapsed = ((Date.now() - startTime) / 1000).toFixed(1); console.log(chalk.dim(`\n(${elapsed}s)`)); } @@ -3340,6 +3353,10 @@ async function main() { try { const startTime = Date.now(); await agent.chat(trimmed); + if (isReasoningStreaming) { + console.log(chalk.dim.italic('\n---\n')); + isReasoningStreaming = false; + } const elapsed = ((Date.now() - startTime) / 1000).toFixed(1); 
console.log(chalk.dim(`\n(${elapsed}s)`)); } catch (error) { diff --git a/src/providers/anthropic.ts b/src/providers/anthropic.ts index 667ef0e..0bc2efb 100644 --- a/src/providers/anthropic.ts +++ b/src/providers/anthropic.ts @@ -74,7 +74,8 @@ export class AnthropicProvider extends BaseProvider { messages: Message[], tools?: ToolDefinition[], onChunk?: (chunk: string) => void, - systemPrompt?: string + systemPrompt?: string, + _onReasoningChunk?: (chunk: string) => void ): Promise { const stream = this.client.messages.stream({ model: this.model, diff --git a/src/providers/base.ts b/src/providers/base.ts index 6e95a31..9371ca4 100644 --- a/src/providers/base.ts +++ b/src/providers/base.ts @@ -65,7 +65,8 @@ export abstract class BaseProvider { messages: Message[], tools?: ToolDefinition[], onChunk?: (chunk: string) => void, - systemPrompt?: string + systemPrompt?: string, + onReasoningChunk?: (chunk: string) => void ): Promise; /** diff --git a/src/providers/mock.ts b/src/providers/mock.ts index 06da603..e8099dc 100644 --- a/src/providers/mock.ts +++ b/src/providers/mock.ts @@ -289,7 +289,8 @@ export class MockProvider extends BaseProvider { messages: Message[], tools?: ToolDefinition[], onChunk?: (chunk: string) => void, - systemPrompt?: string + systemPrompt?: string, + _onReasoningChunk?: (chunk: string) => void ): Promise { this.recordCall('streamChat', messages, tools, systemPrompt); diff --git a/src/providers/ollama-cloud.ts b/src/providers/ollama-cloud.ts index 978a134..b9e99cc 100644 --- a/src/providers/ollama-cloud.ts +++ b/src/providers/ollama-cloud.ts @@ -228,11 +228,14 @@ export class OllamaCloudProvider extends BaseProvider { const hasContent = thinkingCleanedContent.trim().length > 0; const useFallbackContent = !hasContent && combinedThinking.length > 0; const finalContent = useFallbackContent ? combinedThinking : thinkingCleanedContent; - const reasoningContent = useFallbackContent ? 
undefined : (combinedThinking || undefined); + const reasoningContent = combinedThinking || undefined; + const toolExtractionText = combinedThinking && !finalContent.includes(combinedThinking) + ? `${finalContent}\n${combinedThinking}` + : finalContent; // Fall back to extracting tool calls from text if no native calls if (toolCalls.length === 0 && tools && tools.length > 0) { - toolCalls = this.extractToolCalls(finalContent, tools); + toolCalls = this.extractToolCalls(toolExtractionText, tools); } // Clean hallucinated traces from content (after tool extraction) @@ -262,7 +265,8 @@ export class OllamaCloudProvider extends BaseProvider { messages: Message[], tools?: ToolDefinition[], onChunk?: (chunk: string) => void, - systemPrompt?: string + systemPrompt?: string, + onReasoningChunk?: (chunk: string) => void ): Promise { const ollamaMessages = this.convertMessages(messages, systemPrompt); @@ -301,6 +305,7 @@ export class OllamaCloudProvider extends BaseProvider { let fullText = ''; let thinkingText = ''; let streamedContentChars = 0; + let streamedThinkingChars = 0; let inputTokens: number | undefined; let outputTokens: number | undefined; let stopReason: string | undefined; @@ -331,6 +336,10 @@ export class OllamaCloudProvider extends BaseProvider { if (data.message?.thinking) { thinkingText += data.message.thinking; + if (onReasoningChunk) { + streamedThinkingChars += data.message.thinking.length; + onReasoningChunk(data.message.thinking); + } } // Capture native tool calls from Ollama API @@ -363,16 +372,19 @@ export class OllamaCloudProvider extends BaseProvider { const hasContent = thinkingCleanedContent.trim().length > 0; const useFallbackContent = !hasContent && combinedThinking.length > 0; const finalContent = useFallbackContent ? combinedThinking : thinkingCleanedContent; - const reasoningContent = useFallbackContent ? 
undefined : (combinedThinking || undefined); + const reasoningContent = combinedThinking || undefined; + const toolExtractionText = combinedThinking && !finalContent.includes(combinedThinking) + ? `${finalContent}\n${combinedThinking}` + : finalContent; - if (streamedContentChars === 0 && finalContent && onChunk) { + if (streamedContentChars === 0 && finalContent && onChunk && streamedThinkingChars === 0) { onChunk(finalContent); } // Use native tool calls if available, otherwise extract from text let toolCalls: ToolCall[] = nativeToolCalls; if (toolCalls.length === 0 && tools && tools.length > 0) { - toolCalls = this.extractToolCalls(finalContent, tools); + toolCalls = this.extractToolCalls(toolExtractionText, tools); } // Clean hallucinated traces from content (after tool extraction) diff --git a/src/providers/openai-compatible.ts b/src/providers/openai-compatible.ts index f952e6f..9994df3 100644 --- a/src/providers/openai-compatible.ts +++ b/src/providers/openai-compatible.ts @@ -95,7 +95,8 @@ export class OpenAICompatibleProvider extends BaseProvider { messages: Message[], tools?: ToolDefinition[], onChunk?: (chunk: string) => void, - systemPrompt?: string + systemPrompt?: string, + onReasoningChunk?: (chunk: string) => void ): Promise { const convertedMessages = this.convertMessages(messages); const messagesWithSystem: OpenAI.ChatCompletionMessageParam[] = systemPrompt @@ -128,6 +129,7 @@ export class OpenAICompatibleProvider extends BaseProvider { const reasoningDelta = (delta as any)?.reasoning_content; if (reasoningDelta) { reasoningContent += reasoningDelta; + onReasoningChunk?.(reasoningDelta); } if (delta?.content) { diff --git a/tests/providers.test.ts b/tests/providers.test.ts index b18c26e..f94c67e 100644 --- a/tests/providers.test.ts +++ b/tests/providers.test.ts @@ -39,7 +39,13 @@ describe('BaseProvider', () => { async chat() { return { content: '', toolCalls: [], stopReason: 'end_turn' as const }; } - async streamChat() { + async streamChat( + 
_messages: Message[] = [], + _tools?: unknown, + _onChunk?: (chunk: string) => void, + _systemPrompt?: string, + _onReasoningChunk?: (chunk: string) => void + ) { return { content: '', toolCalls: [], stopReason: 'end_turn' as const }; } supportsToolUse() { return true; } From 7eab210e2b075b1933c7fe9839ff629dc5a7eaf4 Mon Sep 17 00:00:00 2001 From: Layne Penney Date: Thu, 15 Jan 2026 09:43:28 -0600 Subject: [PATCH 06/17] fix: parse tool traces from text output --- src/agent.ts | 13 +++++--- src/utils/json-parser.ts | 70 ++++++++++++++++++++++++++++++++++++++- tests/json-parser.test.ts | 25 +++++++++++++- 3 files changed, 101 insertions(+), 7 deletions(-) diff --git a/src/agent.ts b/src/agent.ts index 884ef08..e99808b 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -575,12 +575,15 @@ Always use tools to interact with the filesystem rather than asking the user to // If no tool calls were detected via API but tools are enabled, // try to extract tool calls from the text (for models that output JSON as text) - if (response.toolCalls.length === 0 && this.useTools && this.extractToolsFromText && response.content) { + if (response.toolCalls.length === 0 && this.useTools && this.extractToolsFromText) { const availableTools = this.toolRegistry.listTools(); - const extractedCalls = extractToolCallsFromText(response.content, availableTools); - if (extractedCalls.length > 0) { - response.toolCalls = extractedCalls; - response.stopReason = 'tool_use'; + const extractionText = [response.content, response.reasoningContent].filter(Boolean).join('\n'); + if (extractionText) { + const extractedCalls = extractToolCallsFromText(extractionText, availableTools); + if (extractedCalls.length > 0) { + response.toolCalls = extractedCalls; + response.stopReason = 'tool_use'; + } } } diff --git a/src/utils/json-parser.ts b/src/utils/json-parser.ts index 8082ba2..d5f3086 100644 --- a/src/utils/json-parser.ts +++ b/src/utils/json-parser.ts @@ -38,6 +38,50 @@ export function tryParseJson(jsonStr: 
string): unknown | null { } } +function extractJsonObjectFromIndex( + text: string, + startIndex: number +): { json: string; endIndex: number } | null { + const start = text.indexOf('{', startIndex); + if (start === -1) return null; + + let depth = 0; + let inString = false; + let isEscaped = false; + + for (let i = start; i < text.length; i++) { + const char = text[i]; + + if (isEscaped) { + isEscaped = false; + continue; + } + + if (char === '\\') { + isEscaped = true; + continue; + } + + if (char === '"') { + inString = !inString; + continue; + } + + if (inString) continue; + + if (char === '{') { + depth += 1; + } else if (char === '}') { + depth -= 1; + if (depth === 0) { + return { json: text.slice(start, i + 1), endIndex: i + 1 }; + } + } + } + + return null; +} + /** * Try to extract tool calls from text when models output JSON instead of using * proper function calling (common with Ollama models). @@ -63,7 +107,31 @@ export function extractToolCallsFromText(text: string, availableTools: string[]) } } - // Pattern 2: Look for JSON in code blocks (objects or arrays) + // Pattern 2: [Calling tool_name]: {json} format + if (toolCalls.length === 0) { + const callingPattern = /\[Calling\s+([a-z_][a-z0-9_]*)\]\s*:\s*/gi; + + while ((match = callingPattern.exec(text)) !== null) { + const toolName = match[1]; + if (!availableTools.includes(toolName)) continue; + + const extracted = extractJsonObjectFromIndex(text, match.index + match[0].length); + if (!extracted) continue; + + const args = tryParseJson(extracted.json); + if (args && typeof args === 'object') { + toolCalls.push({ + id: `extracted_${Date.now()}_${toolCalls.length}`, + name: toolName, + input: args as Record, + }); + } + + callingPattern.lastIndex = extracted.endIndex; + } + } + + // Pattern 3: Look for JSON in code blocks (objects or arrays) if (toolCalls.length === 0) { const codeBlockPattern = /```(?:json)?\s*([\s\S]*?)\s*```/g; while ((match = codeBlockPattern.exec(text)) !== null) { diff --git 
a/tests/json-parser.test.ts b/tests/json-parser.test.ts index 3dd47ab..66f9371 100644 --- a/tests/json-parser.test.ts +++ b/tests/json-parser.test.ts @@ -151,7 +151,30 @@ describe('json-parser', () => { }); }); - describe('pattern 2: JSON in code blocks', () => { + describe('pattern 2: [Calling tool_name]: {json} traces', () => { + it('extracts tool calls from calling trace format', () => { + const text = '[Calling write_file]: {"path": "notes.txt", "content": "hello"}'; + const calls = extractToolCallsFromText(text, availableTools); + + expect(calls).toHaveLength(1); + expect(calls[0].name).toBe('write_file'); + expect(calls[0].input).toEqual({ path: 'notes.txt', content: 'hello' }); + }); + + it('extracts multiple calling trace tool calls', () => { + const text = ` +[Calling read_file]: {"path": "a.txt"} +[Calling read_file]: {"path": "b.txt"} + `; + const calls = extractToolCallsFromText(text, availableTools); + + expect(calls).toHaveLength(2); + expect(calls[0].input).toEqual({ path: 'a.txt' }); + expect(calls[1].input).toEqual({ path: 'b.txt' }); + }); + }); + + describe('pattern 3: JSON in code blocks', () => { it('extracts from json code block', () => { const text = ` Here's the tool call: From b6e38a5e6b8c289b339e0e80f5f2dd61abbdcc56 Mon Sep 17 00:00:00 2001 From: Layne Penney Date: Thu, 15 Jan 2026 09:58:41 -0600 Subject: [PATCH 07/17] feat: apply tool fallback during text extraction --- src/agent.ts | 5 ++-- src/utils/json-parser.ts | 43 ++++++++++++++++++++++++---------- tests/json-parser.test.ts | 49 +++++++++++++++++++++++++-------------- 3 files changed, 65 insertions(+), 32 deletions(-) diff --git a/src/agent.ts b/src/agent.ts index e99808b..2e5bcb0 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -576,10 +576,11 @@ Always use tools to interact with the filesystem rather than asking the user to // If no tool calls were detected via API but tools are enabled, // try to extract tool calls from the text (for models that output JSON as text) if 
(response.toolCalls.length === 0 && this.useTools && this.extractToolsFromText) { - const availableTools = this.toolRegistry.listTools(); + const toolDefinitions = this.toolRegistry.getDefinitions(); + const fallbackConfig = this.toolRegistry.getFallbackConfig(); const extractionText = [response.content, response.reasoningContent].filter(Boolean).join('\n'); if (extractionText) { - const extractedCalls = extractToolCallsFromText(extractionText, availableTools); + const extractedCalls = extractToolCallsFromText(extractionText, toolDefinitions, fallbackConfig); if (extractedCalls.length > 0) { response.toolCalls = extractedCalls; response.stopReason = 'tool_use'; diff --git a/src/utils/json-parser.ts b/src/utils/json-parser.ts index d5f3086..399534f 100644 --- a/src/utils/json-parser.ts +++ b/src/utils/json-parser.ts @@ -6,7 +6,12 @@ * Extracted from agent.ts for reusability. */ -import type { ToolCall } from '../types.js'; +import type { ToolCall, ToolDefinition } from '../types.js'; +import { + DEFAULT_FALLBACK_CONFIG, + findBestToolMatch, + type ToolFallbackConfig, +} from '../tools/tool-fallback.js'; /** * Attempt to fix common JSON issues from LLM output: @@ -86,21 +91,31 @@ function extractJsonObjectFromIndex( * Try to extract tool calls from text when models output JSON instead of using * proper function calling (common with Ollama models). 
*/ -export function extractToolCallsFromText(text: string, availableTools: string[]): ToolCall[] { +export function extractToolCallsFromText( + text: string, + toolDefinitions: ToolDefinition[], + fallbackConfig: ToolFallbackConfig = DEFAULT_FALLBACK_CONFIG +): ToolCall[] { const toolCalls: ToolCall[] = []; + const resolveToolName = (requestedName: string): string | null => { + const match = findBestToolMatch(requestedName, toolDefinitions, fallbackConfig); + if (match.exactMatch) return requestedName; + if (match.shouldAutoCorrect && match.matchedName) return match.matchedName; + return null; + }; // Pattern 1: {"name": "tool_name", "arguments": {...}} or {"name": "tool_name", "parameters": {...}} const jsonPattern = /\{[\s\S]*?"name"\s*:\s*"(\w+)"[\s\S]*?(?:"arguments"|"parameters"|"input")\s*:\s*(\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\})[\s\S]*?\}/g; let match; while ((match = jsonPattern.exec(text)) !== null) { - const toolName = match[1]; - if (availableTools.includes(toolName)) { + const resolvedName = resolveToolName(match[1]); + if (resolvedName) { const args = tryParseJson(match[2]); if (args && typeof args === 'object') { toolCalls.push({ id: `extracted_${Date.now()}_${toolCalls.length}`, - name: toolName, + name: resolvedName, input: args as Record, }); } @@ -112,8 +127,8 @@ export function extractToolCallsFromText(text: string, availableTools: string[]) const callingPattern = /\[Calling\s+([a-z_][a-z0-9_]*)\]\s*:\s*/gi; while ((match = callingPattern.exec(text)) !== null) { - const toolName = match[1]; - if (!availableTools.includes(toolName)) continue; + const resolvedName = resolveToolName(match[1]); + if (!resolvedName) continue; const extracted = extractJsonObjectFromIndex(text, match.index + match[0].length); if (!extracted) continue; @@ -122,7 +137,7 @@ export function extractToolCallsFromText(text: string, availableTools: string[]) if (args && typeof args === 'object') { toolCalls.push({ id: `extracted_${Date.now()}_${toolCalls.length}`, - name: toolName, 
+ name: resolvedName, input: args as Record, }); } @@ -144,10 +159,12 @@ export function extractToolCallsFromText(text: string, availableTools: string[]) // Handle array of tool calls if (Array.isArray(parsed)) { for (const item of parsed) { - if (item?.name && availableTools.includes(item.name as string)) { + if (item?.name) { + const resolvedName = resolveToolName(item.name as string); + if (!resolvedName) continue; toolCalls.push({ id: `extracted_${Date.now()}_${toolCalls.length}`, - name: item.name as string, + name: resolvedName, input: (item.arguments || item.parameters || item.input || {}) as Record, }); } @@ -156,10 +173,12 @@ export function extractToolCallsFromText(text: string, availableTools: string[]) // Handle single object else { const obj = parsed as Record; - if (obj.name && availableTools.includes(obj.name as string)) { + if (obj.name) { + const resolvedName = resolveToolName(obj.name as string); + if (!resolvedName) continue; toolCalls.push({ id: `extracted_${Date.now()}_${toolCalls.length}`, - name: obj.name as string, + name: resolvedName, input: (obj.arguments || obj.parameters || obj.input || {}) as Record, }); } diff --git a/tests/json-parser.test.ts b/tests/json-parser.test.ts index 66f9371..b7a4510 100644 --- a/tests/json-parser.test.ts +++ b/tests/json-parser.test.ts @@ -90,12 +90,17 @@ describe('json-parser', () => { }); describe('extractToolCallsFromText', () => { - const availableTools = ['read_file', 'write_file', 'bash', 'glob']; + const toolNames = ['read_file', 'write_file', 'bash', 'glob']; + const toolDefinitions = toolNames.map((name) => ({ + name, + description: `${name} tool`, + input_schema: { type: 'object', properties: {} }, + })); describe('pattern 1: inline JSON with name and arguments', () => { it('extracts tool call with "arguments" key', () => { const text = 'I will read the file: {"name": "read_file", "arguments": {"path": "test.txt"}}'; - const calls = extractToolCallsFromText(text, availableTools); + const calls = 
extractToolCallsFromText(text, toolDefinitions); expect(calls).toHaveLength(1); expect(calls[0].name).toBe('read_file'); @@ -104,7 +109,7 @@ describe('json-parser', () => { it('extracts tool call with "parameters" key', () => { const text = '{"name": "bash", "parameters": {"command": "ls -la"}}'; - const calls = extractToolCallsFromText(text, availableTools); + const calls = extractToolCallsFromText(text, toolDefinitions); expect(calls).toHaveLength(1); expect(calls[0].name).toBe('bash'); @@ -113,7 +118,7 @@ describe('json-parser', () => { it('extracts tool call with "input" key', () => { const text = '{"name": "glob", "input": {"pattern": "*.ts"}}'; - const calls = extractToolCallsFromText(text, availableTools); + const calls = extractToolCallsFromText(text, toolDefinitions); expect(calls).toHaveLength(1); expect(calls[0].name).toBe('glob'); @@ -125,7 +130,7 @@ describe('json-parser', () => { {"name": "read_file", "arguments": {"path": "a.txt"}} {"name": "read_file", "arguments": {"path": "b.txt"}} `; - const calls = extractToolCallsFromText(text, availableTools); + const calls = extractToolCallsFromText(text, toolDefinitions); expect(calls).toHaveLength(2); expect(calls[0].input).toEqual({ path: 'a.txt' }); @@ -134,7 +139,7 @@ describe('json-parser', () => { it('ignores unknown tools', () => { const text = '{"name": "unknown_tool", "arguments": {"foo": "bar"}}'; - const calls = extractToolCallsFromText(text, availableTools); + const calls = extractToolCallsFromText(text, toolDefinitions); expect(calls).toHaveLength(0); }); @@ -144,7 +149,7 @@ describe('json-parser', () => { {"name": "bash", "arguments": {"command": "ls"}} {"name": "bash", "arguments": {"command": "pwd"}} `; - const calls = extractToolCallsFromText(text, availableTools); + const calls = extractToolCallsFromText(text, toolDefinitions); expect(calls[0].id).not.toBe(calls[1].id); expect(calls[0].id).toMatch(/^extracted_/); @@ -154,7 +159,7 @@ describe('json-parser', () => { describe('pattern 2: 
[Calling tool_name]: {json} traces', () => { it('extracts tool calls from calling trace format', () => { const text = '[Calling write_file]: {"path": "notes.txt", "content": "hello"}'; - const calls = extractToolCallsFromText(text, availableTools); + const calls = extractToolCallsFromText(text, toolDefinitions); expect(calls).toHaveLength(1); expect(calls[0].name).toBe('write_file'); @@ -166,7 +171,7 @@ describe('json-parser', () => { [Calling read_file]: {"path": "a.txt"} [Calling read_file]: {"path": "b.txt"} `; - const calls = extractToolCallsFromText(text, availableTools); + const calls = extractToolCallsFromText(text, toolDefinitions); expect(calls).toHaveLength(2); expect(calls[0].input).toEqual({ path: 'a.txt' }); @@ -182,7 +187,7 @@ Here's the tool call: {"name": "read_file", "arguments": {"path": "config.json"}} \`\`\` `; - const calls = extractToolCallsFromText(text, availableTools); + const calls = extractToolCallsFromText(text, toolDefinitions); expect(calls).toHaveLength(1); expect(calls[0].name).toBe('read_file'); @@ -194,7 +199,7 @@ Here's the tool call: {"name": "bash", "arguments": {"command": "echo hello"}} \`\`\` `; - const calls = extractToolCallsFromText(text, availableTools); + const calls = extractToolCallsFromText(text, toolDefinitions); expect(calls).toHaveLength(1); expect(calls[0].name).toBe('bash'); @@ -209,7 +214,7 @@ Here's the tool call: ] \`\`\` `; - const calls = extractToolCallsFromText(text, availableTools); + const calls = extractToolCallsFromText(text, toolDefinitions); expect(calls).toHaveLength(2); expect(calls[0].name).toBe('read_file'); @@ -222,7 +227,7 @@ Here's the tool call: just some text \`\`\` `; - const calls = extractToolCallsFromText(text, availableTools); + const calls = extractToolCallsFromText(text, toolDefinitions); expect(calls).toHaveLength(0); }); @@ -233,16 +238,24 @@ just some text {"foo": "bar"} \`\`\` `; - const calls = extractToolCallsFromText(text, availableTools); + const calls = 
extractToolCallsFromText(text, toolDefinitions); expect(calls).toHaveLength(0); }); }); describe('edge cases', () => { + it('auto-corrects close tool name matches', () => { + const text = '{"name": "readfile", "arguments": {"path": "test.txt"}}'; + const calls = extractToolCallsFromText(text, toolDefinitions); + + expect(calls).toHaveLength(1); + expect(calls[0].name).toBe('read_file'); + }); + it('returns empty array for text without tool calls', () => { const text = 'Just a regular response without any tools'; - const calls = extractToolCallsFromText(text, availableTools); + const calls = extractToolCallsFromText(text, toolDefinitions); expect(calls).toEqual([]); }); @@ -256,7 +269,7 @@ just some text it('handles nested objects in arguments', () => { const text = '{"name": "write_file", "arguments": {"path": "test.json", "content": "{\\"key\\": \\"value\\"}"}}'; - const calls = extractToolCallsFromText(text, availableTools); + const calls = extractToolCallsFromText(text, toolDefinitions); expect(calls).toHaveLength(1); expect(calls[0].input).toHaveProperty('path'); @@ -264,7 +277,7 @@ just some text it('handles whitespace variations', () => { const text = '{ "name" : "bash" , "arguments" : { "command" : "ls" } }'; - const calls = extractToolCallsFromText(text, availableTools); + const calls = extractToolCallsFromText(text, toolDefinitions); expect(calls).toHaveLength(1); }); @@ -272,7 +285,7 @@ just some text it('prefers inline pattern over code block pattern', () => { // When inline pattern matches, code block pattern should not run const text = '{"name": "bash", "arguments": {"command": "ls"}}'; - const calls = extractToolCallsFromText(text, availableTools); + const calls = extractToolCallsFromText(text, toolDefinitions); expect(calls).toHaveLength(1); }); From df6f3babafaa6d858f712c930d61b2c14a43f0d6 Mon Sep 17 00:00:00 2001 From: Layne Penney Date: Thu, 15 Jan 2026 10:14:03 -0600 Subject: [PATCH 08/17] test: isolate history storage in test runs --- 
src/history.ts | 29 +++++++++++++++++++++-------- tests/history.test.ts | 35 ++++++++++++++++++++++++----------- tests/index.test.ts | 24 ++++++++++-------------- 3 files changed, 55 insertions(+), 33 deletions(-) diff --git a/src/history.ts b/src/history.ts index d662a45..80f7ae0 100644 --- a/src/history.ts +++ b/src/history.ts @@ -7,13 +7,26 @@ */ import * as fs from 'fs'; import * as path from 'path'; -import { homedir } from 'os'; +import { homedir, tmpdir } from 'os'; /** Maximum number of history entries to keep */ const MAX_HISTORY_SIZE = 50; -/** Directory where history is stored */ -const HISTORY_DIR = path.join(homedir(), '.codi', 'history'); +/** Directory where history is stored (allow test override). */ +const DEFAULT_HISTORY_DIR = path.join(homedir(), '.codi', 'history'); +const TEST_HISTORY_DIR = path.join(tmpdir(), `.codi-history-${process.pid}`); + +function resolveHistoryDir(): string { + if (process.env.CODI_HISTORY_DIR) { + return process.env.CODI_HISTORY_DIR; + } + + if (process.env.VITEST || process.env.NODE_ENV === 'test') { + return TEST_HISTORY_DIR; + } + + return DEFAULT_HISTORY_DIR; +} /** * Types of file operations that can be undone. @@ -54,21 +67,21 @@ interface HistoryIndex { * Get the path to the history index file. */ function getIndexPath(): string { - return path.join(HISTORY_DIR, 'index.json'); + return path.join(resolveHistoryDir(), 'index.json'); } /** * Get the path to a backup file. */ function getBackupPath(id: string): string { - return path.join(HISTORY_DIR, 'backups', `${id}.backup`); + return path.join(resolveHistoryDir(), 'backups', `${id}.backup`); } /** * Ensure the history directory exists. 
*/ function ensureHistoryDir(): void { - const backupsDir = path.join(HISTORY_DIR, 'backups'); + const backupsDir = path.join(resolveHistoryDir(), 'backups'); if (!fs.existsSync(backupsDir)) { fs.mkdirSync(backupsDir, { recursive: true }); } @@ -331,7 +344,7 @@ export function clearHistory(): number { const count = index.entries.length; // Delete all backup files - const backupsDir = path.join(HISTORY_DIR, 'backups'); + const backupsDir = path.join(resolveHistoryDir(), 'backups'); if (fs.existsSync(backupsDir)) { try { fs.rmSync(backupsDir, { recursive: true }); @@ -379,5 +392,5 @@ export function formatHistoryEntry(entry: HistoryEntry): string { * Get the history directory path. */ export function getHistoryDir(): string { - return HISTORY_DIR; + return resolveHistoryDir(); } diff --git a/tests/history.test.ts b/tests/history.test.ts index b23f4e5..08d7db3 100644 --- a/tests/history.test.ts +++ b/tests/history.test.ts @@ -171,10 +171,10 @@ describe('History System', () => { fs.writeFileSync('mark.txt', 'new'); - undoChange(); + const undoneEntry = undoChange(); - const history = getHistory(10, true); - expect(history[0].undone).toBe(true); + expect(undoneEntry).not.toBeNull(); + expect(undoneEntry!.undone).toBe(true); }); it('undoes most recent non-undone entry', () => { @@ -341,10 +341,18 @@ describe('History System', () => { describe('formatHistoryEntry', () => { it('formats entry for display', () => { - recordChange({ operation: 'write', filePath: 'format.txt', newContent: 'x', description: 'Test format' }); + const entryId = recordChange({ + operation: 'write', + filePath: 'format.txt', + newContent: 'x', + description: 'Test format', + }); - const history = getHistory(); - const formatted = formatHistoryEntry(history[0]); + const history = getHistory(50, true); + const entry = history.find((item) => item.id === entryId); + + expect(entry).toBeDefined(); + const formatted = formatHistoryEntry(entry!); expect(formatted).toContain('write'); 
expect(formatted).toContain('format.txt'); @@ -357,10 +365,10 @@ describe('History System', () => { recordChange({ operation: 'write', filePath: 'undone-format.txt', newContent: 'x', description: 'Will undo' }); fs.writeFileSync('undone-format.txt', 'x'); - undoChange(); + const undoneEntry = undoChange(); - const history = getHistory(10, true); - const formatted = formatHistoryEntry(history[0]); + expect(undoneEntry).not.toBeNull(); + const formatted = formatHistoryEntry(undoneEntry!); expect(formatted).toContain('(undone)'); }); @@ -369,8 +377,13 @@ describe('History System', () => { describe('getHistoryDir', () => { it('returns the history directory path', () => { const dir = getHistoryDir(); - expect(dir).toContain('.codi'); - expect(dir).toContain('history'); + if (process.env.VITEST || process.env.NODE_ENV === 'test') { + expect(dir).toContain('codi-history-'); + expect(dir).toContain(os.tmpdir()); + } else { + expect(dir).toContain('.codi'); + expect(dir).toContain('history'); + } }); }); }); diff --git a/tests/index.test.ts b/tests/index.test.ts index c11c837..1aca111 100644 --- a/tests/index.test.ts +++ b/tests/index.test.ts @@ -163,7 +163,6 @@ describe('BaseTool', () => { describe('Tool implementations (filesystem / process tools)', () => { let root: string; - let prevCwd: string; beforeEach(async () => { root = tmpDir(); @@ -171,29 +170,26 @@ describe('Tool implementations (filesystem / process tools)', () => { await fs.writeFile(path.join(root, 'a.txt'), 'hello'); await fs.mkdir(path.join(root, 'sub'), { recursive: true }); await fs.writeFile(path.join(root, 'sub', 'b.txt'), 'world'); - prevCwd = process.cwd(); - process.chdir(root); }); afterEach(async () => { - process.chdir(prevCwd); await fs.rm(root, { recursive: true, force: true }); }); it('ReadFileTool reads file contents', async () => { const tool = new ReadFileTool(); - const out = await tool.execute({ path: 'a.txt' }); + const out = await tool.execute({ path: path.join(root, 'a.txt') }); 
expect(out).toContain('hello'); }); it('ReadFileTool errors on missing file', async () => { const tool = new ReadFileTool(); - await expect(tool.execute({ path: 'nope.txt' })).rejects.toThrow(/not found/i); + await expect(tool.execute({ path: path.join(root, 'nope.txt') })).rejects.toThrow(/not found/i); }); it('WriteFileTool writes file and returns success message', async () => { const tool = new WriteFileTool(); - const out = await tool.execute({ path: 'new.txt', content: 'x' }); + const out = await tool.execute({ path: path.join(root, 'new.txt'), content: 'x' }); expect(out).toMatch(/wrote/i); await expect(fs.readFile(path.join(root, 'new.txt'), 'utf8')).resolves.toBe('x'); }); @@ -202,7 +198,7 @@ describe('Tool implementations (filesystem / process tools)', () => { await fs.writeFile(path.join(root, 'edit.txt'), 'one two three'); const tool = new EditFileTool(); const out = await tool.execute({ - path: 'edit.txt', + path: path.join(root, 'edit.txt'), old_string: 'two', new_string: 'TWO', }); @@ -214,42 +210,42 @@ describe('Tool implementations (filesystem / process tools)', () => { await fs.writeFile(path.join(root, 'edit2.txt'), 'abc'); const tool = new EditFileTool(); await expect( - tool.execute({ path: 'edit2.txt', old_string: 'zzz', new_string: 'x' }), + tool.execute({ path: path.join(root, 'edit2.txt'), old_string: 'zzz', new_string: 'x' }), ).rejects.toThrow(/not found/i); }); it('InsertLineTool inserts at given line', async () => { await fs.writeFile(path.join(root, 'i.txt'), '1\n2\n3\n'); const tool = new InsertLineTool(); - const out = await tool.execute({ path: 'i.txt', line: 2, content: 'X' }); + const out = await tool.execute({ path: path.join(root, 'i.txt'), line: 2, content: 'X' }); expect(out).toMatch(/inserted/i); await expect(fs.readFile(path.join(root, 'i.txt'), 'utf8')).resolves.toBe('1\nX\n2\n3\n'); }); it('GlobTool returns matching paths', async () => { const tool = new GlobTool(); - const out = await tool.execute({ pattern: '**/*.txt' }); 
+ const out = await tool.execute({ pattern: '**/*.txt', cwd: root }); expect(out).toContain('a.txt'); expect(out).toContain('sub/b.txt'); }); it('GrepTool finds matches in files', async () => { const tool = new GrepTool(); - const out = await tool.execute({ pattern: 'world', path: '.' }); + const out = await tool.execute({ pattern: 'world', path: root }); expect(out).toContain('sub/b.txt'); expect(out).toContain('world'); }); it('ListDirectoryTool lists directory contents', async () => { const tool = new ListDirectoryTool(); - const out = await tool.execute({ path: '.' }); + const out = await tool.execute({ path: root }); expect(out).toContain('a.txt'); expect(out).toContain('sub'); }); it('BashTool runs command', async () => { const tool = new BashTool(); - const out = await tool.execute({ command: 'echo hello' }); + const out = await tool.execute({ command: 'echo hello', cwd: root }); expect(out).toContain('hello'); }); }); From f82c1b3e82452bde28a18f1b9cd429760e322c8d Mon Sep 17 00:00:00 2001 From: Layne Penney Date: Thu, 15 Jan 2026 10:32:07 -0600 Subject: [PATCH 09/17] fix: harden tool extraction and symbol-index tests --- src/agent.ts | 10 ++++- src/providers/ollama-cloud.ts | 40 +++++++++++-------- tests/symbol-index.test.ts | 73 +++++++++++++++++++++++++++++------ 3 files changed, 94 insertions(+), 29 deletions(-) diff --git a/src/agent.ts b/src/agent.ts index 2e5bcb0..cb2b8ff 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -578,8 +578,14 @@ Always use tools to interact with the filesystem rather than asking the user to if (response.toolCalls.length === 0 && this.useTools && this.extractToolsFromText) { const toolDefinitions = this.toolRegistry.getDefinitions(); const fallbackConfig = this.toolRegistry.getFallbackConfig(); - const extractionText = [response.content, response.reasoningContent].filter(Boolean).join('\n'); - if (extractionText) { + const extractionText = response.content; + const contentMatchesReasoning = Boolean( + response.content && + 
response.reasoningContent && + response.content.trim() === response.reasoningContent.trim() + ); + + if (extractionText && !contentMatchesReasoning) { const extractedCalls = extractToolCallsFromText(extractionText, toolDefinitions, fallbackConfig); if (extractedCalls.length > 0) { response.toolCalls = extractedCalls; diff --git a/src/providers/ollama-cloud.ts b/src/providers/ollama-cloud.ts index b9e99cc..fa57e6f 100644 --- a/src/providers/ollama-cloud.ts +++ b/src/providers/ollama-cloud.ts @@ -13,6 +13,7 @@ import { withRetry, type RetryOptions } from './retry.js'; import { getProviderRateLimiter, type RateLimiter } from './rate-limiter.js'; import { messageToText } from './message-converter.js'; import type { Message, ToolDefinition, ProviderResponse, ProviderConfig, ToolCall } from '../types.js'; +import { DEFAULT_FALLBACK_CONFIG, findBestToolMatch } from '../tools/tool-fallback.js'; /** Ollama message format */ interface OllamaMessage { @@ -229,9 +230,7 @@ export class OllamaCloudProvider extends BaseProvider { const useFallbackContent = !hasContent && combinedThinking.length > 0; const finalContent = useFallbackContent ? combinedThinking : thinkingCleanedContent; const reasoningContent = combinedThinking || undefined; - const toolExtractionText = combinedThinking && !finalContent.includes(combinedThinking) - ? `${finalContent}\n${combinedThinking}` - : finalContent; + const toolExtractionText = thinkingCleanedContent; // Fall back to extracting tool calls from text if no native calls if (toolCalls.length === 0 && tools && tools.length > 0) { @@ -373,9 +372,7 @@ export class OllamaCloudProvider extends BaseProvider { const useFallbackContent = !hasContent && combinedThinking.length > 0; const finalContent = useFallbackContent ? combinedThinking : thinkingCleanedContent; const reasoningContent = combinedThinking || undefined; - const toolExtractionText = combinedThinking && !finalContent.includes(combinedThinking) - ? 
`${finalContent}\n${combinedThinking}` - : finalContent; + const toolExtractionText = thinkingCleanedContent; if (streamedContentChars === 0 && finalContent && onChunk && streamedThinkingChars === 0) { onChunk(finalContent); @@ -500,7 +497,12 @@ export class OllamaCloudProvider extends BaseProvider { */ private extractToolCalls(content: string, tools: ToolDefinition[]): ToolCall[] { const toolCalls: ToolCall[] = []; - const toolNames = new Set(tools.map(t => t.name)); + const resolveToolName = (requestedName: string): string | null => { + const match = findBestToolMatch(requestedName, tools, DEFAULT_FALLBACK_CONFIG); + if (match.exactMatch) return requestedName; + if (match.shouldAutoCorrect && match.matchedName) return match.matchedName; + return null; + }; // Pattern 1: JSON in code blocks - most reliable const codeBlockPattern = /```(?:json)?\s*([\s\S]*?)```/g; @@ -508,7 +510,7 @@ export class OllamaCloudProvider extends BaseProvider { while ((match = codeBlockPattern.exec(content)) !== null) { const jsonContent = match[1].trim(); - const extracted = this.tryParseToolCall(jsonContent, toolNames); + const extracted = this.tryParseToolCall(jsonContent, resolveToolName); if (extracted) { toolCalls.push(extracted); } @@ -528,11 +530,12 @@ export class OllamaCloudProvider extends BaseProvider { const normalizedName = this.normalizeToolName(rawToolName); const argsString = match[2]; - if (toolNames.has(normalizedName)) { + const resolvedName = resolveToolName(normalizedName); + if (resolvedName) { const args = this.parseFunctionCallArgs(argsString); toolCalls.push({ id: `extracted_${Date.now()}_${Math.random().toString(36).substring(2, 9)}`, - name: normalizedName, + name: resolvedName, input: args, }); } @@ -552,12 +555,13 @@ export class OllamaCloudProvider extends BaseProvider { const normalizedName = this.normalizeToolName(rawToolName); const jsonArgs = match[2]; - if (toolNames.has(normalizedName)) { + const resolvedName = resolveToolName(normalizedName); + if 
(resolvedName) { try { const args = JSON.parse(jsonArgs); toolCalls.push({ id: `extracted_${Date.now()}_${Math.random().toString(36).substring(2, 9)}`, - name: normalizedName, + name: resolvedName, input: args, }); } catch { @@ -575,7 +579,7 @@ export class OllamaCloudProvider extends BaseProvider { const jsonPattern = /\{(?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*\}/g; while ((match = jsonPattern.exec(content)) !== null) { - const extracted = this.tryParseToolCall(match[0], toolNames); + const extracted = this.tryParseToolCall(match[0], resolveToolName); if (extracted) { toolCalls.push(extracted); } @@ -620,17 +624,21 @@ export class OllamaCloudProvider extends BaseProvider { /** * Try to parse a JSON string as a tool call. */ - private tryParseToolCall(jsonString: string, validToolNames: Set): ToolCall | null { + private tryParseToolCall( + jsonString: string, + resolveToolName: (requestedName: string) => string | null + ): ToolCall | null { try { const parsed = JSON.parse(jsonString); // Check if it has a valid tool name (normalize to strip prefixes) if (parsed.name) { const normalizedName = this.normalizeToolName(parsed.name); - if (validToolNames.has(normalizedName)) { + const resolvedName = resolveToolName(normalizedName); + if (resolvedName) { return { id: `extracted_${Date.now()}_${Math.random().toString(36).substring(2, 9)}`, - name: normalizedName, + name: resolvedName, input: parsed.arguments || parsed.input || parsed.parameters || {}, }; } diff --git a/tests/symbol-index.test.ts b/tests/symbol-index.test.ts index 9468bf0..445267a 100644 --- a/tests/symbol-index.test.ts +++ b/tests/symbol-index.test.ts @@ -9,11 +9,39 @@ */ import { describe, it, expect, beforeAll, afterAll } from 'vitest'; +import { createRequire } from 'module'; import * as fs from 'fs'; import * as path from 'path'; import * as os from 'os'; -import { SymbolIndexService } from '../src/symbol-index/service.js'; -import { SymbolDatabase, getIndexDirectory } from 
'../src/symbol-index/database.js'; + +type SymbolIndexServiceCtor = typeof import('../src/symbol-index/service.js').SymbolIndexService; +type SymbolIndexServiceInstance = InstanceType; +type GetIndexDirectoryFn = typeof import('../src/symbol-index/database.js').getIndexDirectory; + +const require = createRequire(import.meta.url); +let sqliteLoadError: string | null = null; + +try { + const Database = require('better-sqlite3'); + const db = new Database(':memory:'); + db.close(); +} catch (error) { + sqliteLoadError = error instanceof Error ? error.message : String(error); +} + +if (sqliteLoadError) { + console.warn(`[tests] Skipping symbol-index tests: ${sqliteLoadError}`); +} + +const describeSymbolIndex = sqliteLoadError ? describe.skip : describe; +let SymbolIndexService: SymbolIndexServiceCtor | null = null; +let getIndexDirectory: GetIndexDirectoryFn | null = null; +const getSymbolIndexServiceCtor = (): SymbolIndexServiceCtor => { + if (!SymbolIndexService) { + throw new Error('SymbolIndexService not loaded.'); + } + return SymbolIndexService; +}; // Test fixtures directory const TEST_DIR = path.join(os.tmpdir(), 'symbol-index-test-' + Date.now()); @@ -264,27 +292,47 @@ class MainActivity : ComponentActivity() { }, null, 2)); } -describe('Symbol Index Validation Suite', () => { - let service: SymbolIndexService; +describeSymbolIndex('Symbol Index Validation Suite', () => { + let service: SymbolIndexServiceInstance | null = null; beforeAll(async () => { + if (sqliteLoadError) { + return; + } + + const serviceModule = await import('../src/symbol-index/service.js'); + const databaseModule = await import('../src/symbol-index/database.js'); + SymbolIndexService = serviceModule.SymbolIndexService; + getIndexDirectory = databaseModule.getIndexDirectory; + + if (!SymbolIndexService || !getIndexDirectory) { + throw new Error('Symbol index dependencies failed to load.'); + } + // Create test project createTestProject(); // Initialize service and build index with deep 
indexing enabled // (needed for usage-based dependency tests) - service = new SymbolIndexService(TEST_DIR); + const Service = getSymbolIndexServiceCtor(); + service = new Service(TEST_DIR); await service.initialize(); await service.rebuild({ deepIndex: true }); }); afterAll(() => { + if (!service) { + return; + } + service.close(); // Clean up test directory fs.rmSync(TEST_DIR, { recursive: true, force: true }); // Clean up index directory - const indexDir = getIndexDirectory(TEST_DIR); - fs.rmSync(indexDir, { recursive: true, force: true }); + if (getIndexDirectory) { + const indexDir = getIndexDirectory(TEST_DIR); + fs.rmSync(indexDir, { recursive: true, force: true }); + } }); // ========================================================================= @@ -633,7 +681,8 @@ describe('Symbol Index Validation Suite', () => { describe('Parallel Processing', () => { it('should accept parallelJobs option in rebuild', async () => { // Create a fresh service for this test - const testService = new SymbolIndexService(TEST_DIR); + const Service = getSymbolIndexServiceCtor(); + const testService = new Service(TEST_DIR); await testService.initialize(); // Rebuild with different job counts should work @@ -650,7 +699,8 @@ describe('Symbol Index Validation Suite', () => { it('should produce same dependencies with different parallelJobs values', async () => { // Rebuild with 1 job - const testService1 = new SymbolIndexService(TEST_DIR); + const Service = getSymbolIndexServiceCtor(); + const testService1 = new Service(TEST_DIR); await testService1.initialize(); await testService1.rebuild({ deepIndex: true, parallelJobs: 1 }); const deps1 = testService1.getDependencyGraph( @@ -660,7 +710,7 @@ describe('Symbol Index Validation Suite', () => { ); // Rebuild with 4 jobs - const testService4 = new SymbolIndexService(TEST_DIR); + const testService4 = new Service(TEST_DIR); await testService4.initialize(); await testService4.rebuild({ deepIndex: true, parallelJobs: 4 }); const deps4 = 
testService4.getDependencyGraph( @@ -676,7 +726,8 @@ describe('Symbol Index Validation Suite', () => { }); it('should default to 4 parallel jobs when not specified', async () => { - const testService = new SymbolIndexService(TEST_DIR); + const Service = getSymbolIndexServiceCtor(); + const testService = new Service(TEST_DIR); await testService.initialize(); // Just verify it doesn't throw with default parallelJobs From 19f312a78ea72cb8e1427673b6359bf93b1f62e2 Mon Sep 17 00:00:00 2001 From: Layne Penney Date: Thu, 15 Jan 2026 10:49:00 -0600 Subject: [PATCH 10/17] fix: parse running tool traces --- src/tools/bash.ts | 28 +++++++++++++++++++++++++++- src/utils/json-parser.ts | 4 ++-- tests/json-parser.test.ts | 12 ++++++++++++ 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/src/tools/bash.ts b/src/tools/bash.ts index 048b7e0..b16adf3 100644 --- a/src/tools/bash.ts +++ b/src/tools/bash.ts @@ -35,7 +35,8 @@ export class BashTool extends BaseTool { } async execute(input: Record): Promise { - const command = input.command as string; + const rawCommand = input.command; + const command = this.normalizeCommandInput(rawCommand); const cwd = (input.cwd as string) || process.cwd(); if (!command) { @@ -112,6 +113,31 @@ export class BashTool extends BaseTool { }); } + private normalizeCommandInput(command: unknown): string | null { + if (typeof command === 'string') { + return command; + } + + if (Array.isArray(command)) { + const parts = command.filter((part): part is string => typeof part === 'string' && part.trim() !== ''); + if (parts.length === 0) { + return null; + } + + if (parts[0] === 'bash' && parts[1] === '-lc') { + const script = parts.slice(2).join(' '); + if (!script) { + return null; + } + return `bash -lc ${JSON.stringify(script)}`; + } + + return parts.join(' '); + } + + return null; + } + /** * Format command output with clear structure. 
*/ diff --git a/src/utils/json-parser.ts b/src/utils/json-parser.ts index 399534f..cf592f0 100644 --- a/src/utils/json-parser.ts +++ b/src/utils/json-parser.ts @@ -122,9 +122,9 @@ export function extractToolCallsFromText( } } - // Pattern 2: [Calling tool_name]: {json} format + // Pattern 2: [Calling tool_name]: {json} or [Running tool_name] {json} format if (toolCalls.length === 0) { - const callingPattern = /\[Calling\s+([a-z_][a-z0-9_]*)\]\s*:\s*/gi; + const callingPattern = /\[(?:Calling|Running)\s+([a-z_][a-z0-9_]*)\]\s*:?\s*/gi; while ((match = callingPattern.exec(text)) !== null) { const resolvedName = resolveToolName(match[1]); diff --git a/tests/json-parser.test.ts b/tests/json-parser.test.ts index b7a4510..a8977cf 100644 --- a/tests/json-parser.test.ts +++ b/tests/json-parser.test.ts @@ -177,6 +177,18 @@ describe('json-parser', () => { expect(calls[0].input).toEqual({ path: 'a.txt' }); expect(calls[1].input).toEqual({ path: 'b.txt' }); }); + + it('extracts tool calls from running trace format without colon', () => { + const text = '[Running bash]{"cmd": ["bash", "-lc", "git status --porcelain"], "timeout": 100000}'; + const calls = extractToolCallsFromText(text, toolDefinitions); + + expect(calls).toHaveLength(1); + expect(calls[0].name).toBe('bash'); + expect(calls[0].input).toEqual({ + cmd: ['bash', '-lc', 'git status --porcelain'], + timeout: 100000, + }); + }); }); describe('pattern 3: JSON in code blocks', () => { From 0dab579ad0510bd8f3fa0bd312ad04309af8159c Mon Sep 17 00:00:00 2001 From: Layne Penney Date: Thu, 15 Jan 2026 11:29:43 -0600 Subject: [PATCH 11/17] fix: extract tool calls from reasoning --- src/agent.ts | 35 +++++++++++++++++++-------- tests/index.test.ts | 59 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 10 deletions(-) diff --git a/src/agent.ts b/src/agent.ts index cb2b8ff..037f2ab 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -578,19 +578,34 @@ Always use tools to interact with the filesystem 
rather than asking the user to if (response.toolCalls.length === 0 && this.useTools && this.extractToolsFromText) { const toolDefinitions = this.toolRegistry.getDefinitions(); const fallbackConfig = this.toolRegistry.getFallbackConfig(); - const extractionText = response.content; + const contentText = response.content?.trim() || ''; + const reasoningText = response.reasoningContent?.trim() || ''; const contentMatchesReasoning = Boolean( - response.content && - response.reasoningContent && - response.content.trim() === response.reasoningContent.trim() + contentText && + reasoningText && + contentText === reasoningText ); + const toolTracePattern = /\[(?:Calling|Running)\s+[a-z_][a-z0-9_]*\]|\{\s*"name"\s*:\s*"[a-z_][a-z0-9_]*"/i; + const hasToolTrace = (text: string): boolean => toolTracePattern.test(text); - if (extractionText && !contentMatchesReasoning) { - const extractedCalls = extractToolCallsFromText(extractionText, toolDefinitions, fallbackConfig); - if (extractedCalls.length > 0) { - response.toolCalls = extractedCalls; - response.stopReason = 'tool_use'; - } + let extractedCalls: ToolCall[] = []; + + if (contentText && (!contentMatchesReasoning || hasToolTrace(contentText))) { + extractedCalls = extractToolCallsFromText(contentText, toolDefinitions, fallbackConfig); + } + + if ( + extractedCalls.length === 0 && + !contentText && + reasoningText && + hasToolTrace(reasoningText) + ) { + extractedCalls = extractToolCallsFromText(reasoningText, toolDefinitions, fallbackConfig); + } + + if (extractedCalls.length > 0) { + response.toolCalls = extractedCalls; + response.stopReason = 'tool_use'; } } diff --git a/tests/index.test.ts b/tests/index.test.ts index 1aca111..a8b67a8 100644 --- a/tests/index.test.ts +++ b/tests/index.test.ts @@ -335,6 +335,65 @@ describe('Agent', () => { expect(info.messages).toBe(0); expect(info.hasSummary).toBe(false); }); + + it('extracts tool calls from reasoning when content is empty', async () => { + const toolRegistry = new 
ToolRegistry(); + let receivedInput: Record | null = null; + + class CaptureTool extends BaseTool { + getDefinition() { + return { + name: 'capture', + description: 'capture input', + input_schema: { + type: 'object' as const, + properties: { + value: { type: 'number' }, + }, + required: ['value'], + }, + }; + } + + async execute(input: Record): Promise { + receivedInput = input; + return 'ok'; + } + } + + toolRegistry.register(new CaptureTool()); + + const mockProvider = { + streamChat: vi.fn() + .mockImplementationOnce(async (_messages, _tools, _onChunk, _systemPrompt, onReasoningChunk) => { + const reasoning = '[Calling capture]: {"value": 42}'; + onReasoningChunk?.(reasoning); + return { + content: '', + toolCalls: [], + stopReason: 'end_turn', + reasoningContent: reasoning, + }; + }) + .mockImplementationOnce(async () => ({ + content: 'done', + toolCalls: [], + stopReason: 'end_turn', + })), + supportsToolUse: () => true, + getName: () => 'mock', + getModel: () => 'mock-model', + }; + + const agent = new Agent({ + provider: mockProvider as any, + toolRegistry, + }); + + const result = await agent.chat('continue'); + expect(result).toBe('done'); + expect(receivedInput).toEqual({ value: 42 }); + }); }); describe('Providers', () => { From 5960a30033af523f1502f71002ac227678a19932 Mon Sep 17 00:00:00 2001 From: Layne Penney Date: Thu, 15 Jan 2026 11:46:38 -0600 Subject: [PATCH 12/17] feat: make context window configurable --- src/agent.ts | 9 +++++++-- src/config.ts | 20 ++++++++++++++++++++ src/index.ts | 14 +++++++++++++- 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/src/agent.ts b/src/agent.ts index 037f2ab..82d4ec1 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -91,6 +91,7 @@ export interface AgentOptions { logLevel?: LogLevel; // Log level for debug output (replaces debug) debug?: boolean; // @deprecated Use logLevel instead enableCompression?: boolean; // Enable entity-reference compression for context + maxContextTokens?: number; // Maximum 
context tokens before compaction secondaryProvider?: BaseProvider | null; // Optional secondary provider for summarization modelMap?: ModelMap | null; // Optional model map for multi-model orchestration auditLogger?: AuditLogger | null; // Optional audit logger for session debugging @@ -123,6 +124,7 @@ export class Agent { private customDangerousPatterns: Array<{ pattern: RegExp; description: string }>; private logLevel: LogLevel; private enableCompression: boolean; + private maxContextTokens: number; private auditLogger: AuditLogger | null = null; private messages: Message[] = []; private conversationSummary: string | null = null; @@ -165,6 +167,7 @@ export class Agent { // Support both logLevel and deprecated debug option this.logLevel = options.logLevel ?? (options.debug ? LogLevel.DEBUG : LogLevel.NORMAL); this.enableCompression = options.enableCompression ?? false; + this.maxContextTokens = options.maxContextTokens ?? AGENT_CONFIG.MAX_CONTEXT_TOKENS; this.auditLogger = options.auditLogger ?? 
null; this.systemPrompt = options.systemPrompt || this.getDefaultSystemPrompt(); this.callbacks = { @@ -341,11 +344,11 @@ Always use tools to interact with the filesystem rather than asking the user to private async compactContext(): Promise { const totalTokens = countMessageTokens(this.messages); - if (totalTokens <= AGENT_CONFIG.MAX_CONTEXT_TOKENS) { + if (totalTokens <= this.maxContextTokens) { return; // No compaction needed } - logger.debug(`Compacting: ${totalTokens} tokens exceeds ${AGENT_CONFIG.MAX_CONTEXT_TOKENS} limit`); + logger.debug(`Compacting: ${totalTokens} tokens exceeds ${this.maxContextTokens} limit`); // Score messages by importance const scores = scoreMessages(this.messages, CONTEXT_OPTIMIZATION.WEIGHTS); @@ -1096,6 +1099,7 @@ Always use tools to interact with the filesystem rather than asking the user to */ getContextInfo(): { tokens: number; + maxTokens: number; messages: number; hasSummary: boolean; compression: CompressionStats | null; @@ -1104,6 +1108,7 @@ Always use tools to interact with the filesystem rather than asking the user to } { return { tokens: countMessageTokens(this.messages), + maxTokens: this.maxContextTokens, messages: this.messages.length, hasSummary: this.conversationSummary !== null, compression: this.lastCompressionStats, diff --git a/src/config.ts b/src/config.ts index 66a2ad9..fac9acb 100644 --- a/src/config.ts +++ b/src/config.ts @@ -3,6 +3,7 @@ import * as fs from 'fs'; import * as path from 'path'; +import { AGENT_CONFIG } from './constants.js'; /** * Workspace configuration for Codi. 
@@ -69,6 +70,9 @@ export interface WorkspaceConfig { /** Enable context compression (reduces token usage) */ enableCompression?: boolean; + /** Maximum context tokens before compaction */ + maxContextTokens?: number; + /** Context optimization settings */ contextOptimization?: { /** Enable semantic deduplication (merge case variants) */ @@ -215,6 +219,7 @@ export interface ResolvedConfig { commandAliases: Record; projectContext?: string; enableCompression: boolean; + maxContextTokens: number; /** Secondary model for summarization */ summarizeProvider?: string; summarizeModel?: string; @@ -235,6 +240,7 @@ const DEFAULT_CONFIG: ResolvedConfig = { extractToolsFromText: true, commandAliases: {}, enableCompression: true, // Enabled by default for token savings + maxContextTokens: AGENT_CONFIG.MAX_CONTEXT_TOKENS, toolsConfig: { disabled: [], defaults: {}, @@ -318,6 +324,12 @@ export function validateConfig(config: WorkspaceConfig): string[] { } } + if (config.maxContextTokens !== undefined) { + if (!Number.isFinite(config.maxContextTokens) || config.maxContextTokens <= 0) { + warnings.push('maxContextTokens must be a positive number'); + } + } + return warnings; } @@ -337,6 +349,7 @@ export function mergeConfig( session?: string; summarizeProvider?: string; summarizeModel?: string; + maxContextTokens?: number; } ): ResolvedConfig { const config: ResolvedConfig = { ...DEFAULT_CONFIG }; @@ -360,6 +373,9 @@ export function mergeConfig( if (workspaceConfig.commandAliases) config.commandAliases = workspaceConfig.commandAliases; if (workspaceConfig.projectContext) config.projectContext = workspaceConfig.projectContext; if (workspaceConfig.enableCompression !== undefined) config.enableCompression = workspaceConfig.enableCompression; + if (workspaceConfig.maxContextTokens !== undefined && Number.isFinite(workspaceConfig.maxContextTokens)) { + config.maxContextTokens = workspaceConfig.maxContextTokens; + } // Summarize model from workspace config if 
(workspaceConfig.models?.summarize?.provider) config.summarizeProvider = workspaceConfig.models.summarize.provider; if (workspaceConfig.models?.summarize?.model) config.summarizeModel = workspaceConfig.models.summarize.model; @@ -376,6 +392,9 @@ export function mergeConfig( if (cliOptions.baseUrl) config.baseUrl = cliOptions.baseUrl; if (cliOptions.endpointId) config.endpointId = cliOptions.endpointId; if (cliOptions.session) config.defaultSession = cliOptions.session; + if (cliOptions.maxContextTokens !== undefined && Number.isFinite(cliOptions.maxContextTokens)) { + config.maxContextTokens = cliOptions.maxContextTokens; + } // CLI --yes flag adds all tools to autoApprove if (cliOptions.yes) { @@ -462,6 +481,7 @@ export function getExampleConfig(): string { }, projectContext: '', enableCompression: true, + maxContextTokens: AGENT_CONFIG.MAX_CONTEXT_TOKENS, models: { summarize: { provider: 'ollama', diff --git a/src/index.ts b/src/index.ts index 220a912..753d3d7 100644 --- a/src/index.ts +++ b/src/index.ts @@ -326,6 +326,7 @@ program .option('--trace', 'Show full request/response payloads') .option('-s, --session ', 'Load a saved session on startup') .option('-c, --compress', 'Context compression (enabled by default, use --no-compress to disable)') + .option('--context-window ', 'Context window size (tokens) before compaction') .option('--summarize-model ', 'Model to use for summarization (default: primary model)') .option('--summarize-provider ', 'Provider for summarization model (default: primary provider)') .option('--mcp-server', 'Run as MCP server (stdio transport) - exposes tools to other MCP clients') @@ -2143,6 +2144,15 @@ async function main() { } // Merge workspace config with CLI options + const parsedContextWindow = options.contextWindow ? Number(options.contextWindow) : NaN; + const contextWindowTokens = Number.isFinite(parsedContextWindow) && parsedContextWindow > 0 + ? 
Math.floor(parsedContextWindow) + : undefined; + + if (options.contextWindow && contextWindowTokens === undefined) { + console.warn(chalk.yellow('Invalid --context-window value; expected a positive number.')); + } + const resolvedConfig = mergeConfig(workspaceConfig, { provider: options.provider, model: options.model, @@ -2153,6 +2163,7 @@ async function main() { session: options.session, summarizeProvider: options.summarizeProvider, summarizeModel: options.summarizeModel, + maxContextTokens: contextWindowTokens, }); // Register tools and commands @@ -2465,6 +2476,7 @@ async function main() { customDangerousPatterns, logLevel, enableCompression: options.compress ?? resolvedConfig.enableCompression, + maxContextTokens: resolvedConfig.maxContextTokens, onText: (text) => { // Stop spinner when we start receiving text if (!isStreaming) { @@ -2705,7 +2717,7 @@ async function main() { if (trimmed === '/status') { const info = agent.getContextInfo(); console.log(chalk.bold('\nContext Status:')); - console.log(chalk.dim(` Tokens: ${info.tokens} / 8000`)); + console.log(chalk.dim(` Tokens: ${info.tokens} / ${info.maxTokens}`)); console.log(chalk.dim(` Messages: ${info.messages}`)); console.log(chalk.dim(` Has summary: ${info.hasSummary ? 'yes' : 'no'}`)); console.log(chalk.dim(` Compression: ${info.compressionEnabled ? 'enabled' : 'disabled'}`)); From d3ff2e9076978105ac46ac377ef1d9c4f94fc104 Mon Sep 17 00:00:00 2001 From: Layne Penney Date: Thu, 15 Jan 2026 13:54:13 -0600 Subject: [PATCH 13/17] feat: discourage fenced bash in responses --- src/index.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/index.ts b/src/index.ts index 753d3d7..570f7cd 100644 --- a/src/index.ts +++ b/src/index.ts @@ -368,6 +368,13 @@ function generateSystemPrompt(projectInfo: ProjectInfo | null, useTools: boolean 5. **Handle errors**: Include appropriate error handling 6. 
**Test awareness**: Consider how changes affect tests +## Tool Use Rules +- The tool list below is authoritative for this run. Use only these tool names and their parameters. +- When you need a tool, emit a tool call (do not describe tool usage in plain text). +- Do not put tool-call syntax or commands in your normal response. +- Do not present shell commands in fenced code blocks like \`\`\`bash\`\`\`; use the bash tool instead. +- Wait for tool results before continuing; if a tool fails, explain and try a different tool. + ## Available Tools ### File Operations From 894432eb3afed2f3782a4b020801606cdb861df8 Mon Sep 17 00:00:00 2001 From: Layne Penney Date: Fri, 16 Jan 2026 06:36:18 -0600 Subject: [PATCH 14/17] fix: address PR feedback --- ROADMAP.md | 1 + src/agent.ts | 2 +- src/config.ts | 9 +++++++++ src/index.ts | 1 + src/providers/ollama-cloud.ts | 23 +++++++++++++++-------- src/tools/bash.ts | 20 +++++++++++++++----- src/types.ts | 2 ++ 7 files changed, 44 insertions(+), 14 deletions(-) diff --git a/ROADMAP.md b/ROADMAP.md index eee379f..e0b208f 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -29,6 +29,7 @@ This would help bridge the gap between different model training data and Codi's **Current Mitigations**: - Added parameter aliases to `grep` tool (`query` -> `pattern`, `max_results`/`max`/`limit` -> `head_limit`) - Added `print_tree` tool (commonly expected by models) +- Consider a vector-embedding index for tool/parameter semantics (similar to `search_codebase`) to improve matches beyond string similarity. --- diff --git a/src/agent.ts b/src/agent.ts index 82d4ec1..6aab93c 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -643,7 +643,7 @@ Always use tools to interact with the filesystem rather than asking the user to } else if (isExtractedToolCall) { // For extracted tool calls, store as plain text (model doesn't understand tool_use blocks) const combinedContent = thinkingText - ? `${response.content || ''}${response.content ? 
'\n\n' : ''}[Thinking]:\n${thinkingText}` + ? `[Thinking]:\n${thinkingText}${response.content ? `\n\n${response.content}` : ''}` : (response.content || ''); this.messages.push({ role: 'assistant', diff --git a/src/config.ts b/src/config.ts index fac9acb..df28c12 100644 --- a/src/config.ts +++ b/src/config.ts @@ -73,6 +73,9 @@ export interface WorkspaceConfig { /** Maximum context tokens before compaction */ maxContextTokens?: number; + /** Strip hallucinated tool traces from provider content (provider-specific) */ + cleanHallucinatedTraces?: boolean; + /** Context optimization settings */ contextOptimization?: { /** Enable semantic deduplication (merge case variants) */ @@ -220,6 +223,7 @@ export interface ResolvedConfig { projectContext?: string; enableCompression: boolean; maxContextTokens: number; + cleanHallucinatedTraces: boolean; /** Secondary model for summarization */ summarizeProvider?: string; summarizeModel?: string; @@ -241,6 +245,7 @@ const DEFAULT_CONFIG: ResolvedConfig = { commandAliases: {}, enableCompression: true, // Enabled by default for token savings maxContextTokens: AGENT_CONFIG.MAX_CONTEXT_TOKENS, + cleanHallucinatedTraces: false, toolsConfig: { disabled: [], defaults: {}, @@ -376,6 +381,9 @@ export function mergeConfig( if (workspaceConfig.maxContextTokens !== undefined && Number.isFinite(workspaceConfig.maxContextTokens)) { config.maxContextTokens = workspaceConfig.maxContextTokens; } + if (workspaceConfig.cleanHallucinatedTraces !== undefined) { + config.cleanHallucinatedTraces = workspaceConfig.cleanHallucinatedTraces; + } // Summarize model from workspace config if (workspaceConfig.models?.summarize?.provider) config.summarizeProvider = workspaceConfig.models.summarize.provider; if (workspaceConfig.models?.summarize?.model) config.summarizeModel = workspaceConfig.models.summarize.model; @@ -482,6 +490,7 @@ export function getExampleConfig(): string { projectContext: '', enableCompression: true, maxContextTokens: 
AGENT_CONFIG.MAX_CONTEXT_TOKENS, + cleanHallucinatedTraces: false, models: { summarize: { provider: 'ollama', diff --git a/src/index.ts b/src/index.ts index 570f7cd..1e12242 100644 --- a/src/index.ts +++ b/src/index.ts @@ -2338,6 +2338,7 @@ async function main() { model: resolvedConfig.model, baseUrl: resolvedConfig.baseUrl, endpointId: resolvedConfig.endpointId, + cleanHallucinatedTraces: resolvedConfig.cleanHallucinatedTraces, }); } diff --git a/src/providers/ollama-cloud.ts b/src/providers/ollama-cloud.ts index fa57e6f..e6b0e0b 100644 --- a/src/providers/ollama-cloud.ts +++ b/src/providers/ollama-cloud.ts @@ -237,10 +237,7 @@ export class OllamaCloudProvider extends BaseProvider { toolCalls = this.extractToolCalls(toolExtractionText, tools); } - // Clean hallucinated traces from content (after tool extraction) - const cleanedContent = toolCalls.length > 0 - ? this.cleanHallucinatedTraces(finalContent) - : finalContent; + const cleanedContent = this.maybeCleanHallucinatedTraces(finalContent, toolCalls); return createProviderResponse({ content: cleanedContent, @@ -384,10 +381,7 @@ export class OllamaCloudProvider extends BaseProvider { toolCalls = this.extractToolCalls(toolExtractionText, tools); } - // Clean hallucinated traces from content (after tool extraction) - const cleanedContent = toolCalls.length > 0 - ? 
this.cleanHallucinatedTraces(finalContent) - : finalContent; + const cleanedContent = this.maybeCleanHallucinatedTraces(finalContent, toolCalls); return createProviderResponse({ content: cleanedContent, @@ -672,6 +666,19 @@ export class OllamaCloudProvider extends BaseProvider { return { content: cleanedContent, thinking }; } + private maybeCleanHallucinatedTraces(content: string, toolCalls: ToolCall[]): string { + if (!this.config.cleanHallucinatedTraces || toolCalls.length === 0) { + return content; + } + + const cleanedContent = this.cleanHallucinatedTraces(content); + if (cleanedContent !== content) { + console.warn('[ollama-cloud] Cleaned hallucinated tool traces from model output.'); + } + + return cleanedContent; + } + /** * Clean hallucinated agent trace patterns from content. * Some models output fake "[Calling tool]: {json}[Result from tool]: result" traces. diff --git a/src/tools/bash.ts b/src/tools/bash.ts index b16adf3..e33c77b 100644 --- a/src/tools/bash.ts +++ b/src/tools/bash.ts @@ -114,6 +114,10 @@ export class BashTool extends BaseTool { } private normalizeCommandInput(command: unknown): string | null { + if (command === null || command === undefined) { + return null; + } + if (typeof command === 'string') { return command; } @@ -121,21 +125,27 @@ export class BashTool extends BaseTool { if (Array.isArray(command)) { const parts = command.filter((part): part is string => typeof part === 'string' && part.trim() !== ''); if (parts.length === 0) { - return null; + return this.stringifyCommand(command); } if (parts[0] === 'bash' && parts[1] === '-lc') { const script = parts.slice(2).join(' '); - if (!script) { - return null; - } return `bash -lc ${JSON.stringify(script)}`; } return parts.join(' '); } - return null; + return this.stringifyCommand(command); + } + + private stringifyCommand(command: unknown): string { + try { + const json = JSON.stringify(command); + return json === undefined ? 
String(command) : json; + } catch { + return String(command); + } } /** diff --git a/src/types.ts b/src/types.ts index 97a389c..7c7e249 100644 --- a/src/types.ts +++ b/src/types.ts @@ -169,6 +169,7 @@ export interface ProviderResponse { * @property {string} [model] - The AI model to use, if applicable. * @property {number} [temperature] - Sampling temperature for generation. * @property {number} [maxTokens] - Maximum number of tokens to generate. + * @property {boolean} [cleanHallucinatedTraces] - Strip hallucinated tool traces from provider content (provider-specific). */ export interface ProviderConfig { apiKey?: string; @@ -176,4 +177,5 @@ export interface ProviderConfig { model?: string; temperature?: number; maxTokens?: number; + cleanHallucinatedTraces?: boolean; } From 0d1275217d7a2f7ceb5bd507a8c8042ebebdfa20 Mon Sep 17 00:00:00 2001 From: Layne Penney Date: Fri, 16 Jan 2026 06:48:06 -0600 Subject: [PATCH 15/17] fix: log hallucinated traces and normalize bash --- src/providers/ollama-cloud.ts | 17 +++++++++++++++-- src/tools/bash.ts | 3 +++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/providers/ollama-cloud.ts b/src/providers/ollama-cloud.ts index e6b0e0b..1fa83d7 100644 --- a/src/providers/ollama-cloud.ts +++ b/src/providers/ollama-cloud.ts @@ -14,6 +14,7 @@ import { getProviderRateLimiter, type RateLimiter } from './rate-limiter.js'; import { messageToText } from './message-converter.js'; import type { Message, ToolDefinition, ProviderResponse, ProviderConfig, ToolCall } from '../types.js'; import { DEFAULT_FALLBACK_CONFIG, findBestToolMatch } from '../tools/tool-fallback.js'; +import { logger, LogLevel } from '../logger.js'; /** Ollama message format */ interface OllamaMessage { @@ -671,9 +672,17 @@ export class OllamaCloudProvider extends BaseProvider { return content; } + const matches = content.match(this.getHallucinatedTracePattern()) || []; const cleanedContent = this.cleanHallucinatedTraces(content); if (cleanedContent !== 
content) { - console.warn('[ollama-cloud] Cleaned hallucinated tool traces from model output.'); + if (logger.isLevelEnabled(LogLevel.VERBOSE) && matches.length > 0) { + const joined = matches.join('\n'); + const clipped = joined.length > 2000 + ? `${joined.slice(0, 2000)}\n... [truncated ${joined.length - 2000} chars]` + : joined; + logger.verbose(`[ollama-cloud] Stripped hallucinated traces:\n${clipped}`); + } + logger.warn('Ollama Cloud: cleaned hallucinated tool traces from model output.'); } return cleanedContent; @@ -686,7 +695,7 @@ export class OllamaCloudProvider extends BaseProvider { */ private cleanHallucinatedTraces(content: string): string { // Pattern: [Calling tool_name]: {json}[Result from tool_name]: any text until next [ or end - const hallucinatedTracePattern = /\[Calling\s+[a-z_][a-z0-9_]*\]\s*:\s*\{[^}]*\}\s*(?:\[Result from\s+[a-z_][a-z0-9_]*\]\s*:\s*[^\[]*)?/gi; + const hallucinatedTracePattern = this.getHallucinatedTracePattern(); let cleanedContent = content.replace(hallucinatedTracePattern, '').trim(); // Clean up multiple newlines @@ -695,6 +704,10 @@ export class OllamaCloudProvider extends BaseProvider { return cleanedContent; } + private getHallucinatedTracePattern(): RegExp { + return /\[Calling\s+[a-z_][a-z0-9_]*\]\s*:\s*\{[^}]*\}\s*(?:\[Result from\s+[a-z_][a-z0-9_]*\]\s*:\s*[^\[]*)?/gi; + } + /** * Pull a model if it's not already available. 
*/ diff --git a/src/tools/bash.ts b/src/tools/bash.ts index e33c77b..2046021 100644 --- a/src/tools/bash.ts +++ b/src/tools/bash.ts @@ -130,6 +130,9 @@ export class BashTool extends BaseTool { if (parts[0] === 'bash' && parts[1] === '-lc') { const script = parts.slice(2).join(' '); + if (!script.trim()) { + return this.stringifyCommand(parts); + } return `bash -lc ${JSON.stringify(script)}`; } From 65f75da8c77bd3cf34f0be6062b32bd589204c47 Mon Sep 17 00:00:00 2001 From: Layne Penney Date: Fri, 16 Jan 2026 06:59:08 -0600 Subject: [PATCH 16/17] fix: skip continuation prompt --- src/agent.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/agent.ts b/src/agent.ts index 6aab93c..987d220 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -941,7 +941,8 @@ Always use tools to interact with the filesystem rather than asking the user to resultText += `Result from ${toolName}:\n${content}\n\n`; } } - resultText += this.buildContinuationPrompt(originalTask); + // lp 1/16/26: skip this for now. 
I believe it is causing issues + // resultText += this.buildContinuationPrompt(originalTask); this.messages.push({ role: 'user', From bf575f53fd8910a9b513caa10ccf5a7da29b9f18 Mon Sep 17 00:00:00 2001 From: Layne Penney Date: Fri, 16 Jan 2026 07:03:33 -0600 Subject: [PATCH 17/17] chore: bump version to 0.7.9 --- package.json | 2 +- src/version.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/package.json b/package.json index 188db50..4817c04 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "codi", - "version": "0.7.8", + "version": "0.7.9", "description": "Your AI coding wingman - a hybrid assistant supporting Claude, OpenAI, and local models", "license": "Apache-2.0", "type": "module", diff --git a/src/version.ts b/src/version.ts index f0c5b83..f55b6aa 100644 --- a/src/version.ts +++ b/src/version.ts @@ -10,4 +10,4 @@ * - MINOR: New features, significant refactoring, non-breaking changes * - PATCH: Bug fixes, minor improvements */ -export const VERSION = '0.7.8'; +export const VERSION = '0.7.9';