Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions ROADMAP.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Codi Roadmap

This document tracks planned features and improvements for Codi.

## Planned Features

### Test Sandbox Compatibility

Update tests that write to `~/.codi` or bind to `127.0.0.1` so they use local temporary
directories and ephemeral ports by default, avoiding sandbox permission errors.

---

### Semantic Fallback for Tool Calls

When a model attempts to call a tool that doesn't exist or uses incorrect parameter names, implement a semantic fallback system that provides:

1. **Tool Name Matching**: If a requested tool doesn't exist, find the closest matching tool by name similarity (e.g., `print_tree` -> `list_directory`, `search` -> `grep`)

2. **Parameter Mapping**: When a tool is called with unrecognized parameters, attempt to map them to the correct parameter names based on:
- Common aliases (e.g., `query` -> `pattern`, `max_results` -> `head_limit`)
- Semantic similarity (e.g., `search_term` -> `pattern`)
- Parameter descriptions

3. **Graceful Degradation**: Instead of failing on invalid tool calls, provide helpful feedback to the model about what tools/parameters are available

This would help bridge the gap between the tool names and parameters that different models learned during training and Codi's actual tool definitions, improving compatibility with various LLMs.

**Current Mitigations**:
- Added parameter aliases to `grep` tool (`query` -> `pattern`, `max_results`/`max`/`limit` -> `head_limit`)
- Added `print_tree` tool (commonly expected by models)
- Possible next step (not a current mitigation): consider a vector-embedding index for tool/parameter semantics (similar to `search_codebase`) to improve matches beyond string similarity.

---

## Completed Features

See [CLAUDE.md](./CLAUDE.md) for documentation on implemented features.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "codi",
"version": "0.7.8",
"version": "0.7.9",
"description": "Your AI coding wingman - a hybrid assistant supporting Claude, OpenAI, and local models",
"license": "Apache-2.0",
"type": "module",
Expand Down
100 changes: 88 additions & 12 deletions src/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,13 @@ export interface AgentOptions {
logLevel?: LogLevel; // Log level for debug output (replaces debug)
debug?: boolean; // @deprecated Use logLevel instead
enableCompression?: boolean; // Enable entity-reference compression for context
maxContextTokens?: number; // Maximum context tokens before compaction
secondaryProvider?: BaseProvider | null; // Optional secondary provider for summarization
modelMap?: ModelMap | null; // Optional model map for multi-model orchestration
auditLogger?: AuditLogger | null; // Optional audit logger for session debugging
onText?: (text: string) => void;
onReasoning?: (reasoning: string) => void; // Called with reasoning trace from reasoning models
onReasoningChunk?: (chunk: string) => void; // Streaming reasoning output
onToolCall?: (name: string, input: Record<string, unknown>) => void;
onToolResult?: (name: string, result: string, isError: boolean) => void;
onConfirm?: (confirmation: ToolConfirmation) => Promise<ConfirmationResult>; // Confirm destructive tools
Expand All @@ -122,6 +124,7 @@ export class Agent {
private customDangerousPatterns: Array<{ pattern: RegExp; description: string }>;
private logLevel: LogLevel;
private enableCompression: boolean;
private maxContextTokens: number;
private auditLogger: AuditLogger | null = null;
private messages: Message[] = [];
private conversationSummary: string | null = null;
Expand All @@ -130,6 +133,7 @@ export class Agent {
private callbacks: {
onText?: (text: string) => void;
onReasoning?: (reasoning: string) => void;
onReasoningChunk?: (chunk: string) => void;
onToolCall?: (name: string, input: Record<string, unknown>) => void;
onToolResult?: (name: string, result: string, isError: boolean) => void;
onConfirm?: (confirmation: ToolConfirmation) => Promise<ConfirmationResult>;
Expand Down Expand Up @@ -163,11 +167,13 @@ export class Agent {
// Support both logLevel and deprecated debug option
this.logLevel = options.logLevel ?? (options.debug ? LogLevel.DEBUG : LogLevel.NORMAL);
this.enableCompression = options.enableCompression ?? false;
this.maxContextTokens = options.maxContextTokens ?? AGENT_CONFIG.MAX_CONTEXT_TOKENS;
this.auditLogger = options.auditLogger ?? null;
this.systemPrompt = options.systemPrompt || this.getDefaultSystemPrompt();
this.callbacks = {
onText: options.onText,
onReasoning: options.onReasoning,
onReasoningChunk: options.onReasoningChunk,
onToolCall: options.onToolCall,
onToolResult: options.onToolResult,
onConfirm: options.onConfirm,
Expand Down Expand Up @@ -338,11 +344,11 @@ Always use tools to interact with the filesystem rather than asking the user to
private async compactContext(): Promise<void> {
const totalTokens = countMessageTokens(this.messages);

if (totalTokens <= AGENT_CONFIG.MAX_CONTEXT_TOKENS) {
if (totalTokens <= this.maxContextTokens) {
return; // No compaction needed
}

logger.debug(`Compacting: ${totalTokens} tokens exceeds ${AGENT_CONFIG.MAX_CONTEXT_TOKENS} limit`);
logger.debug(`Compacting: ${totalTokens} tokens exceeds ${this.maxContextTokens} limit`);

// Score messages by importance
const scores = scoreMessages(this.messages, CONTEXT_OPTIMIZATION.WEIGHTS);
Expand Down Expand Up @@ -512,11 +518,26 @@ Always use tools to interact with the filesystem rather than asking the user to

// Call the model with streaming (using native system prompt support)
const apiStartTime = Date.now();
let streamedChars = 0;
const onChunk = (chunk: string): void => {
if (chunk) {
streamedChars += chunk.length;
}
this.callbacks.onText?.(chunk);
};
let streamedReasoningChars = 0;
const onReasoningChunk = (chunk: string): void => {
if (chunk) {
streamedReasoningChars += chunk.length;
}
this.callbacks.onReasoningChunk?.(chunk);
};
const response = await chatProvider.streamChat(
messagesToSend,
tools,
this.callbacks.onText,
systemContext
onChunk,
systemContext,
onReasoningChunk
);
const apiDuration = (Date.now() - apiStartTime) / 1000;

Expand All @@ -541,7 +562,8 @@ Always use tools to interact with the filesystem rather than asking the user to
response.content,
response.toolCalls,
response.usage,
Date.now() - apiStartTime
Date.now() - apiStartTime,
response.rawResponse
);

// Record usage for cost tracking
Expand All @@ -550,15 +572,40 @@ Always use tools to interact with the filesystem rather than asking the user to
}

// Call reasoning callback if reasoning content is present (e.g., from DeepSeek-R1)
if (response.reasoningContent && this.callbacks.onReasoning) {
if (response.reasoningContent && this.callbacks.onReasoning && streamedReasoningChars === 0) {
this.callbacks.onReasoning(response.reasoningContent);
}

// If no tool calls were detected via API but tools are enabled,
// try to extract tool calls from the text (for models that output JSON as text)
if (response.toolCalls.length === 0 && this.useTools && this.extractToolsFromText && response.content) {
const availableTools = this.toolRegistry.listTools();
const extractedCalls = extractToolCallsFromText(response.content, availableTools);
if (response.toolCalls.length === 0 && this.useTools && this.extractToolsFromText) {
const toolDefinitions = this.toolRegistry.getDefinitions();
const fallbackConfig = this.toolRegistry.getFallbackConfig();
const contentText = response.content?.trim() || '';
const reasoningText = response.reasoningContent?.trim() || '';
const contentMatchesReasoning = Boolean(
contentText &&
reasoningText &&
contentText === reasoningText
);
const toolTracePattern = /\[(?:Calling|Running)\s+[a-z_][a-z0-9_]*\]|\{\s*"name"\s*:\s*"[a-z_][a-z0-9_]*"/i;
const hasToolTrace = (text: string): boolean => toolTracePattern.test(text);

let extractedCalls: ToolCall[] = [];

if (contentText && (!contentMatchesReasoning || hasToolTrace(contentText))) {
extractedCalls = extractToolCallsFromText(contentText, toolDefinitions, fallbackConfig);
}

if (
extractedCalls.length === 0 &&
!contentText &&
reasoningText &&
hasToolTrace(reasoningText)
) {
extractedCalls = extractToolCallsFromText(reasoningText, toolDefinitions, fallbackConfig);
}

if (extractedCalls.length > 0) {
response.toolCalls = extractedCalls;
response.stopReason = 'tool_use';
Expand All @@ -574,16 +621,42 @@ Always use tools to interact with the filesystem rather than asking the user to
finalResponse = response.content;
}

if (isExtractedToolCall) {
const thinkingText = response.reasoningContent?.trim();
const shouldAddThinkingBlock = !!thinkingText &&
(!response.content || response.content.trim() !== thinkingText);

const shouldEmitFallback = !response.content &&
response.toolCalls.length === 0 &&
streamedChars === 0;

if (shouldEmitFallback) {
const fallbackMessage = response.reasoningContent
? 'Model returned reasoning without a final answer. Try again or check --audit for the raw response.'
: 'Model returned an empty response. Try again or check --audit for the raw response.';

finalResponse = fallbackMessage;
this.messages.push({
role: 'assistant',
content: fallbackMessage,
});
this.callbacks.onText?.(fallbackMessage);
} else if (isExtractedToolCall) {
// For extracted tool calls, store as plain text (model doesn't understand tool_use blocks)
const combinedContent = thinkingText
? `[Thinking]:\n${thinkingText}${response.content ? `\n\n${response.content}` : ''}`
: (response.content || '');
this.messages.push({
role: 'assistant',
content: response.content || '',
content: combinedContent,
});
} else if (response.content || response.toolCalls.length > 0) {
// For native tool calls, use content blocks
const contentBlocks: ContentBlock[] = [];

if (shouldAddThinkingBlock && thinkingText) {
contentBlocks.push({ type: 'thinking', text: thinkingText });
}

if (response.content) {
contentBlocks.push({ type: 'text', text: response.content });
}
Expand Down Expand Up @@ -868,7 +941,8 @@ Always use tools to interact with the filesystem rather than asking the user to
resultText += `Result from ${toolName}:\n${content}\n\n`;
}
}
resultText += this.buildContinuationPrompt(originalTask);
// lp 1/16/26: skip this for now. I believe it is causing issues
// resultText += this.buildContinuationPrompt(originalTask);

this.messages.push({
role: 'user',
Expand Down Expand Up @@ -1026,6 +1100,7 @@ Always use tools to interact with the filesystem rather than asking the user to
*/
getContextInfo(): {
tokens: number;
maxTokens: number;
messages: number;
hasSummary: boolean;
compression: CompressionStats | null;
Expand All @@ -1034,6 +1109,7 @@ Always use tools to interact with the filesystem rather than asking the user to
} {
return {
tokens: countMessageTokens(this.messages),
maxTokens: this.maxContextTokens,
messages: this.messages.length,
hasSummary: this.conversationSummary !== null,
compression: this.lastCompressionStats,
Expand Down
43 changes: 43 additions & 0 deletions src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import * as fs from 'fs';
import * as path from 'path';
import { AGENT_CONFIG } from './constants.js';

/**
* Workspace configuration for Codi.
Expand Down Expand Up @@ -69,6 +70,12 @@ export interface WorkspaceConfig {
/** Enable context compression (reduces token usage) */
enableCompression?: boolean;

/** Maximum context tokens before compaction */
maxContextTokens?: number;

/** Strip hallucinated tool traces from provider content (provider-specific) */
cleanHallucinatedTraces?: boolean;

/** Context optimization settings */
contextOptimization?: {
/** Enable semantic deduplication (merge case variants) */
Expand Down Expand Up @@ -155,6 +162,20 @@ export interface WorkspaceConfig {
[toolName: string]: Record<string, unknown>;
};
};

/** Tool fallback settings for handling unknown tools and parameter aliases */
toolFallback?: {
/** Enable semantic tool fallback (default: true) */
enabled?: boolean;
/** Threshold for auto-correcting tool names (0-1, default: 0.85) */
autoCorrectThreshold?: number;
/** Threshold for suggesting similar tools (0-1, default: 0.6) */
suggestionThreshold?: number;
/** Auto-execute corrected tools without confirmation (default: false) */
autoExecute?: boolean;
/** Enable parameter aliasing (default: true) */
parameterAliasing?: boolean;
};
}

/**
Expand Down Expand Up @@ -201,6 +222,8 @@ export interface ResolvedConfig {
commandAliases: Record<string, string>;
projectContext?: string;
enableCompression: boolean;
maxContextTokens: number;
cleanHallucinatedTraces: boolean;
/** Secondary model for summarization */
summarizeProvider?: string;
summarizeModel?: string;
Expand All @@ -221,6 +244,8 @@ const DEFAULT_CONFIG: ResolvedConfig = {
extractToolsFromText: true,
commandAliases: {},
enableCompression: true, // Enabled by default for token savings
maxContextTokens: AGENT_CONFIG.MAX_CONTEXT_TOKENS,
cleanHallucinatedTraces: false,
toolsConfig: {
disabled: [],
defaults: {},
Expand Down Expand Up @@ -304,6 +329,12 @@ export function validateConfig(config: WorkspaceConfig): string[] {
}
}

if (config.maxContextTokens !== undefined) {
if (!Number.isFinite(config.maxContextTokens) || config.maxContextTokens <= 0) {
warnings.push('maxContextTokens must be a positive number');
}
}

return warnings;
}

Expand All @@ -323,6 +354,7 @@ export function mergeConfig(
session?: string;
summarizeProvider?: string;
summarizeModel?: string;
maxContextTokens?: number;
}
): ResolvedConfig {
const config: ResolvedConfig = { ...DEFAULT_CONFIG };
Expand All @@ -346,6 +378,12 @@ export function mergeConfig(
if (workspaceConfig.commandAliases) config.commandAliases = workspaceConfig.commandAliases;
if (workspaceConfig.projectContext) config.projectContext = workspaceConfig.projectContext;
if (workspaceConfig.enableCompression !== undefined) config.enableCompression = workspaceConfig.enableCompression;
if (workspaceConfig.maxContextTokens !== undefined && Number.isFinite(workspaceConfig.maxContextTokens)) {
config.maxContextTokens = workspaceConfig.maxContextTokens;
}
if (workspaceConfig.cleanHallucinatedTraces !== undefined) {
config.cleanHallucinatedTraces = workspaceConfig.cleanHallucinatedTraces;
}
// Summarize model from workspace config
if (workspaceConfig.models?.summarize?.provider) config.summarizeProvider = workspaceConfig.models.summarize.provider;
if (workspaceConfig.models?.summarize?.model) config.summarizeModel = workspaceConfig.models.summarize.model;
Expand All @@ -362,6 +400,9 @@ export function mergeConfig(
if (cliOptions.baseUrl) config.baseUrl = cliOptions.baseUrl;
if (cliOptions.endpointId) config.endpointId = cliOptions.endpointId;
if (cliOptions.session) config.defaultSession = cliOptions.session;
if (cliOptions.maxContextTokens !== undefined && Number.isFinite(cliOptions.maxContextTokens)) {
config.maxContextTokens = cliOptions.maxContextTokens;
}

// CLI --yes flag adds all tools to autoApprove
if (cliOptions.yes) {
Expand Down Expand Up @@ -448,6 +489,8 @@ export function getExampleConfig(): string {
},
projectContext: '',
enableCompression: true,
maxContextTokens: AGENT_CONFIG.MAX_CONTEXT_TOKENS,
cleanHallucinatedTraces: false,
models: {
summarize: {
provider: 'ollama',
Expand Down
Loading