# Test Context Curation with Real Repository and LLM

This notebook tests the context curation functions with an actual repository and real LLM calls.

## Setup and Configuration

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
import os
from pathlib import Path
import asyncio
import json
from datetime import datetime
from unittest.mock import AsyncMock, MagicMock

# Add the parent directory to path to import yellhorn_mcp
sys.path.insert(0, os.path.dirname(os.getcwd()))

from yellhorn_mcp.processors.context_processor import (
    build_codebase_context,
    analyze_with_llm,
    parse_llm_directories,
    save_context_file,
    process_context_curation_async
)
from yellhorn_mcp.llm_manager import LLMManager
from yellhorn_mcp.utils.git_utils import YellhornMCPError, run_git_command_with_set_cwd, run_github_command_with_set_cwd

print("✅ Imports successful")

✅ Imports successful


## Configure LLM Manager

In [None]:
# Initialize LLM Manager with API clients
# You need to have your API keys set as environment variables:
GEMINI_API_KEY = ""
OPENAI_API_KEY = ""

from openai import AsyncOpenAI
from google import genai

# Initialize clients
openai_client = AsyncOpenAI(api_key=OPENAI_API_KEY)
gemini_client = genai.Client(api_key=GEMINI_API_KEY)

# Create LLM Manager
llm_manager = LLMManager(
    openai_client=openai_client,
    gemini_client=gemini_client,
    config={
        "safety_margin_tokens": 1000,
        "overlap_ratio": 0.1,
        "chunk_strategy": "sentences"
    }
)

# Determine which model to use
if openai_client:
    DEFAULT_MODEL = "gpt-4o-mini"  # Using a cheaper model for testing
    print(f"🤖 Using OpenAI model: {DEFAULT_MODEL}")
elif gemini_client:
    DEFAULT_MODEL = "gemini-1.5-flash"  # Fast and cheap Gemini model
    print(f"🤖 Using Gemini model: {DEFAULT_MODEL}")
else:
    print("❌ No LLM client available. Please set OPENAI_API_KEY or GOOGLE_API_KEY")
    DEFAULT_MODEL = None

🤖 Using OpenAI model: gpt-4o-mini


## Set Repository Path

In [4]:
# Use the actual yellhorn-mcp repository path
REPO_PATH = Path("/Users/sravanj/project_work/yellhorn-mcp")

# Verify the path exists
if REPO_PATH.exists():
    print(f"✅ Repository path exists: {REPO_PATH}")
    print(f"📁 Repository contents (sample):")
    for item in list(REPO_PATH.iterdir())[:10]:
        if item.is_dir():
            print(f"  📂 {item.name}/")
        else:
            print(f"  📄 {item.name}")
else:
    print(f"❌ Repository path does not exist: {REPO_PATH}")
    print("Please update REPO_PATH to point to your repository")

✅ Repository path exists: /Users/sravanj/project_work/yellhorn-mcp
📁 Repository contents (sample):
  📄 .yellhornignore
  📂 yellhorn_mcp/
  📄 .DS_Store
  📄 pyrightconfig.json
  📂 .pytest_cache/
  📄 CHANGELOG.md
  📄 .coverage
  📄 LLMManagerREADME.md
  📄 .mcp.json
  📄 pyproject.toml


## Create Git Command Function and Mock Context Helpers

In [5]:
def create_mock_context():
    """Create a mock context with logging capabilities."""
    mock_ctx = MagicMock()
    mock_ctx.log = AsyncMock()
    mock_ctx.request_context.lifespan_context = {
        "git_command_func": run_git_command_with_set_cwd(REPO_PATH),
        "codebase_reasoning": "file_structure"
    }
    return mock_ctx

def print_log_messages(mock_ctx, limit=100):
    """Print the log messages from mock context."""
    if mock_ctx.log.called:
        log_messages = [(call[1].get('level', 'info'), call[1]['message']) 
                       for call in mock_ctx.log.call_args_list]
        print(f"\n📝 Log messages (showing first {limit}):")
        for level, msg in log_messages[:limit]:
            emoji = "🔵" if level == "info" else "🟡" if level == "warning" else "🔴"
            print(f"  {emoji} [{level}] {msg[:5000]}..." if len(msg) > 5000 else f"  {emoji} [{level}] {msg}")
        if len(log_messages) > limit:
            print(f"  ... and {len(log_messages) - limit} more messages")
    else:
        print("\n📝 No log messages recorded")

print("✅ Git command function and mock context helpers created")
print("   - git_command_func: Real git command wrapper")
print("   - create_mock_context(): Creates mock context with real git commands")
print("   - print_log_messages(): Displays logged messages from mock context")

✅ Git command function and mock context helpers created
   - git_command_func: Real git command wrapper
   - create_mock_context(): Creates mock context with real git commands
   - print_log_messages(): Displays logged messages from mock context


## Test 1: Build Codebase Context with Real Repository

In [6]:
async def test_real_build_context():
    """Test building context from real repository."""
    print("\n🧪 Testing build_codebase_context with real repository...")
    
    # Create mock context
    mock_ctx = create_mock_context()
    
    start_time = datetime.now()
    
    # Build context for the repository with mock context
    directory_context, file_paths, all_dirs = await build_codebase_context(
        repo_path=REPO_PATH,
        codebase_reasoning_mode="file_structure",
        model=DEFAULT_MODEL,
        ctx=mock_ctx,
        git_command_func=mock_ctx.request_context.lifespan_context['git_command_func']
    )
    
    elapsed = (datetime.now() - start_time).total_seconds()
    
    print(f"\n⏱️ Time taken: {elapsed:.2f} seconds")
    print(f"\n📊 Statistics:")
    print(f"  - Total files found: {len(file_paths)}")
    print(f"  - Total directories: {len(all_dirs)}")
    print(f"  - Context size: {len(directory_context)} characters")
    
    print(f"\n📁 Top-level directories:")
    top_dirs = [d for d in sorted(all_dirs) if '/' not in d]
    for dir_name in top_dirs[:10]:
        print(f"  - {dir_name}")
    
    print(f"\n📄 Sample files (first 10):")
    for file_path in file_paths[:10]:
        print(f"  - {file_path}")
    
    print(f"\n📝 Context preview (first 1000 chars):")
    print(directory_context[:1000])
    
    # Print log messages
    print_log_messages(mock_ctx)
    
    return directory_context, file_paths, all_dirs, mock_ctx

# Run the test
if REPO_PATH.exists():
    context_data = await test_real_build_context()
else:
    print("⚠️ Skipping test - repository path not found")
    context_data = None


🧪 Testing build_codebase_context with real repository...

⏱️ Time taken: 0.17 seconds

📊 Statistics:
  - Total files found: 37
  - Total directories: 11
  - Context size: 1128 characters

📁 Top-level directories:
  - .
  - .github
  - docs
  - notebooks
  - yellhorn_mcp

📄 Sample files (first 10):
  - .github/workflows/publish.yml
  - .github/workflows/tests.yml
  - .mcp.json
  - .python-version
  - CHANGELOG.md
  - CLAUDE.md
  - LLMManagerREADME.md
  - README.md
  - coverage_stats.txt
  - docs/USAGE.md

📝 Context preview (first 1000 chars):
<codebase_tree>
.
├── .mcp.json
├── .python-version
├── CHANGELOG.md
├── CLAUDE.md
├── LLMManagerREADME.md
├── README.md
├── coverage_stats.txt
├── pyproject.toml
├── pyrightconfig.json
│   ├── workflows/
│   │   ├── publish.yml
│   │   └── tests.yml
├── docs/
│   ├── USAGE.md
│   └── coverage_baseline.md
├── notebooks/
│   ├── file_structure.ipynb
│   └── llm_manager.ipynb
├── yellhorn_mcp/
│   ├── __init__.py
│   ├── cli.py
│   ├── llm_manager.p

## Test 2: Analyze with Real LLM

In [7]:
async def test_real_llm_analysis():
    """Test analyzing codebase with real LLM."""
    if not DEFAULT_MODEL or not context_data:
        print("⚠️ Skipping test - LLM or context not available")
        return None
    
    print("\n🧪 Testing analyze_with_llm with real LLM...")
    print(f"🤖 Using model: {DEFAULT_MODEL}")
    
    # Create new mock context for this test
    mock_ctx = create_mock_context()
    
    directory_context = context_data[0]
    user_task = "Improve the context curation system to better identify important directories for AI workplan generation. Include .python-version"
    
    print(f"\n📋 Task: {user_task}")    
    start_time = datetime.now()
    
    try:
        # Call real LLM with mock context
        llm_result = await analyze_with_llm(
            llm_manager=llm_manager,
            model=DEFAULT_MODEL,
            directory_context=directory_context,
            user_task=user_task,
            debug=True,  # Set to True to see debug logs
            ctx=mock_ctx
        )
        
        elapsed = (datetime.now() - start_time).total_seconds()
        
        print(f"\n✅ LLM analysis complete!")
        print(f"⏱️ Time taken: {elapsed:.2f} seconds")
        print(f"\n🤖 LLM Response:")
        print("-" * 60)
        print(llm_result)
        print("-" * 60)
        
        # Print log messages
        print_log_messages(mock_ctx)
        
        # Check for usage metadata
        usage = llm_manager.get_last_usage_metadata()
        if usage:
            print(f"\n📊 Token Usage:")
            print(f"  - Prompt tokens: {usage.prompt_tokens}")
            print(f"  - Completion tokens: {usage.completion_tokens}")
            print(f"  - Total tokens: {usage.total_tokens}")
        
        return llm_result, mock_ctx
        
    except Exception as e:
        print(f"\n❌ Error calling LLM: {e}")
        print_log_messages(mock_ctx)
        return None, mock_ctx

# Run the test
llm_data = await test_real_llm_analysis()
if llm_data:
    llm_result, llm_ctx = llm_data
else:
    llm_result = None


🧪 Testing analyze_with_llm with real LLM...
🤖 Using model: gpt-4o-mini

📋 Task: Improve the context curation system to better identify important directories for AI workplan generation. Include .python-version


2025-08-10 15:27:47,958 INFO HTTP Request: POST https://api.openai.com/v1/responses "HTTP/1.1 200 OK"



✅ LLM analysis complete!
⏱️ Time taken: 2.77 seconds

🤖 LLM Response:
------------------------------------------------------------
```context
.python-version
yellhorn_mcp/
yellhorn_mcp/cli.py
yellhorn_mcp/llm_manager.py
yellhorn_mcp/metadata_models.py
yellhorn_mcp/server.py
yellhorn_mcp/token_counter.py
yellhorn_mcp/formatters/
yellhorn_mcp/integrations/
yellhorn_mcp/models/
yellhorn_mcp/processors/
yellhorn_mcp/utils/
```
------------------------------------------------------------

📝 Log messages (showing first 100):
  🔵 [info] Analyzing directory structure with gpt-4o-mini
  🔵 [info] [DEBUG] System message: You are an expert software developer tasked with analyzing a codebase structure to identify important directories for building and executing a workplan.

Your goal is to identify the most important directories that should be included for the user's task.

Analyze the directories and identify the ones that:
1. Contain core application code relevant to the user's task
2. Likely co

## Test 3: Parse LLM Output

In [12]:
async def test_parse_real_output():
    """Test parsing real LLM output."""
    if not llm_result or not context_data:
        print("⚠️ Skipping test - LLM result not available")
        return None
    
    print("\n🧪 Testing parse_llm_directories with real output...")
    
    # Create new mock context for this test
    mock_ctx = create_mock_context()
    
    all_dirs = context_data[2]
    
    # Parse the LLM output with mock context
    important_dirs = await parse_llm_directories(
        llm_result=llm_result,
        all_dirs=all_dirs,
        ctx=mock_ctx
    )
    
    print(f"\n📊 Parsing Results:")
    print(f"  - Total directories available: {len(all_dirs)}")
    print(f"  - Important directories identified: {len(important_dirs)}")
    print(f"  - [SHOULD HAPPEN] Reduction: {100 * (1 - len(important_dirs)/len(all_dirs)):.1f}%")
    
    print(f"\n📁 Important directories identified:")
    for dir_name in sorted(important_dirs)[:20]:
        print(f"  - {dir_name}")
    
    if len(important_dirs) > 20:
        print(f"  ... and {len(important_dirs) - 20} more")
    
    # Analyze the selection
    print(f"\n🔍 Selection Analysis:")
    
    # Check if key directories were selected
    key_dirs = ["yellhorn_mcp", "yellhorn_mcp/processors", "yellhorn_mcp/formatters", "tests"]
    for key_dir in key_dirs:
        if key_dir in important_dirs:
            print(f"  ✅ {key_dir} - included")
        elif key_dir in all_dirs:
            print(f"  ❌ {key_dir} - excluded")
        else:
            print(f"  ⚠️ {key_dir} - not found in repository")
    
    # Print log messages
    print_log_messages(mock_ctx)
    
    return important_dirs, mock_ctx

# Run the test
parse_data = await test_parse_real_output()
if parse_data:
    parsed_dirs, parse_ctx = parse_data
else:
    parsed_dirs = None


🧪 Testing parse_llm_directories with real output...

📊 Parsing Results:
  - Total directories available: 11
  - Important directories identified: 12
  - [SHOULD HAPPEN] Reduction: -9.1%

📁 Important directories identified:
  - .python-version
  - yellhorn_mcp
  - yellhorn_mcp/cli.py
  - yellhorn_mcp/formatters
  - yellhorn_mcp/integrations
  - yellhorn_mcp/llm_manager.py
  - yellhorn_mcp/metadata_models.py
  - yellhorn_mcp/models
  - yellhorn_mcp/processors
  - yellhorn_mcp/server.py
  - yellhorn_mcp/token_counter.py
  - yellhorn_mcp/utils

🔍 Selection Analysis:
  ✅ yellhorn_mcp - included
  ✅ yellhorn_mcp/processors - included
  ✅ yellhorn_mcp/formatters - included
  ⚠️ tests - not found in repository

📝 Log messages (showing first 100):
  🔵 [info] Matched '.python-version' to directories: .python-version
  🔵 [info] Matched 'yellhorn_mcp' to directories: yellhorn_mcp
  🔵 [info] Matched 'yellhorn_mcp/cli.py' to directories: yellhorn_mcp/cli.py
  🔵 [info] Matched 'yellhorn_mcp/llm_mana

In [13]:
context_data[2]

{'.',
 '.github',
 '.github/workflows',
 'docs',
 'notebooks',
 'yellhorn_mcp',
 'yellhorn_mcp/formatters',
 'yellhorn_mcp/integrations',
 'yellhorn_mcp/models',
 'yellhorn_mcp/processors',
 'yellhorn_mcp/utils'}

In [14]:
print(llm_result)

```context
.python-version
yellhorn_mcp/
yellhorn_mcp/cli.py
yellhorn_mcp/llm_manager.py
yellhorn_mcp/metadata_models.py
yellhorn_mcp/server.py
yellhorn_mcp/token_counter.py
yellhorn_mcp/formatters/
yellhorn_mcp/integrations/
yellhorn_mcp/models/
yellhorn_mcp/processors/
yellhorn_mcp/utils/
```


In [15]:
print(parsed_dirs)

{'.python-version', 'yellhorn_mcp', 'yellhorn_mcp/cli.py', 'yellhorn_mcp/formatters', 'yellhorn_mcp/integrations', 'yellhorn_mcp/models', 'yellhorn_mcp/utils', 'yellhorn_mcp/llm_manager.py', 'yellhorn_mcp/processors', 'yellhorn_mcp/metadata_models.py', 'yellhorn_mcp/server.py', 'yellhorn_mcp/token_counter.py'}


## Test 4: End-to-End Context Curation

In [17]:
async def test_end_to_end_real():
    """Test complete context curation with real repository and LLM."""
    if not DEFAULT_MODEL or not REPO_PATH.exists():
        print("⚠️ Skipping test - LLM or repository not available")
        return None
    
    print("\n🧪 Testing end-to-end context curation...")
    print(f"📁 Repository: {REPO_PATH}")
    print(f"🤖 Model: {DEFAULT_MODEL}")
    
    # Create mock context for this test
    mock_ctx = create_mock_context()
    
    user_task = "Refactor the context processor module to improve modularity and add better error handling"
    output_path = ".yellhorncontext.test"
    
    print(f"\n📋 Task: {user_task}")
    print(f"📄 Output file: {output_path}")
    
    start_time = datetime.now()
    
    try:
        # Run the complete process with mock context
        result = await process_context_curation_async(
            repo_path=REPO_PATH,
            llm_manager=llm_manager,
            model=DEFAULT_MODEL,
            user_task=user_task,
            output_path=output_path,
            codebase_reasoning="file_structure",
            disable_search_grounding=False,
            debug=False,
            ctx=mock_ctx
        )
        
        elapsed = (datetime.now() - start_time).total_seconds()
        
        print(f"\n✅ Context curation complete!")
        print(f"⏱️ Time taken: {elapsed:.2f} seconds")
        print(f"\n📊 Result: {result}")
        
        # Read and display the generated file
        context_file = REPO_PATH / output_path
        if context_file.exists():
            content = context_file.read_text()
            print(f"\n📄 Generated context file content:")
            print("=" * 60)
            print(content)
            print("=" * 60)
            
            # Analyze the content
            lines = content.split('\n')
            non_comment_lines = [l for l in lines if l.strip() and not l.strip().startswith('#')]
            
            print(f"\n📊 File Analysis:")
            print(f"  - Total lines: {len(lines)}")
            print(f"  - Comment lines: {len(lines) - len(non_comment_lines)}")
            print(f"  - Directory patterns: {len(non_comment_lines)}")
            
            # Print log messages
            print_log_messages(mock_ctx)
            
            # Clean up test file
            print(f"\n🧹 Cleaning up test file...")
            context_file.unlink()
            print(f"✅ Test file removed")
        else:
            print(f"\n❌ Context file was not created")
            print_log_messages(mock_ctx)
        
        # Check usage
        usage = llm_manager.get_last_usage_metadata()
        if usage:
            print(f"\n📊 Total Token Usage:")
            print(f"  - Prompt tokens: {usage.prompt_tokens}")
            print(f"  - Completion tokens: {usage.completion_tokens}")
            print(f"  - Total tokens: {usage.total_tokens}")
        
        return result, mock_ctx
        
    except Exception as e:
        print(f"\n❌ Error during context curation: {e}")
        import traceback
        traceback.print_exc()
        print_log_messages(mock_ctx)
        return None, mock_ctx

# Run the test
end_to_end_data = await test_end_to_end_real()
if end_to_end_data:
    end_to_end_result, end_to_end_ctx = end_to_end_data
else:
    end_to_end_result = None


🧪 Testing end-to-end context curation...
📁 Repository: /Users/sravanj/project_work/yellhorn-mcp
🤖 Model: gpt-4o-mini

📋 Task: Refactor the context processor module to improve modularity and add better error handling
📄 Output file: .yellhorncontext.test


2025-08-10 15:35:37,264 INFO HTTP Request: POST https://api.openai.com/v1/responses "HTTP/1.1 200 OK"



✅ Context curation complete!
⏱️ Time taken: 1.42 seconds

📊 Result: Successfully created .yellhorncontext file at /Users/sravanj/project_work/yellhorn-mcp/.yellhorncontext.test with 3 important directories.

📄 Generated context file content:
# Yellhorn Context File - AI context optimization
# Generated by yellhorn-mcp curate_context tool
# Based on task: Refactor the context processor module to improve modularity and add better error

# Important directories to specifically include
yellhorn_mcp/
yellhorn_mcp/processors/
yellhorn_mcp/utils/


📊 File Analysis:
  - Total lines: 9
  - Comment lines: 6
  - Directory patterns: 3

📝 Log messages (showing first 100):
  🔵 [info] Starting context curation process
  🔵 [info] Getting codebase context using file_structure mode
  🔵 [info] Getting codebase snapshot in mode: paths
  🔵 [info] Found .gitignore with 86 patterns
  🔵 [info] Found .yellhornignore with 1 patterns
  🔵 [info] File categorization results out of 70 files:
  🔵 [info]   - 5 alway

## Test Different Reasoning Modes

In [19]:
async def test_reasoning_modes():
    """Test different codebase reasoning modes."""
    if not REPO_PATH.exists():
        print("⚠️ Skipping test - repository not available")
        return
    
    print("\n🧪 Testing different reasoning modes...")
    
    modes = ["file_structure", "lsp", "full"]
    results = {}
    
    for mode in modes:
        print(f"\n📋 Testing mode: {mode}")
        
        # Create new mock context for each mode
        mock_ctx = create_mock_context()
        mock_ctx.request_context.lifespan_context["codebase_reasoning"] = mode
        
        start_time = datetime.now()
        
        try:
            directory_context, file_paths, all_dirs = await build_codebase_context(
                repo_path=REPO_PATH,
                codebase_reasoning_mode=mode,
                model=DEFAULT_MODEL,
                ctx=mock_ctx,
                git_command_func=mock_ctx.request_context.lifespan_context['git_command_func']
            )
            
            elapsed = (datetime.now() - start_time).total_seconds()
            
            results[mode] = {
                "time": elapsed,
                "context_size": len(directory_context),
                "files": len(file_paths),
                "dirs": len(all_dirs),
                "logs": mock_ctx.log.call_count
            }
            
            print(f"  ✅ Completed in {elapsed:.2f}s")
            print(f"  📊 Context size: {len(directory_context)} chars")
            print(f"  📝 Log messages: {mock_ctx.log.call_count}")
            
            # Show sample log messages
            if mock_ctx.log.called:
                print("  Sample logs:")
                for i, call in enumerate(mock_ctx.log.call_args_list[:3]):
                    level = call[1].get('level', 'info')
                    msg = call[1]['message'][:80]
                    print(f"    - [{level}] {msg}...")
            
        except Exception as e:
            print(f"  ❌ Error: {e}")
            results[mode] = {"error": str(e)}
    
    # Compare results
    print("\n📊 Comparison of reasoning modes:")
    print(f"{'Mode':<15} {'Time (s)':<10} {'Context Size':<15} {'Files':<10} {'Dirs':<10} {'Logs':<10}")
    print("-" * 70)
    
    for mode, data in results.items():
        if "error" not in data:
            print(f"{mode:<15} {data['time']:<10.2f} {data['context_size']:<15,} {data['files']:<10} {data['dirs']:<10} {data['logs']:<10}")
        else:
            print(f"{mode:<15} Error: {data['error'][:40]}")
    
    return results

# Run the test
reasoning_results = await test_reasoning_modes()


🧪 Testing different reasoning modes...

📋 Testing mode: file_structure
  ✅ Completed in 0.03s
  📊 Context size: 1128 chars
  📝 Log messages: 14
  Sample logs:
    - [info] Getting codebase context using file_structure mode...
    - [info] Getting codebase snapshot in mode: paths...
    - [info] Found .gitignore with 86 patterns...

📋 Testing mode: lsp
  ✅ Completed in 0.04s
  📊 Context size: 17126 chars
  📝 Log messages: 14
  Sample logs:
    - [info] Getting codebase context using lsp mode...
    - [info] Getting codebase snapshot in mode: paths...
    - [info] Found .gitignore with 86 patterns...

📋 Testing mode: full
  ✅ Completed in 0.08s
  📊 Context size: 407588 chars
  📝 Log messages: 15
  Sample logs:
    - [info] Getting codebase context using full mode...
    - [info] Getting codebase snapshot in mode: full...
    - [info] Found .gitignore with 86 patterns...

📊 Comparison of reasoning modes:
Mode            Time (s)   Context Size    Files      Dirs       Logs      
--------

## Summary and Recommendations

## Test with Debug Mode Enabled

In [21]:
async def test_with_debug_mode():
    """Test with debug logging enabled to see detailed information."""
    if not DEFAULT_MODEL or not REPO_PATH.exists():
        print("⚠️ Skipping test - LLM or repository not available")
        return None
    
    print("\n🧪 Testing with debug mode enabled...")
    print(f"📁 Repository: {REPO_PATH}")
    print(f"🤖 Model: {DEFAULT_MODEL}")
    
    # Create mock context
    mock_ctx = create_mock_context()
    
    # Simple task for debugging
    user_task = "Test debug logging functionality"
    
    print(f"\n📋 Task: {user_task}")
    
    try:
        # Build context
        print("\n1️⃣ Building codebase context...")
        directory_context, file_paths, all_dirs = await build_codebase_context(
            repo_path=REPO_PATH,
            codebase_reasoning_mode="file_structure",
            model=DEFAULT_MODEL,
            ctx=mock_ctx,
            git_command_func=mock_ctx.request_context.lifespan_context['git_command_func']
        )
        print(f"   ✅ Context built: {len(directory_context)} chars, {len(file_paths)} files")
        
        # Analyze with LLM (with debug=True)
        print("\n2️⃣ Analyzing with LLM (debug=True)...")
        llm_result = await analyze_with_llm(
            llm_manager=llm_manager,
            model=DEFAULT_MODEL,
            directory_context=directory_context[:5000],  # Use smaller context for debug
            user_task=user_task,
            debug=True,  # Enable debug logging
            ctx=mock_ctx
        )
        print(f"   ✅ LLM analysis complete")
        
        # Parse directories
        print("\n3️⃣ Parsing LLM output...")
        important_dirs = await parse_llm_directories(
            llm_result=llm_result,
            all_dirs=all_dirs,
            ctx=mock_ctx
        )
        print(f"   ✅ Parsed {len(important_dirs)} important directories")
        print(important_dirs)
        
        # Print all log messages
        print("\n📝 Complete Log Messages:")
        print("=" * 60)
        if mock_ctx.log.called:
            for i, call in enumerate(mock_ctx.log.call_args_list):
                level = call[1].get('level', 'info')
                msg = call[1]['message']
                emoji = "🔵" if level == "info" else "🟡" if level == "warning" else "🔴"
                print(f"{i+1:3}. {emoji} [{level:7}] {msg}")
        else:
            print("No log messages recorded")
        print("=" * 60)
        
        print(f"\n📊 Summary:")
        print(f"  - Total log messages: {mock_ctx.log.call_count}")
        print(f"  - Info messages: {sum(1 for c in mock_ctx.log.call_args_list if c[1].get('level') == 'info')}")
        print(f"  - Warning messages: {sum(1 for c in mock_ctx.log.call_args_list if c[1].get('level') == 'warning')}")
        print(f"  - Error messages: {sum(1 for c in mock_ctx.log.call_args_list if c[1].get('level') == 'error')}")
        
        return mock_ctx
        
    except Exception as e:
        print(f"\n❌ Error during debug test: {e}")
        import traceback
        traceback.print_exc()
        print_log_messages(mock_ctx)
        return mock_ctx

# Run the debug test
debug_ctx = await test_with_debug_mode()


🧪 Testing with debug mode enabled...
📁 Repository: /Users/sravanj/project_work/yellhorn-mcp
🤖 Model: gpt-4o-mini

📋 Task: Test debug logging functionality

1️⃣ Building codebase context...
   ✅ Context built: 1128 chars, 37 files

2️⃣ Analyzing with LLM (debug=True)...


2025-08-10 15:37:51,726 INFO HTTP Request: POST https://api.openai.com/v1/responses "HTTP/1.1 200 OK"


   ✅ LLM analysis complete

3️⃣ Parsing LLM output...
   ✅ Parsed 8 important directories
{'yellhorn_mcp/utils', 'yellhorn_mcp', 'yellhorn_mcp/llm_manager.py', 'yellhorn_mcp/processors', 'yellhorn_mcp/cli.py', 'yellhorn_mcp/formatters', 'yellhorn_mcp/integrations', 'yellhorn_mcp/server.py'}

📝 Complete Log Messages:
  1. 🔵 [info   ] Getting codebase context using file_structure mode
  2. 🔵 [info   ] Getting codebase snapshot in mode: paths
  3. 🔵 [info   ] Found .gitignore with 86 patterns
  4. 🔵 [info   ] Found .yellhornignore with 1 patterns
  5. 🔵 [info   ] File categorization results out of 70 files:
  6. 🔵 [info   ]   - 5 always ignored (images, binaries, configs, etc.)
  7. 🔵 [info   ]   - 0 in yellhorncontext whitelist (included)
  8. 🔵 [info   ]   - 0 in yellhorncontext blacklist (excluded)
  9. 🔵 [info   ]   - 0 in yellhornignore whitelist (included)
 10. 🔵 [info   ]   - 28 in yellhornignore blacklist (excluded)
 11. 🔵 [info   ]   - 37 other files (included - no .yellhornconte