# Flexible Orchestrator Workflow

This notebook demonstrates a simplified text-to-SQL workflow where:
1. The orchestrator only decides which tool to call next
2. Each tool operates on the current node (determined by the code, not by the LLM)
3. Tools are called with minimal input - they know what to do
4. The orchestrator's job is to call tools or TERMINATE

## Key Differences from Previous Versions:
- **No node IDs in tool calls** - the current node is managed automatically
- **Simple tool calls** - tools need minimal parameters
- **Task status checker** - provides clear ACTION directives for the orchestrator

In [1]:
import os
import sys
import asyncio
import logging
from pathlib import Path
from typing import Dict, Any, List, Optional
from dotenv import load_dotenv

# Add src path for imports.
# The path is resolved to an absolute one and appended only if it is not
# already present, so re-running this cell does not pile up duplicate entries.
src_path = str(Path('../src').resolve())
if src_path not in sys.path:
    sys.path.append(src_path)

# Load environment variables.
# load_dotenv() reports "nothing was loaded" through its boolean return value
# rather than by raising, so the result is checked before claiming success.
try:
    if load_dotenv():
        print("✅ Environment loaded successfully")
    else:
        print("⚠️  Warning: No .env file found (or it defined no variables); using the existing environment")
except Exception as e:
    print(f"⚠️  Warning: Could not load .env file: {e}")

# Important: For running this notebook, ensure OPENAI_API_KEY is set
if not os.getenv("OPENAI_API_KEY"):
    print("⚠️  WARNING: OPENAI_API_KEY not found in environment")
    print("📋 Run: source ../.env && export OPENAI_API_KEY")
    print("📋 Or set it manually in the environment")
else:
    print("✅ OPENAI_API_KEY found in environment")

# Logging setup: a quiet root logger, with individual loggers opted in or out
# by name so the cell output stays focused on the workflow itself.
logging.basicConfig(
    format='%(name)s - %(levelname)s - %(message)s',  # compact record layout
    level=logging.WARNING,  # root threshold keeps routine chatter out
)

# Loggers named after the workflow agents; these are shown from INFO upwards
agent_loggers = [
    'QueryAnalyzerAgent',
    'SchemaLinkerAgent',
    'SQLGeneratorAgent',
    'SQLEvaluatorAgent',
    'TaskStatusChecker',
]

# Chatty library loggers; only ERROR and above get through
noisy_loggers = [
    'autogen_core',
    'autogen_agentchat',
    'httpx',
    'openai',
    'httpcore',
    'httpcore.connection',
    'httpcore.http11',
]

# Apply one threshold per group of logger names
for level, names in ((logging.INFO, agent_loggers), (logging.ERROR, noisy_loggers)):
    for logger_name in names:
        logging.getLogger(logger_name).setLevel(level)

print("✅ Logging configured for clean output")
print("📝 Ready to initialize memory system and agents")

✅ Environment loaded successfully
✅ OPENAI_API_KEY found in environment
✅ Logging configured for clean output
📝 Ready to initialize memory system and agents


In [2]:
# Memory and managers
from keyvalue_memory import KeyValueMemory
from task_context_manager import TaskContextManager
from query_tree_manager import QueryTreeManager
from database_schema_manager import DatabaseSchemaManager
from node_history_manager import NodeHistoryManager

# Schema reader
from schema_reader import SchemaReader

# All 4 agents + task status checker
from query_analyzer_agent import QueryAnalyzerAgent
from schema_linker_agent import SchemaLinkerAgent
from sql_generator_agent import SQLGeneratorAgent
from sql_evaluator_agent import SQLEvaluatorAgent
from task_status_checker import TaskStatusChecker

# Memory types - updated imports based on actual content
from memory_content_types import (
    TaskContext, QueryNode, NodeStatus, TaskStatus,
    TableSchema, ColumnInfo, ExecutionResult, NodeOperation
)

# AutoGen components
from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.conditions import TextMentionTermination
from autogen_agentchat.teams import RoundRobinGroupChat
from autogen_ext.models.openai import OpenAIChatCompletionClient

In [3]:
# Create the shared key-value memory and the managers that are built on it.
# Any failure is reported with its traceback and then re-raised so the
# notebook does not continue with a half-initialized state.
try:
    print("🧠 Initializing shared memory system...")
    memory = KeyValueMemory()

    # Each manager is handed the same memory object
    print("📋 Initializing workflow managers...")
    task_manager = TaskContextManager(memory)
    tree_manager = QueryTreeManager(memory)
    schema_manager = DatabaseSchemaManager(memory)
    history_manager = NodeHistoryManager(memory)

    print("\n".join([
        "✅ Shared memory and managers initialized",
        "   - Memory system ready for multi-agent coordination",
        "   - All managers connected to shared memory",
        "   - Ready for flexible orchestrator workflow",
    ]))

except Exception as exc:
    print(f"❌ Error initializing memory system: {exc}")
    import traceback
    full_trace = traceback.format_exc()
    print(f"Full error: {full_trace}")
    raise

🧠 Initializing shared memory system...
📋 Initializing workflow managers...
✅ Shared memory and managers initialized
   - Memory system ready for multi-agent coordination
   - All managers connected to shared memory
   - Ready for flexible orchestrator workflow


In [4]:
# Database configuration
data_path = "/home/norman/work/text-to-sql/MAC-SQL/data/bird"
tables_json_path = Path(data_path) / "dev_tables.json"
db_name = "california_schools"

# Test queries
test_queries = [
    "What is the highest eligible free rate for K-12 students in schools located in Alameda County?",
    "Show me schools with SAT scores above 1400 and their free lunch eligibility rates",
    "Find the top 5 counties by average SAT scores, including the number of schools and average free lunch rate"
]

# Pick a query (try different ones!)
test_query = test_queries[2]  # Use the complex query that matches the output
evidence = "Eligible free rate for K-12 = `Free Meal Count (K-12)` / `Enrollment (K-12)`"

print(f"🎯 Target Query: {test_query}")
print(f"🎯 Evidence: {evidence}")
print(f"📊 Database: {db_name}")
print("─" * 80)

# Initialize task context in shared memory
print("📝 Initializing task context in shared memory...")
task_id = "flexible_demo_001"
await task_manager.initialize(task_id, test_query, db_name, evidence)

# Load schema into shared memory
print("📚 Loading database schema into shared memory...")
schema_reader = SchemaReader(
    data_path=data_path,
    tables_json_path=str(tables_json_path),
    dataset_name="bird",
    lazy=False
)

await schema_manager.load_from_schema_reader(schema_reader, db_name)

# Initialize query tree with root node
print("🌳 Initializing query tree with root node...")
root_id = await tree_manager.initialize(test_query, evidence)
print(f"✅ Query tree initialized with root node: {root_id}")

# Get schema summary
summary = await schema_manager.get_schema_summary()
print(f"\nLoaded '{db_name}' database:")
print(f"  Tables: {summary['table_count']}")
print(f"  Columns: {summary['total_columns']}")
print(f"  Foreign keys: {summary['total_foreign_keys']}")
print("\n✅ All initialization complete - ready for orchestrator workflow")

🎯 Target Query: Find the top 5 counties by average SAT scores, including the number of schools and average free lunch rate
🎯 Evidence: Eligible free rate for K-12 = `Free Meal Count (K-12)` / `Enrollment (K-12)`
📊 Database: california_schools
────────────────────────────────────────────────────────────────────────────────
📝 Initializing task context in shared memory...
📚 Loading database schema into shared memory...
load json file from /home/norman/work/text-to-sql/MAC-SQL/data/bird/dev_tables.json

Loading all database info...
Found 11 databases in bird dataset
🌳 Initializing query tree with root node...
✅ Query tree initialized with root node: root

Loaded 'california_schools' database:
  Tables: 3
  Columns: 89
  Foreign keys: 2

✅ All initialization complete - ready for orchestrator workflow


In [5]:
# Settings handed to each of the four LLM-backed agents
llm_config = dict(
    model_name="gpt-4o",
    temperature=0.1,
    timeout=60,
)

# Construct the agents on top of the shared memory object. Any failure is
# printed with its traceback and re-raised.
try:
    print("🤖 Initializing all agents with shared memory connection...")

    # The four agents that receive the LLM settings
    query_analyzer = QueryAnalyzerAgent(memory, llm_config)
    schema_linker = SchemaLinkerAgent(memory, llm_config)
    sql_generator = SQLGeneratorAgent(memory, llm_config)
    sql_evaluator = SQLEvaluatorAgent(memory, llm_config)

    # The status checker is built from memory alone (no LLM settings passed)
    task_status_checker = TaskStatusChecker(memory)

    print("\n".join([
        "✅ All agents initialized and connected to shared memory",
        "   - QueryAnalyzerAgent: Ready for query decomposition",
        "   - SchemaLinkerAgent: Ready for schema mapping",
        "   - SQLGeneratorAgent: Ready for SQL generation",
        "   - SQLEvaluatorAgent: Ready for execution and evaluation",
        "   - TaskStatusChecker: Ready for workflow coordination",
        "   - All agents share the same memory space for coordination",
    ]))

except Exception as exc:
    print(f"❌ Error initializing agents: {exc}")
    import traceback
    full_trace = traceback.format_exc()
    print(f"Full error: {full_trace}")
    raise

QueryAnalyzerAgent - INFO - Initialized query_analyzer with model gpt-4o
SchemaLinkerAgent - INFO - Initialized schema_linker with model gpt-4o
SQLGeneratorAgent - INFO - Initialized sql_generator with model gpt-4o
SQLEvaluatorAgent - INFO - Initialized sql_evaluator with model gpt-4o


🤖 Initializing all agents with shared memory connection...
✅ All agents initialized and connected to shared memory
   - QueryAnalyzerAgent: Ready for query decomposition
   - SchemaLinkerAgent: Ready for schema mapping
   - SQLGeneratorAgent: Ready for SQL generation
   - SQLEvaluatorAgent: Ready for execution and evaluation
   - TaskStatusChecker: Ready for workflow coordination
   - All agents share the same memory space for coordination


## Orchestrator Agent
The text-to-SQL task is represented as a tree whose root node represents the initial user query. A simple query has only that one node. A complex query is decomposed first, and the simpler child nodes are then processed; the generated SQL of a parent node is the combination of the SQL of its child nodes. TaskStatusChecker reports the overall status by navigating the complete tree, and the orchestrator uses that status report to decide how to process each node. There are 4 tools in total: schema_linker (selects which tables and columns will be used in the SQL), query_analyzer (understands the intent of the query and gives hints for SQL generation), sql_generator (generates SQL based on all available information), and sql_evaluator (executes the SQL, analyzes the result, and gives suggestions if the SQL needs improvement). The tools have no fixed order; call them whenever necessary until TaskStatusChecker reports that all nodes are successfully processed (with good SQL).

In [6]:
# Build the orchestrating agent from three parts: an OpenAI chat client, the
# system prompt that states the workflow rules, and the tools exposed by the
# five agents created earlier. Failures are printed with a traceback and re-raised.
try:
    coordinator_client = OpenAIChatCompletionClient(
        model="gpt-4o",
        temperature=0.1,
        timeout=120,
        api_key=os.getenv("OPENAI_API_KEY"),
    )

    # System prompt for the coordinator; passed to the agent unchanged
    coordinator_system_message = """You are an orchestrator for a tree-based text-to-SQL workflow system with context-aware processing.

## System Overview
- User queries are represented as a tree structure in shared memory
- Simple queries = single root node
- Complex queries = root node with decomposed child nodes
- Parent node SQL = combination of child node SQLs
- Each node maintains context including previous attempts, errors, and feedback
- Your goal: Process all nodes until all have successful SQLs

## Available Tools
1. **schema_linker** - Identifies relevant tables/columns for the current node
2. **query_analyzer** - Understands query intent and decomposes complex queries into subtrees
3. **sql_generator** - Generates SQL based on linked schema and query analysis
4. **sql_evaluator** - Executes SQL and analyzes results, suggests improvements
5. **task_status_checker** - Reports tree status and next required action

## Tool Dependencies & Constraints
- **sql_generator** REQUIRES: Schema from current node OR parent node
- **sql_evaluator** REQUIRES: Existing SQL in current node (skip if no SQL exists)
- **sql_generator** LIMIT: Maximum 2 attempts per node (for easy fixes only)
- **NEVER call same tool twice consecutively on same node** (especially schema_linker and sql_evaluator)

## Tool Parameters
All tools accept a **"goal"** parameter that specifies the expected action. The goal should be:
- Contextual to the current node's state and needs
- May include summary of previous tool outputs
- May reference errors or feedback from prior attempts
- Adapted based on the node's current status

Example goals:
- "Link schema for customer orders query, focusing on date filters"
- "Generate SQL incorporating feedback: add GROUP BY clause for aggregation"
- "Retry SQL generation fixing the column name error"
- "Evaluate SQL results, check if totals match expected business logic"

## Processing Workflow
1. **Start**: Call task_status_checker with goal="check overall task status"
2. **Interpret status** to determine current node and its needs
3. **Call appropriate tool** with contextual goal based on:
   - Current node status
   - Tool dependencies (check if prerequisites met)
   - Previous attempts/errors (if any)
   - Tool call history (avoid consecutive duplicates)
4. **Iterate** until OVERALL_STATUS indicates completion

## Status-Based Actions
- **needs_schema** → schema_linker with goal="link schema for [node intent]"
- **needs_analysis** → query_analyzer with goal="analyze [node intent] for decomposition"
- **needs_sql** → Check if schema exists (current/parent), then sql_generator
- **needs_eval** → Check if SQL exists, then sql_evaluator (skip if no SQL)
- **bad_sql** → 
  - If first failure: sql_generator with goal addressing specific error
  - If second failure: Consider alternative approach or escalate

## Error Handling
- **No schema for SQL generation**: Check parent node for schema inheritance
- **SQL generation failed twice**: Mark node as requiring manual intervention
- **No SQL for evaluation**: Skip evaluator, check why SQL is missing
- **Consecutive tool calls detected**: Switch to different tool or check status

## Key Principles
- Tools automatically access full context (previous attempts, errors, feedback)
- Respect tool dependencies and constraints
- Track SQL generation attempts per node (max 2)
- Avoid tool repetition patterns
- Parent nodes wait for child completion

## Termination
Say "TERMINATE" only when:
- TREE OVERVIEW shows X/X nodes complete (all done)
- No PENDING nodes remain
- OVERALL_STATUS confirms completion

Begin by calling task_status_checker with goal="check initial task status and identify starting point".
        """

    # One tool per agent, collected in the same order as they are listed in the prompt
    coordinator_tools = [
        agent.get_tool()
        for agent in (
            schema_linker,
            query_analyzer,
            sql_generator,
            sql_evaluator,
            task_status_checker,
        )
    ]

    # Create flexible coordinator with status-based decision making
    coordinator = AssistantAgent(
        name="coordinator",
        system_message=coordinator_system_message,
        model_client=coordinator_client,
        tools=coordinator_tools,
    )

    print("\n".join([
        "✅ Coordinator initialized successfully",
        "   - OpenAI client configured",
        "   - All agent tools registered",
        "   - Ready for workflow orchestration",
    ]))

except Exception as exc:
    print(f"❌ Error initializing coordinator: {exc}")
    import traceback
    full_trace = traceback.format_exc()
    print(f"Full error: {full_trace}")
    raise

✅ Coordinator initialized successfully
   - OpenAI client configured
   - All agent tools registered
   - Ready for workflow orchestration


In [7]:
# Put the coordinator into a one-member round-robin team whose run ends when a
# message mentions "TERMINATE", then open the message stream for the task.
try:
    termination_condition = TextMentionTermination("TERMINATE")
    team = RoundRobinGroupChat(
        termination_condition=termination_condition,
        participants=[coordinator],
    )

    # Announce the run; the coordinator drives the workflow from here on
    print("\n".join([
        "🚀 Starting flexible orchestrator workflow...",
        "   - Query tree already initialized with root node",
        "   - Shared memory contains task context and database schema",
        "   - Coordinator will manage the workflow sequence",
    ]))

    workflow_task = "Process the text-to-SQL workflow using shared memory"
    stream = team.run_stream(task=workflow_task)
    print("✅ Workflow stream created successfully")

except Exception as exc:
    print(f"❌ Error starting workflow: {exc}")
    import traceback
    full_trace = traceback.format_exc()
    print(f"Full error: {full_trace}")
    raise

🚀 Starting flexible orchestrator workflow...
   - Query tree already initialized with root node
   - Shared memory contains task context and database schema
   - Coordinator will manage the workflow sequence
✅ Workflow stream created successfully


In [8]:
# Process messages with proper loop control
step_count = 0
max_steps = 100  # Safety limit
last_agent_called = None
workflow_complete = False
retry_count = {}  # Track retries per node
last_message_content = None  # Track duplicate messages
duplicate_count = 0

print("="*80)
print("WORKFLOW EXECUTION (Status-Based)")
print("="*80)

async for message in stream:
    # Check for duplicate messages first
    current_content = str(message.content) if hasattr(message, 'content') else None
    if current_content and current_content == last_message_content:
        duplicate_count += 1
        if duplicate_count >= 3:  # Stop after 3 identical messages
            print(f"\n⚠️  Detected repeated messages. Stopping to prevent infinite loop.")
            break
    else:
        duplicate_count = 0
        last_message_content = current_content
    
    # Process coordinator messages
    if hasattr(message, 'source') and message.source == 'coordinator':
        step_count += 1
        
        # Check max steps IMMEDIATELY after incrementing
        if step_count >= max_steps:
            print(f"\n⚠️  Reached maximum steps ({max_steps}). Stopping.")
            break
        
        if hasattr(message, 'content'):
            if isinstance(message.content, list) and len(message.content) > 0:
                # Tool calls
                for tool_call in message.content:
                    if hasattr(tool_call, 'name'):
                        agent_name = tool_call.name
                        last_agent_called = agent_name
                        
                        # Show which tool is being called
                        print(f"\n[Step {step_count}] Calling: {agent_name}")
                        
                        # Check for error patterns
                        if hasattr(tool_call, 'arguments'):
                            args = str(tool_call.arguments)
                            if "takes 2 positional arguments" in args:
                                print(f"         → ❌ Tool call error detected")
                                print(f"         → The coordinator is calling the tool incorrectly")
                                workflow_complete = False
                                break
                        
                        # Get current node info for context (only for tools that need it)
                        if agent_name not in ["task_status_checker"]:
                            current_id = await memory.get("current_node_id")
                            if current_id:
                                node = await tree_manager.get_node(current_id)
                                if node:
                                    # Show node intent
                                    if node.intent:
                                        print(f"         Current node: {node.intent[:50]}...")
                                    
                                    # Show node status
                                    status_info = []
                                    if node.mapping and node.mapping.tables:
                                        status_info.append("✓mapped")
                                    else:
                                        status_info.append("✗no-mapping")
                                    
                                    if node.sql:
                                        status_info.append("✓has-sql")
                                    else:
                                        status_info.append("✗no-sql")
                                    
                                    if node.executionResult:
                                        status_info.append("✓executed")
                                        
                                    # Check if this is a retry
                                    if agent_name == "sql_generator" and node.sql:
                                        retry_count[current_id] = retry_count.get(current_id, 0) + 1
                                        print(f"         Status: {' '.join(status_info)} [RETRY #{retry_count[current_id]}]")
                                    else:
                                        print(f"         Status: {' '.join(status_info)}")
                            
            elif isinstance(message.content, str):
                # Check if this is termination
                if "TERMINATE" in message.content:
                    workflow_complete = True
                    print(f"\n[Step {step_count}] ✅ WORKFLOW COMPLETE")
                    break
                # Check for error messages
                elif "error" in message.content.lower() or "takes 2 positional arguments" in message.content:
                    print(f"\n[Step {step_count}] ❌ Error in coordinator message")
                    print(f"         Content: {message.content[:100]}...")
    
    # Capture and display tool responses
    elif hasattr(message, 'source') and message.source != 'coordinator':
        if hasattr(message, 'content'):
            content = str(message.content)
            
            # Check for tool errors
            if "takes 2 positional arguments" in content:
                print(f"         → ❌ Tool error: Incorrect arguments provided")
                print(f"         → This is why the workflow is stuck!")
                # Force stop on tool errors
                print(f"\n⚠️  Stopping due to tool invocation error.")
                break
            
            # Special handling for task_status_checker to show ACTION and quality
            if last_agent_called == "task_status_checker" and "ACTION:" in content:
                import re
                action_match = re.search(r'ACTION:\s*(.+?)(?:\n|$)', content)
                if action_match:
                    action = action_match.group(1).strip()
                    print(f"         → ACTION: {action}")
                    
                    # Check for quality info
                    if "Evaluation Quality:" in content:
                        quality_match = re.search(r'Evaluation Quality:\s*(\w+)', content)
                        if quality_match:
                            quality = quality_match.group(1)
                            print(f"         → Quality: {quality}")
                    
                    # Show what this means
                    if "RETRY NODE" in action:
                        print("         → Next: Regenerate SQL due to poor quality")
                    elif "PROCESS NODE" in action:
                        print("         → Next: Continue processing this node")
                    elif "TASK COMPLETE" in action:
                        print("         → Next: TERMINATE")
            
            # Show success/failure for other tools
            elif last_agent_called and last_agent_called != "task_status_checker":
                if "error" in content.lower():
                    print(f"         → ❌ Error occurred")
                else:
                    print(f"         → ✅ Success")

print("\n" + "="*80)
if workflow_complete:
    print("WORKFLOW COMPLETED SUCCESSFULLY")
else:
    print("WORKFLOW STOPPED (may not have completed)")
    print("\nPossible issues:")
    print("- Check tool argument compatibility")
    print("- Verify all required parameters are provided correctly")
    
# Show retry statistics
if retry_count:
    print("\nRetry Statistics:")
    for node_id, count in retry_count.items():
        print(f"  Node {node_id[-15:]}: {count} retries")

WORKFLOW EXECUTION (Status-Based)

[Step 1] Calling: task_status_checker

[Step 2] Calling: task_status_checker


SchemaLinkerAgent - INFO - Available tables in schema: ['frpm', 'satscores', 'schools']
SchemaLinkerAgent - INFO - Schema linking context prepared for node: root
SchemaLinkerAgent - INFO - Node details: {'nodeId': 'root', 'status': 'created', 'childIds': [], 'intent': 'Find the top 5 counties by average SAT scores, including the number of schools and average free lunch rate', 'schema_linking': {}, 'generation': {}, 'evaluation': {}, 'evidence': 'Eligible free rate for K-12 = `Free Meal Count (K-12)` / `Enrollment (K-12)`'}



[Step 4] Calling: schema_linker


SchemaLinkerAgent - INFO - Raw LLM output: ```xml
<schema_linking>
  <available_schema>
    <tables>
      <table name="frpm">
        <columns>
          <column name="CDSCode" type="text" sample_values="['12345678901234', '98765432109876']"/>
          <column name="County Name" type="text" sample_values="['Los Angeles', 'San Diego', 'Orange']"/>
          <column name="Enrollment (K-12)" type="real" sample_values="[1000, 2000, 1500]"/>
          <column name="Free Meal Count (K-12)" type="real" sample_values="[500, 800, 600]"/>
        </columns>
      </table>
      <table name="satscores">
        <columns>
          <column name="cds" type="text" sample_values="['12345678901234', '98765432109876']"/>
          <column name="cname" type="text" sample_values="['Los Angeles', 'San Diego', 'Orange']"/>
          <column name="AvgScrRead" type="integer" sample_values="[500, 550, 600]"/>
          <column name="AvgScrMath" type="integer" sample_values="[520, 530, 610]"/>
          <col


[Step 5] Calling: schema_linker


SQLGeneratorAgent - INFO - SQL generator context prepared for node: root
SQLGeneratorAgent - INFO - Node detail: {'nodeId': 'root', 'status': 'created', 'childIds': [], 'intent': 'Find the top 5 counties by average SAT scores, including the number of schools and average free lunch rate', 'schema_linking': {'available_schema': {'tables': {'table': [{'name': 'frpm', 'columns': {'column': [{'name': 'CDSCode', 'type': 'text', 'sample_values': "['12345678901234', '98765432109876']"}, {'name': 'County Name', 'type': 'text', 'sample_values': "['Los Angeles', 'San Diego', 'Orange']"}, {'name': 'Enrollment (K-12)', 'type': 'real', 'sample_values': '[1000, 2000, 1500]'}, {'name': 'Free Meal Count (K-12)', 'type': 'real', 'sample_values': '[500, 800, 600]'}]}}, {'name': 'satscores', 'columns': {'column': [{'name': 'cds', 'type': 'text', 'sample_values': "['12345678901234', '98765432109876']"}, {'name': 'cname', 'type': 'text', 'sample_values': "['Los Angeles', 'San Diego', 'Orange']"}, {'name': '


[Step 7] Calling: sql_generator


SQLGeneratorAgent - INFO - Raw LLM output: <generation>
  <query_type>complex</query_type>
  <sql>
    SELECT 
      ss.cname AS County,
      AVG((ss.AvgScrRead + ss.AvgScrMath + ss.AvgScrWrite) / 3.0) AS AvgSATScore,
      COUNT(DISTINCT s.CDSCode) AS NumberOfSchools,
      AVG(CAST(f."Free Meal Count (K-12)" AS REAL) / f."Enrollment (K-12)") AS AvgFreeLunchRate
    FROM 
      satscores ss
    INNER JOIN 
      schools s ON ss.cds = s.CDSCode
    INNER JOIN 
      frpm f ON s.CDSCode = f.CDSCode
    GROUP BY 
      ss.cname
    ORDER BY 
      AvgSATScore DESC
    LIMIT 5;
  </sql>
  <explanation>
    This query calculates the top 5 counties by average SAT scores. It joins the `satscores`, `schools`, and `frpm` tables to gather necessary data. The average SAT score is computed by averaging the reading, math, and writing scores. The number of schools is counted using distinct `CDSCode` values from the `schools` table. The average free lunch rate is calculated using the formula provid


[Step 8] Calling: sql_generator


SQLEvaluatorAgent - INFO - Using current node: root


[SQLExecutor] Connecting to database: /home/norman/work/text-to-sql/MAC-SQL/data/bird/dev_databases/california_schools/california_schools.sqlite

[Step 10] Calling: sql_evaluator


SQLEvaluatorAgent - INFO - Raw LLM output: <evaluation>
  <answers_intent>yes</answers_intent>
  <result_quality>excellent</result_quality>
  <result_summary>The SQL query successfully retrieves the top 5 counties by average SAT scores, including the number of schools and average free lunch rate. The results are complete, accurate, and directly address the query intent.</result_summary>
  <generator_context_review>
    <generator_reasoning>The SQL generator correctly identified the need to calculate the average SAT score by averaging the reading, math, and writing scores. It also correctly grouped by county and calculated the number of schools and average free lunch rate.</generator_reasoning>
    <reasoning_validity>valid</reasoning_validity>
    <context_notes>The generator's approach was appropriate for the query intent, and the SQL logic aligns well with the expected results.</context_notes>
  </generator_context_review>
  <issues>
  </issues>
  <suggestions>
  </suggestions>
  <co


[Step 11] Calling: sql_evaluator

[Step 13] Calling: task_status_checker

[Step 14] Calling: task_status_checker

[Step 16] ✅ WORKFLOW COMPLETE

WORKFLOW COMPLETED SUCCESSFULLY


In [9]:
# Show summary of what was processed
print("\n" + "="*60)
print("PROCESSING SUMMARY")
print("="*60)

tree = await tree_manager.get_tree()
if tree and "nodes" in tree:
    # Count nodes by status
    status_counts = {}
    for node_id, node_data in tree["nodes"].items():
        status = node_data.get("status", "unknown")
        status_counts[status] = status_counts.get(status, 0) + 1
    
    print(f"\nTotal nodes created: {len(tree['nodes'])}")
    for status, count in status_counts.items():
        print(f"  {status}: {count}")
    
    # Show nodes with results
    print("\nNodes with SQL results:")
    for node_id, node_data in tree["nodes"].items():
        if node_data.get("sql") and node_data.get("executionResult"):
            print(f"\n  Node: {node_id[-15:]}")
            print(f"  Intent: {node_data['intent'][:60]}...")
            
            result = node_data['executionResult']
            if result.get('data'):
                print(f"  Result: {result.get('rowCount', 0)} rows")
                # Show first result
                first_row = result['data'][0] if result['data'] else None
                if first_row:
                    if isinstance(first_row, list) and len(first_row) >= 2:
                        print(f"  Sample: {first_row[0]} - {first_row[1]}")
                    else:
                        print(f"  Sample: {str(first_row)[:200]}...")


PROCESSING SUMMARY

Total nodes created: 1
  executed_success: 1

Nodes with SQL results:

  Node: root
  Intent: Find the top 5 counties by average SAT scores, including the...
  Result: 5 rows
  Sample: Marin - 539.9259259259259
