# Flexible Orchestrator Workflow

This notebook demonstrates a simplified text-to-SQL workflow where:
1. The orchestrator only decides which tool to call next
2. Each tool operates on the current node (determined by the code, not the LLM)
3. Tools are called with minimal input - they know what to do
4. The orchestrator's job is to call tools or TERMINATE

## Key Differences from Previous Versions:
- **No node IDs in tool calls** - the current node is managed automatically
- **Simple tool calls** - tools need minimal parameters
- **Task status checker** - provides clear ACTION directives for the orchestrator

In [1]:
import os
import sys
import asyncio
import logging
from pathlib import Path
from typing import Dict, Any, List, Optional
from dotenv import load_dotenv

# Make the project modules under ../src importable, then let python-dotenv
# load environment variables (load_dotenv is called with its defaults).
sys.path.append('../src')
load_dotenv()

# The coordinator's OpenAI client reads OPENAI_API_KEY from the environment.
# Report up front whether it is available; to provide it, run
# `source ../.env && export OPENAI_API_KEY` before starting the kernel
# (setting it directly in the notebook is not recommended for production).
if os.getenv("OPENAI_API_KEY"):
    print("OPENAI_API_KEY found in environment")
else:
    print("WARNING: OPENAI_API_KEY not found in environment")
    print("Run: source ../.env && export OPENAI_API_KEY")

# Root logging: INFO level with timestamped "time - logger - level - message"
# records, so the project's own agents and managers report their progress.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Cap the chatty framework / HTTP loggers at WARNING.
# NOTE: 'autogen_core.events' is capped as well, so its INFO-level event
# records are NOT shown. Change that one entry to logging.INFO if you want
# to see them.
for noisy_logger in (
    'autogen_core.events',
    'autogen_core',
    'autogen_agentchat',
    'httpx',
    'openai',
    'httpcore',
    'httpcore.connection',
    'httpcore.http11',
):
    logging.getLogger(noisy_logger).setLevel(logging.WARNING)

OPENAI_API_KEY found in environment


In [2]:
# Memory and managers
from keyvalue_memory import KeyValueMemory
from task_context_manager import TaskContextManager
from query_tree_manager import QueryTreeManager
from database_schema_manager import DatabaseSchemaManager
from node_history_manager import NodeHistoryManager

# Schema reader
from schema_reader import SchemaReader

# All 4 agents + task status checker
from query_analyzer_agent import QueryAnalyzerAgent
from schema_linker_agent import SchemaLinkerAgent
from sql_generator_agent import SQLGeneratorAgent
from sql_evaluator_agent import SQLEvaluatorAgent
from task_status_checker import TaskStatusChecker

# Memory types
from memory_content_types import (
    TaskContext, QueryNode, NodeStatus, TaskStatus,
    QueryMapping, TableMapping, ColumnMapping, JoinMapping,
    TableSchema, ColumnInfo, ExecutionResult
)

# AutoGen components
from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.conditions import TextMentionTermination
from autogen_agentchat.teams import RoundRobinGroupChat
from autogen_ext.models.openai import OpenAIChatCompletionClient

In [3]:
# One KeyValueMemory instance is shared by every manager below, so they all
# read and write the same store.
memory = KeyValueMemory()

# Build the four managers on top of the shared memory (same construction
# order as before: task, tree, schema, history).
task_manager, tree_manager, schema_manager, history_manager = (
    manager_cls(memory)
    for manager_cls in (
        TaskContextManager,
        QueryTreeManager,
        DatabaseSchemaManager,
        NodeHistoryManager,
    )
)

In [4]:
# Database configuration
# The BIRD dataset location can be overridden with the BIRD_DATA_PATH
# environment variable so the notebook runs on machines other than the
# author's; the original absolute path remains the default.
data_path = os.environ.get(
    "BIRD_DATA_PATH",
    "/home/norman/work/text-to-sql/MAC-SQL/data/bird",
)
tables_json_path = Path(data_path) / "dev_tables.json"
db_name = "california_schools"

# Test queries
test_queries = [
    "What is the highest eligible free rate for K-12 students in schools located in Alameda County?",
    "Show me schools with SAT scores above 1400 and their free lunch eligibility rates",
    "Find the top 5 counties by average SAT scores, including the number of schools and average free lunch rate"
]

# Pick a query (try different ones!)
test_query = test_queries[2]
print(f"Query: {test_query}")
print("-" * 80)

# Initialize task
task_id = "flexible_demo_001"
await task_manager.initialize(task_id, test_query, db_name)

# Load schema
schema_reader = SchemaReader(
    data_path=data_path,
    tables_json_path=str(tables_json_path),
    dataset_name="bird",
    lazy=False
)

await schema_manager.load_from_schema_reader(schema_reader, db_name)

# Get schema summary
summary = await schema_manager.get_schema_summary()
print(f"\nLoaded '{db_name}' database:")
print(f"  Tables: {summary['table_count']}")
print(f"  Columns: {summary['total_columns']}")
print(f"  Foreign keys: {summary['total_foreign_keys']}")

2025-05-26 19:11:06,887 - TaskContextManager - INFO - Initialized task context for task flexible_demo_001


Query: Find the top 5 counties by average SAT scores, including the number of schools and average free lunch rate
--------------------------------------------------------------------------------
load json file from /home/norman/work/text-to-sql/MAC-SQL/data/bird/dev_tables.json

Loading all database info...
Found 11 databases in bird dataset


2025-05-26 19:11:19,450 - DatabaseSchemaManager - INFO - Initialized empty database schema
2025-05-26 19:11:19,451 - DatabaseSchemaManager - INFO - Added table 'frpm' to schema
2025-05-26 19:11:19,451 - DatabaseSchemaManager - INFO - Added table 'satscores' to schema
2025-05-26 19:11:19,452 - DatabaseSchemaManager - INFO - Added table 'schools' to schema
2025-05-26 19:11:19,452 - DatabaseSchemaManager - INFO - Loaded schema for database 'california_schools' with 3 tables



Loaded 'california_schools' database:
  Tables: 3
  Columns: 89
  Foreign keys: 2


In [5]:
# Settings handed to each of the four LLM-backed agents.
llm_config = dict(model_name="gpt-4o", temperature=0.1, timeout=60)

# Build the agents on the shared memory, in pipeline order:
# analyze -> link schema -> generate SQL -> evaluate SQL.
query_analyzer, schema_linker, sql_generator, sql_evaluator = (
    agent_cls(memory, llm_config)
    for agent_cls in (
        QueryAnalyzerAgent,
        SchemaLinkerAgent,
        SQLGeneratorAgent,
        SQLEvaluatorAgent,
    )
)

# The status checker is constructed from memory alone; it takes no LLM config.
task_status_checker = TaskStatusChecker(memory)

2025-05-26 19:11:19,474 - QueryAnalyzerAgent - INFO - Initialized query_analyzer with model gpt-4o
2025-05-26 19:11:19,485 - SchemaLinkerAgent - INFO - Initialized schema_linker with model gpt-4o
2025-05-26 19:11:19,495 - SQLGeneratorAgent - INFO - Initialized sql_generator with model gpt-4o
2025-05-26 19:11:19,506 - SQLEvaluatorAgent - INFO - Initialized sql_evaluator with model gpt-4o


In [6]:
# Model client used by the coordinator agent. The API key comes from the
# environment; the coordinator gets a longer timeout (120) than the agents.
coordinator_client_settings = {
    "model": "gpt-4o",
    "temperature": 0.1,
    "timeout": 120,
    "api_key": os.getenv("OPENAI_API_KEY"),
}
coordinator_client = OpenAIChatCompletionClient(**coordinator_client_settings)

# Create flexible coordinator with status-based decision making
#
# The coordinator is a single AssistantAgent that never writes SQL itself:
# its system prompt tells it to pick one of the five tools registered below,
# to pass every tool a `task` string, and to reply with the literal word
# TERMINATE once task_status_checker reports "ACTION: TASK COMPLETE".
# The prompt text is runtime behavior - edit it deliberately, not cosmetically.
coordinator = AssistantAgent(
    name="coordinator",
    system_message="""You are a smart orchestrator for a text-to-SQL workflow.

Your job is to:
1. Examine node status and decide which tool to call
2. Call tools based on what each node needs
3. Say TERMINATE when all nodes are complete

Available tools:
- query_analyzer: Breaks down the user's query into a tree structure
- schema_linker: Links the current node to database schema (ALWAYS call this before generating SQL)
- sql_generator: Generates SQL for the current node
- sql_evaluator: Executes and evaluates SQL for the current node
- task_status_checker: Tells you the current node status and what needs to be done

IMPORTANT: 
- Tools operate on the "current node" automatically - the system tracks which node to work on.
- All tools require a 'task' parameter. Always call tools with task="current task description"

DECISION LOGIC based on node status:
1. No tree exists → call query_analyzer with the user's query as task
2. Node has no SQL → ALWAYS call schema_linker first with task="link schema for current node", then sql_generator
3. Node has bad SQL (evaluation showed poor quality) → call schema_linker with task="relink schema with fixes", then sql_generator with task="regenerate SQL with fixes"
4. Node has SQL but not evaluated → call sql_evaluator with task="evaluate SQL for current node"
5. After any evaluation → call task_status_checker with task="check task status"

CRITICAL: Even if query_analyzer creates some mapping, you MUST still call schema_linker before sql_generator. The mapping from query_analyzer is not sufficient for SQL generation.

The task_status_checker will tell you:
- Current node status (what it has/needs)
- Quality of results (if evaluated)
- ACTION directive:
  - "ACTION: PROCESS NODE" → Node needs work (always schema_linker then sql_generator)
  - "ACTION: RETRY NODE" → Node has poor results (schema_linker then sql_generator again)
  - "ACTION: TASK COMPLETE" → All nodes done, say TERMINATE
  - "ACTION: ERROR" → Something went wrong

WORKFLOW:
1. Start: call query_analyzer with the query
2. Call task_status_checker to understand current state
3. Based on the status report:
   - If node needs SQL → ALWAYS: schema_linker first, then sql_generator
   - If SQL needs retry → ALWAYS: schema_linker first, then sql_generator
   - If SQL not evaluated → sql_evaluator
   - After evaluation → task_status_checker
4. Repeat until task_status_checker says "TASK COMPLETE"

Remember: 
- ALWAYS call schema_linker before sql_generator (even on retries)
- Nodes can be retried if results are poor
- SQL can be regenerated if evaluation shows issues
- Always check status after evaluation to determine next steps""",
    model_client=coordinator_client,
    # One tool per agent plus the status checker; each object's get_tool()
    # supplies the tool the coordinator can call.
    tools=[
        query_analyzer.get_tool(), 
        schema_linker.get_tool(), 
        sql_generator.get_tool(), 
        sql_evaluator.get_tool(),
        task_status_checker.get_tool()
    ]
)

In [7]:
# The run ends as soon as any message mentions the word TERMINATE.
termination_condition = TextMentionTermination("TERMINATE")

# A one-member round-robin team: the coordinator simply takes every turn.
team = RoundRobinGroupChat(
    [coordinator],
    termination_condition=termination_condition,
)

# Start the workflow on the selected query; the returned stream is consumed
# by the processing loop in the next cell.
stream = team.run_stream(task=test_query)

In [8]:
# Process messages with proper loop control
step_count = 0
max_steps = 100  # Safety limit
last_agent_called = None
workflow_complete = False
retry_count = {}  # Track retries per node
last_message_content = None  # Track duplicate messages
duplicate_count = 0

print("="*80)
print("WORKFLOW EXECUTION (Status-Based)")
print("="*80)

async for message in stream:
    # Check for duplicate messages first
    current_content = str(message.content) if hasattr(message, 'content') else None
    if current_content and current_content == last_message_content:
        duplicate_count += 1
        if duplicate_count >= 3:  # Stop after 3 identical messages
            print(f"\n⚠️  Detected repeated messages. Stopping to prevent infinite loop.")
            break
    else:
        duplicate_count = 0
        last_message_content = current_content
    
    # Process coordinator messages
    if hasattr(message, 'source') and message.source == 'coordinator':
        step_count += 1
        
        # Check max steps IMMEDIATELY after incrementing
        if step_count >= max_steps:
            print(f"\n⚠️  Reached maximum steps ({max_steps}). Stopping.")
            break
        
        if hasattr(message, 'content'):
            if isinstance(message.content, list) and len(message.content) > 0:
                # Tool calls
                for tool_call in message.content:
                    if hasattr(tool_call, 'name'):
                        agent_name = tool_call.name
                        last_agent_called = agent_name
                        
                        # Show which tool is being called
                        print(f"\n[Step {step_count}] Calling: {agent_name}")
                        
                        # Check for error patterns
                        if hasattr(tool_call, 'arguments'):
                            args = str(tool_call.arguments)
                            if "takes 2 positional arguments" in args:
                                print(f"         → ❌ Tool call error detected")
                                print(f"         → The coordinator is calling the tool incorrectly")
                                workflow_complete = False
                                break
                        
                        # Get current node info for context
                        if agent_name not in ["query_analyzer", "task_status_checker"]:
                            current_id = await memory.get("current_node_id")
                            if current_id:
                                node = await tree_manager.get_node(current_id)
                                if node:
                                    # Show node intent
                                    if node.intent:
                                        print(f"         Current node: {node.intent[:50]}...")
                                    
                                    # Show node status
                                    status_info = []
                                    if node.mapping and node.mapping.tables:
                                        status_info.append("✓mapped")
                                    else:
                                        status_info.append("✗no-mapping")
                                    
                                    if node.sql:
                                        status_info.append("✓has-sql")
                                    else:
                                        status_info.append("✗no-sql")
                                    
                                    if node.executionResult:
                                        status_info.append("✓executed")
                                        
                                    # Check if this is a retry
                                    if agent_name == "sql_generator" and node.sql:
                                        retry_count[current_id] = retry_count.get(current_id, 0) + 1
                                        print(f"         Status: {' '.join(status_info)} [RETRY #{retry_count[current_id]}]")
                                    else:
                                        print(f"         Status: {' '.join(status_info)}")
                            
            elif isinstance(message.content, str):
                # Check if this is termination
                if "TERMINATE" in message.content:
                    workflow_complete = True
                    print(f"\n[Step {step_count}] ✅ WORKFLOW COMPLETE")
                    break
                # Check for error messages
                elif "error" in message.content.lower() or "takes 2 positional arguments" in message.content:
                    print(f"\n[Step {step_count}] ❌ Error in coordinator message")
                    print(f"         Content: {message.content[:100]}...")
    
    # Capture and display tool responses
    elif hasattr(message, 'source') and message.source != 'coordinator':
        if hasattr(message, 'content'):
            content = str(message.content)
            
            # Check for tool errors
            if "takes 2 positional arguments" in content:
                print(f"         → ❌ Tool error: Incorrect arguments provided")
                print(f"         → This is why the workflow is stuck!")
                # Force stop on tool errors
                print(f"\n⚠️  Stopping due to tool invocation error.")
                break
            
            # Special handling for task_status_checker to show ACTION and quality
            if last_agent_called == "task_status_checker" and "ACTION:" in content:
                import re
                action_match = re.search(r'ACTION:\s*(.+?)(?:\n|$)', content)
                if action_match:
                    action = action_match.group(1).strip()
                    print(f"         → ACTION: {action}")
                    
                    # Check for quality info
                    if "Evaluation Quality:" in content:
                        quality_match = re.search(r'Evaluation Quality:\s*(\w+)', content)
                        if quality_match:
                            quality = quality_match.group(1)
                            print(f"         → Quality: {quality}")
                    
                    # Show what this means
                    if "RETRY NODE" in action:
                        print("         → Next: Regenerate SQL due to poor quality")
                    elif "PROCESS NODE" in action:
                        print("         → Next: Continue processing this node")
                    elif "TASK COMPLETE" in action:
                        print("         → Next: TERMINATE")
            
            # Show success/failure for other tools
            elif last_agent_called:
                if "error" in content.lower():
                    print(f"         → ❌ Error occurred")
                else:
                    print(f"         → ✅ Success")

print("\n" + "="*80)
if workflow_complete:
    print("WORKFLOW COMPLETED SUCCESSFULLY")
else:
    print("WORKFLOW STOPPED (may not have completed)")
    print("\nPossible issues:")
    print("- task_status_checker is being called with wrong arguments")
    print("- The coordinator needs to call it without any arguments")
    
# Show retry statistics
if retry_count:
    print("\nRetry Statistics:")
    for node_id, count in retry_count.items():
        print(f"  Node {node_id[-15:]}: {count} retries")

WORKFLOW EXECUTION (Status-Based)


2025-05-26 19:11:20,855 - QueryAnalyzerAgent - INFO - Query analyzer context prepared with schema length: 9697
2025-05-26 19:11:20,855 - QueryAnalyzerAgent - INFO - query: Find the top 5 counties by average SAT scores, including the number of schools and average free lunch rate database: california_schools



[Step 1] Calling: query_analyzer


2025-05-26 19:11:26,293 - QueryTreeManager - INFO - Initialized query tree with root node node_1748301086.293675_root
2025-05-26 19:11:26,294 - NodeHistoryManager - INFO - Added create operation for node node_1748301086.293675_root
2025-05-26 19:11:26,294 - QueryTreeManager - INFO - Added node node_1748301086.294442_1 to tree
2025-05-26 19:11:26,294 - NodeHistoryManager - INFO - Added create operation for node node_1748301086.294442_1
2025-05-26 19:11:26,295 - QueryTreeManager - INFO - Added node node_1748301086.295259_2 to tree
2025-05-26 19:11:26,295 - NodeHistoryManager - INFO - Added create operation for node node_1748301086.295259_2
2025-05-26 19:11:26,295 - QueryTreeManager - INFO - Added node node_1748301086.295826_3 to tree
2025-05-26 19:11:26,296 - NodeHistoryManager - INFO - Added create operation for node node_1748301086.295826_3
2025-05-26 19:11:26,296 - QueryTreeManager - INFO - Updated node node_1748301086.293675_root
2025-05-26 19:11:26,296 - QueryTreeManager - INFO - Se


[Step 2] Calling: query_analyzer


2025-05-26 19:11:27,456 - TaskStatusChecker - INFO - Checking overall task status...
2025-05-26 19:11:27,456 - TaskStatusChecker - INFO - Tree status: 0 good, 0 poor, 4 unprocessed
2025-05-26 19:11:27,456 - QueryTreeManager - INFO - Set current node to node_1748301086.294442_1
2025-05-26 19:11:27,457 - TaskStatusChecker - INFO - Updated current node to: node_1748301086.294442_1



[Step 4] Calling: task_status_checker

[Step 5] Calling: task_status_checker


2025-05-26 19:11:29,364 - SchemaLinkerAgent - INFO - Available tables in schema: ['frpm', 'satscores', 'schools']
2025-05-26 19:11:29,365 - SchemaLinkerAgent - INFO - Schema linking context prepared for node: node_1748301086.294442_1
2025-05-26 19:11:29,365 - SchemaLinkerAgent - INFO - Node details: {'nodeId': 'node_1748301086.294442_1', 'intent': 'Calculate average SAT scores for each county.', 'mapping': {'tables': [{'name': 'satscores', 'alias': None, 'purpose': 'Used in subquery 1'}], 'columns': []}, 'childIds': [], 'status': 'created', 'parentId': 'node_1748301086.293675_root'}



[Step 7] Calling: schema_linker


2025-05-26 19:11:41,485 - SchemaLinkerAgent - INFO - Raw LLM output (first 500 chars): To calculate the average SAT scores for each county, we need to select the appropriate tables and columns from the provided schema. Here's the analysis:

### Analysis

1. **Query Intent**: Calculate average SAT scores for each county.
2. **Necessary Tables and Columns**:
   - **Table**: `satscores`
     - **Purpose**: This table contains SAT score data, which is necessary to calculate the average SAT scores.
     - **Columns**:
       - **`cname`**: Used for grouping by county to calculate the a
2025-05-26 19:11:41,486 - QueryTreeManager - INFO - Updated node node_1748301086.294442_1
2025-05-26 19:11:41,486 - NodeHistoryManager - INFO - Added revise operation for node node_1748301086.294442_1
2025-05-26 19:11:41,487 - SchemaLinkerAgent - INFO - Schema Linking
2025-05-26 19:11:41,487 - SchemaLinkerAgent - INFO - Query intent: Calculate average SAT scores for each county.
2025-05-26 19:11:41,487 - Sche


[Step 8] Calling: schema_linker


2025-05-26 19:11:42,676 - SQLGeneratorAgent - INFO - SQL generator context prepared for node: node_1748301086.294442_1
2025-05-26 19:11:42,677 - SQLGeneratorAgent - INFO - Node detail: {'nodeId': 'node_1748301086.294442_1', 'intent': 'Calculate average SAT scores for each county.', 'mapping': {'tables': [{'name': 'satscores', 'alias': 's', 'purpose': 'Contains SAT score data needed to calculate average scores by county'}], 'columns': [{'table': 'satscores', 'column': 'cname', 'usedFor': 'group'}, {'table': 'satscores', 'column': 'AvgScrRead', 'usedFor': 'aggregate'}, {'table': 'satscores', 'column': 'AvgScrMath', 'usedFor': 'aggregate'}, {'table': 'satscores', 'column': 'AvgScrWrite', 'usedFor': 'aggregate'}]}, 'childIds': [], 'status': 'created', 'parentId': 'node_1748301086.293675_root'}



[Step 10] Calling: sql_generator


2025-05-26 19:11:45,646 - SQLGeneratorAgent - INFO - Raw LLM output (first 500 chars): ```xml
<sql_generation>
  <query_type>aggregate</query_type>
  <sql>
    SELECT 
      s.cname AS county_name,
      AVG(s.AvgScrRead) AS avg_reading_score,
      AVG(s.AvgScrMath) AS avg_math_score,
      AVG(s.AvgScrWrite) AS avg_writing_score
    FROM 
      satscores s
    GROUP BY 
      s.cname
  </sql>
  <explanation>
    The query calculates the average SAT scores for reading, math, and writing for each county. It uses the 'satscores' table, grouping the results by the 'cname' column, wh
2025-05-26 19:11:45,646 - QueryTreeManager - INFO - Updated node node_1748301086.294442_1
2025-05-26 19:11:45,647 - NodeHistoryManager - INFO - Added generate_sql operation for node node_1748301086.294442_1
2025-05-26 19:11:45,648 - SQLGeneratorAgent - INFO - SQL Generation
2025-05-26 19:11:45,648 - SQLGeneratorAgent - INFO - Query intent: Calculate average SAT scores for each county.
2025-05-26 19:11:45,648 


[Step 11] Calling: sql_generator


2025-05-26 19:11:46,670 - SQLEvaluatorAgent - INFO - Using current node: node_1748301086.294442_1
2025-05-26 19:11:46,672 - QueryTreeManager - INFO - Updated node node_1748301086.294442_1


[SQLExecutor] Connecting to database: /home/norman/work/text-to-sql/MAC-SQL/data/bird/dev_databases/california_schools/california_schools.sqlite

[Step 13] Calling: sql_evaluator


2025-05-26 19:11:50,870 - SQLEvaluatorAgent - INFO - Raw LLM output (first 500 chars): ```xml
<evaluation>
  <answers_intent>yes</answers_intent>
  <result_quality>excellent</result_quality>
  <result_summary>The results provide the average SAT reading, math, and writing scores for each county.</result_summary>
  <issues>
    <issue>
      <type>performance</type>
      <description>Execution time is not provided, which could be useful for performance evaluation.</description>
      <severity>low</severity>
    </issue>
  </issues>
  <suggestions>
    <suggestion>Consider capturin
2025-05-26 19:11:50,871 - SQLEvaluatorAgent - INFO - SQL Execution & Evaluation
2025-05-26 19:11:50,871 - SQLEvaluatorAgent - INFO - Query intent: Calculate average SAT scores for each county.
2025-05-26 19:11:50,871 - SQLEvaluatorAgent - INFO - Evaluation results:
2025-05-26 19:11:50,871 - SQLEvaluatorAgent - INFO -   - Answers intent: YES
2025-05-26 19:11:50,872 - SQLEvaluatorAgent - INFO -   - Result quali


[Step 14] Calling: sql_evaluator


2025-05-26 19:11:52,814 - TaskStatusChecker - INFO - Checking overall task status...
2025-05-26 19:11:52,814 - TaskStatusChecker - INFO - Tree status: 1 good, 0 poor, 3 unprocessed
2025-05-26 19:11:52,814 - QueryTreeManager - INFO - Set current node to node_1748301086.295259_2
2025-05-26 19:11:52,815 - TaskStatusChecker - INFO - Updated current node to: node_1748301086.295259_2



[Step 16] Calling: task_status_checker

[Step 17] Calling: task_status_checker


2025-05-26 19:11:55,853 - SchemaLinkerAgent - INFO - Available tables in schema: ['frpm', 'satscores', 'schools']
2025-05-26 19:11:55,853 - SchemaLinkerAgent - INFO - Schema linking context prepared for node: node_1748301086.295259_2
2025-05-26 19:11:55,853 - SchemaLinkerAgent - INFO - Node details: {'nodeId': 'node_1748301086.295259_2', 'intent': 'Calculate the average free lunch rate for each county.', 'mapping': {'tables': [{'name': 'frpm', 'alias': None, 'purpose': 'Used in subquery 2'}], 'columns': []}, 'childIds': [], 'status': 'created', 'parentId': 'node_1748301086.293675_root'}



[Step 19] Calling: schema_linker


2025-05-26 19:12:03,465 - SchemaLinkerAgent - INFO - Raw LLM output (first 500 chars): To calculate the average free lunch rate for each county, we need to select the appropriate tables and columns from the provided schema. Here's the analysis:

### Analysis

1. **Query Intent**: Calculate the average free lunch rate for each county.
2. **Necessary Tables and Columns**:
   - **Table**: `frpm`
     - **Purpose**: This table contains data on free and reduced-price meals, which is necessary to calculate the average free lunch rate.
     - **Columns**:
       - **`County Name`**: Used
2025-05-26 19:12:03,466 - QueryTreeManager - INFO - Updated node node_1748301086.295259_2
2025-05-26 19:12:03,466 - NodeHistoryManager - INFO - Added revise operation for node node_1748301086.295259_2
2025-05-26 19:12:03,466 - SchemaLinkerAgent - INFO - Schema Linking
2025-05-26 19:12:03,466 - SchemaLinkerAgent - INFO - Query intent: Calculate the average free lunch rate for each county.
2025-05-26 19:12:03,4


[Step 20] Calling: schema_linker


2025-05-26 19:12:04,283 - SQLGeneratorAgent - INFO - SQL generator context prepared for node: node_1748301086.295259_2
2025-05-26 19:12:04,283 - SQLGeneratorAgent - INFO - Node detail: {'nodeId': 'node_1748301086.295259_2', 'intent': 'Calculate the average free lunch rate for each county.', 'mapping': {'tables': [{'name': 'frpm', 'alias': 'f', 'purpose': 'Contains free lunch data needed to calculate average rates by county'}], 'columns': [{'table': 'frpm', 'column': 'County Name', 'usedFor': 'group'}, {'table': 'frpm', 'column': 'Percent (%) Eligible Free (K-12)', 'usedFor': 'aggregate'}]}, 'childIds': [], 'status': 'created', 'parentId': 'node_1748301086.293675_root'}



[Step 22] Calling: sql_generator


2025-05-26 19:12:06,742 - SQLGeneratorAgent - INFO - Raw LLM output (first 500 chars): ```xml
<sql_generation>
  <query_type>aggregate</query_type>
  <sql>
    SELECT 
      f."County Name" AS county_name,
      AVG(f."Percent (%) Eligible Free (K-12)") AS avg_free_lunch_rate
    FROM 
      frpm f
    GROUP BY 
      f."County Name"
  </sql>
  <explanation>
    The query calculates the average free lunch rate for each county using the 'frpm' table. It groups the results by the 'County Name' column and calculates the average of the 'Percent (%) Eligible Free (K-12)' column.
  </ex
2025-05-26 19:12:06,743 - QueryTreeManager - INFO - Updated node node_1748301086.295259_2
2025-05-26 19:12:06,743 - NodeHistoryManager - INFO - Added generate_sql operation for node node_1748301086.295259_2
2025-05-26 19:12:06,744 - SQLGeneratorAgent - INFO - SQL Generation
2025-05-26 19:12:06,744 - SQLGeneratorAgent - INFO - Query intent: Calculate the average free lunch rate for each county.
2025-05-26 19:1


[Step 23] Calling: sql_generator


2025-05-26 19:12:08,584 - SQLEvaluatorAgent - INFO - Using current node: node_1748301086.295259_2
2025-05-26 19:12:08,588 - QueryTreeManager - INFO - Updated node node_1748301086.295259_2


[SQLExecutor] Connecting to database: /home/norman/work/text-to-sql/MAC-SQL/data/bird/dev_databases/california_schools/california_schools.sqlite

[Step 25] Calling: sql_evaluator


2025-05-26 19:12:10,919 - SQLEvaluatorAgent - INFO - Raw LLM output (first 500 chars): ```xml
<evaluation>
  <answers_intent>yes</answers_intent>
  <result_quality>excellent</result_quality>
  <result_summary>The results provide the average free lunch rate for each county, expressed as a percentage.</result_summary>
  <issues>
    <issue>
      <type>performance</type>
      <description>Execution time is not provided, which could be useful for performance evaluation.</description>
      <severity>low</severity>
    </issue>
  </issues>
  <suggestions>
    <suggestion>Consider cap
2025-05-26 19:12:10,920 - SQLEvaluatorAgent - INFO - SQL Execution & Evaluation
2025-05-26 19:12:10,920 - SQLEvaluatorAgent - INFO - Query intent: Calculate the average free lunch rate for each county.
2025-05-26 19:12:10,920 - SQLEvaluatorAgent - INFO - Evaluation results:
2025-05-26 19:12:10,921 - SQLEvaluatorAgent - INFO -   - Answers intent: YES
2025-05-26 19:12:10,921 - SQLEvaluatorAgent - INFO -   - Res


[Step 26] Calling: sql_evaluator


2025-05-26 19:12:12,168 - TaskStatusChecker - INFO - Checking overall task status...
2025-05-26 19:12:12,168 - TaskStatusChecker - INFO - Tree status: 2 good, 0 poor, 2 unprocessed
2025-05-26 19:12:12,169 - QueryTreeManager - INFO - Set current node to node_1748301086.295826_3
2025-05-26 19:12:12,169 - TaskStatusChecker - INFO - Updated current node to: node_1748301086.295826_3



[Step 28] Calling: task_status_checker

[Step 29] Calling: task_status_checker


2025-05-26 19:12:15,036 - SchemaLinkerAgent - INFO - Available tables in schema: ['frpm', 'satscores', 'schools']
2025-05-26 19:12:15,036 - SchemaLinkerAgent - INFO - Schema linking context prepared for node: node_1748301086.295826_3
2025-05-26 19:12:15,037 - SchemaLinkerAgent - INFO - Node details: {'nodeId': 'node_1748301086.295826_3', 'intent': 'Count the number of schools in each county.', 'mapping': {'tables': [{'name': 'schools', 'alias': None, 'purpose': 'Used in subquery 3'}], 'columns': []}, 'childIds': [], 'status': 'created', 'parentId': 'node_1748301086.293675_root'}



[Step 31] Calling: schema_linker


2025-05-26 19:12:22,471 - SchemaLinkerAgent - INFO - Raw LLM output (first 500 chars): To count the number of schools in each county, we need to select the appropriate tables and columns from the provided schema. Here's the analysis:

### Analysis

1. **Query Intent**: Count the number of schools in each county.
2. **Necessary Tables and Columns**:
   - **Table**: `schools`
     - **Purpose**: This table contains information about schools, which is necessary to count the number of schools in each county.
     - **Columns**:
       - **`County`**: Used for grouping by county to cou
2025-05-26 19:12:22,471 - QueryTreeManager - INFO - Updated node node_1748301086.295826_3
2025-05-26 19:12:22,471 - NodeHistoryManager - INFO - Added revise operation for node node_1748301086.295826_3
2025-05-26 19:12:22,472 - SchemaLinkerAgent - INFO - Schema Linking
2025-05-26 19:12:22,472 - SchemaLinkerAgent - INFO - Query intent: Count the number of schools in each county.
2025-05-26 19:12:22,472 - Schema


[Step 32] Calling: schema_linker


2025-05-26 19:12:23,843 - SQLGeneratorAgent - INFO - SQL generator context prepared for node: node_1748301086.295826_3
2025-05-26 19:12:23,843 - SQLGeneratorAgent - INFO - Node detail: {'nodeId': 'node_1748301086.295826_3', 'intent': 'Count the number of schools in each county.', 'mapping': {'tables': [{'name': 'schools', 'alias': 'sc', 'purpose': 'Contains school data needed to count the number of schools by county'}], 'columns': [{'table': 'schools', 'column': 'County', 'usedFor': 'group'}, {'table': 'schools', 'column': 'CDSCode', 'usedFor': 'count'}]}, 'childIds': [], 'status': 'created', 'parentId': 'node_1748301086.293675_root'}



[Step 34] Calling: sql_generator


2025-05-26 19:12:26,403 - SQLGeneratorAgent - INFO - Raw LLM output (first 500 chars): ```xml
<sql_generation>
  <query_type>aggregate</query_type>
  <sql>
    SELECT 
      sc.County AS county_name,
      COUNT(sc.CDSCode) AS number_of_schools
    FROM 
      schools sc
    GROUP BY 
      sc.County
  </sql>
  <explanation>
    The query counts the number of schools in each county using the 'schools' table. It groups the results by the 'County' column and counts the number of entries in the 'CDSCode' column, which uniquely identifies each school.
  </explanation>
  <consideration
2025-05-26 19:12:26,404 - QueryTreeManager - INFO - Updated node node_1748301086.295826_3
2025-05-26 19:12:26,404 - NodeHistoryManager - INFO - Added generate_sql operation for node node_1748301086.295826_3
2025-05-26 19:12:26,404 - SQLGeneratorAgent - INFO - SQL Generation
2025-05-26 19:12:26,404 - SQLGeneratorAgent - INFO - Query intent: Count the number of schools in each county.
2025-05-26 19:12:26,405 - 


[Step 35] Calling: sql_generator


2025-05-26 19:12:28,347 - SQLEvaluatorAgent - INFO - Using current node: node_1748301086.295826_3
2025-05-26 19:12:28,351 - QueryTreeManager - INFO - Updated node node_1748301086.295826_3


[SQLExecutor] Connecting to database: /home/norman/work/text-to-sql/MAC-SQL/data/bird/dev_databases/california_schools/california_schools.sqlite

[Step 37] Calling: sql_evaluator


2025-05-26 19:12:31,521 - SQLEvaluatorAgent - INFO - Raw LLM output (first 500 chars): ```xml
<evaluation>
  <answers_intent>yes</answers_intent>
  <result_quality>excellent</result_quality>
  <result_summary>The results show the number of schools in each county, accurately reflecting the count of schools per county.</result_summary>
  <issues>
    <issue>
      <type>performance</type>
      <description>Execution time is not provided, which could be useful for performance evaluation.</description>
      <severity>low</severity>
    </issue>
  </issues>
  <suggestions>
    <sugge
2025-05-26 19:12:31,522 - SQLEvaluatorAgent - INFO - SQL Execution & Evaluation
2025-05-26 19:12:31,522 - SQLEvaluatorAgent - INFO - Query intent: Count the number of schools in each county.
2025-05-26 19:12:31,523 - SQLEvaluatorAgent - INFO - Evaluation results:
2025-05-26 19:12:31,523 - SQLEvaluatorAgent - INFO -   - Answers intent: YES
2025-05-26 19:12:31,523 - SQLEvaluatorAgent - INFO -   - Result quality


[Step 38] Calling: sql_evaluator


2025-05-26 19:12:32,573 - TaskStatusChecker - INFO - Checking overall task status...
2025-05-26 19:12:32,574 - TaskStatusChecker - INFO - Tree status: 3 good, 0 poor, 1 unprocessed
2025-05-26 19:12:32,574 - QueryTreeManager - INFO - Set current node to node_1748301086.293675_root
2025-05-26 19:12:32,574 - TaskStatusChecker - INFO - Updated current node to: node_1748301086.293675_root



[Step 40] Calling: task_status_checker

[Step 41] Calling: task_status_checker


2025-05-26 19:12:33,472 - SchemaLinkerAgent - INFO - Available tables in schema: ['frpm', 'satscores', 'schools']
2025-05-26 19:12:33,472 - SchemaLinkerAgent - INFO - Schema linking context prepared for node: node_1748301086.293675_root
2025-05-26 19:12:33,473 - SchemaLinkerAgent - INFO - Node details: {'nodeId': 'node_1748301086.293675_root', 'intent': 'Find the top 5 counties based on average SAT scores, and for each county, include the number of schools and the average free lunch rate.', 'mapping': {'tables': [], 'columns': []}, 'childIds': ['node_1748301086.294442_1', 'node_1748301086.295259_2', 'node_1748301086.295826_3'], 'status': 'created', 'combineStrategy': {'type': 'join', 'joinType': 'INNER'}}



[Step 43] Calling: schema_linker


2025-05-26 19:12:52,720 - SchemaLinkerAgent - INFO - Raw LLM output (first 500 chars): To find the top 5 counties based on average SAT scores and include the number of schools and the average free lunch rate for each county, we need to combine the results from the child nodes. Here's the analysis:

### Analysis

1. **Query Intent**: Find the top 5 counties based on average SAT scores, including the number of schools and the average free lunch rate for each county.
2. **Necessary Tables and Columns**:
   - **Table**: `satscores`
     - **Purpose**: Provides average SAT scores by co
2025-05-26 19:12:52,721 - QueryTreeManager - INFO - Updated node node_1748301086.293675_root
2025-05-26 19:12:52,721 - NodeHistoryManager - INFO - Added revise operation for node node_1748301086.293675_root
2025-05-26 19:12:52,722 - SchemaLinkerAgent - INFO - Schema Linking
2025-05-26 19:12:52,722 - SchemaLinkerAgent - INFO - Query intent: Find the top 5 counties based on average SAT scores, and for each coun


[Step 44] Calling: schema_linker


2025-05-26 19:12:54,051 - SQLGeneratorAgent - INFO - SQL generator context prepared for node: node_1748301086.293675_root
2025-05-26 19:12:54,052 - SQLGeneratorAgent - INFO - Node detail: {'nodeId': 'node_1748301086.293675_root', 'intent': 'Find the top 5 counties based on average SAT scores, and for each county, include the number of schools and the average free lunch rate.', 'mapping': {'tables': [{'name': 'satscores', 'alias': 's', 'purpose': 'Provides average SAT scores by county'}, {'name': 'frpm', 'alias': 'f', 'purpose': 'Provides average free lunch rate by county'}, {'name': 'schools', 'alias': 'sc', 'purpose': 'Provides the count of schools by county'}], 'columns': [{'table': 'satscores', 'column': 'cname', 'usedFor': 'group|join'}, {'table': 'satscores', 'column': 'AvgScrRead', 'usedFor': 'aggregate'}, {'table': 'satscores', 'column': 'AvgScrMath', 'usedFor': 'aggregate'}, {'table': 'satscores', 'column': 'AvgScrWrite', 'usedFor': 'aggregate'}, {'table': 'frpm', 'column': 'Co


[Step 46] Calling: sql_generator


2025-05-26 19:13:00,296 - SQLGeneratorAgent - INFO - Raw LLM output (first 500 chars): ```xml
<sql_generation>
  <query_type>complex</query_type>
  <sql>
    WITH AvgSATScores AS (
      SELECT 
        s.cname AS county_name,
        AVG(s.AvgScrRead) AS avg_reading_score,
        AVG(s.AvgScrMath) AS avg_math_score,
        AVG(s.AvgScrWrite) AS avg_writing_score,
        (AVG(s.AvgScrRead) + AVG(s.AvgScrMath) + AVG(s.AvgScrWrite)) / 3 AS avg_total_score
      FROM 
        satscores s
      GROUP BY 
        s.cname
    ),
    AvgFreeLunchRate AS (
      SELECT 
        f."Coun
2025-05-26 19:13:00,297 - QueryTreeManager - INFO - Updated node node_1748301086.293675_root
2025-05-26 19:13:00,297 - NodeHistoryManager - INFO - Added generate_sql operation for node node_1748301086.293675_root
2025-05-26 19:13:00,297 - SQLGeneratorAgent - INFO - SQL Generation
2025-05-26 19:13:00,297 - SQLGeneratorAgent - INFO - Query intent: Find the top 5 counties based on average SAT scores, and for eac


[Step 47] Calling: sql_generator


2025-05-26 19:13:01,731 - SQLEvaluatorAgent - INFO - Using current node: node_1748301086.293675_root
2025-05-26 19:13:01,737 - QueryTreeManager - INFO - Updated node node_1748301086.293675_root


[SQLExecutor] Connecting to database: /home/norman/work/text-to-sql/MAC-SQL/data/bird/dev_databases/california_schools/california_schools.sqlite

[Step 49] Calling: sql_evaluator


2025-05-26 19:13:06,644 - SQLEvaluatorAgent - INFO - Raw LLM output (first 500 chars): ```xml
<evaluation>
  <answers_intent>yes</answers_intent>
  <result_quality>excellent</result_quality>
  <result_summary>The results identify the top 5 counties based on average SAT scores, including the number of schools and average free lunch rate for each county.</result_summary>
  <issues>
    <issue>
      <type>performance</type>
      <description>Execution time is not provided, which could be useful for performance evaluation.</description>
      <severity>low</severity>
    </issue>
  
2025-05-26 19:13:06,645 - SQLEvaluatorAgent - INFO - SQL Execution & Evaluation
2025-05-26 19:13:06,645 - SQLEvaluatorAgent - INFO - Query intent: Find the top 5 counties based on average SAT scores, and for each county, include the number of schools and the average free lunch rate.
2025-05-26 19:13:06,645 - SQLEvaluatorAgent - INFO - Evaluation results:
2025-05-26 19:13:06,645 - SQLEvaluatorAgent - INFO -   


[Step 50] Calling: sql_evaluator


2025-05-26 19:13:10,434 - TaskStatusChecker - INFO - Checking overall task status...
2025-05-26 19:13:10,435 - TaskStatusChecker - INFO - Tree status: 4 good, 0 poor, 0 unprocessed



[Step 52] Calling: task_status_checker

[Step 53] Calling: task_status_checker

[Step 55] ✅ WORKFLOW COMPLETE

WORKFLOW COMPLETED SUCCESSFULLY


In [11]:
# Show summary of what was processed
print("\n" + "="*60)
print("PROCESSING SUMMARY")
print("="*60)

tree = await tree_manager.get_tree()
if tree and "nodes" in tree:
    # Count nodes by status
    status_counts = {}
    for node_id, node_data in tree["nodes"].items():
        status = node_data.get("status", "unknown")
        status_counts[status] = status_counts.get(status, 0) + 1
    
    print(f"\nTotal nodes created: {len(tree['nodes'])}")
    for status, count in status_counts.items():
        print(f"  {status}: {count}")
    
    # Show nodes with results
    print("\nNodes with SQL results:")
    for node_id, node_data in tree["nodes"].items():
        if node_data.get("sql") and node_data.get("executionResult"):
            print(f"\n  Node: {node_id[-15:]}")
            print(f"  Intent: {node_data['intent'][:60]}...")
            
            result = node_data['executionResult']
            if result.get('data'):
                print(f"  Result: {result.get('rowCount', 0)} rows")
                # Show first result
                first_row = result['data'][0] if result['data'] else None
                if first_row:
                    if isinstance(first_row, list) and len(first_row) >= 2:
                        print(f"  Sample: {first_row[0]} - {first_row[1]}")
                    else:
                        print(f"  Sample: {str(first_row)[:200]}...")


PROCESSING SUMMARY

Total nodes created: 4
  executed_success: 4

Nodes with SQL results:

  Node: 086.293675_root
  Intent: Find the top 5 counties based on average SAT scores, and for...
  Result: 5 rows
  Sample: Marin - 535.3846153846154

  Node: 301086.294442_1
  Intent: Calculate average SAT scores for each county....
  Result: 5 rows
  Sample: Alameda - 480.5753424657534

  Node: 301086.295259_2
  Intent: Calculate the average free lunch rate for each county....
  Result: 5 rows
  Sample: Alameda - 0.4532504204074731

  Node: 301086.295826_3
  Intent: Count the number of schools in each county....
  Result: 5 rows
  Sample: Alameda - 761
