In [1]:
import os
import sys
import asyncio
import sqlite3
import json
import logging
import re
from typing import Dict, Any, List, Optional
from dotenv import load_dotenv

# Make the project sources under ../src importable (the path is relative to
# the notebook's working directory). The membership guard keeps this cell
# idempotent: re-running it no longer stacks duplicate entries on sys.path.
if '../src' not in sys.path:
    sys.path.append('../src')

# Load variables from a local .env file into the process environment.
load_dotenv()

# Set up logging: INFO and above, with timestamp, logger name and level.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)

# Reduce noise from autogen by only letting its warnings and errors through.
logging.getLogger('autogen_core').setLevel(logging.WARNING)

In [2]:
from pathlib import Path
from keyvalue_memory import KeyValueMemory
from task_context_manager import TaskContextManager
from query_tree_manager import QueryTreeManager
from database_schema_manager import DatabaseSchemaManager
from node_history_manager import NodeHistoryManager
from query_analyzer_agent import QueryAnalyzerAgent
from schema_reader import SchemaReader
from memory_content_types import (
    TaskContext, QueryNode, NodeStatus, TaskStatus,
    QueryMapping, TableMapping, ColumnMapping, JoinMapping,
    TableSchema, ColumnInfo, CombineStrategyType
)
from sql_generator_agent import SQLGeneratorAgent

# Root directory of the BIRD dataset. Set BIRD_DATA_PATH (in the environment
# or in the .env file loaded by load_dotenv()) to point at a different
# checkout; when it is unset or empty the original location is used.
data_path = os.environ.get("BIRD_DATA_PATH") or "/home/norman/work/text-to-sql/MAC-SQL/data/bird"

# Table-description file that sits directly inside the dataset root.
tables_json_path = Path(data_path) / "dev_tables.json"

# Database exercised by this notebook.
db_name = "california_schools"

In [ ]:
task_id = "experimental-test"

query = "What is the highest eligible free rate for K-12 students in schools in Alameda County?"
intent = "Find the maximum eligible free rate for K-12 students in schools located in Alameda County"
memory = KeyValueMemory()
        
# Initialize task
task_manager = TaskContextManager(memory)
await task_manager.initialize(task_id, query, db_name)

# Load schema
schema_manager = DatabaseSchemaManager(memory)
await schema_manager.initialize()

schema_reader = SchemaReader(
    data_path=data_path,
    tables_json_path=str(tables_json_path),
    dataset_name="bird",
    lazy=False
)
await schema_manager.load_from_schema_reader(schema_reader, db_name)

# Initialize query tree
tree_manager = QueryTreeManager(memory)
node_id = await tree_manager.initialize(intent)

# Create schema linking for the node (simulating what schema linker would do)
schema_linking = {
    "selected_tables": [
        {
            "name": "frpm",
            "alias": "f",
            "purpose": "To find the highest eligible free rate for K-12 students in Alameda County",
            "columns": [
                {
                    "name": "County Name",
                    "used_for": "filter",
                    "reason": "To filter the records for Alameda County"
                },
                {
                    "name": "Percent (%) Eligible Free (K-12)",
                    "used_for": "aggregate",
                    "reason": "To determine the highest eligible free rate for K-12 students"
                }
            ]
        }
    ],
    "joins": [],
    "sample_query_pattern": 'SELECT MAX(f."Percent (%) Eligible Free (K-12)") FROM frpm AS f WHERE f."County Name" = \'Alameda\''
}

# Update node with schema linking
node = await tree_manager.get_current_node()
node.schema_linking = schema_linking
await tree_manager.update_node(node_id, node)

In [4]:
# SQL generator bound to the shared memory object, with debug output enabled.
agent = SQLGeneratorAgent(
    memory,
    llm_config=dict(
        model_name="gpt-4o",
        temperature=0.1,
        timeout=60,
    ),
    debug=True,
)

2025-05-25 07:23:41,477 - SQLGeneratorAgent - DEBUG - Created AssistantAgent: sql_generator
2025-05-25 07:23:41,477 - SQLGeneratorAgent - DEBUG - Created MemoryAgentTool for sql_generator
2025-05-25 07:23:41,477 - SQLGeneratorAgent - INFO - Initialized sql_generator with model gpt-4o


In [5]:
# Run the SQL generator agent on the natural-language question and keep the
# returned object; its `.messages` are printed in the following cell.
# NOTE(review): the recorded log for this run reads "context prepared for
# node: None", and the model's reply assumes a made-up `schools` table instead
# of the `frpm` schema linking set up earlier — confirm the agent actually
# resolves the current query-tree node before trusting the generated SQL.
result = await agent.run(query)

2025-05-25 07:23:41,479 - SQLGeneratorAgent - DEBUG - SQL generator context prepared for node: None
2025-05-25 07:23:46,181 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-25 07:23:46,185 - SQLGeneratorAgent - INFO - Stored generated SQL in memory


In [6]:
# Dump the conversation: a "[source]:" header, the message body, then a rule.
divider = "-" * 40
for message in result.messages:
    # Messages without a `source` attribute are labelled 'Unknown'.
    sender = getattr(message, 'source', 'Unknown')
    print(f"\n[{sender}]:")
    body = message.content
    print(body)
    print(divider)


[user]:
I'm providing you with context from previous interactions:

### Intent
What is the highest eligible free rate for K-12 students in schools in Alameda County?

What is the highest eligible free rate for K-12 students in schools in Alameda County?
----------------------------------------

[sql_generator]:
To generate the SQL query for the given intent, we need to identify the relevant tables and columns from the schema mapping. Assuming we have a table structure like this:

- `schools` (alias `s`): Contains information about schools, including their location and student demographics.
  - `school_id`
  - `county`
  - `eligible_free_rate`

We will focus on filtering schools located in Alameda County and then finding the maximum eligible free rate for K-12 students.

```xml
<sql_generation>
  <query_type>aggregate</query_type>
  <sql>
    SELECT MAX(s.eligible_free_rate) AS highest_eligible_free_rate
    FROM schools s
    WHERE s.county = 'Alameda'
  </sql>
  <explanation>
    The