In [1]:
import os
import sys
import asyncio
import sqlite3
import json
import logging
import re
from typing import Dict, Any, List, Optional
from dotenv import load_dotenv

# Make the project modules under ../src importable (the relative entry is
# resolved against the kernel's working directory). The membership guard keeps
# this cell idempotent: re-running it no longer stacks duplicate sys.path entries.
if '../src' not in sys.path:
    sys.path.append('../src')

# Load settings from a .env file, if one is found, into the process environment.
load_dotenv()

# Set up logging
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Reduce noise from autogen
logging.getLogger('autogen_core').setLevel(logging.WARNING)

In [2]:
from pathlib import Path
from keyvalue_memory import KeyValueMemory
from task_context_manager import TaskContextManager
from query_tree_manager import QueryTreeManager
from database_schema_manager import DatabaseSchemaManager
from node_history_manager import NodeHistoryManager
from query_analyzer_agent import QueryAnalyzerAgent
from schema_reader import SchemaReader


# Natural-language question handed to the analyzer in the cells below.
query = "What is the total number of schools in Alameda County?"

# Root directory of the BIRD dataset. Set BIRD_DATA_PATH (in the shell or in the
# .env file read by load_dotenv()) to point at a different checkout; an unset or
# empty value falls back to the original location, so existing runs are unchanged.
data_path = (
    os.environ.get("BIRD_DATA_PATH")
    or "/home/norman/work/text-to-sql/MAC-SQL/data/bird"
)

# Table-description JSON located directly under the dataset root.
tables_json_path = Path(data_path) / "dev_tables.json"

# Database whose schema is loaded and queried.
db_name = "california_schools"

In [3]:
task_id = "experimental-test"
memory = KeyValueMemory()
        
# Initialize task
task_manager = TaskContextManager(memory)
await task_manager.initialize(task_id, query, db_name)

# Load schema
schema_manager = DatabaseSchemaManager(memory)
await schema_manager.initialize()

schema_reader = SchemaReader(
    data_path=data_path,
    tables_json_path=str(tables_json_path),
    dataset_name="bird",
    lazy=False
)
await schema_manager.load_from_schema_reader(schema_reader, db_name)

2025-05-29 17:11:12,769 - TaskContextManager - INFO - Initialized task context for task experimental-test
2025-05-29 17:11:12,770 - DatabaseSchemaManager - INFO - Initialized empty database schema


load json file from /home/norman/work/text-to-sql/MAC-SQL/data/bird/dev_tables.json

Loading all database info...
Found 11 databases in bird dataset


2025-05-29 17:11:25,282 - DatabaseSchemaManager - INFO - Initialized empty database schema
2025-05-29 17:11:25,282 - DatabaseSchemaManager - INFO - Added table 'frpm' to schema
2025-05-29 17:11:25,283 - DatabaseSchemaManager - INFO - Added table 'satscores' to schema
2025-05-29 17:11:25,284 - DatabaseSchemaManager - INFO - Added table 'schools' to schema
2025-05-29 17:11:25,284 - DatabaseSchemaManager - INFO - Loaded schema for database 'california_schools' with 3 tables


In [4]:
# Build the query analyzer on top of the shared memory, with its LLM settings
# and debug mode switched on.
analyzer = QueryAnalyzerAgent(
    memory,
    llm_config=dict(model_name="gpt-4o", temperature=0.1, timeout=60),
    debug=True,
)

2025-05-29 17:11:25,307 - QueryAnalyzerAgent - DEBUG - Created AssistantAgent: query_analyzer
2025-05-29 17:11:25,307 - QueryAnalyzerAgent - DEBUG - Created MemoryAgentTool for query_analyzer
2025-05-29 17:11:25,307 - QueryAnalyzerAgent - INFO - Initialized query_analyzer with model gpt-4o


In [5]:
# Run the analyzer on the question. The returned object exposes `.messages`,
# which the following cell iterates over.
# NOTE(review): the recorded output of this cell ends with the error
# "No current node found - orchestrator should have initialized the tree".
# Presumably a query tree has to be set up before this call (QueryTreeManager
# is imported but not used in the cells shown) — TODO confirm.
result = await analyzer.run(query)

2025-05-29 17:11:25,309 - QueryAnalyzerAgent - DEBUG - Query analysis will be schema-agnostic
2025-05-29 17:11:25,309 - QueryAnalyzerAgent - INFO - Query analyzer context prepared with schema length: 9697
2025-05-29 17:11:25,309 - QueryAnalyzerAgent - INFO - query: What is the total number of schools in Alameda County? database: california_schools
2025-05-29 17:11:26,846 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-29 17:11:26,850 - QueryAnalyzerAgent - INFO - Raw LLM output: <analysis>
  <intent>Find the total number of schools located in Alameda County.</intent>
  <complexity>simple</complexity>
  <tables>
    <table name="schools" purpose="To count the number of schools in Alameda County using the 'County' column."/>
  </tables>
</analysis>
2025-05-29 17:11:26,851 - QueryAnalyzerAgent - ERROR - No current node found - orchestrator should have initialized the tree


In [6]:
# Print the conversation: for each message a "[sender]:" header, the message
# body, and a 40-dash rule.
for message in result.messages:
    # Messages without a `source` attribute are labelled 'Unknown'.
    source_label = getattr(message, 'source', 'Unknown')
    print(f"\n[{source_label}]:")
    print(message.content)
    print("-" * 40)


[user]:
I'm providing you with context from previous interactions:

### Query
What is the total number of schools in Alameda County?

### Database Id
california_schools

### Schema
<database_schema>
  <table name="frpm">
    <column name="CDSCode">
      <type>text</type>
      <nullable>True</nullable>
      <primary_key>true</primary_key>
      <foreign_key>
        <references_table>schools</references_table>
        <references_column>CDSCode</references_column>
      </foreign_key>
    </column>
    <column name="Academic Year">
      <type>text</type>
      <nullable>True</nullable>
    </column>
    <column name="County Code">
      <type>text</type>
      <nullable>True</nullable>
    </column>
    <column name="District Code">
      <type>integer</type>
      <nullable>True</nullable>
    </column>
    <column name="School Code">
      <type>text</type>
      <nullable>True</nullable>
    </column>
    <column name="County Name">
      <type>text</type>
      <nullable>True</