In [1]:
from Text2SQL.SnowflakeCore import *

In [2]:
agent = SnowflakeAgent()

M = SnowMetadata(
    agent,
    "AIRLINES", "AIRLINES", model_name = model_name)
sn_m = M.get_metadata()

Reading Tables....


100%|██████████| 8/8 [00:08<00:00,  1.01s/it]


Building up FKs....


In [4]:
def create_system_prompt(schema: ParentSchema) -> str:
    return f"""You are a SQL query generator for {schema.dialect}.

DATABASE: {schema.database}.{schema.Schema}
SCHEMA: {schema.model_dump_json(indent=2)}

CORE RULES:
1. Generate SELECT queries only - no modifications
2. ALWAYS quote identifiers: SELECT "column_name" FROM "table_name"
3. Use fully qualified names: {schema.database}.{schema.Schema}."TABLE_NAME"
4. No semicolons - single statement only
5. Use the execute_query tool to run queries

JSON COLUMNS:
- Many columns store JSON as VARCHAR (see samples)
- To extract values: GET(PARSE_JSON("column_name"), 'key')::VARCHAR
- Example: GET(PARSE_JSON("airport_name"), 'en')::VARCHAR

NULL VALUES:
- Sample data shows '\\N' but use IS NULL / IS NOT NULL in WHERE clauses

ERROR RECOVERY:
- If query fails, check: quoted identifiers, table/column names, JSON syntax
- Analyze error message and retry with corrections

Be precise and efficient."""
SYSTEM_PROMPT = create_system_prompt(sn_m)
print(SYSTEM_PROMPT)

You are a SQL query generator for snowflake.

DATABASE: AIRLINES.AIRLINES
SCHEMA: {
  "dialect": "snowflake",
  "database": "AIRLINES",
  "Schema": "AIRLINES",
  "tables": [
    {
      "name": "AIRLINES.AIRLINES.AIRCRAFTS_DATA",
      "column_names": [
        {
          "name": "aircraft_code",
          "type": "VARCHAR(16777216)",
          "null?": "Y"
        },
        {
          "name": "model",
          "type": "VARCHAR(16777216)",
          "null?": "Y"
        },
        {
          "name": "range",
          "type": "NUMBER(38,0)",
          "null?": "Y"
        }
      ],
      "sample_rows": [
        {
          "aircraft_code": "773",
          "model": "{\"en\": \"Boeing 777-300\", \"ru\": \"Боинг 777-300\"}",
          "range": 11100
        }
      ],
      "row_count": 9
    },
    {
      "name": "AIRLINES.AIRLINES.AIRPORTS_DATA",
      "column_names": [
        {
          "name": "airport_code",
          "type": "VARCHAR(16777216)",
          "null?": "Y"
   

In [5]:
from lisette import *

chat = Chat(model_name, sp=SYSTEM_PROMPT, tools=[agent.execute_query])

res = chat("How many flights are there?", max_steps=5)
res

The total number of flights is **33,121**.

<details>

- id: `chatcmpl-udg4vhxkp3v6q2lk2uhoh`
- model: `lm_studio/openai/gpt-oss-20b`
- finish_reason: `stop`
- usage: `Usage(completion_tokens=16, prompt_tokens=3111, total_tokens=3127, completion_tokens_details=None, prompt_tokens_details=None)`

</details>

In [6]:
chat.print_hist()

{'role': 'user', 'content': 'How many flights are there?'}

Message(content='', role='assistant', tool_calls=[{'function': {'arguments': '{"query":"SELECT COUNT(*) AS \\"flight_count\\" FROM AIRLINES.AIRLINES.\\"FLIGHTS\\""}', 'name': 'execute_query'}, 'id': '485447735', 'type': 'function'}], function_call=None, reasoning_content='Need count of FLIGHTS.', provider_specific_fields={'refusal': None, 'reasoning': 'Need count of FLIGHTS.', 'reasoning_content': 'Need count of FLIGHTS.'})

{'tool_call_id': '485447735', 'role': 'tool', 'name': 'execute_query', 'content': 'query=\'SELECT COUNT(*) AS "flight_count" FROM AIRLINES.AIRLINES."FLIGHTS"\' success=True data=[{\'flight_count\': 33121}] error=None row_count=1 execution_time=0.31689882278442383'}

Message(content='The total number of flights is **33,121**.', role='assistant', tool_calls=None, function_call=None, reasoning_content='', provider_specific_fields={'refusal': None, 'reasoning': ''})



In [8]:
chat = Chat(model_name, sp=SYSTEM_PROMPT, tools=[agent.execute_query])
res = chat("Which airport has the most departures?", max_steps=5)
res

The airport with the most departures is **Domodedovo International Airport** (airport code: DME), with **3,217** departures.

<details>

- id: `chatcmpl-qkesbpqgg7nihwxpmpks2`
- model: `lm_studio/openai/gpt-oss-20b`
- finish_reason: `stop`
- usage: `Usage(completion_tokens=42, prompt_tokens=3231, total_tokens=3273, completion_tokens_details=None, prompt_tokens_details=None)`

</details>

In [9]:
chat.print_hist()

{'role': 'user', 'content': 'Which airport has the most departures?'}

Message(content='', role='assistant', tool_calls=[{'function': {'arguments': '{"query":"SELECT GET(PARSE_JSON(\\"airport_name\\"), \'en\')::VARCHAR AS airport_name, COUNT(*) AS departures FROM AIRLINES.AIRLINES.FLIGHTS JOIN AIRLINES.AIRLINES.AIRPORTS_DATA ON \\"departure_airport\\" = \\"airport_code\\" GROUP BY \\"departure_airport\\", \\"airport_name\\" ORDER BY departures DESC LIMIT 1"}', 'name': 'execute_query'}, 'id': '659684994', 'type': 'function'}], function_call=None, reasoning_content='Need count of flights per departure_airport. Use FLIGHTS table. Count distinct flight_id? Probably each flight counts once. So group by departure_airport order desc limit 1. Return airport_code and count. Also maybe get name via AIRPORTS_DATA. Let\'s join to get name. Query: SELECT GET(PARSE_JSON("airport_name"), \'en\')::VARCHAR AS airport_name, COUNT(*) AS departures FROM AIRLINES.AIRLINES.FLIGHTS JOIN AIRLINES.AIRLINES.AIR

In [15]:
chat = Chat(model_name, sp=SYSTEM_PROMPT, tools=[execute_query])
res = chat("Which airport has the most departures?", max_steps=5)
res

The airport with the most departures is **DME** (Domodedovo International Airport), with **3,217** departures.

<details>

- id: `chatcmpl-11m3mw6arj0nqfe846eid8q`
- model: `lm_studio/openai/gpt-oss-20b`
- finish_reason: `stop`
- usage: `Usage(completion_tokens=41, prompt_tokens=3120, total_tokens=3161, completion_tokens_details=None, prompt_tokens_details=None)`

</details>

In [16]:
chat.print_hist()

{'role': 'user', 'content': 'Which airport has the most departures?'}

Message(content='', role='assistant', tool_calls=[{'function': {'arguments': '{"query":"SELECT \\"departure_airport\\", COUNT(*) AS \\"departures\\" FROM AIRLINES.AIRLINES.\\"FLIGHTS\\" GROUP BY \\"departure_airport\\" ORDER BY \\"departures\\" DESC LIMIT 1"}', 'name': 'execute_query'}, 'id': '609665208', 'type': 'function'}], function_call=None, reasoning_content='Need count of departures per departure_airport. Use FLIGHTS table. Count distinct flight_id? Probably count flights. Query.', provider_specific_fields={'refusal': None, 'reasoning': 'Need count of departures per departure_airport. Use FLIGHTS table. Count distinct flight_id? Probably count flights. Query.', 'reasoning_content': 'Need count of departures per departure_airport. Use FLIGHTS table. Count distinct flight_id? Probably count flights. Query.'})

{'tool_call_id': '609665208', 'role': 'tool', 'name': 'execute_query', 'content': 'query=\'SELECT "dep