# SnowflakeCore

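> Helpers for running single, read-only SQL queries against Snowflake and for building a compact schema description (tables, columns, sample rows, LLM-inferred foreign keys).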

In [None]:
#| default_exp SnowflakeCore

## Init Setup

In [None]:
#!pip install -q snowflake-connector-python  lisette  pandas tqdm fastcore

In [None]:
#|export
from dotenv import load_dotenv
import os
import json
import pandas as pd
from tqdm import tqdm
from fastcore.utils import *
import regex as re
from lisette import *

# Load environment variables from the .env file
load_dotenv()

assert os.getenv("SPIDER2_SNOWFLAKE_USERNAME")
assert os.getenv("SPIDER2_SNOWFLAKE_PASSWORD")
assert os.getenv("SPIDER2_SNOWFLAKE_ACCOUNT")
assert os.getenv("COMPUTE_WH_PARTICIPANT")

LM_STUDIO calling 

In [None]:
#|export
assert os.getenv("LM_STUDIO_API_BASE")
assert os.getenv("LM_STUDIO_MODEL_NAME")
model_name = os.getenv("LM_STUDIO_MODEL_NAME")
model_name

In [None]:
#|export
import litellm

# Register metadata for `model_name` with litellm; both per-token costs are set to zero.
litellm.register_model(
    {
        model_name:{
        "max_tokens": 8192, # put the model's real context limit
        "input_cost_per_token": 0.0,
        "output_cost_per_token": 0.0,
        "supports_assistant_prefill": False
    }})

{'lm_studio/openai/gpt-oss-20b': {'max_tokens': 8192,
  'input_cost_per_token': 0.0,
  'output_cost_per_token': 0.0,
  'supports_assistant_prefill': False}}

In [None]:
chat = Chat(model_name)
chat("Hello there!")

Hey! 👋 How’s it going?

<details>

- id: `chatcmpl-rathcjbgkrjydhbc3dsmor`
- model: `lm_studio/openai/gpt-oss-20b`
- finish_reason: `stop`
- usage: `Usage(completion_tokens=22, prompt_tokens=70, total_tokens=92, completion_tokens_details=None, prompt_tokens_details=None)`

</details>

Snowflake setup

In [None]:
#|export
import snowflake.connector

# Open one Snowflake connection using the credentials read from the environment.
conn = snowflake.connector.connect(
    user=os.getenv("SPIDER2_SNOWFLAKE_USERNAME"),
    password=os.getenv("SPIDER2_SNOWFLAKE_PASSWORD"),
    account=os.getenv("SPIDER2_SNOWFLAKE_ACCOUNT"),
    warehouse=os.getenv("COMPUTE_WH_PARTICIPANT"),
)
# Module-level cursor: `sql_df` and `execute_query` run their statements on it.
cursor = conn.cursor()

# Fail fast if the cursor is already closed.
assert not cursor.is_closed()

## execute_query
To run single SQL query and return data in following format:
- `query`: query 
- `success`: True if successfully executed else False
- `data`: `df.to_dict('records')` if successfully executed, else `None`
- `error`: `None` if successfully executed, else `str(Exception)`
- `row_count`: `len(df)`
- `execution_time`: time taken to execute the query, in seconds


---
1. **Max row limit** - Should you limit results (e.g., max 10,000 rows) to prevent memory issues? : help me to do this
2. **Empty query check** - What if someone passes an empty string? : this will be handled by the try/except block
3. **Execution time** - Track how long queries take? (useful for RLM logging) : ok lets add it
4. **SQL comments** - Queries like `"SELECT * FROM x -- ; DROP TABLE y"` might bypass your check : it is a single query function right?

Note: assuming `df` is the result generated after a query

In [None]:
cursor.execute("SELECT * FROM AIRLINES.AIRLINES.FLIGHTS LIMIT 4")
column_names = [desc[0] for desc in cursor.description]
results = cursor.fetchall()

In [None]:
len(results)

4

In [None]:
df = pd.DataFrame(results, columns=column_names)
df

Unnamed: 0,flight_id,flight_no,scheduled_departure,scheduled_arrival,departure_airport,arrival_airport,status,aircraft_code,actual_departure,actual_arrival
0,1185,PG0134,2017-09-10 09:50:00+03,2017-09-10 14:55:00+03,DME,BTK,Scheduled,319,\N,\N
1,3979,PG0052,2017-08-25 14:50:00+03,2017-08-25 17:35:00+03,VKO,HMA,Scheduled,CR2,\N,\N
2,4739,PG0561,2017-09-05 12:30:00+03,2017-09-05 14:15:00+03,VKO,AER,Scheduled,763,\N,\N
3,5502,PG0529,2017-09-12 09:50:00+03,2017-09-12 11:20:00+03,SVO,UFA,Scheduled,763,\N,\N


In [None]:
def sql_df(query: str)->pd.core.frame.DataFrame:
    """Run `query` on the module-level `cursor` and return all rows as a DataFrame.

    Column labels come from the first field of each `cursor.description` entry.
    No validation and no row limit are applied here.
    """
    cursor.execute(query)
    headers = list(map(lambda meta: meta[0], cursor.description))
    return pd.DataFrame(cursor.fetchall(), columns=headers)

In [None]:
df = sql_df("SELECT * FROM AIRLINES.AIRLINES.FLIGHTS LIMIT 1")
df.head()

Unnamed: 0,flight_id,flight_no,scheduled_departure,scheduled_arrival,departure_airport,arrival_airport,status,aircraft_code,actual_departure,actual_arrival
0,1185,PG0134,2017-09-10 09:50:00+03,2017-09-10 14:55:00+03,DME,BTK,Scheduled,319,\N,\N


The function should not accept multiple queries; it should only accept a single query. To achieve this, we reject any query that contains a `;` outside of a quoted string.
Here's the complete semicolon check logic:

1. Count all semicolons in the entire query
2. Extract all quoted strings (both single and double quotes)
3. Count semicolons that appear inside those quoted strings
4. If counts don't match, there's an unquoted semicolon → reject




In [None]:
txt  = "SELECT * FROM x WHERE name = 'test;data';"
re.findall(r'"[^"]*;[^"]*"|\'[^\']*;[^\']*\'' , txt)

["'test;data'"]

In [None]:
t = [
    "SELECT * FROM x WHERE name = 'test;data'",
    "SELECT * FROM x; DROP TABLE y", 
    "SELECT * WHERE x = 'a;b' AND y = 'c;d'",
    "SELECT * FROM x; DROP TABLE y",
    """SELECT * FROM x;
-- comment
DROP TABLE y
"""
    ]
for i in t:
    print(re.findall(r'"[^"]*;[^"]*"|\'[^\']*;[^\']*\'' , i))

["'test;data'"]
[]
["'a;b'", "'c;d'"]
[]
[]



Here's the complete better semicolon check logic:

1. Count all semicolons in the entire query
2. Extract all quoted strings (both single and double quotes)
3. Count semicolons that appear inside those quoted strings
4. If counts don't match, there's an unquoted semicolon → reject


In [None]:
for i in t:
    print(i,  i.count(';'), len(re.findall(r';', ''.join(re.findall(r'"[^"]*"|\'[^\']*\'', i)))))

SELECT * FROM x WHERE name = 'test;data' 1 1
SELECT * FROM x; DROP TABLE y 1 0
SELECT * WHERE x = 'a;b' AND y = 'c;d' 2 2
SELECT * FROM x; DROP TABLE y 1 0
SELECT * FROM x;
-- comment
DROP TABLE y
 1 0


In [None]:
#|export
from pydantic import BaseModel
from typing import Optional

class QueryResult(BaseModel):
    """Outcome of a single `execute_query` call, used for both successful and failed runs."""
    query: str                          # SQL text the result belongs to
    success: bool                       # True when the query ran without raising
    data: Optional[list[dict]] = None   # fetched rows as a list of column->value dicts; None on failure
    error: Optional[str] = None         # str() of the caught exception; None on success
    row_count: int = 0                  # number of rows fetched (0 on failure)
    execution_time: float = 0.0         # seconds spent in cursor.execute (0.0 on failure)

In [None]:
#|export 
import time

UNSAFE_QUERIES = ['DROP', 'UPDATE', 'DELETE', 'INSERT', 'TRUNCATE', 'ALTER']

# Spans in which a `;` or a keyword is inert. Matched in ONE left-to-right pass so that a
# quote inside a comment (or a comment marker inside a string) cannot start a bogus span.
_SQL_INERT_SPANS = re.compile(
    r"'(?:[^'\\]|\\.|'')*'"     # single-quoted string literal ('' and backslash escapes)
    r'|"(?:[^"]|"")*"'          # double-quoted identifier
    r"|--[^\n]*"                # `--` line comment
    r"|//[^\n]*"                # `//` line comment (Snowflake)
    r"|/\*.*?\*/",              # block comment
    re.DOTALL,
)


def _strip_inert_sql(query: str) -> str:
    "Replace string literals, quoted identifiers and comments in `query` with a single space."
    return _SQL_INERT_SPANS.sub(' ', query)


def execute_query(
    query: str,                 # SQL query to execute
    max_rows: int = 10,         # Maximum rows to fetch
    fetch_all: bool = False,    # Fetch all rows if True
) -> QueryResult:
    """
    Execute a single SQL query and return the outcome as a QueryResult object.

    Before the query is sent, string literals, quoted identifiers and comments are
    blanked out and the remaining text is checked for:
    - Multiple statements: any remaining `;` rejects the query (this includes a
      trailing `;`).
    - Unsafe operations: the query is rejected when the remaining text starts with one
      of the keywords in `UNSAFE_QUERIES`. This is a deny-list on the leading keyword,
      not a full SQL sandbox.
    Row limiting (`max_rows`) guards against pulling very large result sets into memory.

    Limitations: `$$`-quoted strings are not recognised, so a `;` inside one is
    rejected; an unterminated quote is never treated as a string.

    Args:
        query (str): The SQL query to execute. Must be a single statement.
        max_rows (int, optional): Maximum number of rows to fetch. Defaults to 10.
            Only applies when fetch_all=False.
        fetch_all (bool, optional): If True, fetch all rows regardless of max_rows.
            Defaults to False for safety.

    Returns:
        QueryResult: A Pydantic model containing:
            - query: The (stripped) query text
            - success: Whether execution succeeded
            - data: List of dictionaries (rows) if successful, None otherwise
            - error: Error message if failed, None otherwise
            - row_count: Number of rows fetched
            - execution_time: Seconds spent in `cursor.execute` (0.0 on failure)

    Raises:
        Nothing. Validation failures and database errors are caught and reported
        through `QueryResult.success` / `QueryResult.error`.

    Examples:
        >>> result = execute_query("SELECT * FROM users")
        >>> result.success
        True
        >>> result.row_count
        10

        >>> result = execute_query("SELECT * FROM users", fetch_all=True)
        >>> result.row_count
        1000
    """
    try:
        query = query.strip()
        if not query:
            raise ValueError("Empty query!")

        # Only the "live" SQL text is inspected: strings and comments cannot terminate
        # a statement or start one, and must not be able to hide a `;` either.
        live_sql = _strip_inert_sql(query)

        # Check for multiple statements via semicolon detection
        if ';' in live_sql:
            raise ValueError("Multiple statements or unsafe semicolons detected!")

        # Prevent unsafe data modification queries (leading comments are already removed)
        leading = live_sql.lstrip().upper()
        if any(leading.startswith(keyword) for keyword in UNSAFE_QUERIES):
            raise ValueError("Trying Data Update, Not allowed!!!")

        # Execute query and measure time with a monotonic clock
        start_time = time.perf_counter()
        cursor.execute(query)
        execution_time = time.perf_counter() - start_time

        # Fetch results
        column_names = [desc[0] for desc in cursor.description]
        results = cursor.fetchall() if fetch_all else cursor.fetchmany(max_rows)

        df = pd.DataFrame(results, columns=column_names)

        return QueryResult(
            query=query,
            success=True,
            data=df.to_dict('records'),
            error=None,
            row_count=len(results),
            execution_time=execution_time
        )

    except Exception as e:
        return QueryResult(
            # str() keeps the failure report valid even when a non-string was passed in
            query=str(query),
            success=False,
            data=None,
            error=str(e),
            row_count=0,
            execution_time=0.0
        )


In [None]:
result = execute_query("SELECT * FROM AIRLINES.AIRLINES.FLIGHTS")
assert result.success
assert result.row_count > 0
assert type(result.execution_time) == float


In [None]:
df = pd.DataFrame(  result.data)
df

Unnamed: 0,flight_id,flight_no,scheduled_departure,scheduled_arrival,departure_airport,arrival_airport,status,aircraft_code,actual_departure,actual_arrival
0,1185,PG0134,2017-09-10 09:50:00+03,2017-09-10 14:55:00+03,DME,BTK,Scheduled,319,\N,\N
1,3979,PG0052,2017-08-25 14:50:00+03,2017-08-25 17:35:00+03,VKO,HMA,Scheduled,CR2,\N,\N
2,4739,PG0561,2017-09-05 12:30:00+03,2017-09-05 14:15:00+03,VKO,AER,Scheduled,763,\N,\N
3,5502,PG0529,2017-09-12 09:50:00+03,2017-09-12 11:20:00+03,SVO,UFA,Scheduled,763,\N,\N
4,6938,PG0461,2017-09-04 12:25:00+03,2017-09-04 13:20:00+03,SVO,ULV,Scheduled,SU9,\N,\N
5,7784,PG0667,2017-09-10 15:00:00+03,2017-09-10 17:30:00+03,SVO,KRO,Scheduled,CR2,\N,\N
6,9478,PG0360,2017-08-28 09:00:00+03,2017-08-28 11:35:00+03,LED,REN,Scheduled,CR2,\N,\N
7,11085,PG0569,2017-08-24 15:05:00+03,2017-08-24 16:10:00+03,SVX,SCW,Scheduled,733,\N,\N
8,11847,PG0498,2017-09-12 10:15:00+03,2017-09-12 14:55:00+03,KZN,IKT,Scheduled,319,\N,\N
9,12012,PG0621,2017-08-26 16:05:00+03,2017-08-26 17:00:00+03,KZN,MQF,Scheduled,CR2,\N,\N


In [None]:
assert len(df) == result.row_count 
assert not result.error

In [None]:
# Test cases for execute_query function

print("=" * 50)
print("Test 1: Successful query with default max_rows")
result = execute_query("SELECT * FROM AIRLINES.AIRLINES.FLIGHTS")
print(f"Success: {result.success}")
print(f"Row count: {result.row_count}")
print(f"Execution time: {result.execution_time:.4f}s")
print(f"First row: {result.data[0] if result.data else None}")

print("\n" + "=" * 50)
print("Test 2: Query with fetch_all=True")
result = execute_query("SELECT * FROM AIRLINES.AIRLINES.AIRCRAFTS_DATA", fetch_all=True)
print(f"Success: {result.success}")
print(f"Row count: {result.row_count}")
print(f"Data: {result.data[:2] if result.data else None}")  # First 2 rows

print("\n" + "=" * 50)
print("Test 3: Query with custom max_rows")
result = execute_query("SELECT * FROM AIRLINES.AIRLINES.AIRPORTS_DATA", max_rows=3)
print(f"Success: {result.success}")
print(f"Row count: {result.row_count}")

print("\n" + "=" * 50)
print("Test 4: Unsafe query (DROP)")
result = execute_query("DROP TABLE AIRLINES.AIRLINES.FLIGHTS")
print(f"Success: {result.success}")
print(f"Error: {result.error}")

print("\n" + "=" * 50)
print("Test 5: Multiple statements")
result = execute_query("SELECT * FROM AIRLINES.AIRLINES.FLIGHTS; DROP TABLE AIRLINES.AIRLINES.FLIGHTS")
print(f"Success: {result.success}")
print(f"Error: {result.error}")

print("\n" + "=" * 50)
print("Test 6: Query with semicolon in string (should pass)")
result = execute_query("SELECT * FROM AIRLINES.AIRLINES.FLIGHTS WHERE flight_no = 'PG;0134'", max_rows=5)
print(f"Success: {result.success}")
print(f"Error: {result.error}")

print("\n" + "=" * 50)
print("Test 7: Invalid table name")
result = execute_query("SELECT * FROM AIRLINES.AIRLINES.NONEXISTENT_TABLE")
print(f"Success: {result.success}")
print(f"Error: {result.error}")

print("\n" + "=" * 50)
print("Test 8: Invalid column name")
result = execute_query("SELECT nonexistent_column FROM AIRLINES.AIRLINES.FLIGHTS", max_rows=5)
print(f"Success: {result.success}")
print(f"Error: {result.error}")

print("\n" + "=" * 50)
print("Test 9: Empty result set")
result = execute_query("SELECT * FROM AIRLINES.AIRLINES.FLIGHTS WHERE 1=0")
print(f"Success: {result.success}")
print(f"Row count: {result.row_count}")
print(f"Data: {result.data}")

print("\n" + "=" * 50)
print("Test 10: Complex JOIN query")
result = execute_query("""
    SELECT f.flight_no, a.airport_name 
    FROM AIRLINES.AIRLINES.FLIGHTS f
    JOIN AIRLINES.AIRLINES.AIRPORTS_DATA a ON f.departure_airport = a.airport_code
    LIMIT 5
""")
print(f"Success: {result.success}")
print(f"Row count: {result.row_count}")
print(f"First row: {result.data[0] if result.data else None}")


Test 1: Successful query with default max_rows
Success: True
Row count: 10
Execution time: 1.1932s
First row: {'flight_id': 1185, 'flight_no': 'PG0134', 'scheduled_departure': '2017-09-10 09:50:00+03', 'scheduled_arrival': '2017-09-10 14:55:00+03', 'departure_airport': 'DME', 'arrival_airport': 'BTK', 'status': 'Scheduled', 'aircraft_code': '319', 'actual_departure': '\\N', 'actual_arrival': '\\N'}

Test 2: Query with fetch_all=True
Success: True
Row count: 9
Data: [{'aircraft_code': '773', 'model': '{"en": "Boeing 777-300", "ru": "–ë–æ–∏–Ω–≥ 777-300"}', 'range': 11100}, {'aircraft_code': '763', 'model': '{"en": "Boeing 767-300", "ru": "–ë–æ–∏–Ω–≥ 767-300"}', 'range': 7900}]

Test 3: Query with custom max_rows
Success: True
Row count: 3

Test 4: Unsafe query (DROP)
Success: False
Error: Trying Data Update, Not allowed!!!

Test 5: Multiple statements
Success: False
Error: Multiple statements or unsafe semicolons detected!

Test 6: Query with semicolon in string (should pass)
Success: Fa

## DB context

The schema context should be:
- **Complete** - all info to generate correct SQL
- **Compact** - fit in LLM context window
- **Clear** - easy structure to parse


```json
{
  "database": "AIRLINES",
  "schema": "AIRLINES",
  "tables": [
    {
      "name": "FLIGHTS",
      "row_count": 33121,
      "columns": [
        {
          "name": "flight_id",
          "type": "NUMBER(38,0)",
          "primary_key": true
        },
        {
          "name": "aircraft_code",
          "type": "VARCHAR",
          "foreign_key": {
            "table": "AIRCRAFTS_DATA",
            "column": "aircraft_code"
          }
        }
      ],
      "sample_rows": [
        {"flight_id": 1185, "flight_no": "PG0134", ...}
      ]
    }
  ]
}
```

Key points:
- Array of tables (easier to iterate)
- Column metadata inline (PK/FK flags)
- Sample rows show actual data format
- Compact types (no need for full precision)


In [None]:
#|export
# individual table
class TableAttr(BaseModel):
    """Description of one table: its name, column metadata, example rows and row count."""
    name: str                                   # table name
    column_names: Optional[list[dict]] = None   # one dict of column metadata per column
    sample_rows: Optional[list[dict]] = None    # example rows as column->value dicts
    row_count : int                             # number of rows in the table (required)

# complete schema: every table plus the optional relationships between them
class ParentSchema(BaseModel):
    """Schema description for one database/schema pair, made of `TableAttr` entries."""
    dialect: str                                # SQL dialect label
    database: str                               # database name
    # NOTE(review): capitalised `Schema` presumably avoids clashing with pydantic's
    # `BaseModel.schema` — confirm before renaming.
    Schema: str                                 # schema name inside the database
    tables: list[TableAttr]                     # one entry per table
    relationships: Optional[list[dict]] = None  # For foreign key

Let's use the `execute_query` function to list all the tables and populate the schema.

In [None]:
r = execute_query(f"SHOW TABLES IN AIRLINES.AIRLINES", fetch_all=True)
assert r.success

In [None]:
df = pd.DataFrame(r.data)
df

Unnamed: 0,created_on,name,database_name,schema_name,kind,comment,cluster_by,rows,bytes,owner,...,search_optimization_progress,search_optimization_bytes,is_external,enable_schema_evolution,owner_role_type,is_event,is_hybrid,is_iceberg,is_dynamic,is_immutable
0,2024-10-24 17:19:09.958000-07:00,AIRCRAFTS_DATA,AIRLINES,AIRLINES,TABLE,,,9,2048,ACCOUNTADMIN,...,,,N,N,ROLE,N,N,N,N,N
1,2024-10-24 17:13:56.105000-07:00,AIRPORTS_DATA,AIRLINES,AIRLINES,TABLE,,,104,11264,ACCOUNTADMIN,...,,,N,N,ROLE,N,N,N,N,N
2,2024-10-24 17:20:59.367000-07:00,BOARDING_PASSES,AIRLINES,AIRLINES,TABLE,,,579686,3896320,ACCOUNTADMIN,...,,,N,N,ROLE,N,N,N,N,N
3,2024-10-24 17:21:23.225000-07:00,BOOKINGS,AIRLINES,AIRLINES,TABLE,,,262788,3322880,ACCOUNTADMIN,...,,,N,N,ROLE,N,N,N,N,N
4,2024-10-24 17:13:31.466000-07:00,FLIGHTS,AIRLINES,AIRLINES,TABLE,,,33121,749568,ACCOUNTADMIN,...,,,N,N,ROLE,N,N,N,N,N
5,2024-10-24 17:19:56.520000-07:00,SEATS,AIRLINES,AIRLINES,TABLE,,,1339,4608,ACCOUNTADMIN,...,,,N,N,ROLE,N,N,N,N,N
6,2024-10-24 17:13:45.103000-07:00,TICKETS,AIRLINES,AIRLINES,TABLE,,,366733,6424576,ACCOUNTADMIN,...,,,N,N,ROLE,N,N,N,N,N
7,2024-10-24 17:20:21.125000-07:00,TICKET_FLIGHTS,AIRLINES,AIRLINES,TABLE,,,1045726,5586944,ACCOUNTADMIN,...,,,N,N,ROLE,N,N,N,N,N


To get the schema of a table, we can use the following query like

```sql
desc table table_name;
```

In [None]:
r1 = execute_query(f"DESCRIBE TABLE AIRLINES.AIRLINES.{df['name'][0]}", fetch_all=True)
assert r1.success

In [None]:
df1 = pd.DataFrame(r1.data)
df1

Unnamed: 0,name,type,kind,null?,default,primary key,unique key,check,expression,comment,policy name,privacy domain
0,aircraft_code,VARCHAR(16777216),COLUMN,Y,,N,N,,,,,
1,model,VARCHAR(16777216),COLUMN,Y,,N,N,,,,,
2,range,"NUMBER(38,0)",COLUMN,Y,,N,N,,,,,


In [None]:
for idx, row in df1.iterrows():
    print(row)
    break

name                  aircraft_code
type              VARCHAR(16777216)
kind                         COLUMN
null?                             Y
default                        None
primary key                       N
unique key                        N
check                          None
expression                     None
comment                        None
policy name                    None
privacy domain                 None
Name: 0, dtype: object


In [None]:
print(row.to_dict())

{'name': 'aircraft_code', 'type': 'VARCHAR(16777216)', 'kind': 'COLUMN', 'null?': 'Y', 'default': None, 'primary key': 'N', 'unique key': 'N', 'check': None, 'expression': None, 'comment': None, 'policy name': None, 'privacy domain': None}


Each field from the DESCRIBE TABLE result:

- **name**: Column name
- **type**: Data type (e.g., VARCHAR, NUMBER)
- **kind**: Always "COLUMN" for column descriptions
- **null?**: Whether NULL values are allowed ('Y' = yes, 'N' = no)
- **default**: Default value if column not specified in INSERT
- **primary key**: 'Y' if this column is a primary key
- **unique key**: 'Y' if this column must have unique values
- **check**: Check constraint expression (validation rule)
- **expression**: For computed/virtual columns
- **comment**: Description/documentation for the column
- **policy name**: Data masking/row access policy name
- **privacy domain**: Privacy classification


In [None]:
df1.columns

Index(['name', 'type', 'kind', 'null?', 'default', 'primary key', 'unique key',
       'check', 'expression', 'comment', 'policy name', 'privacy domain'],
      dtype='object')

In [None]:
FIELD_TO_FILTER = ['name', 'type', 'null?', 'default', 'primary key', 'unique key', 'comment', ] #"check", 'expression']
{k:v for k, v in row.to_dict().items() if k in FIELD_TO_FILTER and v and v != 'N'} # for removing 'primary key': 'N' and 'unique key': 'N'

{'name': 'aircraft_code', 'type': 'VARCHAR(16777216)', 'null?': 'Y'}

Filtering non null metadata.

In [None]:
column_names = []
for _, row in df1.iterrows():
    column_names.append({k:v for k, v in row.to_dict().items() if k in FIELD_TO_FILTER and v  and v != 'N'})
column_names

[{'name': 'aircraft_code', 'type': 'VARCHAR(16777216)', 'null?': 'Y'},
 {'name': 'model', 'type': 'VARCHAR(16777216)', 'null?': 'Y'},
 {'name': 'range', 'type': 'NUMBER(38,0)', 'null?': 'Y'}]

Single row data.

In [None]:
# fetch a single row of data from the table
table_data_example = execute_query(f"select * from AIRLINES.AIRLINES.{df['name'][0]} limit 1", fetch_all=True)
assert table_data_example.success
assert len(table_data_example.data) == table_data_example.row_count == 1
table_data_example.data

[{'aircraft_code': '773',
  'model': '{"en": "Boeing 777-300", "ru": "–ë–æ–∏–Ω–≥ 777-300"}',
  'range': 11100}]

In [None]:
# count the rows in the table (the result column comes back as 'COUNT')
row_count = execute_query(f"select count(*)  as count from AIRLINES.AIRLINES.{df['name'][0]}", fetch_all=True)
assert row_count.success
row_count.data

[{'COUNT': 9}]

In [None]:
TableAttr(
    name=df['name'][0],
    column_names=column_names,
    sample_rows=table_data_example.data,
    row_count=row_count.data[0]['COUNT']
)

TableAttr(name='AIRCRAFTS_DATA', column_names=[{'name': 'aircraft_code', 'type': 'VARCHAR(16777216)', 'null?': 'Y'}, {'name': 'model', 'type': 'VARCHAR(16777216)', 'null?': 'Y'}, {'name': 'range', 'type': 'NUMBER(38,0)', 'null?': 'Y'}], sample_rows=[{'aircraft_code': '773', 'model': '{"en": "Boeing 777-300", "ru": "–ë–æ–∏–Ω–≥ 777-300"}', 'range': 11100}], row_count=9)

In [None]:
# Build one TableAttr per table name in `df['name']`.
# Each table costs three queries: DESCRIBE TABLE, a one-row sample, and a COUNT(*).
all_table_atrs = []
from tqdm import tqdm

for tn in tqdm(df['name']):
    # get table info
    result = execute_query(f"DESCRIBE TABLE AIRLINES.AIRLINES.{tn}", fetch_all=True)
    assert result.success

    table_df = pd.DataFrame(result.data)

    # get column related data: keep only the FIELD_TO_FILTER keys whose value is truthy and not 'N'
    column_names = []
    for _, row in table_df.iterrows():
        column_names.append({k:v for k, v in row.to_dict().items() if k in FIELD_TO_FILTER and v  and v != 'N'})
    
    # fetch a single row of data from the table
    table_data_example = execute_query(f"select * from AIRLINES.AIRLINES.{tn} limit 1", fetch_all=True)
    assert table_data_example.success

    # count the rows in the table
    row_count = execute_query(f"select count(*)  as count from AIRLINES.AIRLINES.{tn}", fetch_all=True)
    assert row_count.success

    all_table_atrs.append(
        TableAttr(
            name=tn,
            column_names=column_names,
            sample_rows=table_data_example.data,
            row_count=row_count.data[0]['COUNT']
            ))
all_table_atrs

  0%|          | 0/8 [00:00<?, ?it/s]

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8/8 [00:10<00:00,  1.29s/it]


[TableAttr(name='AIRCRAFTS_DATA', column_names=[{'name': 'aircraft_code', 'type': 'VARCHAR(16777216)', 'null?': 'Y'}, {'name': 'model', 'type': 'VARCHAR(16777216)', 'null?': 'Y'}, {'name': 'range', 'type': 'NUMBER(38,0)', 'null?': 'Y'}], sample_rows=[{'aircraft_code': '773', 'model': '{"en": "Boeing 777-300", "ru": "–ë–æ–∏–Ω–≥ 777-300"}', 'range': 11100}], row_count=9),
 TableAttr(name='AIRPORTS_DATA', column_names=[{'name': 'airport_code', 'type': 'VARCHAR(16777216)', 'null?': 'Y'}, {'name': 'airport_name', 'type': 'VARCHAR(16777216)', 'null?': 'Y'}, {'name': 'city', 'type': 'VARCHAR(16777216)', 'null?': 'Y'}, {'name': 'coordinates', 'type': 'VARCHAR(16777216)', 'null?': 'Y'}, {'name': 'timezone', 'type': 'VARCHAR(16777216)', 'null?': 'Y'}], sample_rows=[{'airport_code': 'YKS', 'airport_name': '{"en": "Yakutsk Airport", "ru": "–Ø–∫—É—Ç—Å–∫"}', 'city': '{"en": "Yakutsk", "ru": "–Ø–∫—É—Ç—Å–∫"}', 'coordinates': '(129.77099609375,62.0932998657226562)', 'timezone': 'Asia/Yakutsk'}], row_co

In [None]:
schema = ParentSchema(
    dialect="snowflake",
    database="AIRLINES",
    Schema="AIRLINES",
    tables=all_table_atrs
)
print(schema)

dialect='snowflake' database='AIRLINES' Schema='AIRLINES' tables=[TableAttr(name='AIRCRAFTS_DATA', column_names=[{'name': 'aircraft_code', 'type': 'VARCHAR(16777216)', 'null?': 'Y'}, {'name': 'model', 'type': 'VARCHAR(16777216)', 'null?': 'Y'}, {'name': 'range', 'type': 'NUMBER(38,0)', 'null?': 'Y'}], sample_rows=[{'aircraft_code': '773', 'model': '{"en": "Boeing 777-300", "ru": "–ë–æ–∏–Ω–≥ 777-300"}', 'range': 11100}], row_count=9), TableAttr(name='AIRPORTS_DATA', column_names=[{'name': 'airport_code', 'type': 'VARCHAR(16777216)', 'null?': 'Y'}, {'name': 'airport_name', 'type': 'VARCHAR(16777216)', 'null?': 'Y'}, {'name': 'city', 'type': 'VARCHAR(16777216)', 'null?': 'Y'}, {'name': 'coordinates', 'type': 'VARCHAR(16777216)', 'null?': 'Y'}, {'name': 'timezone', 'type': 'VARCHAR(16777216)', 'null?': 'Y'}], sample_rows=[{'airport_code': 'YKS', 'airport_name': '{"en": "Yakutsk Airport", "ru": "–Ø–∫—É—Ç—Å–∫"}', 'city': '{"en": "Yakutsk", "ru": "–Ø–∫—É—Ç—Å–∫"}', 'coordinates': '(129.7709960

In [None]:
schema.relationships is None

True

`relationships` for populating foreign key.

### Foreign key build up
As there is no explicit way to find the foreign keys, we will use an LLM to infer them.

In [None]:
r = execute_query("""
SELECT COLUMN_NAME, TABLE_NAME
FROM AIRLINES.INFORMATION_SCHEMA.COLUMNS 
WHERE TABLE_SCHEMA = 'AIRLINES'
ORDER BY COLUMN_NAME, TABLE_NAME
""", fetch_all=True)

assert r.success

In [None]:
df2 = pd.DataFrame(r.data)
df2

Unnamed: 0,COLUMN_NAME,TABLE_NAME
0,actual_arrival,FLIGHTS
1,actual_departure,FLIGHTS
2,aircraft_code,AIRCRAFTS_DATA
3,aircraft_code,FLIGHTS
4,aircraft_code,SEATS
5,airport_code,AIRPORTS_DATA
6,airport_name,AIRPORTS_DATA
7,amount,TICKET_FLIGHTS
8,arrival_airport,FLIGHTS
9,boarding_no,BOARDING_PASSES


In [None]:
df2.groupby('COLUMN_NAME')['TABLE_NAME'].apply(list).to_dict()

{'actual_arrival': ['FLIGHTS'],
 'actual_departure': ['FLIGHTS'],
 'aircraft_code': ['AIRCRAFTS_DATA', 'FLIGHTS', 'SEATS'],
 'airport_code': ['AIRPORTS_DATA'],
 'airport_name': ['AIRPORTS_DATA'],
 'amount': ['TICKET_FLIGHTS'],
 'arrival_airport': ['FLIGHTS'],
 'boarding_no': ['BOARDING_PASSES'],
 'book_date': ['BOOKINGS'],
 'book_ref': ['BOOKINGS', 'TICKETS'],
 'city': ['AIRPORTS_DATA'],
 'coordinates': ['AIRPORTS_DATA'],
 'departure_airport': ['FLIGHTS'],
 'fare_conditions': ['SEATS', 'TICKET_FLIGHTS'],
 'flight_id': ['BOARDING_PASSES', 'FLIGHTS', 'TICKET_FLIGHTS'],
 'flight_no': ['FLIGHTS'],
 'model': ['AIRCRAFTS_DATA'],
 'passenger_id': ['TICKETS'],
 'range': ['AIRCRAFTS_DATA'],
 'scheduled_arrival': ['FLIGHTS'],
 'scheduled_departure': ['FLIGHTS'],
 'seat_no': ['BOARDING_PASSES', 'SEATS'],
 'status': ['FLIGHTS'],
 'ticket_no': ['BOARDING_PASSES', 'TICKETS', 'TICKET_FLIGHTS'],
 'timezone': ['AIRPORTS_DATA'],
 'total_amount': ['BOOKINGS']}

In [None]:
schema_summary = []
for table in schema.tables:
    cols = [col['name'] for col in table.column_names]
    schema_summary.append({
        "table": table.name,
        "columns": cols,
        "row_count": table.row_count,
        "sample": table.sample_rows[0] if table.sample_rows else {}
    })
schema_summary

[{'table': 'AIRCRAFTS_DATA',
  'columns': ['aircraft_code', 'model', 'range'],
  'row_count': 9,
  'sample': {'aircraft_code': '773',
   'model': '{"en": "Boeing 777-300", "ru": "–ë–æ–∏–Ω–≥ 777-300"}',
   'range': 11100}},
 {'table': 'AIRPORTS_DATA',
  'columns': ['airport_code',
   'airport_name',
   'city',
   'coordinates',
   'timezone'],
  'row_count': 104,
  'sample': {'airport_code': 'YKS',
   'airport_name': '{"en": "Yakutsk Airport", "ru": "–Ø–∫—É—Ç—Å–∫"}',
   'city': '{"en": "Yakutsk", "ru": "–Ø–∫—É—Ç—Å–∫"}',
   'coordinates': '(129.77099609375,62.0932998657226562)',
   'timezone': 'Asia/Yakutsk'}},
 {'table': 'BOARDING_PASSES',
  'columns': ['ticket_no', 'flight_id', 'boarding_no', 'seat_no'],
  'row_count': 579686,
  'sample': {'ticket_no': '0005435212351',
   'flight_id': 30625,
   'boarding_no': 1,
   'seat_no': '2D'}},
 {'table': 'BOOKINGS',
  'columns': ['book_ref', 'book_date', 'total_amount'],
  'row_count': 262788,
  'sample': {'book_ref': '00000F',
   'book_date': '

In [None]:
prompt = f"""Given this database schema, identify the foreign key relationships.

Schema: {json.dumps(schema_summary, indent=2)}

Foreign Keys relations db column name:
{json.dumps(df2.groupby('COLUMN_NAME')['TABLE_NAME'].apply(list).to_dict(), indent=2)}

Return ONLY a JSON array of relationships in this exact format:
[
  {{
    "from_table": "FLIGHTS",
    "from_column": "aircraft_code",
    "to_table": "AIRCRAFTS_DATA",
    "to_column": "aircraft_code"
  }}
]

Rules:
- Only include relationships where a column in one table references a primary key in another
- Use row counts as hints (parent tables typically have fewer rows)
- Consider naming patterns (e.g., aircraft_code likely references AIRCRAFTS_DATA)
"""
print(prompt)

Given this database schema, identify the foreign key relationships.

Schema: [
  {
    "table": "AIRCRAFTS_DATA",
    "columns": [
      "aircraft_code",
      "model",
      "range"
    ],
    "row_count": 9,
    "sample": {
      "aircraft_code": "773",
      "model": "{\"en\": \"Boeing 777-300\", \"ru\": \"\u0411\u043e\u0438\u043d\u0433 777-300\"}",
      "range": 11100
    }
  },
  {
    "table": "AIRPORTS_DATA",
    "columns": [
      "airport_code",
      "airport_name",
      "city",
      "coordinates",
      "timezone"
    ],
    "row_count": 104,
    "sample": {
      "airport_code": "YKS",
      "airport_name": "{\"en\": \"Yakutsk Airport\", \"ru\": \"\u042f\u043a\u0443\u0442\u0441\u043a\"}",
      "city": "{\"en\": \"Yakutsk\", \"ru\": \"\u042f\u043a\u0443\u0442\u0441\u043a\"}",
      "coordinates": "(129.77099609375,62.0932998657226562)",
      "timezone": "Asia/Yakutsk"
    }
  },
  {
    "table": "BOARDING_PASSES",
    "columns": [
      "ticket_no",
      "flight_id",
 

In [None]:
chat = Chat(model_name)
resp = chat(prompt)
resp

[
  {
    "from_table": "FLIGHTS",
    "from_column": "aircraft_code",
    "to_table": "AIRCRAFTS_DATA",
    "to_column": "aircraft_code"
  },
  {
    "from_table": "FLIGHTS",
    "from_column": "departure_airport",
    "to_table": "AIRPORTS_DATA",
    "to_column": "airport_code"
  },
  {
    "from_table": "FLIGHTS",
    "from_column": "arrival_airport",
    "to_table": "AIRPORTs_DATA",
    "to_column": "airport_code"
  },
  {
    "from_table": "BOARDING_PASSES",
    "from_column": "ticket_no",
    "to_table": "TICKETS",
    "to_column": "ticket_no"
  },
  {
    "from_table": "BOARDING_PASSES",
    "from_column": "flight_id",
    "to_table": "FLIGHTS",
    "to_column": "flight_id"
  },
  {
    "from_table": "TICKET_FLIGHTS",
    "from_column": "ticket_no",
    "to_table": "TICKETS",
    "to_column": "ticket_no"
  },
  {
    "from_table": "TICKET_FLIGHTS",
    "from_column": "flight_id",
    "to_table": "FLIGHTS",
    "to_column": "flight_id"
  },
  {
    "from_table": "TICKETS",
    "from_column": "book_ref",
    "to_table": "BOOKINGS",
    "to_column": "book_ref"
  },
  {
    "from_table": "SEATS",
    "from_column": "aircraft_code",
    "to_table": "AIRCRAFTS_DATA",
    "to_column": "aircraft_code"
  }
]

<details>

- id: `chatcmpl-hak9vm3vimejbmt8est9jl`
- model: `lm_studio/openai/gpt-oss-20b`
- finish_reason: `stop`
- usage: `Usage(completion_tokens=706, prompt_tokens=1559, total_tokens=2265, completion_tokens_details=None, prompt_tokens_details=None)`

</details>

In [None]:
print(resp.choices[0].message.content)

[
  {
    "from_table": "FLIGHTS",
    "from_column": "aircraft_code",
    "to_table": "AIRCRAFTS_DATA",
    "to_column": "aircraft_code"
  },
  {
    "from_table": "FLIGHTS",
    "from_column": "departure_airport",
    "to_table": "AIRPORTS_DATA",
    "to_column": "airport_code"
  },
  {
    "from_table": "FLIGHTS",
    "from_column": "arrival_airport",
    "to_table": "AIRPORTs_DATA",
    "to_column": "airport_code"
  },
  {
    "from_table": "BOARDING_PASSES",
    "from_column": "ticket_no",
    "to_table": "TICKETS",
    "to_column": "ticket_no"
  },
  {
    "from_table": "BOARDING_PASSES",
    "from_column": "flight_id",
    "to_table": "FLIGHTS",
    "to_column": "flight_id"
  },
  {
    "from_table": "TICKET_FLIGHTS",
    "from_column": "ticket_no",
    "to_table": "TICKETS",
    "to_column": "ticket_no"
  },
  {
    "from_table": "TICKET_FLIGHTS",
    "from_column": "flight_id",
    "to_table": "FLIGHTS",
    "to_column": "flight_id"
  },
  {
    "from_table": "TICKETS",
    "f

In [None]:
msg = resp.choices[0].message.content.replace('```json', '').replace('```', '')
print(msg)

[
  {
    "from_table": "FLIGHTS",
    "from_column": "aircraft_code",
    "to_table": "AIRCRAFTS_DATA",
    "to_column": "aircraft_code"
  },
  {
    "from_table": "FLIGHTS",
    "from_column": "departure_airport",
    "to_table": "AIRPORTS_DATA",
    "to_column": "airport_code"
  },
  {
    "from_table": "FLIGHTS",
    "from_column": "arrival_airport",
    "to_table": "AIRPORTs_DATA",
    "to_column": "airport_code"
  },
  {
    "from_table": "BOARDING_PASSES",
    "from_column": "ticket_no",
    "to_table": "TICKETS",
    "to_column": "ticket_no"
  },
  {
    "from_table": "BOARDING_PASSES",
    "from_column": "flight_id",
    "to_table": "FLIGHTS",
    "to_column": "flight_id"
  },
  {
    "from_table": "TICKET_FLIGHTS",
    "from_column": "ticket_no",
    "to_table": "TICKETS",
    "to_column": "ticket_no"
  },
  {
    "from_table": "TICKET_FLIGHTS",
    "from_column": "flight_id",
    "to_table": "FLIGHTS",
    "to_column": "flight_id"
  },
  {
    "from_table": "TICKETS",
    "f

In [None]:
schema.relationships = json.loads(msg)

The foreign key relationships are not visible in the schema.

In [None]:
r = execute_query("""SELECT 
  fk.table_name as from_table,
  fk.column_name as from_column,
  pk.table_name as to_table,
  pk.column_name as to_column
FROM INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS rc
JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE fk 
  ON rc.constraint_name = fk.constraint_name
JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE pk
  ON rc.unique_constraint_name = pk.constraint_name
WHERE fk.table_schema = 'AIRLINES'
""")
r.data

In [None]:
#|export


def _resolve_name(name, candidates):
    """Return the entry of `candidates` matching `name` (exact first, then ignoring case), else None."""
    if not isinstance(name, str):
        return None
    name = name.strip()
    if name in candidates:
        return name
    folded = name.upper()
    return next((c for c in candidates if isinstance(c, str) and c.upper() == folded), None)


def _normalize_relationships(relationships: list, columns_by_table: dict) -> list:
    """Keep only relationships whose tables and columns exist, spelled as in the schema.

    LLM output is untrusted: entries may be malformed, may mis-case a name
    (e.g. ``AIRPORTs_DATA``) or may reference objects that do not exist.
    Entries that cannot be matched on both sides are dropped; matched names are
    rewritten to the schema's own spelling. Extra keys on an entry are preserved.
    """
    cleaned = []
    for rel in relationships:
        if not isinstance(rel, dict):
            continue
        fixed = dict(rel)
        for side in ("from", "to"):
            table = _resolve_name(rel.get(f"{side}_table"), columns_by_table)
            column = _resolve_name(rel.get(f"{side}_column"), columns_by_table.get(table, ()))
            if table is None or column is None:
                break
            fixed[f"{side}_table"], fixed[f"{side}_column"] = table, column
        else:
            # Both sides resolved against the schema.
            cleaned.append(fixed)
    return cleaned


def get_fk(schema: ParentSchema, model_name: str ="gemini/gemini-2.5-flash") -> ParentSchema:
    """Infer foreign-key relationships for `schema` with an LLM and store them on it.

    The prompt contains, per table, its column names, row count and first sample
    row, plus a column-name -> tables map read from INFORMATION_SCHEMA.COLUMNS.
    The first JSON array found in the reply is parsed, checked against the
    schema's tables/columns, and assigned to ``schema.relationships``.

    Args:
        schema: Schema whose ``database``, ``Schema`` and ``tables`` are read.
            It is modified in place.
        model_name: Model identifier handed to ``Chat``.

    Returns:
        The same `schema` object, with ``relationships`` set to a list of dicts
        carrying ``from_table``, ``from_column``, ``to_table`` and ``to_column``.

    Raises:
        Exception: If the metadata query fails, the reply holds no JSON array,
            or the array cannot be parsed. The original error is chained as
            ``__cause__``.
    """
    try:
        # Query for column information
        r = execute_query(f"""
SELECT COLUMN_NAME, TABLE_NAME
FROM {schema.database}.INFORMATION_SCHEMA.COLUMNS 
WHERE TABLE_SCHEMA = '{schema.Schema}'
ORDER BY COLUMN_NAME, TABLE_NAME
""", fetch_all=True)

        if not r.success:
            raise Exception(f"Failed to query schema: {r.error}")

        df = pd.DataFrame(r.data)
        # An empty result has no COLUMN_NAME column to group on.
        tables_by_column = (
            {} if df.empty
            else df.groupby('COLUMN_NAME')['TABLE_NAME'].apply(list).to_dict()
        )

        # Build schema summary (and remember each table's columns for validation)
        schema_summary = []
        columns_by_table = {}
        for table in schema.tables:
            cols = [col['name'] for col in table.column_names]
            columns_by_table[table.name] = cols
            schema_summary.append({
                "table": table.name,
                "columns": cols,
                "row_count": table.row_count,
                "sample": table.sample_rows[0] if table.sample_rows else {}
            })

        # Build prompt
        prompt = f"""Given this database schema, identify the foreign key relationships.

Schema: {json.dumps(schema_summary, indent=2)}

Foreign Keys relations db column name:
{json.dumps(tables_by_column, indent=2)}

Return ONLY a JSON array of relationships in this exact format:
[
  {{
    "from_table": "FLIGHTS",
    "from_column": "aircraft_code",
    "to_table": "AIRCRAFTS_DATA",
    "to_column": "aircraft_code"
  }}
]

Rules:
- Only include relationships where a column in one table references a primary key in another
- Use row counts as hints (parent tables typically have fewer rows)
- Consider naming patterns (e.g., aircraft_code likely references AIRCRAFTS_DATA)
"""

        # Call LLM
        chat = Chat(model_name)
        resp = chat(prompt)
        # `content` can be None; treat that as an empty reply.
        content = resp.choices[0].message.content or ""

        # The reply may wrap the array in prose or code fences: take the span
        # from the first '[' to the last ']'.
        json_match = re.search(r'\[.*\]', content, re.DOTALL)
        if not json_match:
            raise ValueError("Could not find JSON array in LLM response")

        relationships = json.loads(json_match.group(0))

        # Validate it's a list
        if not isinstance(relationships, list):
            raise ValueError("Expected list of relationships from LLM")

        schema.relationships = _normalize_relationships(relationships, columns_by_table)
        return schema

    except json.JSONDecodeError as e:
        raise Exception(f"Failed to parse LLM response as JSON: {e}") from e
    except Exception as e:
        raise Exception(f"Error inferring foreign keys: {e}") from e


In [None]:
# get_fk sets `relationships` on the schema it is given and returns that same object.
n_s = get_fk(schema, model_name)
n_s

ParentSchema(dialect='snowflake', database='AIRLINES', Schema='AIRLINES', tables=[TableAttr(name='AIRCRAFTS_DATA', column_names=[{'name': 'aircraft_code', 'type': 'VARCHAR(16777216)', 'null?': 'Y'}, {'name': 'model', 'type': 'VARCHAR(16777216)', 'null?': 'Y'}, {'name': 'range', 'type': 'NUMBER(38,0)', 'null?': 'Y'}], sample_rows=[{'aircraft_code': '773', 'model': '{"en": "Boeing 777-300", "ru": "–ë–æ–∏–Ω–≥ 777-300"}', 'range': 11100}], row_count=9), TableAttr(name='AIRPORTS_DATA', column_names=[{'name': 'airport_code', 'type': 'VARCHAR(16777216)', 'null?': 'Y'}, {'name': 'airport_name', 'type': 'VARCHAR(16777216)', 'null?': 'Y'}, {'name': 'city', 'type': 'VARCHAR(16777216)', 'null?': 'Y'}, {'name': 'coordinates', 'type': 'VARCHAR(16777216)', 'null?': 'Y'}, {'name': 'timezone', 'type': 'VARCHAR(16777216)', 'null?': 'Y'}], sample_rows=[{'airport_code': 'YKS', 'airport_name': '{"en": "Yakutsk Airport", "ru": "–Ø–∫—É—Ç—Å–∫"}', 'city': '{"en": "Yakutsk", "ru": "–Ø–∫—É—Ç—Å–∫"}', 'coordinates

### Final Schema Build UP
Wrap all of the steps above (table metadata, sample rows, row counts and LLM-inferred foreign keys) into a single function.

In [None]:
#|export

# Keys of a `DESCRIBE TABLE` result row that get_schema_context copies into each column's metadata.
FIELD_TO_FILTER = ['name', 'type', 'null?', 'default', 'primary key', 'unique key', 'comment', ] #"check", 'expression']


def _quote_identifier(name: str) -> str:
    """Double-quote `name` (doubling embedded quotes) so Snowflake resolves it verbatim."""
    return '"' + str(name).replace('"', '""') + '"'


def get_schema_context(db_name: str, schema_name: str, row_limit: int = 1, model_name:str = "gemini/gemini-2.5-flash" ) -> ParentSchema:
    """
    Extract complete schema context from a Snowflake database for LLM text-to-SQL generation.

    This function retrieves comprehensive metadata about all tables in a schema, including:
    - Table and column names with data types
    - Column constraints (nullable, primary keys, unique keys)
    - Sample rows to show data format
    - Row counts for each table
    - Foreign-key relationships inferred by an LLM (via `get_fk`)

    Args:
        db_name (str): Name of the Snowflake database. Interpolated into SQL as given.
        schema_name (str): Name of the schema within the database. Interpolated into SQL as given.
        row_limit (int, optional): Number of sample rows to fetch per table. Defaults to 1.
            Values <= 0 are replaced by 1.
        model_name (str, optional): Model identifier passed on to `get_fk`.

    Returns:
        ParentSchema: Pydantic model containing complete schema context with:
            - dialect: Database dialect (always 'snowflake')
            - database: Database name
            - Schema: Schema name
            - tables: List of TableAttr objects with full metadata
            - relationships: Foreign keys as set by `get_fk`

    Raises:
        AssertionError: If any database query fails during schema extraction
        ValueError: If the schema contains no tables
        Exception: Propagated from `get_fk` when foreign-key inference fails

    Example:
        >>> schema = get_schema_context("AIRLINES", "AIRLINES", row_limit=3)
        >>> print(f"Found {len(schema.tables)} tables")
        >>> print(schema.model_dump_json(indent=2))
    """
    if row_limit <= 0: row_limit = 1 # if zero or negative row count is provided

    # Checks raise AssertionError explicitly so they still run under `python -O`.
    result = execute_query(f"SHOW TABLES IN {db_name}.{schema_name}", fetch_all=True)
    if not result.success:
        raise AssertionError(f"Not able to fetch the `SHOW TABLES IN {db_name}.{schema_name}`")

    if not result.data:
        raise ValueError("Empty schema")

    all_tables_df = pd.DataFrame(result.data)

    print("Reading Tables....")
    all_table_atrs = []
    for tn in tqdm(all_tables_df['name']):
        # SHOW TABLES reports the stored name; quoting it keeps case-sensitive
        # or special-character names intact.
        table_ref = f"{db_name}.{schema_name}.{_quote_identifier(tn)}"

        # get table info
        result = execute_query(f"DESCRIBE TABLE {table_ref}", fetch_all=True)
        if not result.success:
            raise AssertionError(f"Not able to fetch the `DESCRIBE TABLE {table_ref}`")

        table_df = pd.DataFrame(result.data)

        # get column related data
        # this needs to be updated for the new db setup ie oracle and others
        # Empty values and 'N' flags are dropped to keep the context small, but
        # 'name' is always kept: a column may legitimately be called "N".
        column_names = [
            {
                k: v
                for k, v in row.to_dict().items()
                if k in FIELD_TO_FILTER and (k == 'name' or (v and v != 'N'))
            }
            for _, row in table_df.iterrows()
        ]

        # fetch `row_limit` sample rows from the table
        table_data_example = execute_query(f"select * from {table_ref} limit {row_limit}", fetch_all=True)
        if not table_data_example.success:
            raise AssertionError(f"Not able to fetch sample rows from {table_ref}")

        # fetch the total number of rows in the table
        row_count = execute_query(f"select count(*)  as count from {table_ref}", fetch_all=True)
        if not row_count.success:
            raise AssertionError(f"Not able to fetch the row count of {table_ref}")

        all_table_atrs.append(
            TableAttr(
                name=tn,
                column_names=column_names,
                sample_rows=table_data_example.data,
                row_count=next(iter(row_count.data[0].values())) # for ignoring the key if upper or lower type
                ))

    base_s =  ParentSchema(
        dialect="snowflake", # for now it is fixed to snowflake
        database=db_name,
        Schema=schema_name,
        tables=all_table_atrs
    )
    print("Building up FKs....")
    new_s = get_fk(base_s, model_name)

    return new_s



In [None]:
# Build the full context for AIRLINES.AIRLINES, inferring foreign keys with the
# model named by `model_name` (set from LM_STUDIO_MODEL_NAME earlier in the notebook).
schema = get_schema_context("AIRLINES", "AIRLINES", model_name = model_name)
print(schema)

Reading Tables....


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8/8 [00:08<00:00,  1.09s/it]


Building up FKs....
dialect='snowflake' database='AIRLINES' Schema='AIRLINES' tables=[TableAttr(name='AIRCRAFTS_DATA', column_names=[{'name': 'aircraft_code', 'type': 'VARCHAR(16777216)', 'null?': 'Y'}, {'name': 'model', 'type': 'VARCHAR(16777216)', 'null?': 'Y'}, {'name': 'range', 'type': 'NUMBER(38,0)', 'null?': 'Y'}], sample_rows=[{'aircraft_code': '773', 'model': '{"en": "Boeing 777-300", "ru": "–ë–æ–∏–Ω–≥ 777-300"}', 'range': 11100}], row_count=9), TableAttr(name='AIRPORTS_DATA', column_names=[{'name': 'airport_code', 'type': 'VARCHAR(16777216)', 'null?': 'Y'}, {'name': 'airport_name', 'type': 'VARCHAR(16777216)', 'null?': 'Y'}, {'name': 'city', 'type': 'VARCHAR(16777216)', 'null?': 'Y'}, {'name': 'coordinates', 'type': 'VARCHAR(16777216)', 'null?': 'Y'}, {'name': 'timezone', 'type': 'VARCHAR(16777216)', 'null?': 'Y'}], sample_rows=[{'airport_code': 'YKS', 'airport_name': '{"en": "Yakutsk Airport", "ru": "–Ø–∫—É—Ç—Å–∫"}', 'city': '{"en": "Yakutsk", "ru": "–Ø–∫—É—Ç—Å–∫"}', 'coordin

In [None]:
#| hide
# Run nbdev's export step so the `#|export` cells are written to the generated module.
import nbdev; nbdev.nbdev_export()