In [None]:
from dotenv import load_dotenv
from os import getenv
from pprint import pprint
import time
import pandas as pd
import duckdb

from openai import OpenAI
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.callbacks import OpenAICallbackHandler

# SQL agent
from langchain_community.utilities import SQLDatabase
from langchain.agents import create_sql_agent
from langchain_community.agent_toolkits import SQLDatabaseToolkit

# Load environment variables from ../.env; presumably this is where the
# OPENROUTER_API_URL / OPENROUTER_API_KEY values read via getenv() below are
# defined — confirm against the actual .env file.
load_dotenv("../.env")

### Reference
- https://python.langchain.com/docs/tutorials/sql_qa/

### List of models to be tested

In [None]:
# Short alias -> model identifier. The identifiers are passed as the
# `model` / `model_name` argument to the clients created in later cells.
# NOTE(review): the ":free" suffix presumably selects a free-tier variant — confirm.
models = {
    "llama_free": "meta-llama/llama-3.3-8b-instruct:free",
    "deepseek_free": "deepseek/deepseek-r1-0528-qwen3-8b:free",
    "gemma_free": "google/gemma-3n-e4b-it:free",
    "gemini": "google/gemini-2.5-flash-lite",
    "llama_lunaris": "sao10k/l3-lunaris-8b",
    "mistral": "mistralai/ministral-8b"
}

In [None]:
# Test model availability
def test_model_availability(model: str) -> bool:
    client = OpenAI(
        base_url=getenv("OPENROUTER_API_URL"),
        api_key=getenv("OPENROUTER_API_KEY"),
    )
    try:
        client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": "test"}],
            max_tokens=1
        )
        return True
    except Exception as e:
        error_msg = str(e).lower()
        if "region" in error_msg or "country" in error_msg or "blocked" in error_msg:
            print(f"Model {model} is region-blocked.")
            return False  # Region-blocked
        else:
            pprint(e)  # Print other errors for debugging
            return False

# models = {
#     model_name: model_id for model_name, model_id in models.items()
#     if test_model_availability(model_id)
# }

In [None]:
# Timer function to measure execution time

def timer(func):
    """Decorator that prints how long each call to ``func`` takes.

    Args:
        func: Callable to wrap.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        ``func``, prints the elapsed time rounded to two decimals, and
        returns ``func``'s result. The wrapper keeps ``func``'s ``__name__``
        and ``__doc__``.
    """
    # Imported locally because the notebook's import cell does not provide it.
    from functools import wraps

    @wraps(func)  # keep the wrapped function's name/docstring for introspection
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the measured delta cannot be skewed
        # by system clock adjustments the way time.time() can.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start_time
        print(f"Execution time: {round(elapsed, 2)} seconds")
        return result
    return wrapper

### Create a simple LangChain example

In [None]:
# Build a first LangChain chain (prompt -> chat model -> string parser).
# This is a plain chain: no agent or tools are involved in this cell.
template = """
Question: {question}
Answer: Let's think step by step.
"""

# The template exposes a single placeholder, {question}.
prompt = PromptTemplate(
    template=template,
    input_variables=["question"]
)

# Chat model configured from the OPENROUTER_* environment variables;
# max_completion_tokens=256 caps the length of each reply.
llm = ChatOpenAI(
    api_key=getenv("OPENROUTER_API_KEY"),
    base_url=getenv("OPENROUTER_API_URL"),
    model_name=models["llama_free"],
    max_completion_tokens=256
)

# Pipe composition: the rendered prompt feeds the model, and the model's
# message is passed to StrOutputParser.
llm_chain = prompt | llm | StrOutputParser()

@timer
def langchain_prompt(question: str) -> str:
    """Send ``question`` through ``llm_chain`` and return the chain's output."""
    answer = llm_chain.invoke(question)
    return answer


langchain_prompt("What is the president of the USA?")

### Connect to DB to fetch data

In [None]:
# Connect to the local DuckDB file used as the data source.
# read_only=True is passed so this connection is opened in read-only mode;
# the connection is closed by conn.close() in the notebook's last cell.

conn = duckdb.connect(
    database="../data/housing_crawler_read_only.duckdb",
    read_only=True
)

@timer
def query_housing_data(query: str) -> pd.DataFrame:
    """Run ``query`` on the shared DuckDB connection and drop identifier columns.

    Args:
        query: SQL text executed as-is via ``conn.execute``.

    Returns:
        The query result as a DataFrame without identifier columns, i.e.
        columns whose name contains ``id`` as a standalone word (``id``,
        ``estate_id``, ``estateId``, ``estateID``). Columns that merely
        contain those letters, such as ``width`` or ``residential``, are kept.
    """
    # Imported locally because the notebook's import cell does not provide it.
    import re

    def _is_id_column(name) -> bool:
        """Return True when ``id`` appears in ``name`` as a standalone word."""
        # Mark camelCase boundaries first ("estateId" -> "estate_Id"), then
        # split on every non-alphanumeric separator.
        spaced = re.sub(r"(?<=[a-z0-9])(?=[A-Z])", "_", str(name))
        words = re.split(r"[^A-Za-z0-9]+", spaced)
        return any(word.lower() == "id" for word in words)

    result = conn.execute(query).df()
    # Remove id fields to minimize data size. A plain substring test
    # ("id" in name) would also drop unrelated columns such as "width".
    id_fields = [col for col in result.columns if _is_id_column(col)]
    return result.drop(columns=id_fields)

def df_to_markdown(df: pd.DataFrame) -> str:
    """Render ``df`` as a Markdown table, leaving out the index column."""
    markdown_table = df.to_markdown(index=False)
    return markdown_table

# Fetch a two-row sample of estate_info and render it as a Markdown table.
# df_markdown is reused by a later cell as the {table_data} value of the
# SQL-generation prompt.
df = query_housing_data("SELECT * FROM estate_info LIMIT 2;")
df_markdown = df_to_markdown(df)
print(df_markdown)


### Use a Markdown table in the prompt to generate SQL with the LLM

In [None]:
# Use markdown table in prompt: the model sees the sample rows held in
# df_markdown and is asked to write a SQL query for the question.
# The generated SQL is only printed in this cell; it is not executed.

template_with_data = """
Here is an extract of the table called estate_info from Hong Kong Housing data from DuckDB:
{table_data}
Question: {question}
Create SQL query to answer the question based on the data above.
The SQL query will be executed on the same data.
The respond should only contains the SQL query in plain text, not in code block.
Do not include explanations or additional text.
"""

# Two placeholders: the Markdown table and the user's question.
prompt_with_data = PromptTemplate(
    template=template_with_data,
    input_variables=["table_data", "question"]
)
# Reuses the `llm` object defined in an earlier cell.
llm_chain_with_data = prompt_with_data | llm | StrOutputParser()

# Callback handler attached to this invocation so token usage can be
# inspected afterwards (see the commented-out print below).
token_count = OpenAICallbackHandler()
response = llm_chain_with_data.with_config(callbacks=[token_count]).invoke(input={
    "table_data": df_markdown, 
    "question": "Which estate has the highest transaction volume in Kowloon?"
    })
print(response)
# print(token_count.total_tokens)  # Check token usage


### Create SQL agent directly instead of prompt template

In [None]:
# Chat model that drives the SQL agent; max_completion_tokens=256 caps the
# length of each reply.
llm = ChatOpenAI(
    api_key=getenv("OPENROUTER_API_KEY"),
    base_url=getenv("OPENROUTER_API_URL"),
    model_name=models["llama_lunaris"],
    max_completion_tokens=256
)

# Open the database read-only: the agent executes model-generated SQL, so it
# must not be able to modify the data. This also matches the read_only=True
# setting of the `conn` connection already open on the same file.
# NOTE(review): DuckDB may refuse a second in-process connection to the same
# file when its configuration differs from the first one — confirm.
db = SQLDatabase.from_uri(
    "duckdb:///../data/housing_crawler_read_only.duckdb",
    engine_args={"connect_args": {"read_only": True}}
)
toolkit = SQLDatabaseToolkit(db=db, llm=llm)

agent = create_sql_agent(
    llm=llm,
    toolkit=toolkit,
    verbose=True,  # Add verbose for debugging
    handle_parsing_errors=True
)

# Shared callback handler used by langchain_sql_agent to report token usage.
token_count = OpenAICallbackHandler()

@timer
def langchain_sql_agent(question: str) -> str:
    """Answer ``question`` with the SQL agent and print the token usage.

    Args:
        question: Natural-language question forwarded to ``agent``.

    Returns:
        The agent's final answer text.
    """
    # `invoke` replaces the deprecated `Chain.run`; it returns a dict whose
    # "output" entry holds the final answer that `run` used to return.
    result = agent.invoke(
        {"input": question},
        config={"callbacks": [token_count]}
    )
    # Print the numeric total rather than the handler object itself. The
    # handler is shared, so the figure accumulates across calls.
    print(f"Total tokens used: {token_count.total_tokens}")
    return result["output"]


In [None]:
# Try the agent on a sample question; the @timer decorator on
# langchain_sql_agent prints the elapsed time.
langchain_sql_agent("Which is the northernmost estate in Hong Kong?")

### Strategy to reduce token usage and speed up response time

In [None]:
# Same model configuration as the previous agent cell, repeated so this cell
# can be run on its own.
llm = ChatOpenAI(
    api_key=getenv("OPENROUTER_API_KEY"),
    base_url=getenv("OPENROUTER_API_URL"),
    model_name=models["llama_lunaris"],
    max_completion_tokens=256
)

# Disable row sampling
# The database is also opened read-only: the agent executes model-generated
# SQL, so it must not be able to modify the data. This matches the
# read_only=True setting of the `conn` connection open on the same file.
# NOTE(review): DuckDB may refuse a second in-process connection to the same
# file when its configuration differs from the first one — confirm.
db = SQLDatabase.from_uri(
    database_uri="duckdb:///../data/housing_crawler_read_only.duckdb",
    engine_args={"connect_args": {"read_only": True}},
    sample_rows_in_table_info=0
)
toolkit = SQLDatabaseToolkit(db=db, llm=llm)

agent = create_sql_agent(
    llm=llm,
    toolkit=toolkit,
    # verbose=True,  # Add verbose for debugging
    verbose=False,
    handle_parsing_errors=True
)

# Fresh callback handler, so token totals restart from zero for this variant.
token_count = OpenAICallbackHandler()

@timer
def langchain_sql_agent(question: str) -> str:
    """Answer ``question`` with the SQL agent, prefixed by a role instruction.

    NOTE: this redefines the ``langchain_sql_agent`` from the earlier cell;
    after this cell runs, calls use this version.

    Args:
        question: Natural-language question; it is appended to a fixed
            instruction sentence before being sent to ``agent``.

    Returns:
        The agent's final answer text.
    """
    agent_input = (
        "You are a helpful assistant that helps people find information about housing in Hong Kong. Here is the user question. "
        + question
    )
    # `invoke` replaces the deprecated `Chain.run`; it returns a dict whose
    # "output" entry holds the final answer that `run` used to return.
    result = agent.invoke(
        {"input": agent_input},
        config={"callbacks": [token_count]}
    )
    # Print the numeric total rather than the handler object itself. The
    # handler is shared, so the figure accumulates across calls.
    print(f"Total tokens used: {token_count.total_tokens}")
    return result["output"]


In [None]:
# Try the token-saving agent variant on a sample question; the @timer
# decorator on langchain_sql_agent prints the elapsed time.
langchain_sql_agent("List top 10 estates in Hong Kong by transaction volume.")

In [None]:
# Close the DuckDB connection opened with duckdb.connect() earlier in the notebook.
conn.close()

: 