In [1]:
import os
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())
openai_api_key = os.environ["OPENAI_API_KEY"]

In [2]:
MODEL_GPT = 'gpt-4o-mini'

## Basic app for querying data from database

In [3]:
# from langchain_openai import OpenAI
from langchain_openai import ChatOpenAI

In [4]:
# llm = OpenAI()
llm = ChatOpenAI(model=MODEL_GPT)

### Load SQLite database
Short vesion (with only 1000 rows) of database [San Francisco Trees.](https://data.sfgov.org/City-Infrastructure/Street-Tree-List/tkzw-k3nq)

In [5]:
# !pip install langchain_experimental

In [6]:
from langchain import SQLDatabase
from langchain_experimental.sql import SQLDatabaseChain

In [7]:
# sqlite_db_path = "data/street_tree_db.sqlite"
sqlite_db_path = "../../data/street_tree_db.sqlite"

In [8]:
db = SQLDatabase.from_uri(f"sqlite:///{sqlite_db_path}")

### Create chain with LLM and database

In [9]:
db_chain = SQLDatabaseChain(
    llm=llm,
    database=db,
    verbose=True
)



In [10]:
# db_chain.run("How many species of trees are in San Francisco?")
db_chain.invoke("How many species of trees are in San Francisco?")



[1m> Entering new SQLDatabaseChain chain...[0m
How many species of trees are in San Francisco?
[32;1m[1;3m```sql
SELECT DISTINCT "qSpecies" 
FROM "street_trees" 
LIMIT 5;
```[0m

OperationalError: (sqlite3.OperationalError) near "```sql
SELECT DISTINCT "qSpecies" 
FROM "street_trees" 
LIMIT 5;
```": syntax error
[SQL: ```sql
SELECT DISTINCT "qSpecies" 
FROM "street_trees" 
LIMIT 5;
```]
(Background on this error at: https://sqlalche.me/e/20/e3q8)

In [11]:
# db_chain.run("How many trees of the species Ficus nitida are there in San Francisco?")
db_chain.invoke("How many trees of the species Ficus nitida are there in San Francisco?")



[1m> Entering new SQLDatabaseChain chain...[0m
How many trees of the species Ficus nitida are there in San Francisco?
[32;1m[1;3m```sql
SELECT COUNT("TreeID") AS "TreeCount" 
FROM street_trees 
WHERE "qSpecies" = 'Ficus nitida';
```[0m

OperationalError: (sqlite3.OperationalError) near "```sql
SELECT COUNT("TreeID") AS "TreeCount" 
FROM street_trees 
WHERE "qSpecies" = 'Ficus nitida';
```": syntax error
[SQL: ```sql
SELECT COUNT("TreeID") AS "TreeCount" 
FROM street_trees 
WHERE "qSpecies" = 'Ficus nitida';
```]
(Background on this error at: https://sqlalche.me/e/20/e3q8)

### Both answers are correct for 1,000 rows database we created

## OperationalError: (sqlite3.OperationalError) near "```sql
https://docs.sqlalchemy.org/en/20/errors.html#error-e3q8

In [13]:
```
OperationalError: (sqlite3.OperationalError) near "```sql
SELECT DISTINCT "qSpecies" 
FROM "street_trees" 
LIMIT 5;
```": syntax error
[SQL: ```sql
SELECT DISTINCT "qSpecies" 
FROM "street_trees" 
LIMIT 5;
```]
(Background on this error at: https://sqlalche.me/e/20/e3q8)
```

```
OperationalError: (sqlite3.OperationalError) near "```sql
SELECT COUNT("TreeID") AS "TreeCount" 
FROM street_trees 
WHERE "qSpecies" = 'Ficus nitida';
```": syntax error
[SQL: ```sql
SELECT COUNT("TreeID") AS "TreeCount" 
FROM street_trees 
WHERE "qSpecies" = 'Ficus nitida';
```]
(Background on this error at: https://sqlalche.me/e/20/e3q8)
```

SyntaxError: unterminated string literal (detected at line 2) (3521478517.py, line 2)

## SQLite Text-to-Query Application (generated by Chat LangChain)

In [14]:
from langchain_community.utilities import SQLDatabase
from langchain_community.agent_toolkits import SQLDatabaseToolkit
from langchain_openai import ChatOpenAI
from langchain.agents import create_sql_agent

In [15]:
# 1. Database Connection
def setup_database(db_path):
    """
    Create a SQLDatabase connection
    
    Args:
        db_path (str): Path to the SQLite database file
    
    Returns:
        SQLDatabase: Configured database connection
    """
    db = SQLDatabase.from_uri(f"sqlite:///{db_path}")
    return db


In [16]:
# 2. Create SQL Agent
# def create_sql_query_agent(db, model_name="gpt-3.5-turbo"):
def create_sql_query_agent(db, model_name="gpt-4o-mini"):
    """
    Create an SQL query agent
    
    Args:
        db (SQLDatabase): Database connection
        model_name (str): OpenAI model to use
    
    Returns:
        Agent executor for SQL queries
    """
    # Initialize the language model
    llm = ChatOpenAI(model=model_name, temperature=0)
    
    # Create the SQL database toolkit
    toolkit = SQLDatabaseToolkit(db=db, llm=llm)
    
    # Create the SQL agent
    agent_executor = create_sql_agent(
        llm=llm, 
        toolkit=toolkit, 
        verbose=True
    )
    
    return agent_executor

In [17]:
# 3. Query the Database
def query_database(agent, question):
    """
    Execute a natural language query on the database
    
    Args:
        agent: SQL query agent
        question (str): Natural language question
    
    Returns:
        Query result
    """
    try:
        response = agent.invoke({"input": question})
        return response['output']
    except Exception as e:
        return f"An error occurred: {str(e)}"

In [30]:
# Path to your SQLite database
DB_PATH = "../../data/street_tree_db.sqlite"
    
# Setup database connection
db = setup_database(DB_PATH)
    
# Create SQL agent
sql_agent = create_sql_query_agent(db)
    
# Example queries
queries = [
    # "How many species of trees are in San Francisco?",
    # "How many trees of the species Ficus nitida are there in San Francisco?",
    # "How many trees of the species Arbutus are there in San Francisco?",
    "How many trees are there in San Francisco where the species name contains 'Arbutus'? Use the LIKE operator with wildcards to match partial species names."
]
    
# Run example queries
# for query in queries:
#     print(f"\nQuery: {query}")
#     result = query_database(sql_agent, query)
#     print(f"Result: {result}")

In [31]:
# Run example queries
for query in queries:
    print(f"\nQuery: {query}")
    result = query_database(sql_agent, query)
    print(f"Result: {result}")


Query: How many trees are there in San Francisco where the species name contains 'Arbutus'? Use the LIKE operator with wildcards to match partial species names.


[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables  
[32;1m[1;3mI should check the schema of the `street_trees` table to find the relevant columns for my query about trees in San Francisco with species names containing 'Arbutus'.  
Action: sql_db_schema  
Action Input: "street_trees"  [0m[33;1m[1;3m
CREATE TABLE street_trees (
	"TreeID" INTEGER, 
	"qLegalStatus" TEXT, 
	"qSpecies" TEXT, 
	"qAddress" TEXT, 
	"SiteOrder" REAL, 
	"qSiteInfo" TEXT, 
	"PlantType" TEXT, 
	"qCaretaker" TEXT, 
	"qCareAssistant" TEXT, 
	"PlantDate" TEXT, 
	"DBH" REAL, 
	"PlotSize" TEXT, 
	"PermitNotes" TEXT, 
	"XCoord" REAL, 
	"YCoord" REAL, 
	"Latitude" REAL, 
	"Longitude" REAL, 
	"Location" TEXT, 
	"Fire Prevention Districts" REAL, 
	"Police Districts" REAL, 
	"Supervisor Districts" REAL, 
	"Zip Codes" R

In [None]:
# # Example Usage
# def main():
#     # Path to your SQLite database
#     DB_PATH = "../../data/street_tree_db.sqlite"
    
#     # Setup database connection
#     db = setup_database(DB_PATH)
    
#     # Create SQL agent
#     sql_agent = create_sql_query_agent(db)
    
#     # Example queries
#     queries = [
#         "How many tracks are in the database?",
#         "List the top 5 artists by number of tracks",
#         "What are the genres of music in the database?"
#     ]
    
#     # Run example queries
#     for query in queries:
#         print(f"\nQuery: {query}")
#         result = query_database(sql_agent, query)
#         print(f"Result: {result}")

# if __name__ == "__main__":
#     main()

## SQLite Text-to-Query Application (updated by ChatGPT)

In [33]:
from langchain_community.utilities import SQLDatabase
from langchain_community.agent_toolkits import SQLDatabaseToolkit
from langchain_openai import ChatOpenAI
from langchain.agents import create_sql_agent
from langchain.prompts import PromptTemplate

# 1. Database Connection
def setup_database(db_path):
    """
    Create a SQLDatabase connection
    
    Args:
        db_path (str): Path to the SQLite database file
    
    Returns:
        SQLDatabase: Configured database connection
    """
    db = SQLDatabase.from_uri(f"sqlite:///{db_path}")
    return db

# 2. Create SQL Agent with Forced LIKE '%...%' in Species Queries
# def create_sql_query_agent(db, model_name="gpt-3.5-turbo"):
def create_sql_query_agent(db, model_name="gpt-4o-mini"):
    """
    Create an SQL query agent that forces LIKE '%...%' for species queries
    
    Args:
        db (SQLDatabase): Database connection
        model_name (str): OpenAI model to use
    
    Returns:
        Agent executor for SQL queries
    """
    # Initialize the language model
    llm = ChatOpenAI(model=model_name, temperature=0)

    # Custom instruction to enforce LIKE '%...%' for species
    custom_prompt = PromptTemplate(
        input_variables=["input"],
        template=(
            "You are an SQL agent that generates SQLite queries. "
            "For any question related to a species name, always use the SQL condition: "
            "`WHERE species LIKE '%<species_name>%'`. "
            "For example, if the question is about 'Ficus nitida', generate:\n"
            "`SELECT COUNT(*) FROM trees WHERE species LIKE '%Ficus nitida%' AND city = 'San Francisco';`\n"
            "Now, generate the SQL for: {input}"
        )
    )

    # Create the SQL database toolkit
    toolkit = SQLDatabaseToolkit(db=db, llm=llm)

    # Create the SQL agent with custom prompt
    agent_executor = create_sql_agent(
        llm=llm, 
        toolkit=toolkit, 
        verbose=True,
        agent_prompt=custom_prompt  # Apply custom prompt
    )
    
    return agent_executor

# 3. Query the Database
def query_database(agent, question):
    """
    Execute a natural language query on the database
    
    Args:
        agent: SQL query agent
        question (str): Natural language question
    
    Returns:
        Query result
    """
    try:
        response = agent.invoke({"input": question})
        return response['output']
    except Exception as e:
        return f"An error occurred: {str(e)}"

# Example Usage
# Path to your SQLite database
DB_PATH = "../../data/street_tree_db.sqlite"

# Setup database connection
db = setup_database(DB_PATH)

# Create SQL agent
sql_agent = create_sql_query_agent(db)

# Example queries
queries = [
    "How many trees of the species Arbutus are there in San Francisco?"
]

# Run example queries
for query in queries:
    print(f"\nQuery: {query}")
    result = query_database(sql_agent, query)
    print(f"Result: {result}")


Query: How many trees of the species Arbutus are there in San Francisco?


[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables  
[32;1m[1;3mI should check the schema of the `street_trees` table to find relevant columns for querying the number of Arbutus trees in San Francisco.  
Action: sql_db_schema  
Action Input: "street_trees"  [0m[33;1m[1;3m
CREATE TABLE street_trees (
	"TreeID" INTEGER, 
	"qLegalStatus" TEXT, 
	"qSpecies" TEXT, 
	"qAddress" TEXT, 
	"SiteOrder" REAL, 
	"qSiteInfo" TEXT, 
	"PlantType" TEXT, 
	"qCaretaker" TEXT, 
	"qCareAssistant" TEXT, 
	"PlantDate" TEXT, 
	"DBH" REAL, 
	"PlotSize" TEXT, 
	"PermitNotes" TEXT, 
	"XCoord" REAL, 
	"YCoord" REAL, 
	"Latitude" REAL, 
	"Longitude" REAL, 
	"Location" TEXT, 
	"Fire Prevention Districts" REAL, 
	"Police Districts" REAL, 
	"Supervisor Districts" REAL, 
	"Zip Codes" REAL, 
	"Neighborhoods (old)" REAL, 
	"Analysis Neighborhoods" REAL
)

/*
3 rows from street_trees table:
TreeID	qLeg

In [None]:
# # Example Usage
# def main():
#     # Path to your SQLite database
#     DB_PATH = "../../data/street_tree_db.sqlite"
    
#     # Setup database connection
#     db = setup_database(DB_PATH)
    
#     # Create SQL agent
#     sql_agent = create_sql_query_agent(db)
    
#     # Example queries
#     queries = [
#         "How many trees of the species Arbutus are there in San Francisco?"
#     ]
    
#     # Run example queries
#     for query in queries:
#         print(f"\nQuery: {query}")
#         result = query_database(sql_agent, query)
#         print(f"Result: {result}")

# if __name__ == "__main__":
#     main()