In [9]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment so the
# cells below can read DB_URL and OPENAI_API_KEY via os.getenv.
# override=True lets values from the file replace variables already set in
# the environment (python-dotenv's documented `override` behaviour).
load_dotenv(override=True)

True

In [16]:
from langchain_community.utilities import SQLDatabase

# The connection string comes from the environment so that credentials never
# appear in the notebook itself.
database_url = os.getenv('DB_URL')
if not database_url:
    # Fail fast with an actionable message: without this guard None is handed
    # to SQLDatabase.from_uri and the failure surfaces as a far less helpful
    # error from the database layer.
    raise RuntimeError(
        "DB_URL is not set. Add it to your .env file or export it before "
        "running this cell."
    )

db = SQLDatabase.from_uri(database_url)

# Quick sanity checks: which SQL dialect we are talking to, which tables are
# visible, and the table info text that is later fed to the prompt.
print(db.dialect)
print(db.get_usable_table_names())
print(db.get_table_info())

# Last expression of the cell, so the notebook displays the sample rows.
db.run("SELECT * FROM iron_ore_blocks LIMIT 10;")

postgresql
['iron_ore_blocks']

CREATE TABLE iron_ore_blocks (
	"sl._no" BIGINT, 
	state TEXT, 
	block_name TEXT, 
	greenfield_or_brownfield TEXT, 
	status_of_blocks TEXT, 
	type_of_block TEXT, 
	mineral TEXT, 
	name_of_bidder TEXT, 
	lease_area DOUBLE PRECISION, 
	reserve_mt TEXT, 
	fpo DOUBLE PRECISION, 
	date_of_auction DATE, 
	date_of_loi_issuance DATE, 
	fy_expiry TEXT
)

/*
3 rows from iron_ore_blocks table:
sl._no	state	block_name	greenfield_or_brownfield	status_of_blocks	type_of_block	mineral	name_of_bidder	lease_area	reserve_mt	fpo	date_of_auction	date_of_loi_issuance	fy_expiry
1	Andhra Pradesh	Addankivaripalem	Greenfield	Deed Executed and Exploration in Progress	CL	Iron Ore	JSW Steel Ltd.	None	None	14.4	2023-07-28	2023-05-09	2024-25
2	Maharashtra	Ajgaon Block	Greenfield	Deed Executed and Exploration in Progress	CL	Iron Ore	JSW Steel Ltd.	8.4	None	25.0	2022-03-31	None	2023-24
3	Orissa	Badampahar Iron Ore Block	Brownfield	Operationalised	ML	Iron Ore	GM Iron & Steel Company Ltd	

"[(1, 'Andhra Pradesh', 'Addankivaripalem', 'Greenfield', 'Deed Executed and Exploration in Progress', 'CL', 'Iron Ore', 'JSW Steel Ltd.', None, None, 14.4, datetime.date(2023, 7, 28), datetime.date(2023, 5, 9), '2024-25'), (2, 'Maharashtra', 'Ajgaon Block', 'Greenfield', 'Deed Executed and Exploration in Progress', 'CL', 'Iron Ore', 'JSW Steel Ltd.', 8.4, None, 25.0, datetime.date(2022, 3, 31), None, '2023-24'), (3, 'Orissa', 'Badampahar Iron Ore Block', 'Brownfield', 'Operationalised', 'ML', 'Iron Ore', 'GM Iron & Steel Company Ltd', 129.61, '4.484', 95.15, datetime.date(2020, 7, 1), None, '2025-26'), (4, 'Rajasthan', 'Bagawas', 'Brownfield', 'In process of revocation', 'ML', 'Iron Ore', 'Subhash Lohiya, Propo. M/s Bharat Coal Traders, Punjab', 5.9266, '0.58', 452.0, datetime.date(2022, 7, 29), datetime.date(2023, 4, 4), '2028-29'), (5, 'Chhattisgarh', 'Bailadila Deposit 01A Iron Ore Block', 'Greenfield', 'Pending with bidder - SOP/NOC', 'CL', 'Iron Ore', 'M/s ArcelorMittal\\n Nippon

In [13]:
from typing_extensions import TypedDict


class State(TypedDict):
    """State dictionary handed to the pipeline steps (e.g. ``write_query``)."""

    # Natural-language question; read by write_query as state["question"].
    question: str
    # SQL text; write_query returns it under the "query" key.
    query: str
    # presumably the raw output of executing `query` — TODO confirm against
    # the step that fills it (not visible in this part of the notebook)
    result: str
    # presumably the final natural-language answer — TODO confirm
    answer: str

In [14]:
import getpass
import os
from langchain.chat_models import init_chat_model

# Make sure an OpenAI key is present before building the model.
# Writing os.getenv(...) straight back into os.environ is a no-op when the
# variable exists and raises TypeError when it does not (environment values
# must be str, and os.getenv returns None for a missing variable). Prompt for
# the key instead; getpass keeps it out of the notebook and its saved output.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

# Chat model used by the later cells (e.g. write_query) to generate SQL.
llm = init_chat_model("gpt-4o-mini", model_provider="openai")

In [15]:
from langchain_core.prompts import ChatPromptTemplate

# System prompt for SQL generation. The {dialect}, {top_k} and {table_info}
# placeholders are filled in when the prompt template is invoked (see
# write_query, which supplies all three plus the user's question).
# NOTE(review): "only ask for a the few relevant columns" contains a typo
# ("a the"). The text is sent to the model verbatim, so fixing it changes the
# prompt — do it deliberately and re-run the cell to refresh the output.
system_message = """
Given an input question, create a syntactically correct {dialect} query to
run to help find the answer. Unless the user specifies in his question a
specific number of examples they wish to obtain, always limit your query to
at most {top_k} results. You can order the results by a relevant column to
return the most interesting examples in the database.

Never query for all the columns from a specific table, only ask for a the
few relevant columns given the question.

Pay attention to use only the column names that you can see in the schema
description. Be careful to not query for columns that do not exist. Also,
pay attention to which column is in which table.

Only use the following tables:
{table_info}
"""

# Human turn: the question is injected through the {input} placeholder.
user_prompt = "Question: {input}"

# Assemble the two-message chat prompt (system instructions + user question).
query_prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", system_message),
        ("user", user_prompt),
    ]
)

# Print each message template so the placeholders can be checked by eye.
for message in query_prompt_template.messages:
    message.pretty_print()



Given an input question, create a syntactically correct [33;1m[1;3m{dialect}[0m query to
run to help find the answer. Unless the user specifies in his question a
specific number of examples they wish to obtain, always limit your query to
at most [33;1m[1;3m{top_k}[0m results. You can order the results by a relevant column to
return the most interesting examples in the database.

Never query for all the columns from a specific table, only ask for a the
few relevant columns given the question.

Pay attention to use only the column names that you can see in the schema
description. Be careful to not query for columns that do not exist. Also,
pay attention to which column is in which table.

Only use the following tables:
[33;1m[1;3m{table_info}[0m


Question: [33;1m[1;3m{input}[0m


In [None]:
from typing_extensions import Annotated


class QueryOutput(TypedDict):
    """Generated SQL query."""

    # Schema passed to llm.with_structured_output() in write_query; the reply
    # is then read as result["query"].
    # NOTE(review): the class docstring and the description string below are
    # likely forwarded to the model as part of the schema — confirm before
    # rewording either of them.
    # The `...` in the Annotated metadata presumably marks "no default" per
    # the LangChain (type, default, description) convention — TODO confirm.
    query: Annotated[str, ..., "Syntactically valid SQL query."]


def write_query(state: State):
    """Turn the question held in ``state`` into a SQL query.

    Fills the query prompt with the database dialect, a row cap of 10, the
    table information reported by ``db`` and ``state["question"]``, then asks
    the model for output shaped like ``QueryOutput``.

    Returns:
        A dict with a single key, ``"query"``, holding the generated SQL.
    """
    # Values for the four placeholders used by the prompt template.
    prompt_inputs = {
        "dialect": db.dialect,
        "top_k": 10,
        "table_info": db.get_table_info(),
        "input": state["question"],
    }
    rendered_prompt = query_prompt_template.invoke(prompt_inputs)

    # Constrain the model's reply to the QueryOutput schema before invoking.
    generation = llm.with_structured_output(QueryOutput).invoke(rendered_prompt)
    return {"query": generation["query"]}