In [1]:
import os
import warnings
from urllib.parse import quote

from dotenv import load_dotenv
from geoalchemy2 import Geometry
from langchain import SQLDatabaseChain
from langchain.agents import AgentExecutor
from langchain.agents import create_sql_agent
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain.sql_database import SQLDatabase
from sqlalchemy import create_engine


In [2]:
# Run python-dotenv's loader before the configuration cell reads its settings
# with os.getenv(); the value displayed under this cell is the call's return value.
load_dotenv()

True

In [3]:
def missing_env_vars(names, environ=None):
    """Return the entries of ``names`` that are unset or empty in ``environ``.

    Args:
        names: Iterable of environment variable names to check.
        environ: Mapping to look the names up in; defaults to ``os.environ``.

    Returns:
        A list of the missing names, in the order they were given.
    """
    env = os.environ if environ is None else environ
    return [name for name in names if not env.get(name)]


# Settings consumed by the cells below (OpenAI credentials and the PostgreSQL
# connection parameters).
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
DATABASE_NAME = os.getenv("DATABASE_NAME")
DATABASE_USER = os.getenv("DATABASE_USER")
DATABASE_PASSWORD = os.getenv("DATABASE_PASSWORD")
DATABASE_PORT = os.getenv("DATABASE_PORT")
DATABASE_HOST = os.getenv("DATABASE_HOST")

# os.getenv() returns None for an unset variable without complaint; the None
# would then be formatted into the database URL as the literal text "None".
# Warn here, next to the configuration, so the cause is visible immediately.
_missing = missing_env_vars(
    [
        "OPENAI_API_KEY",
        "DATABASE_NAME",
        "DATABASE_USER",
        "DATABASE_PASSWORD",
        "DATABASE_PORT",
        "DATABASE_HOST",
    ]
)
if _missing:
    warnings.warn(
        "Missing environment variables: " + ", ".join(_missing),
        RuntimeWarning,
    )

In [4]:
# Chat model shared by the SQL chain and the SQL agent below.
# temperature=0 asks for the least random sampling so that generated SQL (and
# therefore the notebook's results) is as repeatable as the API allows.
llm = ChatOpenAI(model_name="gpt-4", temperature=0)

In [5]:
# Percent-encode the credentials before placing them in the URL: characters
# such as "@", ":", "/" or "%" in a user name or password would otherwise be
# read as URL delimiters and corrupt the connection string.
db_user = quote(DATABASE_USER or "", safe="")
db_password = quote(DATABASE_PASSWORD or "", safe="")
connection_string = (
    f"postgresql://{db_user}:{db_password}"
    f"@{DATABASE_HOST}:{DATABASE_PORT}/{DATABASE_NAME}"
)
engine = create_engine(connection_string)

# LangChain wrapper around the engine, plus a chain that turns a question into
# SQL with `llm` and runs it against `db`.
# NOTE(review): later LangChain releases document SQLDatabaseChain.from_llm(...)
# as the constructor — confirm against the installed version before switching.
db = SQLDatabase(engine=engine)
db_chain = SQLDatabaseChain(llm=llm, database=db, verbose=True)

In [6]:
# Reuse the SQLDatabase wrapper (`db`) created in the previous cell rather than
# rebuilding an identical connection string, engine and wrapper here; the
# copy-pasted setup only produced a redundant second engine for the same database.
toolkit = SQLDatabaseToolkit(db=db)

# Agent that answers questions through the toolkit's SQL tools; verbose=True
# prints each Action / Observation / Thought step.
agent_executor = create_sql_agent(
    llm=llm,
    toolkit=toolkit,
    verbose=True,
)

In [12]:
# Ask the plain SQLDatabaseChain a question. Any failure is caught and printed
# so the error stays visible without stopping the notebook. In the recorded
# run the generated SQL came back wrapped in double quotes and PostgreSQL
# rejected it with a syntax error.
try:
    db_chain.run(
        "how many tables are there?"
    )
except Exception as err:
    print("\n\n", err)



[1m> Entering new SQLDatabaseChain chain...[0m
how many tables are there? 
SQLQuery:[32;1m[1;3m"SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = 'public';"[0m

 (psycopg2.errors.SyntaxError) syntax error at or near ""SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = 'public';""
LINE 1: "SELECT COUNT(*) FROM information_schema.tables WHERE table_...
        ^

[SQL: "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = 'public';"]
(Background on this error at: https://sqlalche.me/e/14/f405)


In [13]:
# Same kind of question, this time through the SQL agent; the returned answer
# string is the cell's displayed value.
agent_executor.run(
    "How many tables are there in the database?"
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: list_tables_sql_db
Action Input: [0m
Observation: [38;5;200m[1;3mtreestandsummary, treestratum, stand_4326, assortment, stand, operation, datasource, specialfeature, treestand, specification, restriction[0m
Thought:[32;1m[1;3mI can see there are 11 tables in the database.
Final Answer: There are 11 tables in the database.[0m

[1m> Finished chain.[0m


'There are 11 tables in the database.'

In [14]:
# Aggregate question against a single table; the agent's answer string is the
# cell's displayed value.
agent_executor.run(
    "What's the total sum of areas in the stand_4326 table?"
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: list_tables_sql_db
Action Input: [0m
Observation: [38;5;200m[1;3mtreestandsummary, treestratum, stand_4326, assortment, stand, operation, datasource, specialfeature, treestand, specification, restriction[0m
Thought:[32;1m[1;3mI see the stand_4326 table in the database. Now, I should look at its schema to identify the area column.
Action: schema_sql_db
Action Input: stand_4326[0m
Observation: [33;1m[1;3m
CREATE TABLE stand_4326 (
	id INTEGER, 
	standid BIGINT, 
	parcelid BIGINT, 
	realestateid BIGINT, 
	standnumber BIGINT, 
	standnumberextension VARCHAR(2), 
	maingroup BIGINT, 
	subgroup BIGINT, 
	fertilityclass BIGINT, 
	soiltype BIGINT, 
	drainagestate BIGINT, 
	ditchingyear BIGINT, 
	thinningyear BIGINT, 
	developmentclass VARCHAR(2), 
	standquality BIGINT, 
	maintreespecies BIGINT, 
	accessibility BIGINT, 
	standinfo VARCHAR, 
	datasource BIGINT, 
	growthplacedatasource BIGINT, 
	area DOUBLE PRECISION, 
	a

'The total sum of areas in the stand_4326 table is 600,761.402 square units.'

In [19]:
# Ask the agent for the largest polygon as GeoJSON and keep the answer in `res`.
# NOTE(review): the recorded answer contains "... (31171 characters truncated) ...",
# so the string held in `res` does not look like complete GeoJSON — confirm
# before parsing it or passing it on.
res = agent_executor.run(
    "Get the forest polygon with largest area as geojson from table stand_4326?"
)
res



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: list_tables_sql_db
Action Input: [0m
Observation: [38;5;200m[1;3mtreestandsummary, treestratum, stand_4326, assortment, stand, operation, datasource, specialfeature, treestand, specification, restriction[0m
Thought:[32;1m[1;3mNow that I have the list of tables, I can query the stand_4326 table for the forest polygon with the largest area.
Action: schema_sql_db
Action Input: stand_4326[0m
Observation: [33;1m[1;3m
CREATE TABLE stand_4326 (
	id INTEGER, 
	standid BIGINT, 
	parcelid BIGINT, 
	realestateid BIGINT, 
	standnumber BIGINT, 
	standnumberextension VARCHAR(2), 
	maingroup BIGINT, 
	subgroup BIGINT, 
	fertilityclass BIGINT, 
	soiltype BIGINT, 
	drainagestate BIGINT, 
	ditchingyear BIGINT, 
	thinningyear BIGINT, 
	developmentclass VARCHAR(2), 
	standquality BIGINT, 
	maintreespecies BIGINT, 
	accessibility BIGINT, 
	standinfo VARCHAR, 
	datasource BIGINT, 
	growthplacedatasource BIGINT, 
	area DOUBLE PRECI

'{"type":"MultiPolygon","coordinates":[[[[23.269915592,60.639333217],[23.268083013,60.639452568],[23.268085115,60.639392395],[23.268122166,60.63933026 ... (31171 characters truncated) ... 5759],[23.263801546,60.636306311],[23.263913367,60.636327409],[23.263964315,60.636381328],[23.264034663,60.636412002],[23.264135767,60.636431456]]]]}'