In [1]:
from langchain_ollama import ChatOllama
from langchain_community.utilities import SQLDatabase
from langchain_community.agent_toolkits import SQLDatabaseToolkit
from langchain.agents import create_agent

In [2]:
import pandas as pd
from pprint import pprint
import os
from dotenv import load_dotenv
load_dotenv()  # take environment variables from .env.

True

In [3]:
# initiate ollama llm.
llm_model = os.getenv("OLLAMA_LLM_MODEL")
llm = ChatOllama(model=llm_model)

In [4]:
# create postgres_url and db instance.
username = os.getenv("username")
password = os.getenv("password")
host = os.getenv("host")
port = os.getenv("port")
db_name = os.getenv("db_name")
postgres_url = f"postgresql://{username}:{password}@{host}:{port}/{db_name}"
db = SQLDatabase.from_uri(postgres_url)

In [5]:
toolkit = SQLDatabaseToolkit(db=db, llm=llm)

In [6]:
tools = toolkit.get_tools()
for tool in tools:
    print(f"{tool.name}: {tool.description}\n")

sql_db_query: Input to this tool is a detailed and correct SQL query, output is a result from the database. If the query is not correct, an error message will be returned. If an error is returned, rewrite the query, check the query, and try again. If you encounter an issue with Unknown column 'xxxx' in 'field list', use sql_db_schema to query the correct table fields.

sql_db_schema: Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables. Be sure that the tables actually exist by calling sql_db_list_tables first! Example Input: table1, table2, table3

sql_db_list_tables: Input is an empty string, output is a comma-separated list of tables in the database.

sql_db_query_checker: Use this tool to double check if your query is correct before executing it. Always use this tool before executing a query with sql_db_query!



In [7]:
table_info = db.get_table_info()
print("Table Info:")
pprint(table_info)

Table Info:
('\n'
 'CREATE TABLE jobad (\n'
 '\tid VARCHAR NOT NULL, \n'
 '\turl VARCHAR NOT NULL, \n'
 '\tcontent VARCHAR, \n'
 '\tkeyword VARCHAR NOT NULL, \n'
 '\tjob_title VARCHAR, \n'
 '\tcompany VARCHAR, \n'
 '\tresponsibilities VARCHAR[], \n'
 '\tqualifications VARCHAR[], \n'
 '\texperiences VARCHAR[], \n'
 '\tskills VARCHAR[], \n'
 '\tsalary VARCHAR, \n'
 '\tworking_location VARCHAR, \n'
 '\tCONSTRAINT jobad_pkey PRIMARY KEY (id)\n'
 ')\n'
 '\n'
 '/*\n'
 '3 rows from jobad table:\n'
 'id\turl\tcontent\tkeyword\tjob_title\tcompany\tresponsibilities\t'
 'qualifications\texperiences\tskills\tsalary\tworking_location\n'
 '84408520\thttps://hk.jobsdb.com/job/84408520?type=standard\t# Finance '
 'Systems Project Professional\n'
 '**Responsibilities:**\n'
 '- Collect user requirements from the di\tLLM\tFinance Systems Project '
 "Professional\tNone\t['Collect user requirements from the different teams in "
 "Finance Division regarding functions such as\t['Degree holder in Accounting, 

In [8]:
tools = toolkit.get_tools()
for tool in tools:
    print(f"{tool.name}: {tool.description}\n")

sql_db_query: Input to this tool is a detailed and correct SQL query, output is a result from the database. If the query is not correct, an error message will be returned. If an error is returned, rewrite the query, check the query, and try again. If you encounter an issue with Unknown column 'xxxx' in 'field list', use sql_db_schema to query the correct table fields.

sql_db_schema: Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables. Be sure that the tables actually exist by calling sql_db_list_tables first! Example Input: table1, table2, table3

sql_db_list_tables: Input is an empty string, output is a comma-separated list of tables in the database.

sql_db_query_checker: Use this tool to double check if your query is correct before executing it. Always use this tool before executing a query with sql_db_query!



In [9]:
statements = """SELECT job_title, COUNT(id) as count FROM jobAd GROUP BY job_title ORDER BY count DESC;"""
results = db.run(statements)

In [10]:
db.get_usable_table_names()

['jobad']

In [11]:
system_prompt = """
You are an agent designed to interact with a SQL database.
Given an input question, create a syntactically correct {dialect} query to run,
then look at the results of the query and return the answer. Unless the user
specifies a specific number of examples they wish to obtain, always limit your
query to at most {top_k} results.

You can order the results by a relevant column to return the most interesting
examples in the database. Never query for all the columns from a specific table,
only ask for the relevant columns given the question.

You MUST double check your query before executing it. If you get an error while
executing a query, rewrite the query and try again.

DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.) to the
database.

To start you should ALWAYS look at the tables in the database to see what you
can query. Do NOT skip this step.

Then you should query the schema of the most relevant tables.
""".format(
    dialect=db.dialect,
    top_k=5,
)

In [13]:
agent = create_agent(
    llm,
    tools,
    system_prompt=system_prompt,
)

In [None]:
question = "show me all records that job title is null in JobAd"
def query_database(question: str):
    for step in agent.stream(
        {"messages": [{"role": "user", "content": question}]},
        stream_mode="values",
    ):
        step["messages"][-1].pretty_print()
    
    return 


show me all records that job title is null in JobAd
Tool Calls:
  sql_db_schema (dd99898b-8960-4955-8884-27da368692bb)
 Call ID: dd99898b-8960-4955-8884-27da368692bb
  Args:
    table_names: JobAd
Name: sql_db_schema

Error: table_names {'JobAd'} not found in database

The 'table_names' parameter seems to be missing. Let me try again.

First, I will query the schema of the JobAd table:
```
SELECT * FROM information_schema.columns WHERE table_name = 'JobAd';
```

Next, I will use this information to construct a SQL query that shows all records with null job title in JobAd:
```sql
SELECT * 
FROM JobAd 
WHERE job_title IS NULL;
```

I'll now execute the query and see the results:
