### Testing SQL Querying Using Langchain

In [3]:
import os
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser, JsonKeyOutputFunctionsParser
from langchain.chat_models import ChatOllama
from langchain.schema.runnable import RunnablePassthrough
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.utilities import SQLDatabase
from langchain.chains import create_sql_query_chain
from langchain_experimental.sql import SQLDatabaseChain
from langchain.prompts import PromptTemplate
import json
from IPython.display import display, Markdown, Latex

In [4]:
os.environ["OPENAI_API_KEY"] = ""

#### Connect to DB using Langchain and test dummy questions

Notice that **sql_chain** will take in a **question** and **generate a sql query**

In [51]:
openai_llm = ChatOpenAI(model="gpt-3.5-turbo-16k", temperature=0)

db = SQLDatabase.from_uri("sqlite:///DBs/Chinook.db")

sql_chain = create_sql_query_chain(llm=openai_llm, db=db)

In [72]:
print(sql_chain.__dict__["middle"][0].template)

You are a SQLite expert. Given an input question, first create a syntactically correct SQLite query to run, then look at the results of the query and return the answer to the input question.
Unless the user specifies in the question a specific number of examples to obtain, query for at most {top_k} results using the LIMIT clause as per SQLite. You can order the results to return the most informative data in the database.
Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in double quotes (") to denote them as delimited identifiers.
Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
Pay attention to use date('now') function to get the current date, if the question involves "today".

Use the following format:

Question: Question here
SQLQuery: SQL Query to run
SQLResult: 

In [12]:
sql_query = sql_chain.invoke(
    {
        "question": "How many employees are in the database?"
    }
)

In [13]:
sql_query

'SELECT COUNT(*) FROM Employee'

We can run the resultant query directly (wink wink piping opportunity) in DB

In [16]:
db.run(sql_query)

'[(8,)]'

**Be careful with your expectations of the questions asked** as seen below, this can have limitations of what it can do

In [31]:
sql_query = sql_chain.invoke(
    {
        "question": "what is my data about?"
    }
)

The question was very broad, and GPT 3.5 tried "its best"

In [32]:
sql_query


"SELECT COUNT(*) AS TotalTables FROM sqlite_master WHERE type='table';"

In [33]:
db.run(sql_query)

'[(11,)]'

In [34]:
sql_query = sql_chain.invoke(
    {
        "question": "How many customers are there in the database?"
    }
)

In [36]:
db.run(sql_query)

'[(59,)]'

In [37]:
sql_query = sql_chain.invoke(
    {
        "question": "How many employees born after 1970 are there?"
    }
)

In [38]:
sql_query

"SELECT COUNT(*) FROM Employee WHERE BirthDate > '1970-01-01'"

In [39]:
db.run(sql_query)

'[(3,)]'

In [41]:
sql_query = sql_chain.invoke(
    {
        "question": "List the names of the employees born after 1973"
    }
)

sql_query

'SELECT "FirstName", "LastName" \nFROM "Employee" \nWHERE "BirthDate" > \'1973-01-01\''

In [42]:
db.run(sql_query)

"[('Jane', 'Peacock'), ('Michael', 'Mitchell')]"

#### Test with Local LLM, ie. Llama using Custom Prompt Template

In [5]:
template = """Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer.
Use the following format:

Question: "Question here"
SQLQuery: "SQL Query to run"
SQLResult: "Result of the SQLQuery"
Answer: "Final answer here"

Only use the following tables:

{table_info}.

Some examples of SQL queries that correspond to questions are:

{few_shot_examples}

Question: {input}"""

custom_prompt = ChatPromptTemplate.from_template(template)

In [6]:
ollama_llm = "llama2"

llama_llm = ChatOllama(model=ollama_llm, temperature=0)

db = SQLDatabase.from_uri("sqlite:///DBs/Chinook.db")

sql_chain = SQLDatabaseChain.from_llm(llm=llama_llm, db=db)

In [7]:
sql_chain

SQLDatabaseChain(llm_chain=LLMChain(prompt=PromptTemplate(input_variables=['input', 'table_info', 'top_k'], template='You are a SQLite expert. Given an input question, first create a syntactically correct SQLite query to run, then look at the results of the query and return the answer to the input question.\nUnless the user specifies in the question a specific number of examples to obtain, query for at most {top_k} results using the LIMIT clause as per SQLite. You can order the results to return the most informative data in the database.\nNever query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in double quotes (") to denote them as delimited identifiers.\nPay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.\nPay attention to use date(\'now\') function to get the current date, 

In [8]:
sql_query = sql_chain.invoke(
    {
        "query": "List the names of the employees born after 1973"
    }
)

sql_query

OperationalError: (sqlite3.OperationalError) no such column: Employee.Name
[SQL: SELECT Employee.Name
FROM Employee
WHERE Employee.BirthDate > '1973-01-01';]
(Background on this error at: https://sqlalche.me/e/20/e3q8)

In [98]:
print(sql_query)

SQLQuery:
SELECT Employee.Name
FROM Employee
WHERE Employee.BirthDate > '1973-01-01';

Expected result:
[Andrew Adams, Nancy Edwards, Jane Peacock]

Answer:
The names of the employees born after 1973 are Andrew Adams, Nancy Edwards, and Jane Peacock.
