In [106]:
import os
from dotenv import load_dotenv

load_dotenv()

True

In [107]:
os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")

In [108]:
from langchain_community.utilities import SQLDatabase

db_path = "./data/chinook.db"

db = SQLDatabase.from_uri(f"sqlite:///{db_path}")
print(db.dialect)
print(db.get_usable_table_names())

sqlite
['albums', 'artists', 'customers', 'employees', 'genres', 'invoice_items', 'invoices', 'media_types', 'playlist_track', 'playlists', 'tracks']


In [109]:
db.run("SELECT * FROM employees LIMIT 10;")

"[(1, 'Adams', 'Andrew', 'General Manager', None, '1962-02-18 00:00:00', '2002-08-14 00:00:00', '11120 Jasper Ave NW', 'Edmonton', 'AB', 'Canada', 'T5K 2N1', '+1 (780) 428-9482', '+1 (780) 428-3457', 'andrew@chinookcorp.com'), (2, 'Edwards', 'Nancy', 'Sales Manager', 1, '1958-12-08 00:00:00', '2002-05-01 00:00:00', '825 8 Ave SW', 'Calgary', 'AB', 'Canada', 'T2P 2T3', '+1 (403) 262-3443', '+1 (403) 262-3322', 'nancy@chinookcorp.com'), (3, 'Peacock', 'Jane', 'Sales Support Agent', 2, '1973-08-29 00:00:00', '2002-04-01 00:00:00', '1111 6 Ave SW', 'Calgary', 'AB', 'Canada', 'T2P 5M5', '+1 (403) 262-3443', '+1 (403) 262-6712', 'jane@chinookcorp.com'), (4, 'Park', 'Margaret', 'Sales Support Agent', 2, '1947-09-19 00:00:00', '2003-05-03 00:00:00', '683 10 Street SW', 'Calgary', 'AB', 'Canada', 'T2P 5G3', '+1 (403) 263-4423', '+1 (403) 263-4289', 'margaret@chinookcorp.com'), (5, 'Johnson', 'Steve', 'Sales Support Agent', 2, '1965-03-03 00:00:00', '2003-10-17 00:00:00', '7727B 41 Ave', 'Calgar

In [110]:
db.run("""SELECT t.name AS "Track Name", a.title AS "Album Title", g.name AS "Genre Name"
FROM tracks t
JOIN albums a ON t.albumid = a.albumid
JOIN genres g ON t.genreid = g.genreid
WHERE g.name = 'Rock'
ORDER BY t.name LIMIT 10;""")

'[(\'"40"\', \'War\', \'Rock\'), (\'(Da Le) Yaleo\', \'Supernatural\', \'Rock\'), (\'(Oh) Pretty Woman\', \'Diver Down\', \'Rock\'), (\'(Wish I Could) Hideaway\', \'Chronicle, Vol. 2\', \'Rock\'), (\'1/2 Full\', \'Riot Act\', \'Rock\'), (\'19th Nervous Breakdown\', \'Hot Rocks, 1964-1971 (Disc 1)\', \'Rock\'), (\'2 A.M.\', \'The X Factor\', \'Rock\'), (\'2 Minutes To Midnight\', \'Live At Donington 1992 (Disc 2)\', \'Rock\'), (\'2,000 Man\', \'Unplugged [Live]\', \'Rock\'), (\'200 Years Old\', \'Bongo Fury\', \'Rock\')]'

In [111]:
from langchain_groq import ChatGroq

model = ChatGroq(model="llama3-8b-8192")

In [112]:
model.invoke("Hello, how are you?")

AIMessage(content="I'm just an AI, so I don't have emotions or feelings like humans do. I'm functioning properly and ready to help with any questions or tasks you may have, though! How can I assist you today?", response_metadata={'token_usage': {'completion_tokens': 45, 'prompt_tokens': 16, 'total_tokens': 61, 'completion_time': 0.036291312, 'prompt_time': 0.003941949, 'queue_time': None, 'total_time': 0.040233261}, 'model_name': 'llama3-8b-8192', 'system_fingerprint': 'fp_873a560973', 'finish_reason': 'stop', 'logprobs': None}, id='run-d41e2437-19a8-4b34-b004-70c054ff1711-0')

In [113]:
from langchain.chains import create_sql_query_chain

In [114]:
chain = create_sql_query_chain(model, db)
response = chain.invoke({"question": "How many employees are there?"})
print(response)

Question: How many employees are there?
SQLQuery: SELECT COUNT(*) FROM employees;


In [115]:
import re

def get_query(text):
    # Pattern to match "SQLQuery:" followed by any characters including newlines, until a semicolon is found.
    text_format = text.strip('`').strip() # for handling ``````
    pattern = r"SQLQuery:\s*(.*?);"
    match = re.search(pattern, text_format, re.DOTALL)
    
    if match:
        return match.group(1).strip() + ";"
    else:
        return "No SQL query found."

In [116]:
get_query('''```Question: How many employees are there? 
          SQLQuery: SELECT COUNT(*) FROM employees;```''')

'SELECT COUNT(*) FROM employees;'

In [117]:
chain.get_graph

<bound method RunnableSequence.get_graph of RunnableAssign(mapper={
  input: RunnableLambda(...),
  table_info: RunnableLambda(...)
})
| RunnableLambda(lambda x: {k: v for k, v in x.items() if k not in ('question', 'table_names_to_use')})
| PromptTemplate(input_variables=['input', 'table_info'], partial_variables={'top_k': '5'}, template='You are a SQLite expert. Given an input question, first create a syntactically correct SQLite query to run, then look at the results of the query and return the answer to the input question.\nUnless the user specifies in the question a specific number of examples to obtain, query for at most {top_k} results using the LIMIT clause as per SQLite. You can order the results to return the most informative data in the database.\nNever query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in double quotes (") to denote them as delimited identifiers.\nPay attention to use only the col

In [118]:
chain.get_prompts()[0].pretty_print()

You are a SQLite expert. Given an input question, first create a syntactically correct SQLite query to run, then look at the results of the query and return the answer to the input question.
Unless the user specifies in the question a specific number of examples to obtain, query for at most 5 results using the LIMIT clause as per SQLite. You can order the results to return the most informative data in the database.
Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in double quotes (") to denote them as delimited identifiers.
Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
Pay attention to use date('now') function to get the current date, if the question involves "today".

Use the following format:

Question: Question here
SQLQuery: SQL Query to run
SQLResult: Result

In [119]:
from langchain_core.runnables import RunnableLambda

query_extractor = RunnableLambda(lambda x: get_query(x))

In [120]:
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool

write_query = create_sql_query_chain(model, db)
execute_query = QuerySQLDataBaseTool(db=db)

chain = write_query | query_extractor | execute_query

In [121]:
chain

RunnableAssign(mapper={
  input: RunnableLambda(...),
  table_info: RunnableLambda(...)
})
| RunnableLambda(lambda x: {k: v for k, v in x.items() if k not in ('question', 'table_names_to_use')})
| PromptTemplate(input_variables=['input', 'table_info'], partial_variables={'top_k': '5'}, template='You are a SQLite expert. Given an input question, first create a syntactically correct SQLite query to run, then look at the results of the query and return the answer to the input question.\nUnless the user specifies in the question a specific number of examples to obtain, query for at most {top_k} results using the LIMIT clause as per SQLite. You can order the results to return the most informative data in the database.\nNever query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in double quotes (") to denote them as delimited identifiers.\nPay attention to use only the column names you can see in the tables below. B

In [122]:
chain.get_prompts()[0].pretty_print()

You are a SQLite expert. Given an input question, first create a syntactically correct SQLite query to run, then look at the results of the query and return the answer to the input question.
Unless the user specifies in the question a specific number of examples to obtain, query for at most 5 results using the LIMIT clause as per SQLite. You can order the results to return the most informative data in the database.
Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in double quotes (") to denote them as delimited identifiers.
Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
Pay attention to use date('now') function to get the current date, if the question involves "today".

Use the following format:

Question: Question here
SQLQuery: SQL Query to run
SQLResult: Result

In [123]:
chain.invoke({"question": "How many employees are there?"})

'[(8,)]'

In [124]:
from operator import itemgetter
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

answer_prompt = PromptTemplate.from_template(
    """Given the following user question, corresponding SQL query and SQL result, answer the user question in a short and concise manner.
    Question: {question}
    SQL Query: {query}
    SQL Result: {result}
    Answer:
    """
)

answer = answer_prompt | model | StrOutputParser()
answer

PromptTemplate(input_variables=['query', 'question', 'result'], template='Given the following user question, corresponding SQL query and SQL result, answer the user question in a short and concise manner.\n    Question: {question}\n    SQL Query: {query}\n    SQL Result: {result}\n    Answer:\n    ')
| ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x000001E126F403B0>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000001E126E976E0>, model_name='llama3-8b-8192', groq_api_key=SecretStr('**********'))
| StrOutputParser()

In [125]:
query_getter = write_query | query_extractor

In [126]:
response = write_query.invoke({"question": "How many employees are there?"})
get_query(response)

'SELECT COUNT(*) FROM employees;'

In [127]:
query_getter.invoke({"question": "How many employees are there?"})

'SELECT COUNT(*) FROM employees;'

In [128]:
chain = (
    RunnablePassthrough.assign(query=query_getter).assign(result=itemgetter("query") | execute_query) 
    | answer
)

In [129]:
chain

RunnableAssign(mapper={
  query: RunnableAssign(mapper={
           input: RunnableLambda(...),
           table_info: RunnableLambda(...)
         })
         | RunnableLambda(lambda x: {k: v for k, v in x.items() if k not in ('question', 'table_names_to_use')})
         | PromptTemplate(input_variables=['input', 'table_info'], partial_variables={'top_k': '5'}, template='You are a SQLite expert. Given an input question, first create a syntactically correct SQLite query to run, then look at the results of the query and return the answer to the input question.\nUnless the user specifies in the question a specific number of examples to obtain, query for at most {top_k} results using the LIMIT clause as per SQLite. You can order the results to return the most informative data in the database.\nNever query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in double quotes (") to denote them as delimited identifiers.\

In [146]:
print(write_query.invoke(
    {"question": "list the top 10 rock genres tracks"}
))

Question: list the top 10 rock genres tracks
SQLQuery: SELECT "tracks".*, "genres".Name AS "GenreName" FROM "tracks" INNER JOIN "genres" ON "tracks".GenreId = "genres".GenreId WHERE "genres".Name = 'Rock' ORDER BY "tracks".UnitPrice DESC LIMIT 10;


In [144]:
chain.invoke({"question": "How many employees are there?"})

'There are 8 employees.'

In [132]:
chain.invoke({"question": "list the name of top 10 employees"})

'You can\'t use backticks (``) to quote column names in SQLite. Instead, you should use single quotes (\') or double quotes ("). Here is the corrected SQL query:\n\n```sql\nSELECT FirstName, LastName FROM employees ORDER BY LastName LIMIT 10;\n```'

In [148]:
response = chain.invoke({"question": "list the top 10 rock genres tracks"})
print(response)

Here is the answer to the user question:

The top 10 rock genres tracks are: "For Those About To Rock (We Salute You)", "Balls to the Wall", "Fast As a Shark", "Restless and Wild", "Princess of the Dawn", "Put The Finger On You", "Let's Get It Up", "Inject The Venom", "Snowballed", and "Evil Walks".


In [134]:
from langchain_community.agent_toolkits import create_sql_agent

agent_executor = create_sql_agent(model, db=db, verbose=True)