In [127]:
import langchain
from dotenv import load_dotenv,find_dotenv
from langchain_community.utilities import SQLDatabase
from langchain_core.runnables import RunnablePassthrough,RunnableLambda,RunnableParallel
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import (PromptTemplate, ChatPromptTemplate,FewShotPromptTemplate,
                            MessagesPlaceholder,SystemMessagePromptTemplate,HumanMessagePromptTemplate)
from langchain_community.vectorstores import FAISS, Chroma
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain_google_genai import GoogleGenerativeAI,ChatGoogleGenerativeAI
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool
from langchain_community.agent_toolkits import create_sql_agent
from langchain_openai import ChatOpenAI
from langchain.chains.sql_database.prompt import SQL_PROMPTS
from langchain.chains import create_sql_query_chain

In [128]:
load_dotenv(find_dotenv("D:\LLM Courses\Master Langchain Udemy\.env"))

True

In [129]:
llm=ChatGoogleGenerativeAI(model="gemini-1.5-flash-001",temperature=0.3)
llm=ChatOpenAI(model="gpt-3.5-turbo")
db=SQLDatabase.from_uri(database_uri="sqlite:///db/chinook.db/chinook.db")

In [130]:
chain=create_sql_query_chain(llm=llm,db=db)

In [6]:
chain.get_prompts()

[PromptTemplate(input_variables=['input', 'table_info'], partial_variables={'top_k': '5'}, template='You are a SQLite expert. Given an input question, first create a syntactically correct SQLite query to run, then look at the results of the query and return the answer to the input question.\nUnless the user specifies in the question a specific number of examples to obtain, query for at most {top_k} results using the LIMIT clause as per SQLite. You can order the results to return the most informative data in the database.\nNever query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in double quotes (") to denote them as delimited identifiers.\nPay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.\nPay attention to use date(\'now\') function to get the current date, if the question i

In [9]:
chain.get_prompts()[0].pretty_print()

You are a SQLite expert. Given an input question, first create a syntactically correct SQLite query to run, then look at the results of the query and return the answer to the input question.
Unless the user specifies in the question a specific number of examples to obtain, query for at most 5 results using the LIMIT clause as per SQLite. You can order the results to return the most informative data in the database.
Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in double quotes (") to denote them as delimited identifiers.
Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
Pay attention to use date('now') function to get the current date, if the question involves "today".

Use the following format:

Question: Question here
SQLQuery: SQL Query to run
SQLResult: Result

<h3>Getting Relevent Context for DBs</h3>

In [11]:
context=db.get_context()

In [18]:
print(context.keys())

dict_keys(['table_info', 'table_names'])


In [24]:
print(context['table_info'])


CREATE TABLE "Album" (
	"AlbumId" INTEGER NOT NULL, 
	"Title" NVARCHAR(160) NOT NULL, 
	"ArtistId" INTEGER NOT NULL, 
	PRIMARY KEY ("AlbumId"), 
	FOREIGN KEY("ArtistId") REFERENCES "Artist" ("ArtistId")
)

/*
3 rows from Album table:
AlbumId	Title	ArtistId
1	For Those About To Rock We Salute You	1
2	Balls to the Wall	2
3	Restless and Wild	2
*/


CREATE TABLE "Artist" (
	"ArtistId" INTEGER NOT NULL, 
	"Name" NVARCHAR(120), 
	PRIMARY KEY ("ArtistId")
)

/*
3 rows from Artist table:
ArtistId	Name
1	AC/DC
2	Accept
3	Aerosmith
*/


CREATE TABLE "Customer" (
	"CustomerId" INTEGER NOT NULL, 
	"FirstName" NVARCHAR(40) NOT NULL, 
	"LastName" NVARCHAR(20) NOT NULL, 
	"Company" NVARCHAR(80), 
	"Address" NVARCHAR(70), 
	"City" NVARCHAR(40), 
	"State" NVARCHAR(40), 
	"Country" NVARCHAR(40), 
	"PostalCode" NVARCHAR(10), 
	"Phone" NVARCHAR(24), 
	"Fax" NVARCHAR(24), 
	"Email" NVARCHAR(60) NOT NULL, 
	"SupportRepId" INTEGER, 
	PRIMARY KEY ("CustomerId"), 
	FOREIGN KEY("SupportRepId") REFERENCES "Empl

In [23]:
print(context['table_names'])

Album, Artist, Customer, Employee, Genre, Invoice, InvoiceLine, MediaType, Playlist, PlaylistTrack, Track


<h3>When there are not many tables, we can just insert the entirety of this information in our prompt</h3>

In [131]:
chain.get_prompts()[0].partial(table_info=context["table_info"]).pretty_print()

You are a SQLite expert. Given an input question, first create a syntactically correct SQLite query to run, then look at the results of the query and return the answer to the input question.
Unless the user specifies in the question a specific number of examples to obtain, query for at most 5 results using the LIMIT clause as per SQLite. You can order the results to return the most informative data in the database.
Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in double quotes (") to denote them as delimited identifiers.
Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
Pay attention to use date('now') function to get the current date, if the question involves "today".

Use the following format:

Question: Question here
SQLQuery: SQL Query to run
SQLResult: Result

<h3>Few Shot Templates</h3>

In [132]:
examples = [
    {"input": "List all artists.", "query": "SELECT * FROM Artist;"},
    {
        "input": "Find all albums for the artist 'AC/DC'.",
        "query": "SELECT * FROM Album WHERE ArtistId = (SELECT ArtistId FROM Artist WHERE Name = 'AC/DC');",
    },
    {
        "input": "List all tracks in the 'Rock' genre.",
        "query": "SELECT * FROM Track WHERE GenreId = (SELECT GenreId FROM Genre WHERE Name = 'Rock');",
    },
    {
        "input": "Find the total duration of all tracks.",
        "query": "SELECT SUM(Milliseconds) FROM Track;",
    },
    {
        "input": "List all customers from Canada.",
        "query": "SELECT * FROM Customer WHERE Country = 'Canada';",
    },
    {
        "input": "How many tracks are there in the album with ID 5?",
        "query": "SELECT COUNT(*) FROM Track WHERE AlbumId = 5;",
    },
    {
        "input": "Find the total number of invoices.",
        "query": "SELECT COUNT(*) FROM Invoice;",
    },
    {
        "input": "List all tracks that are longer than 5 minutes.",
        "query": "SELECT * FROM Track WHERE Milliseconds > 300000;",
    },
    {
        "input": "Who are the top 5 customers by total purchase?",
        "query": "SELECT CustomerId, SUM(Total) AS TotalPurchase FROM Invoice GROUP BY CustomerId ORDER BY TotalPurchase DESC LIMIT 5;",
    },
    {
        "input": "Which albums are from the year 2000?",
        "query": "SELECT * FROM Album WHERE strftime('%Y', ReleaseDate) = '2000';",
    },
    {
        "input": "How many employees are there",
        "query": 'SELECT COUNT(*) FROM "Employee"',
    },
]

In [133]:
examplePromt=PromptTemplate.from_template(
    template="""
    User Input: {input}\n
    SQL query: {query}
    """
)

prefix="""
    You are a SQLite Expert. Given an input Question, without mentioning Preamble, ex.: sqlite, create a syntactically correct SQLite Query. 
    Unless otherwise specified, do not return more than {top_k} rows.
    Here is the relevant table info: {table_info}
    Below are a number of examples of questions and their corresponding SQL Queries.
"""

suffix="""
    User Input: {input}\n
    SQL query:
"""

In [134]:
prompt=FewShotPromptTemplate(
    examples=examples[:6],
    example_prompt=examplePromt,
    prefix=prefix,
    suffix=suffix,
    input_variables=["input","top_k","table_info"]
)

In [135]:
prompt.pretty_print()


    You are a SQLite Expert. Given an input Question, without mentioning Preamble, ex.: sqlite, create a syntactically correct SQLite Query. 
    Unless otherwise specified, do not return more than [33;1m[1;3m{top_k}[0m rows.
    Here is the relevant table info: [33;1m[1;3m{table_info}[0m
    Below are a number of examples of questions and their corresponding SQL Queries.



    User Input: List all artists.

    SQL query: SELECT * FROM Artist;
    


    User Input: Find all albums for the artist 'AC/DC'.

    SQL query: SELECT * FROM Album WHERE ArtistId = (SELECT ArtistId FROM Artist WHERE Name = 'AC/DC');
    


    User Input: List all tracks in the 'Rock' genre.

    SQL query: SELECT * FROM Track WHERE GenreId = (SELECT GenreId FROM Genre WHERE Name = 'Rock');
    


    User Input: Find the total duration of all tracks.

    SQL query: SELECT SUM(Milliseconds) FROM Track;
    


    User Input: List all customers from Canada.

    SQL query: SELECT * FROM Customer WHERE

In [136]:
context.keys()

dict_keys(['table_info', 'table_names'])

In [137]:
context['table_names']

'Album, Artist, Customer, Employee, Genre, Invoice, InvoiceLine, MediaType, Playlist, PlaylistTrack, Track'

In [138]:
print(prompt.format(input="How many artists are there?",top_k=6,table_info=context['table_names']))


    You are a SQLite Expert. Given an input Question, without mentioning Preamble, ex.: sqlite, create a syntactically correct SQLite Query. 
    Unless otherwise specified, do not return more than 6 rows.
    Here is the relevant table info: Album, Artist, Customer, Employee, Genre, Invoice, InvoiceLine, MediaType, Playlist, PlaylistTrack, Track
    Below are a number of examples of questions and their corresponding SQL Queries.



    User Input: List all artists.

    SQL query: SELECT * FROM Artist;
    


    User Input: Find all albums for the artist 'AC/DC'.

    SQL query: SELECT * FROM Album WHERE ArtistId = (SELECT ArtistId FROM Artist WHERE Name = 'AC/DC');
    


    User Input: List all tracks in the 'Rock' genre.

    SQL query: SELECT * FROM Track WHERE GenreId = (SELECT GenreId FROM Genre WHERE Name = 'Rock');
    


    User Input: Find the total duration of all tracks.

    SQL query: SELECT SUM(Milliseconds) FROM Track;
    


    User Input: List all customers from

In [139]:
chain=create_sql_query_chain(prompt=prompt,db=db,llm=llm)

In [140]:
chain.get_prompts()

[FewShotPromptTemplate(input_variables=['input', 'table_info'], partial_variables={'top_k': '5'}, examples=[{'input': 'List all artists.', 'query': 'SELECT * FROM Artist;'}, {'input': "Find all albums for the artist 'AC/DC'.", 'query': "SELECT * FROM Album WHERE ArtistId = (SELECT ArtistId FROM Artist WHERE Name = 'AC/DC');"}, {'input': "List all tracks in the 'Rock' genre.", 'query': "SELECT * FROM Track WHERE GenreId = (SELECT GenreId FROM Genre WHERE Name = 'Rock');"}, {'input': 'Find the total duration of all tracks.', 'query': 'SELECT SUM(Milliseconds) FROM Track;'}, {'input': 'List all customers from Canada.', 'query': "SELECT * FROM Customer WHERE Country = 'Canada';"}, {'input': 'How many tracks are there in the album with ID 5?', 'query': 'SELECT COUNT(*) FROM Track WHERE AlbumId = 5;'}], example_prompt=PromptTemplate(input_variables=['input', 'query'], template='\n    User Input: {input}\n\n    SQL query: {query}\n    '), suffix='\n    User Input: {input}\n\n    SQL query:\n'

In [141]:
chain.get_prompts()[0].pretty_print()


    You are a SQLite Expert. Given an input Question, without mentioning Preamble, ex.: sqlite, create a syntactically correct SQLite Query. 
    Unless otherwise specified, do not return more than 5 rows.
    Here is the relevant table info: [33;1m[1;3m{table_info}[0m
    Below are a number of examples of questions and their corresponding SQL Queries.



    User Input: List all artists.

    SQL query: SELECT * FROM Artist;
    


    User Input: Find all albums for the artist 'AC/DC'.

    SQL query: SELECT * FROM Album WHERE ArtistId = (SELECT ArtistId FROM Artist WHERE Name = 'AC/DC');
    


    User Input: List all tracks in the 'Rock' genre.

    SQL query: SELECT * FROM Track WHERE GenreId = (SELECT GenreId FROM Genre WHERE Name = 'Rock');
    


    User Input: Find the total duration of all tracks.

    SQL query: SELECT SUM(Milliseconds) FROM Track;
    


    User Input: List all customers from Canada.

    SQL query: SELECT * FROM Customer WHERE Country = 'Canada';
  

In [142]:
print(chain.invoke(input={"question":"How many Employees are there","table_info":context['table_names']}))

SELECT COUNT(*) FROM Employee;


In [143]:
chain2=chain|RunnableLambda(lambda d: d.replace("sqlite","").strip())

In [144]:
print(chain2.invoke(input={"question":"How many Employees are there","table_info":context['table_names']}))

SELECT COUNT(*) FROM Employee;


<h3>Using Dynamic Few Shot Templates</h3>

In [145]:
exampleSelector=SemanticSimilarityExampleSelector.from_examples(
    vectorstore_cls=FAISS,
    k=4,
    examples=examples,
    embeddings=SentenceTransformerEmbeddings(),
    input_keys=['input']
)

In [146]:
prompt=FewShotPromptTemplate(
    example_selector=exampleSelector,
    example_prompt=examplePromt,
    prefix=prefix,
    suffix=suffix,
    input_variables=["input","top_k","table_info"]
)

In [147]:
print(prompt.format(input="Name the Artist who have composed the track : 'Balls to the Wall'?",table_info=context['table_names'],top_k=5))


    You are a SQLite Expert. Given an input Question, without mentioning Preamble, ex.: sqlite, create a syntactically correct SQLite Query. 
    Unless otherwise specified, do not return more than 5 rows.
    Here is the relevant table info: Album, Artist, Customer, Employee, Genre, Invoice, InvoiceLine, MediaType, Playlist, PlaylistTrack, Track
    Below are a number of examples of questions and their corresponding SQL Queries.



    User Input: List all artists.

    SQL query: SELECT * FROM Artist;
    


    User Input: How many tracks are there in the album with ID 5?

    SQL query: SELECT COUNT(*) FROM Track WHERE AlbumId = 5;
    


    User Input: Which albums are from the year 2000?

    SQL query: SELECT * FROM Album WHERE strftime('%Y', ReleaseDate) = '2000';
    


    User Input: List all tracks in the 'Rock' genre.

    SQL query: SELECT * FROM Track WHERE GenreId = (SELECT GenreId FROM Genre WHERE Name = 'Rock');
    


    User Input: Name the Artist who have compos

In [156]:
chain = create_sql_query_chain(llm, db, prompt) | RunnableLambda(lambda d: d.replace("sqlite","").strip())

In [157]:
print(chain.invoke({"question":"Name the Artist who have composed the track : 'Balls to the Wall'?"}))

SELECT Artist.Name
FROM Artist
JOIN Album ON Artist.ArtistId = Album.ArtistId
JOIN Track ON Album.AlbumId = Track.AlbumId
WHERE Track.Name = 'Balls to the Wall';


In [168]:
executeQuery=QuerySQLDataBaseTool(db=db)

In [169]:
executeChain=chain|executeQuery|StrOutputParser()

In [174]:
# Through QuerySQLDatabaseTool
print(executeChain.invoke({"question":"Name the Artist who have composed the track : 'Balls to the Wall'?"}))

[('Accept',)]


In [171]:
executeChain.invoke({"question":"Name the Artist who have composed the track : 'Balls to the Wall'?"})

"[('Accept',)]"

In [167]:
eval(executeChain.invoke({"question":"Name the Artist who have composed the track : 'Balls to the Wall'?"}))[0][0]

'Accept'

In [173]:
#Through db.run
db.run(command=chain.invoke({"question":"Name the Artist who have composed the track : 'Balls to the Wall'?"}))

"[('Accept',)]"