In [1]:
import langchain
from dotenv import load_dotenv,find_dotenv
from langchain_community.utilities import SQLDatabase
from langchain_core.runnables import RunnablePassthrough,RunnableLambda,RunnableParallel
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import (PromptTemplate, ChatPromptTemplate,FewShotPromptTemplate,
                            MessagesPlaceholder,SystemMessagePromptTemplate,HumanMessagePromptTemplate)
from langchain_community.vectorstores import FAISS, Chroma
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain_google_genai import GoogleGenerativeAI,ChatGoogleGenerativeAI
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool
from langchain_community.agent_toolkits import create_sql_agent

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load environment variables from the course's .env file (presumably including
# the Google Generative AI API key used by the LLM below — confirm).
# The path is a raw string: in a normal literal, "\L", "\M" and "\." are invalid
# escape sequences, which warn today and are slated to become errors in a future
# Python release. The resulting path is byte-for-byte the same.
# NOTE(review): this is a machine-specific absolute path; a bare find_dotenv()
# or a configurable location would make the notebook portable.
load_dotenv(find_dotenv(r"D:\LLM Courses\Master Langchain Udemy\.env"))

True

In [3]:
# Database handle for the Chinook SQLite file, addressed by a SQLAlchemy-style URI.
db = SQLDatabase.from_uri(
    database_uri="sqlite:///db/chinook.db/chinook.db",
)

# Gemini chat model that powers the SQL agents in the cells below.
llm = ChatGoogleGenerativeAI(
    temperature=0.3,
    model="gemini-1.5-flash-001",
)

In [4]:
# Baseline SQL agent with the library's default prompt.
# verbose=True prints the intermediate Thought/Action trace seen in the outputs.
agentExecutor = create_sql_agent(
    db=db,
    llm=llm,
    verbose=True,
)

In [5]:
# Agent payload: the natural-language question goes under the "input" key.
query = dict(input="How many Employees are there?")

In [6]:
# Run the baseline agent on the question; the returned dict is the cell's output.
agentExecutor.invoke(query)



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3mThought: I should look at the tables in the database to see what I can query.  Then I should query the schema of the most relevant tables.
Action: sql_db_list_tables
Action Input: [0m[38;5;200m[1;3mAlbum, Artist, Customer, Employee, Genre, Invoice, InvoiceLine, MediaType, Playlist, PlaylistTrack, Track[0m[32;1m[1;3mThought: I should query the schema of the Employee table to see what columns are available.
Action: sql_db_schema
Action Input: Employee[0m[33;1m[1;3m
CREATE TABLE "Employee" (
	"EmployeeId" INTEGER NOT NULL, 
	"LastName" NVARCHAR(20) NOT NULL, 
	"FirstName" NVARCHAR(20) NOT NULL, 
	"Title" NVARCHAR(30), 
	"ReportsTo" INTEGER, 
	"BirthDate" DATETIME, 
	"HireDate" DATETIME, 
	"Address" NVARCHAR(70), 
	"City" NVARCHAR(40), 
	"State" NVARCHAR(40), 
	"Country" NVARCHAR(40), 
	"PostalCode" NVARCHAR(10), 
	"Phone" NVARCHAR(24), 
	"Fax" NVARCHAR(24), 
	"Email" NVARCHAR(60), 
	PRIMARY KEY ("EmployeeId"), 
	FO

{'input': 'How many Employees are there?', 'output': 'There are 8 employees.'}

In [7]:
# Ask the baseline agent to describe a table's structure rather than count rows.
agentExecutor.invoke({"input": "Describe the Playlist Table"})



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: [0m[38;5;200m[1;3mAlbum, Artist, Customer, Employee, Genre, Invoice, InvoiceLine, MediaType, Playlist, PlaylistTrack, Track[0m[32;1m[1;3mThought: I should look at the schema of the Playlist table.
Action: sql_db_schema
Action Input: Playlist[0m[33;1m[1;3m
CREATE TABLE "Playlist" (
	"PlaylistId" INTEGER NOT NULL, 
	"Name" NVARCHAR(120), 
	PRIMARY KEY ("PlaylistId")
)

/*
3 rows from Playlist table:
PlaylistId	Name
1	Music
2	Movies
3	TV Shows
*/[0m[32;1m[1;3mThought: I now know the final answer
Final Answer: The Playlist table has two columns: PlaylistId and Name. PlaylistId is the primary key and is an integer. Name is a string with a maximum length of 120 characters.  The table contains information about playlists, such as "Music", "Movies", and "TV Shows". 
[0m

[1m> Finished chain.[0m


{'input': 'Describe the Playlist Table',
 'output': 'The Playlist table has two columns: PlaylistId and Name. PlaylistId is the primary key and is an integer. Name is a string with a maximum length of 120 characters.  The table contains information about playlists, such as "Music", "Movies", and "TV Shows".'}

### With a Few-Shot Prompt Template

In [8]:
# Few-shot corpus: each entry pairs a natural-language question ("input")
# with the SQL statement that answers it ("query").
examples = [
    {"input": question, "query": sql}
    for question, sql in (
        (
            "Find all Artists",
            "SELECT * FROM artist;",
        ),
        (
            "Find All the albums for the artist 'AC/DC'",
            "SELECT * FROM album WHERE artistid=(SELECT artistid FROM artist WHERE name='AC/DC');",
        ),
        (
            "List all tracks in the 'Rock' genre.",
            "SELECT * FROM Track WHERE GenreId = (SELECT GenreId FROM Genre WHERE Name = 'Rock');",
        ),
        (
            "Find the total duration of all tracks.",
            "SELECT SUM(Milliseconds) FROM Track;",
        ),
        (
            "List all customers from Canada.",
            "SELECT * FROM Customer WHERE Country = 'Canada';",
        ),
        (
            "How many tracks are there in the album with ID 5?",
            "SELECT COUNT(*) FROM Track WHERE AlbumId = 5;",
        ),
        (
            "Find the total number of invoices.",
            "SELECT COUNT(*) FROM Invoice;",
        ),
        (
            "List all tracks that are longer than 5 minutes.",
            "SELECT * FROM Track WHERE Milliseconds > 300000;",
        ),
        (
            "Who are the top 5 customers by total purchase?",
            "SELECT CustomerId, SUM(Total) AS TotalPurchase FROM Invoice GROUP BY CustomerId ORDER BY TotalPurchase DESC LIMIT 5;",
        ),
        (
            # NOTE(review): the stock Chinook Album table has no ReleaseDate
            # column — confirm this query is valid for the database in use.
            "Which albums are from the year 2000?",
            "SELECT * FROM Album WHERE strftime('%Y', ReleaseDate) = '2000';",
        ),
        (
            "How many employees are there",
            'SELECT COUNT(*) FROM "Employee"',
        ),
    )
]


In [9]:
# Selector that embeds the examples into a Chroma store and, per question,
# retrieves the k=4 entries matched on their "input" text.
exampleSelector = SemanticSimilarityExampleSelector.from_examples(
    k=4,
    input_keys=["input"],
    vectorstore_cls=Chroma,
    embeddings=SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-V2"),
    examples=examples,
)

  embeddings=SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-V2"),


In [10]:
# System-prompt preamble for the few-shot SQL agent.
# {dialect} and {top_k} are template placeholders that must be supplied when
# the prompt is invoked; the text ends with a lead-in sentence for the examples.
# NOTE(review): the prompt forbids querying all columns, yet several entries in
# the `examples` cell use SELECT * — consider aligning the two.
# NOTE(review): DROP is listed under "DML" although it is a DDL statement;
# the wording is left as-is because it is part of the runtime prompt.
prefix="""
    You are an agent designed to interact with a SQL database.
    Given an input question, create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer.
    Unless the user specifies a specific number of examples they wish to obtain, always limit your query to at most {top_k} results.
    You can order the results by a relevant column to return the most interesting examples in the database.
    Never query for all the columns from a specific table, only ask for the relevant columns given the question.
    You have access to tools for interacting with the database.
    Only use the given tools. Only use the information returned by the tools to construct your final answer.
    You MUST double check your query before executing it. If you get an error while executing a query, rewrite the query and try again.

    DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.) to the database.
    If the question does not seem related to the database, just return "I don't know" as the answer.
    
    Here are some examples of user inputs and their corresponding SQL queries:
"""

In [11]:
# Few-shot system prompt: the preamble plus the examples chosen by the selector,
# each rendered as a "User input / SQL Query" pair. The suffix is left empty.
fewShotPromptTemplate = FewShotPromptTemplate(
    prefix=prefix,
    example_selector=exampleSelector,
    example_prompt=PromptTemplate.from_template(
        template="User input: {input}\nSQL Query: {query}"
    ),
    suffix="",
    input_variables=["input", "dialect", "top_k"],
)

In [12]:
# Chat prompt for the agent, in order: few-shot system message, the user's
# question, then a placeholder filled from the "agent_scratchpad" variable.
fullPrompt = ChatPromptTemplate.from_messages(
    [
        SystemMessagePromptTemplate(prompt=fewShotPromptTemplate),
        HumanMessagePromptTemplate(prompt=PromptTemplate(template="{input}")),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

In [13]:
# Render the prompt once with sample values to inspect exactly what the model
# will receive (an empty scratchpad stands in for "no agent steps yet").
promptVal = fullPrompt.invoke(
    {
        "input": "How Many Artists are there?",
        "top_k": 5,
        "dialect": "SQL",
        "agent_scratchpad": [],
    }
)

print(promptVal.to_string())

System: 
    You are an agent designed to interact with a SQL database.
    Given an input question, create a syntactically correct SQL query to run, then look at the results of the query and return the answer.
    Unless the user specifies a specific number of examples they wish to obtain, always limit your query to at most 5 results.
    You can order the results by a relevant column to return the most interesting examples in the database.
    Never query for all the columns from a specific table, only ask for the relevant columns given the question.
    You have access to tools for interacting with the database.
    Only use the given tools. Only use the information returned by the tools to construct your final answer.
    You MUST double check your query before executing it. If you get an error while executing a query, rewrite the query and try again.

    DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.) to the database.
    If the question does not seem related t

In [14]:
# SQL agent driven by the custom few-shot prompt, using the tool-calling agent type.
agent = create_sql_agent(
    db=db,
    llm=llm,
    prompt=fullPrompt,
    agent_type="tool-calling",
    verbose=True,
)

In [15]:
# Besides the question, the custom prompt needs values for its
# {dialect} and {top_k} placeholders.
agent.invoke(
    {
        "input": "How many artists are there?",
        "dialect": "SQL",
        "top_k": 5,
    }
)



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3m
Invoking: `sql_db_query` with `{'query': 'SELECT COUNT(*) FROM artist'}`


[0m[36;1m[1;3m[(275,)][0m[32;1m[1;3mThere are 275 artists. 
[0m

[1m> Finished chain.[0m


{'input': 'How many artists are there?',
 'dialect': 'SQL',
 'top_k': 5,
 'output': 'There are 275 artists. \n'}