# LangChain using ChatGPT LLM (using LLM Chain)

In [2]:
from IPython.display import Image

## <span style="color:Brown">LangChain Basics</span>

### Basic LLM Request

In [3]:
from langchain.llms import OpenAI
from dotenv import load_dotenv
load_dotenv()

# SECURITY: never write a real API key into a notebook, not even inside a comment.
# A key was previously pasted at this spot; treat it as compromised and revoke it.
# Keep the key in the .env file that load_dotenv() reads instead
# (presumably under OPENAI_API_KEY - confirm against your .env).
# api_key = "<your-key-from-a-secret-store>"
# llm = OpenAI(openai_api_key=api_key)
llm = OpenAI()
result = llm("write a very very short poem about Sun")
print(result)



Golden rays of light
Warming up the day
Sunshine shining bright
Chasing all the gray


### LLMChain & PromptTemplate

In [8]:
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv
# Load environment variables from a local .env file (presumably where the OpenAI credentials live).
load_dotenv()

# %%
# Language model: completion-style OpenAI LLM created with default settings.
llm = OpenAI()

# Prompt: a template with two placeholders, {language} and {task}, filled in when the chain is called.
code_prompt = PromptTemplate(
    input_variables=["task","language"],
    template="Write a very short {language} function that will {task}."
)

# Chain: ties the prompt to the LLM; output_key names the entry under which the generated text is returned.
code_chain = LLMChain(
    llm=llm,
    prompt=code_prompt,
    output_key="code"
)

# Calling the chain with a dict supplies the prompt variables.
# The result is indexed with the output_key ("code") to get the generated text.
result = code_chain({"task":'Factorial',"language":'Python'})
print(result["code"])



def factorial(n):
    if n == 0:
        return 1
    else:
        return n * factorial(n-1)


#### <span style="color:Brown">1. LLMChain</span>

In [None]:
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
import argparse
from dotenv import load_dotenv
load_dotenv()
# Optional command-line flags; the defaults are used whenever a flag is not supplied.
parser = argparse.ArgumentParser()
parser.add_argument("--task", default="return a list of numbers")
parser.add_argument("--language", default="python")
# parse_known_args() is used instead of parse_args() so that arguments this parser does not
# define are ignored rather than aborting the cell. A Jupyter / VSCode Interactive kernel is
# launched with its own command-line arguments, which made parse_args() fail here.
# Element [0] is the parsed namespace; element [1] (the unrecognised arguments) is discarded.
args = parser.parse_known_args()[0]

# %%
# Language Model
llm = OpenAI()

# Prompt with two placeholders, {language} and {task}
code_prompt = PromptTemplate(
    input_variables=["task","language"],
    template="Write a very short {language} function that will {task}."
)

# Chain: prompt + LLM; the generated text is returned under the "code" key
code_chain = LLMChain(
    llm=llm,
    prompt=code_prompt,
    output_key="code"
)

# Run the chain with the values taken from the command line (or their defaults)
result = code_chain({
    "task": args.task,
    "language": args.language
})
print(result)

#### <span style="color:Brown">2. Sequential Chain</span>

In [None]:
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain,SequentialChain
import argparse
from dotenv import load_dotenv
# %%
load_dotenv()

# Optional command-line flags; the defaults are used whenever a flag is not supplied.
parser = argparse.ArgumentParser()
parser.add_argument("--task", default="return a list of numbers")
parser.add_argument("--language", default="python")
# parse_known_args() is used instead of parse_args() so that arguments this parser does not
# define are ignored rather than aborting the cell. A Jupyter / VSCode Interactive kernel is
# launched with its own command-line arguments, which made parse_args() fail here.
# Element [0] is the parsed namespace; element [1] (the unrecognised arguments) is discarded.
args = parser.parse_known_args()[0]

# %%
# Chain A: Code Chain
# LLM client shared by both chains
llm = OpenAI()

# Prompt for chain A: asks for a short function in {language} that performs {task}
code_prompt = PromptTemplate(
    input_variables=["task","language"],
    template="Write a very short {language} function that will {task}."
)

# Chain A: its output is published under the "code" key
code_chain = LLMChain(
    llm=llm,
    prompt=code_prompt,
    output_key="code"
)

# Chain B: Test Chain - consumes the "code" produced by chain A plus the original "language"
test_prompt = PromptTemplate(
    input_variables=["language","code"],
    template="Write a test for the following code:\n{code} in {language}"
)

test_chain = LLMChain(
    llm=llm,
    prompt=test_prompt,
    output_key="test"
)

# Run chain A and then chain B; both "code" and "test" are exposed in the final result
chain = SequentialChain(
    chains = [code_chain,test_chain],
    input_variables = ["task","language"],
    output_variables = ["code","test"]
)
result = chain({
    "task": args.task,
    "language": args.language
})
print(f">>>>> GENERATED CODE  <<<<<<\n{result['code']}")
print(f">>>>> GENERATED TEST  <<<<<<\n{result['test']}")

#### <span style="color:Brown">3. ChatOpenAI</span>

In [None]:
# %%
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate,HumanMessagePromptTemplate
from langchain.chains import LLMChain
from dotenv import load_dotenv
# %%
load_dotenv()

# Chat model client used by the chain below.
chat = ChatOpenAI()

# Prompt: a single human message whose text comes from the "content" input.
prompt = ChatPromptTemplate(
    input_variables=["content"],
    messages=[HumanMessagePromptTemplate.from_template("{content}")],
)

# Chain = prompt + chat model.
chain = LLMChain(llm=chat, prompt=prompt)

# Minimal chat loop: type 'break' to leave it.
while True:
    content = input('>>> User Message: ')
    if content == 'break':
        break
    result = chain({"content": content})
    print(f">>> AI Message: {result['text']}")

#### <span style="color:Brown">4. ConversationBufferMemory</span>

In [None]:
# %%
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, MessagesPlaceholder
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory
from dotenv import load_dotenv
# %%
load_dotenv()

# Chat model that answers each user message.
chatllm = ChatOpenAI()

# Prompt layout: earlier conversation first, then the new human message.
# The memory object adds a "messages" entry to every input and MessagesPlaceholder expands
# that entry into chat messages the model can read. The key name is arbitrary; it only has
# to match the memory_key used below.
prompt = ChatPromptTemplate(
    input_variables=["content", "messages"],
    messages=[
        MessagesPlaceholder(variable_name="messages"),
        HumanMessagePromptTemplate.from_template("{content}"),
    ],
)

# ConversationBufferMemory keeps the history in RAM only, so it is lost when the program exits
# (FileChatMessageHistory, by contrast, stores it in a JSON file).
memory = ConversationBufferMemory(
    memory_key="messages",
    return_messages=True,
)

# Chain = chat model + prompt + memory.
chain = LLMChain(llm=chatllm, prompt=prompt, memory=memory)

# Chat loop: type 'break' (or press Ctrl + C) to stop.
while True:
    content = input(">> ")
    if content == 'break':
        break
    result = chain({"content": content})
    print(result['text'])

#### <span style="color:Brown">4. FileChatMessageHistory</span>

In [None]:
# %%
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, MessagesPlaceholder
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory,FileChatMessageHistory
from dotenv import load_dotenv
# %%
load_dotenv()

# Chat model that answers each user message.
chatllm = ChatOpenAI()

# Prompt layout: earlier conversation first, then the new human message.
# The memory object adds a "messages" entry to every input and MessagesPlaceholder expands
# that entry into chat messages the model can read. The key name is arbitrary; it only has
# to match the memory_key used below.
prompt = ChatPromptTemplate(
    input_variables=["content", "messages"],
    messages=[
        MessagesPlaceholder(variable_name="messages"),
        HumanMessagePromptTemplate.from_template("{content}"),
    ],
)

# Same buffer memory as before, but backed by FileChatMessageHistory: the history is written to
# "messages.json", so it is restored even after the program has been closed and started again.
memory = ConversationBufferMemory(
    chat_memory=FileChatMessageHistory("messages.json"),
    memory_key="messages",
    return_messages=True,
)

# Chain = chat model + prompt + memory.
chain = LLMChain(llm=chatllm, prompt=prompt, memory=memory)

# Chat loop: type 'break' (or press Ctrl + C) to stop.
while True:
    content = input(">> ")
    if content == 'break':
        break
    result = chain({"content": content})
    print(result['text'])

#### <span style="color:Brown">4. FileChatMessageHistoryVerbose</span>

In [None]:
# %%
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, MessagesPlaceholder
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory,FileChatMessageHistory
from dotenv import load_dotenv
# %%
load_dotenv()

# Chat model that answers each user message, created with verbose=True.
chatllm = ChatOpenAI(verbose=True)

# Prompt layout: earlier conversation first, then the new human message.
# The memory object adds a "messages" entry to every input and MessagesPlaceholder expands
# that entry into chat messages the model can read. The key name is arbitrary; it only has
# to match the memory_key used below.
prompt = ChatPromptTemplate(
    input_variables=["content", "messages"],
    messages=[
        MessagesPlaceholder(variable_name="messages"),
        HumanMessagePromptTemplate.from_template("{content}"),
    ],
)

# Buffer memory backed by FileChatMessageHistory: the history is written to "messages.json",
# so it is restored even after the program has been closed and started again.
memory = ConversationBufferMemory(
    chat_memory=FileChatMessageHistory("messages.json"),
    memory_key="messages",
    return_messages=True,
)

# Chain = chat model + prompt + memory; verbose=True is also switched on for the chain itself.
chain = LLMChain(llm=chatllm, prompt=prompt, memory=memory, verbose=True)

# Chat loop: type 'break' (or press Ctrl + C) to stop.
while True:
    content = input(">> ")
    if content == 'break':
        break
    result = chain({"content": content})
    print(result['text'])

#### <span style="color:Brown">5. ConversationSummaryMemory</span>

In [None]:
# %%
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, MessagesPlaceholder
from langchain.chains import LLMChain
from langchain.memory import ConversationSummaryMemory
from dotenv import load_dotenv
# %%
load_dotenv()

# Chat model for the final answers (verbose=True is optional).
chatllm = ChatOpenAI(verbose=True)

# Prompt layout: the "messages" entry supplied by the memory first, then the new human message.
# MessagesPlaceholder expands that entry into chat messages the model can read. The key name is
# arbitrary; it only has to match the memory_key used below.
prompt = ChatPromptTemplate(
    input_variables=["content", "messages"],
    messages=[
        MessagesPlaceholder(variable_name="messages"),
        HumanMessagePromptTemplate.from_template("{content}"),
    ],
)

# ConversationSummaryMemory uses an LLM to summarise the conversation. Any summarisation model
# can be passed as llm; it does not have to be the model that produces the final response.
memory = ConversationSummaryMemory(
    llm=chatllm,
    memory_key="messages",
    return_messages=True,
)

# Chain = chat model + prompt + memory (verbose=True is optional).
chain = LLMChain(llm=chatllm, prompt=prompt, memory=memory, verbose=True)

# Chat loop: type 'break' (or press Ctrl + C) to stop.
while True:
    content = input(">> ")
    if content == 'break':
        break
    result = chain({"content": content})
    print(result['text'])

#### <span style="color:Brown">6. TextLoader</span>

In [None]:
# %%
from langchain.document_loaders import TextLoader
from dotenv import load_dotenv
# %%
# Load environment variables from a local .env file.
load_dotenv()

# Read facts.txt (path relative to the working directory) through LangChain's TextLoader.
loader = TextLoader("facts.txt")
docs = loader.load()
# One-line equivalent of the two statements above:
# docs = TextLoader("facts.txt").load()
print(docs)
# %%

#### <span style="color:Brown">7. CharacterTextSplitter</span>

In [None]:
# %%
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from dotenv import load_dotenv
# %%
# Load environment variables from a local .env file.
load_dotenv()

# Loader for the source text file
loader = TextLoader("facts.txt")

# Splitter: breaks the text on newlines, with a chunk_size of 200 and no overlap between chunks
# (chunk_size is presumably measured in characters - confirm in the LangChain docs).
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=200,
    chunk_overlap=0
)

# Load the file and split it in one step using the splitter above
docs = loader.load_and_split(
    text_splitter=text_splitter
)

# Print every chunk followed by an empty line so the chunk boundaries are visible
for doc in docs:
    print(doc.page_content)
    print()

#### <span style="color:Brown">8. OpenAIEmbeddings</span>

In [None]:
# %%
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv
# %%
# Load environment variables from a local .env file.
load_dotenv()

# Embeddings client; embed_query() turns one piece of text into its embedding, which is printed below.
embeddings = OpenAIEmbeddings()
embeds = embeddings.embed_query("Hi There")
print(embeds)

#### <span style="color:Brown">9. Chroma</span>

In [None]:
# %%
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores.chroma import Chroma 
from dotenv import load_dotenv
# %%
# Load environment variables from a local .env file.
load_dotenv()

# Embeddings client used to embed the chunks
embeddings = OpenAIEmbeddings()

# Loader and Splitter
loader = TextLoader("facts.txt")

# Split on newlines, with a chunk_size of 200 and no overlap between chunks
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=200,
    chunk_overlap=0
)

docs = loader.load_and_split(
    text_splitter=text_splitter
)

# Create a Chroma vector store from the chunks.
# The chunks are converted to embeddings and stored in the vector store, which is persisted in the "emb" directory.
db = Chroma.from_documents(
    docs,
    embedding=embeddings,
    persist_directory="emb"
)
# similarity_search_with_score returns each matching document together with its score
results = db.similarity_search_with_score("What is an interesting fact about the English language?",
                                          k=5) # k = number of top results to return
for result in results:
    print()
    print(f'Similarity Search Score: {result[1]}') # A lower score means a smaller distance, i.e. more similarity
    print(result[0].page_content)

#### <span style="color:Brown">10. RetrievalQA Chain</span>

In [None]:
# %%
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.vectorstores.chroma import Chroma
from langchain.chains import RetrievalQA
from dotenv import load_dotenv
# %%
load_dotenv()
# Embeddings client; it is handed to the vector store as its embedding function
embeddings = OpenAIEmbeddings()

# Location of the vector store and the embeddings it needs to use for retrieval
db = Chroma(
    persist_directory="emb",
    embedding_function=embeddings
)

# Retriever
# A retriever is an object that can take in a string and return some relevant documents.
# To be a "retriever", the object must have a method called "get_relevant_documents" that takes a string and returns a list of documents.
# The lookup is not a passive lookup: it searches the vector store for results that are similar to the query.
# It is modular: you can use any retriever, including one that finds documents in a slightly different way.
retriever = db.as_retriever()

# The LLM the chain will use
chatllm = ChatOpenAI()

# RetrievalQA chain: LangChain has a tool that wraps up this entire flow. It takes our retriever,
# generates embeddings for the incoming user question, finds some relevant documents,
# injects ("stuffs") them into a system message prompt template, puts the user's question into a
# human message prompt template, and then feeds the whole thing into an LLM chain for us.
chain = RetrievalQA.from_chain_type(
    llm=chatllm,
    retriever=retriever,
    chain_type="stuff"
)

# The input is passed to the chain, which routes it on to the retriever.
result = chain.run("What is an interesting fact about the English language?")
print(result)

# The main problem with the code above is that it does not remove duplicate records.

#### <span style="color:Brown">11. RedundantFilterRetriever (this imports the RedundantFilterRetriever class from RedundantFilterRetriever.py in the CustomFilters folder)</span>

In [None]:
# %%
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.vectorstores.chroma import Chroma
from langchain.chains import RetrievalQA
from CustomFilters.RedundantFilterRetriever import RedundantFilterRetriever
# from Folder.File import Class
from dotenv import load_dotenv
# import langchain
# 
# langchain.debug = True # to Turn ON Debugging Mode
# %%
load_dotenv()

# Embeddings client; shared by the vector store and the custom retriever
embeddings = OpenAIEmbeddings()

# Location of the vector store and the embeddings it needs to use for retrieval
db = Chroma(
    persist_directory="emb",
    embedding_function=embeddings
)

# Retriever
# A retriever is an object that can take in a string and return some relevant documents.
# To be a "retriever", the object must have a method called "get_relevant_documents" that takes a string and returns a list of documents.
# The lookup is not a passive lookup: it searches the vector store for results that are similar to the query.
# It is modular: here the project's own RedundantFilterRetriever replaces db.as_retriever()
# (presumably to filter out redundant documents, judging by its name - see CustomFilters/RedundantFilterRetriever.py).
retriever = RedundantFilterRetriever(
    embeddings=embeddings,
    chroma=db
    )

# The LLM the chain will use
chatllm = ChatOpenAI()

# RetrievalQA chain: LangChain has a tool that wraps up this entire flow. It takes our retriever,
# generates embeddings for the incoming user question, finds some relevant documents,
# injects ("stuffs") them into a system message prompt template, puts the user's question into a
# human message prompt template, and then feeds the whole thing into an LLM chain for us.
chain = RetrievalQA.from_chain_type(
    llm=chatllm,
    retriever=retriever,
    chain_type="stuff"
)

# The input is passed to the chain, which routes it on to the retriever.
result = chain.run("What is an interesting fact about the English language?") # chain.run returns only the answer string (the value of the 'result' key)
# result = chain("What is an interesting fact about the English language?")  # calling the chain directly returns a dictionary
print(result)

## Agents

Notes: OneNote > AI > LangChain > Agents

##### Defining a Tool (sql.py)

In [None]:
import sqlite3
from langchain.tools import Tool

# Module-level connection to the db.sqlite file (path relative to the working directory), used by the query function below.
connection = sqlite3.connect("db.sqlite")

def run_sqlite_query(query):
    """Execute ``query`` on the module-level connection and return all result rows.

    No error handling is done here: any sqlite3 exception raised while
    executing the query propagates to the caller.
    """
    return connection.cursor().execute(query).fetchall()

# Wrap run_sqlite_query as a LangChain Tool (with a name and a description) so it can be handed to an agent.
run_query_tool = Tool.from_function(
    name="run_sqlite_query",
    description="Run a sqlite query",
    func=run_sqlite_query
)

##### Defining an Agent and AgentExecutor (main.py)

In [None]:
from langchain_openai import ChatOpenAI
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder
)
from langchain.agents import OpenAIFunctionsAgent,AgentExecutor
from tools.sql import run_query_tool
from dotenv import load_dotenv
# Load environment variables from a local .env file.
load_dotenv()

# Chat model that drives the agent
chat = ChatOpenAI()

# Prompt: the user's question followed by the agent scratchpad
prompt = ChatPromptTemplate(
    messages=[
        HumanMessagePromptTemplate.from_template("{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad") # MessagesPlaceholder takes an input variable and expands it into a list of messages; the agent scratchpad plays the same role as MessagesPlaceholder(variable_name="messages") in the ConversationSummaryMemory example (LangChainBasics.py).
    ]
    )

# Tools the agent is allowed to call
tools = [run_query_tool]

# Agent: combines the chat model, the prompt and the available tools
agent = OpenAIFunctionsAgent(
    llm=chat,
    prompt=prompt,
    tools = tools
)

# Executor: runs the agent with its tools; verbose=True is switched on (presumably to print the intermediate steps)
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True
)

agent_executor("How many users are there in the database?")
# agent_executor("How many users have provided a shipping address?") # This will fail

#### Addressing Shortcomings in ChatGPTs Assumptions
Shortcomings: ChatGPT is assuming table names and column names which may or may not be the same in actual database. So, ChatGPT query is not compatible with our actual database at all and we will end up with getting error.

Solution: We need to do a better job of getting ChatGPT to try to investigate our database a little bit. Or maybe give it some more information just right up front,  give it enough information to understand what tables exist and what columns are in each of those tables. And then whenever we ask it a question that's a little bit more complex like this one right here (How many users have provided a shipping address?). We want ChatGPT to use that knowledge to formulate a query, rather than just assuming the table's name and it has a column called shipping address.

##### Strategy 1: Doing a little bit of Error Handling (especially Operational Errors)
Rather than just throwing the error and exiting our program, which is what's happening right now (when we gave 'How many users have provided a shipping address?'), we're going to instead capture that error message. We're going to get the error message itself, like the actual text that describes exactly what just went wrong. We're then going to take that error message and send it off to ChatGPT. So we're gonna say something like the following error occurred while running the query you gave me, and then we'll put in exactly what the error message was. So we're gonna send that off to ChatGPT and the hope here, this is our hope. It's not actually going to, like I said, solve the entire problem, but our hope is that ChatGPT is going to realize, "Oh, okay, a column called shipping address doesn't actually exist, I'll try again." So hopefully it will send us a follow-up query and ideally this one will actually work. Now, once again, I wanna tell you, this is not gonna solve the entire problem, but this is just kind of step one. It's to help ChatGPT understand when something goes wrong and give it the chance or the opportunity to fix things up. To implement all this, we're going to make an update to our tool function. The actual function that takes the query from ChatGPT and executes it.

In [None]:
# sql.py
import sqlite3
from langchain.tools import Tool

# Module-level connection to the db.sqlite file (path relative to the working directory), used by the query function below.
connection = sqlite3.connect("db.sqlite")

def run_sqlite_query(query):
    """Execute ``query`` on the module-level connection and return all result rows.

    If SQLite raises an ``OperationalError``, the error is not propagated;
    instead its message is returned as a string, so that whoever issued the
    query can read what went wrong and try again.
    """
    cur = connection.cursor()
    try:
        rows = cur.execute(query).fetchall()
    except sqlite3.OperationalError as exc:
        return f'The following error occurred: {exc}'
    return rows

# Wrap run_sqlite_query as a LangChain Tool (with a name and a description) so it can be handed to an agent.
run_query_tool = Tool.from_function(
    name="run_sqlite_query",
    description="Run a SQLite query",
    func=run_sqlite_query
)

In [None]:
# main.py
from langchain_openai import ChatOpenAI
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder
)
from langchain.agents import OpenAIFunctionsAgent,AgentExecutor
from tools.sql import run_query_tool
from dotenv import load_dotenv
# Load environment variables from a local .env file.
load_dotenv()

# Chat model that drives the agent
chat = ChatOpenAI()

# Prompt: the user's question followed by the agent scratchpad
prompt = ChatPromptTemplate(
    messages=[
        HumanMessagePromptTemplate.from_template("{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad") # MessagesPlaceholder takes an input variable and expands it into a list of messages; the agent scratchpad plays the same role as MessagesPlaceholder(variable_name="messages") in the ConversationSummaryMemory example (LangChainBasics.py).
    ]
    )

# Tools the agent is allowed to call
tools = [run_query_tool]

# Agent: combines the chat model, the prompt and the available tools
agent = OpenAIFunctionsAgent(
    llm=chat,
    prompt=prompt,
    tools = tools
)

# Executor: runs the agent with its tools; verbose=True is switched on (presumably to print the intermediate steps)
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True
)

# The question that made the version without error handling fail
agent_executor("How many users have provided a shipping address?")

##### Adding in Error Handling is definitely good because it gives ChatGPT the ability to follow up and try to fix an error, but it still is not really enough. Now we need to help ChatGPT understand the structure of our database just a little bit more. So we're going to take a look at two additional techniques that we're going to use to make ChatGPT a lot more robust in answering questions that involve our database. So here's the two additional techniques we're going to use. We're going to make two changes to the initial requests we send off to ChatGPT. First we are going to add in a system message and Second we're going to add in an additional tool.
![](../Media/lc_1.png)

##### First we are going to add in a system message.

In [None]:
# sql.py
import sqlite3
from langchain.tools import Tool

# Module-level connection to the db.sqlite file (path relative to the working directory), used by run_sqlite_query below.
connection = sqlite3.connect("db.sqlite")

def tables_str():
    """Return the names of all tables in ``db.sqlite``, one name per line.

    The names are read from ``sqlite_master``. The function opens its own
    connection and always closes it again, even if the query fails, so no
    connection is leaked on every call.

    Returns:
        str: table names joined with newlines ('' when there are no tables).
    """
    conn = sqlite3.connect('db.sqlite')
    try:
        cursor = conn.execute("Select name from sqlite_master where type = 'table'")
        # fetchall() materialises the rows before the connection is closed in `finally`.
        return '\n'.join(row[0] for row in cursor.fetchall() if row[0] is not None)
    finally:
        conn.close()
    
def run_sqlite_query(query):
    """Execute ``query`` on the module-level connection and return all result rows.

    If SQLite raises an ``OperationalError``, the error is not propagated;
    instead its message is returned as a string, so that whoever issued the
    query can read what went wrong and try again.
    """
    cur = connection.cursor()
    try:
        rows = cur.execute(query).fetchall()
    except sqlite3.OperationalError as exc:
        return f'The following error occurred: {exc}'
    return rows

# Wrap run_sqlite_query as a LangChain Tool (with a name and a description) so it can be handed to an agent.
run_query_tool = Tool.from_function(
    name="run_sqlite_query",
    description="Run a SQLite query",
    func=run_sqlite_query
)

In [None]:
# main.py
from langchain_openai import ChatOpenAI
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder
)
from langchain.agents import OpenAIFunctionsAgent,AgentExecutor
from tools.sql import run_query_tool, tables_str
from langchain.schema import SystemMessage
from dotenv import load_dotenv
load_dotenv()

# Chat model that drives the agent
chat = ChatOpenAI()
# Table names of the database, as returned by tables_str()
tables = tables_str()
prompt = ChatPromptTemplate(
    messages=[
        # SystemMessage is a static message, not a prompt template, so a "{tables}" placeholder
        # inside a plain string is never filled in and would be sent to the model literally.
        # The f-string inserts the table names when the message is created.
        SystemMessage(content=f"You are an AI and you have access to database.\n{tables}"),
        HumanMessagePromptTemplate.from_template("{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad") # MessagesPlaceholder takes an input variable and expands it into a list of messages; the agent scratchpad plays the same role as MessagesPlaceholder(variable_name="messages") in the ConversationSummaryMemory example (LangChainBasics.py).
    ]
    )

# Tools the agent is allowed to call
tools = [run_query_tool]

# Agent: combines the chat model, the prompt and the available tools
agent = OpenAIFunctionsAgent(
    llm=chat,
    prompt=prompt,
    tools = tools
)

# Executor: runs the agent with its tools; verbose=True is switched on (presumably to print the intermediate steps)
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True
)

agent_executor("How many users have provided a shipping address?")

Observation: Still AI is not using correct column from a table in a given list of tables in System Message. So we need to give a tool to ChatGPT to describe a table from a given list of tables

##### Second we're going to add in an additional describe tool.

In [None]:
# sql.py
import sqlite3
from langchain.tools import Tool

# Module-level connection to the db.sqlite file (path relative to the working directory), used by run_sqlite_query and describe_tables below.
connection = sqlite3.connect("db.sqlite")

def tables_str():
    """Return the names of all tables in ``db.sqlite``, one name per line.

    The names are read from ``sqlite_master``. The function opens its own
    connection and always closes it again, even if the query fails, so no
    connection is leaked on every call.

    Returns:
        str: table names joined with newlines ('' when there are no tables).
    """
    conn = sqlite3.connect('db.sqlite')
    try:
        cursor = conn.execute("Select name from sqlite_master where type = 'table'")
        # fetchall() materialises the rows before the connection is closed in `finally`.
        return '\n'.join(row[0] for row in cursor.fetchall() if row[0] is not None)
    finally:
        conn.close()
    
def run_sqlite_query(query):
    """Execute ``query`` on the module-level connection and return all result rows.

    If SQLite raises an ``OperationalError``, the error is not propagated;
    instead its message is returned as a string, so that whoever issued the
    query can read what went wrong and try again.
    """
    cur = connection.cursor()
    try:
        rows = cur.execute(query).fetchall()
    except sqlite3.OperationalError as exc:
        return f'The following error occurred: {exc}'
    return rows

# Wrap run_sqlite_query as a LangChain Tool (with a name and a description) so it can be handed to an agent.
run_query_tool = Tool.from_function(
    name="run_sqlite_query",
    description="Run a SQLite query",
    func=run_sqlite_query
)

def describe_tables(table_names):
    """Return the CREATE statements (schema) of the given tables, one per line.

    Args:
        table_names: table names in a single string, separated by newlines.

    Returns:
        str: the ``sql`` column of ``sqlite_master`` for every matching table,
        joined with newlines; tables that do not exist are simply absent.
    """
    tables_list = table_names.split('\n')
    # The names come from the model, so they are bound as query parameters instead of being
    # pasted into the SQL text: a quote inside a name can no longer break or alter the query.
    placeholders = ', '.join('?' for _ in tables_list)
    cursor = connection.cursor()
    rows = cursor.execute(
        f"SELECT sql FROM sqlite_master WHERE type='table' and name IN ({placeholders});",
        tables_list,
    )
    return '\n'.join(row[0] for row in rows if row[0] is not None)

# Wrap describe_tables as a second LangChain Tool so the agent can look up table schemas.
describe_tables_tool = Tool.from_function(
    name="describe_tables",
    description="Given a list of table names, returns the schema of those tables",
    func=describe_tables
)

In [None]:
# main.py
from langchain_openai import ChatOpenAI
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder
)
from langchain.agents import OpenAIFunctionsAgent,AgentExecutor
from tools.sql import run_query_tool, tables_str, describe_tables_tool
from langchain.schema import SystemMessage
from dotenv import load_dotenv
load_dotenv()

# Chat model that drives the agent
chat = ChatOpenAI()
# Table names of the database, as returned by tables_str()
tables = tables_str()
prompt = ChatPromptTemplate(
    messages=[
        # SystemMessage is a static message, not a prompt template, so a "{tables}" placeholder
        # inside a plain string is never filled in and would be sent to the model literally.
        # The f-string inserts the table names when the message is created.
        SystemMessage(content=f"You are an AI and you have access to database.\n{tables}"),
        HumanMessagePromptTemplate.from_template("{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad") # MessagesPlaceholder takes an input variable and expands it into a list of messages; the agent scratchpad plays the same role as MessagesPlaceholder(variable_name="messages") in the ConversationSummaryMemory example (LangChainBasics.py).
    ]
    )

# Tools the agent is allowed to call: run a query, or look up the schema of tables
tools = [run_query_tool,describe_tables_tool]

# Agent: combines the chat model, the prompt and the available tools
agent = OpenAIFunctionsAgent(
    llm=chat,
    prompt=prompt,
    tools = tools
)

# Executor: runs the agent with its tools; verbose=True is switched on (presumably to print the intermediate steps)
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True
)

agent_executor("How many users have provided a shipping address?")

Observation: Even though we've added in the system message and this additional tool, we're gonna very quickly realize that it's really hard to get ChatGPT to make use of that new tool. ChatGPT luckily after several failed attempts used our new describe tool in above but be aware, right away, that unfortunately this is not gonna solve all of our problems. Sometimes ChatGPT thinks that it just knows what's best for answering some of these questions and it's going to refuse to use some tools. So I'll show you some ways we're going to get around that. One way is to insist ChatGPT to use our describe tool i.e., by 'Being Direct with System Messages'

In [None]:
# main.py
from langchain_openai import ChatOpenAI
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder
)
from langchain.agents import OpenAIFunctionsAgent,AgentExecutor
from tools.sql import run_query_tool, tables_str, describe_tables_tool
from langchain.schema import SystemMessage
from dotenv import load_dotenv
# Load environment variables from a local .env file.
load_dotenv()

# Chat model that drives the agent
chat = ChatOpenAI()
# Table names of the database, as returned by tables_str()
tables = tables_str()
prompt = ChatPromptTemplate(
    messages=[
        # Direct system message: lists the tables (inserted by the f-string part) and tells the
        # model not to guess tables or columns but to call the 'describe_tables' function instead.
        SystemMessage(content="You are an AI that has access to a SQLite database.\n"
            f"The database has tables of: {tables}\n"
            "Do not make any assumptions about what tables exist "
            "or what columns exist. Instead, use the 'describe_tables' function"), # It is a static message, so there is no need for a system message prompt template
        HumanMessagePromptTemplate.from_template("{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad") # MessagesPlaceholder takes an input variable and expands it into a list of messages; the agent scratchpad plays the same role as MessagesPlaceholder(variable_name="messages") in the ConversationSummaryMemory example (LangChainBasics.py).
    ]
    )

# Tools the agent is allowed to call: run a query, or look up the schema of tables
tools = [run_query_tool,describe_tables_tool]

# Agent: combines the chat model, the prompt and the available tools
agent = OpenAIFunctionsAgent(
    llm=chat,
    prompt=prompt,
    tools = tools
)

# Executor: runs the agent with its tools; verbose=True is switched on (presumably to print the intermediate steps)
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True
)

agent_executor("How many users have provided a shipping address?")

* https://www.udemy.com/course/chatgpt-and-langchain-the-complete-developers-masterclass/learn/lecture/40261308#notes
* Even though our program is now working, one more little optimization we can make to better help ChatGPT understand is, how to make use of the tools we have provided to ChatGPT. 
* 
Rather than using the actual name of the argument that our function expects (something like "query" or "table_names"), LangChain by default generates argument names like "__arg1". Such non-descriptive names do not help ChatGPT understand how to make use of the tool.

* So the very small optimization we can make is to customize these names. So to make sure that we get in names like "query", If you put in a name here, something like gibberish (like __arg1), well chatGPT is not going to be able to make use of that very well. But if you put in a name like "query" then chatGPT is gonna understand very plainly, oh, okay, the first argument is going to be the query that I want to run.  

* Pydantic is a library that lets us annotate the attributes of a Python class and so describe more clearly what kinds of data we expect that class to receive (instead of correcting LangChain, we are telling ChatGPT which argument names and data types it should supply). Inheriting from the BaseModel class is what adds this extra behavior.

In [None]:
# sql.py
import sqlite3
from langchain.tools import Tool
from typing import List
from pydantic.v1 import BaseModel

# Shared module-level connection to the SQLite database file; run_sqlite_query
# and describe_tables below both create their cursors from it.
connection = sqlite3.connect("db.sqlite")

def tables_str():
    """Return the names of all tables in ``db.sqlite``, one per line.

    A short-lived connection is opened just for this lookup and is always
    closed afterwards, so repeated calls do not accumulate open connections.

    Returns:
        str: The ``name`` column of every ``sqlite_master`` row of type
        ``'table'``, joined with newlines (an empty string when the database
        has no tables).
    """
    conn = sqlite3.connect('db.sqlite')
    try:
        rows = conn.execute(
            "Select name from sqlite_master where type = 'table'"
        ).fetchall()
    finally:
        # Release the connection even if the query raises.
        conn.close()
    return '\n'.join(name for (name,) in rows if name is not None)
    
def run_sqlite_query(query):
    """Execute ``query`` on the shared connection and return its rows.

    Args:
        query: SQL text to execute.

    Returns:
        list | str: ``cursor.fetchall()`` on success. If SQLite rejects the
        statement, a string of the form
        ``'The following error occurred: <message>'`` is returned instead of
        raising, so the caller receives the failure as ordinary output.
    """
    cursor = connection.cursor()
    try:
        cursor.execute(query)
        return cursor.fetchall()
    except (sqlite3.Error, sqlite3.Warning) as err:
        # sqlite3.Error is the base of OperationalError, IntegrityError,
        # ProgrammingError, etc.; sqlite3.Warning is what older Python
        # versions raise for multi-statement input. Report all of them the
        # same way rather than letting some escape as exceptions.
        return f'The following error occurred: {str(err)}'
    finally:
        cursor.close()

# Argument schema for the run_sqlite_query tool. The class is a record that
# says: to be a RunQueryArgsSchema you must provide a 'query' attribute that is
# a string. It is meant to be handed to the tool through the `args_schema`
# keyword argument; LangChain then uses it to describe the tool's arguments to
# ChatGPT, i.e. "to use this tool you must provide an argument called query,
# and it is supposed to be a string".
class RunQueryArgsSchema(BaseModel):
    query: str

# Expose run_sqlite_query to the agent as a tool. Passing the schema via
# `args_schema` is what gives the tool's argument the descriptive name "query"
# instead of LangChain's default placeholder name.
run_query_tool = Tool.from_function(
    name="run_sqlite_query",
    description="Run a SQLite query",
    func=run_sqlite_query,
    args_schema=RunQueryArgsSchema
)

def describe_tables(table_names):
    """Return the ``CREATE TABLE`` statements of the requested tables.

    Args:
        table_names: Iterable of table names (a list when the tool is called
            through ``DescribeTablesArgsSchema``). A single newline-separated
            string is also accepted and split into names.

    Returns:
        str: The ``sql`` column of each matching ``sqlite_master`` row, joined
        with newlines. Empty string when no names are given or none match.
    """
    if isinstance(table_names, str):
        names = table_names.split('\n')
    else:
        names = list(table_names)
    if not names:
        return ''
    # One "?" per name: the values are bound by SQLite instead of being pasted
    # into the SQL text, so quotes or SQL fragments inside a name cannot alter
    # the statement.
    placeholders = ', '.join('?' for _ in names)
    cursor = connection.cursor()
    try:
        rows = cursor.execute(
            f"SELECT sql FROM sqlite_master WHERE type='table' and name IN ({placeholders});",
            names,
        ).fetchall()
    finally:
        cursor.close()
    return '\n'.join(row[0] for row in rows if row[0] is not None)

# Argument schema for the describe_tables tool: a single attribute,
# `table_names`, declared as a list of strings. Passed to the tool below via
# `args_schema`.
class DescribeTablesArgsSchema(BaseModel):
    table_names: List[str]
    
# Agent-facing wrapper around describe_tables; the schema declares the
# `table_names` argument the tool expects.
describe_tables_tool = Tool.from_function(
    func=describe_tables,
    args_schema=DescribeTablesArgsSchema,
    name="describe_tables",
    description="Given a list of table names, returns the schema of those tables",
)

In [None]:
# main.py
from langchain_openai import ChatOpenAI
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder
)
from langchain.agents import OpenAIFunctionsAgent,AgentExecutor
from tools.sql import run_query_tool, tables_str, describe_tables_tool
from langchain.schema import SystemMessage
from dotenv import load_dotenv
# import langchain

# langchain.debug = True
# Load variables from a local .env file into the environment before the chat
# client is created.
load_dotenv()
chat = ChatOpenAI()

# Newline-separated table names, interpolated into the system message below.
tables = tables_str()

prompt = ChatPromptTemplate(
    messages=[
        # Static message, so a plain SystemMessage is enough (no need for a
        # SystemMessagePromptTemplate).
        SystemMessage(content="You are an AI that has access to a SQLite database.\n"
            f"The database has tables of: {tables}\n"
            "Do not make any assumptions about what tables exist "
            "or what columns exist. Instead, use the 'describe_tables' function"),
        HumanMessagePromptTemplate.from_template("{input}"),
        # Remember: a MessagesPlaceholder takes an input variable and expands
        # it into a list of messages. The agent scratchpad plays the same role
        # as MessagesPlaceholder(variable_name="messages") in
        # ConversationSummaryMemory (LangChainBasics.py).
        MessagesPlaceholder(variable_name="agent_scratchpad")
    ]
)

# The same tool list is passed to both the agent and the executor below.
tools = [run_query_tool, describe_tables_tool]

agent = OpenAIFunctionsAgent(
    llm=chat,
    prompt=prompt,
    tools=tools
)

agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True
)

# Use `invoke` with an explicit input dict: calling the executor object
# directly goes through `Chain.__call__`, which is deprecated in LangChain
# 0.1+. The "input" key fills the "{input}" slot of the human message template.
agent_executor.invoke({"input": "How many users have provided a shipping address?"})

Why are we using Structured Tool here instead of Tool? 
* The reason is very simple: because of some legacy decisions in the LangChain library, a tool created from the `Tool` class (`langchain.tools.Tool`), as we did back in our sql.py file, can only use a function that receives a single argument. It sounds silly, but it is due to a lot of legacy issues inside the LangChain code base. If you want to make a tool that receives multiple arguments, use the `StructuredTool` class instead.
* Very simply, `StructuredTool` can receive multiple arguments, so it is a perfect fit for our use case, where `write_report` receives two separate arguments (`filename` and `html`).

In [None]:
# report.py
from pydantic.v1 import BaseModel
from langchain.tools import StructuredTool

def write_report(filename, html):
    """Write ``html`` to ``filename``, replacing any existing file.

    The file is written as UTF-8 explicitly, so HTML containing non-ASCII
    characters is stored the same way on every platform instead of depending
    on the locale's default encoding.

    Args:
        filename: Path of the file to create or overwrite.
        html: Text to write.

    Returns:
        None
    """
    # NOTE(review): `filename` is used as given; if it comes from the model,
    # consider restricting it to a dedicated output directory.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(html)

# Argument schema for the write_report tool: two string attributes, `filename`
# and `html`, matching the two parameters of write_report. Passed to the tool
# below via `args_schema`.
class WriteReportArgsSchema(BaseModel):
    filename: str
    html: str
    
# Agent-facing wrapper around write_report. StructuredTool is used because the
# wrapped function takes two arguments (filename and html).
write_report_tool = StructuredTool.from_function(
    func=write_report,
    args_schema=WriteReportArgsSchema,
    name="write_report",
    description="Write an HTML file to disk. Use this tool whenever someone asks for a report.",
)

In [None]:
# main.py
from langchain_openai import ChatOpenAI
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder
)
from langchain.agents import OpenAIFunctionsAgent,AgentExecutor
from tools.sql import run_query_tool, tables_str, describe_tables_tool
from tools.report import write_report_tool
from langchain.schema import SystemMessage
from dotenv import load_dotenv
# import langchain

# langchain.debug = True
# Load variables from a local .env file into the environment before the chat
# client is created.
load_dotenv()
chat = ChatOpenAI()

# Newline-separated table names, interpolated into the system message below.
tables = tables_str()

prompt = ChatPromptTemplate(
    messages=[
        # Static message, so a plain SystemMessage is enough (no need for a
        # SystemMessagePromptTemplate).
        SystemMessage(content="You are an AI that has access to a SQLite database.\n"
            f"The database has tables of: {tables}\n"
            "Do not make any assumptions about what tables exist "
            "or what columns exist. Instead, use the 'describe_tables' function"),
        HumanMessagePromptTemplate.from_template("{input}"),
        # Remember: a MessagesPlaceholder takes an input variable and expands
        # it into a list of messages. The agent scratchpad plays the same role
        # as MessagesPlaceholder(variable_name="messages") in
        # ConversationSummaryMemory (LangChainBasics.py).
        MessagesPlaceholder(variable_name="agent_scratchpad")
    ]
)

# The report-writing tool is now offered alongside the two SQL tools; the same
# list is passed to both the agent and the executor below.
tools = [run_query_tool, describe_tables_tool, write_report_tool]

agent = OpenAIFunctionsAgent(
    llm=chat,
    prompt=prompt,
    tools=tools
)

agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True
)

# Use `invoke` with an explicit input dict: calling the executor object
# directly goes through `Chain.__call__`, which is deprecated in LangChain
# 0.1+. The "input" key fills the "{input}" slot of the human message template.
agent_executor.invoke(
    {"input": "Summarize the top 5 most popular products. Write the results to a report file."}
)

#### <span style="color:Brown">Memory in Agents</span>

Notes: OneNote > AI > LangChain > Memory in Agents

In [None]:
# main.py
from langchain_openai import ChatOpenAI
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder
)
from langchain.agents import OpenAIFunctionsAgent,AgentExecutor
from tools.sql import run_query_tool, tables_str, describe_tables_tool
from tools.report import write_report_tool
from langchain.memory import ConversationBufferMemory
from langchain.schema import SystemMessage
from dotenv import load_dotenv
# import langchain

# langchain.debug = True
# Load variables from a local .env file into the environment before the chat
# client is created.
load_dotenv()
chat = ChatOpenAI()

# Newline-separated table names, interpolated into the system message below.
tables = tables_str()

# memory_key is "chat_history", matching the MessagesPlaceholder in the prompt.
# return_messages=True makes the memory give back the stored history as a list
# of message objects rather than as strings. Strings (return_messages=False)
# would be used with a completion-based language model; we are using ChatGPT,
# which is message/chat-based.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

prompt = ChatPromptTemplate(
    messages=[
        # Static message, so a plain SystemMessage is enough (no need for a
        # SystemMessagePromptTemplate).
        SystemMessage(content="You are an AI that has access to a SQLite database.\n"
            f"The database has tables of: {tables}\n"
            "Do not make any assumptions about what tables exist "
            "or what columns exist. Instead, use the 'describe_tables' function"),
        # Placeholder for the chat history: it looks up the stored list of
        # messages (variable_name="chat_history") and expands into all the
        # previous messages that were exchanged. It is placed here
        # specifically so that the history comes before any brand-new human
        # message.
        MessagesPlaceholder(variable_name="chat_history"),
        HumanMessagePromptTemplate.from_template("{input}"),
        # Remember: a MessagesPlaceholder takes an input variable and expands
        # it into a list of messages. The agent scratchpad plays the same role
        # as MessagesPlaceholder(variable_name="messages") in
        # ConversationSummaryMemory (LangChainBasics.py).
        MessagesPlaceholder(variable_name="agent_scratchpad")
    ]
)

# The same tool list is passed to both the agent and the executor below.
tools = [run_query_tool, describe_tables_tool, write_report_tool]
agent = OpenAIFunctionsAgent(
    llm=chat,
    prompt=prompt,
    tools=tools
)

agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,
    # The memory object is attached to the agent executor, because that is the
    # component that needs to remember the list of messages.
    memory=memory
)

# Use `invoke` with an explicit input dict: calling the executor object
# directly goes through `Chain.__call__`, which is deprecated in LangChain
# 0.1+. The "input" key fills the "{input}" slot of the human message template.
agent_executor.invoke(
    {"input": "How many orders are there? Write the result to an html report."}
)

# This second request goes through the same executor, so it uses the
# ConversationBufferMemory and repeats the process.
agent_executor.invoke(
    {"input": "Repeat the exact same process for users."}
)
