In [2]:
# Read the OpenAI API key from Colab's userdata store (entry named 'API_KEY')
# instead of hard-coding it in the notebook.
from google.colab import userdata
openAI_API = userdata.get('API_KEY')

# basic chat implementation

In [3]:
# Install/upgrade LangChain plus the OpenAI and Chroma integration packages used below.
%pip install --upgrade --quiet langchain langchain-openai langchain-chroma

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m12.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m54.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.9/302.9 kB[0m [31m28.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m121.2/121.2 kB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m320.6/320.6 kB[0m [31m27.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m42.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m526.8/526.8 kB[0m [31m48.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.0/92.0 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━

In [4]:
import dotenv

# Try to load variables from a .env file. The recorded result of this run is False
# (presumably no .env file was found — confirm); the API key used by the cells
# below is the `openAI_API` value read from Colab userdata, not an env variable.
dotenv.load_dotenv()

False

In [5]:
from langchain_openai import ChatOpenAI

# Chat model used by the first examples; the API key is passed explicitly and a
# low sampling temperature (0.2) is requested.
chat = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0.2,openai_api_key=openAI_API)

If we invoke our chat model, the output is an AIMessage:

In [6]:
from langchain_core.messages import HumanMessage

# Send a one-message conversation: a list holding a single human message.
chat.invoke(
    [
        HumanMessage(
            content="Translate this sentence from English to Telugu: I love programming."
        )
    ]
)

AIMessage(content='నేను ప్రోగ్రమింగ్ నేర్చుకున్నాను. (Nēnu prōgramiṅg nērcukunnānu)', response_metadata={'token_usage': {'completion_tokens': 81, 'prompt_tokens': 20, 'total_tokens': 101}, 'model_name': 'gpt-3.5-turbo-1106', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-0e50e68a-5a16-4523-acce-9c8cd44e8655-0')

The model on its own does not have any concept of state. For example, if you ask a followup question:

In [7]:
# A separate call containing only the follow-up question; the earlier
# translation turn is not part of this input.
chat.invoke([HumanMessage(content="What did you just say?")])

AIMessage(content='I apologize if I said something unclear. Can you please let me know what you would like me to clarify or repeat?', response_metadata={'token_usage': {'completion_tokens': 24, 'prompt_tokens': 13, 'total_tokens': 37}, 'model_name': 'gpt-3.5-turbo-1106', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-60d252a4-bb9c-488e-a6b7-e9adcdbd4b1a-0')

We can see that it doesn't take the previous conversation turn into context, and cannot answer the question.

To get around this, we need to pass the entire conversation history into the model. Let's see what happens when we do that:

In [8]:
from langchain_core.messages import AIMessage

# Replay the whole conversation in one call: the original request, an assistant
# reply typed in by hand (it is not the text returned by the earlier call), and
# finally the follow-up question.
chat.invoke(
    [
        HumanMessage(
            content="Translate this sentence from English to Telugu: I love programming."
        ),
        AIMessage(content="నాకు ప్రోగ్రమింగ్ నచ్చు."),
        HumanMessage(content="What did you just say?"),
    ]
)

AIMessage(content='I said "నాకు ప్రోగ్రమింగ్ నచ్చు" which means "I love programming" in Telugu.', response_metadata={'token_usage': {'completion_tokens': 57, 'prompt_tokens': 77, 'total_tokens': 134}, 'model_name': 'gpt-3.5-turbo-1106', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-031a83f1-faf2-499a-a0b7-03cfa1d1f321-0')

**Prompt templates**

Let's define a prompt template to make formatting a bit easier. We can create a chain by piping it into the model:

In [9]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Prompt = a fixed system instruction followed by whatever list of messages the
# caller supplies under the "messages" input key.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer all questions to the best of your ability.",
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

# Pipe the formatted prompt into the chat model to get a single runnable chain.
chain = prompt | chat

The MessagesPlaceholder above inserts the chat messages passed into the chain's input under the `messages` key directly into the prompt. Then, we can invoke the chain like this:



**Message history**

As a shortcut for managing the chat history, we can use a MessageHistory class, which is responsible for saving and loading chat messages.

There are many built-in message history integrations that persist messages to a variety of databases, but for this quickstart we'll use an in-memory, demo message history called ChatMessageHistory.



In [10]:
from langchain.memory import ChatMessageHistory

# Create a demo history object and seed it with two human/AI exchanges.
demo_ephemeral_chat_history = ChatMessageHistory()

demo_ephemeral_chat_history.add_user_message("hi!")

demo_ephemeral_chat_history.add_ai_message("whats up?")
demo_ephemeral_chat_history.add_user_message("nothing")
demo_ephemeral_chat_history.add_ai_message("Ok Good bye")

# Display the stored messages (shown below in the order they were added).
demo_ephemeral_chat_history.messages

[HumanMessage(content='hi!'),
 AIMessage(content='whats up?'),
 HumanMessage(content='nothing'),
 AIMessage(content='Ok Good bye')]

In [11]:
# Append the new user request to the history, then send the whole history
# (including the seeded small talk) to the chain under the "messages" key.
demo_ephemeral_chat_history.add_user_message(
    "Translate this sentence from English to French: I love programming."
)

response = chain.invoke({"messages": demo_ephemeral_chat_history.messages})

response

AIMessage(content='The translation of "I love programming" in French is "J\'adore la programmation."', response_metadata={'token_usage': {'completion_tokens': 20, 'prompt_tokens': 65, 'total_tokens': 85}, 'model_name': 'gpt-3.5-turbo-1106', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-a49e5623-9faa-44c0-8dd4-2e876c5cc511-0')

In [12]:
# Record the model's reply before asking the follow-up, so the next call
# receives both the request and the answer it is being asked about.
demo_ephemeral_chat_history.add_ai_message(response)

demo_ephemeral_chat_history.add_user_message("What did you just say?")

chain.invoke({"messages": demo_ephemeral_chat_history.messages})

AIMessage(content='I said the translation of "I love programming" in French is "J\'adore la programmation."', response_metadata={'token_usage': {'completion_tokens': 22, 'prompt_tokens': 99, 'total_tokens': 121}, 'model_name': 'gpt-3.5-turbo-1106', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-af23e13a-59be-46ef-90ed-9a7cd8aab8d2-0')

**Retrievers**

We can set up and use a Retriever to pull domain-specific knowledge for our chatbot. To show this, let's expand the simple chatbot we created above so that it can answer questions about the LangSmith documentation.

In [13]:
# Install/upgrade the extra packages for the retrieval section (Chroma integration and BeautifulSoup).
%pip install --upgrade --quiet langchain-chroma beautifulsoup4

In [14]:
from langchain_community.document_loaders import WebBaseLoader

# Load the LangSmith overview page from its URL; `data` holds the loaded documents.
loader = WebBaseLoader("https://docs.smith.langchain.com/overview")
data = loader.load()

Next, we split it into smaller chunks that the LLM's context window can handle, so that they can be stored in a vector database:

In [15]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the loaded page into chunks with chunk_size=500 and chunk_overlap=10
# (NOTE(review): sizes are presumably measured in characters — confirm).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=10)
all_splits = text_splitter.split_documents(data)

Then we embed and store those chunks in a vector database:

In [16]:
# RecursiveCharacterTextSplitter was already imported in the previous cell and is
# not used in this one; the import is kept as-is.
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings


# Embed every chunk with OpenAI embeddings and index the results in a Chroma
# vector store. No persistence location is passed here.
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=OpenAIEmbeddings(openai_api_key=openAI_API)
)


#Retrieval

And finally, let's create a retriever from our initialized vectorstore:

In [17]:
# k is the number of chunks to retrieve. It has to be supplied through
# `search_kwargs`: `as_retriever` documents `search_type` and `search_kwargs`
# as its options, and a bare `k=` keyword is not one of them.
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

docs = retriever.invoke("What is langsmith?")

# Show the page description stored in the metadata of the top-ranked chunk.
docs[0].metadata["description"]

'LangSmith is a platform for building production-grade LLM applications. It allows you to closely monitor and evaluate your application, so you can ship quickly and with confidence. Use of LangChain is not necessary - LangSmith works on its own!'

In [18]:
# Rebuild the retriever so that it returns up to 10 chunks per query. The count
# must go in `search_kwargs`; passing `k=10` directly to `as_retriever` does not
# set the number of results.
retriever = vectorstore.as_retriever(search_kwargs={"k": 10})

docs = retriever.invoke("How langsmith is used for testing?")

# Show the page description stored in the metadata of the top-ranked chunk.
docs[0].metadata["description"]

'LangSmith is a platform for building production-grade LLM applications. It allows you to closely monitor and evaluate your application, so you can ship quickly and with confidence. Use of LangChain is not necessary - LangSmith works on its own!'

**Handling documents**

Let's modify our previous prompt to accept documents as context. We'll use a create_stuff_documents_chain helper function to "stuff" all of the input documents into the prompt, which also conveniently handles formatting. We use the ChatPromptTemplate.from_messages method to format the message input we want to pass to the model, including a MessagesPlaceholder where chat history messages will be directly injected:

In [19]:
from langchain.chains.combine_documents import create_stuff_documents_chain

# Rebind `chat` to a new model instance; unlike the first one, no temperature is
# passed here.
chat = ChatOpenAI(model="gpt-3.5-turbo-1106",openai_api_key=openAI_API)

# System prompt with a {context} slot for the retrieved documents, followed by
# the chat messages supplied under the "messages" key.
question_answering_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Answer the user's questions based on the below context:\n\n{context}",
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

# Chain that fills the prompt's context slot with the given documents and calls the model.
document_chain = create_stuff_documents_chain(chat, question_answering_prompt)

In [20]:
from langchain.memory import ChatMessageHistory

# Start over with a new history that contains only this question.
demo_ephemeral_chat_history = ChatMessageHistory()

demo_ephemeral_chat_history.add_user_message("how can langsmith help with testing?")

# `docs` is whatever the most recent retriever call above returned; it is passed
# in by hand here rather than being retrieved for this specific question.
document_chain.invoke(
    {
        "messages": demo_ephemeral_chat_history.messages,
        "context": docs,
    }
)

"LangSmith is a platform that allows you to closely monitor and evaluate your application, so you can ship quickly and with confidence. It provides the tools and features for building production-grade LLM applications. With LangSmith, you can log traces of your application's behavior and performance, and then use the evaluation tools to analyze and assess the performance of your application. This can be very helpful for testing as it allows you to closely monitor and evaluate your application's behavior, identify any issues or bottlenecks, and make improvements as needed. Additionally, LangSmith does not require the use of LangChain, so you can use it on its own for testing and evaluation purposes."

In [21]:
from langchain.memory import ChatMessageHistory

# Reset the history again so that it contains only the new question.
demo_ephemeral_chat_history = ChatMessageHistory()

demo_ephemeral_chat_history.add_user_message("what are the usecases of langsmith?")

# Same hand-supplied `docs` as in the previous cell; no new retrieval happens here.
document_chain.invoke(
    {
        "messages": demo_ephemeral_chat_history.messages,
        "context": docs,
    }
)

'LangSmith is a platform designed for building production-grade LLM (Language Model) applications. It is particularly useful for closely monitoring and evaluating your application, allowing you to ship quickly and with confidence. Some potential use cases for LangSmith include:\n\n1. Developing and deploying language-based applications\n2. Monitoring and evaluating the performance of language models\n3. Building and testing natural language processing (NLP) applications\n4. Ensuring the quality and accuracy of language-based AI applications\n5. Streamlining the development and deployment process for language models and applications\n\nOverall, LangSmith is geared towards empowering developers to create and manage robust and efficient language-based applications.'

**Creating a retrieval chain**

Next, let's integrate our retriever into the chain. Our retriever should retrieve information relevant to the last message we pass in from the user, so we extract it and use that as input to fetch relevant docs, which we add to the current chain as context. We pass context plus the previous messages into our document chain to generate a final answer.

We also use the RunnablePassthrough.assign() method to pass intermediate steps through at each invocation. Here's what it looks like:

In [22]:
from typing import Dict

from langchain_core.runnables import RunnablePassthrough


def parse_retriever_input(params: Dict):
    """Return the text of the most recent message in ``params["messages"]``.

    ``params`` is the chain input mapping; its "messages" entry must be a
    non-empty sequence whose items expose a ``content`` attribute.
    """
    latest_message = params["messages"][-1]
    return latest_message.content


# Step 1 adds a "context" key: the last message's text is sent to the retriever.
# Step 2 adds an "answer" key: the document chain runs on the input plus that context.
# The original input keys are passed through alongside both additions.
retrieval_chain = RunnablePassthrough.assign(
    context=parse_retriever_input | retriever,
).assign(
    answer=document_chain,
)

In [23]:
# Only the messages are supplied; the chain retrieves its own context this time.
response = retrieval_chain.invoke(
    {
        "messages": demo_ephemeral_chat_history.messages,
    }
)

# The result is a dict with the "messages", "context" and "answer" keys.
response

{'messages': [HumanMessage(content='what are the usecases of langsmith?')],
 'context': [Document(page_content='Skip to main contentLangSmith API DocsSearchGo to AppQuick startTutorialsHow-to guidesConceptsReferencePricingSelf-hostingQuick startOn this pageGet started with LangSmithLangSmith is a platform for building production-grade LLM applications. It allows you to closely monitor and evaluate your application, so you can ship quickly and with confidence. Use of LangChain is not necessary - LangSmith works on its own!1. Install LangSmith‚ÄãPythonTypeScriptpip install -U langsmithyarn add langchain', metadata={'description': 'LangSmith is a platform for building production-grade LLM applications. It allows you to closely monitor and evaluate your application, so you can ship quickly and with confidence. Use of LangChain is not necessary - LangSmith works on its own!', 'language': 'en', 'source': 'https://docs.smith.langchain.com/overview', 'title': 'Get started with LangSmith | \uf8

In [24]:
# Store the previous answer, then ask a follow-up. The retriever is queried with
# the text of the last message only, i.e. "tell me more about that!".
demo_ephemeral_chat_history.add_ai_message(response["answer"])

demo_ephemeral_chat_history.add_user_message("tell me more about that!")

retrieval_chain.invoke(
    {
        "messages": demo_ephemeral_chat_history.messages,
    },
)

{'messages': [HumanMessage(content='what are the usecases of langsmith?'),
  AIMessage(content='LangSmith is a platform designed for building production-grade LLM (Language Model) applications. Some of the use cases for LangSmith include:\n\n1. Language Model Development: LangSmith can be used to develop and monitor language models for various applications, such as chatbots, language translation, content generation, and more.\n\n2. Application Monitoring: It allows for closely monitoring and evaluating your application, enabling you to ship quickly and with confidence.\n\n3. Trace Logging and Evaluation: LangSmith provides ways to log traces and run evaluations, which is useful for tracking the performance and behavior of language models in real-world scenarios.\n\nOverall, LangSmith is suitable for developers and organizations looking to build and deploy language model applications while ensuring performance and reliability.'),
  HumanMessage(content='tell me more about that!')],
 'co

Nice! Our chatbot can now answer domain-specific questions in a conversational way.

As an aside, if you don't want to return all the intermediate steps, you can define your retrieval chain like this using a pipe directly into the document chain instead of the final .assign() call:



In [25]:
# Variant that pipes the context-augmented input straight into the document
# chain, so invoking it yields the document chain's output (the answer text)
# instead of a dict of intermediate steps.
retrieval_chain_with_only_answer = (
    RunnablePassthrough.assign(
        context=parse_retriever_input | retriever,
    )
    | document_chain
)

retrieval_chain_with_only_answer.invoke(
    {
        "messages": demo_ephemeral_chat_history.messages,
    },
)

"LangSmith offers a comprehensive set of tools and features to support the development and deployment of language model applications. Here are some key aspects to consider:\n\n1. Trace Logging: LangSmith provides multiple ways to log traces, allowing you to capture and analyze the behavior and performance of your language models in real-world usage scenarios.\n\n2. Evaluation Framework: The platform includes built-in evaluators to grade the results of language model tests, enabling you to assess the accuracy and effectiveness of your models.\n\n3. API Key Management: LangSmith allows you to create and manage API keys, providing secure access to the platform's resources and services.\n\n4. Environment Setup: The platform offers guidance on setting up your development environment, including the use of environment variables and integration with other API services, such as the OpenAI API.\n\n5. Language Support: LangSmith supports multiple programming languages, including Python and JavaSc

Query transformation


In [26]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableBranch

# We need a prompt that we can pass into an LLM to generate a transformed search query

# Rebind `chat` once more, this time with temperature=0.2 again.
chat = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0.2,openai_api_key=openAI_API)

# The conversation comes first and the instruction last, so "the above
# conversation" in the instruction refers to the injected messages.
query_transform_prompt = ChatPromptTemplate.from_messages(
    [
        MessagesPlaceholder(variable_name="messages"),
        (
            "user",
            "Given the above conversation, generate a search query to look up in order to get information relevant to the conversation. Only respond with the query, nothing else.",
        ),
    ]
)

query_transforming_retriever_chain = RunnableBranch(
    (
        # Condition: the input holds exactly one message.
        lambda x: len(x.get("messages", [])) == 1,
        # If only one message, then we just pass that message's content to retriever
        (lambda x: x["messages"][-1].content) | retriever,
    ),
    # Default branch (any other message count): have the LLM rewrite the conversation
    # into a search query, parse it to a plain string, then pass it to the retriever
    query_transform_prompt | chat | StrOutputParser() | retriever,
).with_config(run_name="chat_retriever_chain")

In [27]:
# Rebuild the document chain so that it uses the `chat` instance defined in the
# previous cell.
document_chain = create_stuff_documents_chain(chat, question_answering_prompt)

# Same two-step shape as the earlier retrieval chain, but "context" now comes
# from the query-transforming retriever chain.
conversational_retrieval_chain = RunnablePassthrough.assign(
    context=query_transforming_retriever_chain,
).assign(
    answer=document_chain,
)

# Start the next demo from an empty history.
demo_ephemeral_chat_history = ChatMessageHistory()

In [28]:
# First turn: the history holds a single message, so the branch that sends the
# message text directly to the retriever is taken.
demo_ephemeral_chat_history.add_user_message("how can langsmith help with testing?")

response = conversational_retrieval_chain.invoke(
    {"messages": demo_ephemeral_chat_history.messages},
)

# The answer is appended before `response` is displayed, which is why the
# "messages" list shown in the output below already contains the AI reply.
demo_ephemeral_chat_history.add_ai_message(response["answer"])

response

{'messages': [HumanMessage(content='how can langsmith help with testing?'),
  AIMessage(content='LangSmith is a platform designed to help you closely monitor and evaluate your application, allowing you to build production-grade LLM (Language Model) applications with confidence. It provides tools and features to help you test and evaluate your application, so you can ship quickly and with confidence. LangSmith offers the ability to log traces, create API keys, and set up your environment for testing and evaluation. Additionally, it provides how-to guides and tutorials to help you learn more about evaluation and testing with LangSmith. Overall, LangSmith is a valuable tool for testing and monitoring the performance of your LLM applications.')],
 'context': [Document(page_content='Get started with LangSmith | \uf8ffü¶úÔ∏è\uf8ffüõ†Ô∏è LangSmith', metadata={'description': 'LangSmith is a platform for building production-grade LLM applications. It allows you to closely monitor and evaluate y

In [29]:
# Second turn: the history now holds three messages, so the default branch runs
# and the LLM first rewrites the conversation into a search query.
demo_ephemeral_chat_history.add_user_message("tell me more about that!")

conversational_retrieval_chain.invoke(
    {"messages": demo_ephemeral_chat_history.messages}
)

{'messages': [HumanMessage(content='how can langsmith help with testing?'),
  AIMessage(content='LangSmith is a platform designed to help you closely monitor and evaluate your application, allowing you to build production-grade LLM (Language Model) applications with confidence. It provides tools and features to help you test and evaluate your application, so you can ship quickly and with confidence. LangSmith offers the ability to log traces, create API keys, and set up your environment for testing and evaluation. Additionally, it provides how-to guides and tutorials to help you learn more about evaluation and testing with LangSmith. Overall, LangSmith is a valuable tool for testing and monitoring the performance of your LLM applications.'),
  HumanMessage(content='tell me more about that!')],
 'context': [Document(page_content='Skip to main contentLangSmith API DocsSearchGo to AppQuick startTutorialsHow-to guidesConceptsReferencePricingSelf-hostingQuick startOn this pageGet started wi

# Memory management

##Memory to Store




###Neo4j

In [30]:
%pip install neo4j

Collecting neo4j
  Downloading neo4j-5.20.0.tar.gz (202 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m203.0/203.0 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: neo4j
  Building wheel for neo4j (pyproject.toml) ... [?25l[?25hdone
  Created wheel for neo4j: filename=neo4j-5.20.0-py3-none-any.whl size=280771 sha256=79beb827ac7be4bab86dabcbfc0f4d8cd8c71573b0f5fb09be3931a41b1d7180
  Stored in directory: /root/.cache/pip/wheels/cb/12/66/764554d079caad4b9a11a02cfc0d200dd876d12935b9cf7e64
Successfully built neo4j
Installing collected packages: neo4j
Successfully installed neo4j-5.20.0


In [31]:
from langchain_community.chat_message_histories import Neo4jChatMessageHistory

# Needs a reachable Neo4j server: the recorded run of this cell failed with the
# ValueError shown below because nothing could be reached at bolt://localhost:7687.
# NOTE(review): the username/password below are literal placeholder values —
# load real credentials from a secret store instead of committing them.
history = Neo4jChatMessageHistory(
    url="bolt://localhost:7687",
    username="neo4j",
    password="password",
    session_id="session_id_1",
)

history.add_user_message("hi!")

history.add_ai_message("whats up?")

ValueError: Could not connect to Neo4j database. Please ensure that the url is correct

In [None]:
history.messages

### Streamlit

In [None]:
%pip install streamlit

In [None]:
from langchain_community.chat_message_histories import (
    StreamlitChatMessageHistory,
)

# Message history stored under the key "chat_messages".
# NOTE(review): presumably this lives in Streamlit session state and therefore
# needs to run inside a Streamlit app — confirm; this cell has no recorded run.
history = StreamlitChatMessageHistory(key="chat_messages")

history.add_user_message("hi!")
history.add_ai_message("whats up?")

##We can use it directly to store conversation turns for our chain:



In [32]:
# Start from an empty history and manage every turn by hand.
demo_ephemeral_chat_history = ChatMessageHistory()

input1 = "Translate this sentence from English to French: I love programming."

demo_ephemeral_chat_history.add_user_message(input1)

# `chain` is still the prompt | chat chain built earlier with the "messages" placeholder.
response = chain.invoke(
    {
        "messages": demo_ephemeral_chat_history.messages,
    }
)

# Store the reply so the follow-up question below is answered with full context.
demo_ephemeral_chat_history.add_ai_message(response)

input2 = "What did I just ask you?"

demo_ephemeral_chat_history.add_user_message(input2)

chain.invoke(
    {
        "messages": demo_ephemeral_chat_history.messages,
    }
)

AIMessage(content='You just asked me to translate the sentence "I love programming" from English to French.', response_metadata={'token_usage': {'completion_tokens': 18, 'prompt_tokens': 74, 'total_tokens': 92}, 'model_name': 'gpt-3.5-turbo-1106', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-3c05bfff-69b0-409d-a40b-1292a428ca77-0')

##**Automatic history management**

The previous examples pass messages to the chain explicitly. This is a completely acceptable approach, but it does require external management of new messages. LangChain also includes a wrapper for LCEL chains that can handle this process automatically, called ***RunnableWithMessageHistory.***

To show how it works, let's slightly modify the above prompt to take a final input variable that populates a HumanMessage template after the chat history. This means that we will expect a chat_history parameter that contains all messages BEFORE the current messages instead of all messages:



In [33]:
# New prompt layout: system instruction, then the prior turns under "chat_history",
# then the current user text under "input". This rebinds `prompt` and `chain`.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer all questions to the best of your ability.",
        ),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)

chain = prompt | chat

We'll pass the latest input to the conversation here and let the RunnableWithMessageHistory class wrap our chain and do the work of appending that input variable to the chat history.

In [34]:
from langchain_core.runnables.history import RunnableWithMessageHistory

demo_ephemeral_chat_history_for_chain = ChatMessageHistory()

# The factory ignores `session_id` and always returns the same history object,
# so every session shares one conversation in this demo.
# The two key names match the prompt's "{input}" variable and "chat_history" placeholder.
chain_with_message_history = RunnableWithMessageHistory(
    chain,
    lambda session_id: demo_ephemeral_chat_history_for_chain,
    input_messages_key="input",
    history_messages_key="chat_history",
)

This class takes a few parameters in addition to the chain that we want to wrap:

A factory function that returns a message history for a given session id. This allows your chain to handle multiple users at once by loading different messages for different conversations.
An input_messages_key that specifies which part of the input should be tracked and stored in the chat history. In this example, we want to track the string passed in as input.

A history_messages_key that specifies what the previous messages should be injected into the prompt as. Our prompt has a MessagesPlaceholder named chat_history, so we specify this property to match.

(For chains with multiple outputs) an output_messages_key which specifies which output to store as history. This is the inverse of input_messages_key.

We can invoke this new chain as normal, with an additional configurable field that specifies the particular session_id to pass to the factory function. This is unused for the demo, but in real-world chains, you'll want to return a chat history corresponding to the passed session:

**Modifying chat history**

Modifying stored chat messages can help your chatbot handle a variety of situations. Here are some examples:



##**Trimming messages**

LLMs and chat models have limited context windows, and even if you're not directly hitting limits, you may want to limit the amount of distraction the model has to deal with. One solution is to only load and store the most recent n messages. Let's use an example history with some preloaded messages:

In [35]:
# Fresh history preloaded with four messages; the user's name appears only in the first one.
demo_ephemeral_chat_history = ChatMessageHistory()

demo_ephemeral_chat_history.add_user_message("Hey there! I'm Nemo.")
demo_ephemeral_chat_history.add_ai_message("Hello!")
demo_ephemeral_chat_history.add_user_message("How are you today?")
demo_ephemeral_chat_history.add_ai_message("Fine thanks!")

demo_ephemeral_chat_history.messages

[HumanMessage(content="Hey there! I'm Nemo."),
 AIMessage(content='Hello!'),
 HumanMessage(content='How are you today?'),
 AIMessage(content='Fine thanks!')]

In [36]:
# Same prompt and chain definition as in the automatic-history section, repeated here.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer all questions to the best of your ability.",
        ),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)

chain = prompt | chat

# This time the factory returns the preloaded `demo_ephemeral_chat_history`
# (again regardless of the session id).
chain_with_message_history = RunnableWithMessageHistory(
    chain,
    lambda session_id: demo_ephemeral_chat_history,
    input_messages_key="input",
    history_messages_key="chat_history",
)

# The second argument is the run config; the session id is passed to the
# factory above, which does not use it.
chain_with_message_history.invoke(
    {"input": "What's my name?"},
    {"configurable": {"session_id": "unused"}},
)



AIMessage(content='Your name is Nemo.', response_metadata={'token_usage': {'completion_tokens': 6, 'prompt_tokens': 66, 'total_tokens': 72}, 'model_name': 'gpt-3.5-turbo-1106', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-5313c842-178f-4a18-a0c5-0bb069742a6f-0')

We can see the chain remembers the preloaded name.

But let's say we have a very small context window, and we want to trim the number of messages passed to the chain to only the 2 most recent ones. We can use the clear method to remove messages and re-add them to the history. We don't have to, but let's put this method at the front of our chain to ensure it's always called:



In [37]:
def trim_messages(chain_input):
    """Cut the global ``demo_ephemeral_chat_history`` down to its last two messages.

    ``chain_input`` is accepted so the function can be used as a chain step, but
    it is not read. Returns ``True`` when the history was trimmed and ``False``
    when it already held two messages or fewer (in which case it is untouched).
    """
    history = demo_ephemeral_chat_history
    snapshot = history.messages

    if len(snapshot) > 2:
        # Empty the history, then re-add only the two newest messages from the
        # snapshot taken above, oldest first.
        history.clear()
        for kept_message in snapshot[-2:]:
            history.add_message(kept_message)
        return True

    return False


# Run trim_messages first (its boolean result is stored under "messages_trimmed"),
# then hand the input on to the history-aware chain.
chain_with_trimming = (
    RunnablePassthrough.assign(messages_trimmed=trim_messages)
    | chain_with_message_history
)

Let's call this new chain and check the messages afterwards:

In [38]:
# The history is trimmed before the model is called, so only the two most
# recent stored messages accompany this question.
chain_with_trimming.invoke(
    {"input": "Where does P. Sherman live?"},
    {"configurable": {"session_id": "unused"}},
)



AIMessage(content='P. Sherman is a fictional character from the movie "Finding Nemo," and he lives at 42 Wallaby Way, Sydney.', response_metadata={'token_usage': {'completion_tokens': 27, 'prompt_tokens': 53, 'total_tokens': 80}, 'model_name': 'gpt-3.5-turbo-1106', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-6f59435c-866d-4bef-b685-1789661432ed-0')

In [39]:
demo_ephemeral_chat_history.messages

[HumanMessage(content="What's my name?"),
 AIMessage(content='Your name is Nemo.', response_metadata={'token_usage': {'completion_tokens': 6, 'prompt_tokens': 66, 'total_tokens': 72}, 'model_name': 'gpt-3.5-turbo-1106', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-5313c842-178f-4a18-a0c5-0bb069742a6f-0'),
 HumanMessage(content='Where does P. Sherman live?'),
 AIMessage(content='P. Sherman is a fictional character from the movie "Finding Nemo," and he lives at 42 Wallaby Way, Sydney.', response_metadata={'token_usage': {'completion_tokens': 27, 'prompt_tokens': 53, 'total_tokens': 80}, 'model_name': 'gpt-3.5-turbo-1106', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-6f59435c-866d-4bef-b685-1789661432ed-0')]

And we can see that our history has removed the two oldest messages while still adding the most recent conversation at the end. The next time the chain is called, trim_messages will be called again, and only the two most recent messages will be passed to the model. In this case, this means that the model will forget the name we gave it the next time we invoke it:



In [40]:
# Trimming runs again first; the messages that mentioned the name are dropped
# before the model is asked this question.
chain_with_trimming.invoke(
    {"input": "What is my name?"},
    {"configurable": {"session_id": "unused"}},
)



AIMessage(content="I'm sorry, I don't have access to your personal information. Therefore, I don't know your name.", response_metadata={'token_usage': {'completion_tokens': 23, 'prompt_tokens': 74, 'total_tokens': 97}, 'model_name': 'gpt-3.5-turbo-1106', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-ca74b513-b13b-4478-ac34-07686a8a9069-0')

In [41]:
demo_ephemeral_chat_history.messages

[HumanMessage(content='Where does P. Sherman live?'),
 AIMessage(content='P. Sherman is a fictional character from the movie "Finding Nemo," and he lives at 42 Wallaby Way, Sydney.', response_metadata={'token_usage': {'completion_tokens': 27, 'prompt_tokens': 53, 'total_tokens': 80}, 'model_name': 'gpt-3.5-turbo-1106', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-6f59435c-866d-4bef-b685-1789661432ed-0'),
 HumanMessage(content='What is my name?'),
 AIMessage(content="I'm sorry, I don't have access to your personal information. Therefore, I don't know your name.", response_metadata={'token_usage': {'completion_tokens': 23, 'prompt_tokens': 74, 'total_tokens': 97}, 'model_name': 'gpt-3.5-turbo-1106', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-ca74b513-b13b-4478-ac34-07686a8a9069-0')]

##**Summary memory**

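We can use this same pattern in other ways too. For example, we could use an additional LLM call to generate a summary of the conversation before calling our chain. Note that the cells below reuse the already-trimmed chat history and the chain from the previous section rather than recreating them, so the summary can only cover the messages that survived trimming: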

In [42]:
def summarize_messages(chain_input):
    """Collapse the stored chat history into a single summary message.

    Reads the messages held by ``demo_ephemeral_chat_history``. When there
    are none, nothing is changed and ``False`` is returned. Otherwise the
    ``chat`` model is asked to summarise them, the history is cleared, the
    summary message is stored as its only entry, and ``True`` is returned.

    ``chain_input`` is accepted but not used.
    """
    history_so_far = demo_ephemeral_chat_history.messages
    if not history_so_far:
        return False

    condense_instruction = (
        "user",
        "Distill the above chat messages into a single summary message. Include as many specific details as you can.",
    )
    prompt = ChatPromptTemplate.from_messages(
        [MessagesPlaceholder(variable_name="chat_history"), condense_instruction]
    )
    summary = (prompt | chat).invoke({"chat_history": history_so_far})

    # Replace the whole history with the single summary message.
    demo_ephemeral_chat_history.clear()
    demo_ephemeral_chat_history.add_message(summary)
    return True


# Run summarize_messages first (its boolean result is stored under the
# "messages_summarized" key), then pass the input on to the history-aware chain.
chain_with_summarization = (
    RunnablePassthrough.assign(messages_summarized=summarize_messages)
    | chain_with_message_history
)

In [43]:
chain_with_summarization.invoke(
    {"input": "What did I say my name was?"},
    {"configurable": {"session_id": "unused"}},
)



AIMessage(content="I'm sorry, but I don't have access to personal information, so I don't know your name. If you mentioned your name earlier in the conversation, I don't have the ability to remember it. How can I assist you today?", response_metadata={'token_usage': {'completion_tokens': 49, 'prompt_tokens': 96, 'total_tokens': 145}, 'model_name': 'gpt-3.5-turbo-1106', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-bd4d8d15-e87b-418e-91be-81452441e03f-0')

In [44]:
demo_ephemeral_chat_history.messages

[AIMessage(content='The chat discussed the whereabouts of P. Sherman, a fictional character from the movie "Finding Nemo," who lives at 42 Wallaby Way, Sydney. Additionally, it mentioned that the assistant does not have access to personal information, so it does not know the user\'s name.', response_metadata={'token_usage': {'completion_tokens': 57, 'prompt_tokens': 106, 'total_tokens': 163}, 'model_name': 'gpt-3.5-turbo-1106', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-63822236-c584-403c-9faf-773ec123927b-0'),
 HumanMessage(content='What did I say my name was?'),
 AIMessage(content="I'm sorry, but I don't have access to personal information, so I don't know your name. If you mentioned your name earlier in the conversation, I don't have the ability to remember it. How can I assist you today?", response_metadata={'token_usage': {'completion_tokens': 49, 'prompt_tokens': 96, 'total_tokens': 145}, 'model_name': 'gpt-3.5-turbo-1106', 'system_fingerprint

#Tool Calling

We use the term "tool calling" interchangeably with "function calling". Although function calling is sometimes meant to refer to invocations of a single function, we treat all models as though they can return multiple tool or function calls in each message.

Tool calling allows a model to respond to a given prompt by generating output that matches a user-defined schema. While the name implies that the model is performing some action, this is actually not the case! The model is coming up with the arguments to a tool, and actually running the tool (or not) is up to the user - for example, if you want to extract output matching some schema from unstructured text, you could give the model an "extraction" tool that takes parameters matching the desired schema, then treat the generated output as your final result.

A tool call includes a name, arguments dict, and an optional identifier. The arguments dict is structured

***{argument_name: argument_value}.***

Many LLM providers, including Anthropic, Cohere, Google, Mistral, OpenAI, and others, support variants of a tool calling feature.

These features typically allow requests to the LLM to include available tools and their schemas, and for responses to include calls to these tools.

*For instance, given a search engine tool, an LLM might handle a query by first issuing a call to the search engine.*

The system calling the LLM can receive the tool call, execute it, and return the output to the LLM to inform its response. LangChain includes a suite of built-in tools and supports several methods for defining your own custom tools. Tool-calling is extremely useful for building tool-using chains and agents, and for getting structured outputs from models more generally.

Providers adopt different conventions for formatting tool schemas and tool calls.

##Anthropic

For instance, Anthropic returns tool calls as parsed structures within a larger content block:

In [45]:
# Illustrative shape only (placeholder values): a text block followed by a
# "tool_use" block carrying the tool name, its id and already-parsed input.
[
  {
    "text": "<thinking>\nI should use a tool.\n</thinking>",
    "type": "text"
  },
  {
    "id": "id_value",
    "input": {"arg_name": "arg_value"},
    "name": "tool_name",
    "type": "tool_use"
  }
]

[{'text': '<thinking>\nI should use a tool.\n</thinking>', 'type': 'text'},
 {'id': 'id_value',
  'input': {'arg_name': 'arg_value'},
  'name': 'tool_name',
  'type': 'tool_use'}]

##OpenAI

whereas OpenAI separates tool calls into a distinct parameter, with arguments as JSON strings:

In [46]:
# Illustrative shape only (placeholder values): tool calls live under a
# separate "tool_calls" key, and "arguments" is a JSON string, not a dict.
{
  "tool_calls": [
    {
      "id": "id_value",
      "function": {
        "arguments": '{"arg_name": "arg_value"}',
        "name": "tool_name"
      },
      "type": "function"
    }
  ]
}

{'tool_calls': [{'id': 'id_value',
   'function': {'arguments': '{"arg_name": "arg_value"}', 'name': 'tool_name'},
   'type': 'function'}]}

##Request: Passing tools to model

For a model to be able to invoke tools, you need to pass tool schemas to it when making a chat request. LangChain ChatModels supporting tool calling features implement a .bind_tools method, which receives a list of LangChain tool objects, Pydantic classes, or JSON Schemas and binds them to the chat model in the provider-specific expected format. Subsequent invocations of the bound chat model will include tool schemas in every call to the model API.

###**Defining tool schemas: LangChain Tool**

For example, we can define the schema for custom tools using the **@tool** decorator on Python functions:

In [47]:
from langchain_core.tools import tool


# Each @tool-decorated function below defines the schema of a custom tool.
# NOTE(review): the docstrings are presumably used as the tool descriptions
# sent to the model, so keep them accurate -- confirm against the @tool docs.
@tool
def add(a: int, b: int) -> int:
    """Adds a and b.

    Args:
        a: first int
        b: second int
    """
    return a + b


@tool
def multiply(a: int, b: int) -> int:
    """Multiplies a and b.

    Args:
        a: first int
        b: second int
    """
    return a * b


# The list of tools to bind to a chat model.
tools = [add, multiply]

###Defining tool schemas: Pydantic class
We can equivalently define the schema using Pydantic. Pydantic is useful when your tool inputs are more complex:

In [48]:
from langchain_core.pydantic_v1 import BaseModel, Field


# Note that the docstrings here are crucial, as they will be passed along
# to the model along with the class name.
# NOTE: these classes rebind the names `add` and `multiply`, replacing the
# @tool functions that were defined under the same names in the previous cell.
class add(BaseModel):
    """Add two integers together."""

    a: int = Field(..., description="First integer")
    b: int = Field(..., description="Second integer")


class multiply(BaseModel):
    """Multiply two integers together."""

    a: int = Field(..., description="First integer")
    b: int = Field(..., description="Second integer")


# `tools` now holds the Pydantic schema classes rather than the @tool functions.
tools = [add, multiply]

##**Request: Forcing a tool call**



When you just use bind_tools(tools), the model can choose whether to return one tool call, multiple tool calls, or no tool calls at all. Some models support a tool_choice parameter that gives you some ability to force the model to call a tool. For models that support this, you can pass in the name of the tool you want the model to always call, e.g. tool_choice="xyz_tool_name". Or you can pass in tool_choice="any" to force the model to call at least one tool, without specifying which tool.


In [49]:
# always_multiply_llm = llm.bind_tools([multiply], tool_choice="multiply")

In [50]:
# always_multiply_llm = llm.bind_tools([multiply], tool_choice="any")

##Response: Reading tool calls from model output



If tool calls are included in an LLM response, they are attached to the corresponding AIMessage or AIMessageChunk (when streaming) as a list of ToolCall objects in the .tool_calls attribute. A ToolCall is a typed dict that includes a tool name, dict of argument values, and (optionally) an identifier. Messages with no tool calls default to an empty list for this attribute.


In [51]:
# query = "What is 3 * 12? Also, what is 11 + 49?"

# llm_with_tools.invoke(query).tool_calls

The .tool_calls attribute should contain valid tool calls. Note that on occasion, model providers may output malformed tool calls (e.g., arguments that are not valid JSON). When parsing fails in these cases, instances of InvalidToolCall are populated in the .invalid_tool_calls attribute. An InvalidToolCall can have a name, string arguments, identifier, and error message.

If desired, output parsers can further process the output. For example, we can convert back to the original Pydantic class:



In [52]:
# from langchain_core.output_parsers.openai_tools import PydanticToolsParser

# chain = llm_with_tools | PydanticToolsParser(tools=[multiply, add])
# chain.invoke(query)

##Request: Passing tool outputs to model

If we're using the model-generated tool invocations to actually call tools and want to pass the tool results back to the model, we can do so using ToolMessages.

In [53]:
from langchain_core.messages import HumanMessage, ToolMessage
from langchain_openai import ChatOpenAI

In [54]:
llm = ChatOpenAI(model="gpt-3.5-turbo-0125",openai_api_key=openAI_API)


In [55]:
llm_with_tools = llm.bind_tools(tools)

In [56]:
always_multiply_llm = llm.bind_tools([multiply], tool_choice="multiply")

In [57]:
always_call_tool_llm = llm.bind_tools([add, multiply], tool_choice="any")

In [58]:
query = "What is 3 * 12? Also, what is 11 + 49?"

llm_with_tools.invoke(query).tool_calls

[{'name': 'multiply',
  'args': {'a': 3, 'b': 12},
  'id': 'call_l1huJaw6zqYgT0RAtaVNJ8u2'},
 {'name': 'add',
  'args': {'a': 11, 'b': 49},
  'id': 'call_wGEnw99Rs7EjAGAK09FNr9tz'}]

In [59]:
from langchain_core.output_parsers.openai_tools import PydanticToolsParser

chain = llm_with_tools | PydanticToolsParser(tools=[multiply, add])
chain.invoke(query)

[multiply(a=3, b=12), add(a=11, b=49)]

In [60]:
# Stream the response and print the tool-call fragments carried by each
# individual chunk (name/id arrive first, then pieces of the args string).
async for chunk in llm_with_tools.astream(query):
    print(chunk.tool_call_chunks)

[]
[{'name': 'multiply', 'args': '', 'id': 'call_SQVmQVXtZFeYiRWdzCGQ9poI', 'index': 0}]
[{'name': None, 'args': '{"a"', 'id': None, 'index': 0}]
[{'name': None, 'args': ': 3, ', 'id': None, 'index': 0}]
[{'name': None, 'args': '"b": 1', 'id': None, 'index': 0}]
[{'name': None, 'args': '2}', 'id': None, 'index': 0}]
[{'name': 'add', 'args': '', 'id': 'call_EEV29EkCcRlLgBY2Cq64Rq6a', 'index': 1}]
[{'name': None, 'args': '{"a"', 'id': None, 'index': 1}]
[{'name': None, 'args': ': 11,', 'id': None, 'index': 1}]
[{'name': None, 'args': ' "b": ', 'id': None, 'index': 1}]
[{'name': None, 'args': '49}', 'id': None, 'index': 1}]
[]


In [61]:
# Accumulate the streamed chunks with `+` so the partial tool-call fragments
# are merged, and print the merged fragments after every chunk.
first = True
async for chunk in llm_with_tools.astream(query):
    if first:
        # The first chunk seeds the accumulator.
        gathered = chunk
        first = False
    else:
        gathered = gathered + chunk

    print(gathered.tool_call_chunks)

[]
[{'name': 'multiply', 'args': '', 'id': 'call_4nEwpUZffPzfANfPHH432QMt', 'index': 0}]
[{'name': 'multiply', 'args': '{"a"', 'id': 'call_4nEwpUZffPzfANfPHH432QMt', 'index': 0}]
[{'name': 'multiply', 'args': '{"a": 3, ', 'id': 'call_4nEwpUZffPzfANfPHH432QMt', 'index': 0}]
[{'name': 'multiply', 'args': '{"a": 3, "b": 1', 'id': 'call_4nEwpUZffPzfANfPHH432QMt', 'index': 0}]
[{'name': 'multiply', 'args': '{"a": 3, "b": 12}', 'id': 'call_4nEwpUZffPzfANfPHH432QMt', 'index': 0}]
[{'name': 'multiply', 'args': '{"a": 3, "b": 12}', 'id': 'call_4nEwpUZffPzfANfPHH432QMt', 'index': 0}, {'name': 'add', 'args': '', 'id': 'call_vgTp6hBSuSMfOVcufbNho7xa', 'index': 1}]
[{'name': 'multiply', 'args': '{"a": 3, "b": 12}', 'id': 'call_4nEwpUZffPzfANfPHH432QMt', 'index': 0}, {'name': 'add', 'args': '{"a"', 'id': 'call_vgTp6hBSuSMfOVcufbNho7xa', 'index': 1}]
[{'name': 'multiply', 'args': '{"a": 3, "b": 12}', 'id': 'call_4nEwpUZffPzfANfPHH432QMt', 'index': 0}, {'name': 'add', 'args': '{"a": 11,', 'id': 'call_

In [62]:
# Same accumulation as before, but print `.tool_calls` of the merged message:
# the args show up as (partially filled) dicts rather than raw string pieces.
first = True
async for chunk in llm_with_tools.astream(query):
    if first:
        # The first chunk seeds the accumulator.
        gathered = chunk
        first = False
    else:
        gathered = gathered + chunk

    print(gathered.tool_calls)

[]
[]
[{'name': 'multiply', 'args': {}, 'id': 'call_sh5v83H8tJgq7NJDMuVKhYZi'}]
[{'name': 'multiply', 'args': {'a': 3}, 'id': 'call_sh5v83H8tJgq7NJDMuVKhYZi'}]
[{'name': 'multiply', 'args': {'a': 3, 'b': 1}, 'id': 'call_sh5v83H8tJgq7NJDMuVKhYZi'}]
[{'name': 'multiply', 'args': {'a': 3, 'b': 12}, 'id': 'call_sh5v83H8tJgq7NJDMuVKhYZi'}]
[{'name': 'multiply', 'args': {'a': 3, 'b': 12}, 'id': 'call_sh5v83H8tJgq7NJDMuVKhYZi'}]
[{'name': 'multiply', 'args': {'a': 3, 'b': 12}, 'id': 'call_sh5v83H8tJgq7NJDMuVKhYZi'}, {'name': 'add', 'args': {}, 'id': 'call_7hKZckWyFyaTb40sAfoK5o39'}]
[{'name': 'multiply', 'args': {'a': 3, 'b': 12}, 'id': 'call_sh5v83H8tJgq7NJDMuVKhYZi'}, {'name': 'add', 'args': {'a': 11}, 'id': 'call_7hKZckWyFyaTb40sAfoK5o39'}]
[{'name': 'multiply', 'args': {'a': 3, 'b': 12}, 'id': 'call_sh5v83H8tJgq7NJDMuVKhYZi'}, {'name': 'add', 'args': {'a': 11}, 'id': 'call_7hKZckWyFyaTb40sAfoK5o39'}]
[{'name': 'multiply', 'args': {'a': 3, 'b': 12}, 'id': 'call_sh5v83H8tJgq7NJDMuVKhYZi'}, 

In [63]:
# Re-declare `add` and `multiply` as @tool functions: the names were rebound
# to Pydantic classes earlier in the notebook, and the code below needs tool
# objects it can call with `.invoke(...)`.
@tool
def add(a: int, b: int) -> int:
    """Adds a and b.

    Args:
        a: first int
        b: second int
    """
    return a + b


@tool
def multiply(a: int, b: int) -> int:
    """Multiplies a and b.

    Args:
        a: first int
        b: second int
    """
    return a * b


tools = [add, multiply]
llm_with_tools = llm.bind_tools(tools)

# Dispatch table built once (not on every loop iteration), keyed by the
# lower-cased tool name the model is expected to emit.
tools_by_name = {"add": add, "multiply": multiply}

messages = [HumanMessage(query)]
ai_msg = llm_with_tools.invoke(messages)
messages.append(ai_msg)

for tool_call in ai_msg.tool_calls:
    selected_tool = tools_by_name.get(tool_call["name"].lower())
    if selected_tool is None:
        # The model may name a tool that was never bound. Report that back to
        # the model instead of crashing with a KeyError, so every tool call
        # still gets a matching ToolMessage.
        available = ", ".join(tools_by_name)
        tool_output = f"Error: unknown tool {tool_call['name']!r}. Available tools: {available}."
    else:
        tool_output = selected_tool.invoke(tool_call["args"])
    # ToolMessage content is text; convert explicitly rather than relying on
    # implicit coercion of the int returned by the tool.
    messages.append(ToolMessage(str(tool_output), tool_call_id=tool_call["id"]))

messages

[HumanMessage(content='What is 3 * 12? Also, what is 11 + 49?'),
 AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_YVUlxH4glylWzHDwn7A1tJc7', 'function': {'arguments': '{"a": 3, "b": 12}', 'name': 'multiply'}, 'type': 'function'}, {'id': 'call_k5Gmggz6U7bbiekVwOUZdXNx', 'function': {'arguments': '{"a": 11, "b": 49}', 'name': 'add'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 49, 'prompt_tokens': 144, 'total_tokens': 193}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-44c8fb08-bde8-46e9-84e3-6f1a717bbbf4-0', tool_calls=[{'name': 'multiply', 'args': {'a': 3, 'b': 12}, 'id': 'call_YVUlxH4glylWzHDwn7A1tJc7'}, {'name': 'add', 'args': {'a': 11, 'b': 49}, 'id': 'call_k5Gmggz6U7bbiekVwOUZdXNx'}]),
 ToolMessage(content='36', tool_call_id='call_YVUlxH4glylWzHDwn7A1tJc7'),
 ToolMessage(content='60', tool_call_id='call_k5Gmggz6U7bbiekVwOUZdXNx')]

In [64]:
llm_with_tools.invoke(messages)

AIMessage(content='3 * 12 = 36\n11 + 49 = 60', response_metadata={'token_usage': {'completion_tokens': 16, 'prompt_tokens': 209, 'total_tokens': 225}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-8eeff2f4-9ed4-467f-87db-a784f1436adf-0')

###Few-shot prompting

In [65]:
llm_with_tools.invoke(
    "Whats 119 times 8 minus 20. Don't do any math yourself, only use tools for math. Respect order of operations"
).tool_calls

[{'name': 'multiply',
  'args': {'a': 119, 'b': 8},
  'id': 'call_TxXzf2fIc70fbHYwUE0lhpLC'},
 {'name': 'subtract',
  'args': {'a': 952, 'b': 20},
  'id': 'call_L2TTjjqFER0WN5gGzB2xsibI'}]

The model shouldn't be trying to subtract anything yet, since it technically can't know the result of 119 * 8 yet. (Note that it also called a `subtract` tool, which is not one of the tools we bound.)

By adding a prompt with some examples we can correct this behavior:



In [66]:
from langchain_core.messages import AIMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

# Few-shot demonstration of sequential tool use: call `multiply` first, wait
# for its result, then call `add` on that result. The argument names match the
# `a`/`b` parameters of the bound tools, and the tool outputs are the actual
# results of the calculation (317253 * 128472 = 40758127416).
examples = [
    HumanMessage(
        "What's the product of 317253 and 128472 plus four", name="example_user"
    ),
    AIMessage(
        "",
        name="example_assistant",
        tool_calls=[
            {"name": "multiply", "args": {"a": 317253, "b": 128472}, "id": "1"}
        ],
    ),
    ToolMessage("40758127416", tool_call_id="1"),
    AIMessage(
        "",
        name="example_assistant",
        tool_calls=[{"name": "add", "args": {"a": 40758127416, "b": 4}, "id": "2"}],
    ),
    ToolMessage("40758127420", tool_call_id="2"),
    AIMessage(
        "The product of 317253 and 128472 plus four is 40758127420",
        name="example_assistant",
    ),
]

system = """You are bad at math but are an expert at using a calculator.

Use past tool usage as an example of how to correctly use the tools."""
# Prompt layout: system instructions, the worked example conversation, then
# the user's actual question.
few_shot_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        *examples,
        ("human", "{query}"),
    ]
)

# The raw input string is passed through as the `query` prompt variable.
chain = {"query": RunnablePassthrough()} | few_shot_prompt | llm_with_tools
chain.invoke("Whats 119 times 8 minus 20").tool_calls

[{'name': 'multiply',
  'args': {'a': 119, 'b': 8},
  'id': 'call_sCkKn9bvue9HMtH9fpKv7Qo3'}]