# Lang Chain Experimentation Tutorials HandsOn

# [Cookbook for reference](https://www.pinecone.io/learn/series/langchain/)
# [Documentation](https://python.langchain.com/docs/get_started/introduction)

---
Chat with History - ChatGPT API

In [29]:
if 0:
    #!/usr/bin/env python
    """Example LangChain server exposes and agent that has conversation history.
    
    In this example, the history is stored entirely on the client's side.
    
    Please see other examples in LangServe on how to use RunnableWithHistory to
    store history on the server side.
    
    Relevant LangChain documentation:
    
    * Creating a custom agent: https://python.langchain.com/docs/modules/agents/how_to/custom_agent
    * Streaming with agents: https://python.langchain.com/docs/modules/agents/how_to/streaming#custom-streaming-with-events
    * General streaming documentation: https://python.langchain.com/docs/expression_language/streaming
    * Message History: https://python.langchain.com/docs/expression_language/how_to/message_history
    
    **ATTENTION**
    1. To support streaming individual tokens you will need to use the astream events
       endpoint rather than the streaming endpoint.
    2. This example does not truncate message history, so it will crash if you
       send too many messages (exceed token length).
    3. The playground at the moment does not render agent output well! If you want to
       use the playground you need to customize it's output server side using astream
       events by wrapping it within another runnable.
    4. See the client notebook it has an example of how to use stream_events client side!
    """  # noqa: E501
    
    
    from typing import Any, List, Union
    
    from fastapi import FastAPI
    from langchain.agents import AgentExecutor, tool
    from langchain.agents.format_scratchpad.openai_tools import (
        format_to_openai_tool_messages,
    )
    from langchain.agents import Tool, AgentType, initialize_agent
    from langchain_community.llms import LlamaCpp
    from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser
    from langchain.prompts import MessagesPlaceholder
    from langchain_community.tools.convert_to_openai import format_tool_to_openai_tool
    from langchain_core.messages import AIMessage, FunctionMessage, HumanMessage
    from langchain_core.prompts import ChatPromptTemplate
    from langchain.utilities import DuckDuckGoSearchAPIWrapper
    from langchain_openai import ChatOpenAI
    
    from langserve import add_routes
    from langserve.pydantic_v1 import BaseModel, Field
    
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                "You are a very powerful assistant "
                "Talk with the user as normal. "
            ),
            # Please note the ordering of the fields in the prompt!
            # The correct ordering is:
            # 1. history - the past messages between the user and the agent
            # 2. user - the user's current input
            # 3. agent_scratchpad - the agent's working space for thinking and
            #    invoking tools to respond to the user's input.
            # If you change the ordering, the agent will not work correctly since
            # the messages will be shown to the underlying LLM in the wrong order.
            
            MessagesPlaceholder(variable_name="chat_history"),
            ("user", "{input}"),
            MessagesPlaceholder(variable_name="agent_scratchpad"),
        ]
    )
    
    
    # DuckDuckGoSearchAPIWrapper does not require an API key
    # search = DuckDuckGoSearchAPIWrapper()
    # search_tool = Tool(name="Current Search",
    #                    func=search.run,
    #                    description="Useful when you need to answer questions about nouns, current events or the current state of the world."
    #                    )
    # tools = [search_tool]
    
    
    tools = [word_length]
    llm_with_tools = llm.bind(tools=[format_tool_to_openai_tool(tool) for tool in tools])
    
    
    
    # We need to set streaming=True on the LLM to support streaming individual tokens.
    # Tokens will be available when using the stream_log / stream events endpoints,
    # but not when using the stream endpoint since the stream implementation for agent
    # streams action observation pairs not individual tokens.
    # See the client notebook that shows how to use the stream events endpoint.
    
    # Initialize LlamaCpp with your model's parameters
    # llm = LlamaCpp(
    #     model_path="../models/mixtral-8x7b-instruct-v0.1.Q5_K_M.gguf",
    #     n_gpu_layers=1,
    #     n_batch=512,
    #     f16_kv=True,
    #     verbose=True,
    #     streaming=True
    # )
    from openai import OpenAI
    
    llm = ChatOpenAI(
        model="gpt-3.5-turbo", 
        temperature=0, 
        streaming=True, 
        api_key="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"
    )
    
    # ATTENTION: For production use case, it's a good idea to trim the prompt to avoid
    #            exceeding the context window length used by the model.
    #
    # To fix that simply adjust the chain to trim the prompt in whatever way
    # is appropriate for your use case.
    # For example, you may want to keep the system message and the last 10 messages.
    # Or you may want to trim based on the number of tokens.
    # Or you may want to also summarize the messages to keep information about things
    # that were learned about the user.
    #
    # def prompt_trimmer(messages: List[Union[HumanMessage, AIMessage, FunctionMessage]]):
    #     '''Trims the prompt to a reasonable length.'''
    #     # Keep in mind that when trimming you may want to keep the system message!
    #     return messages[-10:] # Keep last 10 messages.
    
    agent = (
        {
            "input": lambda x: x["input"],
            "agent_scratchpad": lambda x: format_to_openai_tool_messages(
                x["intermediate_steps"]
            ),
            "chat_history": lambda x: x["chat_history"],
        }
        | prompt
        # | prompt_trimmer # See comment above.
        | llm_with_tools
        | OpenAIToolsAgentOutputParser()
    )

    agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
    
    app = FastAPI(
        title="LangChain Server",
        version="1.0",
        description="Spin up a simple api server using LangChain's Runnable interfaces",
    )
    
    
    # We need to add these input/output schemas because the current AgentExecutor
    # is lacking in schemas.
    class Input(BaseModel):
        input: str
        # The field extra defines a chat widget.
        # Please see documentation about widgets in the main README.
        # The widget is used in the playground.
        # Keep in mind that playground support for agents is not great at the moment.
        # To get a better experience, you'll need to customize the streaming output
        # for now.
        chat_history: List[Union[HumanMessage, AIMessage, FunctionMessage]] = Field(
            ...,
            extra={"widget": {"type": "chat", "input": "input", "output": "output"}},
        )
    
    
    class Output(BaseModel):
        output: Any
    
    
    # Adds routes to the app for using the chain under:
    # /invoke
    # /batch
    # /stream
    # /stream_events
    add_routes(
        app,
        agent_executor.with_types(input_type=Input, output_type=Output).with_config(
            {"run_name": "agent"}
        ),
    )
    
    if __name__ == "__main__":
        import uvicorn
        uvicorn.run(app, host="localhost", port=8000)

---

# Langserve + History 

In [37]:
import os
from langchain.agents import Tool, AgentType, initialize_agent
from langchain.memory import ConversationBufferMemory
from langchain.utilities import DuckDuckGoSearchAPIWrapper
from langchain.agents import AgentExecutor
from fastapi import FastAPI
from langchain_core.messages import AIMessage, FunctionMessage, HumanMessage
from typing import Any, List, Union
from langserve import add_routes

# Adding a search tool
search = DuckDuckGoSearchAPIWrapper()
search_tool = Tool(name="Current Search",
                   func=search.run,
                   description="Useful when you need to answer questions about nouns, current events or the current state of the world."
                   )

tools = [search_tool]

# Memory Buffer Added
memory = ConversationBufferMemory(memory_key="chat_history")


# Llamacpp Model added
llm = LlamaCpp(
    model_path="../models/mixtral-8x7b-instruct-v0.1.Q5_K_M.gguf",
    n_gpu_layers=1,
    n_batch=512,
    n_ctx=2048,
    f16_kv=True,
    verbose=True,
    streaming=True
)

# Agent Executor Initialized
agent_executor = AgentExecutor(agent=agent_chain, tools=tools, verbose=True)

# FastAPI app executor
app = FastAPI(
    title="LangChain Server",
    version="1.0",
    description="Spin up a simple api server using LangChain's Runnable interfaces",
)


class Input(BaseModel):
    input: str
    # The field extra defines a chat widget.
    # Please see documentation about widgets in the main README.
    # The widget is used in the playground.
    # Keep in mind that playground support for agents is not great at the moment.
    # To get a better experience, you'll need to customize the streaming output
    # for now.
    chat_history: List[Union[HumanMessage, AIMessage, FunctionMessage]] = Field(
        ...,
        extra={"widget": {"type": "chat", "input": "input", "output": "output"}},
    )


# Add application route
add_routes(
    app,
    agent_executor.with_types(input_type=Input, output_type=str).with_config(
        {"run_name": "agent"}
    ),
)

#agent_chain.run(input="What it do, nephew!")

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="localhost", port=8000)

In [15]:
import uuid
from langserve import RemoteRunnable
from langchain_core.messages import AIMessage, FunctionMessage, HumanMessage

chat = RemoteRunnable("http://localhost:8000/")


---

# Conversational Agent - Simple Chain

In [5]:
import requests
from pydantic import BaseModel, Field
import datetime
import wikipedia
from langchain.utilities import DuckDuckGoSearchAPIWrapper
from langchain.agents import Tool
from langchain.tools import tool

@tool
def search_wikipedia(query: str) -> str:
    """Run Wikipedia search and get page summaries."""
    page_titles = wikipedia.search(query)
    summaries = []
    for page_title in page_titles[: 3]:
        try:
            wiki_page =  wikipedia.page(title=page_title, auto_suggest=False)
            summaries.append(f"Page: {page_title}\nSummary: {wiki_page.summary}")
        except (
            self.wiki_client.exceptions.PageError,
            self.wiki_client.exceptions.DisambiguationError,
        ):
            pass
    if not summaries:
        return "No good Wikipedia Search Result was found"
    return "\n\n".join(summaries)

# Adding a search tool
search = DuckDuckGoSearchAPIWrapper()
search_tool = Tool(name="Current Search",
                   func=search.run,
                   description="Useful when you need to answer questions about nouns, current events or the current state of the world."
                   )

tools = [search_tool, search_wikipedia]

In [20]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

from langchain.tools.render import format_tool_to_openai_function
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
from langchain_community.llms import LlamaCpp

In [None]:
functions = [format_tool_to_openai_function(f) for f in tools]
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are helpful but sassy assistant"),
    ("user", "{input}"),
])

# Llamacpp Model added
model = LlamaCpp(
    model_path="../models/mixtral-8x7b-instruct-v0.1.Q5_K_M.gguf",
    n_gpu_layers=1,
    n_batch=512,
    n_ctx=2048,
    f16_kv=True,
    verbose=True,
    streaming=True
)
# Chaining 
chain = prompt | model

llama_model_loader: loaded meta data with 26 key-value pairs and 995 tensors from ../models/mixtral-8x7b-instruct-v0.1.Q5_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mixtral-8x7b-instruct-v0.1
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:    

In [19]:
result = chain.invoke({"input": "what is the weather is sf?"})


llama_print_timings:        load time =    1362.06 ms
llama_print_timings:      sample time =      28.00 ms /   158 runs   (    0.18 ms per token,  5642.66 tokens per second)
llama_print_timings: prompt eval time =    1362.01 ms /    21 tokens (   64.86 ms per token,    15.42 tokens per second)
llama_print_timings:        eval time =   19156.39 ms /   157 runs   (  122.02 ms per token,     8.20 tokens per second)
llama_print_timings:       total time =   20899.83 ms /   178 tokens


In [13]:
chain.invoke({"input": "which teams are playing IPL today ?"})

Llama.generate: prefix-match hit

llama_print_timings:        load time =    1355.52 ms
llama_print_timings:      sample time =      10.62 ms /    58 runs   (    0.18 ms per token,  5459.34 tokens per second)
llama_print_timings: prompt eval time =     635.20 ms /     8 tokens (   79.40 ms per token,    12.59 tokens per second)
llama_print_timings:        eval time =    6933.23 ms /    57 runs   (  121.64 ms per token,     8.22 tokens per second)
llama_print_timings:       total time =    7710.57 ms /    65 tokens


"\nAssistant: Hello there, as of my last update, the Indian Premier League (IPL) schedule for today hasn't been released yet. I recommend checking the official IPL website or other reliable sports sources for the most current and accurate information. Have a great day!"

---

# Conversational Agent - Memory + Tools + AgentFinisher

In [3]:
import requests
from langchain.memory import ConversationBufferMemory
from langchain.prompts import MessagesPlaceholder
from pydantic import BaseModel, Field
import datetime
import wikipedia
from langchain.utilities import DuckDuckGoSearchAPIWrapper
from langchain.agents import Tool
from langchain.tools import tool
from langchain.tools.render import format_tool_to_openai_function
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
from langchain.utilities import DuckDuckGoSearchAPIWrapper
from langchain.agents import AgentExecutor
from langchain.agents.format_scratchpad import format_to_openai_functions

In [4]:
# Using Wikipedia tool for searching specific query

import wikipedia

@tool
def search_wikipedia(query: str) -> str:
    """Run Wikipedia search and get page summaries."""
    page_titles = wikipedia.search(query)
    summaries = []
    for page_title in page_titles[: 3]:
        try:
            wiki_page =  wikipedia.page(title=page_title, auto_suggest=False)
            summaries.append(f"Page: {page_title}\nSummary: {wiki_page.summary}")
        except (
            self.wiki_client.exceptions.PageError,
            self.wiki_client.exceptions.DisambiguationError,
        ):
            pass
    if not summaries:
        return "No good Wikipedia Search Result was found"
    return "\n\n".join(summaries)


# Use Duck Duck Go for searching web for real-time queries, e.g. current affairs.
# Adding a search tool

search = DuckDuckGoSearchAPIWrapper()
search_tool = Tool(name="Current Search",
                   func=search.run,
                   description="Useful when you need to answer questions about nouns, current events or the current state of the world."
                   )

tools = [search_tool, search_wikipedia]


In [5]:
import requests
from langchain.memory import ConversationBufferMemory
from langchain.prompts import MessagesPlaceholder
from pydantic import BaseModel, Field
import datetime
import wikipedia
from langchain.utilities import DuckDuckGoSearchAPIWrapper
from langchain.agents import Tool
from langchain.tools import tool
from langchain.tools.render import format_tool_to_openai_function
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
from langchain.utilities import DuckDuckGoSearchAPIWrapper
from langchain.agents import AgentExecutor
from langchain.agents.format_scratchpad import format_to_openai_functions
from langchain_community.llms import LlamaCpp

functions = [format_tool_to_openai_function(f) for f in tools]

# Binding tools functions with OpenAI model
model = ChatOpenAI(model="gpt-3.5-turbo", 
                   temperature=0, 
                   streaming=True, 
                   api_key="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")



llm = LlamaCpp(
    model_path="../models/mixtral-8x7b-instruct-v0.1.Q5_K_M.gguf",
    n_gpu_layers=1,
    n_batch=512,
    f16_kv=True,
    verbose=True,
    streaming=True
)

# Adding ConversationBuffer
memory = ConversationBufferMemory(return_messages=True,memory_key="chat_history")

# Defining Chatprompt
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are helpful but sassy assistant"),
    MessagesPlaceholder(variable_name="chat_history"),
    ("user", "{input}"),
    MessagesPlaceholder(variable_name="agent_scratchpad")
])

# Agent Scratchpad is the intermediate thinking the Agent does
# Runnable Passthrough provides intermediate inputs to the conversation
chain = RunnablePassthrough.assign(
    agent_scratchpad = lambda x: format_to_openai_functions(x["intermediate_steps"])
) | prompt | model | OpenAIFunctionsAgentOutputParser()

# AgentExecutor takes in lang chain
qa = AgentExecutor(agent=chain, tools=tools, verbose=True, memory=memory)

  warn_deprecated(
  warn_deprecated(


In [17]:
result = qa.invoke({"input": "How are you doing today ?"})
answer = result['output'] 
answer



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI'm here and sassy as ever, ready to assist you with anything you need. How can I help you today?[0m

[1m> Finished chain.[0m


"I'm here and sassy as ever, ready to assist you with anything you need. How can I help you today?"

In [None]:
from fastapi import FastAPI, Body
from pydantic import BaseModel
from langchain.memory import ConversationBufferMemory
from langchain.prompts import MessagesPlaceholder
from langchain.agents import Tool
from langchain.tools import tool
from langchain.tools.render import format_tool_to_openai_function
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
from langchain.agents import AgentExecutor
from langchain.agents.format_scratchpad import format_to_openai_functions
from langchain_community.llms import LlamaCpp

# Define the request body model
class QAInput(BaseModel):
    input: str

# Define the response model
class QAResponse(BaseModel):
    output: str

# Initialize FastAPI app
app = FastAPI()

# model = LlamaCpp(
#     model_path="../models/mixtral-8x7b-instruct-v0.1.Q5_K_M.gguf",
#     n_gpu_layers=1,
#     n_batch=512,
#     f16_kv=True,
#     verbose=True,
#     streaming=True
# )

# Binding tools functions with OpenAI model
model = ChatOpenAI(model="gpt-3.5-turbo", 
                temperature=0, 
                streaming=True, 
                api_key="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")

def create_qa_agent():
    # Your existing code here...
    # ...
    functions = [format_tool_to_openai_function(f) for f in tools]

    # Adding ConversationBuffer
    memory = ConversationBufferMemory(return_messages=True,memory_key="chat_history")

    # Defining Chatprompt
    prompt = ChatPromptTemplate.from_messages([
        ("system", "You are a Helpful Chat Assistant who has a formal tone to answer the questions."),
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad")
    ])

    # Agent Scratchpad is the intermediate thinking the Agent does
    # Runnable Passthrough provides intermediate inputs to the conversation
    chain = RunnablePassthrough.assign(
        agent_scratchpad = lambda x: format_to_openai_functions(x["intermediate_steps"])
    ) | prompt | model #| OpenAIFunctionsAgentOutputParser()

    # AgentExecutor takes in lang chain
    qa = AgentExecutor(agent=chain, tools=tools, verbose=True, memory=memory)
    return qa

# Store the qa agent in a global variable so it's not recreated every time the endpoint is called
qa_agent = create_qa_agent()

@app.put("/ask", response_model=QAResponse)
async def ask_question(input: QAInput = Body(...)):
    result = qa_agent.invoke({"input": input.input})
    answer = result['output']
    return QAResponse(output=answer)

In [3]:
# Client Code 

import requests

def chat_with_agent():
    base_url = "http://localhost:8000"  # Replace with your server's address if not running locally
    stop_word = "stop"  # The word to stop the conversation

    while True:
        user_input = input("You: ")
        if user_input.lower() == stop_word:
            print("Ending conversation.")
            break

        response = requests.put(f"{base_url}/ask", json={"input": user_input})
        response.raise_for_status()  # Raise an exception if the request failed

        agent_output = response.json()["output"]
        print(f"Agent: {agent_output}")


In [5]:
chat_with_agent()

You:  Hi wassup


JSONDecodeError: Expecting value: line 1 column 1 (char 0)