## Lesson 4: Persistence and Streaming

https://learn.deeplearning.ai/courses/ai-agents-in-langgraph/lesson/5/persistence-and-streaming

In [1]:
import operator
from typing import Annotated, TypedDict

from dotenv import find_dotenv, load_dotenv
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.messages import AnyMessage, HumanMessage, SystemMessage, ToolMessage
from langchain_openai import ChatOpenAI
from langgraph.graph import END, StateGraph

In [2]:
# Load environment variables from the .env file located by find_dotenv().
# NOTE(review): presumably this is where the API keys used below come from — confirm.
load_dotenv(find_dotenv())

# Chat model name and API endpoint; both are passed to ChatOpenAI further down.
MODEL = "glm-4-air"
BASE_URL = "https://open.bigmodel.cn/api/paas/v4/"

In [3]:
# Tavily web-search tool handed to the agent; constructed with max_results=2.
# NOTE(review): presumably requires a Tavily API key in the environment — confirm.
tool = TavilySearchResults(max_results=2)

In [6]:
class AgentState(TypedDict):
    """State schema handed to ``StateGraph`` for the agent graph.

    The single ``messages`` field is a list of messages annotated with
    ``operator.add``.  Per the LangGraph documentation this annotation acts as
    the reducer for the key, so the lists returned by nodes are concatenated
    onto the existing history rather than replacing it.
    """

    # Conversation history; node outputs are combined with operator.add.
    messages: Annotated[list[AnyMessage], operator.add]

In [4]:
from langgraph.checkpoint.sqlite import SqliteSaver

In [5]:
# Checkpointer backed by SQLite's special ":memory:" database, so saved state
# does not outlive this process.
# NOTE(review): in newer langgraph-checkpoint-sqlite releases from_conn_string()
# appears to return a context manager rather than a saver — confirm against the
# installed version before upgrading.
memory = SqliteSaver.from_conn_string(":memory:")

In [15]:
class Agent:
    """Tool-calling agent built as a two-node LangGraph state machine.

    The ``llm`` node asks the chat model for the next message.  If that
    message requests tool calls, the ``action`` node runs them and control
    returns to ``llm``; otherwise the graph ends.

    Args:
        model: Chat model object; must provide ``bind_tools`` and the bound
            result must provide ``invoke``.
        tools: Tools the model may call; each must expose ``name`` and
            ``invoke``.
        checkpointer: Forwarded to ``StateGraph.compile`` as ``checkpointer``.
        system: Optional system prompt prepended to the history on every
            model call. An empty string disables it.

    Attributes:
        system: The system prompt given at construction time.
        tools: Mapping of tool name to tool object.
        model: ``model`` with the tools bound via ``bind_tools``.
        graph: The compiled graph; use ``graph.stream`` / ``graph.astream_events``.
    """

    def __init__(self, model, tools: list, checkpointer, system: str = "") -> None:
        self.system = system
        # Set up everything the node callbacks read before wiring the graph,
        # so the instance is fully initialised once the graph exists.
        self.tools = {t.name: t for t in tools}
        self.model = model.bind_tools(tools)

        graph = StateGraph(AgentState)
        graph.add_node(node="llm", action=self.call_openai)
        graph.add_node(node="action", action=self.take_action)
        # After the model speaks: run tools if it asked for any, else stop.
        graph.add_conditional_edges(
            source="llm", path=self.exists_action, path_map={True: "action", False: END}
        )
        # Tool results always go back to the model.
        graph.add_edge(start_key="action", end_key="llm")
        graph.set_entry_point(key="llm")

        self.graph = graph.compile(checkpointer=checkpointer)

    def call_openai(self, state: AgentState) -> dict:
        """Invoke the chat model on the current history.

        Args:
            state: Graph state; only ``state["messages"]`` is read.

        Returns:
            ``{"messages": [reply]}`` where ``reply`` is the model's response.
        """
        messages = state["messages"]

        if self.system:
            # Builds a new list, so the list stored in the state is not mutated
            # and the system prompt is not written back into the history.
            messages = [SystemMessage(content=self.system)] + messages

        message = self.model.invoke(messages)

        return {"messages": [message]}

    def exists_action(self, state: AgentState) -> bool:
        """Report whether the most recent message requests any tool calls.

        A message without a ``tool_calls`` attribute (or with an empty/None
        value) counts as "no action" instead of raising ``AttributeError``.

        Args:
            state: Graph state; only the last entry of ``state["messages"]``
                is inspected.

        Returns:
            ``True`` if at least one tool call is pending, else ``False``.
        """
        last_message = state["messages"][-1]
        return bool(getattr(last_message, "tool_calls", None))

    def take_action(self, state: AgentState) -> dict:
        """Execute every tool call requested by the most recent message.

        If the model names a tool that was not registered, no exception is
        raised; a ``ToolMessage`` telling the model to retry is returned for
        that call so the conversation can continue.

        Args:
            state: Graph state; the last entry of ``state["messages"]`` must
                expose ``tool_calls`` (dicts with ``name``, ``args``, ``id``).

        Returns:
            ``{"messages": [ToolMessage, ...]}`` with one message per tool
            call, in the order the calls were requested.
        """
        tool_calls = state["messages"][-1].tool_calls

        results = []

        for t in tool_calls:
            print(f"Calling: {t}")

            selected_tool = self.tools.get(t["name"])
            if selected_tool is None:
                # The model asked for a tool we do not have: report it back
                # rather than crashing the whole graph run with a KeyError.
                print(f"Unknown tool requested: {t['name']!r}")
                available = ", ".join(self.tools)
                result = f"bad tool name {t['name']!r}, retry with one of: {available}"
            else:
                result = selected_tool.invoke(t["args"])

            results.append(
                ToolMessage(tool_call_id=t["id"], name=t["name"], content=str(result))
            )

        print("Back to the model!")

        return {"messages": results}

In [8]:
# System prompt passed to Agent(system=...). The trailing backslashes are line
# continuations inside the string literal, so the first three source lines are
# joined without newlines; only the last sentence ends with a newline.
prompt = """You are a smart research assistant. Use the search engine to look up information. \
You are allowed to make multiple calls (either together or in sequence). \
Only look up information when you are sure of what you want. \
If you need to look up some information before asking a follow up question, you are allowed to do that!
"""

In [16]:
# Chat model client pointed at the endpoint and model configured above.
# NOTE(review): the API key is not passed here; presumably it is read from the
# environment populated by load_dotenv() — confirm.
model = ChatOpenAI(model=MODEL, base_url=BASE_URL)

# Agent wired with the search tool, the SQLite checkpointer and the system prompt.
abot = Agent(model=model, tools=[tool], checkpointer=memory, system=prompt)

In [17]:
# First user turn of the conversation.
messages = [HumanMessage(content="What is the weather like in SF now?")]

# Run config passed as the second argument to graph.stream(); the same
# thread_id "1" is reused by the follow-up cells below.
thread = {"configurable": {"thread_id": "1"}}

In [18]:
# Run the graph on the first question and print the "messages" entry of every
# value in each streamed item.
for step in abot.graph.stream({"messages": messages}, thread):
    for update in step.values():
        print(update["messages"])

[AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_8726260668035754131', 'function': {'arguments': '{"query": "San Francisco weather"}', 'name': 'tavily_search_results_json'}, 'type': 'function', 'index': 0}]}, response_metadata={'token_usage': {'completion_tokens': 16, 'prompt_tokens': 239, 'total_tokens': 255}, 'model_name': 'glm-4-air', 'system_fingerprint': None, 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-bac0ad78-ebf9-41a3-a99a-187dcd708eed-0', tool_calls=[{'name': 'tavily_search_results_json', 'args': {'query': 'San Francisco weather'}, 'id': 'call_8726260668035754131'}], usage_metadata={'input_tokens': 239, 'output_tokens': 16, 'total_tokens': 255})]
Calling: {'name': 'tavily_search_results_json', 'args': {'query': 'San Francisco weather'}, 'id': 'call_8726260668035754131'}
Back to the model!
[ToolMessage(content='[{\'url\': \'https://www.weatherapi.com/\', \'content\': "{\'location\': {\'name\': \'San Francisco\', \'region\': \'California\', \'

In [20]:
# Follow-up turn on thread_id "1", the same thread as the SF question.
thread = {"configurable": {"thread_id": "1"}}
messages = [HumanMessage(content="What about in LA?")]

# Print the "messages" entry of every value in each streamed item.
for step in abot.graph.stream({"messages": messages}, thread):
    for update in step.values():
        print(update["messages"])

[AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_8726255376635909922', 'function': {'arguments': '{"query": "Los Angeles weather"}', 'name': 'tavily_search_results_json'}, 'type': 'function', 'index': 0}]}, response_metadata={'token_usage': {'completion_tokens': 16, 'prompt_tokens': 883, 'total_tokens': 899}, 'model_name': 'glm-4-air', 'system_fingerprint': None, 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-ff3c8613-b480-42e8-8864-80a7842513e3-0', tool_calls=[{'name': 'tavily_search_results_json', 'args': {'query': 'Los Angeles weather'}, 'id': 'call_8726255376635909922'}], usage_metadata={'input_tokens': 883, 'output_tokens': 16, 'total_tokens': 899})]
Calling: {'name': 'tavily_search_results_json', 'args': {'query': 'Los Angeles weather'}, 'id': 'call_8726255376635909922'}
Back to the model!
[ToolMessage(content='[{\'url\': \'https://www.weatherapi.com/\', \'content\': "{\'location\': {\'name\': \'Los Angeles\', \'region\': \'California\', \'country\

In [21]:
# Third turn, still on thread_id "1"; the recorded output below shows the model
# answering from the earlier SF and LA results.
thread = {"configurable": {"thread_id": "1"}}
messages = [HumanMessage(content="Which one is warmer?")]

# Print the "messages" entry of every value in each streamed item.
for step in abot.graph.stream({"messages": messages}, thread):
    for update in step.values():
        print(update["messages"])

[AIMessage(content="Based on the current temperatures provided, Los Angeles is warmer with a temperature of 16.7°C (62.1°F), compared to San Francisco's temperature of 13.3°C (55.9°F).", response_metadata={'token_usage': {'completion_tokens': 46, 'prompt_tokens': 1467, 'total_tokens': 1513}, 'model_name': 'glm-4-air', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-80594cce-92c9-4474-b94f-7ef1cb0cbc6f-0', usage_metadata={'input_tokens': 1467, 'output_tokens': 46, 'total_tokens': 1513})]


In [22]:
# Same question, but on a different thread_id ("2"); the recorded output below
# shows the model asking for clarification because it has no earlier context.
thread = {"configurable": {"thread_id": "2"}}
messages = [HumanMessage(content="Which one is warmer?")]

# Print the "messages" entry of every value in each streamed item.
for step in abot.graph.stream({"messages": messages}, thread):
    for update in step.values():
        print(update["messages"])

[AIMessage(content="The question is too vague to answer accurately without more context. I could assume it is asking about two specific places, objects, or times of day, but I need more information to proceed. I'll ask the user for clarification. \n\nWould you please specify what you are comparing in terms of temperature? Are you asking about two different locations, times of day, or something else?", response_metadata={'token_usage': {'completion_tokens': 78, 'prompt_tokens': 213, 'total_tokens': 291}, 'model_name': 'glm-4-air', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-5bab7ea8-f2ba-4b7e-af7a-e7070b3dc0c6-0', usage_metadata={'input_tokens': 213, 'output_tokens': 78, 'total_tokens': 291})]


### Streaming tokens

In [24]:
from langgraph.checkpoint.aiosqlite import AsyncSqliteSaver

In [25]:
# Async SQLite checkpointer for the astream_events() call below. This rebinds
# `memory` (previously the synchronous SqliteSaver).
# NOTE(review): a new ":memory:" database presumably starts empty, so earlier
# threads are not carried over — confirm.
memory = AsyncSqliteSaver.from_conn_string(":memory:")

# Rebuild the agent on the async checkpointer; this rebinds `abot` as well.
abot = Agent(model=model, tools=[tool], checkpointer=memory, system=prompt)

In [26]:
# User question for the token-streaming demo.
messages = [HumanMessage(content="What is the weather like in SF now?")]

# Run config passed to astream_events(); uses thread_id "1" on the new checkpointer.
thread = {"configurable": {"thread_id": "1"}}

In [28]:
async for event in abot.graph.astream_events(
    {"messages": messages}, thread, version="v1"
):
    kind = event["event"]

    if kind == "on_chat_model_stream":
        content = event["data"]["chunk"].content

        if content:
            print(content, end="|")

Calling: {'name': 'tavily_search_results_json', 'args': {'query': 'San Francisco current weather'}, 'id': 'call_8726261045992938055'}
Back to the model!
