# How to stream from the final node

A common use case when streaming from an agent is to stream LLM tokens from inside the final node. This guide demonstrates how you can do this.

In [1]:
%%capture --no-stderr
%pip install -U langgraph langchain-openai

In [2]:
import getpass
import os


def _set_env(var: str):
    if not os.environ.get(var):
        os.environ[var] = getpass.getpass(f"{var}: ")


# Prompt for the OpenAI API key unless it is already set in the environment.
_set_env("OPENAI_API_KEY")

OPENAI_API_KEY:  ········


## Define model and tools

In [3]:
from typing import Literal
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.runnables import ConfigurableField
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent
from langgraph.prebuilt import ToolNode


# NOTE(review): the docstring is kept verbatim because `@tool` presumably uses
# it as the tool description shown to the model — confirm before rewording.
@tool
def get_weather(city: Literal["nyc", "sf"]):
    """Use this to get weather information."""
    # Canned answers for the two supported cities.
    if city == "nyc":
        return "It might be cloudy in nyc"
    if city == "sf":
        return "It's always sunny in sf"
    # Anything else is outside the declared Literal and treated as a bug.
    raise AssertionError("Unknown city")


tools = [get_weather]
# Build the chat model in one chain: create it, bind the tools, then attach the
# "final_node" tag. NOTE: that tag is what the streaming cell later uses to
# filter events down to this model's output.
model = (
    ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
    .bind_tools(tools)
    .with_config(tags=["final_node"])
)

In [4]:
# Wrap the tool list in a ToolNode; it is registered as the graph's "tools" node below.
tool_node = ToolNode(tools=tools)

In [5]:
from typing import TypedDict, Annotated

from langgraph.graph import END, StateGraph
from langgraph.graph.message import MessagesState
from langchain_core.messages import BaseMessage

## Define graph

In [6]:
def should_continue(state: MessagesState) -> Literal["tools", END]:
    """Decide whether the graph keeps going or stops.

    Looks at the most recent message in ``state["messages"]``: if it carries
    tool calls, route to the "tools" node; otherwise return ``END`` so the
    reply goes back to the user.
    """
    latest = state["messages"][-1]
    return "tools" if latest.tool_calls else END


def call_model(state: MessagesState):
    """Invoke the model on the conversation so far.

    Returns a state update whose "messages" entry is a one-element list
    holding the model's response; it is a list because the update gets added
    to the existing message list.
    """
    reply = model.invoke(state["messages"])
    return {"messages": [reply]}

In [7]:
# Create the graph, using MessagesState as its state schema
workflow = StateGraph(MessagesState)

# Register the two nodes the graph alternates between:
# the model call ("agent") and the tool executor ("tools")
workflow.add_node("agent", call_model)
workflow.add_node("tools", tool_node)

# Execution starts at `agent`, so it is the first node to run
workflow.set_entry_point("agent")

# Conditional edge out of `agent`: once `agent` has run, `should_continue`
# decides which node (if any) is called next
workflow.add_conditional_edges("agent", should_continue)

# Plain edge from `tools` back to `agent`: after the tools have run,
# the `agent` node is called again
workflow.add_edge("tools", "agent")

In [8]:
# Compile the workflow into `app`, the object the streaming cell below iterates over.
app = workflow.compile()

## Stream outputs from the final node

In [9]:
inputs = [("human", "what's the weather in nyc?")]
async for event in app.astream_events({"messages": inputs}, version="v2"):
    kind = event["event"]
    tags = event.get("tags", [])
    if kind == "on_chat_model_stream" and "final_node" in tags:
        data = event["data"]
        if data["chunk"].content:
            # Empty content in the context of OpenAI or Anthropic usually means
            # that the model is asking for a tool to be invoked.
            # So we only print non-empty content
            print(data["chunk"].content, end="|")

  warn_beta(


The| weather| in| NYC| might| be| cloudy|.|