# How to stream LLM tokens (without LangChain LLMs)

In this example we will stream tokens from the language model powering an agent. We'll be using the OpenAI client library directly, without using LangChain chat models. We will also use a ReAct agent as an example.

## Setup

In [1]:
%%capture --no-stderr
%pip install -U langgraph openai

In [2]:
import getpass
import os


def _set_env(var: str):
    """Ensure the environment variable `var` has a non-empty value.

    If `var` is already set to a non-empty string, nothing happens. Otherwise
    the user is prompted (without echo) and the entered value is stored in
    `os.environ[var]`.
    """
    if os.environ.get(var):
        # Already configured: do not prompt again.
        return
    os.environ[var] = getpass.getpass(f"{var}: ")


# Prompt for the OpenAI API key only if it is not already present in the environment.
_set_env("OPENAI_API_KEY")

OPENAI_API_KEY:  ········


## Define model, tools and graph

### Define a node that will call the OpenAI API

In [3]:
from openai import AsyncOpenAI
from langchain_core.runnables.config import ensure_config, get_callback_manager_for_config

# Async OpenAI client, created with default settings and shared by the
# model node and the tool defined below.
openai_client = AsyncOpenAI()

In [4]:
# Tool schema in the OpenAI tool-calling format: a single function named
# `get_items` that takes one required string argument, `place`.
tool = {
    "type": "function",
    "function": {
        "name": "get_items",
        "description": "Use this tool to look up which items are in the given place.",
        "parameters": {
            "type": "object",
            "properties": {"place": {"type": "string"}},
            "required": ["place"],
        },
    },
}

In [5]:
async def call_model(state, config=None):
    """Call the OpenAI chat completions API and stream its tokens via callbacks.

    The response is requested with `stream=True`; every content token is
    forwarded to the LangChain callback manager with `on_llm_new_token`, and
    the run is tagged `"agent_llm"` so its events can be filtered by tag.

    Args:
        state: Graph state; `state["messages"]` is the list of OpenAI-format
            messages sent to the model.
        config: Optional runnable config. May be left as `None`.

    Returns:
        A state update `{"messages": [message]}` where `message` is a dict with
        the keys `role`, `content` and `tool_calls`. `tool_calls` is `None`
        when the model did not request a tool, otherwise a one-element list.
    """
    # `config` defaults to None and `None | {...}` raises TypeError, so
    # normalize it first. Tags already present on the config are kept and
    # our own tag is appended.
    base_config = config or {}
    config = ensure_config(
        {**base_config, "tags": [*(base_config.get("tags") or []), "agent_llm"]}
    )
    callback_manager = get_callback_manager_for_config(config)
    messages = state["messages"]

    llm_run_manager = callback_manager.on_chat_model_start({}, [messages])[0]
    response = await openai_client.chat.completions.create(
        messages=messages,
        model="gpt-3.5-turbo",
        tools=[tool],
        stream=True,
    )

    content_parts = []
    role = None

    tool_call_id = None
    tool_call_function_name = None
    argument_parts = []
    async for chunk in response:
        if not chunk.choices:
            # Defensive: skip chunks that carry no choices instead of
            # failing with IndexError on `choices[0]`.
            continue

        delta = chunk.choices[0].delta
        if delta.role is not None:
            role = delta.role

        if delta.content:
            content_parts.append(delta.content)
            llm_run_manager.on_llm_new_token(delta.content)

        if delta.tool_calls:
            # note: for simplicity we're only handling a single tool call here
            tool_call_delta = delta.tool_calls[0]
            function_delta = tool_call_delta.function
            if function_delta is not None:
                # The name and id are taken from the chunk that carries the name.
                if function_delta.name is not None:
                    tool_call_function_name = function_delta.name
                    tool_call_id = tool_call_delta.id

                # Arguments arrive as string fragments; a fragment may be
                # missing (None) on a given chunk, so only append real text.
                if function_delta.arguments:
                    argument_parts.append(function_delta.arguments)

    if tool_call_function_name is not None:
        tool_calls = [
            {
                "id": tool_call_id,
                "function": {
                    "name": tool_call_function_name,
                    "arguments": "".join(argument_parts),
                },
                "type": "function",
            }
        ]
    else:
        tool_calls = None

    response_message = {
        "role": role,
        "content": "".join(content_parts),
        "tool_calls": tool_calls,
    }
    return {"messages": [response_message]}

### Define our tools and a tool-calling node

In [6]:
import json

In [7]:
async def get_items(place: str) -> str:
    """Use this tool to look up which items are in the given place.

    Asks the OpenAI chat model for items that might be found in `place`,
    streaming each content token through the LangChain callback manager.

    Args:
        place: Name of the place to ask about; interpolated into the prompt.

    Returns:
        The full text of the model's answer.
    """
    # NOTE: we'll be using this tag to filter tokens from the tool LLM
    config = ensure_config({"tags": ["tool_llm"]})
    callback_manager = get_callback_manager_for_config(config)

    messages = [
        {
            "role": "user",
            "content": (
                f"Can you tell me what kind of items i might find in the following place: '{place}'. "
                f"List at least 3 such items separating them by a comma. And include a brief description of each item."
            )
        }
    ]
    llm_run_manager = callback_manager.on_chat_model_start({}, [messages])[0]
    response = await openai_client.chat.completions.create(
        messages=messages,
        model="gpt-3.5-turbo",
        stream=True,
    )

    content_parts = []
    async for chunk in response:
        if not chunk.choices:
            # Defensive: skip chunks that carry no choices instead of
            # failing with IndexError on `choices[0]`.
            continue

        delta = chunk.choices[0].delta
        if delta.content:
            content_parts.append(delta.content)
            llm_run_manager.on_llm_new_token(delta.content)

    return "".join(content_parts)

In [8]:
# Mapping from the tool name requested by the model to the coroutine
# function that implements it; used to look up functions when running tools.
function_name_to_function = {
    "get_items": get_items
}

In [9]:
async def call_tools(state):
    """Run the tool calls requested by the last message and return their results.

    Every entry in the last message's `tool_calls` list is executed in order
    (previously only the first entry was run and any others were dropped).

    Args:
        state: Graph state; `state["messages"][-1]["tool_calls"]` must be a
            list of OpenAI-format tool calls, each with an `id` and a
            `function` dict holding `name` and JSON-encoded `arguments`.

    Returns:
        A state update `{"messages": [...]}` with one `role: "tool"` message
        per tool call, in the same order as the calls.

    Raises:
        KeyError: If a requested function name is not registered in
            `function_name_to_function`.
        json.JSONDecodeError: If a call's arguments are not valid JSON.
    """
    tool_messages = []
    for tool_call in state["messages"][-1]["tool_calls"]:
        function_name = tool_call["function"]["name"]
        raw_arguments = tool_call["function"]["arguments"]
        # An empty argument string is not valid JSON; treat it as "no arguments".
        arguments = json.loads(raw_arguments) if raw_arguments else {}

        function_response = await function_name_to_function[function_name](**arguments)
        tool_messages.append(
            {
                "tool_call_id": tool_call["id"],
                "role": "tool",
                "name": function_name,
                "content": function_response,
            }
        )

    return {"messages": tool_messages}

### Define our graph

In [10]:
import operator
from typing import Annotated, TypedDict, Literal

from langgraph.graph import StateGraph, END

In [11]:
class State(TypedDict):
    """State schema passed to `StateGraph`: a single list of messages."""

    # `operator.add` is attached to the field via `Annotated`; LangGraph is
    # expected to use it as the reducer, so message lists returned by nodes
    # are concatenated onto the existing list rather than replacing it.
    messages: Annotated[list, operator.add]

In [12]:
def should_continue(state) -> Literal["tools", END]:
    """Pick the next step after the model node.

    Returns `"tools"` when the most recent message carries a non-empty
    `tool_calls` value, and `END` otherwise.
    """
    requested_tool_calls = state['messages'][-1]["tool_calls"]
    return "tools" if requested_tool_calls else END

In [13]:
# Build the graph: "model" calls the LLM, "tools" executes the requested tool.
workflow = StateGraph(State)
# Execution starts at the "model" node.
workflow.set_entry_point("model")
workflow.add_node("model", call_model)  # i.e. our "agent"
workflow.add_node("tools", call_tools)
# After "model", `should_continue` decides between running "tools" and ending.
workflow.add_conditional_edges("model", should_continue)
# Tool results always go back to the model.
workflow.add_edge("tools", "model")

In [14]:
# Compile the workflow definition into the runnable `graph` used for streaming below.
graph = workflow.compile()

## Stream tokens

### Stream from inside tool

In [15]:
async for event in graph.astream_events({"messages": [{"role": "user", "content": "what's in the bedroom"}]}, version="v2"):
    tags = event.get("tags", [])
    if event["event"] == "on_chat_model_stream" and "tool_llm" in tags:
        print(event["data"]["chunk"].content, end="|", flush=True)

  warn_beta(


In| a| bedroom|,| you| might| find| a| bed| -| a| piece| of| furniture| for| sleeping| or| resting|,| a| night|stand| -| a| small| table| for| placing| items| next| to| the| bed|,| and| a| dresser| -| a| piece| of| furniture| with| drawers| for| storing| clothes| and| personal| items|.|

### Stream from inside agent (tool-calling LLM)

In [16]:
async for event in graph.astream_events({"messages": [{"role": "user", "content": "what's in the bedroom"}]}, version="v2"):
    tags = event.get("tags", [])
    if event["event"] == "on_chat_model_stream" and "agent_llm" in tags:
        print(event["data"]["chunk"].content, end="|", flush=True)

In| a| bedroom|,| you| might| find| a| bedside| table|,| a| dresser|,| and| a| comfort|er|.|