In [1]:
from langchain_ollama import ChatOllama
from langchain_core.messages import HumanMessage, AIMessage
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import BaseMessage
from langgraph.graph.message import add_messages
from typing_extensions import Annotated, TypedDict
from typing import Sequence

In [2]:
from langchain_ollama import ChatOllama
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, trim_messages
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph
import gradio as gr

# --- Configuration ----------------------------------------------------------
# Everything a reader might want to tune is named here instead of being buried
# as a literal inside the constructor calls below.
MODEL_NAME = "llama3.2"
TEMPERATURE = 0
SYSTEM_PROMPT = "You are a helpful AI assistant. Please respond concisely."
# NOTE(review): 8196 looks like a typo for 8192 (2**13). The value is kept
# unchanged so trimming behaves exactly as before — confirm the intended budget.
# NOTE(review): presumably the Ollama context window must be at least this
# large for the trimmed history to fit — verify the server/model settings.
MAX_HISTORY_TOKENS = 8196

# --- Chat model -------------------------------------------------------------
model = ChatOllama(
    model=MODEL_NAME,
    temperature=TEMPERATURE,
)

# System prompt that is seeded into the conversation state further down.
system_message = SystemMessage(content=SYSTEM_PROMPT)

# --- Graph skeleton ---------------------------------------------------------
# MessagesState is the state schema, so nodes exchange a "messages" list.
workflow = StateGraph(state_schema=MessagesState)

# --- History trimming -------------------------------------------------------
# Called without a message list, trim_messages is used here as a reusable
# trimmer that is later applied via `trimmer.invoke(...)`.
trimmer = trim_messages(
    max_tokens=MAX_HISTORY_TOKENS,
    strategy="last",        # keep the most recent messages
    token_counter=model,    # token counts are delegated to the chat model
    include_system=True,    # retain the system message when trimming
    allow_partial=False,    # never cut a message in half
    start_on="human",       # trimmed history begins with a human turn
)

def call_model(state: MessagesState):
    """Graph node: trim the stored conversation and ask the model for a reply.

    Args:
        state: Current graph state; only its ``"messages"`` entry is read.

    Returns:
        A partial state update of the form ``{"messages": [reply]}`` containing
        the single message returned by the model.
    """
    conversation = state["messages"]
    # Trim first so the model only receives what `trimmer` lets through.
    reply = model.invoke(trimmer.invoke(conversation))
    return {"messages": [reply]}

# Register the single "model" node, then route the graph entry point to it.
workflow.add_node("model", call_model)
workflow.add_edge(START, "model")

# Compile with a MemorySaver checkpointer so state is stored per thread id.
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

# Every call made with this config reads and writes the same thread.
config = {
    "configurable": {
        "thread_id": "abc789",
    },
}

# Seed that thread with the system prompt before any user message arrives.
app.update_state(config, {"messages": [system_message]})

def stream_response(message, history):
    """Stream the assistant's reply to ``message`` as a growing string.

    Args:
        message: The user's text; it is wrapped in a ``HumanMessage`` and sent
            through the compiled graph ``app``.
        history: Chat history supplied by the caller. It is not used here; the
            conversation is taken from the graph state addressed by the
            module-level ``config``.

    Yields:
        The reply text accumulated so far, once per streamed AI message chunk.

    Note:
        Every call uses the same module-level ``config`` (one thread id), so
        all callers of this function share a single conversation.
    """
    graph_input = {"messages": [HumanMessage(message)]}
    reply_so_far = ""
    for piece, _metadata in app.stream(graph_input, config, stream_mode="messages"):
        # Only model output contributes to the reply; skip everything else.
        if not isinstance(piece, AIMessage):
            continue
        reply_so_far += piece.content
        yield reply_so_far
 
# Build the chat UI around the streaming callback, enable queuing, and serve it.
# With debug=True this cell keeps running until the kernel is interrupted.
demo = gr.ChatInterface(stream_response)
demo.queue()
demo.launch(debug=True)



* Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.


Keyboard interruption in main thread... closing server.




In [6]:
workflow?

Type:        StateGraph
String form: <langgraph.graph.state.StateGraph object at 0x11b7c5400>
File:        ~/Library/Python/3.12/lib/python/site-packages/langgraph/graph/state.py
Docstring:
A graph whose nodes communicate by reading and writing to a shared state.
The signature of each node is State -> Partial<State>.

Each state key can optionally be annotated with a reducer function that
will be used to aggregate the values of that key received from multiple nodes.
The signature of a reducer function is (Value, Value) -> Value.

Args:
    state_schema (Type[Any]): The schema class that defines the state.
    config_schema (Optional[Type[Any]]): The schema class that defines the configuration.
        Use this to expose configurable parameters in your API.

Examples:
    >>> from langchain_core.runnables import RunnableConfig
    >>> from typing_extensions import Annotated, TypedDict
    >>> from langgraph.checkpoint.memory import MemorySav

In [8]:
app.update_state?

Signature:
app.update_state(
    config: 'RunnableConfig',
    values: 'Optional[Union[dict[str, Any], Any]]',
    as_node: 'Optional[str]' = None,
) -> 'RunnableConfig'
Docstring:
Update the state of the graph with the given values, as if they came from
node `as_node`. If `as_node` is not provided, it will be set to the last node
that updated the state, if not ambiguous.
File:      ~/Library/Python/3.12/lib/python/site-packages/langgraph/pregel/__init__.py
Type:      method