In [None]:
! pip install langchain_core langchain-anthropic langgraph 

In [None]:
import os, getpass


def _set_env(var: str):
    """Make sure the environment variable ``var`` has a non-empty value.

    When ``var`` is missing or empty, the value is read with
    ``getpass.getpass`` (prompt ``"<var>: "``) and written to ``os.environ``.
    An existing non-empty value is left untouched.
    """
    if os.environ.get(var):
        return
    os.environ[var] = getpass.getpass(f"{var}: ")


_set_env("ANTHROPIC_API_KEY")
# The tool-calling cells below build their model with ChatOpenAI, so its key is requested too.
_set_env("OPENAI_API_KEY")

In [9]:
# LLM
# NOTE(review): `llm` is rebound to a ChatOpenAI model in the next code cell, so this
# Anthropic instance is not the one that gets tools bound to it further down.
from langchain_anthropic import ChatAnthropic
llm = ChatAnthropic(model="claude-3-5-sonnet-latest")

### Vanilla Agent

* No orchestration framework 
* Optionally, use LangChain to define tools and bind them to the LLM

In [10]:
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI

# LLM
# NOTE(review): this rebinds `llm`, replacing the ChatAnthropic instance created in the
# previous cell; every `llm.bind_tools(...)` call below therefore uses this model.
llm = ChatOpenAI(model="gpt-4o")

# Define tools
# NOTE(review): the docstring is left untouched on purpose — `@tool` presumably uses it
# as the description shown to the model; confirm before rewording.
@tool
def multiply(a: int, b: int) -> int:
    """Multiply a and b.

    Args:
        a: first int
        b: second int
    """
    # Plain product of the two arguments; no validation happens in this function.
    return a * b


# NOTE(review): the docstring is left untouched on purpose — `@tool` presumably uses it
# as the description shown to the model; confirm before rewording.
@tool
def add(a: int, b: int) -> int:
    """Adds a and b.

    Args:
        a: first int
        b: second int
    """
    # Plain sum of the two arguments; no validation happens in this function.
    return a + b


# The docstring states the operand order explicitly ("a by b"): division is not
# commutative, and `@tool` presumably surfaces this text to the model as the tool
# description (TODO confirm), so "Divide a and b" left the order ambiguous.
@tool
def divide(a: int, b: int) -> float:
    """Divide a by b.

    Args:
        a: the int to be divided (numerator)
        b: the int to divide by (denominator)
    """
    # True division: returns a float. A zero `b` raises ZeroDivisionError, as before.
    return a / b

# Augment the LLM with tools
tools = [add, multiply, divide]
# Name -> tool lookup used when executing the model's tool calls.
# The comprehension variable is `t` so it does not read like the `tool` decorator.
tools_by_name = {t.name: t for t in tools}
llm_with_tools = llm.bind_tools(tools)

In [11]:
from langgraph.graph import add_messages
from langchain_core.messages import (
    SystemMessage,
    HumanMessage,
    BaseMessage,
    ToolCall,
)

def call_llm(messages: list[BaseMessage]):
    """Ask the tool-bound LLM for its next message.

    A fixed arithmetic-assistant system prompt is placed ahead of ``messages``
    and the result of ``llm_with_tools.invoke`` is returned unchanged.
    """
    system_prompt = SystemMessage(
        content="You are a helpful assistant tasked with performing arithmetic on a set of inputs."
    )
    return llm_with_tools.invoke([system_prompt] + messages)

def call_tool(tool_call: ToolCall):
    """Execute a single tool call.

    The tool is looked up in ``tools_by_name`` by ``tool_call["name"]`` and
    invoked with the whole tool-call payload; its result is returned.
    """
    return tools_by_name[tool_call["name"]].invoke(tool_call)

def agent(messages: list[BaseMessage]):
    """Tool-calling agent loop.

    Calls the LLM, executes every tool call it requests, feeds the results
    back, and repeats until a response arrives with no tool calls.

    Args:
        messages: The conversation so far.

    Returns:
        The conversation extended with each LLM response and tool result,
        ending with the final LLM response.
    """
    response = call_llm(messages)

    # Keep looping for as long as the model asks for tools.
    while response.tool_calls:
        outputs = [call_tool(request) for request in response.tool_calls]
        messages = add_messages(messages, [response, *outputs])
        response = call_llm(messages)

    return add_messages(messages, response)

# Run the agent with a plain function call (nothing is streamed here) and
# print every message of the returned conversation.
messages = agent([HumanMessage(content="Add 3 and 4.")])
for m in messages:
    m.pretty_print()


Add 3 and 4.
Tool Calls:
  add (call_N1trAdi9h9vK0IW3vsvCRaaA)
 Call ID: call_N1trAdi9h9vK0IW3vsvCRaaA
  Args:
    a: 3
    b: 4
Name: add

7

The sum of 3 and 4 is 7.


### Agent with short-term memory

* LangGraph persistence layer 
* `@entrypoint` decorator indicates the start of a workflow. 

In [12]:
import uuid
from langgraph.func import entrypoint # New 
from langgraph.checkpoint.memory import MemorySaver # New 

def call_llm(messages: list[BaseMessage]):
    """Ask the tool-bound LLM for its next message.

    A fixed arithmetic-assistant system prompt is placed ahead of ``messages``
    and the result of ``llm_with_tools.invoke`` is returned unchanged.
    """
    system_prompt = SystemMessage(
        content="You are a helpful assistant tasked with performing arithmetic on a set of inputs."
    )
    return llm_with_tools.invoke([system_prompt] + messages)

def call_tool(tool_call: ToolCall):
    """Execute a single tool call.

    The tool is looked up in ``tools_by_name`` by ``tool_call["name"]`` and
    invoked with the whole tool-call payload; its result is returned.
    """
    return tools_by_name[tool_call["name"]].invoke(tool_call)

@entrypoint(checkpointer=MemorySaver()) # New 
def agent(messages: list[BaseMessage], previous: list[BaseMessage]): # New 
    """Tool-calling agent with short-term memory.

    Args:
        messages: The new messages for this run.
        previous: Messages carried over from short-term memory; when it is
            not ``None`` the new messages are appended to it.

    Returns:
        The full conversation, ending with the LLM response that requested
        no further tool calls.
    """
    # Start from the remembered conversation when there is one.
    history = messages if previous is None else add_messages(previous, messages)

    response = call_llm(history)

    # Keep looping for as long as the model asks for tools.
    while response.tool_calls:
        outputs = [call_tool(request) for request in response.tool_calls]
        history = add_messages(history, [response, *outputs])
        response = call_llm(history)

    return add_messages(history, response)

# Thread ID
# A fresh random id; later cells reuse `config` to continue on this same thread.
thread_id = str(uuid.uuid4())

# Config
config = {"configurable": {"thread_id": thread_id}}

# Run with checkpointer to persist state in memory
messages = agent.invoke([HumanMessage(content="Add 3 and 4.")], config)
# Print the whole conversation returned by the run.
for m in messages:
    m.pretty_print()


Add 3 and 4.
Tool Calls:
  add (call_wVN0NiCHHueHRdaDGFEt1hFh)
 Call ID: call_wVN0NiCHHueHRdaDGFEt1hFh
  Args:
    a: 3
    b: 4
Name: add

7

The result of adding 3 and 4 is 7.


In [57]:
# Checkpoint state
# Read the state stored for the thread in `config` and print each stored message.
agent_state = agent.get_state(config)
for m in agent_state.values:
    m.pretty_print()


Add 3 and 4.
Tool Calls:
  add (call_FisH9R1uplO9Nyx7fzWDY6uE)
 Call ID: call_FisH9R1uplO9Nyx7fzWDY6uE
  Args:
    a: 3
    b: 4
Name: add

7

The sum of 3 and 4 is 7.


In [58]:
# Continue with the same thread
# Reusing `config` (same thread_id) lets the agent see the earlier messages.
messages = agent.invoke([HumanMessage(content="Take the result and multiply it by 2.")], config)
for m in messages:
    m.pretty_print()


Add 3 and 4.
Tool Calls:
  add (call_FisH9R1uplO9Nyx7fzWDY6uE)
 Call ID: call_FisH9R1uplO9Nyx7fzWDY6uE
  Args:
    a: 3
    b: 4
Name: add

7

The sum of 3 and 4 is 7.

Take the result and multiply it by 2.
Tool Calls:
  multiply (call_Q7TTDpimEZf0QC6klEx2qTRJ)
 Call ID: call_Q7TTDpimEZf0QC6klEx2qTRJ
  Args:
    a: 7
    b: 2
Name: multiply

14

The result of multiplying 7 by 2 is 14.


In [60]:
# Continue with the same thread
# Stream the run and print only the last message of each streamed item.
for item in agent.stream([HumanMessage(content="Take the result and multiply it by 3.")], config, stream_mode="values"):
    item[-1].pretty_print()


The result of multiplying 42 by 3 is 126.


In [41]:
# Checkpoint state
# Read the state stored for the thread in `config` and print each stored message.
agent_state = agent.get_state(config)
for m in agent_state.values:
    m.pretty_print()


Add 3 and 4.
Tool Calls:
  add (call_5Ff1Bj5S4TYSYAoW1TpSFeyA)
 Call ID: call_5Ff1Bj5S4TYSYAoW1TpSFeyA
  Args:
    a: 3
    b: 4
Name: add

7

The sum of 3 and 4 is 7.

Take the result and multiply it by 2.
Tool Calls:
  multiply (call_kk8W7CO9hAZgYwLVaISHGvKs)
 Call ID: call_kk8W7CO9hAZgYwLVaISHGvKs
  Args:
    a: 7
    b: 2
Name: multiply

14

The result of multiplying 7 by 2 is 14.

Take the result and multiply it by 3.
Tool Calls:
  multiply (call_38ckbEtvlXDKAZLWMoTULVQs)
 Call ID: call_38ckbEtvlXDKAZLWMoTULVQs
  Args:
    a: 14
    b: 3
Name: multiply

42

Multiplying 14 by 3 gives you 42.


### Agent with HITL

* Add an `interrupt` to the workflow to allow for human-in-the-loop (HITL) review

In [13]:
from langgraph.types import interrupt

def call_llm(messages: list[BaseMessage]):
    """Ask the tool-bound LLM for its next message.

    A fixed arithmetic-assistant system prompt is placed ahead of ``messages``
    and the result of ``llm_with_tools.invoke`` is returned unchanged.
    """
    system_prompt = SystemMessage(
        content="You are a helpful assistant tasked with performing arithmetic on a set of inputs."
    )
    return llm_with_tools.invoke([system_prompt] + messages)

def call_tool(tool_call: ToolCall):
    """Run a tool call only after a human has approved it.

    The workflow is paused with ``interrupt``; the value supplied on resume is
    treated as the approval decision (truthy means approved).

    Args:
        tool_call: The tool call emitted by the model.

    Returns:
        The tool's result when approved; otherwise a ``ToolMessage`` bound to
        ``tool_call["id"]`` saying that the call was rejected.
    """
    # Imported locally because this cell does not import ToolMessage itself.
    from langchain_core.messages import ToolMessage

    # Interrupt the workflow to get a review from a human.
    is_approved = interrupt({ # New 
            # Any json-serializable payload provided to interrupt as argument.
            # It will be surfaced on the client side as an Interrupt when streaming data
            # from the workflow.
            "tool_call": tool_call, # The tool call we want reviewed.
            # We can add any additional information that we need.
            # For example, introduce a key called "action" with some instructions.
            "action": "Please approve/reject the tool call",
        })

    if not is_approved:
        # Answer the tool call explicitly. A bare string would not be recorded as a
        # tool result, so the model's tool call would be left unanswered and the
        # next LLM call would be sent an invalid message history.
        return ToolMessage(content="Tool call rejected", tool_call_id=tool_call["id"])

    tool = tools_by_name[tool_call["name"]]
    return tool.invoke(tool_call)

@entrypoint(checkpointer=MemorySaver())  
def agent(messages: list[BaseMessage], previous: list[BaseMessage]): 
    """Tool-calling agent with short-term memory and human review of tool calls.

    Args:
        messages: The new messages for this run.
        previous: Messages carried over from short-term memory; when it is
            not ``None`` the new messages are appended to it.

    Returns:
        The full conversation, ending with the LLM response that requested
        no further tool calls.
    """
    # Start from the remembered conversation when there is one.
    history = messages if previous is None else add_messages(previous, messages)

    response = call_llm(history)

    # Keep looping for as long as the model asks for tools; each call_tool
    # pauses for approval before anything is executed.
    while response.tool_calls:
        outputs = [call_tool(request) for request in response.tool_calls]
        history = add_messages(history, [response, *outputs])
        response = call_llm(history)

    return add_messages(history, response)

# Thread ID
thread_id = str(uuid.uuid4())

# Config
config = {"configurable": {"thread_id": thread_id}}

# Run until the interrupt 
for item in agent.stream([HumanMessage(content="Add 3 and 4.")], config, stream_mode="updates"):
    # Only interrupt updates carry the review payload; skip any other update
    # instead of failing with a KeyError (same guard as the later HITL cells).
    if '__interrupt__' in item:
        print(item['__interrupt__'][0].value)

{'tool_call': {'name': 'add', 'args': {'a': 3, 'b': 4}, 'id': 'call_Np3qpF1w2n6VHgIEXNvw7duZ', 'type': 'tool_call'}, 'action': 'Please approve/reject the tool call'}


In [83]:
from langgraph.types import Command
for item in agent.stream(Command(resume=True), config, stream_mode="updates"):
    # A resumed run can emit updates other than the entrypoint's result
    # (for example a further interrupt), so only index 'agent' when it is present.
    if 'agent' in item:
        item['agent'][-1].pretty_print()


The sum of 3 and 4 is 7.


### Agent with HITL and Long-term memory

* Add an `interrupt` to the workflow to allow for human-in-the-loop (HITL) review
* Add tool for [long-term memory](https://langchain-ai.github.io/langgraph/concepts/memory/#long-term-memory)

In [164]:
import uuid
from typing import Annotated, Optional

from langchain_core.tools import InjectedToolArg
from langgraph.store.base import BaseStore

# NOTE(review): the docstring is left untouched on purpose — `@tool` presumably uses it
# as the description shown to the model; confirm before rewording.
@tool 
def upsert_memory(
    content: str,
    *,
    memory_id: Optional[uuid.UUID] = None,
    # Hide these arguments from the model.
    store: Annotated[BaseStore, InjectedToolArg],
):
    """Upsert a memory in the database.

    If a memory conflicts with an existing one, then just UPDATE the
    existing one by passing in memory_id - don't create two memories
    that are the same. If the user corrects a memory, UPDATE it.

    Args:
        content: The main content of the memory. For example:
            "User expressed interest in learning about French."
        memory_id: ONLY PROVIDE IF UPDATING AN EXISTING MEMORY.
        The memory to overwrite.
    """
    # Reuse the supplied id when one is given; otherwise generate a new random one.
    mem_id = memory_id or uuid.uuid4()

    # BaseStore is a LangGraph persistence layer
    # NOTE(review): ("memories") is just the string "memories" — parentheses without a
    # trailing comma do not create a tuple. The agent's store.search(...) passes the same
    # value, so if a tuple namespace is intended, change both to ("memories",) together.
    store.put(
        ("memories"),
        key=str(mem_id),
        value={"content": content},
    )
    return f"Stored memory {mem_id}"

# Augment the LLM with tools
tools = [upsert_memory]
# Name -> tool lookup used when executing the model's tool calls.
# The comprehension variable is `t` so it does not read like the `tool` decorator.
tools_by_name = {t.name: t for t in tools}
llm_with_memory_tool = llm.bind_tools(tools)

In [166]:
from langgraph.store.memory import InMemoryStore # New 
from langchain_core.messages import ToolMessage

def call_llm(messages: list[BaseMessage]):
    """Ask the memory-tool-bound LLM for its next message.

    A fixed memory-assistant system prompt is placed ahead of ``messages``
    and the result of ``llm_with_memory_tool.invoke`` is returned unchanged.
    """
    system_prompt = SystemMessage(
        content="You are a helpful assistant tasked with storing memories." # New 
    )
    return llm_with_memory_tool.invoke([system_prompt] + messages) # New 

def call_tool(tool_call: ToolCall, store: BaseStore):
    """Run a memory tool call only after a human has approved it.

    The workflow is paused with ``interrupt``; the value supplied on resume is
    treated as the approval decision (truthy means approved).

    Args:
        tool_call: The tool call emitted by the model.
        store: Store passed to the tool as its ``store`` argument.

    Returns:
        A ``ToolMessage`` bound to ``tool_call["id"]``: the stored content when
        approved, or a rejection notice otherwise.
    """
    # Interrupt the workflow to get a review from a human.
    is_approved = interrupt({ # New 
            # Any json-serializable payload provided to interrupt as argument.
            # It will be surfaced on the client side as an Interrupt when streaming data
            # from the workflow.
            "tool_call": tool_call, # The tool call we want reviewed.
            # We can add any additional information that we need.
            # For example, introduce a key called "action" with some instructions.
            "action": "Please approve/reject the tool call",
        })

    if not is_approved:
        # Answer the tool call explicitly. A bare string would not be recorded as a
        # tool result, so the model's tool call would be left unanswered and the
        # next LLM call would be sent an invalid message history.
        return ToolMessage(content="Tool call rejected", tool_call_id=tool_call["id"])

    tool = tools_by_name[tool_call["name"]]
    # The store is not part of the model's arguments, so it is added here.
    tool.invoke({**tool_call["args"], "store": store})

    # Report success only after the tool has actually run.
    print("Tool call approved, Memory Added!")

    # Tool message provides confirmation to the model that the actions it took were completed
    return ToolMessage(content=tool_call["args"]["content"], tool_call_id=tool_call["id"])

@entrypoint(checkpointer=MemorySaver(), store=InMemoryStore())  
def agent(messages: list[BaseMessage], previous: list[BaseMessage], store: BaseStore): 
    """Tool-calling agent with short-term memory, long-term memory and human review.

    Args:
        messages: The new messages for this run.
        previous: Messages carried over from short-term memory; when it is
            not ``None`` the new messages are appended to it.
        store: Store searched for memories and handed to the tools.

    Returns:
        The full conversation, ending with the LLM response that requested
        no further tool calls. The memory-context message is not part of it.
    """
    # Add previous messages from short-term memory to the current messages
    if previous is not None:
        messages = add_messages(previous, messages)

    # New 
    # Retrieve the most recent memories for context
    # NOTE(review): ("memories") is a plain string, not a 1-tuple. It is kept as-is because
    # `upsert_memory` writes with the same value; switch both to ("memories",) together.
    memories = store.search( 
        ("memories"),
        limit=10,
    )

    # New
    # Format memories for inclusion in the prompt
    formatted = "\n".join(f"[{mem.key}]: {mem.value} (similarity: {mem.score})" for mem in memories)
    if formatted:
        formatted = f"""
<memories>
{formatted}
</memories>"""

    # Build the memory context once so every LLM call in this run receives it.
    # Previously only the first call got it; follow-up calls after a tool result
    # were made without the user context.
    memory_context = SystemMessage(content=f"Here is some context for you about the user: {formatted}")

    # New
    # Call the LLM
    llm_response = call_llm([memory_context, *messages])

    # Keep looping for as long as the model asks for tools.
    while llm_response.tool_calls:
        # Execute tools
        tool_results = [
            call_tool(tool_call, store) for tool_call in llm_response.tool_calls
        ]
        messages = add_messages(messages, [llm_response, *tool_results])
        llm_response = call_llm([memory_context, *messages])

    return add_messages(messages, llm_response)

# Thread ID
# A fresh random id so this run starts on its own thread.
thread_id = str(uuid.uuid4())

# Config
config = {"configurable": {"thread_id": thread_id}}

# Run until the interrupt 
for item in agent.stream([HumanMessage(content="Hi my name is Lance and I live in San Francisco.")], config, stream_mode="updates"):
    # Print the review payload of the interrupt; other updates are ignored.
    if '__interrupt__' in item:
        print(item['__interrupt__'][0].value)

{'tool_call': {'name': 'upsert_memory', 'args': {'content': "User's name is Lance and they live in San Francisco."}, 'id': 'call_4wMyPHYypNRscBdylzW6x3UD', 'type': 'tool_call'}, 'action': 'Please approve/reject the tool call'}


In [167]:
for item in agent.stream(Command(resume=True), config, stream_mode="updates"):
    # A resumed run can emit updates other than the entrypoint's result
    # (for example a further interrupt), so only index 'agent' when it is present.
    if 'agent' in item:
        item['agent'][-1].pretty_print()

Tool call approved, Memory Added!

Nice to meet you, Lance! How can I assist you today?


TODO: Clarify the problem with *not* using `@task` in the example above.

It still seems to run only once.

### Adding tasks

* TODO: Why?


In [162]:
from langgraph.func import task # New 

@task
def call_llm(messages: list[BaseMessage]):
    """Ask the memory-tool-bound LLM for its next message (run as a task).

    A fixed memory-assistant system prompt is placed ahead of ``messages``
    and the result of ``llm_with_memory_tool.invoke`` is returned unchanged.
    """
    system_prompt = SystemMessage(
        content="You are a helpful assistant tasked with storing memories." # New 
    )
    return llm_with_memory_tool.invoke([system_prompt] + messages) # New 

@task
def call_tool(tool_call: ToolCall, store: BaseStore):
    """Run a memory tool call only after a human has approved it (run as a task).

    The workflow is paused with ``interrupt``; the value supplied on resume is
    treated as the approval decision (truthy means approved).

    Args:
        tool_call: The tool call emitted by the model.
        store: Store passed to the tool as its ``store`` argument.

    Returns:
        A ``ToolMessage`` bound to ``tool_call["id"]``: the stored content when
        approved, or a rejection notice otherwise.
    """
    # Interrupt the workflow to get a review from a human.
    is_approved = interrupt({ # New 
            # Any json-serializable payload provided to interrupt as argument.
            # It will be surfaced on the client side as an Interrupt when streaming data
            # from the workflow.
            "tool_call": tool_call, # The tool call we want reviewed.
            # We can add any additional information that we need.
            # For example, introduce a key called "action" with some instructions.
            "action": "Please approve/reject the tool call",
        })

    if not is_approved:
        # Answer the tool call explicitly. A bare string would not be recorded as a
        # tool result, so the model's tool call would be left unanswered and the
        # next LLM call would be sent an invalid message history.
        return ToolMessage(content="Tool call rejected", tool_call_id=tool_call["id"])

    tool = tools_by_name[tool_call["name"]]
    # The store is not part of the model's arguments, so it is added here.
    tool.invoke({**tool_call["args"], "store": store})

    # Tool message provides confirmation to the model that the actions it took were completed
    return ToolMessage(content=tool_call["args"]["content"], tool_call_id=tool_call["id"])

@entrypoint(checkpointer=MemorySaver(), store=InMemoryStore())  
def agent(messages: list[BaseMessage], previous: list[BaseMessage], store: BaseStore): 
    """Tool-calling agent with memory and human review, built from tasks.

    ``call_llm`` and ``call_tool`` are tasks here, so each call's value is
    obtained with ``.result()``.

    Args:
        messages: The new messages for this run.
        previous: Messages carried over from short-term memory; when it is
            not ``None`` the new messages are appended to it.
        store: Store searched for memories and handed to the tools.

    Returns:
        The full conversation, ending with the LLM response that requested
        no further tool calls. The memory-context message is not part of it.
    """
    # Add previous messages from short-term memory to the current messages
    if previous is not None:
        messages = add_messages(previous, messages)

    # New 
    # Retrieve the most recent memories for context
    # NOTE(review): ("memories") is a plain string, not a 1-tuple. It is kept as-is because
    # `upsert_memory` writes with the same value; switch both to ("memories",) together.
    memories = store.search( 
        ("memories"),
        limit=10,
    )

    # New
    # Format memories for inclusion in the prompt
    formatted = "\n".join(f"[{mem.key}]: {mem.value} (similarity: {mem.score})" for mem in memories)
    if formatted:
        formatted = f"""
<memories>
{formatted}
</memories>"""

    # Build the memory context once so every LLM call in this run receives it.
    # Previously only the first call got it; follow-up calls after a tool result
    # were made without the user context.
    memory_context = SystemMessage(content=f"Here is some context for you about the user: {formatted}")

    # New
    # Call the LLM
    llm_response = call_llm([memory_context, *messages]).result()

    # Keep looping for as long as the model asks for tools.
    while llm_response.tool_calls:
        # Execute tools one after another, waiting for each result in turn.
        tool_results = [
            call_tool(tool_call=tool_call, store=store).result() for tool_call in llm_response.tool_calls
        ]
        messages = add_messages(messages, [llm_response, *tool_results])
        llm_response = call_llm([memory_context, *messages]).result()

    return add_messages(messages, llm_response)

# Thread ID
# A fresh random id so this run starts on its own thread.
thread_id = str(uuid.uuid4())

# Config
config = {"configurable": {"thread_id": thread_id}}

# Run until the interrupt 
for item in agent.stream([HumanMessage(content="Hi my name is Isaac and I live in Palo Alto.")], config, stream_mode="updates"):
    # Print the review payload of the interrupt; other updates are ignored.
    if '__interrupt__' in item:
        print(item['__interrupt__'][0].value)

{'tool_call': {'name': 'upsert_memory', 'args': {'content': "User's name is Isaac and he lives in Palo Alto."}, 'id': 'call_OI9WYghIIgaw6WAlq35KftbA', 'type': 'tool_call'}, 'action': 'Please approve/reject the tool call'}


In [163]:
for item in agent.stream(Command(resume=True), config, stream_mode="updates"):
    if 'agent' in item:
        # pretty_print() prints the message itself and returns None, so wrapping
        # it in print() only added a stray "None" line to the output.
        item['agent'][-1].pretty_print()


Hello Isaac! I've noted that you live in Palo Alto. How can I assist you today?
None


### Adding Time Travel
