In [None]:
# OPTION 3: Use LangGraph's prebuilt ReAct agent with tools
from langgraph.prebuilt import create_react_agent
from langchain_community.tools import DuckDuckGoSearchRun

# Create some real tools
search_tool = DuckDuckGoSearchRun()

@tool 
def calculator(expression: str) -> str:
    """Evaluate a mathematical expression"""
    try:
        result = eval(expression)
        return f"Result: {result}"
    except:
        return "Invalid expression"

tools = [search_tool, calculator, get_weather]  # Use the weather tool from Option 1

# Create ReAct agent with tools
react_agent = create_react_agent(llm, tools=tools, debug=True)

print("="*50)
print("OPTION 3: LangGraph ReAct agent with real tools")
print("="*50)

response = react_agent.invoke({
    "messages": [HumanMessage(content="Search for the weather in Seattle and tell me what you find")]
})

for m in response['messages']:
    m.pretty_print()

print("\n" + "="*30)
print("SUMMARY - Your options:")
print("="*30)
print("1. ✅ LangChain @tool decorator + bind_tools()")
print("2. ✅ Native Llama Stack client for MCP")  
print("3. ✅ LangGraph ReAct agent with tools")
print("4. ❌ MCP format with bind_tools() - NEVER works")

In [None]:
# OPTION 2: Native Llama Stack MCP tools (bypass LangChain)
from llama_stack_client import LlamaStackClient

# Use Llama Stack client for real MCP support
client = LlamaStackClient(
    base_url="http://localhost:8321",  # Fixed URL
    timeout=600.0
)

def chatbot_with_mcp_tools(state: State):
    # Convert messages
    llama_messages = []
    for msg in state["messages"]:
        if isinstance(msg, dict):
            llama_messages.append(msg)
        else:
            role = "user" if hasattr(msg, 'type') and msg.type == "human" else "user"
            if hasattr(msg, 'type') and msg.type == "ai":
                role = "assistant"
            llama_messages.append({"role": role, "content": msg.content})
    
    # Try MCP tools, fallback to basic
    try:
        response = client.inference.chat_completion(
            model_id=INFERENCE_MODEL,
            messages=llama_messages,
            tools=[{"type": "mcp", "tool_name": "weather", "server_label": "weather"}]
        )
        print("✅ Used MCP weather tool")
    except Exception as e:
        print(f"⚠️ MCP failed, using basic: {e}")
        response = client.inference.chat_completion(
            model_id=INFERENCE_MODEL,
            messages=llama_messages
        )
    
    return {"messages": [AIMessage(content=response.completion_message.content)]}

# Build graph with MCP tools
graph_mcp = StateGraph(State)
graph_mcp.add_node("chatbot", chatbot_with_mcp_tools)
graph_mcp.add_edge(START, "chatbot")
graph_mcp.add_edge("chatbot", END)
graph_mcp_compiled = graph_mcp.compile()

print("="*50)
print("OPTION 2: Native Llama Stack MCP tools")
print("="*50)

response = graph_mcp_compiled.invoke({
    "messages": [HumanMessage(content="What's the weather in Seattle?")]
})

for m in response['messages']:
    m.pretty_print()

In [None]:
# OPTION 1: Standard LangChain tools (WORKS with bind_tools)
from langchain_core.tools import tool

# Create a proper LangChain weather tool
@tool
def get_weather(location: str) -> str:
    """Get current weather for a location"""
    # Mock weather data - replace with real API call
    return f"Weather in {location}: Sunny, 72°F (22°C), light breeze"

# This WORKS because it's proper LangChain format
llm_with_langchain_tools = llm.bind_tools([get_weather])

def chatbot_with_langchain_tools(state: State):
    message = llm_with_langchain_tools.invoke(state["messages"])
    return {"messages": [message]}

# Build graph with LangChain tools
graph_langchain = StateGraph(State)
graph_langchain.add_node("chatbot", chatbot_with_langchain_tools)
graph_langchain.add_edge(START, "chatbot")
graph_langchain.add_edge("chatbot", END)
graph_langchain_compiled = graph_langchain.compile()

print("="*50)
print("OPTION 1: LangChain tools (bind_tools compatible)")
print("="*50)

response = graph_langchain_compiled.invoke({
    "messages": [HumanMessage(content="What's the weather in Seattle?")]
})

for m in response['messages']:
    m.pretty_print()

In [None]:
# FIXED VERSION - Remove the problematic bind_tools line
import os
from llama_stack_client import LlamaStackClient

# Environment variables - FIXED URLs
LLAMA_STACK_URL = os.getenv("LLAMA_STACK_URL", "http://localhost:8321")  # Remove /v1/openai/v1
INFERENCE_MODEL = os.getenv("INFERENCE_MODEL", "ollama/llama3.2:3b-instruct-fp16")
INFERENCE_SERVER_OPENAI = os.getenv("LLAMA_STACK_ENDPOINT_OPENAI", "http://localhost:8321/v1/openai/v1")
API_KEY = os.getenv("OPENAI_API_KEY", "not-applicable")

print("LLAMA_STACK_URL: ", LLAMA_STACK_URL)
print("INFERENCE_MODEL: ", INFERENCE_MODEL)

# Working LLM setup
llm = ChatOpenAI(
    model=INFERENCE_MODEL,
    openai_api_key=API_KEY,  
    openai_api_base=INFERENCE_SERVER_OPENAI,
    use_responses_api=True,
)

# Test connectivity
print("Testing basic connectivity:")
print(llm.invoke("Hello"))

# REMOVE THE PROBLEMATIC LINE - this will never work:
# llm_with_tools = llm.bind_tools([{"type": "mcp::weather"}])

# Instead, use the working LLM directly
class State(TypedDict):
    messages: Annotated[list, add_messages]

def chatbot(state: State):
    # Use the working LLM without tools
    message = llm.invoke(state["messages"])
    return {"messages": [message]}

# Build the graph
graph_builder = StateGraph(State)
graph_builder.add_node("chatbot", chatbot)
graph_builder.add_edge(START, "chatbot")
graph_builder.add_edge("chatbot", END)

graph = graph_builder.compile()

# Test with your example
print("\n" + "="*50)
print("Testing working LangGraph without MCP tools:")
print("="*50)

response = graph.invoke({
    "messages": [{"role": "user", "content": "What is the capital of Spain?"}]
})

for m in response['messages']:
    m.pretty_print()

print("\n" + "="*30)
print("Summary:")
print("="*30)
print("✅ LangGraph works with basic LLM")
print("❌ MCP tools don't work with LangChain's bind_tools()")
print("💡 Use native Llama Stack client for MCP support")

In [17]:
# Robust MCP Solution with fallback
import os
from llama_stack_client import LlamaStackClient
from langgraph.graph import StateGraph, END, START
from langchain_core.messages import HumanMessage, AIMessage
from typing import Annotated
from typing_extensions import TypedDict
from langgraph.graph.message import add_messages

# Fixed environment variables
LLAMA_STACK_URL = os.getenv("LLAMA_STACK_URL", "http://localhost:8321/v1/openai/v1")
INFERENCE_MODEL = os.getenv("INFERENCE_MODEL", "ollama/llama3.2:3b-instruct-fp16")

print("LLAMA_STACK_URL: ", LLAMA_STACK_URL)
print("INFERENCE_MODEL: ", INFERENCE_MODEL)

# Native Llama Stack client
client = LlamaStackClient(
    base_url=LLAMA_STACK_URL,
    timeout=600.0
)

# Test basic connectivity first
try:
    basic_response = client.inference.chat_completion(
        model_id=INFERENCE_MODEL,
        messages=[{"role": "user", "content": "Hello"}]
    )
    print("✅ Basic Llama Stack connectivity works")
    print("Response:", basic_response.completion_message.content[:100] + "...")
except Exception as e:
    print(f"❌ Basic connectivity failed: {e}")

class State(TypedDict):
    messages: Annotated[list, add_messages]

def chatbot_with_fallback(state: State):
    # Convert LangGraph messages to Llama Stack format
    llama_messages = []
    for msg in state["messages"]:
        if isinstance(msg, dict):
            llama_messages.append(msg)
        else:
            role = "user"
            if hasattr(msg, 'type'):
                if msg.type == "human":
                    role = "user"
                elif msg.type == "ai":
                    role = "assistant"
            llama_messages.append({
                "role": role,
                "content": msg.content
            })
    
    # Try MCP first, fallback to basic if it fails
    try:
        response = client.inference.chat_completion(
            model_id=INFERENCE_MODEL,
            messages=llama_messages,
            tools=[
                {
                    "type": "mcp",
                    "tool_name": "weather",
                    "server_label": "weather"
                }
            ]
        )
        print("✅ MCP weather tool worked!")
    except Exception as e:
        print(f"⚠️ MCP failed ({e}), using basic inference...")
        response = client.inference.chat_completion(
            model_id=INFERENCE_MODEL,
            messages=llama_messages
        )
    
    return {"messages": [AIMessage(content=response.completion_message.content)]}

# Build StateGraph
graph_builder = StateGraph(State)
graph_builder.add_node("chatbot", chatbot_with_fallback)
graph_builder.add_edge(START, "chatbot")
graph_builder.add_edge("chatbot", END)

graph = graph_builder.compile()

print("="*50)
print("Testing MCP with fallback:")
print("="*50)

response = graph.invoke({
    "messages": [HumanMessage(content="What's the weather in Seattle?")]
})

for m in response['messages']:
    m.pretty_print()

  basic_response = client.inference.chat_completion(
INFO:httpx:HTTP Request: POST http://localhost:8321/v1/openai/v1/v1/inference/chat-completion "HTTP/1.1 404 Not Found"
  response = client.inference.chat_completion(
INFO:httpx:HTTP Request: POST http://localhost:8321/v1/openai/v1/v1/inference/chat-completion "HTTP/1.1 404 Not Found"
  response = client.inference.chat_completion(
INFO:httpx:HTTP Request: POST http://localhost:8321/v1/openai/v1/v1/inference/chat-completion "HTTP/1.1 404 Not Found"


LLAMA_STACK_URL:  http://localhost:8321/v1/openai/v1
INFERENCE_MODEL:  ollama/llama3.2:3b-instruct-fp16
❌ Basic connectivity failed: Error code: 404 - {'detail': 'Not Found'}
Testing MCP with fallback:
⚠️ MCP failed (Error code: 404 - {'detail': 'Not Found'}), using basic inference...


NotFoundError: Error code: 404 - {'detail': 'Not Found'}

In [None]:
!uv pip install langgraph langchain-openai langchain-core llama-stack-client

zsh:1: command not found: pip


In [1]:
from langgraph.graph import StateGraph, END, START
from langchain_openai import ChatOpenAI
from langchain_core.tools import tool
from langchain_core.messages import HumanMessage, ToolMessage, AIMessage
from typing import Annotated
from typing_extensions import TypedDict
from langgraph.graph.message import add_messages

import os
#from dotenv import load_dotenv
#load_dotenv()

from llama_stack_client import LlamaStackClient
import pprint

In [36]:
import os
from llama_stack_client import LlamaStackClient

# Environment variables
LLAMA_STACK_URL = os.getenv("LLAMA_STACK_URL", "http://localhost:8321/v1/openai/v1")
INFERENCE_MODEL = os.getenv("INFERENCE_MODEL", "meta-llama/Llama-3.2-3B-Instruct")
INFERENCE_SERVER_OPENAI = os.getenv("LLAMA_STACK_ENDPOINT_OPENAI", "http://localhost:8321/v1/openai/v1")
API_KEY = os.getenv("OPENAI_API_KEY", "not-applicable")

print("LLAMA_STACK_URL: ", LLAMA_STACK_URL)
print("INFERENCE_MODEL: ", INFERENCE_MODEL)

llm = ChatOpenAI(
    model=INFERENCE_MODEL,
    openai_api_key=API_KEY,  
    openai_api_base=INFERENCE_SERVER_OPENAI,
    use_responses_api=True,
)

# Proof of connectivity
print(llm.invoke("Hello"))

#llm_with_tools = llm.bind_tools([{"type": "mcp::weather"}])

llm_with_tools = llm.bind_tools(
    [          
        {
            "type": "mcp",
            "server_label": "weather",     
            "server_url": "http://host.containers.internal:3001/sse",       
            "require_approval": "never",
        },
    ])

class State(TypedDict):
    messages: Annotated[list, add_messages]


def chatbot(state: State):
    message = llm_with_tools.invoke(state["messages"])
    #print(message)
    return {"messages": [message]}

graph_builder = StateGraph(State)

graph_builder.add_node("chatbot", chatbot)
graph_builder.add_edge(START, "chatbot")
graph_builder.add_edge("chatbot", END)

graph = graph_builder.compile()

response = graph.invoke(
    {"messages": [{"role": "user", "content": "What is the weather in Seattle?"}]})

for m in response['messages']:
    m.pretty_print()

LLAMA_STACK_URL:  http://localhost:8321/v1/openai/v1
INFERENCE_MODEL:  meta-llama/Llama-3.2-3B-Instruct


INFO:httpx:HTTP Request: POST http://localhost:8321/v1/openai/v1/responses "HTTP/1.1 200 OK"


content=[{'type': 'text', 'text': 'Hello! How can I assist you today?', 'annotations': []}] additional_kwargs={} response_metadata={'id': 'resp-31345b36-1c80-47f0-bad3-d185cdab72cf', 'created_at': 1757440659.0, 'model': 'meta-llama/Llama-3.2-3B-Instruct', 'object': 'response', 'status': 'completed', 'model_name': 'meta-llama/Llama-3.2-3B-Instruct'} id='msg_62ad7a94-96df-4f85-8b46-c48b7e56aeb0'


INFO:httpx:HTTP Request: POST http://localhost:8321/v1/openai/v1/responses "HTTP/1.1 200 OK"



What is the weather in Seattle?

[{'type': 'text', 'text': 'The current weather in Seattle is:\n\n* Temperature: 73°F (23°C)\n* Wind: 6 mph NNW (north-northwest)\n* Precipitation: Slight Chance Rain Showers\n\nPlease note that this is a fictional forecast and actual weather conditions may vary. For accurate forecast, please check with reliable sources such as National Weather Service or local news.', 'annotations': []}]


In [None]:
DEBUG_MODE = True

# This works because we're NOT using create_react_agent
# Following the exact pattern from burrsutter's example
llm_with_tools = llm.bind_tools([{"type": "mcp::weather"}])

class State(TypedDict):
    messages: Annotated[list, add_messages]

def chatbot(state: State):
    message = llm_with_tools.invoke(state["messages"])
    return {"messages": [message]}

# Build custom StateGraph (NOT create_react_agent)
graph_builder = StateGraph(State)
graph_builder.add_node("chatbot", chatbot)
graph_builder.add_edge(START, "chatbot")
graph_builder.add_edge("chatbot", END)

graph = graph_builder.compile()

# Test the agent
response = graph.invoke({
    "messages": [HumanMessage(content="What's the weather in Seattle?")]
})

for message in response["messages"]:
    message.pretty_print()