LangGraph Components

In [None]:
import os
from kubernetes import client, config
from kubernetes.client.exceptions import ApiException

os.environ["NRP_API_KEY"] = "API key here"
config.load_incluster_config()

v1 = client.CoreV1Api()
apps_v1 = client.AppsV1Api()
batch_v1 = client.BatchV1Api()
networking_v1 = client.NetworkingV1Api()


In [314]:
from langgraph.graph import StateGraph, END
from typing import TypedDict, Annotated
import operator
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage, ToolMessage

from tabulate import tabulate

In [315]:
from openai import OpenAI

client = OpenAI(
    api_key=os.environ.get("NRP_API_KEY"),
    base_url="https://llm.nrp-nautilus.io/"
)


In [316]:
def describe_pods(namespace="gsoc"):
    """
    Describe pods and print only fields useful for Prometheus metric queries.
    """
    try:
        pods = v1.list_namespaced_pod(namespace=namespace) if namespace else v1.list_pod_for_all_namespaces()

        rows = []
        for pod in pods.items:
            pod_name = pod.metadata.name
            ns = pod.metadata.namespace
            pod_ip = pod.status.pod_ip
            node = pod.spec.node_name
            container_names = [c.name for c in pod.spec.containers]
            container = ", ".join(container_names)

            rows.append([pod_name, ns, pod_ip, node, container])

        headers = ["Pod", "Namespace", "Pod IP", "Node", "Container"]
        print(tabulate(rows, headers=headers, tablefmt="fancy_grid"))

    except ApiException as e:
        print(f"❌ Error fetching pods: {e}")


In [317]:
describe_pods()

╒══════════════════════════════════╤═════════════╤════════════════╤════════════════════════════╤═════════════╕
│ Pod                              │ Namespace   │ Pod IP         │ Node                       │ Container   │
╞══════════════════════════════════╪═════════════╪════════════════╪════════════════════════════╪═════════════╡
│ agno-deployment-55c55964db-lzhkx │ gsoc        │ 10.244.215.212 │ hcc-nrp-shor-c6013.unl.edu │ jupyter     │
├──────────────────────────────────┼─────────────┼────────────────┼────────────────────────────┼─────────────┤
│ my-postgres-cluster-0            │ gsoc        │ 10.244.91.149  │ k8s-gen4-02.ampath.net     │ postgres    │
├──────────────────────────────────┼─────────────┼────────────────┼────────────────────────────┼─────────────┤
│ shellshock-cluster-0             │ gsoc        │ 10.244.19.231  │ dtn-gpu2.kreonet.net       │ postgres    │
╘══════════════════════════════════╧═════════════╧════════════════╧════════════════════════════╧═════════════╛


In [318]:
#namespace gpu utilization

In [319]:
import requests


def namespace_gpu_utilization(prom_url="https://prometheus.nrp-nautilus.io", threshold=0):
    """
    Display average GPU utilization per namespace using PromQL.
    Args:
        prom_url (str): Base Prometheus URL.
        threshold (float): Minimum % utilization to show (filtering).
    """
    query = 'avg by (namespace) (DCGM_FI_DEV_GPU_UTIL)'
    url = f"{prom_url}/api/v1/query"

    try:
        response = requests.get(url, params={"query": query}, timeout=10)
        response.raise_for_status()
        data = response.json()

        if data.get("status") != "success":
            print("❌ Prometheus query failed.")
            return

        results = data["data"]["result"]
        if not results:
            print("✅ Query successful, but no GPU usage data returned.")
            return

        rows = []
        for r in results:
            ns = r["metric"].get("namespace", "unknown")
            util = float(r["value"][1])
            if util >= threshold:
                status = (
                    "🟢 Low" if util < 40 else
                    "🟡 Moderate" if util < 70 else
                    "🔴 High"
                )
                rows.append([ns, f"{util:.2f}%", status])

        headers = ["Namespace", "Avg GPU Utilization", "Status"]
        print(tabulate(rows, headers=headers, tablefmt="fancy_grid"))

    except Exception as e:
        print(f"❌ Error querying Prometheus: {e}")


In [320]:
namespace_gpu_utilization()


╒════════════════════════════════════╤═══════════════════════╤═════════════╕
│ Namespace                          │ Avg GPU Utilization   │ Status      │
╞════════════════════════════════════╪═══════════════════════╪═════════════╡
│ gpu-mon                            │ 0.14%                 │ 🟢 Low      │
├────────────────────────────────────┼───────────────────────┼─────────────┤
│ csusb-hpc                          │ 0.00%                 │ 🟢 Low      │
├────────────────────────────────────┼───────────────────────┼─────────────┤
│ nrp-llm                            │ 7.15%                 │ 🟢 Low      │
├────────────────────────────────────┼───────────────────────┼─────────────┤
│ csusb-xli                          │ 0.00%                 │ 🟢 Low      │
├────────────────────────────────────┼───────────────────────┼─────────────┤
│ sdsu-goldberg                      │ 0.00%                 │ 🟢 Low      │
├────────────────────────────────────┼───────────────────────┼─────────────┤
│ sp

In [321]:
import requests
from tabulate import tabulate

def fetch_dcgm_gpu_util_data(prom_url="https://prometheus.nrp-nautilus.io"):
    """
    Fetch rich GPU utilization data from Prometheus using DCGM_FI_DEV_GPU_UTIL.
    
    Returns:
        list of dicts with context: [{hostname, gpu_id, model, namespace, pod, utilization, ...}]
    """
    query = 'DCGM_FI_DEV_GPU_UTIL'
    url = f"{prom_url}/api/v1/query"

    try:
        response = requests.get(url, params={"query": query}, timeout=10)
        response.raise_for_status()
        data = response.json()

        if data.get("status") != "success":
            print("❌ Prometheus query failed.")
            return []

        results = data["data"]["result"]
        if not results:
            print("✅ Query successful, but no GPU data returned.")
            return []

        enriched = []
        for r in results:
            m = r["metric"]
            val = float(r["value"][1])
            enriched.append({
                "hostname": m.get("Hostname", "unknown"),
                "ip_port": m.get("instance", "unknown"),
                "gpu_id": m.get("gpu", "N/A"),
                "device": m.get("device", "N/A"),
                "uuid": m.get("UUID", "N/A"),
                "model": m.get("modelName", "unknown"),
                "namespace": m.get("namespace", "N/A"),
                "pod": m.get("pod", "N/A"),
                "utilization": val
            })

        return enriched

    except Exception as e:
        print(f"❌ Error querying Prometheus: {e}")
        return []


def display_gpu_data_head(data, n=5):
    """
    Display the first `n` GPU entries with rich context.
    """
    if not data:
        print("No data to display.")
        return

    rows = [
        [d["hostname"], d["gpu_id"], d["model"], f"{d['utilization']:.2f}%", d["namespace"], d["pod"]]
        for d in data[:n]
    ]
    print(tabulate(rows, headers=["Host", "GPU", "Model", "Utilization", "Namespace", "Pod"], tablefmt="fancy_grid"))


def analyze_dcgm_gpu_data(data):
    """
    Analyze DCGM GPU data with statistics and top utilization.
    """
    if not data:
        print("No data to analyze.")
        return

    total = len(data)
    avg_util = sum(d["utilization"] for d in data) / total
    maxed = [d for d in data if d["utilization"] >= 99.0]
    idle = [d for d in data if d["utilization"] < 1.0]
    available = [d for d in data if d["utilization"] < 100.0]
    unique_hosts = set(d["hostname"] for d in data)
    unique_models = set(d["model"] for d in data)

    print(f"\n🔍 Total GPUs: {total}")
    print(f"📊 Average Utilization: {avg_util:.2f}%")
    print(f"🔴 Fully Utilized GPUs (>=99%): {len(maxed)}")
    print(f"🟢 Idle GPUs (<1%): {len(idle)}")
    print(f"💻 Unique Host Machines: {len(unique_hosts)}")
    print(f"🧠 Unique GPU Models: {len(unique_models)}")
    print(f"🧮 GPUs Available (<100%): {len(available)}\n")

    print("📈 Top 10 GPUs by Utilization:")
    top = sorted(data, key=lambda x: x["utilization"], reverse=True)[:10]
    rows = [[d["hostname"], d["gpu_id"], d["model"], f"{d['utilization']:.2f}%", d["namespace"], d["pod"]] for d in top]
    print(tabulate(rows, headers=["Host", "GPU", "Model", "Utilization", "Namespace", "Pod"], tablefmt="github"))


# Run it
if __name__ == "__main__":
    data = fetch_dcgm_gpu_util_data()
    display_gpu_data_head(data, n=5)
    analyze_dcgm_gpu_data(data)


╒═════════════════════════════════╤═══════╤═════════════════════════╤═══════════════╤═══════════════╤═══════════════════════════════════════════╕
│ Host                            │   GPU │ Model                   │ Utilization   │ Namespace     │ Pod                                       │
╞═════════════════════════════════╪═══════╪═════════════════════════╪═══════════════╪═══════════════╪═══════════════════════════════════════════╡
│ k8s-gpu-01.calit2.optiputer.net │     7 │ NVIDIA GeForce GTX 1080 │ 0.00%         │ gpu-mon       │ dcgm-export-dcgm-exporter-jfh5r           │
├─────────────────────────────────┼───────┼─────────────────────────┼───────────────┼───────────────┼───────────────────────────────────────────┤
│ rci-tide-gpu-07.sdsu.edu        │     2 │ NVIDIA L40              │ 0.00%         │ csusb-xli     │ jupyter-xiangyu-li-csusb-edu---662a57cb   │
├─────────────────────────────────┼───────┼─────────────────────────┼───────────────┼───────────────┼───────────────────────

In [322]:
from langchain.tools import tool

@tool
def calculate_dcgm_gpu_stats(threshold: float = 0.0) -> str:
    """
    Analyze GPU utilization across nodes and return statistical breakdown.
    Includes averages, idle/overloaded counts, and model/host distribution.
    """
    data = fetch_dcgm_gpu_util_data()
    if not data:
        return "⚠️ No GPU data available."

    filtered = [d for d in data if d["utilization"] >= threshold]
    total = len(filtered)
    if total == 0:
        return f"✅ No GPUs over the threshold of {threshold}% utilization."

    avg_util = sum(d["utilization"] for d in filtered) / total
    maxed = [d for d in filtered if d["utilization"] >= 99.0]
    idle = [d for d in filtered if d["utilization"] < 1.0]
    moderate = [d for d in filtered if 1.0 <= d["utilization"] < 70.0]
    available = [d for d in filtered if d["utilization"] < 100.0]
    unique_models = set(d["model"] for d in filtered)
    unique_hosts = set(d["hostname"] for d in filtered)

    return f"""
📊 GPU Utilization Stats (threshold: {threshold}%):

🔍 Total GPUs Considered: {total}
📈 Average Utilization: {avg_util:.2f}%
🔴 Fully Utilized (>=99%): {len(maxed)}
🟢 Idle (<1%): {len(idle)}
⚙️  Moderate (1-70%): {len(moderate)}
💻 Unique Host Machines: {len(unique_hosts)}
🧠 Unique GPU Models: {len(unique_models)}
🧮 GPUs Available (<100%): {len(available)}
"""


In [323]:
from langchain_core.tools import tool

@tool
def dcgm_gpu_inspect_tool(threshold: float = 0.0) -> str:
    """
    Inspect raw GPU usage with model name, host, pod, and utilization.
    Filters by a minimum utilization threshold.
    """
    data = fetch_dcgm_gpu_util_data()
    if not data:
        return "⚠️ No GPU data available."

    filtered = [d for d in data if d["utilization"] >= threshold]
    if not filtered:
        return f"✅ No GPUs over {threshold}% utilization."

    top = sorted(filtered, key=lambda x: x["utilization"], reverse=True)[:10]
    rows = [
        [d["hostname"], d["gpu_id"], d["model"], f"{d['utilization']:.2f}%", d["namespace"], d["pod"]]
        for d in top
    ]
    return tabulate(rows, headers=["Host", "GPU", "Model", "Utilization", "Namespace", "Pod"], tablefmt="github")


In [324]:
from langchain_core.tools import tool
from typing import Optional
from io import StringIO
import sys

# Utility to capture printed output from functions
def capture_stdout(func, *args, **kwargs):
    old_stdout = sys.stdout
    sys.stdout = mystdout = StringIO()
    try:
        func(*args, **kwargs)
    finally:
        sys.stdout = old_stdout
    return mystdout.getvalue()




In [325]:
from langchain_core.messages import AIMessage
import json

class NRPModel:
    def __init__(self, client):
        self.client = client
        self.tools = []

    def bind_tools(self, tools):
        self.tools = tools
        return self

    def _convert_tool_to_openai_format(self, tool):
        """Convert LangChain tool to OpenAI tool format"""
        return {
            "type": "function",
            "function": {
                "name": tool.name,
                "description": tool.description,
                "parameters": tool.args_schema.model_json_schema() if tool.args_schema else {
                    "type": "object",
                    "properties": {},
                    "required": []
                }
            }
        }

    def invoke(self, messages):
        # Convert messages to proper format if needed
        formatted_messages = []
        for msg in messages:
            if hasattr(msg, 'content'):
                role = "system" if msg.__class__.__name__ == "SystemMessage" else "user"
                formatted_messages.append({"role": role, "content": msg.content})
            else:
                formatted_messages.append(msg)

        # Convert tools to OpenAI format
        openai_tools = None
        if self.tools:
            openai_tools = [self._convert_tool_to_openai_format(t) for t in self.tools]

        response = self.client.chat.completions.create(
            model="gemma3",
            temperature=0,
            messages=formatted_messages,
            tool_choice="auto" if openai_tools else None,
            tools=openai_tools,
        )

        choice = response.choices[0].message

        tool_calls = []
        if hasattr(choice, "tool_calls") and choice.tool_calls:
            for t in choice.tool_calls:
                # Parse the arguments if they're a string
                args = t.function.arguments
                if isinstance(args, str):
                    try:
                        args = json.loads(args)
                    except json.JSONDecodeError:
                        args = {}
                
                tool_calls.append({
                    "name": t.function.name,
                    "args": args,
                    "id": t.id
                })

        return AIMessage(
            content=choice.content or "",
            tool_calls=tool_calls
        )

In [326]:
class AgentState(TypedDict):
    messages: Annotated[list[AnyMessage], operator.add]


# %%

In [327]:
# %%
class Agent:
    def __init__(self, model, tools, system: str = ""):
        self.system = system
        self.tools = {t.name: t for t in tools}
        self.model = model.bind_tools(tools)

        graph = StateGraph(AgentState)
        graph.add_node("llm", self.call_openai)
        graph.add_node("action", self.take_action)
        graph.add_conditional_edges("llm", self.exists_action, {True: "action", False: END})
        graph.add_edge("action", "llm")
        graph.set_entry_point("llm")

        self.raw_graph = graph
        self.graph = graph.compile()

    def exists_action(self, state: AgentState) -> bool:
        """Check if the last message has tool calls"""
        try:
            result = state["messages"][-1]
            
            # Check if result has tool_calls attribute and it's not None and not empty
            return (hasattr(result, "tool_calls") and 
                    result.tool_calls is not None and 
                    len(result.tool_calls) > 0)
        except (IndexError, KeyError, AttributeError):
            return False

    def call_openai(self, state: AgentState) -> dict:
        messages = state["messages"]
        if self.system:
            messages = [SystemMessage(content=self.system)] + messages
        message = self.model.invoke(messages)
        return {"messages": [message]}

    def take_action(self, state: AgentState) -> dict:
        tool_calls = state["messages"][-1].tool_calls
        results = []
        for t in tool_calls:
            tool_name = t["name"]
            tool_args = t["args"]
            print(f"Calling tool: {tool_name} with args: {tool_args}")
            if tool_name not in self.tools:
                result = "Tool name not recognized. Please try again."
            else:
                try:
                    result = self.tools[tool_name].invoke(tool_args)
                except Exception as e:
                    result = f"Tool error: {e}"
            results.append(ToolMessage(tool_call_id=t["id"], name=tool_name, content=str(result)))
        print("✅ Tool(s) executed. Returning to model.")
        return {"messages": results}

# %%

In [328]:
# %%
from langchain_core.messages import HumanMessage

# Updated system prompt
system_prompt = """You are a Kubernetes monitoring assistant.

Use these tools to answer questions:
- 'describe_pods_tool': View pod/container info across namespaces.
- 'gpu_util_tool': View average GPU utilization per namespace.
- 'dcgm_gpu_inspect_tool': Inspect raw GPU metrics by node, GPU model, and pod.
- 'calculate_dcgm_gpu_stats': Return a statistical breakdown of all GPU activity (idle, avg, overloaded, unique models).

Only respond using actual tool outputs. Say clearly if no data is found. Never guess."""

# Rebind all tools
model = NRPModel(client)
tools = [describe_pods_tool, gpu_util_tool, dcgm_gpu_inspect_tool, calculate_dcgm_gpu_stats]
abot = Agent(model=model, tools=tools, system=system_prompt)


In [329]:
# %%
from langchain_core.tools import tool
from typing import Optional
from io import StringIO
import sys

# Helper to capture printed output
def capture_stdout(func, *args, **kwargs):
    old_stdout = sys.stdout
    sys.stdout = mystdout = StringIO()
    try:
        func(*args, **kwargs)
    finally:
        sys.stdout = old_stdout
    return mystdout.getvalue()

# Define tools using decorator without args
@tool
def describe_pods_tool(namespace: Optional[str] = "gsoc") -> str:
    """Describe pods in a given Kubernetes namespace. Defaults to 'gsoc'."""
    return capture_stdout(describe_pods, namespace=namespace)

@tool
def namespace_gpu_util_tool(threshold: Optional[float] = 0.0) -> str:
    """Get average GPU utilization per namespace with optional threshold filter."""
    return capture_stdout(namespace_gpu_utilization, threshold=threshold)

@tool
def calculate_dcgm_gpu_stats(threshold: float = 0.0) -> str:
    """
    Analyze GPU utilization across nodes and return statistical breakdown.
    Includes averages, idle/overloaded counts, and model/host distribution.
    """
    data = fetch_dcgm_gpu_util_data()
    if not data:
        return "⚠️ No GPU data available."

    filtered = [d for d in data if d["utilization"] >= threshold]
    total = len(filtered)
    if total == 0:
        return f"✅ No GPUs over the threshold of {threshold}% utilization."

    avg_util = sum(d["utilization"] for d in filtered) / total
    maxed = [d for d in filtered if d["utilization"] >= 99.0]
    idle = [d for d in filtered if d["utilization"] < 1.0]
    moderate = [d for d in filtered if 1.0 <= d["utilization"] < 70.0]
    available = [d for d in filtered if d["utilization"] < 100.0]
    unique_models = set(d["model"] for d in filtered)
    unique_hosts = set(d["hostname"] for d in filtered)

    return f"""
📊 GPU Utilization Stats (threshold: {threshold}%):

🔍 Total GPUs Considered: {total}
📈 Average Utilization: {avg_util:.2f}%
🔴 Fully Utilized (>=99%): {len(maxed)}
🟢 Idle (<1%): {len(idle)}
⚙️  Moderate (1-70%): {len(moderate)}
💻 Unique Host Machines: {len(unique_hosts)}
🧠 Unique GPU Models: {len(unique_models)}
🧮 GPUs Available (<100%): {len(available)}
"""


In [331]:
# %%
from langchain_core.messages import HumanMessage

# Updated system prompt
system_prompt = """You are a Kubernetes monitoring assistant.

Use these tools to answer questions:
- 'describe_pods_tool': View pod/container info across namespaces.
- 'gpu_util_tool': View average GPU utilization per namespace.
- 'dcgm_gpu_inspect_tool': Inspect raw GPU metrics by node, GPU model, and pod.
- 'calculate_dcgm_gpu_stats': Return a statistical breakdown of all GPU activity (idle, avg, overloaded, unique models).

Only respond using actual tool outputs. Say clearly if no data is found. Never guess."""

# Rebind all tools
model = NRPModel(client)
tools = [describe_pods_tool, namespace_gpu_util_tool, dcgm_gpu_inspect_tool, calculate_dcgm_gpu_stats]
abot = Agent(model=model, tools=tools, system=system_prompt)


In [332]:
messages = [HumanMessage(content="List pods in gsoc namespace")]
response = abot.graph.invoke({"messages": messages})
print(response["messages"][-1].content)




Calling tool: describe_pods_tool with args: {'namespace': 'gsoc'}
✅ Tool(s) executed. Returning to model.
Calling tool: describe_pods_tool with args: {'namespace': 'gsoc'}
✅ Tool(s) executed. Returning to model.
Calling tool: describe_pods_tool with args: {'namespace': 'gsoc'}
✅ Tool(s) executed. Returning to model.
Calling tool: describe_pods_tool with args: {'namespace': 'gsoc'}
✅ Tool(s) executed. Returning to model.


╒══════════════════════════════════╤═════════════╤════════════════╤════════════════════════════╤═════════════╕
│ Pod                              │ Namespace   │ Pod IP         │ Node                       │ Container   │
╞══════════════════════════════════╪═════════════╪════════════════╪════════════════════════════╪═════════════╡
│ agno-deployment-55c55964db-lzhkx │ gsoc        │ 10.244.215.212 │ hcc-nrp-shor-c6013.unl.edu │ jupyter     │
├──────────────────────────────────┼─────────────┼────────────────┼────────────────────────────┼─────────────┤
│ my-postgres-clust

In [333]:
messages = [HumanMessage(content="Show me GPU usage across a few namespaces")]
response = abot.graph.invoke({"messages": messages})
print(response["messages"][-1].content)


Calling tool: namespace_gpu_util_tool with args: {'threshold': 0.0}
✅ Tool(s) executed. Returning to model.
Calling tool: namespace_gpu_util_tool with args: {'threshold': 0.0}
✅ Tool(s) executed. Returning to model.
Calling tool: namespace_gpu_util_tool with args: {'threshold': 0.0}
✅ Tool(s) executed. Returning to model.
Calling tool: namespace_gpu_util_tool with args: {'threshold': 0.0}
✅ Tool(s) executed. Returning to model.
Calling tool: namespace_gpu_util_tool with args: {'threshold': 0.0}
✅ Tool(s) executed. Returning to model.
Calling tool: namespace_gpu_util_tool with args: {'threshold': 0.0}
✅ Tool(s) executed. Returning to model.
Calling tool: namespace_gpu_util_tool with args: {'threshold': 0.0}
✅ Tool(s) executed. Returning to model.
Calling tool: namespace_gpu_util_tool with args: {'threshold': 0.0}
✅ Tool(s) executed. Returning to model.
Calling tool: namespace_gpu_util_tool with args: {'threshold': 0.0}
✅ Tool(s) executed. Returning to model.
Calling tool: namespace_gpu_

GraphRecursionError: Recursion limit of 25 reached without hitting a stop condition. You can increase the limit by setting the `recursion_limit` config key.
For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/GRAPH_RECURSION_LIMIT

In [334]:
messages = [HumanMessage(content="Show me GPU stats of Hostname=k8s-gpu-03.sdsc.optiputer.net, UUID=GPU-ca8bf369-fe71-b92f-d679-59732bab04e6, container=exporter, device=nvidia7, instance=10.244.20.6:9400, job=dcgm-export-dcgm-exporter, modelName=NVIDIA GeForce GTX 1080 Ti across the cluster")]
response = abot.graph.invoke({"messages": messages})
print(response["messages"][-1].content)



Calling tool: dcgm_gpu_inspect_tool with args: {'threshold': 0.0}
✅ Tool(s) executed. Returning to model.
Calling tool: dcgm_gpu_inspect_tool with args: {'threshold': 0.0}
✅ Tool(s) executed. Returning to model.
Calling tool: dcgm_gpu_inspect_tool with args: {'threshold': 0.0}
✅ Tool(s) executed. Returning to model.
Calling tool: dcgm_gpu_inspect_tool with args: {'threshold': 0.0}
✅ Tool(s) executed. Returning to model.
Calling tool: dcgm_gpu_inspect_tool with args: {'threshold': 0.0}
✅ Tool(s) executed. Returning to model.
Calling tool: dcgm_gpu_inspect_tool with args: {'threshold': 0.0}
✅ Tool(s) executed. Returning to model.
Calling tool: dcgm_gpu_inspect_tool with args: {'threshold': 0.0}
✅ Tool(s) executed. Returning to model.
Calling tool: dcgm_gpu_inspect_tool with args: {'threshold': 0.0}
✅ Tool(s) executed. Returning to model.
Calling tool: dcgm_gpu_inspect_tool with args: {'threshold': 0.0}
✅ Tool(s) executed. Returning to model.
Calling tool: dcgm_gpu_inspect_tool with args:

GraphRecursionError: Recursion limit of 25 reached without hitting a stop condition. You can increase the limit by setting the `recursion_limit` config key.
For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/GRAPH_RECURSION_LIMIT