From 3446674583df2672a81c32c1023394391a7c1fe1 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 19 Mar 2026 18:41:42 +0000 Subject: [PATCH 1/3] Add how-to guide: budget control for LangChain agents New guide covering per-run budget wrapping of AgentExecutor using the runcycles Python client. Complements the existing per-LLM-call callback handler doc with a pattern for whole-run reservations, tool-level checks, multi-tenant scoping, and graceful degradation via ALLOW_WITH_CAPS. https://claude.ai/code/session_01PzuaGmFdDW83EJxwt6kQua --- .vitepress/config.ts | 1 + ...add-budget-control-to-a-langchain-agent.md | 256 ++++++++++++++++++ 2 files changed, 257 insertions(+) create mode 100644 how-to/how-to-add-budget-control-to-a-langchain-agent.md diff --git a/.vitepress/config.ts b/.vitepress/config.ts index cee3650..7fdcffc 100644 --- a/.vitepress/config.ts +++ b/.vitepress/config.ts @@ -175,6 +175,7 @@ export default defineConfig({ { text: 'Tenant, Workflow, and Run Budgets', link: '/how-to/how-to-model-tenant-workflow-and-run-budgets-in-cycles' }, { text: 'Estimate Exposure Before Execution', link: '/how-to/how-to-estimate-exposure-before-execution-practical-reservation-strategies-for-cycles' }, { text: 'Degradation Paths', link: '/how-to/how-to-think-about-degradation-paths-in-cycles-deny-downgrade-disable-or-defer' }, + { text: 'Budget Control for LangChain Agents', link: '/how-to/how-to-add-budget-control-to-a-langchain-agent' }, { text: 'Shadow Mode Rollout', link: '/how-to/shadow-mode-in-cycles-how-to-roll-out-budget-enforcement-without-breaking-production' }, { text: 'Error Handling Patterns', link: '/how-to/error-handling-patterns-in-cycles-client-code' }, { text: 'Error Handling in TypeScript', link: '/how-to/error-handling-patterns-in-typescript' }, diff --git a/how-to/how-to-add-budget-control-to-a-langchain-agent.md b/how-to/how-to-add-budget-control-to-a-langchain-agent.md new file mode 100644 index 0000000..422f572 --- /dev/null +++ b/how-to/how-to-add-budget-control-to-a-langchain-agent.md @@ -0,0 +1,256 @@ +--- +title: "How to Add Budget Control to a LangChain Agent" +description: "Wrap a LangChain AgentExecutor with per-run budget limits using Cycles reservations — without rewriting agent logic." +--- + +# How to Add Budget Control to a LangChain Agent + +LangChain makes it easy to build agents that call LLMs, search the web, execute code, and chain tool calls together. What it doesn't give you is any way to cap how much a single agent run is allowed to spend. + +That's fine when you're experimenting. It's a real problem when you're running agents in production — especially across multiple users or tenants. A single misbehaving agent loop can burn through hundreds of dollars before anyone notices. + +This guide shows how to add per-run budget control to a LangChain agent using [Cycles](https://runcycles.com) — without rewriting your agent logic. + +::: tip Already using the callback handler? +If you want per-LLM-call budget tracking (a reservation around every model invocation), see [Integrating Cycles with LangChain](/how-to/integrating-cycles-with-langchain). This guide covers a different pattern: a **single reservation around the entire agent run**, plus optional tool-level checks. +::: + +## The problem + +Here's a typical LangChain agent loop: + +```python +from langchain.agents import AgentExecutor, create_openai_functions_agent +from langchain_openai import ChatOpenAI +from langchain_core.tools import tool + +@tool +def search_web(query: str) -> str: + """Search the web for information.""" + return your_search_implementation(query) + +llm = ChatOpenAI(model="gpt-4o") +agent = create_openai_functions_agent(llm, [search_web], prompt) +executor = AgentExecutor(agent=agent, tools=[search_web]) + +result = executor.invoke({"input": "Research the top 10 competitors..."}) +``` + +This works. But there's no limit on how many LLM calls the agent makes, how many tool invocations it triggers, or what it costs. If the agent gets stuck in a loop, retries a failing tool, or expands scope unexpectedly, it keeps running — and spending — until it either finishes or hits the provider's rate limits. + +## The fix: reserve before, commit after + +The pattern Cycles uses is borrowed from database transactions: + +1. **Reserve** budget before the agent run starts +2. **Execute** the agent if the reservation is granted +3. **Commit** actual usage after — releases unused budget back to the pool +4. **Release** the full reservation if the run fails + +This gives you hard limits that are enforced *before* spend happens — not discovered afterward on your bill. + +## Prerequisites + +```bash +pip install runcycles langchain langchain-openai +``` + +```bash +export CYCLES_BASE_URL="http://localhost:7878" +export CYCLES_API_KEY="your-api-key" +export OPENAI_API_KEY="sk-..." +``` + +> **Need an API key?** Create one via the Admin Server — see [Deploy the Full Stack](/quickstart/deploying-the-full-cycles-stack#step-3-create-an-api-key) or [API Key Management](/how-to/api-key-management-in-cycles). + +## Per-run budget wrapper + +Wrap your `AgentExecutor` invocation in a single Cycles reservation: + +```python +import uuid +from langchain.agents import AgentExecutor, create_openai_functions_agent +from langchain_openai import ChatOpenAI +from langchain_core.tools import tool +from runcycles import ( + CyclesClient, CyclesConfig, ReservationCreateRequest, + CommitRequest, ReleaseRequest, Subject, Action, Amount, + Unit, CyclesMetrics, BudgetExceededError, CyclesProtocolError, +) + +client = CyclesClient(CyclesConfig.from_env()) + +@tool +def search_web(query: str) -> str: + """Search the web for information.""" + return your_search_implementation(query) + +def run_agent_with_budget( + user_input: str, + tenant: str, + budget_microcents: int, +) -> dict: + key = str(uuid.uuid4()) + + # 1. Reserve budget for the entire run + res = client.create_reservation(ReservationCreateRequest( + idempotency_key=key, + subject=Subject(tenant=tenant, workflow="research"), + action=Action(kind="agent.run", name="research-task"), + estimate=Amount(unit=Unit.USD_MICROCENTS, amount=budget_microcents), + ttl_ms=120_000, + )) + + if not res.is_success: + error = res.get_error_response() + if error and error.error == "BUDGET_EXCEEDED": + raise BudgetExceededError( + error.message, status=res.status, + error_code=error.error, request_id=error.request_id, + ) + msg = error.message if error else (res.error_message or "Reservation failed") + raise CyclesProtocolError(msg, status=res.status) + + reservation_id = res.get_body_attribute("reservation_id") + decision = res.get_body_attribute("decision") + + if decision == "DENY": + return {"error": "Budget denied — cannot start agent run"} + + # 2. Execute the agent + try: + llm = ChatOpenAI(model="gpt-4o") + agent = create_openai_functions_agent(llm, [search_web], prompt) + executor = AgentExecutor( + agent=agent, tools=[search_web], max_iterations=10, + ) + + result = executor.invoke({"input": user_input}) + + # 3. Commit actual usage + client.commit_reservation(reservation_id, CommitRequest( + idempotency_key=f"commit-{key}", + actual=Amount( + unit=Unit.USD_MICROCENTS, + amount=budget_microcents // 2, # replace with real tracking + ), + )) + return result + + except Exception: + # 4. Release on failure — budget returns to the pool + client.release_reservation( + reservation_id, + ReleaseRequest(idempotency_key=f"release-{key}"), + ) + raise + +# Run it +result = run_agent_with_budget( + user_input="Research the top 10 competitors in the CRM space", + tenant="acme", + budget_microcents=5_000_000, # $50.00 +) +``` + +::: info Crash safety +If the agent crashes before committing or releasing, the reservation expires automatically after `ttl_ms` and the held budget returns to the pool. See [TTL, Grace Period, and Extend](/protocol/reservation-ttl-grace-period-and-extend-in-cycles). +::: + +## Adding tool-level budget checks + +Individual tools can also reserve budget before costly operations. If the tool's reservation is denied, it returns a skip message instead of failing: + +```python +@tool +def search_web(query: str) -> str: + """Search the web for information.""" + tool_key = str(uuid.uuid4()) + + # Reserve before the tool call + res = client.create_reservation(ReservationCreateRequest( + idempotency_key=tool_key, + subject=Subject(tenant="acme", toolset="web-search"), + action=Action(kind="tool.call", name="search-web"), + estimate=Amount(unit=Unit.USD_MICROCENTS, amount=100_000), + ttl_ms=30_000, + )) + + if not res.is_success or res.get_body_attribute("decision") == "DENY": + return "Budget exhausted — skipping web search." + + tool_reservation_id = res.get_body_attribute("reservation_id") + + # Execute the tool + results = your_search_implementation(query) + + # Commit actual usage + client.commit_reservation(tool_reservation_id, CommitRequest( + idempotency_key=f"commit-{tool_key}", + actual=Amount(unit=Unit.USD_MICROCENTS, amount=50_000), + )) + + return results +``` + +## Multi-tenant scoping + +Use the `Subject` hierarchy to give each customer their own budget scope: + +```python +def run_for_customer(customer_id: str, user_input: str): + return run_agent_with_budget( + user_input=user_input, + tenant=customer_id, + budget_microcents=10_000_000, # or pull from the customer's plan + ) +``` + +Each customer's spend is tracked independently. One customer burning through their budget doesn't affect others. + +## Graceful degradation with ALLOW_WITH_CAPS + +When budget is running low, Cycles can return `ALLOW_WITH_CAPS` instead of a hard `DENY`. Use the decision to switch to a cheaper model or limit tool access: + +```python +res = client.create_reservation(ReservationCreateRequest( + idempotency_key=key, + subject=Subject(tenant=tenant, workflow="research"), + action=Action(kind="agent.run", name="research-task"), + estimate=Amount(unit=Unit.USD_MICROCENTS, amount=5_000_000), + ttl_ms=120_000, +)) + +if not res.is_success: + # handle error (see above) + ... + +decision = res.get_body_attribute("decision") + +if decision == "DENY": + raise BudgetExceededError("Budget exhausted") +elif decision == "ALLOW_WITH_CAPS": + # Switch to a cheaper model + llm = ChatOpenAI(model="gpt-4o-mini") +else: + # ALLOW — full capacity + llm = ChatOpenAI(model="gpt-4o") +``` + +See [Caps and Three-Way Decisions](/protocol/caps-and-the-three-way-decision-model-in-cycles) for more on how `ALLOW_WITH_CAPS` works and what cap fields are available. + +## What you get + +With this pattern in place: + +- **Per-tenant isolation** — `Subject(tenant="acme")` means each customer's budget is tracked and enforced independently +- **Graceful degradation** — `ALLOW_WITH_CAPS` lets agents downgrade instead of stopping cold +- **Automatic reconciliation** — committing less than the reserved amount releases the difference back to the pool +- **Crash safety** — if the agent crashes before committing, the reservation expires automatically and budget is released + +## Next steps + +- [Integrating Cycles with LangChain](/how-to/integrating-cycles-with-langchain) — per-LLM-call callback handler pattern +- [Reserve / Commit Lifecycle](/protocol/how-reserve-commit-works-in-cycles) — protocol deep-dive +- [Degradation Paths](/how-to/how-to-think-about-degradation-paths-in-cycles-deny-downgrade-disable-or-defer) — strategies for deny, downgrade, disable, or defer +- [Add to a Python App](/quickstart/getting-started-with-the-python-client) — Python client quickstart From 2f07e91616f15655e45d9c9db5a2c15227b1438f Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 19 Mar 2026 18:43:37 +0000 Subject: [PATCH 2/3] Fix USD_MICROCENTS amounts in LangChain budget guide 1 USD = 100,000,000 microcents. The original amounts were off by orders of magnitude (e.g. 5_000_000 labeled as $50 when it's actually $0.05). Corrected all amounts and added inline dollar comments. https://claude.ai/code/session_01PzuaGmFdDW83EJxwt6kQua --- ...how-to-add-budget-control-to-a-langchain-agent.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/how-to/how-to-add-budget-control-to-a-langchain-agent.md b/how-to/how-to-add-budget-control-to-a-langchain-agent.md index 422f572..594f631 100644 --- a/how-to/how-to-add-budget-control-to-a-langchain-agent.md +++ b/how-to/how-to-add-budget-control-to-a-langchain-agent.md @@ -97,7 +97,7 @@ def run_agent_with_budget( idempotency_key=key, subject=Subject(tenant=tenant, workflow="research"), action=Action(kind="agent.run", name="research-task"), - estimate=Amount(unit=Unit.USD_MICROCENTS, amount=budget_microcents), + estimate=Amount(unit=Unit.USD_MICROCENTS, amount=budget_microcents), # 1 USD = 100_000_000 microcents ttl_ms=120_000, )) @@ -149,7 +149,7 @@ def run_agent_with_budget( result = run_agent_with_budget( user_input="Research the top 10 competitors in the CRM space", tenant="acme", - budget_microcents=5_000_000, # $50.00 + budget_microcents=5_000_000_000, # $50.00 ) ``` @@ -172,7 +172,7 @@ def search_web(query: str) -> str: idempotency_key=tool_key, subject=Subject(tenant="acme", toolset="web-search"), action=Action(kind="tool.call", name="search-web"), - estimate=Amount(unit=Unit.USD_MICROCENTS, amount=100_000), + estimate=Amount(unit=Unit.USD_MICROCENTS, amount=100_000_000), # $1.00 ttl_ms=30_000, )) @@ -187,7 +187,7 @@ def search_web(query: str) -> str: # Commit actual usage client.commit_reservation(tool_reservation_id, CommitRequest( idempotency_key=f"commit-{tool_key}", - actual=Amount(unit=Unit.USD_MICROCENTS, amount=50_000), + actual=Amount(unit=Unit.USD_MICROCENTS, amount=40_000_000), # $0.40 )) return results @@ -202,7 +202,7 @@ def run_for_customer(customer_id: str, user_input: str): return run_agent_with_budget( user_input=user_input, tenant=customer_id, - budget_microcents=10_000_000, # or pull from the customer's plan + budget_microcents=10_000_000_000, # $100.00, or pull from the customer's plan ) ``` @@ -217,7 +217,7 @@ res = client.create_reservation(ReservationCreateRequest( idempotency_key=key, subject=Subject(tenant=tenant, workflow="research"), action=Action(kind="agent.run", name="research-task"), - estimate=Amount(unit=Unit.USD_MICROCENTS, amount=5_000_000), + estimate=Amount(unit=Unit.USD_MICROCENTS, amount=5_000_000_000), # $50.00 ttl_ms=120_000, )) From 58a1aa507c33617b8faeabf9eeff83cd9ab96afe Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 19 Mar 2026 18:47:53 +0000 Subject: [PATCH 3/3] Fix protocol accuracy in LangChain budget guide - Remove DENY-after-success checks: per protocol spec (line 637), budget denial for dry_run=false comes as 409 BUDGET_EXCEEDED, not decision=DENY on a 200. Matches existing callback handler doc pattern. - Simplify tool-level check to just is_success - Degradation section now shows correct two-way branch (ALLOW vs ALLOW_WITH_CAPS) with error handling for 409 above - Remove unused CyclesMetrics import https://claude.ai/code/session_01PzuaGmFdDW83EJxwt6kQua --- ...add-budget-control-to-a-langchain-agent.md | 33 +++++++++++-------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/how-to/how-to-add-budget-control-to-a-langchain-agent.md b/how-to/how-to-add-budget-control-to-a-langchain-agent.md index 594f631..50f8e4b 100644 --- a/how-to/how-to-add-budget-control-to-a-langchain-agent.md +++ b/how-to/how-to-add-budget-control-to-a-langchain-agent.md @@ -75,7 +75,7 @@ from langchain_core.tools import tool from runcycles import ( CyclesClient, CyclesConfig, ReservationCreateRequest, CommitRequest, ReleaseRequest, Subject, Action, Amount, - Unit, CyclesMetrics, BudgetExceededError, CyclesProtocolError, + Unit, BudgetExceededError, CyclesProtocolError, ) client = CyclesClient(CyclesConfig.from_env()) @@ -114,12 +114,12 @@ def run_agent_with_budget( reservation_id = res.get_body_attribute("reservation_id") decision = res.get_body_attribute("decision") - if decision == "DENY": - return {"error": "Budget denied — cannot start agent run"} - - # 2. Execute the agent + # 2. Execute the agent — optionally downgrade if budget is tight try: - llm = ChatOpenAI(model="gpt-4o") + if decision == "ALLOW_WITH_CAPS": + llm = ChatOpenAI(model="gpt-4o-mini") + else: + llm = ChatOpenAI(model="gpt-4o") agent = create_openai_functions_agent(llm, [search_web], prompt) executor = AgentExecutor( agent=agent, tools=[search_web], max_iterations=10, @@ -176,7 +176,7 @@ def search_web(query: str) -> str: ttl_ms=30_000, )) - if not res.is_success or res.get_body_attribute("decision") == "DENY": + if not res.is_success: return "Budget exhausted — skipping web search." tool_reservation_id = res.get_body_attribute("reservation_id") @@ -210,7 +210,7 @@ Each customer's spend is tracked independently. One customer burning through the ## Graceful degradation with ALLOW_WITH_CAPS -When budget is running low, Cycles can return `ALLOW_WITH_CAPS` instead of a hard `DENY`. Use the decision to switch to a cheaper model or limit tool access: +When budget is running low, Cycles can return `ALLOW_WITH_CAPS` instead of a hard denial. Use the decision to switch to a cheaper model or limit tool access: ```python res = client.create_reservation(ReservationCreateRequest( @@ -222,15 +222,20 @@ res = client.create_reservation(ReservationCreateRequest( )) if not res.is_success: - # handle error (see above) - ... + # Budget denial arrives as a 409 BUDGET_EXCEEDED error — handle it here + error = res.get_error_response() + if error and error.error == "BUDGET_EXCEEDED": + raise BudgetExceededError( + error.message, status=res.status, + error_code=error.error, request_id=error.request_id, + ) + raise CyclesProtocolError(...) +# On success, decision is ALLOW or ALLOW_WITH_CAPS decision = res.get_body_attribute("decision") -if decision == "DENY": - raise BudgetExceededError("Budget exhausted") -elif decision == "ALLOW_WITH_CAPS": - # Switch to a cheaper model +if decision == "ALLOW_WITH_CAPS": + # Budget is tight — switch to a cheaper model llm = ChatOpenAI(model="gpt-4o-mini") else: # ALLOW — full capacity