From 0e666c351f7ecff6ad8810f895a14fbe03572989 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 16 Mar 2026 19:38:13 +0000 Subject: [PATCH 1/2] Improve documentation: glossary, expanded incidents, LangChain.js guide, SEO, and UX - Add glossary page with key Cycles terminology - Expand three thin incident pattern pages with detection, monitoring, TypeScript examples, and severity/impact sections - Add LangChain.js (TypeScript) integration guide - Add integrations overview landing page with comparison table - Add TypeScript streaming examples to streaming guide - Add apple-touch-icon, web app manifest, and per-page canonical URLs - Create custom 404 page - Uncollapse Integrations sidebar section - Add LangChain.js to sidebar navigation https://claude.ai/code/session_01LVeAZGvccG11HfdcBGqECM --- .vitepress/config.ts | 18 +- 404.md | 18 ++ glossary.md | 136 ++++++++ ...andling-streaming-responses-with-cycles.md | 78 +++++ .../integrating-cycles-with-langchain-js.md | 305 ++++++++++++++++++ how-to/integrations-overview.md | 73 +++++ incidents/concurrent-agent-overspend.md | 266 ++++++++++++++- .../retry-storms-and-idempotency-failures.md | 240 +++++++++++++- ...scope-misconfiguration-and-budget-leaks.md | 229 +++++++++++++ public/manifest.json | 21 ++ 10 files changed, 1376 insertions(+), 8 deletions(-) create mode 100644 404.md create mode 100644 glossary.md create mode 100644 how-to/integrating-cycles-with-langchain-js.md create mode 100644 how-to/integrations-overview.md create mode 100644 public/manifest.json diff --git a/.vitepress/config.ts b/.vitepress/config.ts index bd5405e..6bdebff 100644 --- a/.vitepress/config.ts +++ b/.vitepress/config.ts @@ -24,6 +24,8 @@ export default defineConfig({ srcExclude: ['README.md', 'cycles-protocol/**', 'cycles-server-admin/**'], head: [ ['link', { rel: 'icon', href: '/runcycles-favicon.ico' }], + ['link', { rel: 'apple-touch-icon', sizes: '192x192', href: '/runcycles-logo-192.png' }], + ['link', { rel: 'manifest', href: 
'/manifest.json' }], ['meta', { name: 'description', content: 'Hard limits on agent spend and actions, enforced before execution.' }], ['meta', { property: 'og:type', content: 'website' }], ['meta', { property: 'og:site_name', content: 'Cycles' }], @@ -146,6 +148,7 @@ export default defineConfig({ { text: 'Idempotency, Retries and Concurrency', link: '/concepts/idempotency-retries-and-concurrency-why-cycles-is-built-for-real-failure-modes' }, { text: 'From Observability to Enforcement', link: '/concepts/from-observability-to-enforcement-how-teams-evolve-from-dashboards-to-budget-authority' }, { text: 'How Cycles Compares', link: '/concepts/how-cycles-compares-to-rate-limiters-observability-provider-caps-in-app-counters-and-job-schedulers' }, + { text: 'Glossary', link: '/glossary' }, ] }, { @@ -174,11 +177,13 @@ export default defineConfig({ }, { text: 'Integrations', - collapsed: true, + collapsed: false, items: [ + { text: 'Overview', link: '/how-to/integrations-overview' }, { text: 'OpenAI', link: '/how-to/integrating-cycles-with-openai' }, { text: 'Anthropic', link: '/how-to/integrating-cycles-with-anthropic' }, - { text: 'LangChain', link: '/how-to/integrating-cycles-with-langchain' }, + { text: 'LangChain (Python)', link: '/how-to/integrating-cycles-with-langchain' }, + { text: 'LangChain.js', link: '/how-to/integrating-cycles-with-langchain-js' }, { text: 'Vercel AI SDK', link: '/how-to/integrating-cycles-with-vercel-ai-sdk' }, { text: 'AWS Bedrock', link: '/how-to/integrating-cycles-with-aws-bedrock' }, { text: 'Google Gemini', link: '/how-to/integrating-cycles-with-google-gemini' }, @@ -256,5 +261,14 @@ export default defineConfig({ pageData.title = pageData.params.pageTitle } + const canonicalUrl = `https://runcycles.io/${pageData.relativePath}` + .replace(/index\.md$/, '') + .replace(/\.md$/, '') + + pageData.frontmatter.head ??= [] + pageData.frontmatter.head.push([ + 'link', + { rel: 'canonical', href: canonicalUrl }, + ]) }, }) diff --git a/404.md 
b/404.md new file mode 100644 index 0000000..6a2d49d --- /dev/null +++ b/404.md @@ -0,0 +1,18 @@ +--- +layout: page +title: "Page Not Found" +description: "The page you're looking for doesn't exist." +--- + +# Page Not Found + +The page you're looking for doesn't exist or has been moved. + +## Where to go + +- [What is Cycles?](/quickstart/what-is-cycles) — start with the basics +- [End-to-End Tutorial](/quickstart/end-to-end-tutorial) — build a budget-guarded app in 10 minutes +- [API Reference](/api/) — interactive endpoint documentation +- [How-To Guides](/how-to/adding-cycles-to-an-existing-application) — integration patterns and recipes + +Use the search bar above to find what you're looking for. diff --git a/glossary.md b/glossary.md new file mode 100644 index 0000000..4e3ac62 --- /dev/null +++ b/glossary.md @@ -0,0 +1,136 @@ +--- +title: "Glossary" +description: "Definitions of key terms and concepts used throughout the Cycles documentation." +--- + +# Glossary + +Definitions of key terms and concepts used throughout the Cycles documentation. + +## Core Concepts + +### Budget Authority + +The role Cycles plays in an autonomous system: authorizing or denying execution based on whether sufficient budget is available. Unlike billing or observability, budget authority is enforced **before** work begins. See [What Cycles Is Not](/concepts/what-cycles-is-not-billing-rate-limiting-orchestration-and-other-category-confusion) for how this differs from adjacent categories. + +### Reservation + +A temporary hold placed on a budget before work begins. Reservations lock an estimated amount so that concurrent operations cannot overspend the same budget. Every reservation must eventually be [committed](#commit) or [released](#release). See [How Reserve-Commit Works](/protocol/how-reserve-commit-works-in-cycles). + +### Commit + +Finalizing a reservation with the actual cost once work completes successfully. 
The committed amount replaces the original estimate, and any difference is returned to the available budget. See [How Reserve-Commit Works](/protocol/how-reserve-commit-works-in-cycles). + +### Release + +Freeing a reservation's held budget when the associated work fails, is cancelled, or is no longer needed. The full reserved amount is returned to the available budget. See [How Reserve-Commit Works](/protocol/how-reserve-commit-works-in-cycles). + +### Estimate + +The predicted cost used when creating a reservation. Estimates determine how much budget is held and should be calibrated to cover the worst-case execution cost. See [How to Estimate Exposure Before Execution](/how-to/how-to-estimate-exposure-before-execution-practical-reservation-strategies-for-cycles). + +### Actual + +The real cost committed after execution completes. The actual amount may be less than, equal to, or greater than the original estimate, with the difference handled by the configured [overage policy](#overage-policy). + +### Decide + +A preflight budget check that evaluates whether a reservation **would** be allowed, without actually creating one. Useful for UI gating, request routing, or early rejection of requests that would exceed budget. See [How Decide Works](/protocol/how-decide-works-in-cycles-preflight-budget-checks-without-reservation). + +## Budget & Scope + +### Scope + +A hierarchical path that identifies a specific budget. Scopes are built from [subject](#subject) fields and take the form `tenant:acme/workspace:prod/agent:summarizer`. Budgets are enforced at every level of the scope hierarchy. See [How Scope Derivation Works](/protocol/how-scope-derivation-works-in-cycles). + +### Subject + +The set of entity fields — `tenant`, `workspace`, `app`, `workflow`, `agent`, and `toolset` — that identify **who** is spending. Subjects are sent with every protocol request and used to derive the scope path. 
+ +### Scope Derivation + +The process by which Cycles builds hierarchical scope paths from the subject fields on a request. Each field maps to a level in the scope tree, enabling budget enforcement at any granularity from tenant-wide down to a single toolset. See [How Scope Derivation Works](/protocol/how-scope-derivation-works-in-cycles). + +### Cap / Budget Cap + +A constraint applied to execution when budget is running low but not yet exhausted. For example, a cap might reduce `max_tokens` on an LLM call so the request can still proceed at lower cost. Caps are returned as part of an `ALLOW_WITH_CAPS` decision. See [Caps and the Three-Way Decision Model](/protocol/caps-and-the-three-way-decision-model-in-cycles). + +### Three-Way Decision + +The three possible responses to a reservation or decide request: **ALLOW** (proceed normally), **ALLOW_WITH_CAPS** (proceed with reduced limits), or **DENY** (reject the request). This model enables graceful degradation instead of hard pass/fail. See [Caps and the Three-Way Decision Model](/protocol/caps-and-the-three-way-decision-model-in-cycles). + +### Overage Policy + +Configures what happens when the actual cost committed exceeds the original estimate. Three policies are available: **REJECT** (deny the commit), **ALLOW_IF_AVAILABLE** (permit if remaining budget covers the difference), and **ALLOW_WITH_OVERDRAFT** (permit even if it creates debt). See [Commit Overage Policies](/protocol/commit-overage-policies-in-cycles-reject-allow-if-available-and-allow-with-overdraft). + +## Units + +### USD_MICROCENTS + +One hundred-millionth of a dollar (10^-8 USD). This is the default monetary unit in Cycles, chosen for integer-precision arithmetic at sub-cent granularity. See [Understanding Units](/protocol/understanding-units-in-cycles-usd-microcents-tokens-credits-and-risk-points). + +### TOKENS + +A raw token count unit, typically used to track LLM input and output tokens directly rather than converting to monetary cost. 
See [Understanding Units](/protocol/understanding-units-in-cycles-usd-microcents-tokens-credits-and-risk-points). + +### CREDITS + +An abstract credit unit that lets teams define their own internal currency. Useful when monetary cost is not the right abstraction for a given budget. See [Understanding Units](/protocol/understanding-units-in-cycles-usd-microcents-tokens-credits-and-risk-points). + +### RISK_POINTS + +An abstract risk-scoring unit for budgeting non-monetary concerns such as safety risk, compliance exposure, or action severity. See [Understanding Units](/protocol/understanding-units-in-cycles-usd-microcents-tokens-credits-and-risk-points). + +## Lifecycle + +### TTL (Time To Live) + +The duration an active reservation remains valid before it auto-expires. If a reservation is neither committed nor released within its TTL (plus any [grace period](#grace-period)), the held budget is automatically reclaimed. See [Reservation TTL, Grace Period, and Extend](/protocol/reservation-ttl-grace-period-and-extend-in-cycles). + +### Grace Period + +An additional window of time after a reservation's TTL expires before the held budget is fully reclaimed. The grace period provides a safety buffer for in-flight operations that slightly exceed their TTL. See [Reservation TTL, Grace Period, and Extend](/protocol/reservation-ttl-grace-period-and-extend-in-cycles). + +### Extend + +Prolonging an active reservation's TTL before it expires. This is used when work is taking longer than originally anticipated and the reservation should remain active. See [Reservation TTL, Grace Period, and Extend](/protocol/reservation-ttl-grace-period-and-extend-in-cycles). + +### Heartbeat + +An automatic TTL extension sent periodically by SDK clients to keep a reservation alive during long-running work. Heartbeats remove the need for callers to manually track and extend reservation lifetimes. 
+ +## Operations + +### Shadow Mode / Dry Run + +Evaluating budget policies and computing the decision result **without** persisting the reservation or affecting budget balances. Shadow mode is used during rollout to validate enforcement logic before turning it on in production. See [Dry-Run / Shadow Mode Evaluation](/protocol/dry-run-shadow-mode-evaluation-in-cycles) and [Shadow Mode How-To](/how-to/shadow-mode-in-cycles-how-to-roll-out-budget-enforcement-without-breaking-production). + +### Idempotency Key + +A unique client-supplied key that ensures a protocol operation is processed exactly once, even if the request is retried due to network failures or timeouts. Each endpoint type has its own idempotency scope. See [Idempotency, Retries, and Concurrency](/concepts/idempotency-retries-and-concurrency-why-cycles-is-built-for-real-failure-modes). + +### Debt / Overdraft + +A negative budget balance that occurs when the actual cost committed exceeds the available budget. Debt is only permitted when the [overage policy](#overage-policy) is set to `ALLOW_WITH_OVERDRAFT`. See [Debt, Overdraft, and the Over-Limit Model](/protocol/debt-overdraft-and-the-over-limit-model-in-cycles). + +### Event / Direct Debit + +Recording spend against a budget **without** a prior reservation. Events are used for costs that are known after the fact or that bypass the reserve-commit lifecycle entirely. See [How Events Work](/protocol/how-events-work-in-cycles-direct-debit-without-reservation). + +### Balance + +The current state of a budget, including fields such as `allocated`, `spent`, `reserved`, `remaining`, and `debt`. Balances are computed across the full scope hierarchy and reflect all committed, reserved, and event-based spend. See [Querying Balances](/protocol/querying-balances-in-cycles-understanding-budget-state). 
+ +## Infrastructure + +### Cycles Server + +The HTTP service that implements the [Cycles Protocol](#cycles-protocol) and processes all budget authority requests — reserve, commit, release, decide, extend, events, and balances. See the [API Reference](/protocol/api-reference-for-the-cycles-protocol). + +### Admin Server + +The management API used to configure tenants, API keys, budgets, and policies. The Admin Server is separate from the Cycles Server and is not part of the protocol's hot path. See [Authentication, Tenancy, and API Keys](/protocol/authentication-tenancy-and-api-keys-in-cycles). + +### Cycles Protocol + +The open specification defining the budget authority API. The protocol covers the complete reservation lifecycle, balance queries, event recording, and decision evaluation. See the [API Reference](/protocol/api-reference-for-the-cycles-protocol). diff --git a/how-to/handling-streaming-responses-with-cycles.md b/how-to/handling-streaming-responses-with-cycles.md index bd9df93..42ad829 100644 --- a/how-to/handling-streaming-responses-with-cycles.md +++ b/how-to/handling-streaming-responses-with-cycles.md @@ -17,6 +17,8 @@ With non-streaming calls, the `@cycles` decorator handles the full lifecycle aut ## The pattern +### Python + Use the programmatic `CyclesClient` (not the decorator) for streaming: ```python @@ -93,6 +95,82 @@ def stream_with_budget(prompt: str, max_tokens: int = 1024) -> str: return "".join(chunks) ``` +### TypeScript + +The TypeScript client provides `reserveForStream`, which handles reservation creation and automatic heartbeat (TTL extension) in one call: + +```typescript +import OpenAI from "openai"; +import { + CyclesClient, + CyclesConfig, + reserveForStream, + BudgetExceededError, +} from "runcycles"; + +const cyclesClient = new CyclesClient(CyclesConfig.fromEnv()); +const openai = new OpenAI(); + +async function streamWithBudget( + prompt: string, + maxTokens = 1024, +): Promise<string> { + // 1. 
Reserve budget (starts automatic heartbeat) + const handle = await reserveForStream({ + client: cyclesClient, + estimate: maxTokens * 1000, // worst-case output cost + unit: "USD_MICROCENTS", + actionKind: "llm.completion", + actionName: "gpt-4o", + }); + + try { + // Respect budget caps + let effectiveMaxTokens = maxTokens; + if (handle.caps?.maxTokens) { + effectiveMaxTokens = Math.min(maxTokens, handle.caps.maxTokens); + } + + // 2. Stream the response + const stream = await openai.chat.completions.create({ + model: "gpt-4o", + messages: [{ role: "user", content: prompt }], + max_tokens: effectiveMaxTokens, + stream: true, + stream_options: { include_usage: true }, + }); + + const chunks: string[] = []; + let inputTokens = 0; + let outputTokens = 0; + + for await (const chunk of stream) { + const content = chunk.choices[0]?.delta?.content; + if (content) chunks.push(content); + if (chunk.usage) { + inputTokens = chunk.usage.prompt_tokens ?? 0; + outputTokens = chunk.usage.completion_tokens ?? 0; + } + } + + // 3. Commit actual cost (stops heartbeat automatically) + const actualCost = Math.ceil(inputTokens * 250 + outputTokens * 1000); + await handle.commit(actualCost, { + tokensInput: inputTokens, + tokensOutput: outputTokens, + }); + + return chunks.join(""); + } catch (err) { + // Release budget on failure (stops heartbeat automatically) + await handle.release("stream_error"); + throw err; + } +} +``` + +`reserveForStream` handles TTL extension automatically via a background heartbeat, so you don't need to call `extend` manually. The heartbeat stops when you call `commit` or `release`. + ## TTL considerations Streaming responses can take significantly longer than non-streaming calls. 
Set `ttl_ms` high enough to cover the full stream duration: diff --git a/how-to/integrating-cycles-with-langchain-js.md b/how-to/integrating-cycles-with-langchain-js.md new file mode 100644 index 0000000..599cf30 --- /dev/null +++ b/how-to/integrating-cycles-with-langchain-js.md @@ -0,0 +1,305 @@ +--- +title: "Integrating Cycles with LangChain.js" +description: "Add budget governance to LangChain.js applications using a custom callback handler that wraps every LLM call with a Cycles reservation." +--- + +# Integrating Cycles with LangChain.js + +This guide shows how to add budget governance to LangChain.js applications using a custom callback handler that wraps every LLM call with a Cycles reservation. + +## Prerequisites + +```bash +npm install runcycles @langchain/core @langchain/openai +``` + +```bash +export CYCLES_BASE_URL="http://localhost:7878" +export CYCLES_API_KEY="cyc_live_..." +export CYCLES_TENANT="acme" +export OPENAI_API_KEY="sk-..." +``` + +## The callback handler approach + +LangChain.js fires callback events on every LLM call. 
A custom `BaseCallbackHandler` can hook into `handleLLMStart` and `handleLLMEnd` to create and commit Cycles reservations: + +```typescript +import { BaseCallbackHandler } from "@langchain/core/callbacks/base"; +import { Serialized } from "@langchain/core/load/serializable"; +import { LLMResult } from "@langchain/core/outputs"; +import { v4 as uuidv4 } from "uuid"; +import { + CyclesClient, + CyclesConfig, + BudgetExceededError, + CyclesProtocolError, +} from "runcycles"; + +interface CyclesBudgetHandlerOptions { + client: CyclesClient; + subject: { tenant: string; workflow?: string; agent?: string; toolset?: string }; + estimateAmount?: number; + actionKind?: string; + actionName?: string; +} + +export class CyclesBudgetHandler extends BaseCallbackHandler { + name = "CyclesBudgetHandler"; + + private client: CyclesClient; + private subject: CyclesBudgetHandlerOptions["subject"]; + private estimateAmount: number; + private actionKind: string; + private actionName: string; + private reservations = new Map(); + private keys = new Map(); + + constructor(options: CyclesBudgetHandlerOptions) { + super(); + this.client = options.client; + this.subject = options.subject; + this.estimateAmount = options.estimateAmount ?? 2_000_000; + this.actionKind = options.actionKind ?? "llm.completion"; + this.actionName = options.actionName ?? 
"gpt-4o"; + } + + async handleLLMStart( + _serialized: Serialized, + _prompts: string[], + runId: string, + ): Promise<void> { + const key = uuidv4(); + this.keys.set(runId, key); + + const res = await this.client.createReservation({ + idempotencyKey: key, + subject: this.subject, + action: { kind: this.actionKind, name: this.actionName }, + estimate: { unit: "USD_MICROCENTS", amount: this.estimateAmount }, + ttlMs: 60_000, + }); + + if (!res.isSuccess) { + const error = res.getErrorResponse(); + if (error?.error === "BUDGET_EXCEEDED") { + throw new BudgetExceededError(error.message, { + status: res.status, + errorCode: error.error, + requestId: error.requestId, + }); + } + const msg = error?.message ?? res.errorMessage ?? "Reservation failed"; + throw new CyclesProtocolError(msg, { + status: res.status, + errorCode: error?.error, + }); + } + + this.reservations.set(runId, res.getBodyAttribute("reservation_id")); + } + + async handleLLMEnd(output: LLMResult, runId: string): Promise<void> { + const rid = this.reservations.get(runId); + const key = this.keys.get(runId); + this.reservations.delete(runId); + this.keys.delete(runId); + if (!rid || !key) return; + + const usage = output.llmOutput?.tokenUsage ?? {}; + const inputTokens = usage.promptTokens ?? 0; + const outputTokens = usage.completionTokens ?? 
0; + + await this.client.commitReservation(rid, { + idempotencyKey: `commit-${key}`, + actual: { + unit: "USD_MICROCENTS", + amount: inputTokens * 250 + outputTokens * 1_000, + }, + metrics: { + tokensInput: inputTokens, + tokensOutput: outputTokens, + }, + }); + } + + async handleLLMError(error: Error, runId: string): Promise<void> { + const rid = this.reservations.get(runId); + const key = this.keys.get(runId); + this.reservations.delete(runId); + this.keys.delete(runId); + if (rid && key) { + await this.client.releaseReservation(rid, { + idempotencyKey: `release-${key}`, + }); + } + } +} +``` + +## Using the handler + +### With a chat model + +```typescript +import { ChatOpenAI } from "@langchain/openai"; +import { HumanMessage } from "@langchain/core/messages"; +import { CyclesClient, CyclesConfig, BudgetExceededError } from "runcycles"; + +const client = new CyclesClient(CyclesConfig.fromEnv()); +const handler = new CyclesBudgetHandler({ + client, + subject: { tenant: "acme", agent: "my-agent" }, +}); + +const llm = new ChatOpenAI({ model: "gpt-4o", callbacks: [handler] }); + +try { + const result = await llm.invoke([new HumanMessage("Hello!")]); + console.log(result.content); +} catch (err) { + if (err instanceof BudgetExceededError) { + console.log("Budget exhausted."); + } else { + throw err; + } +} +``` + +### With an agent and tools + +Every LLM call the agent makes (including tool-calling turns) gets its own reservation: + +```typescript +import { tool } from "@langchain/core/tools"; +import { z } from "zod"; + +const getWeather = tool( + async ({ location }: { location: string }) => `72°F in ${location}`, + { + name: "get_weather", + description: "Get weather for a location.", + schema: z.object({ location: z.string() }), + }, +); + +const handler = new CyclesBudgetHandler({ + client, + subject: { tenant: "acme", agent: "tool-agent", toolset: "weather" }, +}); + +const llm = new ChatOpenAI({ model: "gpt-4o", callbacks: [handler] }); +const llmWithTools = 
llm.bindTools([getWeather]); + +try { + const result = await llmWithTools.invoke([ + new HumanMessage("What's the weather in NYC?"), + ]); + console.log(result.content); +} catch (err) { + if (err instanceof BudgetExceededError) { + console.log("Agent stopped — budget exhausted."); + } else { + throw err; + } +} +``` + +## How it works + +| Event | Action | +|-------|--------| +| `handleLLMStart` | Create a reservation with the estimated cost | +| `handleLLMEnd` | Commit the actual cost from token usage | +| `handleLLMError` | Release the reservation to free held budget | + +The handler tracks active reservations by LangChain's `runId`, so concurrent calls are handled correctly. + +## Streaming with LangChain.js + +For streaming responses, use `reserveForStream` instead of the callback handler. This keeps the reservation alive with an automatic heartbeat while tokens are being streamed: + +```typescript +import { ChatOpenAI } from "@langchain/openai"; +import { HumanMessage } from "@langchain/core/messages"; +import { + CyclesClient, + CyclesConfig, + reserveForStream, + BudgetExceededError, +} from "runcycles"; + +const client = new CyclesClient(CyclesConfig.fromEnv()); + +const handle = await reserveForStream({ + client, + estimate: 2_000_000, + unit: "USD_MICROCENTS", + actionKind: "llm.completion", + actionName: "gpt-4o", + subject: { tenant: "acme", agent: "streaming-agent" }, +}); + +const llm = new ChatOpenAI({ model: "gpt-4o" }); + +try { + const stream = await llm.stream([new HumanMessage("Write a short poem.")]); + let fullText = ""; + + for await (const chunk of stream) { + const content = typeof chunk.content === "string" ? 
chunk.content : ""; + process.stdout.write(content); + fullText += content; + } + + // Estimate actual cost from output length (1 token ~ 4 chars) + const estimatedOutputTokens = Math.ceil(fullText.length / 4); + const actualCost = Math.ceil(500 * 250 + estimatedOutputTokens * 1_000); + + await handle.commit(actualCost, { + tokensOutput: estimatedOutputTokens, + }); +} catch (err) { + await handle.release("stream_error"); + throw err; +} +``` + +## Per-agent budgets + +Use Cycles' subject hierarchy to give each agent its own budget scope: + +```typescript +// Planning agent with its own budget +const plannerHandler = new CyclesBudgetHandler({ + client, + subject: { tenant: "acme", workflow: "support", agent: "planner" }, +}); + +// Executor agent with a separate budget +const executorHandler = new CyclesBudgetHandler({ + client, + subject: { tenant: "acme", workflow: "support", agent: "executor" }, +}); + +const planner = new ChatOpenAI({ model: "gpt-4o", callbacks: [plannerHandler] }); +const executor = new ChatOpenAI({ model: "gpt-4o", callbacks: [executorHandler] }); +``` + +Each agent draws from its own budget allocation. If the executor exhausts its budget, the planner can still operate independently. + +## Key points + +- **One reservation per LLM call.** The callback creates a reservation on every `handleLLMStart` and commits on `handleLLMEnd`. +- **Agents are automatically covered.** Multi-turn agents that call the LLM repeatedly get budget-checked on every turn. +- **Errors release budget.** If the LLM call fails, the reservation is released immediately. +- **Concurrent-safe.** Reservations are tracked by `runId`, supporting concurrent LLM calls. +- **Streaming uses a different pattern.** Use `reserveForStream` with its automatic heartbeat instead of the callback handler. +- **Works with any LangChain.js model.** Attach the handler to `ChatOpenAI`, `ChatAnthropic`, or any other model via `callbacks: [handler]`. 
+ +## Next steps + +- [Integrating Cycles with LangChain (Python)](/how-to/integrating-cycles-with-langchain) — the Python version of this guide +- [Handling Streaming Responses](/how-to/handling-streaming-responses-with-cycles) — streaming patterns in detail +- [Cost Estimation Cheat Sheet](/how-to/cost-estimation-cheat-sheet) — how much to reserve per model +- [Error Handling Patterns in TypeScript](/how-to/error-handling-patterns-in-typescript) — handling Cycles errors in TypeScript diff --git a/how-to/integrations-overview.md b/how-to/integrations-overview.md new file mode 100644 index 0000000..cdba962 --- /dev/null +++ b/how-to/integrations-overview.md @@ -0,0 +1,73 @@ +--- +title: "Integrations Overview" +description: "Overview of all supported Cycles integrations — LLM providers, frameworks, and web servers — with language support and streaming capabilities." +--- + +# Integrations Overview + +Cycles integrates with LLM providers, agent frameworks, and web servers. Each integration wraps model calls with the reserve → commit → release lifecycle so that every call is budget-checked before execution. 
+ +## Supported integrations + +| Integration | Language | Streaming | Pattern | +|-------------|----------|-----------|---------| +| [OpenAI](/how-to/integrating-cycles-with-openai) | Python, TypeScript | Yes | Decorator / `withCycles` | +| [Anthropic](/how-to/integrating-cycles-with-anthropic) | Python, TypeScript | Yes | Decorator / `withCycles` | +| [LangChain](/how-to/integrating-cycles-with-langchain) | Python | Yes | Callback handler | +| [LangChain.js](/how-to/integrating-cycles-with-langchain-js) | TypeScript | Yes | Callback handler | +| [Vercel AI SDK](/how-to/integrating-cycles-with-vercel-ai-sdk) | TypeScript | Yes | `reserveForStream` | +| [AWS Bedrock](/how-to/integrating-cycles-with-aws-bedrock) | TypeScript | Yes | `withCycles` / `reserveForStream` | +| [Google Gemini](/how-to/integrating-cycles-with-google-gemini) | TypeScript | Yes | `withCycles` / `reserveForStream` | +| [Express](/how-to/integrating-cycles-with-express) | TypeScript | — | Middleware | +| [FastAPI](/how-to/integrating-cycles-with-fastapi) | Python | — | Decorator | +| [OpenClaw](/how-to/integrating-cycles-with-openclaw) | TypeScript | Yes | Agent framework hooks | + +## Integration patterns + +Cycles offers several integration approaches depending on your stack: + +### Decorator / Higher-order function + +The simplest approach. Wrap your LLM-calling function and Cycles handles reservation, commit, and release automatically. + +- **Python:** `@cycles` decorator +- **TypeScript:** `withCycles` higher-order function + +Best for: individual model calls, simple request-response flows. + +### Callback handler + +For agent frameworks like LangChain that fire events on every LLM call. A custom callback handler creates reservations on `llm_start` and commits on `llm_end`. + +Best for: multi-turn agents, tool-calling chains, LangChain/LangGraph pipelines. + +### `reserveForStream` + +For streaming responses where the actual cost is only known after the stream completes. 
Reserves budget upfront, auto-extends the reservation TTL during streaming, and commits actual usage when the stream finishes. + +Best for: streaming chat UIs, Vercel AI SDK, any provider with streaming support. + +### Programmatic client + +Direct access to the Cycles client for full control over the reservation lifecycle. Use when the higher-level patterns don't fit your architecture. + +Best for: custom frameworks, complex orchestration, batch processing. + +See [Choosing the Right Integration Pattern](/how-to/choosing-the-right-integration-pattern) for detailed guidance. + +## Adding a new integration + +All integrations follow the same protocol: + +1. **Reserve** budget before the LLM call with an estimated cost +2. **Execute** the model call (respecting any caps returned) +3. **Commit** actual cost from token usage after execution +4. **Release** on error to free held budget + +See [Using the Cycles Client Programmatically](/how-to/using-the-cycles-client-programmatically) for the full client API reference. + +## Next steps + +- [Adding Cycles to an Existing Application](/how-to/adding-cycles-to-an-existing-application) — step-by-step guide for your first integration +- [Cost Estimation Cheat Sheet](/how-to/cost-estimation-cheat-sheet) — pricing reference for estimation +- [Error Handling Patterns](/how-to/error-handling-patterns-in-cycles-client-code) — handling budget errors across languages diff --git a/incidents/concurrent-agent-overspend.md b/incidents/concurrent-agent-overspend.md index 0166382..5383928 100644 --- a/incidents/concurrent-agent-overspend.md +++ b/incidents/concurrent-agent-overspend.md @@ -49,6 +49,248 @@ Agent E reserves $3.00 → DENY (only $1.00 remaining) Agents D and E are denied *before any LLM call is made*. The budget is never exceeded. 
+### Python example + +```python +from runcycles import cycles, BudgetExceededError + +@cycles( + estimate=3000000, + action_kind="llm.completion", + action_name="gpt-4o", + tenant="acme-corp", + workspace="prod", + agent=lambda agent_id: agent_id, +) +def call_llm_safe(prompt: str, agent_id: str) -> str: + return call_llm(prompt) + +def agent_task(agent_id: str, task: str): + try: + result = call_llm_safe(task, agent_id=agent_id) + return result + except BudgetExceededError: + return fallback_response(task) +``` + +### TypeScript example + +```typescript +import { withCycles, BudgetExceededError } from "runcycles"; + +const callLlmSafe = withCycles( + { + estimate: 3_000_000, + actionKind: "llm.completion", + actionName: "gpt-4o", + tenant: "acme-corp", + workspace: "prod", + }, + async (prompt: string): Promise<string> => { + return await callLlm(prompt); + } +); + +async function agentTask(agentId: string, task: string): Promise<string> { + try { + return await callLlmSafe(task); + } catch (err) { + if (err instanceof BudgetExceededError) { + return fallbackResponse(task); + } + throw err; + } +} + +// Run 5 agents concurrently — Cycles guarantees budget safety +const results = await Promise.all( + agents.map((agent) => agentTask(agent.id, agent.task)) +); +``` + +## Severity and impact + +Concurrent overspend is proportional to the number of parallel agents and the cost per operation. In the worst case, total spend reaches `N agents * cost per call`, so the overshoot is that total minus the budget. + +**Concrete examples:** + +| Agents | Budget | Cost per call | Overspend (no Cycles) | With Cycles | +|--------|--------|---------------|----------------------|-------------| +| 5 | $10 | $3.00 | $5.00 (50%) | $0.00 | +| 10 | $50 | $8.00 | $30.00 (60%) | $0.00 | +| 50 | $100 | $5.00 | $150.00 (150%) | $0.00 | +| 100 | $500 | $10.00 | $500.00 (100%) | $0.00 | + +The overspend percentage grows as concurrent demand rises relative to the budget. 
At 100 agents each spending $10, the theoretical maximum overshoot is $500 — a full doubling of the budget.
+ +**Compounding effect with retries.** If each agent also retries failed calls (see [Retry Storms](/incidents/retry-storms-and-idempotency-failures)), the multiplication compounds. 10 agents with 5 retries each can produce 50 concurrent calls against the same budget. + +**Invoice shock.** Unlike a gradual budget drain, concurrent overspend happens in a burst. The budget goes from healthy to overdrawn in seconds, giving operators no time to intervene manually. + +## Detection + +### Querying for concurrent reservation patterns + +Check how many reservations are active simultaneously for the same scope: + +```bash +# Count active reservations per scope +curl -s "http://localhost:7878/v1/reservations?tenant=acme-corp&status=ACTIVE" \ + -H "X-Cycles-API-Key: $API_KEY" \ + | jq 'group_by(.scope) | map({scope: .[0].scope, count: length})' +``` + +If a single scope has many active reservations simultaneously, you have high concurrency against that budget. + +### Spotting TOCTOU patterns in application code + +Search your codebase for the check-then-spend anti-pattern: + +```python +# ANTI-PATTERN: checking balance then spending is NOT safe +balance = get_balance(scope="tenant:acme-corp") +if balance.remaining > estimated_cost: + # Another agent can spend between this check and the call + result = call_llm(prompt) # UNSAFE +``` + +The fix is to always use `reserve` instead of `balance` for authorization decisions. 
+ +### Checking for budget overruns + +Compare spent against allocated to find scopes that exceeded their budget: + +```bash +# Find scopes where spent exceeds allocated (overrun already happened) +curl -s "http://localhost:7878/v1/balances?tenant=acme-corp" \ + -H "X-Cycles-API-Key: $API_KEY" \ + | jq '.[] | select(.spent > .allocated) | {scope, allocated, spent, overshoot: (.spent - .allocated)}' +``` + +## Monitoring + +### Alerting rules + +```yaml +# Alert when spent exceeds allocated for any scope +- alert: CyclesBudgetOvershoot + expr: | + cycles_scope_spent_total > cycles_scope_allocated_total + for: 0m + labels: + severity: critical + annotations: + summary: "Budget overshoot on {{ $labels.scope }}: spent {{ $value }}" + +# Alert on high concurrent reservation count (pre-incident warning) +- alert: CyclesHighConcurrentReservations + expr: | + cycles_active_reservations_count > 20 + for: 1m + labels: + severity: warning + annotations: + summary: "{{ $value }} concurrent reservations on {{ $labels.scope }}" + +# Alert when remaining budget drops below 10% with active reservations +- alert: CyclesBudgetNearExhaustion + expr: | + cycles_scope_remaining_total / cycles_scope_allocated_total < 0.1 + and cycles_active_reservations_count > 0 + for: 0m + labels: + severity: critical + annotations: + summary: "Budget nearly exhausted on {{ $labels.scope }} with active reservations" +``` + +For detailed monitoring setup, see [Monitoring and Alerting](/how-to/monitoring-and-alerting). + +## Testing for concurrency issues + +Concurrency bugs are hard to reproduce in unit tests. Use these strategies to verify your budget enforcement holds under concurrent load. 
 + +### Load test with parallel reservations + +```python +import asyncio +from runcycles import reserve, commit, release, BudgetExceededError + +async def test_concurrent_budget_safety(): + """Verify that concurrent reservations never exceed the budget.""" + budget_allocated = 10_000_000 # 10M microcredits + cost_per_call = 3_000_000 # 3M microcredits each + num_agents = 5 + + async def agent_reserve(): + try: + reservation = await reserve( + estimate=cost_per_call, + action_kind="llm.completion", + action_name="gpt-4o", + tenant="test-tenant", + ) + # Simulate work + await asyncio.sleep(0.1) + await commit(reservation.id, actual=cost_per_call) + return "committed" + except BudgetExceededError: + return "denied" + + results = await asyncio.gather( + *[agent_reserve() for _ in range(num_agents)] + ) + + committed = results.count("committed") + denied = results.count("denied") + + # At most 3 agents can commit (3 * 3M = 9M < 10M budget) + assert committed <= 3, f"Too many commits: {committed}" + assert denied >= 2, f"Expected denials, got {denied}" + total_spent = committed * cost_per_call + assert total_spent <= budget_allocated, f"Overspent: {total_spent}" +``` + +### TypeScript concurrency test + +```typescript +import { reserve, commit, BudgetExceededError } from "runcycles"; + +async function testConcurrentBudgetSafety() { + const budgetAllocated = 10_000_000; + const costPerCall = 3_000_000; + const numAgents = 5; + + const agentReserve = async (): Promise<"committed" | "denied"> => { + try { + const reservation = await reserve({ + estimate: costPerCall, + actionKind: "llm.completion", + actionName: "gpt-4o", + tenant: "test-tenant", + }); + await new Promise((r) => setTimeout(r, 100)); + await commit(reservation.id, { actual: costPerCall }); + return "committed"; + } catch (err) { + if (err instanceof BudgetExceededError) return "denied"; + throw err; + } + }; + + const results = await Promise.all( + Array.from({ length: numAgents }, () => agentReserve()) + ); + + const committed
= results.filter((r) => r === "committed").length; + console.assert(committed <= 3, `Too many commits: ${committed}`); + const totalSpent = committed * costPerCall; + console.assert(totalSpent <= budgetAllocated, `Overspent: ${totalSpent}`); +} +``` + +For more testing patterns, see [Testing with Cycles](/how-to/testing-with-cycles). + ## Key points - **Balance reads are informational, not authoritative.** Querying `/v1/balances` tells you the current state, but it does not reserve anything. Two agents can read the same balance and both decide to spend. @@ -66,9 +308,27 @@ This pattern appears in: ## Prevention -1. **Always reserve before spending.** Never rely on balance reads for authorization. -2. **Use hierarchical scopes.** Even if agents have individual budgets, a shared parent scope acts as a hard cap. -3. **Design for denial.** Agents that can't reserve budget should degrade gracefully, not crash. +1. **Always reserve before spending.** Never rely on balance reads for authorization. The `reserve` call is the only concurrency-safe way to claim budget. A successful reservation is a guarantee; a balance read is a suggestion. + +2. **Use hierarchical scopes.** Even if agents have individual budgets, a shared parent scope acts as a hard cap. If 5 agents each have a $5 budget but the team scope is $10, the team scope prevents collective overspend: + + ```bash + # Team-level cap + curl -s -X POST "http://localhost:7878/v1/budgets" \ + -H "X-Cycles-API-Key: $API_KEY" \ + -d '{"scope": "tenant:acme-corp/workspace:prod", "allocated": 10000000}' + + # Per-agent budgets (sum exceeds team cap — that's fine) + for agent in agent-a agent-b agent-c agent-d agent-e; do + curl -s -X POST "http://localhost:7878/v1/budgets" \ + -H "X-Cycles-API-Key: $API_KEY" \ + -d "{\"scope\": \"tenant:acme-corp/workspace:prod/agent:${agent}\", \"allocated\": 5000000}" + done + ``` + +3. **Design for denial.** Agents that can't reserve budget should degrade gracefully, not crash. 
 Return cached results, use a cheaper model, or queue the work for later. See [Degradation Paths](/how-to/how-to-think-about-degradation-paths-in-cycles-deny-downgrade-disable-or-defer) for patterns. + +4. **Avoid fire-and-forget patterns.** If you spawn agents without awaiting their reservations, you lose the ability to react to denials. Always handle the reservation result before proceeding. ## Next steps diff --git a/incidents/retry-storms-and-idempotency-failures.md b/incidents/retry-storms-and-idempotency-failures.md index 0d1591f..dfdf9bb 100644 --- a/incidents/retry-storms-and-idempotency-failures.md +++ b/incidents/retry-storms-and-idempotency-failures.md @@ -52,6 +52,151 @@ def process_document(doc): When total spend across all retries hits the budget limit, further attempts are denied immediately — no LLM call is made. +### TypeScript equivalent + +Using the `runcycles` SDK, the same pattern works with `withCycles`: + +```typescript +import { withCycles, BudgetExceededError } from "runcycles"; + +const callLlmGuarded = withCycles( + { + estimate: 5_000_000, + actionKind: "llm.completion", + actionName: "gpt-4o", + }, + async (prompt: string): Promise<string> => { + return await callLlm(prompt); + } +); + +async function processDocument(doc: string): Promise<string> { + let currentDoc = doc; + for (let attempt = 0; attempt < 10; attempt++) { + try { + const response = await callLlmGuarded( + `Process this document: ${currentDoc}` + ); + if (validate(response)) { + return response; + } + currentDoc = currentDoc + "\n\nPrevious attempt failed validation. Try again."; + } catch (err) { + if (err instanceof BudgetExceededError) { + return "Document processing stopped: budget limit reached."; + } + throw err; + } + } + return "Document processing stopped: max retries reached."; +} +``` + +## Severity and impact + +Retry storms are deceptive because each individual retry is cheap. The damage comes from multiplication across a fleet.
 + +**Single-document cost explosion:** + +| Retries per doc | Cost per call | Docs in batch | Total cost | +|-----------------|---------------|---------------|------------| +| 1 (no retries) | $0.05 | 1,000 | $50 | +| 5 | $0.05 | 1,000 | $250 | +| 10 | $0.05 | 1,000 | $500 | +| 10 | $0.05 | 10,000 | $5,000 | + +**Prompt growth makes it worse.** Each retry in the example above appends context to the prompt. By attempt #10, the prompt is significantly longer than the original. With token-based pricing, later retries cost more than earlier ones: + +| Attempt | Prompt tokens | Cost per call | +|---------|--------------|---------------| +| 1 | 500 | $0.05 | +| 5 | 2,500 | $0.12 | +| 10 | 5,000 | $0.22 | + +A 10-retry loop with growing prompts costs roughly $1.00 per document, not $0.50. Across 10,000 documents, that is $10,000 instead of $5,000. + +**Fleet multiplication.** If you run 20 parallel workers processing the same batch, a retry storm in the shared batch job can multiply these figures by the worker count before any human notices the spend rate. + +## Detection + +### Querying for retry storm indicators + +Check the ratio of active reservations to recent commits. A healthy system commits most reservations quickly. A retry storm shows many reservations being created and released (or expiring) without successful commits. + +```bash +# Count active reservations for a scope +curl -s "http://localhost:7878/v1/reservations?tenant=acme-corp&status=ACTIVE" \ + -H "X-Cycles-API-Key: $API_KEY" | jq 'length' + +# Check balance to see reserved vs spent +curl -s "http://localhost:7878/v1/balances?tenant=acme-corp" \ + -H "X-Cycles-API-Key: $API_KEY" | jq '.[] | {scope, allocated, spent, reserved, remaining}' +``` + +If `reserved` is growing much faster than `spent`, many reservations are being created without committing — a hallmark of retry loops.
+ +### Checking for repeated idempotency key prefixes + +If you use the pattern `doc-{id}-attempt-{n}`, you can look for documents with high attempt numbers: + +```bash +# List reservations and look for high attempt numbers +curl -s "http://localhost:7878/v1/reservations?tenant=acme-corp&workflow=doc-processing" \ + -H "X-Cycles-API-Key: $API_KEY" \ + | jq '[.[].idempotency_key | select(test("attempt-[5-9]|attempt-[0-9]{2,}"))]' +``` + +Any result means at least one document hit 5+ retries. + +## Monitoring + +### Alerting rules + +Use these Prometheus-style rules to detect retry storms before they drain budgets: + +```yaml +# Alert when reservation creation rate spikes relative to commit rate +# A ratio above 3 means most reservations are not committing — likely retries +- alert: CyclesRetryStormDetected + expr: | + rate(cycles_reservations_created_total[5m]) + / rate(cycles_commits_total[5m]) > 3 + for: 2m + labels: + severity: warning + annotations: + summary: "Possible retry storm: reservation/commit ratio is {{ $value }}" + +# Alert when reserved amount exceeds a threshold relative to allocated +- alert: CyclesHighReservedRatio + expr: | + cycles_scope_reserved_total + / cycles_scope_allocated_total > 0.5 + for: 5m + labels: + severity: critical + annotations: + summary: "Over 50% of budget is in active reservations — retries may be stacking" + +# Alert when BUDGET_EXCEEDED denials spike (retries hitting the wall) +- alert: CyclesBudgetDenialSpike + expr: | + rate(cycles_reservations_denied_total{reason="BUDGET_EXCEEDED"}[5m]) > 10 + for: 1m + labels: + severity: warning + annotations: + summary: "Spike in budget denials — retry storm may have hit budget limit" +``` + +### Key metrics to track + +- **Reservation-to-commit ratio** per scope and per workflow. Healthy value is 1.0–1.2. Values above 2.0 indicate retries or abandoned work. 
+- **Mean and p99 reservation lifetime.** Retry storms produce short-lived reservations that are released (not committed) quickly. +- **Denied reservation rate.** A sudden spike in denials often means a retry storm just hit the budget ceiling. + +For detailed monitoring setup, see [Monitoring and Alerting](/how-to/monitoring-and-alerting). + ## Key points - **Retries are individually valid requests.** Rate limiters can't distinguish retry #1 from retry #10. @@ -68,9 +213,98 @@ idempotency_key = f"doc-{doc_id}-attempt-{attempt}" ## Prevention strategies -1. **Per-document or per-task budget.** Create a workflow-scoped budget for each document or task. Retries share the same budget pool. -2. **Cap retries with budget checks.** Before each retry, use `decide` to check if budget is available without reserving. -3. **Track retry cost separately.** Use the `metrics` field on commit to tag retries, so you can monitor retry cost ratios. +### 1. Per-document or per-task budget + +Create a workflow-scoped budget for each document or task. All retries for that document share the same budget pool, so a single stuck document can't drain the entire batch budget: + +```bash +# Create a per-document budget under the workflow scope +curl -s -X POST "http://localhost:7878/v1/budgets" \ + -H "X-Cycles-API-Key: $API_KEY" \ + -d '{ + "scope": "tenant:acme-corp/workspace:prod/workflow:doc-processing", + "allocated": 10000000, + "window": "PT1H" + }' +``` + +This limits total retry spend per document to the workflow budget, regardless of how many attempts the agent makes. + +### 2. Cap retries with budget checks + +Before each retry, use `decide` to check if budget is available without creating a reservation. 
This avoids creating reservations you'll immediately release: + +```python +from runcycles import decide + +def process_document(doc, doc_id): + for attempt in range(10): + allowed = decide( + estimate=5000000, + action_kind="llm.completion", + action_name="gpt-4o", + workflow=f"doc-{doc_id}", + ) + if not allowed: + return f"Document {doc_id}: budget exhausted after {attempt} attempts." + response = call_llm_guarded(f"Process this document: {doc}") + if validate(response): + return response +``` + +### 3. Track retry cost separately + +Use the `metrics` field on commit to tag retries. This lets you build dashboards that show what fraction of your spend goes to retries versus first attempts: + +```python +@cycles( + estimate=5000000, + action_kind="llm.completion", + action_name="gpt-4o", + metrics=lambda attempt: {"retry_attempt": attempt, "is_retry": attempt > 0}, +) +def call_llm_guarded(prompt: str, attempt: int = 0) -> str: + return call_llm(prompt) +``` + +### 4. Set a maximum retry budget as a fraction of first-attempt cost + +A useful heuristic: retries should never cost more than 2x the original call. If your first attempt costs $0.05, cap total retry spend at $0.10. This prevents the long tail of expensive retries: + +```python +MAX_RETRY_MULTIPLIER = 2 +first_attempt_cost = 5000000 # microcredits + +def process_with_capped_retries(doc, doc_id): + total_spent = 0 + max_retry_budget = first_attempt_cost * MAX_RETRY_MULTIPLIER + for attempt in range(10): + if attempt > 0 and total_spent >= max_retry_budget: + return f"Document {doc_id}: retry budget exhausted." + response = call_llm_guarded(f"Process this document: {doc}") + total_spent += get_last_commit_cost() + if validate(response): + return response +``` + +### 5. Use circuit breakers for persistent failures + +If multiple documents in a batch hit max retries, the issue is likely systemic (model degradation, bad prompt template). 
A circuit breaker stops the entire batch early: + +```python +class RetryCircuitBreaker: + def __init__(self, threshold=5): + self.failure_count = 0 + self.threshold = threshold + + def record_exhausted_retries(self): + self.failure_count += 1 + if self.failure_count >= self.threshold: + raise SystemError( + f"{self.failure_count} documents exhausted retries. " + "Halting batch — likely systemic issue." + ) +``` ## Next steps diff --git a/incidents/scope-misconfiguration-and-budget-leaks.md b/incidents/scope-misconfiguration-and-budget-leaks.md index 1020f71..8c2f999 100644 --- a/incidents/scope-misconfiguration-and-budget-leaks.md +++ b/incidents/scope-misconfiguration-and-budget-leaks.md @@ -44,6 +44,31 @@ Route B spends against `tenant:acme-corp` but never touches `tenant:acme-corp/wo - **Misleading balances.** The workspace balance report shows less spending than actually occurred. Operators think production is within limits, but the tenant-level budget tells a different story. - **No enforcement gap.** Cycles enforces exactly what it's told. If the subject doesn't include a scope level, that level is not checked. +## Severity and impact + +Scope misconfiguration is uniquely dangerous because it is **silent**. Unlike a budget exceeded error or a denied reservation, a misconfigured scope produces no errors. Calls succeed, money is spent, and the budget reports look normal — until you realize the per-workspace limits you carefully configured are being bypassed entirely. + +**Budget bypass scenario:** + +``` +Budget setup: + tenant:acme-corp → $100/month + tenant:acme-corp/workspace:prod → $50/month + +Route A (correct scope): 50 calls × $0.50 = $25 → charged to both tenant and workspace +Route B (missing workspace): 200 calls × $0.50 = $100 → charged to tenant only + +Result: + tenant:acme-corp → $125 spent (OVER BUDGET) + workspace:prod → $25 spent (looks fine!) +``` + +The workspace dashboard shows $25 spent — well within the $50 limit. 
But the tenant is $25 over budget because Route B bypassed workspace-level enforcement entirely. An operator looking at workspace reports sees no problem. + +**Cascading misconfiguration.** When one team gets scope construction wrong, other teams sharing the same tenant scope bear the cost. Team A's misconfigured calls drain the tenant budget, causing Team B's correctly-scoped calls to be denied with `BUDGET_EXCEEDED` at the tenant level even though their workspace budget has room. + +**Audit failure.** Scope mismatches break cost attribution. If finance needs to know how much the "prod" workspace spent, the answer is incomplete because Route B's spend is invisible at that scope level. This makes chargebacks and cost allocation unreliable. + ## Common misconfiguration patterns ### 1. Inconsistent subject fields across routes @@ -86,6 +111,53 @@ Cycles scopes follow a fixed hierarchy: `tenant → workspace → app → workfl **Fix:** Use constants or enums for scope values, not string literals. +### 5. Dynamic scope values from user input + +When scope values are derived from user input (API parameters, form fields, URL paths), unsanitized values create unpredictable scope paths: + +```python +# DANGEROUS: user-controlled scope value +@cycles(estimate=2000000, action_kind="llm.completion", + action_name="gpt-4o", + workspace=request.headers.get("X-Workspace")) +def handle_request(prompt): + ... +``` + +If a user sends `X-Workspace: prod/agent:attacker`, the scope path becomes `tenant:acme-corp/workspace:prod/agent:attacker` — a scope that likely has no budget configured, which could cause unexpected `BUDGET_EXCEEDED` errors, or worse, if a permissive fallback budget exists at a parent level, the call may bypass intended limits. 
+ +**Fix:** Validate and sanitize scope values against an allowlist: + +```python +VALID_WORKSPACES = {"prod", "staging", "dev"} + +def safe_workspace(raw_value: str) -> str: + sanitized = raw_value.strip().lower() + if sanitized not in VALID_WORKSPACES: + raise ValueError(f"Invalid workspace: {raw_value}") + return sanitized + +@cycles(estimate=2000000, action_kind="llm.completion", + action_name="gpt-4o", + workspace=safe_workspace(request.headers.get("X-Workspace", "default"))) +def handle_request(prompt): + ... +``` + +In TypeScript: + +```typescript +const VALID_WORKSPACES = new Set(["prod", "staging", "dev"]); + +function safeWorkspace(raw: string | undefined): string { + const sanitized = (raw ?? "default").trim().toLowerCase(); + if (!VALID_WORKSPACES.has(sanitized)) { + throw new Error(`Invalid workspace: ${raw}`); + } + return sanitized; +} +``` + ## Detection ### Check for scope inconsistency @@ -108,6 +180,163 @@ If reservations are hitting scopes that don't appear in your budget list, you ha Run in [shadow mode](/how-to/shadow-mode-in-cycles-how-to-roll-out-budget-enforcement-without-breaking-production) to see all the scopes your application actually uses before creating budgets. 
 + +### TypeScript detection example + +You can programmatically detect scope gaps by comparing budget scopes against reservation scopes: + +```typescript +import { listBalances, listReservations } from "runcycles"; + +async function detectScopeGaps(tenant: string): Promise<string[]> { + const balances = await listBalances({ tenant }); + const reservations = await listReservations({ tenant, status: "ACTIVE" }); + + const budgetScopes = new Set(balances.map((b) => b.scope)); + const reservationScopes = new Set(reservations.map((r) => r.scope)); + + const gaps: string[] = []; + for (const scope of reservationScopes) { + if (!budgetScopes.has(scope)) { + gaps.push(scope); + } + } + return gaps; +} + +// Usage +const gaps = await detectScopeGaps("acme-corp"); +if (gaps.length > 0) { + console.warn("Reservations hitting scopes without budgets:", gaps); +} +``` + +## Monitoring + +### Alerting for scope mismatches + +```yaml +# Alert when reservations hit scopes that have no configured budget +- alert: CyclesScopeWithoutBudget + expr: | + cycles_reservations_created_total{scope=~".+"} + unless on(scope) cycles_scope_allocated_total + for: 5m + labels: + severity: warning + annotations: + summary: "Reservations hitting scope {{ $labels.scope }} which has no budget" + +# Alert when a scope's spend diverges significantly from its child scopes +# (indicates traffic bypassing child scope) +- alert: CyclesScopeSpendMismatch + expr: | + cycles_scope_spent_total{level="tenant"} + - sum(cycles_scope_spent_total{level="workspace"}) by (tenant) + > 1000000 + for: 10m + labels: + severity: warning + annotations: + summary: "Tenant spend exceeds sum of workspace spend — possible scope bypass" + +# Alert when a workspace scope shows zero spend while tenant scope is active +- alert: CyclesInactiveChildScope + expr: | + cycles_scope_spent_total{level="workspace"} == 0 + and on(tenant) cycles_scope_spent_total{level="tenant"} > 0 + for: 30m + labels: + severity: info + annotations: + summary:
"Workspace {{ $labels.workspace }} has zero spend — check for scope misconfiguration" +``` + +### Key metrics to track + +- **Budget coverage ratio:** scopes with budgets vs distinct scopes in reservations. Should be 1.0. +- **Parent-child spend delta:** difference between parent spend and sum of child spend. Non-zero means traffic is bypassing child scopes. +- **Distinct scope count over time:** sudden increases suggest dynamic scope values from user input (pattern #5). + +For detailed monitoring setup, see [Monitoring and Alerting](/how-to/monitoring-and-alerting). + +## Testing scope configuration + +### Python: verify all routes include required scope fields + +```python +import pytest +from unittest.mock import patch +from runcycles import get_last_reservation + +REQUIRED_SCOPE_FIELDS = {"tenant", "workspace"} + +def test_route_a_includes_all_scopes(): + """Verify that route_a passes all required scope fields.""" + with patch("myapp.call_llm", return_value="mocked"): + route_a("test prompt") + + reservation = get_last_reservation() + subject_keys = set(reservation.subject.keys()) + missing = REQUIRED_SCOPE_FIELDS - subject_keys + assert not missing, f"Route A missing scope fields: {missing}" + +def test_route_b_includes_all_scopes(): + """Verify that route_b passes all required scope fields.""" + with patch("myapp.call_llm", return_value="mocked"): + route_b("test prompt") + + reservation = get_last_reservation() + subject_keys = set(reservation.subject.keys()) + missing = REQUIRED_SCOPE_FIELDS - subject_keys + assert not missing, f"Route B missing scope fields: {missing}" +``` + +### TypeScript: centralized scope builder with tests + +```typescript +import { withCycles } from "runcycles"; + +// Centralized scope builder — all routes use this +interface ScopeConfig { + tenant: string; + workspace: string; + app?: string; +} + +function buildScope(): ScopeConfig { + const tenant = process.env.CYCLES_TENANT; + const workspace = process.env.CYCLES_WORKSPACE; + if 
(!tenant) throw new Error("CYCLES_TENANT is required"); + if (!workspace) throw new Error("CYCLES_WORKSPACE is required"); + return { tenant, workspace }; +} + +// Test that buildScope rejects missing fields +describe("buildScope", () => { + it("throws if CYCLES_TENANT is missing", () => { + delete process.env.CYCLES_TENANT; + process.env.CYCLES_WORKSPACE = "prod"; + expect(() => buildScope()).toThrow("CYCLES_TENANT is required"); + }); + + it("throws if CYCLES_WORKSPACE is missing", () => { + process.env.CYCLES_TENANT = "acme-corp"; + delete process.env.CYCLES_WORKSPACE; + expect(() => buildScope()).toThrow("CYCLES_WORKSPACE is required"); + }); + + it("returns all required fields", () => { + process.env.CYCLES_TENANT = "acme-corp"; + process.env.CYCLES_WORKSPACE = "prod"; + const scope = buildScope(); + expect(scope).toHaveProperty("tenant", "acme-corp"); + expect(scope).toHaveProperty("workspace", "prod"); + }); +}); +``` + +For more testing patterns, see [Testing with Cycles](/how-to/testing-with-cycles). + ## Prevention 1. **Centralize subject construction.** Don't let individual routes build subjects ad hoc. diff --git a/public/manifest.json b/public/manifest.json new file mode 100644 index 0000000..cc829ec --- /dev/null +++ b/public/manifest.json @@ -0,0 +1,21 @@ +{ + "name": "Cycles Documentation", + "short_name": "Cycles Docs", + "description": "Budget authority for autonomous agents. 
Hard limits on agent spend and actions enforced before execution.", + "start_url": "/", + "display": "standalone", + "background_color": "#0B0F1A", + "theme_color": "#0B0F1A", + "icons": [ + { + "src": "/runcycles-logo-192.png", + "sizes": "192x192", + "type": "image/png" + }, + { + "src": "/runcycles-logo-512.png", + "sizes": "512x512", + "type": "image/png" + } + ] +} From 00bfff7faeaa2b8294d3d6bad65cf58e98e1ed39 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 16 Mar 2026 19:48:43 +0000 Subject: [PATCH 2/2] Fix validation issues: wire format, manifest consistency, integration table - LangChain.js guide: fix CyclesClient calls to use snake_case wire format (idempotency_key, ttl_ms, tokens_input, tokens_output) instead of camelCase; remove invalid subject param from reserveForStream; simplify error handling to match documented TS patterns - manifest.json: align name/description with config.ts (use "Cycles" and matching description) - Integrations overview: fix OpenAI/Anthropic to Python-only (not Python+TypeScript); fix Express streaming=Yes; fix FastAPI pattern to Middleware/Decorator; fix OpenClaw pattern to Plugin https://claude.ai/code/session_01LVeAZGvccG11HfdcBGqECM --- .../integrating-cycles-with-langchain-js.md | 31 +++++-------------- how-to/integrations-overview.md | 10 +++--- public/manifest.json | 6 ++-- 3 files changed, 16 insertions(+), 31 deletions(-) diff --git a/how-to/integrating-cycles-with-langchain-js.md b/how-to/integrating-cycles-with-langchain-js.md index 599cf30..959d750 100644 --- a/how-to/integrating-cycles-with-langchain-js.md +++ b/how-to/integrating-cycles-with-langchain-js.md @@ -32,8 +32,6 @@ import { v4 as uuidv4 } from "uuid"; import { CyclesClient, CyclesConfig, - BudgetExceededError, - CyclesProtocolError, } from "runcycles"; interface CyclesBudgetHandlerOptions { @@ -73,30 +71,18 @@ export class CyclesBudgetHandler extends BaseCallbackHandler { this.keys.set(runId, key); const res = await this.client.createReservation({ - 
idempotencyKey: key, + idempotency_key: key, subject: this.subject, action: { kind: this.actionKind, name: this.actionName }, estimate: { unit: "USD_MICROCENTS", amount: this.estimateAmount }, - ttlMs: 60_000, + ttl_ms: 60_000, }); if (!res.isSuccess) { - const error = res.getErrorResponse(); - if (error?.error === "BUDGET_EXCEEDED") { - throw new BudgetExceededError(error.message, { - status: res.status, - errorCode: error.error, - requestId: error.requestId, - }); - } - const msg = error?.message ?? res.errorMessage ?? "Reservation failed"; - throw new CyclesProtocolError(msg, { - status: res.status, - errorCode: error?.error, - }); + throw new Error(res.errorMessage ?? "Reservation failed"); } - this.reservations.set(runId, res.getBodyAttribute("reservation_id")); + this.reservations.set(runId, res.getBodyAttribute("reservation_id") as string); } async handleLLMEnd(output: LLMResult, runId: string): Promise { @@ -111,14 +97,14 @@ export class CyclesBudgetHandler extends BaseCallbackHandler { const outputTokens = usage.completionTokens ?? 
0; await this.client.commitReservation(rid, { - idempotencyKey: `commit-${key}`, + idempotency_key: `commit-${key}`, actual: { unit: "USD_MICROCENTS", amount: inputTokens * 250 + outputTokens * 1_000, }, metrics: { - tokensInput: inputTokens, - tokensOutput: outputTokens, + tokens_input: inputTokens, + tokens_output: outputTokens, }, }); } @@ -130,7 +116,7 @@ export class CyclesBudgetHandler extends BaseCallbackHandler { this.keys.delete(runId); if (rid && key) { await this.client.releaseReservation(rid, { - idempotencyKey: `release-${key}`, + idempotency_key: `release-${key}`, }); } } @@ -237,7 +223,6 @@ const handle = await reserveForStream({ unit: "USD_MICROCENTS", actionKind: "llm.completion", actionName: "gpt-4o", - subject: { tenant: "acme", agent: "streaming-agent" }, }); const llm = new ChatOpenAI({ model: "gpt-4o" }); diff --git a/how-to/integrations-overview.md b/how-to/integrations-overview.md index cdba962..adeff6d 100644 --- a/how-to/integrations-overview.md +++ b/how-to/integrations-overview.md @@ -11,16 +11,16 @@ Cycles integrates with LLM providers, agent frameworks, and web servers. 
Each in | Integration | Language | Streaming | Pattern | |-------------|----------|-----------|---------| -| [OpenAI](/how-to/integrating-cycles-with-openai) | Python, TypeScript | Yes | Decorator / `withCycles` | -| [Anthropic](/how-to/integrating-cycles-with-anthropic) | Python, TypeScript | Yes | Decorator / `withCycles` | +| [OpenAI](/how-to/integrating-cycles-with-openai) | Python | Yes | Decorator | +| [Anthropic](/how-to/integrating-cycles-with-anthropic) | Python | Yes | Decorator | | [LangChain](/how-to/integrating-cycles-with-langchain) | Python | Yes | Callback handler | | [LangChain.js](/how-to/integrating-cycles-with-langchain-js) | TypeScript | Yes | Callback handler | | [Vercel AI SDK](/how-to/integrating-cycles-with-vercel-ai-sdk) | TypeScript | Yes | `reserveForStream` | | [AWS Bedrock](/how-to/integrating-cycles-with-aws-bedrock) | TypeScript | Yes | `withCycles` / `reserveForStream` | | [Google Gemini](/how-to/integrating-cycles-with-google-gemini) | TypeScript | Yes | `withCycles` / `reserveForStream` | -| [Express](/how-to/integrating-cycles-with-express) | TypeScript | — | Middleware | -| [FastAPI](/how-to/integrating-cycles-with-fastapi) | Python | — | Decorator | -| [OpenClaw](/how-to/integrating-cycles-with-openclaw) | TypeScript | Yes | Agent framework hooks | +| [Express](/how-to/integrating-cycles-with-express) | TypeScript | Yes | Middleware / `withCycles` | +| [FastAPI](/how-to/integrating-cycles-with-fastapi) | Python | — | Middleware / Decorator | +| [OpenClaw](/how-to/integrating-cycles-with-openclaw) | TypeScript | Yes | Plugin (lifecycle hooks) | ## Integration patterns diff --git a/public/manifest.json b/public/manifest.json index cc829ec..9c6b2d2 100644 --- a/public/manifest.json +++ b/public/manifest.json @@ -1,7 +1,7 @@ { - "name": "Cycles Documentation", - "short_name": "Cycles Docs", - "description": "Budget authority for autonomous agents. 
Hard limits on agent spend and actions enforced before execution.", + "name": "Cycles", + "short_name": "Cycles", + "description": "Hard limits on agent spend and actions enforced before execution — not after. Open protocol, multi-language SDKs, Apache 2.0.", "start_url": "/", "display": "standalone", "background_color": "#0B0F1A",