From 15e739b4c4b2a6091f4217452798510456e48733 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 19 Mar 2026 22:20:06 +0000 Subject: [PATCH 1/2] Add SEO linkable content assets: ecosystem page, badge kit, blog posts, and enhanced glossary/comparison MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - New integration ecosystem showcase page (how-to/ecosystem.md) with links to all providers and frameworks - New "Built with Cycles" badge kit (community/badges.md) with SVG badges and usage snippets - New blog post: "The True Cost of Uncontrolled AI Agents" — data-driven link-bait content - New blog post: "AI Agent Budget Patterns: A Practical Guide" — definitive reference guide - Enhanced glossary with 14 new terms covering patterns, architecture, and AI terminology - Enhanced comparison page with quick-comparison summary table and "when to adopt" section - Updated sidebar navigation with Ecosystem and Community sections https://claude.ai/code/session_01N2N4fKqatxBiXgFNSwxFqF --- .vitepress/config.ts | 8 + blog/agent-budget-patterns-visual-guide.md | 271 ++++++++++++++++++ blog/true-cost-of-uncontrolled-agents.md | 108 +++++++ community/badges.md | 78 +++++ ...caps-in-app-counters-and-job-schedulers.md | 27 ++ glossary.md | 60 ++++ how-to/ecosystem.md | 149 ++++++++++ public/badges/built-with-cycles-dark.svg | 21 ++ public/badges/built-with-cycles.svg | 21 ++ 9 files changed, 743 insertions(+) create mode 100644 blog/agent-budget-patterns-visual-guide.md create mode 100644 blog/true-cost-of-uncontrolled-agents.md create mode 100644 community/badges.md create mode 100644 how-to/ecosystem.md create mode 100644 public/badges/built-with-cycles-dark.svg create mode 100644 public/badges/built-with-cycles.svg diff --git a/.vitepress/config.ts b/.vitepress/config.ts index 07841f2..94b5a3d 100644 --- a/.vitepress/config.ts +++ b/.vitepress/config.ts @@ -197,6 +197,7 @@ export default defineConfig({ text: 'Integrations', collapsed: false, 
items: [ + { text: 'Ecosystem', link: '/how-to/ecosystem', badge: { text: 'New', type: 'tip' } }, { text: 'Overview', link: '/how-to/integrations-overview' }, { text: 'MCP (Claude, Cursor, Windsurf)', link: '/how-to/integrating-cycles-with-mcp' }, { text: 'OpenAI', link: '/how-to/integrating-cycles-with-openai' }, @@ -264,6 +265,13 @@ export default defineConfig({ { text: 'Security Hardening', link: '/how-to/security-hardening' }, { text: 'Changelog', link: '/changelog' }, ] + }, + { + text: 'Community', + collapsed: true, + items: [ + { text: 'Built with Cycles Badges', link: '/community/badges' }, + ] } ], }, diff --git a/blog/agent-budget-patterns-visual-guide.md b/blog/agent-budget-patterns-visual-guide.md new file mode 100644 index 0000000..4064161 --- /dev/null +++ b/blog/agent-budget-patterns-visual-guide.md @@ -0,0 +1,271 @@ +--- +title: "AI Agent Budget Patterns: A Practical Guide" +date: 2026-03-19 +author: Cycles Team +tags: [patterns, budgets, architecture, guide] +description: "A practical reference for structuring AI agent budgets — covering tenant isolation, workflow caps, run-level limits, graceful degradation, and more." +blog: true +sidebar: false +--- + +# AI Agent Budget Patterns: A Practical Guide + +Every team running AI agents in production eventually faces the same question: how should we structure our budgets? Too coarse and a single runaway agent burns through the allocation. Too granular and the overhead of managing hundreds of micro-budgets becomes its own problem. This guide covers the six patterns we see most often, with concrete examples and trade-offs for each. + + + +These patterns aren't mutually exclusive — most production systems combine two or three. The [common budget patterns](/how-to/common-budget-patterns) page in our docs covers the Cycles-specific implementation details; this post focuses on the architectural thinking behind each approach. 
+ +## Pattern 1: Tenant isolation budgets + +**When to use:** Multi-tenant platforms where each customer or team gets their own AI agent access and you need hard spend isolation between them. + +The simplest and most common starting point. Each tenant gets an independent budget that cannot be exceeded, regardless of what other tenants are doing. + +```python +# Tenant isolation: each tenant has a completely independent budget +tenant_budget = cycles.create_budget( + scope=f"tenant:{tenant_id}", + limit_dollars=500.00, + period="monthly", + on_exhausted="deny" +) + +# Every agent call for this tenant checks against their budget +async def run_agent_for_tenant(tenant_id, task): + budget = cycles.get_budget(scope=f"tenant:{tenant_id}") + result = await budget.execute( + agent.run(task), + estimated_cost=estimate_task_cost(task) + ) + return result +``` + +**Trade-offs:** +- Provides complete blast-radius isolation — one tenant's runaway agent cannot affect others +- Simple to reason about and explain to customers +- Can lead to underutilization: if Tenant A uses 10% of their budget and Tenant B hits 100%, there's no sharing +- Requires careful initial sizing — set too low and legitimate workloads get blocked + +This pattern maps directly to how [tenant, workflow, and run budgets](/how-to/how-to-model-tenant-workflow-and-run-budgets-in-cycles) work in Cycles. + +## Pattern 2: Workflow-level caps + +**When to use:** When different agent workflows have different cost profiles and risk levels, and you want to cap each independently. + +A code review agent and a deep research agent have very different cost characteristics. Workflow-level caps let you set appropriate limits for each. 
+ +```python +# Different workflows get different budgets +workflow_budgets = { + "code-review": {"limit": 2.00, "per": "run"}, + "deep-research": {"limit": 25.00, "per": "run"}, + "summarization": {"limit": 5.00, "per": "run"}, + "chat": {"limit": 1.00, "per": "session"}, +} + +async def run_workflow(workflow_type, input_data): + config = workflow_budgets[workflow_type] + budget = cycles.create_budget( + scope=f"workflow:{workflow_type}:{run_id}", + limit_dollars=config["limit"], + period=config["per"] + ) + return await budget.execute(agent.run(input_data)) +``` + +**Trade-offs:** +- Right-sized limits for each use case reduce both waste and false denials +- Makes cost profiles explicit and auditable +- Requires understanding the cost distribution of each workflow upfront +- New workflows need budget configuration before deployment + +## Pattern 3: Per-run budgets with graceful degradation + +**When to use:** When you want agents to produce _some_ result even when they hit budget limits, rather than failing entirely. + +This is the pattern that separates production-grade agent systems from prototypes. Instead of a hard stop at budget exhaustion, the agent downgrades its approach. 
+ +```python +async def research_with_degradation(query, budget_dollars=10.00): + budget = cycles.create_budget( + scope=f"run:{run_id}", + limit_dollars=budget_dollars + ) + + # Phase 1: Use the best model + remaining = budget.remaining() + if remaining > 5.00: + result = await budget.execute( + agent.run(query, model="claude-opus-4-20250514") + ) + # Phase 2: Fall back to a cheaper model + elif remaining > 1.00: + result = await budget.execute( + agent.run(query, model="claude-sonnet-4-20250514") + ) + # Phase 3: Return cached/partial results + else: + result = get_cached_or_partial_result(query) + result.metadata["degraded"] = True + + return result +``` + +**Trade-offs:** +- Users get a result instead of an error, improving perceived reliability +- Requires designing multiple quality tiers for each workflow +- The "degraded" signal needs to propagate to the user — silent degradation erodes trust +- More complex to test: you need to validate each fallback tier + +We cover degradation strategies in detail in [How to Think About Degradation Paths](/how-to/how-to-think-about-degradation-paths-in-cycles-deny-downgrade-disable-or-defer). + +## Pattern 4: Shared pool with priority tiers + +**When to use:** When you want to maximize utilization of a fixed budget across multiple agents or users, with guarantees for high-priority work. + +Instead of giving each consumer a fixed allocation, you share a pool but enforce priority ordering when the pool runs low. 
+ +```python +# Shared pool with priority tiers +pool = cycles.create_budget( + scope="org:engineering", + limit_dollars=5000.00, + period="monthly" +) + +# Priority tiers determine who gets denied first +PRIORITY_THRESHOLDS = { + "critical": 0.0, # Only denied at $0 remaining + "high": 0.10, # Denied below 10% remaining + "normal": 0.25, # Denied below 25% remaining + "low": 0.50, # Denied below 50% remaining + "bulk": 0.70, # Denied below 70% remaining (off-peak only) +} + +async def execute_with_priority(task, priority="normal"): + remaining_fraction = pool.remaining() / pool.limit + threshold = PRIORITY_THRESHOLDS[priority] + + if remaining_fraction <= threshold: + raise BudgetExhaustedError( + f"Pool at {remaining_fraction:.0%}, " + f"threshold for '{priority}' is {threshold:.0%}" + ) + + return await pool.execute(task) +``` + +**Trade-offs:** +- Higher overall utilization — no budget sits idle while another is exhausted +- Critical work is protected even under heavy load +- Harder to predict per-team or per-user costs for billing purposes +- Requires agreement on what constitutes "critical" vs. "low" priority +- Risk of low-priority work getting permanently starved in busy periods + +## Pattern 5: Shadow mode rollout + +**When to use:** When you're introducing budget controls to an existing system and need to validate limits before enforcing them. + +This is less a budget _structure_ and more a deployment pattern, but it's essential for any team that isn't starting from scratch. Shadow mode tracks what _would_ have been denied without actually denying anything. 
+ +```python +# Shadow mode: log but don't enforce +budget = cycles.create_budget( + scope=f"tenant:{tenant_id}", + limit_dollars=100.00, + period="daily", + mode="shadow" # Track but don't enforce +) + +# In shadow mode, execute() always succeeds but logs violations +result = await budget.execute(agent.run(task)) + +# After a validation period, check the shadow logs +shadow_report = cycles.get_shadow_report( + scope=f"tenant:{tenant_id}", + period="last_7_days" +) +# Output: "23 calls would have been denied. Peak overage: $47.30." +# Now you can tune the limit before switching to enforce mode. +``` + +**Trade-offs:** +- Zero risk of breaking production workflows during rollout +- Generates real data for sizing budgets accurately +- Adds latency (the budget check still happens, just without enforcement) +- Teams sometimes stay in shadow mode too long, delaying the value of enforcement + +Our [shadow mode rollout guide](/how-to/shadow-mode-in-cycles-how-to-roll-out-budget-enforcement-without-breaking-production) walks through the full process, including how to analyze shadow logs and choose enforcement cutover criteria. + +## Pattern 6: Hybrid model (tokens + dollars) + +**When to use:** When you need to track both the raw resource consumption (tokens) and the monetary cost (dollars), because they don't always move in lockstep. + +Token counts and dollar costs diverge when you use multiple models, when pricing changes, or when non-LLM tools (web search, code execution) are part of the agent's toolkit. 
+ +```python +# Hybrid budget: track both dimensions +budget = cycles.create_budget( + scope=f"run:{run_id}", + limits={ + "tokens": 500_000, # Hard cap on token consumption + "dollars": 15.00, # Hard cap on dollar spend + }, + on_exhausted="deny_and_notify" +) + +async def execute_hybrid(task): + # Both limits are checked atomically + result = await budget.execute( + agent.run(task), + estimated={ + "tokens": estimate_tokens(task), + "dollars": estimate_cost(task), + } + ) + return result + +# Useful for cases where a cheap model uses many tokens +# or an expensive model uses few +``` + +**Trade-offs:** +- Catches scenarios that a single-dimension budget misses (e.g., a cheap model looping uses few dollars but millions of tokens) +- Useful for capacity planning beyond just cost +- More complex to configure and explain to users +- Requires accurate estimation for both dimensions + +## Combining patterns + +Most production systems layer two or three of these patterns. A common combination: + +1. **Tenant isolation** (Pattern 1) as the outer boundary +2. **Workflow caps** (Pattern 2) within each tenant +3. **Graceful degradation** (Pattern 3) within each workflow run +4. **Shadow mode** (Pattern 5) for rollout + +This gives you hard isolation between customers, right-sized limits per use case, user-friendly behavior at the limits, and a safe path to enforcement. + +``` +Tenant Budget ($500/mo) +├── Code Review Workflow ($2/run) +│ └── Per-run with degradation +├── Research Workflow ($25/run) +│ └── Per-run with degradation +└── Chat Workflow ($1/session) + └── Hard deny at limit +``` + +The [budget allocation and management guide](/how-to/budget-allocation-and-management-in-cycles) covers how to implement these hierarchies in Cycles, and the [cost estimation cheat sheet](/how-to/cost-estimation-cheat-sheet) helps with initial sizing for each tier. 
+ +## Choosing your starting point + +If you're unsure where to begin: + +- **Single-tenant, single-agent:** Start with Pattern 3 (per-run with degradation) +- **Multi-tenant SaaS:** Start with Pattern 1 (tenant isolation) + Pattern 5 (shadow mode) +- **Internal platform with multiple teams:** Start with Pattern 4 (shared pool with priority) +- **Migrating from no controls:** Start with Pattern 5 (shadow mode) to gather data first + +The most important step isn't picking the perfect pattern — it's having _any_ budget boundary in the execution path. You can always refine the structure later. You can't un-spend money that an uncontrolled agent already burned. diff --git a/blog/true-cost-of-uncontrolled-agents.md b/blog/true-cost-of-uncontrolled-agents.md new file mode 100644 index 0000000..72e51d1 --- /dev/null +++ b/blog/true-cost-of-uncontrolled-agents.md @@ -0,0 +1,108 @@ +--- +title: "The True Cost of Uncontrolled AI Agents" +date: 2026-03-19 +author: Cycles Team +tags: [costs, agents, incidents, best-practices] +description: "What happens when autonomous AI agents run without budget limits? We break down the real-world costs, failure modes, and why pre-execution budget authority is the missing layer in most agent architectures." +blog: true +sidebar: false +--- + +# The True Cost of Uncontrolled AI Agents + +A development team ships a coding agent on Friday afternoon. It works beautifully in staging — summarizing PRs, generating tests, refactoring modules. By Monday morning, the agent has made 14,000 API calls, consumed 380 million tokens, and run up a $12,400 bill against a model provider. No one noticed because the dashboard updates hourly and the alerts were configured for _daily_ spend thresholds. The agent wasn't malicious. It wasn't buggy in the traditional sense. It simply did what agents do: it kept working. + + + +This scenario isn't hypothetical. Variations of it play out every week as more teams deploy autonomous agents into production. 
The economics of AI APIs — where a single GPT-4-class call can cost $0.03–$0.12 in tokens — seem manageable until you multiply by the loop count of an unsupervised agent. + +## The math: how agents amplify API costs + +A single LLM call is cheap. An agent is not a single call. + +Consider a typical agentic workflow: a coding assistant that reads a file, proposes a change, validates it with a second LLM call, runs a tool, interprets the output, and decides whether to iterate. That's 3–5 LLM calls per _step_, and a complex task can take 20–50 steps. + +| Scenario | Calls per task | Avg tokens per call | Cost per task (GPT-4 class) | +|---|---|---|---| +| Simple Q&A | 1 | 2,000 | $0.06 | +| Single-step tool use | 3 | 4,000 | $0.36 | +| Multi-step agent run | 15–40 | 6,000 | $2.70–$7.20 | +| Deep research agent | 80–200 | 8,000 | $19.20–$48.00 | +| Runaway agent (tool loop) | 500+ | 10,000 | $150+ | + +Now multiply by concurrency. Ten users triggering deep research agents simultaneously? That's potentially $500 in a few minutes. A retry storm on a flaky tool? Thousands of calls in seconds. + +## The four categories of cost + +Teams that track only their API invoice are seeing roughly 40% of the real picture. + +### 1. Direct API spend + +The obvious one: tokens in, tokens out, dollars billed. Model providers charge per token, and agents are token-hungry by nature. Fan-out patterns — where an agent spawns sub-agents or parallel tool calls — can multiply costs by 5–10x compared to a sequential design. + +### 2. Compute and infrastructure + +Agents consume CPU, memory, and network bandwidth on _your_ infrastructure too. Long-running agent loops hold open connections, consume worker threads, and can saturate rate limits that affect your entire platform. We've seen teams where a single runaway agent degraded API response times for all users by 300%. + +### 3. Operational overhead + +Every uncontrolled spend incident triggers an investigation. 
Someone has to figure out what happened, which agent, which user, which workflow. Depending on the organization, this involves engineering time, incident reviews, and policy changes. At $150–$250/hour for senior engineering time, a single investigation can cost more than the API bill itself. + +### 4. Opportunity cost + +When an agent exhausts a shared rate limit or burns through a monthly budget in a week, every _other_ agent and user on the platform is affected. Teams start adding manual approval steps, which defeats the purpose of autonomy. Trust erodes. Adoption stalls. + +## The five failure modes + +Through conversations with teams running agents in production and the incident patterns we've documented, five recurring failure modes account for the majority of uncontrolled spend. + +### Runaway tool loops + +An agent calls a tool, gets an unexpected result, retries with a slightly different prompt, gets the same result, and repeats. Without a circuit breaker, this loop runs until a rate limit or timeout kills it — often after hundreds of iterations. See our detailed breakdown in [Runaway Agents: Tool Loops and Budget Overruns](/incidents/runaway-agents-tool-loops-and-budget-overruns-the-incidents-cycles-is-designed-to-prevent). + +### Retry storms + +A downstream service returns a 500. The agent retries. The SDK retries. The orchestration layer retries. Each retry is a full LLM call with full context. Three layers of retry logic with 3 attempts each means 27 calls for what should have been one. We cover this in depth in [Retry Storms and Idempotency Failures](/incidents/retry-storms-and-idempotency-failures). + +### Concurrent overspend + +Five agents, each individually within budget, all drawing from the same pool simultaneously. No single agent is over limit, but the aggregate exceeds the budget by 3x before any dashboard refreshes. This is the most common failure mode we see in multi-tenant systems. 
See [Concurrent Agent Overspend](/incidents/concurrent-agent-overspend). + +### Scope misconfiguration + +A budget is set at the wrong level — per-organization instead of per-user, or per-day instead of per-run. A single run consumes an entire team's daily allocation. This is a design problem, not an implementation bug, and it's covered in [Scope Misconfiguration and Budget Leaks](/incidents/scope-misconfiguration-and-budget-leaks). + +### The "works in dev" trap + +Agents tested with small inputs and single-user loads behave very differently in production. A summarization agent that costs $0.15 per document in testing costs $45 when a user uploads a 300-page PDF. No failure, no bug — just a cost profile that nobody modeled. + +## The observability gap + +Most teams respond to cost overruns by adding dashboards. This helps, but it solves the _awareness_ problem, not the _enforcement_ problem. Dashboards tell you what happened. They don't stop it from happening. + +The fundamental gap looks like this: + +- **Dashboards** show spend _after_ it occurs (minutes to hours of delay) +- **Rate limits** cap throughput but don't understand _cost_ — a rate limit of 100 RPM doesn't distinguish between a $0.01 call and a $5.00 call +- **Provider caps** are monthly or daily, far too coarse for per-run control +- **In-app counters** are single-process and collapse under concurrency + +We wrote extensively about this progression in [From Observability to Enforcement](/concepts/from-observability-to-enforcement-how-teams-evolve-from-dashboards-to-budget-authority) and [Why Rate Limits Are Not Enough for Autonomous Systems](/concepts/why-rate-limits-are-not-enough-for-autonomous-systems). + +The missing layer is **pre-execution budget authority**: a system that checks _before_ each call whether the budget allows it, atomically decrements the balance, and denies the call if the budget is exhausted. This is fundamentally different from post-hoc observation. 
+ +## Budget authority as infrastructure + +This is the problem [Cycles](/) was built to solve. Instead of layering alerts on top of dashboards on top of logs, Cycles introduces a dedicated budget authority layer that sits in the execution path of every agent action. + +The core mechanic is simple: before an agent makes an LLM call or tool invocation, it checks with Cycles. Cycles atomically reserves the estimated cost. If the budget is exhausted, the call is denied — and the agent can degrade gracefully instead of failing silently or running up a bill. + +This works across concurrency boundaries, across services, and across the full hierarchy of tenant, workflow, and run-level budgets. It's the same pattern that payment systems use for authorization holds, applied to AI agent execution. + +For teams evaluating this approach, the [common budget patterns guide](/how-to/common-budget-patterns) covers the most frequent architectures we see, and the [cost estimation cheat sheet](/how-to/cost-estimation-cheat-sheet) helps with initial sizing. + +## The bottom line + +Uncontrolled agents are not a hypothetical risk. They are a recurring, measurable operational cost that grows with every new agent deployment. The teams that scale agents successfully are the ones that treat budget enforcement as infrastructure — not as a monitoring afterthought. + +The cost of building budget controls is small. The cost of not having them compounds with every agent you deploy. diff --git a/community/badges.md b/community/badges.md new file mode 100644 index 0000000..5b43c93 --- /dev/null +++ b/community/badges.md @@ -0,0 +1,78 @@ +--- +title: "Built with Cycles Badges" +description: "Add a 'Built with Cycles' badge to your project README, documentation, or website to show your project uses budget authority for autonomous agents." +--- + +# Built with Cycles + +Show the community that your project uses Cycles for budget authority. 
Add a "Built with Cycles" badge to your README, documentation site, or project website. + +## Badges + +The badge is available in two variants to suit light and dark backgrounds: + +**Light variant** (for light backgrounds): + +![Built with Cycles](https://runcycles.io/badges/built-with-cycles.svg) + +**Dark variant** (for dark backgrounds): + +![Built with Cycles](https://runcycles.io/badges/built-with-cycles-dark.svg) + +## Usage + +### Markdown (for GitHub READMEs) + +**Light variant:** + +```markdown +[![Built with Cycles](https://runcycles.io/badges/built-with-cycles.svg)](https://runcycles.io) +``` + +**Dark variant:** + +```markdown +[![Built with Cycles](https://runcycles.io/badges/built-with-cycles-dark.svg)](https://runcycles.io) +``` + +### HTML + +**Light variant:** + +```html +<a href="https://runcycles.io"><img src="https://runcycles.io/badges/built-with-cycles.svg" alt="Built with Cycles"></a> +``` + +**Dark variant:** + +```html +<a href="https://runcycles.io"><img src="https://runcycles.io/badges/built-with-cycles-dark.svg" alt="Built with Cycles"></a> +``` + +### reStructuredText (for Python docs) + +**Light variant:** + +```rst +.. image:: https://runcycles.io/badges/built-with-cycles.svg + :target: https://runcycles.io + :alt: Built with Cycles +``` + +**Dark variant:** + +```rst +.. image:: https://runcycles.io/badges/built-with-cycles-dark.svg + :target: https://runcycles.io + :alt: Built with Cycles +``` + +## Guidelines + +- Use the badge on projects that genuinely integrate with Cycles. +- Do not modify the badge image, colors, or proportions. +- The badge link should always point to [https://runcycles.io](https://runcycles.io). + +## Tell Us About Your Project + +We'd love to feature your project on the [Integration Ecosystem](/how-to/ecosystem) page. Open a [GitHub issue](https://github.com/runcycles/cycles-protocol/issues) or start a [GitHub discussion](https://github.com/runcycles/cycles-protocol/discussions) to tell us what you've built and how you're using Cycles. Community projects that use the badge and integrate Cycles are eligible to be showcased on the ecosystem page. 
diff --git a/concepts/how-cycles-compares-to-rate-limiters-observability-provider-caps-in-app-counters-and-job-schedulers.md b/concepts/how-cycles-compares-to-rate-limiters-observability-provider-caps-in-app-counters-and-job-schedulers.md index d08a36b..60f635a 100644 --- a/concepts/how-cycles-compares-to-rate-limiters-observability-provider-caps-in-app-counters-and-job-schedulers.md +++ b/concepts/how-cycles-compares-to-rate-limiters-observability-provider-caps-in-app-counters-and-job-schedulers.md @@ -5,6 +5,19 @@ description: "See how Cycles differs from rate limiters, observability tools, pr # How Cycles Compares to Rate Limiters, Observability, Provider Caps, In-App Counters, and Job Schedulers +## Quick comparison + +| Approach | What it controls | Pre-execution? | Per-tenant? | Cost-aware? | Degradation? | +|---|---|:---:|:---:|:---:|:---:| +| **Rate limiter** | Request velocity | Velocity only | Partial | No | No | +| **Observability** | Post-hoc visibility | No | No | After the fact | No | +| **Provider cap** | Org-level spend | No (delayed) | No | Partial | No | +| **In-app counter** | Custom metric | Partial | Partial | Partial | No | +| **Job scheduler** | Execution timing | No | No | No | No | +| **Cycles** | Bounded budget exposure | Yes | Yes | Yes | Yes (three-way) | + +--- + Teams building autonomous systems usually already have some controls in place. - Rate limiters. @@ -370,6 +383,19 @@ It is "which layer is missing?" For most teams building autonomous systems, the missing layer is budget authority. 
+## When to adopt Cycles + +Consider adding Cycles to your stack when: + +- **Agents run autonomously** — without human-in-the-loop approval for each action +- **Cost is unpredictable** — fan-out, tool loops, or retries make per-run cost hard to bound +- **Multiple tenants share infrastructure** — one tenant's runaway agent should not affect others +- **You need graceful degradation** — switching to cheaper models or reducing scope when budget is low, rather than hard-failing +- **Compliance requires cost limits** — audit trails showing that every action was authorized against a budget +- **You've outgrown ad hoc counters** — custom counters work until concurrency, retries, and hierarchy make them unreliable + +If none of these apply yet, start with [shadow mode](/how-to/shadow-mode-in-cycles-how-to-roll-out-budget-enforcement-without-breaking-production) to see what enforcement would look like on your current traffic. + ## Next steps To explore the Cycles stack: @@ -380,3 +406,4 @@ To explore the Cycles stack: - Integrate with Python using the [Python Client](/quickstart/getting-started-with-the-python-client) - Integrate with TypeScript using the [TypeScript Client](/quickstart/getting-started-with-the-typescript-client) - Integrate with Spring AI using the [Spring Client](https://github.com/runcycles/cycles-spring-boot-starter) +- Browse the full [Integration Ecosystem](/how-to/ecosystem) diff --git a/glossary.md b/glossary.md index b7ef040..c8b9c4e 100644 --- a/glossary.md +++ b/glossary.md @@ -121,6 +121,44 @@ Recording spend against a budget **without** a prior reservation. Events are use The current state of a budget, including fields such as `allocated`, `spent`, `reserved`, `remaining`, and `debt`. Balances are computed across the full scope hierarchy and reflect all committed, reserved, and event-based spend. See [Querying Balances](/protocol/querying-balances-in-cycles-understanding-budget-state). 
+## Patterns & Architecture + +### Budget Envelope + +A fixed upper bound on how much an entity (tenant, workflow, run) is allowed to consume. Budget envelopes are enforced hierarchically — a run's envelope cannot exceed its parent workflow's remaining budget, which in turn cannot exceed the tenant's allocation. + +### Graceful Degradation + +A response strategy where the system reduces quality or capability instead of failing outright when budget is constrained. For example, switching from a large model to a smaller one, reducing `max_tokens`, or disabling optional tool calls. Enabled by the [three-way decision](#three-way-decision) model. + +### Fan-Out + +A pattern where a single workflow or agent spawns multiple concurrent sub-tasks, each consuming budget independently. Fan-out is a common source of budget overruns because the aggregate cost grows multiplicatively. Cycles handles this through [hierarchical scopes](#scope) and concurrent [reservations](#reservation). + +### Tool Loop + +A failure mode where an AI agent repeatedly calls the same tool in a loop, often due to ambiguous results or hallucinated tool arguments. Without budget authority, tool loops can run indefinitely and accumulate significant cost. See [Runaway Agents and Tool Loops](/incidents/runaway-agents-tool-loops-and-budget-overruns-the-incidents-cycles-is-designed-to-prevent). + +### Retry Storm + +A cascade of retries triggered by transient failures, where each retry spawns additional retries across services. Without idempotency and budget controls, retry storms can amplify cost by orders of magnitude. See [Retry Storms](/incidents/retry-storms-and-idempotency-failures). + +### Budget Authority + +The architectural role responsible for deciding whether an action may proceed based on remaining budget. Budget authority is enforced **before** execution — not observed after the fact. Cycles is a purpose-built budget authority. See [What is Cycles?](/quickstart/what-is-cycles). 
+ +### Shadow Mode + +A deployment strategy where budget enforcement logic runs in evaluation mode — computing decisions and logging results without actually blocking requests. Shadow mode allows teams to validate budget policies against production traffic before enabling enforcement. See [Shadow Mode Rollout](/how-to/shadow-mode-in-cycles-how-to-roll-out-budget-enforcement-without-breaking-production). + +### Tenant Isolation + +A budget pattern where each tenant receives an independent budget allocation that cannot be consumed by other tenants. Tenant isolation prevents the "noisy neighbor" problem where one tenant's runaway agent exhausts shared resources. + +### Cost Estimation + +The process of predicting the cost of an AI operation before execution. Accurate estimates improve reservation precision and reduce budget waste from over-reserving. See [Cost Estimation Cheat Sheet](/how-to/cost-estimation-cheat-sheet). + ## Infrastructure ### Cycles Server @@ -138,3 +176,25 @@ A [Model Context Protocol](https://modelcontextprotocol.io) server that exposes ### Cycles Protocol The open specification defining the budget authority API. The protocol covers the complete reservation lifecycle, balance queries, event recording, and decision evaluation. See the [API Reference](/protocol/api-reference-for-the-cycles-protocol). + +## AI & Agent Terminology + +### Autonomous Agent + +A software system that takes actions on behalf of a user with minimal human oversight. Autonomous agents typically make multiple LLM calls, use tools, and can run for extended periods. Without budget authority, agents may consume resources indefinitely. + +### Model Context Protocol (MCP) + +An open protocol that allows AI hosts (Claude Desktop, Claude Code, Cursor, Windsurf) to discover and call external tools. Cycles provides an [MCP server](/quickstart/getting-started-with-the-mcp-server) that exposes budget authority as MCP tools, giving agents budget awareness without SDK integration. 
+ +### Token + +The fundamental unit of text processing in large language models. Input and output tokens have different costs. Cycles can track budget in [tokens, dollars, credits, or risk points](/protocol/understanding-units-in-cycles-usd-microcents-tokens-credits-and-risk-points). + +### Agentic Loop + +The iterative cycle where an AI agent reasons, acts, observes results, and decides on the next action. Each iteration may involve one or more LLM calls and tool invocations, making the total cost of an agentic loop inherently unpredictable without budget controls. + +### Guardrail + +A constraint placed on an AI system to prevent undesirable outcomes. Budget authority is a financial guardrail — it prevents agents from consuming more resources than allocated, complementing safety and content guardrails. diff --git a/how-to/ecosystem.md b/how-to/ecosystem.md new file mode 100644 index 0000000..59b3317 --- /dev/null +++ b/how-to/ecosystem.md @@ -0,0 +1,149 @@ +--- +title: "Integration Ecosystem" +description: "Explore the full Cycles integration ecosystem — SDKs, AI providers, frameworks, and tools that work with budget authority for autonomous agents." +--- + +# Integration Ecosystem + +Cycles integrates with the tools, frameworks, and AI providers you already use. Whether you're building autonomous agents, adding budget authority to an existing application, or exploring what's possible with controlled AI spending, there's an integration path for you. + +## AI Model Providers + +### OpenAI + +Integrate Cycles budget authority with ChatGPT, GPT-4, GPT-4o, and other OpenAI models. Control per-request and per-session spending when your agents call OpenAI APIs. + +- [OpenAI integration guide](/how-to/integrating-cycles-with-openai) +- [openai.com](https://openai.com) + +### Anthropic + +Use Cycles with Claude models to set spending limits on autonomous agent workflows powered by Anthropic's API. 
+ +- [Anthropic integration guide](/how-to/integrating-cycles-with-anthropic) +- [anthropic.com](https://anthropic.com) + +### Google Gemini + +Add budget authority to applications built on Google's Gemini family of models. + +- [Gemini integration guide](/how-to/integrating-cycles-with-google-gemini) +- [ai.google.dev](https://ai.google.dev) + +### AWS Bedrock + +Cycles works with AWS Bedrock's multi-model platform, giving you budget control across any foundation model available through Bedrock. + +- [AWS Bedrock integration guide](/how-to/integrating-cycles-with-aws-bedrock) +- [aws.amazon.com/bedrock](https://aws.amazon.com/bedrock) + +## AI Frameworks & SDKs + +### LangChain (Python) + +Build budget-aware LangChain agents in Python. Cycles integrates with LangChain's tool and callback system to enforce spending limits throughout chain execution. + +- [LangChain integration guide](/how-to/integrating-cycles-with-langchain) +- [python.langchain.com](https://python.langchain.com) + +### LangChain.js + +The same LangChain integration, purpose-built for JavaScript and TypeScript environments. + +- [LangChain.js integration guide](/how-to/integrating-cycles-with-langchain-js) +- [js.langchain.com](https://js.langchain.com) + +### Vercel AI SDK + +Add Cycles budget authority to applications built with the Vercel AI SDK for seamless spending control in Next.js and other Vercel-deployed projects. + +- [Vercel AI SDK integration guide](/how-to/integrating-cycles-with-vercel-ai-sdk) +- [sdk.vercel.ai](https://sdk.vercel.ai) + +### Spring AI + +Integrate Cycles with Spring AI to bring budget authority to Java and Kotlin AI applications. + +- [Spring AI quickstart](/quickstart/how-to-add-hard-budget-limits-to-spring-ai-with-cycles) +- [spring.io/projects/spring-ai](https://spring.io/projects/spring-ai) + +## Web Frameworks + +### Express.js + +Add Cycles middleware to your Express.js API to enforce budget authority on any route that triggers AI spending. 
+ +- [Express.js integration guide](/how-to/integrating-cycles-with-express) +- [expressjs.com](https://expressjs.com) + +### FastAPI + +Use the Cycles Python client with FastAPI for high-performance, budget-aware AI APIs. + +- [FastAPI integration guide](/how-to/integrating-cycles-with-fastapi) +- [fastapi.tiangolo.com](https://fastapi.tiangolo.com) + +### Spring Boot + +Leverage the Cycles Spring Boot Starter for auto-configured budget authority in your Spring Boot applications. + +- [Spring Boot quickstart](/quickstart/getting-started-with-the-cycles-spring-boot-starter) +- [spring.io/projects/spring-boot](https://spring.io/projects/spring-boot) + +## Agent Platforms + +### MCP (Model Context Protocol) + +Cycles provides an MCP server that exposes budget authority as tools for any MCP-compatible client, including Claude Desktop, Claude Code, Cursor, and Windsurf. + +- [MCP integration guide](/how-to/integrating-cycles-with-mcp) +- [modelcontextprotocol.io](https://modelcontextprotocol.io) + +### OpenClaw + +Connect Cycles to OpenClaw for budget-controlled multi-agent orchestration. + +- [OpenClaw integration guide](/how-to/integrating-cycles-with-openclaw) + +## Official SDKs + +### Python Client + +The official Cycles Python client. Install from PyPI and start enforcing budgets in minutes. + +- [cycles-client on PyPI](https://pypi.org/project/cycles-client/) +- [Python quickstart](/quickstart/getting-started-with-the-python-client) + +### TypeScript Client + +The official Cycles TypeScript client for Node.js and browser environments. + +- [@runcycles/client on npm](https://www.npmjs.com/package/@runcycles/client) +- [TypeScript quickstart](/quickstart/getting-started-with-the-typescript-client) + +### Spring Boot Starter + +Auto-configured Cycles integration for Spring Boot applications, available on Maven Central. 
+ +- [cycles-spring-boot-starter on Maven Central](https://central.sonatype.com/artifact/io.runcycles/cycles-spring-boot-starter) +- [Spring Boot quickstart](/quickstart/getting-started-with-the-cycles-spring-boot-starter) + +## Protocol & Standards + +### Cycles Protocol + +The Cycles Protocol is an open specification for budget authority in autonomous agent systems, licensed under Apache 2.0. Build your own implementation or contribute to the spec. + +- [Cycles Protocol on GitHub](https://github.com/runcycles/cycles-protocol) + +### OpenAPI Specification + +A complete OpenAPI specification is available for the Cycles API, making it straightforward to generate clients in any language or integrate with API tooling. + +- [Interactive API Reference](/api/) + +## Community Tools + +The Cycles ecosystem grows with every project that adopts budget authority. If you've built a library, plugin, tool, or integration that works with Cycles, we want to hear about it. + +Building something with Cycles? Add a [Built with Cycles badge](/community/badges) to your project and let the community know what you're working on. 
diff --git a/public/badges/built-with-cycles-dark.svg b/public/badges/built-with-cycles-dark.svg new file mode 100644 index 0000000..a8c4cb9 --- /dev/null +++ b/public/badges/built-with-cycles-dark.svg @@ -0,0 +1,21 @@ + + budget authority: Cycles + + + + + + + + + + + + + + + budget authority + + Cycles + + diff --git a/public/badges/built-with-cycles.svg b/public/badges/built-with-cycles.svg new file mode 100644 index 0000000..ff1ef56 --- /dev/null +++ b/public/badges/built-with-cycles.svg @@ -0,0 +1,21 @@ + + budget authority: Cycles + + + + + + + + + + + + + + + budget authority + + Cycles + + From a22f3b2f6b6fb9abcefa6fd894ada7d8ea2cd96f Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 19 Mar 2026 22:29:17 +0000 Subject: [PATCH 2/2] Remove remaining duplicate glossary entries (Budget Authority, Shadow Mode) https://claude.ai/code/session_01N2N4fKqatxBiXgFNSwxFqF --- glossary.md | 8 -------- 1 file changed, 8 deletions(-) diff --git a/glossary.md b/glossary.md index c8b9c4e..4db5704 100644 --- a/glossary.md +++ b/glossary.md @@ -143,14 +143,6 @@ A failure mode where an AI agent repeatedly calls the same tool in a loop, often A cascade of retries triggered by transient failures, where each retry spawns additional retries across services. Without idempotency and budget controls, retry storms can amplify cost by orders of magnitude. See [Retry Storms](/incidents/retry-storms-and-idempotency-failures). -### Budget Authority - -The architectural role responsible for deciding whether an action may proceed based on remaining budget. Budget authority is enforced **before** execution — not observed after the fact. Cycles is a purpose-built budget authority. See [What is Cycles?](/quickstart/what-is-cycles). - -### Shadow Mode - -A deployment strategy where budget enforcement logic runs in evaluation mode — computing decisions and logging results without actually blocking requests. 
Shadow mode allows teams to validate budget policies against production traffic before enabling enforcement. See [Shadow Mode Rollout](/how-to/shadow-mode-in-cycles-how-to-roll-out-budget-enforcement-without-breaking-production). - ### Tenant Isolation A budget pattern where each tenant receives an independent budget allocation that cannot be consumed by other tenants. Tenant isolation prevents the "noisy neighbor" problem where one tenant's runaway agent exhausts shared resources.