diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index d0ab6645f..2a8f4ffdd 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "1.2.0"
+ ".": "1.3.0"
}
\ No newline at end of file
diff --git a/.stats.yml b/.stats.yml
index ca5e82df9..aa9206944 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 97
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/runloop-ai%2Frunloop-f2df3524e4b99c38b634c334d098aa2c7d543d5ea0f49c4dd8f4d92723b81b94.yml
-openapi_spec_hash: c377abec5716d1d6c5b01a527a5bfdfb
-config_hash: 2363f563f42501d2b1587a4f64bdccaf
+configured_endpoints: 98
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/runloop-ai%2Frunloop-5271153bd2f82579803953bd2fa1b9ea6466c979118804f64379fb14e9a9c436.yml
+openapi_spec_hash: 95ac224a4b0f10e9ba6129a86746c9d4
+config_hash: cb8534d20a68a49b92726bedd50f8bb1
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0165349d7..1d1ed6b23 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,42 @@
# Changelog
+## 1.3.0 (2025-12-20)
+
+Full Changelog: [v1.2.0...v1.3.0](https://github.com/runloopai/api-client-python/compare/v1.2.0...v1.3.0)
+
+### ⚠ BREAKING CHANGES
+
+* remove support for pydantic-v1, pydantic-v2 is now default ([#710](https://github.com/runloopai/api-client-python/issues/710))
+
+### Features
+
+* **benchmarks:** add `update_scenarios` method to benchmarks resource ([71ec221](https://github.com/runloopai/api-client-python/commit/71ec221f1d0cad7aac33c0299d3f8b1aa97d0741))
+* **devbox:** added stdin streaming endpoint ([83ae56a](https://github.com/runloopai/api-client-python/commit/83ae56a22a9c1d4528719321b9565731532191f2))
+* **scenarios:** add scenario builder to sdk ([#706](https://github.com/runloopai/api-client-python/issues/706)) ([2d41a15](https://github.com/runloopai/api-client-python/commit/2d41a15b4455ed8d7f6a8063cf19b82d51edeef8))
+* **sdk:** add Benchmark and AsyncBenchmark classes ([#714](https://github.com/runloopai/api-client-python/issues/714)) ([8909d8a](https://github.com/runloopai/api-client-python/commit/8909d8aabfc2f1c80ff74b636225b42cac6725ff))
+* **sdk:** add BenchmarkOps and AsyncBenchmarkOps to SDK ([#716](https://github.com/runloopai/api-client-python/issues/716)) ([9b434d9](https://github.com/runloopai/api-client-python/commit/9b434d9bc7ebdcea2b156689403d853a932f0d9e))
+* **sdk:** add BenchmarkRun and AsyncBenchmarkRun classes ([#712](https://github.com/runloopai/api-client-python/issues/712)) ([6aa83e2](https://github.com/runloopai/api-client-python/commit/6aa83e2a6c8a55694435bd2b707340770f0a326a))
+
+
+### Bug Fixes
+
+* **benchmarks:** `update()` for benchmarks and scenarios replaces all provided fields and does not modify unspecified fields ([#6702](https://github.com/runloopai/api-client-python/issues/6702)) ([cfd04b6](https://github.com/runloopai/api-client-python/commit/cfd04b6e7781534fd0e775e1b00793ad53814a47))
+* **types:** allow pyright to infer TypedDict types within SequenceNotStr ([3241717](https://github.com/runloopai/api-client-python/commit/32417177128b5f5d90b852a5460fe6823198cf9b))
+* use async_to_httpx_files in patch method ([88f8fb9](https://github.com/runloopai/api-client-python/commit/88f8fb92e1d48ff6f95833a7ee1e376bef76e0e1))
+
+
+### Chores
+
+* add documentation url to pypi project page ([#711](https://github.com/runloopai/api-client-python/issues/711)) ([7afb327](https://github.com/runloopai/api-client-python/commit/7afb32731842ebee4f479837959ccac856bd5e85))
+* add missing docstrings ([a198632](https://github.com/runloopai/api-client-python/commit/a198632f6a3936bcf5b5b4f4e6324461c4853893))
+* **internal:** add missing files argument to base client ([b7065e2](https://github.com/runloopai/api-client-python/commit/b7065e204d00f853bcac75637680dc198346a804))
+* remove support for pydantic-v1, pydantic-v2 is now default ([#710](https://github.com/runloopai/api-client-python/issues/710)) ([fb3cc3d](https://github.com/runloopai/api-client-python/commit/fb3cc3d354d4279542cd20f44857f2ec28be7082))
+
+
+### Documentation
+
+* reformat sidebar and index pages to be more transparent, add favicon to browser tab ([#715](https://github.com/runloopai/api-client-python/issues/715)) ([1161b8f](https://github.com/runloopai/api-client-python/commit/1161b8fbe8d78dc572e0310da009e1bcc7dec36f))
+
## 1.2.0 (2025-12-09)
Full Changelog: [v1.1.0...v1.2.0](https://github.com/runloopai/api-client-python/compare/v1.1.0...v1.2.0)
diff --git a/api.md b/api.md
index 17cc5978f..83c2c8b60 100644
--- a/api.md
+++ b/api.md
@@ -20,6 +20,8 @@ from runloop_api_client.types import (
BenchmarkCreateParameters,
BenchmarkRunListView,
BenchmarkRunView,
+ BenchmarkScenarioUpdateParameters,
+ BenchmarkUpdateParameters,
BenchmarkView,
ScenarioDefinitionListView,
StartBenchmarkRunParameters,
@@ -35,6 +37,7 @@ Methods:
- client.benchmarks.definitions(id, \*\*params) -> ScenarioDefinitionListView
- client.benchmarks.list_public(\*\*params) -> SyncBenchmarksCursorIDPage[BenchmarkView]
- client.benchmarks.start_run(\*\*params) -> BenchmarkRunView
+- client.benchmarks.update_scenarios(id, \*\*params) -> BenchmarkView
## Runs
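Usage sketch for the new endpoint — a minimal example, assuming the top-level `Runloop` client with an API key in the environment; the IDs are hypothetical placeholders:

from runloop_api_client import Runloop

client = Runloop()  # assumes RUNLOOP_API_KEY is set in the environment

# Add and remove Scenario IDs in a single call; both arguments are optional
# and the updated BenchmarkView is returned.
benchmark = client.benchmarks.update_scenarios(
    "bmd_123",                        # hypothetical Benchmark ID
    scenarios_to_add=["scn_new"],     # hypothetical Scenario IDs
    scenarios_to_remove=["scn_old"],
)
print(benchmark.name)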
diff --git a/pyproject.toml b/pyproject.toml
index 2c90fa53a..5007d5e66 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "runloop_api_client"
-version = "1.2.0"
+version = "1.3.0"
description = "The official Python library for the runloop API"
dynamic = ["readme"]
license = "MIT"
@@ -15,7 +15,7 @@ dependencies = [
"anyio>=3.5.0, <5",
"distro>=1.7.0, <2",
"sniffio",
- "uuid-utils>=0.11.0",
+ "uuid-utils>=0.11.0",
]
requires-python = ">= 3.9"
diff --git a/requirements-dev.lock b/requirements-dev.lock
index b9f3f2862..c48025dbf 100644
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -94,7 +94,7 @@ python-dateutil==2.9.0.post0 ; python_full_version < '3.10'
# via time-machine
respx==0.22.0
rich==14.2.0
-ruff==0.14.8
+ruff==0.14.9
six==1.17.0 ; python_full_version < '3.10'
# via python-dateutil
sniffio==1.3.1
diff --git a/src/runloop_api_client/_base_client.py b/src/runloop_api_client/_base_client.py
index f639d4201..5c05c86c5 100644
--- a/src/runloop_api_client/_base_client.py
+++ b/src/runloop_api_client/_base_client.py
@@ -1247,9 +1247,12 @@ def patch(
*,
cast_to: Type[ResponseT],
body: Body | None = None,
+ files: RequestFiles | None = None,
options: RequestOptions = {},
) -> ResponseT:
- opts = FinalRequestOptions.construct(method="patch", url=path, json_data=body, **options)
+ opts = FinalRequestOptions.construct(
+ method="patch", url=path, json_data=body, files=to_httpx_files(files), **options
+ )
return self.request(cast_to, opts)
def put(
@@ -1767,9 +1770,12 @@ async def patch(
*,
cast_to: Type[ResponseT],
body: Body | None = None,
+ files: RequestFiles | None = None,
options: RequestOptions = {},
) -> ResponseT:
- opts = FinalRequestOptions.construct(method="patch", url=path, json_data=body, **options)
+ opts = FinalRequestOptions.construct(
+ method="patch", url=path, json_data=body, files=await async_to_httpx_files(files), **options
+ )
return await self.request(cast_to, opts)
async def put(
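For context, a hedged sketch of how a generated resource method could route multipart data through the new `files` argument — the method name, endpoint, and types below are illustrative, not part of this diff:

# Hypothetical resource method; `files` is converted via to_httpx_files()
# in the sync client (async_to_httpx_files() in the async client) before
# being attached to FinalRequestOptions.
def update_attachment(self, id: str, *, file: FileTypes) -> ObjectView:
    return self._patch(
        f"/v1/objects/{id}",        # hypothetical endpoint
        body={"purpose": "update"},
        files=[("file", file)],
        options={},
        cast_to=ObjectView,
    )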
diff --git a/src/runloop_api_client/_types.py b/src/runloop_api_client/_types.py
index a2a12e84e..31df93064 100644
--- a/src/runloop_api_client/_types.py
+++ b/src/runloop_api_client/_types.py
@@ -243,6 +243,9 @@ class HttpxSendArgs(TypedDict, total=False):
if TYPE_CHECKING:
# This works because str.__contains__ does not accept object (either in typeshed or at runtime)
# https://github.com/hauntsaninja/useful_types/blob/5e9710f3875107d068e7679fd7fec9cfab0eff3b/useful_types/__init__.py#L285
+ #
+ # Note: index() and count() methods are intentionally omitted to allow pyright to properly
+ # infer TypedDict types when dict literals are used in lists assigned to SequenceNotStr.
class SequenceNotStr(Protocol[_T_co]):
@overload
def __getitem__(self, index: SupportsIndex, /) -> _T_co: ...
@@ -251,8 +254,6 @@ def __getitem__(self, index: slice, /) -> Sequence[_T_co]: ...
def __contains__(self, value: object, /) -> bool: ...
def __len__(self) -> int: ...
def __iter__(self) -> Iterator[_T_co]: ...
- def index(self, value: Any, start: int = 0, stop: int = ..., /) -> int: ...
- def count(self, value: Any, /) -> int: ...
def __reversed__(self) -> Iterator[_T_co]: ...
else:
# just point this to a normal `Sequence` at runtime to avoid having to special case
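The effect of dropping `index()` and `count()` can be checked with a small pyright snippet; the `LaunchParams` TypedDict and `configure` function are illustrative only:

from typing_extensions import TypedDict

from runloop_api_client._types import SequenceNotStr

class LaunchParams(TypedDict, total=False):
    name: str
    port: int

def configure(params: SequenceNotStr[LaunchParams]) -> None: ...

# pyright now infers the dict literal as LaunchParams instead of failing to
# match the protocol; a bare str is still rejected because
# str.__contains__ does not accept object.
configure([{"name": "web", "port": 8080}])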
diff --git a/src/runloop_api_client/_version.py b/src/runloop_api_client/_version.py
index e13ec2fd5..c746bdc5e 100644
--- a/src/runloop_api_client/_version.py
+++ b/src/runloop_api_client/_version.py
@@ -1,4 +1,4 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
__title__ = "runloop_api_client"
-__version__ = "1.2.0" # x-release-please-version
+__version__ = "1.3.0" # x-release-please-version
diff --git a/src/runloop_api_client/resources/agents.py b/src/runloop_api_client/resources/agents.py
index 6ff202d74..9ac9f8c02 100644
--- a/src/runloop_api_client/resources/agents.py
+++ b/src/runloop_api_client/resources/agents.py
@@ -49,6 +49,7 @@ def create(
self,
*,
name: str,
+ version: str,
source: Optional[AgentSource] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -66,6 +67,8 @@ def create(
Args:
name: The name of the Agent.
+ version: The version of the Agent. Must be a semver string (e.g., '2.0.65') or a SHA.
+
source: The source configuration for the Agent.
extra_headers: Send extra headers
@@ -83,6 +86,7 @@ def create(
body=maybe_transform(
{
"name": name,
+ "version": version,
"source": source,
},
agent_create_params.AgentCreateParams,
@@ -138,6 +142,7 @@ def list(
name: str | Omit = omit,
search: str | Omit = omit,
starting_after: str | Omit = omit,
+ version: str | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -151,7 +156,7 @@ def list(
Args:
is_public: Filter agents by public visibility.
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
name: Filter agents by name (partial match supported).
@@ -159,6 +164,8 @@ def list(
starting_after: Load the next page of data starting after the item with the given ID.
+ version: Filter by version. Use 'latest' to get the most recently created agent.
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -182,6 +189,7 @@ def list(
"name": name,
"search": search,
"starting_after": starting_after,
+ "version": version,
},
agent_list_params.AgentListParams,
),
@@ -214,6 +222,7 @@ async def create(
self,
*,
name: str,
+ version: str,
source: Optional[AgentSource] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -231,6 +240,8 @@ async def create(
Args:
name: The name of the Agent.
+ version: The version of the Agent. Must be a semver string (e.g., '2.0.65') or a SHA.
+
source: The source configuration for the Agent.
extra_headers: Send extra headers
@@ -248,6 +259,7 @@ async def create(
body=await async_maybe_transform(
{
"name": name,
+ "version": version,
"source": source,
},
agent_create_params.AgentCreateParams,
@@ -303,6 +315,7 @@ def list(
name: str | Omit = omit,
search: str | Omit = omit,
starting_after: str | Omit = omit,
+ version: str | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -316,7 +329,7 @@ def list(
Args:
is_public: Filter agents by public visibility.
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
name: Filter agents by name (partial match supported).
@@ -324,6 +337,8 @@ def list(
starting_after: Load the next page of data starting after the item with the given ID.
+ version: Filter by version. Use 'latest' to get the most recently created agent.
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -347,6 +362,7 @@ def list(
"name": name,
"search": search,
"starting_after": starting_after,
+ "version": version,
},
agent_list_params.AgentListParams,
),
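Taken together: `create()` now requires a version and `list()` can filter on it. A short sketch, assuming the `Runloop` client with an API key in the environment; the agent name is a placeholder:

from runloop_api_client import Runloop

client = Runloop()

# `version` is now required: a semver string or a SHA.
agent = client.agents.create(name="my-agent", version="2.0.65")

# New `version` filter; "latest" returns the most recently created agent.
page = client.agents.list(name="my-agent", version="latest", limit=1)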
diff --git a/src/runloop_api_client/resources/benchmarks/benchmarks.py b/src/runloop_api_client/resources/benchmarks/benchmarks.py
index c30885e9e..9d9a30b5d 100644
--- a/src/runloop_api_client/resources/benchmarks/benchmarks.py
+++ b/src/runloop_api_client/resources/benchmarks/benchmarks.py
@@ -21,6 +21,7 @@
benchmark_start_run_params,
benchmark_definitions_params,
benchmark_list_public_params,
+ benchmark_update_scenarios_params,
)
from ..._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
from ..._utils import maybe_transform, async_maybe_transform
@@ -88,16 +89,16 @@ def create(
Create a Benchmark with a set of Scenarios.
Args:
- name: The name of the Benchmark. This must be unique.
+ name: The unique name of the Benchmark.
attribution: Attribution information for the benchmark.
description: Detailed description of the benchmark.
- metadata: User defined metadata to attach to the benchmark for organization.
+ metadata: User defined metadata to attach to the benchmark.
required_environment_variables: Environment variables required to run the benchmark. If any required variables
- are not supplied, the benchmark will fail to start
+ are not supplied, the benchmark will fail to start.
required_secret_names: Secrets required to run the benchmark (each environment variable name will be
mapped to your user secret by name). If any of these secrets are not
@@ -176,12 +177,12 @@ def update(
self,
id: str,
*,
- name: str,
attribution: Optional[str] | Omit = omit,
description: Optional[str] | Omit = omit,
metadata: Optional[Dict[str, str]] | Omit = omit,
+ name: Optional[str] | Omit = omit,
required_environment_variables: Optional[SequenceNotStr[str]] | Omit = omit,
- required_secret_names: SequenceNotStr[str] | Omit = omit,
+ required_secret_names: Optional[SequenceNotStr[str]] | Omit = omit,
scenario_ids: Optional[SequenceNotStr[str]] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -191,26 +192,30 @@ def update(
timeout: float | httpx.Timeout | None | NotGiven = not_given,
idempotency_key: str | None = None,
) -> BenchmarkView:
- """
- Update a Benchmark with a set of Scenarios.
+ """Update a Benchmark.
+
+ Fields that are null will preserve the existing value.
+ Fields that are provided (including empty values) will replace the existing
+ value entirely.
Args:
- name: The name of the Benchmark. This must be unique.
+ attribution: Attribution information for the benchmark. Pass in empty string to clear.
- attribution: Attribution information for the benchmark.
+ description: Detailed description of the benchmark. Pass in empty string to clear.
- description: Detailed description of the benchmark.
+ metadata: User defined metadata to attach to the benchmark. Pass in empty map to clear.
- metadata: User defined metadata to attach to the benchmark for organization.
+ name: The unique name of the Benchmark. Cannot be blank.
required_environment_variables: Environment variables required to run the benchmark. If any required variables
- are not supplied, the benchmark will fail to start
+ are not supplied, the benchmark will fail to start. Pass in empty list to clear.
required_secret_names: Secrets required to run the benchmark (each environment variable name will be
mapped to your user secret by name). If any of these secrets are not
- provided or the mapping is incorrect, the benchmark will fail to start.
+ provided or the mapping is incorrect, the benchmark will fail to start. Pass in
+ empty list to clear.
- scenario_ids: The Scenario IDs that make up the Benchmark.
+ scenario_ids: The Scenario IDs that make up the Benchmark. Pass in empty list to clear.
extra_headers: Send extra headers
@@ -228,10 +233,10 @@ def update(
f"/v1/benchmarks/{id}",
body=maybe_transform(
{
- "name": name,
"attribution": attribution,
"description": description,
"metadata": metadata,
+ "name": name,
"required_environment_variables": required_environment_variables,
"required_secret_names": required_secret_names,
"scenario_ids": scenario_ids,
@@ -252,6 +257,7 @@ def list(
self,
*,
limit: int | Omit = omit,
+ name: str | Omit = omit,
starting_after: str | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -264,7 +270,9 @@ def list(
List all Benchmarks matching filter.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
+
+ name: Filter by name
starting_after: Load the next page of data starting after the item with the given ID.
@@ -287,6 +295,7 @@ def list(
query=maybe_transform(
{
"limit": limit,
+ "name": name,
"starting_after": starting_after,
},
benchmark_list_params.BenchmarkListParams,
@@ -312,7 +321,7 @@ def definitions(
Get scenario definitions for a previously created Benchmark.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
starting_after: Load the next page of data starting after the item with the given ID.
@@ -360,7 +369,7 @@ def list_public(
List all public benchmarks matching filter.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
starting_after: Load the next page of data starting after the item with the given ID.
@@ -449,6 +458,59 @@ def start_run(
cast_to=BenchmarkRunView,
)
+ def update_scenarios(
+ self,
+ id: str,
+ *,
+ scenarios_to_add: Optional[SequenceNotStr[str]] | Omit = omit,
+ scenarios_to_remove: Optional[SequenceNotStr[str]] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ idempotency_key: str | None = None,
+ ) -> BenchmarkView:
+ """
+ Add and/or remove Scenario IDs from an existing Benchmark.
+
+ Args:
+ scenarios_to_add: Scenario IDs to add to the Benchmark.
+
+ scenarios_to_remove: Scenario IDs to remove from the Benchmark.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+
+ idempotency_key: Specify a custom idempotency key for this request
+ """
+ if not id:
+ raise ValueError(f"Expected a non-empty value for `id` but received {id!r}")
+ return self._post(
+ f"/v1/benchmarks/{id}/scenarios",
+ body=maybe_transform(
+ {
+ "scenarios_to_add": scenarios_to_add,
+ "scenarios_to_remove": scenarios_to_remove,
+ },
+ benchmark_update_scenarios_params.BenchmarkUpdateScenariosParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ idempotency_key=idempotency_key,
+ ),
+ cast_to=BenchmarkView,
+ )
+
class AsyncBenchmarksResource(AsyncAPIResource):
@cached_property
@@ -496,16 +558,16 @@ async def create(
Create a Benchmark with a set of Scenarios.
Args:
- name: The name of the Benchmark. This must be unique.
+ name: The unique name of the Benchmark.
attribution: Attribution information for the benchmark.
description: Detailed description of the benchmark.
- metadata: User defined metadata to attach to the benchmark for organization.
+ metadata: User defined metadata to attach to the benchmark.
required_environment_variables: Environment variables required to run the benchmark. If any required variables
- are not supplied, the benchmark will fail to start
+ are not supplied, the benchmark will fail to start.
required_secret_names: Secrets required to run the benchmark (each environment variable name will be
mapped to your user secret by name). If any of these secrets are not
@@ -584,12 +646,12 @@ async def update(
self,
id: str,
*,
- name: str,
attribution: Optional[str] | Omit = omit,
description: Optional[str] | Omit = omit,
metadata: Optional[Dict[str, str]] | Omit = omit,
+ name: Optional[str] | Omit = omit,
required_environment_variables: Optional[SequenceNotStr[str]] | Omit = omit,
- required_secret_names: SequenceNotStr[str] | Omit = omit,
+ required_secret_names: Optional[SequenceNotStr[str]] | Omit = omit,
scenario_ids: Optional[SequenceNotStr[str]] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -599,26 +661,30 @@ async def update(
timeout: float | httpx.Timeout | None | NotGiven = not_given,
idempotency_key: str | None = None,
) -> BenchmarkView:
- """
- Update a Benchmark with a set of Scenarios.
+ """Update a Benchmark.
+
+ Fields that are null will preserve the existing value.
+ Fields that are provided (including empty values) will replace the existing
+ value entirely.
Args:
- name: The name of the Benchmark. This must be unique.
+ attribution: Attribution information for the benchmark. Pass in empty string to clear.
- attribution: Attribution information for the benchmark.
+ description: Detailed description of the benchmark. Pass in empty string to clear.
- description: Detailed description of the benchmark.
+ metadata: User defined metadata to attach to the benchmark. Pass in empty map to clear.
- metadata: User defined metadata to attach to the benchmark for organization.
+ name: The unique name of the Benchmark. Cannot be blank.
required_environment_variables: Environment variables required to run the benchmark. If any required variables
- are not supplied, the benchmark will fail to start
+ are not supplied, the benchmark will fail to start. Pass in empty list to clear.
required_secret_names: Secrets required to run the benchmark (each environment variable name will be
mapped to your user secret by name). If any of these secrets are not
- provided or the mapping is incorrect, the benchmark will fail to start.
+ provided or the mapping is incorrect, the benchmark will fail to start. Pass in
+ empty list to clear.
- scenario_ids: The Scenario IDs that make up the Benchmark.
+ scenario_ids: The Scenario IDs that make up the Benchmark. Pass in empty list to clear.
extra_headers: Send extra headers
@@ -636,10 +702,10 @@ async def update(
f"/v1/benchmarks/{id}",
body=await async_maybe_transform(
{
- "name": name,
"attribution": attribution,
"description": description,
"metadata": metadata,
+ "name": name,
"required_environment_variables": required_environment_variables,
"required_secret_names": required_secret_names,
"scenario_ids": scenario_ids,
@@ -660,6 +726,7 @@ def list(
self,
*,
limit: int | Omit = omit,
+ name: str | Omit = omit,
starting_after: str | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -672,7 +739,9 @@ def list(
List all Benchmarks matching filter.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
+
+ name: Filter by name
starting_after: Load the next page of data starting after the item with the given ID.
@@ -695,6 +764,7 @@ def list(
query=maybe_transform(
{
"limit": limit,
+ "name": name,
"starting_after": starting_after,
},
benchmark_list_params.BenchmarkListParams,
@@ -720,7 +790,7 @@ async def definitions(
Get scenario definitions for a previously created Benchmark.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
starting_after: Load the next page of data starting after the item with the given ID.
@@ -768,7 +838,7 @@ def list_public(
List all public benchmarks matching filter.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
starting_after: Load the next page of data starting after the item with the given ID.
@@ -857,6 +927,59 @@ async def start_run(
cast_to=BenchmarkRunView,
)
+ async def update_scenarios(
+ self,
+ id: str,
+ *,
+ scenarios_to_add: Optional[SequenceNotStr[str]] | Omit = omit,
+ scenarios_to_remove: Optional[SequenceNotStr[str]] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ idempotency_key: str | None = None,
+ ) -> BenchmarkView:
+ """
+ Add and/or remove Scenario IDs from an existing Benchmark.
+
+ Args:
+ scenarios_to_add: Scenario IDs to add to the Benchmark.
+
+ scenarios_to_remove: Scenario IDs to remove from the Benchmark.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+
+ idempotency_key: Specify a custom idempotency key for this request
+ """
+ if not id:
+ raise ValueError(f"Expected a non-empty value for `id` but received {id!r}")
+ return await self._post(
+ f"/v1/benchmarks/{id}/scenarios",
+ body=await async_maybe_transform(
+ {
+ "scenarios_to_add": scenarios_to_add,
+ "scenarios_to_remove": scenarios_to_remove,
+ },
+ benchmark_update_scenarios_params.BenchmarkUpdateScenariosParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ idempotency_key=idempotency_key,
+ ),
+ cast_to=BenchmarkView,
+ )
+
class BenchmarksResourceWithRawResponse:
def __init__(self, benchmarks: BenchmarksResource) -> None:
@@ -883,6 +1006,9 @@ def __init__(self, benchmarks: BenchmarksResource) -> None:
self.start_run = to_raw_response_wrapper(
benchmarks.start_run,
)
+ self.update_scenarios = to_raw_response_wrapper(
+ benchmarks.update_scenarios,
+ )
@cached_property
def runs(self) -> RunsResourceWithRawResponse:
@@ -914,6 +1040,9 @@ def __init__(self, benchmarks: AsyncBenchmarksResource) -> None:
self.start_run = async_to_raw_response_wrapper(
benchmarks.start_run,
)
+ self.update_scenarios = async_to_raw_response_wrapper(
+ benchmarks.update_scenarios,
+ )
@cached_property
def runs(self) -> AsyncRunsResourceWithRawResponse:
@@ -945,6 +1074,9 @@ def __init__(self, benchmarks: BenchmarksResource) -> None:
self.start_run = to_streamed_response_wrapper(
benchmarks.start_run,
)
+ self.update_scenarios = to_streamed_response_wrapper(
+ benchmarks.update_scenarios,
+ )
@cached_property
def runs(self) -> RunsResourceWithStreamingResponse:
@@ -976,6 +1108,9 @@ def __init__(self, benchmarks: AsyncBenchmarksResource) -> None:
self.start_run = async_to_streamed_response_wrapper(
benchmarks.start_run,
)
+ self.update_scenarios = async_to_streamed_response_wrapper(
+ benchmarks.update_scenarios,
+ )
@cached_property
def runs(self) -> AsyncRunsResourceWithStreamingResponse:
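The practical upshot of the new `update()` semantics — omit to preserve, pass an empty value to clear — next to the scenario delta endpoint; a sketch with hypothetical IDs, reusing a `client` built as in the earlier examples:

# Omitted fields keep their existing values; empty values clear them, and
# an explicit None (null) also preserves, per the docstring.
client.benchmarks.update(
    "bmd_123",
    description="",   # empty string clears the description
    metadata={},      # empty map clears metadata
)

# For incremental membership changes, the delta endpoint avoids resending
# the full scenario_ids list.
client.benchmarks.update_scenarios("bmd_123", scenarios_to_add=["scn_new"])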
diff --git a/src/runloop_api_client/resources/benchmarks/runs.py b/src/runloop_api_client/resources/benchmarks/runs.py
index 6d69d160b..cdab6fd30 100644
--- a/src/runloop_api_client/resources/benchmarks/runs.py
+++ b/src/runloop_api_client/resources/benchmarks/runs.py
@@ -83,6 +83,7 @@ def list(
*,
benchmark_id: str | Omit = omit,
limit: int | Omit = omit,
+ name: str | Omit = omit,
starting_after: str | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -97,7 +98,9 @@ def list(
Args:
benchmark_id: The Benchmark ID to filter by.
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
+
+ name: Filter by name
starting_after: Load the next page of data starting after the item with the given ID.
@@ -121,6 +124,7 @@ def list(
{
"benchmark_id": benchmark_id,
"limit": limit,
+ "name": name,
"starting_after": starting_after,
},
run_list_params.RunListParams,
@@ -227,7 +231,7 @@ def list_scenario_runs(
List started scenario runs for a benchmark run.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
starting_after: Load the next page of data starting after the item with the given ID.
@@ -322,6 +326,7 @@ def list(
*,
benchmark_id: str | Omit = omit,
limit: int | Omit = omit,
+ name: str | Omit = omit,
starting_after: str | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -336,7 +341,9 @@ def list(
Args:
benchmark_id: The Benchmark ID to filter by.
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
+
+ name: Filter by name
starting_after: Load the next page of data starting after the item with the given ID.
@@ -360,6 +367,7 @@ def list(
{
"benchmark_id": benchmark_id,
"limit": limit,
+ "name": name,
"starting_after": starting_after,
},
run_list_params.RunListParams,
@@ -466,7 +474,7 @@ def list_scenario_runs(
List started scenario runs for a benchmark run.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
starting_after: Load the next page of data starting after the item with the given ID.
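A one-liner exercising the new run filter (the name value is illustrative; `client` as in the earlier sketches):

runs = client.benchmarks.runs.list(benchmark_id="bmd_123", name="nightly", limit=50)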
diff --git a/src/runloop_api_client/resources/blueprints.py b/src/runloop_api_client/resources/blueprints.py
index 8cc04c2e3..818365271 100644
--- a/src/runloop_api_client/resources/blueprints.py
+++ b/src/runloop_api_client/resources/blueprints.py
@@ -389,6 +389,7 @@ def list(
limit: int | Omit = omit,
name: str | Omit = omit,
starting_after: str | Omit = omit,
+ status: str | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -400,12 +401,14 @@ def list(
List all Blueprints or filter by name.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
name: Filter by name
starting_after: Load the next page of data starting after the item with the given ID.
+ status: Filter by build status (queued, provisioning, building, failed, build_complete)
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -427,6 +430,7 @@ def list(
"limit": limit,
"name": name,
"starting_after": starting_after,
+ "status": status,
},
blueprint_list_params.BlueprintListParams,
),
@@ -558,6 +562,7 @@ def list_public(
limit: int | Omit = omit,
name: str | Omit = omit,
starting_after: str | Omit = omit,
+ status: str | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -569,12 +574,14 @@ def list_public(
List all public Blueprints that are available to all users.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
name: Filter by name
starting_after: Load the next page of data starting after the item with the given ID.
+ status: Filter by build status (queued, provisioning, building, failed, build_complete)
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -596,6 +603,7 @@ def list_public(
"limit": limit,
"name": name,
"starting_after": starting_after,
+ "status": status,
},
blueprint_list_public_params.BlueprintListPublicParams,
),
@@ -1028,6 +1036,7 @@ def list(
limit: int | Omit = omit,
name: str | Omit = omit,
starting_after: str | Omit = omit,
+ status: str | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -1039,12 +1048,14 @@ def list(
List all Blueprints or filter by name.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
name: Filter by name
starting_after: Load the next page of data starting after the item with the given ID.
+ status: Filter by build status (queued, provisioning, building, failed, build_complete)
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -1066,6 +1077,7 @@ def list(
"limit": limit,
"name": name,
"starting_after": starting_after,
+ "status": status,
},
blueprint_list_params.BlueprintListParams,
),
@@ -1197,6 +1209,7 @@ def list_public(
limit: int | Omit = omit,
name: str | Omit = omit,
starting_after: str | Omit = omit,
+ status: str | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -1208,12 +1221,14 @@ def list_public(
List all public Blueprints that are available to all users.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
name: Filter by name
starting_after: Load the next page of data starting after the item with the given ID.
+ status: Filter by build status (queued, provisioning, building, failed, build_complete)
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -1235,6 +1250,7 @@ def list_public(
"limit": limit,
"name": name,
"starting_after": starting_after,
+ "status": status,
},
blueprint_list_public_params.BlueprintListPublicParams,
),
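Sketch of the new `status` filter, using the build states enumerated in the docstring (`client` as above):

complete = client.blueprints.list(status="build_complete")
failed_public = client.blueprints.list_public(status="failed", limit=100)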
diff --git a/src/runloop_api_client/resources/devboxes/devboxes.py b/src/runloop_api_client/resources/devboxes/devboxes.py
index fc13c722d..dc7b1b492 100644
--- a/src/runloop_api_client/resources/devboxes/devboxes.py
+++ b/src/runloop_api_client/resources/devboxes/devboxes.py
@@ -558,7 +558,7 @@ def list(
List all Devboxes while optionally filtering by status.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
starting_after: Load the next page of data starting after the item with the given ID.
@@ -1102,7 +1102,7 @@ def list_disk_snapshots(
Args:
devbox_id: Devbox ID to filter by.
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
metadata_key: Filter snapshots by metadata key-value pair. Can be used multiple times for
different keys.
@@ -2093,7 +2093,7 @@ def list(
List all Devboxes while optionally filtering by status.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
starting_after: Load the next page of data starting after the item with the given ID.
@@ -2638,7 +2638,7 @@ def list_disk_snapshots(
Args:
devbox_id: Devbox ID to filter by.
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
metadata_key: Filter snapshots by metadata key-value pair. Can be used multiple times for
different keys.
diff --git a/src/runloop_api_client/resources/devboxes/disk_snapshots.py b/src/runloop_api_client/resources/devboxes/disk_snapshots.py
index 0e3530374..b896adbb6 100644
--- a/src/runloop_api_client/resources/devboxes/disk_snapshots.py
+++ b/src/runloop_api_client/resources/devboxes/disk_snapshots.py
@@ -130,7 +130,7 @@ def list(
Args:
devbox_id: Devbox ID to filter by.
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
metadata_key: Filter snapshots by metadata key-value pair. Can be used multiple times for
different keys.
@@ -381,7 +381,7 @@ def list(
Args:
devbox_id: Devbox ID to filter by.
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
metadata_key: Filter snapshots by metadata key-value pair. Can be used multiple times for
different keys.
diff --git a/src/runloop_api_client/resources/objects.py b/src/runloop_api_client/resources/objects.py
index 4d7d2e0a3..409d5f6f3 100644
--- a/src/runloop_api_client/resources/objects.py
+++ b/src/runloop_api_client/resources/objects.py
@@ -162,7 +162,7 @@ def list(
Args:
content_type: Filter storage objects by content type.
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
name: Filter storage objects by name (partial match supported).
@@ -352,7 +352,7 @@ def list_public(
Args:
content_type: Filter storage objects by content type.
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
name: Filter storage objects by name (partial match supported).
@@ -530,7 +530,7 @@ def list(
Args:
content_type: Filter storage objects by content type.
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
name: Filter storage objects by name (partial match supported).
@@ -720,7 +720,7 @@ def list_public(
Args:
content_type: Filter storage objects by content type.
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
name: Filter storage objects by name (partial match supported).
diff --git a/src/runloop_api_client/resources/repositories.py b/src/runloop_api_client/resources/repositories.py
index e2b238750..a22075540 100644
--- a/src/runloop_api_client/resources/repositories.py
+++ b/src/runloop_api_client/resources/repositories.py
@@ -163,7 +163,7 @@ def list(
List all available repository connections.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
name: Filter by repository name
@@ -542,7 +542,7 @@ def list(
List all available repository connections.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
name: Filter by repository name
diff --git a/src/runloop_api_client/resources/scenarios/runs.py b/src/runloop_api_client/resources/scenarios/runs.py
index a6a16a5a0..3ea9a960f 100644
--- a/src/runloop_api_client/resources/scenarios/runs.py
+++ b/src/runloop_api_client/resources/scenarios/runs.py
@@ -89,9 +89,12 @@ def retrieve(
def list(
self,
*,
+ benchmark_run_id: str | Omit = omit,
limit: int | Omit = omit,
+ name: str | Omit = omit,
scenario_id: str | Omit = omit,
starting_after: str | Omit = omit,
+ state: str | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -103,12 +106,18 @@ def list(
List all ScenarioRuns matching filter.
Args:
- limit: The limit of items to return. Default is 20.
+ benchmark_run_id: Filter by benchmark run ID
+
+ limit: The limit of items to return. Default is 20. Max is 5000.
+
+ name: Filter by name
scenario_id: Filter runs associated with the Scenario with the given ID
starting_after: Load the next page of data starting after the item with the given ID.
+ state: Filter by state
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -127,9 +136,12 @@ def list(
timeout=timeout,
query=maybe_transform(
{
+ "benchmark_run_id": benchmark_run_id,
"limit": limit,
+ "name": name,
"scenario_id": scenario_id,
"starting_after": starting_after,
+ "state": state,
},
run_list_params.RunListParams,
),
@@ -497,9 +509,12 @@ async def retrieve(
def list(
self,
*,
+ benchmark_run_id: str | Omit = omit,
limit: int | Omit = omit,
+ name: str | Omit = omit,
scenario_id: str | Omit = omit,
starting_after: str | Omit = omit,
+ state: str | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -511,12 +526,18 @@ def list(
List all ScenarioRuns matching filter.
Args:
- limit: The limit of items to return. Default is 20.
+ benchmark_run_id: Filter by benchmark run ID
+
+ limit: The limit of items to return. Default is 20. Max is 5000.
+
+ name: Filter by name
scenario_id: Filter runs associated with the Scenario with the given ID
starting_after: Load the next page of data starting after the item with the given ID.
+ state: Filter by state
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -535,9 +556,12 @@ def list(
timeout=timeout,
query=maybe_transform(
{
+ "benchmark_run_id": benchmark_run_id,
"limit": limit,
+ "name": name,
"scenario_id": scenario_id,
"starting_after": starting_after,
+ "state": state,
},
run_list_params.RunListParams,
),
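Combining the new scenario-run filters — the state value is illustrative, since this diff does not enumerate the allowed states (`client` as above):

runs = client.scenarios.runs.list(
    benchmark_run_id="bmr_123",  # hypothetical BenchmarkRun ID
    name="eval",
    state="completed",
    limit=100,
)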
diff --git a/src/runloop_api_client/resources/scenarios/scenarios.py b/src/runloop_api_client/resources/scenarios/scenarios.py
index 6b7c729f4..bd961a285 100644
--- a/src/runloop_api_client/resources/scenarios/scenarios.py
+++ b/src/runloop_api_client/resources/scenarios/scenarios.py
@@ -221,31 +221,32 @@ def update(
timeout: float | httpx.Timeout | None | NotGiven = not_given,
idempotency_key: str | None = None,
) -> ScenarioView:
- """
- Update a Scenario, a repeatable AI coding evaluation test that defines the
- starting environment as well as evaluation success criteria. Only provided
- fields will be updated.
+ """Update a Scenario.
+
+ Fields that are null will preserve the existing value. Fields
+ that are provided (including empty values) will replace the existing value
+ entirely.
Args:
environment_parameters: The Environment in which the Scenario will run.
input_context: The input context for the Scenario.
- metadata: User defined metadata to attach to the scenario for organization.
+ metadata: User defined metadata to attach to the scenario. Pass in empty map to clear.
- name: Name of the scenario.
+ name: Name of the scenario. Cannot be blank.
reference_output: A string representation of the reference output to solve the scenario. Commonly
can be the result of a git diff or a sequence of command actions to apply to the
- environment.
+ environment. Pass in empty string to clear.
- required_environment_variables: Environment variables required to run the scenario.
+ required_environment_variables: Environment variables required to run the scenario. Pass in empty list to clear.
- required_secret_names: Secrets required to run the scenario.
+ required_secret_names: Secrets required to run the scenario. Pass in empty list to clear.
scoring_contract: The scoring contract for the Scenario.
- validation_type: Validation strategy.
+ validation_type: Validation strategy. Pass in empty string to clear.
extra_headers: Send extra headers
@@ -292,6 +293,7 @@ def list(
limit: int | Omit = omit,
name: str | Omit = omit,
starting_after: str | Omit = omit,
+ validation_type: str | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -305,12 +307,14 @@ def list(
Args:
benchmark_id: Filter scenarios by benchmark ID.
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
name: Query for Scenarios with a given name.
starting_after: Load the next page of data starting after the item with the given ID.
+ validation_type: Filter by validation type
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -333,6 +337,7 @@ def list(
"limit": limit,
"name": name,
"starting_after": starting_after,
+ "validation_type": validation_type,
},
scenario_list_params.ScenarioListParams,
),
@@ -357,7 +362,7 @@ def list_public(
List all public scenarios matching filter.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
name: Query for Scenarios with a given name.
@@ -678,31 +683,32 @@ async def update(
timeout: float | httpx.Timeout | None | NotGiven = not_given,
idempotency_key: str | None = None,
) -> ScenarioView:
- """
- Update a Scenario, a repeatable AI coding evaluation test that defines the
- starting environment as well as evaluation success criteria. Only provided
- fields will be updated.
+ """Update a Scenario.
+
+ Fields that are null will preserve the existing value. Fields
+ that are provided (including empty values) will replace the existing value
+ entirely.
Args:
environment_parameters: The Environment in which the Scenario will run.
input_context: The input context for the Scenario.
- metadata: User defined metadata to attach to the scenario for organization.
+ metadata: User defined metadata to attach to the scenario. Pass in empty map to clear.
- name: Name of the scenario.
+ name: Name of the scenario. Cannot be blank.
reference_output: A string representation of the reference output to solve the scenario. Commonly
can be the result of a git diff or a sequence of command actions to apply to the
- environment.
+ environment. Pass in empty string to clear.
- required_environment_variables: Environment variables required to run the scenario.
+ required_environment_variables: Environment variables required to run the scenario. Pass in empty list to clear.
- required_secret_names: Secrets required to run the scenario.
+ required_secret_names: Secrets required to run the scenario. Pass in empty list to clear.
scoring_contract: The scoring contract for the Scenario.
- validation_type: Validation strategy.
+ validation_type: Validation strategy. Pass in empty string to clear.
extra_headers: Send extra headers
@@ -749,6 +755,7 @@ def list(
limit: int | Omit = omit,
name: str | Omit = omit,
starting_after: str | Omit = omit,
+ validation_type: str | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -762,12 +769,14 @@ def list(
Args:
benchmark_id: Filter scenarios by benchmark ID.
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
name: Query for Scenarios with a given name.
starting_after: Load the next page of data starting after the item with the given ID.
+ validation_type: Filter by validation type
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -790,6 +799,7 @@ def list(
"limit": limit,
"name": name,
"starting_after": starting_after,
+ "validation_type": validation_type,
},
scenario_list_params.ScenarioListParams,
),
@@ -814,7 +824,7 @@ def list_public(
List all public scenarios matching filter.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
name: Query for Scenarios with a given name.
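Scenarios get the same replace-vs-preserve `update()` semantics as benchmarks, plus a `validation_type` list filter; a sketch with hypothetical IDs and an illustrative validation type (`client` as above):

client.scenarios.update(
    "scn_123",
    reference_output="",        # empty string clears
    required_secret_names=[],   # empty list clears
)

scenarios = client.scenarios.list(validation_type="manual")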
diff --git a/src/runloop_api_client/resources/scenarios/scorers.py b/src/runloop_api_client/resources/scenarios/scorers.py
index 5b083f8e4..9e5d5e198 100644
--- a/src/runloop_api_client/resources/scenarios/scorers.py
+++ b/src/runloop_api_client/resources/scenarios/scorers.py
@@ -201,7 +201,7 @@ def list(
List all Scenario Scorers matching filter.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
starting_after: Load the next page of data starting after the item with the given ID.
@@ -460,7 +460,7 @@ def list(
List all Scenario Scorers matching filter.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
starting_after: Load the next page of data starting after the item with the given ID.
diff --git a/src/runloop_api_client/resources/secrets.py b/src/runloop_api_client/resources/secrets.py
index 8e170fca2..892557497 100644
--- a/src/runloop_api_client/resources/secrets.py
+++ b/src/runloop_api_client/resources/secrets.py
@@ -160,7 +160,7 @@ def list(
for security reasons.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
extra_headers: Send extra headers
@@ -363,7 +363,7 @@ async def list(
for security reasons.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
extra_headers: Send extra headers
diff --git a/src/runloop_api_client/sdk/__init__.py b/src/runloop_api_client/sdk/__init__.py
index 5773b9d53..610017b79 100644
--- a/src/runloop_api_client/sdk/__init__.py
+++ b/src/runloop_api_client/sdk/__init__.py
@@ -5,7 +5,17 @@
from __future__ import annotations
-from .sync import AgentOps, DevboxOps, ScorerOps, RunloopSDK, ScenarioOps, SnapshotOps, BlueprintOps, StorageObjectOps
+from .sync import (
+ AgentOps,
+ DevboxOps,
+ ScorerOps,
+ RunloopSDK,
+ ScenarioOps,
+ SnapshotOps,
+ BenchmarkOps,
+ BlueprintOps,
+ StorageObjectOps,
+)
from .agent import Agent
from ._types import ScenarioPreview
from .async_ import (
@@ -15,6 +25,7 @@
AsyncRunloopSDK,
AsyncScenarioOps,
AsyncSnapshotOps,
+ AsyncBenchmarkOps,
AsyncBlueprintOps,
AsyncStorageObjectOps,
)
@@ -22,20 +33,24 @@
from .scorer import Scorer
from .scenario import Scenario
from .snapshot import Snapshot
+from .benchmark import Benchmark
from .blueprint import Blueprint
from .execution import Execution
from .async_agent import AsyncAgent
from .async_devbox import AsyncDevbox, AsyncNamedShell
from .async_scorer import AsyncScorer
from .scenario_run import ScenarioRun
+from .benchmark_run import BenchmarkRun
from .async_scenario import AsyncScenario
from .async_snapshot import AsyncSnapshot
from .storage_object import StorageObject
+from .async_benchmark import AsyncBenchmark
from .async_blueprint import AsyncBlueprint
from .async_execution import AsyncExecution
from .execution_result import ExecutionResult
from .scenario_builder import ScenarioBuilder
from .async_scenario_run import AsyncScenarioRun
+from .async_benchmark_run import AsyncBenchmarkRun
from .async_storage_object import AsyncStorageObject
from .async_execution_result import AsyncExecutionResult
from .async_scenario_builder import AsyncScenarioBuilder
@@ -47,6 +62,8 @@
# Management interfaces
"AgentOps",
"AsyncAgentOps",
+ "BenchmarkOps",
+ "AsyncBenchmarkOps",
"DevboxOps",
"AsyncDevboxOps",
"BlueprintOps",
@@ -62,6 +79,10 @@
# Resource classes
"Agent",
"AsyncAgent",
+ "Benchmark",
+ "AsyncBenchmark",
+ "BenchmarkRun",
+ "AsyncBenchmarkRun",
"Devbox",
"AsyncDevbox",
"Execution",
diff --git a/src/runloop_api_client/sdk/_types.py b/src/runloop_api_client/sdk/_types.py
index be09f6eed..c3024b4ca 100644
--- a/src/runloop_api_client/sdk/_types.py
+++ b/src/runloop_api_client/sdk/_types.py
@@ -1,32 +1,41 @@
from typing import Union, Callable, Optional
from typing_extensions import TypedDict
+from ..types import (
+ InputContext,
+ ScenarioView,
+ AgentListParams,
+ DevboxListParams,
+ ObjectListParams,
+ AgentCreateParams,
+ DevboxCreateParams,
+ ObjectCreateParams,
+ ScenarioListParams,
+ BenchmarkListParams,
+ BlueprintListParams,
+ ObjectDownloadParams,
+ ScenarioUpdateParams,
+ BenchmarkCreateParams,
+ BenchmarkUpdateParams,
+ BlueprintCreateParams,
+ DevboxUploadFileParams,
+ DevboxCreateTunnelParams,
+ DevboxDownloadFileParams,
+ DevboxRemoveTunnelParams,
+ DevboxSnapshotDiskParams,
+ DevboxReadFileContentsParams,
+ DevboxWriteFileContentsParams,
+)
from .._types import Body, Query, Headers, Timeout, NotGiven
from ..lib.polling import PollingConfig
from ..types.devboxes import DiskSnapshotListParams, DiskSnapshotUpdateParams
from ..types.scenarios import ScorerListParams, ScorerCreateParams, ScorerUpdateParams, ScorerValidateParams
-from ..types.input_context import InputContext
-from ..types.scenario_view import ScenarioView
-from ..types.agent_list_params import AgentListParams
-from ..types.devbox_list_params import DevboxListParams
-from ..types.object_list_params import ObjectListParams
-from ..types.agent_create_params import AgentCreateParams
-from ..types.devbox_create_params import DevboxCreateParams, DevboxBaseCreateParams
-from ..types.object_create_params import ObjectCreateParams
-from ..types.scenario_list_params import ScenarioListParams
-from ..types.blueprint_list_params import BlueprintListParams
-from ..types.object_download_params import ObjectDownloadParams
-from ..types.scenario_update_params import ScenarioUpdateParams
-from ..types.blueprint_create_params import BlueprintCreateParams
-from ..types.devbox_upload_file_params import DevboxUploadFileParams
+from ..types.benchmarks import RunListScenarioRunsParams
+from ..types.devbox_create_params import DevboxBaseCreateParams
from ..types.scenario_start_run_params import ScenarioStartRunBaseParams
-from ..types.devbox_create_tunnel_params import DevboxCreateTunnelParams
-from ..types.devbox_download_file_params import DevboxDownloadFileParams
+from ..types.benchmark_start_run_params import BenchmarkSelfStartRunParams
+from ..types.benchmarks.run_list_params import RunSelfListParams
from ..types.devbox_execute_async_params import DevboxNiceExecuteAsyncParams
-from ..types.devbox_remove_tunnel_params import DevboxRemoveTunnelParams
-from ..types.devbox_snapshot_disk_params import DevboxSnapshotDiskParams
-from ..types.devbox_read_file_contents_params import DevboxReadFileContentsParams
-from ..types.devbox_write_file_contents_params import DevboxWriteFileContentsParams
LogCallback = Callable[[str], None]
@@ -203,3 +212,27 @@ class ScenarioPreview(ScenarioView):
input_context: InputContextPreview # type: ignore[assignment]
"""The input context for the Scenario."""
+
+
+class SDKBenchmarkCreateParams(BenchmarkCreateParams, LongRequestOptions):
+ pass
+
+
+class SDKBenchmarkListParams(BenchmarkListParams, BaseRequestOptions):
+ pass
+
+
+class SDKBenchmarkUpdateParams(BenchmarkUpdateParams, LongRequestOptions):
+ pass
+
+
+class SDKBenchmarkStartRunParams(BenchmarkSelfStartRunParams, LongRequestOptions):
+ pass
+
+
+class SDKBenchmarkListRunsParams(RunSelfListParams, BaseRequestOptions):
+ pass
+
+
+class SDKBenchmarkRunListScenarioRunsParams(RunListScenarioRunsParams, BaseRequestOptions):
+ pass
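These param classes just merge the API TypedDicts with the request-option TypedDicts so SDK methods accept both through one **params mapping; a hedged sketch (the exact option field names defined by BaseRequestOptions are assumed, not shown in this diff):

from runloop_api_client.sdk._types import SDKBenchmarkListParams

# API fields (limit, name, starting_after) and request options share one dict.
params: SDKBenchmarkListParams = {"name": "nightly", "limit": 20}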
diff --git a/src/runloop_api_client/sdk/async_.py b/src/runloop_api_client/sdk/async_.py
index 4bcd08fc1..6e6e828ff 100644
--- a/src/runloop_api_client/sdk/async_.py
+++ b/src/runloop_api_client/sdk/async_.py
@@ -21,7 +21,9 @@
SDKObjectCreateParams,
SDKScenarioListParams,
SDKScorerCreateParams,
+ SDKBenchmarkListParams,
SDKBlueprintListParams,
+ SDKBenchmarkCreateParams,
SDKBlueprintCreateParams,
SDKDiskSnapshotListParams,
SDKDevboxCreateFromImageParams,
@@ -34,6 +36,7 @@
from .async_scorer import AsyncScorer
from .async_scenario import AsyncScenario
from .async_snapshot import AsyncSnapshot
+from .async_benchmark import AsyncBenchmark
from .async_blueprint import AsyncBlueprint
from ..lib.context_loader import TarFilter, build_directory_tar
from .async_storage_object import AsyncStorageObject
@@ -599,7 +602,6 @@ async def create_from_npm(
self,
*,
package_name: str,
- npm_version: Optional[str] = None,
registry_url: Optional[str] = None,
agent_setup: Optional[list[str]] = None,
**params: Unpack[SDKAgentCreateParams],
@@ -608,8 +610,6 @@ async def create_from_npm(
:param package_name: NPM package name
:type package_name: str
- :param npm_version: NPM version constraint, defaults to None
- :type npm_version: Optional[str], optional
:param registry_url: NPM registry URL, defaults to None
:type registry_url: Optional[str], optional
:param agent_setup: Setup commands to run after installation, defaults to None
@@ -625,8 +625,6 @@ async def create_from_npm(
)
npm_config: Npm = {"package_name": package_name}
- if npm_version is not None:
- npm_config["npm_version"] = npm_version
if registry_url is not None:
npm_config["registry_url"] = registry_url
if agent_setup is not None:
@@ -639,7 +637,6 @@ async def create_from_pip(
self,
*,
package_name: str,
- pip_version: Optional[str] = None,
registry_url: Optional[str] = None,
agent_setup: Optional[list[str]] = None,
**params: Unpack[SDKAgentCreateParams],
@@ -648,8 +645,6 @@ async def create_from_pip(
:param package_name: Pip package name
:type package_name: str
- :param pip_version: Pip version constraint, defaults to None
- :type pip_version: Optional[str], optional
:param registry_url: Pip registry URL, defaults to None
:type registry_url: Optional[str], optional
:param agent_setup: Setup commands to run after installation, defaults to None
@@ -665,8 +660,6 @@ async def create_from_pip(
)
pip_config: Pip = {"package_name": package_name}
- if pip_version is not None:
- pip_config["pip_version"] = pip_version
if registry_url is not None:
pip_config["registry_url"] = registry_url
if agent_setup is not None:
@@ -825,6 +818,55 @@ async def list(self, **params: Unpack[SDKScenarioListParams]) -> list[AsyncScena
return [AsyncScenario(self._client, item.id) async for item in page]
+class AsyncBenchmarkOps:
+ """Manage benchmarks (async). Access via ``runloop.benchmark``.
+
+ Example:
+ >>> runloop = AsyncRunloopSDK()
+ >>> benchmarks = await runloop.benchmark.list()
+ >>> benchmark = runloop.benchmark.from_id("bmd_xxx")
+ >>> run = await benchmark.start_run(run_name="evaluation-v1")
+ """
+
+ def __init__(self, client: AsyncRunloop) -> None:
+ """Initialize AsyncBenchmarkOps.
+
+ :param client: AsyncRunloop client instance
+ :type client: AsyncRunloop
+ """
+ self._client = client
+
+ async def create(self, **params: Unpack[SDKBenchmarkCreateParams]) -> AsyncBenchmark:
+ """Create a new benchmark.
+
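+        Example (illustrative sketch; the benchmark name is a placeholder):
+            >>> benchmark = await runloop.benchmark.create(name="my-benchmark")
+            >>> print(benchmark.id)
+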
+ :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkCreateParams` for available parameters
+ :return: The newly created benchmark
+ :rtype: AsyncBenchmark
+ """
+ response = await self._client.benchmarks.create(**params)
+ return AsyncBenchmark(self._client, response.id)
+
+ def from_id(self, benchmark_id: str) -> AsyncBenchmark:
+ """Get an AsyncBenchmark instance for an existing benchmark ID.
+
+ :param benchmark_id: ID of the benchmark
+ :type benchmark_id: str
+ :return: AsyncBenchmark instance for the given ID
+ :rtype: AsyncBenchmark
+ """
+ return AsyncBenchmark(self._client, benchmark_id)
+
+ async def list(self, **params: Unpack[SDKBenchmarkListParams]) -> list[AsyncBenchmark]:
+ """List all benchmarks, optionally filtered by parameters.
+
+ :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkListParams` for available parameters
+ :return: List of benchmarks
+ :rtype: list[AsyncBenchmark]
+ """
+ page = await self._client.benchmarks.list(**params)
+ return [AsyncBenchmark(self._client, item.id) for item in page.benchmarks]
+
+
class AsyncRunloopSDK:
"""High-level asynchronous entry point for the Runloop SDK.
@@ -836,6 +878,8 @@ class AsyncRunloopSDK:
:vartype api: AsyncRunloop
:ivar agent: High-level async interface for agent management.
:vartype agent: AsyncAgentOps
+ :ivar benchmark: High-level async interface for benchmark management
+ :vartype benchmark: AsyncBenchmarkOps
:ivar devbox: High-level async interface for devbox management
:vartype devbox: AsyncDevboxOps
:ivar blueprint: High-level async interface for blueprint management
@@ -859,6 +903,7 @@ class AsyncRunloopSDK:
api: AsyncRunloop
agent: AsyncAgentOps
+ benchmark: AsyncBenchmarkOps
devbox: AsyncDevboxOps
blueprint: AsyncBlueprintOps
scenario: AsyncScenarioOps
@@ -905,6 +950,7 @@ def __init__(
)
self.agent = AsyncAgentOps(self.api)
+ self.benchmark = AsyncBenchmarkOps(self.api)
self.devbox = AsyncDevboxOps(self.api)
self.blueprint = AsyncBlueprintOps(self.api)
self.scenario = AsyncScenarioOps(self.api)
diff --git a/src/runloop_api_client/sdk/async_benchmark.py b/src/runloop_api_client/sdk/async_benchmark.py
new file mode 100644
index 000000000..63443e37b
--- /dev/null
+++ b/src/runloop_api_client/sdk/async_benchmark.py
@@ -0,0 +1,164 @@
+"""AsyncBenchmark resource class for asynchronous operations."""
+
+from __future__ import annotations
+
+from typing import List
+from typing_extensions import Unpack, override
+
+from ..types import BenchmarkView
+from ._types import (
+ BaseRequestOptions,
+ LongRequestOptions,
+ SDKBenchmarkUpdateParams,
+ SDKBenchmarkListRunsParams,
+ SDKBenchmarkStartRunParams,
+)
+from .._types import SequenceNotStr
+from .._client import AsyncRunloop
+from .async_benchmark_run import AsyncBenchmarkRun
+
+
+class AsyncBenchmark:
+ """A benchmark for evaluating agent performance across scenarios (async).
+
+ Provides async methods for retrieving benchmark details, updating the benchmark,
+ managing scenarios, and starting benchmark runs. Obtain instances via
+ ``runloop.benchmark.from_id()`` or ``runloop.benchmark.list()``.
+
+ Example:
+ >>> benchmark = runloop.benchmark.from_id("bmd_xxx")
+ >>> info = await benchmark.get_info()
+ >>> run = await benchmark.start_run(run_name="evaluation-v1")
+ >>> for scenario_id in info.scenario_ids:
+        ...     scenario = runloop.scenario.from_id(scenario_id)
+ ... scenario_run = await scenario.run(benchmark_run_id=run.id, run_name="evaluation-v1")
+ """
+
+ def __init__(self, client: AsyncRunloop, benchmark_id: str) -> None:
+ """Create an AsyncBenchmark instance.
+
+ :param client: AsyncRunloop client instance
+ :type client: AsyncRunloop
+ :param benchmark_id: Benchmark ID
+ :type benchmark_id: str
+ """
+ self._client = client
+ self._id = benchmark_id
+
+ @override
+ def __repr__(self) -> str:
+ return f""
+
+ @property
+ def id(self) -> str:
+ """Return the benchmark ID.
+
+ :return: Unique benchmark ID
+ :rtype: str
+ """
+ return self._id
+
+ async def get_info(
+ self,
+ **options: Unpack[BaseRequestOptions],
+ ) -> BenchmarkView:
+ """Retrieve current benchmark details.
+
+ :param options: See :typeddict:`~runloop_api_client.sdk._types.BaseRequestOptions` for available options
+ :return: Current benchmark info
+ :rtype: BenchmarkView
+ """
+ return await self._client.benchmarks.retrieve(
+ self._id,
+ **options,
+ )
+
+ async def update(
+ self,
+ **params: Unpack[SDKBenchmarkUpdateParams],
+ ) -> BenchmarkView:
+ """Update the benchmark.
+
+ Only provided fields will be updated.
+
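+        Example (illustrative sketch; the description value is a placeholder):
+            >>> info = await benchmark.update(description="Updated description")
+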
+ :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkUpdateParams` for available parameters
+ :return: Updated benchmark info
+ :rtype: BenchmarkView
+ """
+ return await self._client.benchmarks.update(
+ self._id,
+ **params,
+ )
+
+ async def start_run(
+ self,
+ **params: Unpack[SDKBenchmarkStartRunParams],
+ ) -> AsyncBenchmarkRun:
+ """Start a new benchmark run.
+
+ Creates a new benchmark run and returns an AsyncBenchmarkRun instance for
+ managing the run lifecycle.
+
+ :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkStartRunParams` for available parameters
+ :return: AsyncBenchmarkRun instance for managing the run
+ :rtype: AsyncBenchmarkRun
+ """
+ run_view = await self._client.benchmarks.start_run(
+ benchmark_id=self._id,
+ **params,
+ )
+ return AsyncBenchmarkRun(self._client, run_view.id, run_view.benchmark_id)
+
+ async def add_scenarios(
+ self,
+ scenario_ids: SequenceNotStr[str],
+ **options: Unpack[LongRequestOptions],
+ ) -> BenchmarkView:
+ """Add scenarios to the benchmark.
+
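+        Example (illustrative sketch; the scenario IDs are placeholders):
+            >>> info = await benchmark.add_scenarios(["scn_123", "scn_456"])
+            >>> print(info.scenario_ids)
+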
+ :param scenario_ids: List of scenario IDs to add
+ :type scenario_ids: SequenceNotStr[str]
+ :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options
+ :return: Updated benchmark info
+ :rtype: BenchmarkView
+ """
+ return await self._client.benchmarks.update_scenarios(
+ self._id,
+ scenarios_to_add=scenario_ids,
+ **options,
+ )
+
+ async def remove_scenarios(
+ self,
+ scenario_ids: SequenceNotStr[str],
+ **options: Unpack[LongRequestOptions],
+ ) -> BenchmarkView:
+ """Remove scenarios from the benchmark.
+
+ :param scenario_ids: List of scenario IDs to remove
+ :type scenario_ids: SequenceNotStr[str]
+ :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options
+ :return: Updated benchmark info
+ :rtype: BenchmarkView
+ """
+ return await self._client.benchmarks.update_scenarios(
+ self._id,
+ scenarios_to_remove=scenario_ids,
+ **options,
+ )
+
+ async def list_runs(
+ self,
+ **params: Unpack[SDKBenchmarkListRunsParams],
+ ) -> List[AsyncBenchmarkRun]:
+ """List all runs for this benchmark.
+
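+        Example (illustrative sketch):
+            >>> runs = await benchmark.list_runs(limit=10)
+            >>> [run.id for run in runs]
+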
+ :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkListRunsParams` for available parameters
+ :return: List of async benchmark runs
+ :rtype: List[AsyncBenchmarkRun]
+ """
+ page = await self._client.benchmarks.runs.list(
+ benchmark_id=self._id,
+ **params,
+ )
+ return [AsyncBenchmarkRun(self._client, run.id, run.benchmark_id) for run in page.runs]
diff --git a/src/runloop_api_client/sdk/async_benchmark_run.py b/src/runloop_api_client/sdk/async_benchmark_run.py
new file mode 100644
index 000000000..f498d1408
--- /dev/null
+++ b/src/runloop_api_client/sdk/async_benchmark_run.py
@@ -0,0 +1,127 @@
+"""AsyncBenchmarkRun resource class for asynchronous operations."""
+
+from __future__ import annotations
+
+from typing import List
+from typing_extensions import Unpack, override
+
+from ..types import BenchmarkRunView
+from ._types import BaseRequestOptions, LongRequestOptions, SDKBenchmarkRunListScenarioRunsParams
+from .._client import AsyncRunloop
+from .async_scenario_run import AsyncScenarioRun
+
+
+class AsyncBenchmarkRun:
+ """A benchmark run for evaluating agent performance across scenarios (async).
+
+ Provides async methods for monitoring run status, managing the run lifecycle,
+ and accessing scenario run results. Obtain instances via
+ ``benchmark.start_run()`` or ``benchmark.list_runs()``.
+
+ Example:
+        >>> benchmark = runloop.benchmark.from_id("bmd_xxx")
+ >>> run = await benchmark.start_run(run_name="evaluation-v1")
+ >>> info = await run.get_info()
+ >>> scenario_runs = await run.list_scenario_runs()
+ """
+
+ def __init__(self, client: AsyncRunloop, run_id: str, benchmark_id: str) -> None:
+ """Create an AsyncBenchmarkRun instance.
+
+ :param client: AsyncRunloop client instance
+ :type client: AsyncRunloop
+ :param run_id: Benchmark run ID
+ :type run_id: str
+ :param benchmark_id: Parent benchmark ID
+ :type benchmark_id: str
+ """
+ self._client = client
+ self._id = run_id
+ self._benchmark_id = benchmark_id
+
+ @override
+ def __repr__(self) -> str:
+ return f""
+
+ @property
+ def id(self) -> str:
+ """Return the benchmark run ID.
+
+ :return: Unique benchmark run ID
+ :rtype: str
+ """
+ return self._id
+
+ @property
+ def benchmark_id(self) -> str:
+ """Return the parent benchmark ID.
+
+ :return: Parent benchmark ID
+ :rtype: str
+ """
+ return self._benchmark_id
+
+ async def get_info(
+ self,
+ **options: Unpack[BaseRequestOptions],
+ ) -> BenchmarkRunView:
+ """Retrieve current benchmark run status and metadata.
+
+ :param options: See :typeddict:`~runloop_api_client.sdk._types.BaseRequestOptions` for available options
+ :return: Current benchmark run state info
+ :rtype: BenchmarkRunView
+ """
+ return await self._client.benchmarks.runs.retrieve(
+ self._id,
+ **options,
+ )
+
+ async def cancel(
+ self,
+ **options: Unpack[LongRequestOptions],
+ ) -> BenchmarkRunView:
+ """Cancel the benchmark run.
+
+ Stops all running scenarios and marks the run as canceled.
+
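+        Example (illustrative sketch; assumes ``run`` was obtained via ``benchmark.start_run()``):
+            >>> info = await run.cancel()
+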
+ :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options
+ :return: Updated benchmark run state
+ :rtype: BenchmarkRunView
+ """
+ return await self._client.benchmarks.runs.cancel(
+ self._id,
+ **options,
+ )
+
+ async def complete(
+ self,
+ **options: Unpack[LongRequestOptions],
+ ) -> BenchmarkRunView:
+ """Complete the benchmark run.
+
+ Marks the run as completed. Call this after all scenarios have finished.
+
+ :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options
+ :return: Completed benchmark run state
+ :rtype: BenchmarkRunView
+ """
+ return await self._client.benchmarks.runs.complete(
+ self._id,
+ **options,
+ )
+
+ async def list_scenario_runs(
+ self,
+ **params: Unpack[SDKBenchmarkRunListScenarioRunsParams],
+ ) -> List[AsyncScenarioRun]:
+ """List all scenario runs for this benchmark run.
+
+ :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkRunListScenarioRunsParams` for available parameters
+ :return: List of async scenario run objects
+ :rtype: List[AsyncScenarioRun]
+ """
+ page = await self._client.benchmarks.runs.list_scenario_runs(
+ self._id,
+ **params,
+ )
+ return [AsyncScenarioRun(self._client, run.id, run.devbox_id) for run in page.runs]
diff --git a/src/runloop_api_client/sdk/benchmark.py b/src/runloop_api_client/sdk/benchmark.py
new file mode 100644
index 000000000..7e8ed826d
--- /dev/null
+++ b/src/runloop_api_client/sdk/benchmark.py
@@ -0,0 +1,164 @@
+"""Benchmark resource class for synchronous operations."""
+
+from __future__ import annotations
+
+from typing import List
+from typing_extensions import Unpack, override
+
+from ..types import BenchmarkView
+from ._types import (
+ BaseRequestOptions,
+ LongRequestOptions,
+ SDKBenchmarkUpdateParams,
+ SDKBenchmarkListRunsParams,
+ SDKBenchmarkStartRunParams,
+)
+from .._types import SequenceNotStr
+from .._client import Runloop
+from .benchmark_run import BenchmarkRun
+
+
+class Benchmark:
+ """A benchmark for evaluating agent performance across scenarios.
+
+ Provides methods for retrieving benchmark details, updating the benchmark,
+ managing scenarios, and starting benchmark runs. Obtain instances via
+ ``runloop.benchmark.from_id()`` or ``runloop.benchmark.list()``.
+
+ Example:
+ >>> benchmark = runloop.benchmark.from_id("bmd_xxx")
+ >>> info = benchmark.get_info()
+ >>> run = benchmark.start_run(run_name="evaluation-v1")
+ >>> for scenario_id in info.scenario_ids:
+ ... scenario = runloop.scenario.from_id(scenario_id)
+ ... scenario_run = scenario.run(benchmark_run_id=run.id, run_name="evaluation-v1")
+ """
+
+ def __init__(self, client: Runloop, benchmark_id: str) -> None:
+ """Create a Benchmark instance.
+
+ :param client: Runloop client instance
+ :type client: Runloop
+ :param benchmark_id: Benchmark ID
+ :type benchmark_id: str
+ """
+ self._client = client
+ self._id = benchmark_id
+
+ @override
+ def __repr__(self) -> str:
+ return f""
+
+ @property
+ def id(self) -> str:
+ """Return the benchmark ID.
+
+ :return: Unique benchmark ID
+ :rtype: str
+ """
+ return self._id
+
+ def get_info(
+ self,
+ **options: Unpack[BaseRequestOptions],
+ ) -> BenchmarkView:
+ """Retrieve current benchmark details.
+
+ :param options: See :typeddict:`~runloop_api_client.sdk._types.BaseRequestOptions` for available options
+ :return: Current benchmark info
+ :rtype: BenchmarkView
+ """
+ return self._client.benchmarks.retrieve(
+ self._id,
+ **options,
+ )
+
+ def update(
+ self,
+ **params: Unpack[SDKBenchmarkUpdateParams],
+ ) -> BenchmarkView:
+ """Update the benchmark.
+
+ Only provided fields will be updated.
+
+ :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkUpdateParams` for available parameters
+ :return: Updated benchmark info
+ :rtype: BenchmarkView
+ """
+ return self._client.benchmarks.update(
+ self._id,
+ **params,
+ )
+
+ def start_run(
+ self,
+ **params: Unpack[SDKBenchmarkStartRunParams],
+ ) -> BenchmarkRun:
+ """Start a new benchmark run.
+
+ Creates a new benchmark run and returns a BenchmarkRun instance for
+ managing the run lifecycle.
+
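+        Example (illustrative sketch; the run name is a placeholder):
+            >>> run = benchmark.start_run(run_name="evaluation-v1")
+            >>> print(run.id, run.benchmark_id)
+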
+ :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkStartRunParams` for available parameters
+ :return: BenchmarkRun instance for managing the run
+ :rtype: BenchmarkRun
+ """
+ run_view = self._client.benchmarks.start_run(
+ benchmark_id=self._id,
+ **params,
+ )
+ return BenchmarkRun(self._client, run_view.id, run_view.benchmark_id)
+
+ def add_scenarios(
+ self,
+ scenario_ids: SequenceNotStr[str],
+ **options: Unpack[LongRequestOptions],
+ ) -> BenchmarkView:
+ """Add scenarios to the benchmark.
+
+ :param scenario_ids: List of scenario IDs to add
+ :type scenario_ids: SequenceNotStr[str]
+ :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options
+ :return: Updated benchmark info
+ :rtype: BenchmarkView
+ """
+ return self._client.benchmarks.update_scenarios(
+ self._id,
+ scenarios_to_add=scenario_ids,
+ **options,
+ )
+
+ def remove_scenarios(
+ self,
+ scenario_ids: SequenceNotStr[str],
+ **options: Unpack[LongRequestOptions],
+ ) -> BenchmarkView:
+ """Remove scenarios from the benchmark.
+
+ :param scenario_ids: List of scenario IDs to remove
+ :type scenario_ids: SequenceNotStr[str]
+ :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options
+ :return: Updated benchmark info
+ :rtype: BenchmarkView
+ """
+ return self._client.benchmarks.update_scenarios(
+ self._id,
+ scenarios_to_remove=scenario_ids,
+ **options,
+ )
+
+ def list_runs(
+ self,
+ **params: Unpack[SDKBenchmarkListRunsParams],
+ ) -> List[BenchmarkRun]:
+ """List all runs for this benchmark.
+
+ :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkListRunsParams` for available parameters
+ :return: List of benchmark runs
+ :rtype: List[BenchmarkRun]
+ """
+ page = self._client.benchmarks.runs.list(
+ benchmark_id=self._id,
+ **params,
+ )
+ return [BenchmarkRun(self._client, run.id, run.benchmark_id) for run in page.runs]
diff --git a/src/runloop_api_client/sdk/benchmark_run.py b/src/runloop_api_client/sdk/benchmark_run.py
new file mode 100644
index 000000000..10da7ba05
--- /dev/null
+++ b/src/runloop_api_client/sdk/benchmark_run.py
@@ -0,0 +1,127 @@
+"""BenchmarkRun resource class for synchronous operations."""
+
+from __future__ import annotations
+
+from typing import List
+from typing_extensions import Unpack, override
+
+from ..types import BenchmarkRunView
+from ._types import BaseRequestOptions, LongRequestOptions, SDKBenchmarkRunListScenarioRunsParams
+from .._client import Runloop
+from .scenario_run import ScenarioRun
+
+
+class BenchmarkRun:
+ """A benchmark run for evaluating agent performance across scenarios.
+
+ Provides methods for monitoring run status, managing the run lifecycle,
+ and accessing scenario run results. Obtain instances via
+ ``benchmark.start_run()`` or ``benchmark.list_runs()``.
+
+ Example:
+        >>> benchmark = runloop.benchmark.from_id("bmd_xxx")
+ >>> run = benchmark.start_run(run_name="evaluation-v1")
+ >>> info = run.get_info()
+ >>> scenario_runs = run.list_scenario_runs()
+ """
+
+ def __init__(self, client: Runloop, run_id: str, benchmark_id: str) -> None:
+ """Create a BenchmarkRun instance.
+
+ :param client: Runloop client instance
+ :type client: Runloop
+ :param run_id: Benchmark run ID
+ :type run_id: str
+ :param benchmark_id: Parent benchmark ID
+ :type benchmark_id: str
+ """
+ self._client = client
+ self._id = run_id
+ self._benchmark_id = benchmark_id
+
+ @override
+ def __repr__(self) -> str:
+ return f""
+
+ @property
+ def id(self) -> str:
+ """Return the benchmark run ID.
+
+ :return: Unique benchmark run ID
+ :rtype: str
+ """
+ return self._id
+
+ @property
+ def benchmark_id(self) -> str:
+ """Return the parent benchmark ID.
+
+ :return: Parent benchmark ID
+ :rtype: str
+ """
+ return self._benchmark_id
+
+ def get_info(
+ self,
+ **options: Unpack[BaseRequestOptions],
+ ) -> BenchmarkRunView:
+ """Retrieve current benchmark run status and metadata.
+
+ :param options: See :typeddict:`~runloop_api_client.sdk._types.BaseRequestOptions` for available options
+ :return: Current benchmark run state info
+ :rtype: BenchmarkRunView
+ """
+ return self._client.benchmarks.runs.retrieve(
+ self._id,
+ **options,
+ )
+
+ def cancel(
+ self,
+ **options: Unpack[LongRequestOptions],
+ ) -> BenchmarkRunView:
+ """Cancel the benchmark run.
+
+ Stops all running scenarios and marks the run as canceled.
+
+ :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options
+ :return: Updated benchmark run state
+ :rtype: BenchmarkRunView
+ """
+ return self._client.benchmarks.runs.cancel(
+ self._id,
+ **options,
+ )
+
+ def complete(
+ self,
+ **options: Unpack[LongRequestOptions],
+ ) -> BenchmarkRunView:
+ """Complete the benchmark run.
+
+ Marks the run as completed. Call this after all scenarios have finished.
+
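+        Example (illustrative sketch; assumes all scenario runs have finished):
+            >>> info = run.complete()
+            >>> print(info.id)
+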
+ :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options
+ :return: Completed benchmark run state
+ :rtype: BenchmarkRunView
+ """
+ return self._client.benchmarks.runs.complete(
+ self._id,
+ **options,
+ )
+
+ def list_scenario_runs(
+ self,
+ **params: Unpack[SDKBenchmarkRunListScenarioRunsParams],
+ ) -> List[ScenarioRun]:
+ """List all scenario runs for this benchmark run.
+
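+        Example (illustrative sketch):
+            >>> scenario_runs = run.list_scenario_runs(limit=50)
+            >>> [scenario_run.id for scenario_run in scenario_runs]
+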
+ :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkRunListScenarioRunsParams` for available parameters
+ :return: List of scenario run objects
+ :rtype: List[ScenarioRun]
+ """
+ page = self._client.benchmarks.runs.list_scenario_runs(
+ self._id,
+ **params,
+ )
+ return [ScenarioRun(self._client, run.id, run.devbox_id) for run in page.runs]
diff --git a/src/runloop_api_client/sdk/sync.py b/src/runloop_api_client/sdk/sync.py
index f215c8116..d83eb5a6e 100644
--- a/src/runloop_api_client/sdk/sync.py
+++ b/src/runloop_api_client/sdk/sync.py
@@ -21,7 +21,9 @@
SDKObjectCreateParams,
SDKScenarioListParams,
SDKScorerCreateParams,
+ SDKBenchmarkListParams,
SDKBlueprintListParams,
+ SDKBenchmarkCreateParams,
SDKBlueprintCreateParams,
SDKDiskSnapshotListParams,
SDKDevboxCreateFromImageParams,
@@ -33,6 +35,7 @@
from ._helpers import detect_content_type
from .scenario import Scenario
from .snapshot import Snapshot
+from .benchmark import Benchmark
from .blueprint import Blueprint
from .storage_object import StorageObject
from .scenario_builder import ScenarioBuilder
@@ -594,7 +597,6 @@ def create_from_npm(
self,
*,
package_name: str,
- npm_version: Optional[str] = None,
registry_url: Optional[str] = None,
agent_setup: Optional[list[str]] = None,
**params: Unpack[SDKAgentCreateParams],
@@ -603,13 +605,11 @@ def create_from_npm(
Example:
>>> agent = runloop.agent.create_from_npm(
- ... name="my-npm-agent", package_name="@runloop/example-agent", npm_version="^1.0.0"
+ ... name="my-npm-agent", package_name="@runloop/example-agent", version="1.0.0"
... )
:param package_name: NPM package name
:type package_name: str
- :param npm_version: NPM version constraint, defaults to None
- :type npm_version: Optional[str], optional
:param registry_url: NPM registry URL, defaults to None
:type registry_url: Optional[str], optional
:param agent_setup: Setup commands to run after installation, defaults to None
@@ -625,8 +625,6 @@ def create_from_npm(
)
npm_config: Npm = {"package_name": package_name}
- if npm_version is not None:
- npm_config["npm_version"] = npm_version
if registry_url is not None:
npm_config["registry_url"] = registry_url
if agent_setup is not None:
@@ -639,7 +637,6 @@ def create_from_pip(
self,
*,
package_name: str,
- pip_version: Optional[str] = None,
registry_url: Optional[str] = None,
agent_setup: Optional[list[str]] = None,
**params: Unpack[SDKAgentCreateParams],
@@ -648,13 +645,11 @@ def create_from_pip(
Example:
>>> agent = runloop.agent.create_from_pip(
- ... name="my-pip-agent", package_name="runloop-example-agent", pip_version=">=1.0.0"
+ ... name="my-pip-agent", package_name="runloop-example-agent", version="1.0.0"
... )
:param package_name: Pip package name
:type package_name: str
- :param pip_version: Pip version constraint, defaults to None
- :type pip_version: Optional[str], optional
:param registry_url: Pip registry URL, defaults to None
:type registry_url: Optional[str], optional
:param agent_setup: Setup commands to run after installation, defaults to None
@@ -670,8 +665,6 @@ def create_from_pip(
)
pip_config: Pip = {"package_name": package_name}
- if pip_version is not None:
- pip_config["pip_version"] = pip_version
if registry_url is not None:
pip_config["registry_url"] = registry_url
if agent_setup is not None:
@@ -696,6 +689,7 @@ def create_from_git(
... repository="https://github.com/user/agent-repo",
... ref="main",
... agent_setup=["npm install", "npm run build"],
+ ... version="1.0.0",
... )
:param repository: Git repository URL
@@ -737,7 +731,10 @@ def create_from_object(
>>> obj = runloop.storage_object.upload_from_dir("./my-agent")
>>> # Then create agent from the object
>>> agent = runloop.agent.create_from_object(
- ... name="my-object-agent", object_id=obj.id, agent_setup=["chmod +x setup.sh", "./setup.sh"]
+ ... name="my-object-agent",
+ ... object_id=obj.id,
+ ... agent_setup=["chmod +x setup.sh", "./setup.sh"],
+ ... version="1.0.0",
... )
:param object_id: Storage object ID
@@ -846,6 +843,55 @@ def list(self, **params: Unpack[SDKScenarioListParams]) -> list[Scenario]:
return [Scenario(self._client, item.id) for item in page]
+class BenchmarkOps:
+ """Manage benchmarks. Access via ``runloop.benchmark``.
+
+ Example:
+ >>> runloop = RunloopSDK()
+ >>> benchmarks = runloop.benchmark.list()
+ >>> benchmark = runloop.benchmark.from_id("bmd_xxx")
+ >>> run = benchmark.start_run(run_name="evaluation-v1")
+ """
+
+ def __init__(self, client: Runloop) -> None:
+ """Initialize BenchmarkOps.
+
+ :param client: Runloop client instance
+ :type client: Runloop
+ """
+ self._client = client
+
+ def create(self, **params: Unpack[SDKBenchmarkCreateParams]) -> Benchmark:
+ """Create a new benchmark.
+
+ :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkCreateParams` for available parameters
+ :return: The newly created benchmark
+ :rtype: Benchmark
+ """
+ response = self._client.benchmarks.create(**params)
+ return Benchmark(self._client, response.id)
+
+ def from_id(self, benchmark_id: str) -> Benchmark:
+ """Get a Benchmark instance for an existing benchmark ID.
+
+ :param benchmark_id: ID of the benchmark
+ :type benchmark_id: str
+ :return: Benchmark instance for the given ID
+ :rtype: Benchmark
+ """
+ return Benchmark(self._client, benchmark_id)
+
+ def list(self, **params: Unpack[SDKBenchmarkListParams]) -> list[Benchmark]:
+ """List all benchmarks, optionally filtered by parameters.
+
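+        Example (illustrative sketch; the name filter is a placeholder):
+            >>> benchmarks = runloop.benchmark.list(name="my-benchmark", limit=10)
+            >>> [benchmark.id for benchmark in benchmarks]
+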
+ :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkListParams` for available parameters
+ :return: List of benchmarks
+ :rtype: list[Benchmark]
+ """
+ page = self._client.benchmarks.list(**params)
+ return [Benchmark(self._client, item.id) for item in page.benchmarks]
+
+
class RunloopSDK:
"""High-level synchronous entry point for the Runloop SDK.
@@ -857,6 +903,8 @@ class RunloopSDK:
:vartype api: Runloop
:ivar agent: High-level interface for agent management.
:vartype agent: AgentOps
+ :ivar benchmark: High-level interface for benchmark management
+ :vartype benchmark: BenchmarkOps
:ivar devbox: High-level interface for devbox management
:vartype devbox: DevboxOps
:ivar blueprint: High-level interface for blueprint management
@@ -880,6 +928,7 @@ class RunloopSDK:
api: Runloop
agent: AgentOps
+ benchmark: BenchmarkOps
devbox: DevboxOps
blueprint: BlueprintOps
scenario: ScenarioOps
@@ -926,6 +975,7 @@ def __init__(
)
self.agent = AgentOps(self.api)
+ self.benchmark = BenchmarkOps(self.api)
self.devbox = DevboxOps(self.api)
self.blueprint = BlueprintOps(self.api)
self.scenario = ScenarioOps(self.api)
diff --git a/src/runloop_api_client/types/__init__.py b/src/runloop_api_client/types/__init__.py
index 6856d9670..6afd070a3 100644
--- a/src/runloop_api_client/types/__init__.py
+++ b/src/runloop_api_client/types/__init__.py
@@ -97,6 +97,7 @@
from .repository_connection_list_view import RepositoryConnectionListView as RepositoryConnectionListView
from .repository_inspection_list_view import RepositoryInspectionListView as RepositoryInspectionListView
from .devbox_read_file_contents_params import DevboxReadFileContentsParams as DevboxReadFileContentsParams
+from .benchmark_update_scenarios_params import BenchmarkUpdateScenariosParams as BenchmarkUpdateScenariosParams
from .devbox_list_disk_snapshots_params import DevboxListDiskSnapshotsParams as DevboxListDiskSnapshotsParams
from .devbox_snapshot_disk_async_params import DevboxSnapshotDiskAsyncParams as DevboxSnapshotDiskAsyncParams
from .devbox_write_file_contents_params import DevboxWriteFileContentsParams as DevboxWriteFileContentsParams
diff --git a/src/runloop_api_client/types/agent_create_params.py b/src/runloop_api_client/types/agent_create_params.py
index 1a3372e7e..3c2deff2a 100644
--- a/src/runloop_api_client/types/agent_create_params.py
+++ b/src/runloop_api_client/types/agent_create_params.py
@@ -14,5 +14,8 @@ class AgentCreateParams(TypedDict, total=False):
name: Required[str]
"""The name of the Agent."""
+ version: Required[str]
+ """The version of the Agent. Must be a semver string (e.g., '2.0.65') or a SHA."""
+
source: Optional[AgentSource]
"""The source configuration for the Agent."""
diff --git a/src/runloop_api_client/types/agent_list_params.py b/src/runloop_api_client/types/agent_list_params.py
index a3199190b..3df89fc25 100644
--- a/src/runloop_api_client/types/agent_list_params.py
+++ b/src/runloop_api_client/types/agent_list_params.py
@@ -12,7 +12,7 @@ class AgentListParams(TypedDict, total=False):
"""Filter agents by public visibility."""
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
name: str
"""Filter agents by name (partial match supported)."""
@@ -22,3 +22,6 @@ class AgentListParams(TypedDict, total=False):
starting_after: str
"""Load the next page of data starting after the item with the given ID."""
+
+ version: str
+ """Filter by version. Use 'latest' to get the most recently created agent."""
diff --git a/src/runloop_api_client/types/agent_list_view.py b/src/runloop_api_client/types/agent_list_view.py
index c2a7be455..bfb1560e1 100644
--- a/src/runloop_api_client/types/agent_list_view.py
+++ b/src/runloop_api_client/types/agent_list_view.py
@@ -9,6 +9,8 @@
class AgentListView(BaseModel):
+ """A paginated list of Agents."""
+
agents: List[AgentView]
"""The list of Agents."""
diff --git a/src/runloop_api_client/types/agent_view.py b/src/runloop_api_client/types/agent_view.py
index 77e56d1b8..23b1f68ff 100644
--- a/src/runloop_api_client/types/agent_view.py
+++ b/src/runloop_api_client/types/agent_view.py
@@ -9,6 +9,8 @@
class AgentView(BaseModel):
+ """An Agent represents a registered AI agent entity."""
+
id: str
"""The unique identifier of the Agent."""
@@ -21,5 +23,8 @@ class AgentView(BaseModel):
name: str
"""The name of the Agent."""
+ version: str
+ """The version of the Agent. A semver string (e.g., '2.0.65') or a SHA."""
+
source: Optional[AgentSource] = None
"""The source configuration for the Agent."""
diff --git a/src/runloop_api_client/types/benchmark_create_params.py b/src/runloop_api_client/types/benchmark_create_params.py
index 1aec35f5f..36f7b95a9 100644
--- a/src/runloop_api_client/types/benchmark_create_params.py
+++ b/src/runloop_api_client/types/benchmark_create_params.py
@@ -12,7 +12,7 @@
class BenchmarkCreateParams(TypedDict, total=False):
name: Required[str]
- """The name of the Benchmark. This must be unique."""
+ """The unique name of the Benchmark."""
attribution: Optional[str]
"""Attribution information for the benchmark."""
@@ -21,12 +21,12 @@ class BenchmarkCreateParams(TypedDict, total=False):
"""Detailed description of the benchmark."""
metadata: Optional[Dict[str, str]]
- """User defined metadata to attach to the benchmark for organization."""
+ """User defined metadata to attach to the benchmark."""
required_environment_variables: Optional[SequenceNotStr[str]]
"""Environment variables required to run the benchmark.
- If any required variables are not supplied, the benchmark will fail to start
+ If any required variables are not supplied, the benchmark will fail to start.
"""
required_secret_names: SequenceNotStr[str]
diff --git a/src/runloop_api_client/types/benchmark_definitions_params.py b/src/runloop_api_client/types/benchmark_definitions_params.py
index f92d57d76..97caff125 100644
--- a/src/runloop_api_client/types/benchmark_definitions_params.py
+++ b/src/runloop_api_client/types/benchmark_definitions_params.py
@@ -9,7 +9,7 @@
class BenchmarkDefinitionsParams(TypedDict, total=False):
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
starting_after: str
"""Load the next page of data starting after the item with the given ID."""
diff --git a/src/runloop_api_client/types/benchmark_list_params.py b/src/runloop_api_client/types/benchmark_list_params.py
index 51b2b1320..4e8b0c78b 100644
--- a/src/runloop_api_client/types/benchmark_list_params.py
+++ b/src/runloop_api_client/types/benchmark_list_params.py
@@ -9,7 +9,10 @@
class BenchmarkListParams(TypedDict, total=False):
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
+
+ name: str
+ """Filter by name"""
starting_after: str
"""Load the next page of data starting after the item with the given ID."""
diff --git a/src/runloop_api_client/types/benchmark_list_public_params.py b/src/runloop_api_client/types/benchmark_list_public_params.py
index c5081922d..6dec4283b 100644
--- a/src/runloop_api_client/types/benchmark_list_public_params.py
+++ b/src/runloop_api_client/types/benchmark_list_public_params.py
@@ -9,7 +9,7 @@
class BenchmarkListPublicParams(TypedDict, total=False):
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
starting_after: str
"""Load the next page of data starting after the item with the given ID."""
diff --git a/src/runloop_api_client/types/benchmark_run_view.py b/src/runloop_api_client/types/benchmark_run_view.py
index 00dd98fc2..07fd4c022 100644
--- a/src/runloop_api_client/types/benchmark_run_view.py
+++ b/src/runloop_api_client/types/benchmark_run_view.py
@@ -9,6 +9,10 @@
class BenchmarkRunView(BaseModel):
+ """
+ A BenchmarkRunView represents a run of a complete set of Scenarios, organized under a Benchmark.
+ """
+
id: str
"""The ID of the BenchmarkRun."""
diff --git a/src/runloop_api_client/types/benchmark_start_run_params.py b/src/runloop_api_client/types/benchmark_start_run_params.py
index 7655ff5ad..edd65ca7c 100644
--- a/src/runloop_api_client/types/benchmark_start_run_params.py
+++ b/src/runloop_api_client/types/benchmark_start_run_params.py
@@ -11,10 +11,9 @@
__all__ = ["BenchmarkStartRunParams"]
-class BenchmarkStartRunParams(TypedDict, total=False):
- benchmark_id: Required[str]
- """ID of the Benchmark to run."""
-
+# Split into separate params so that the OO SDK start_run params can omit the benchmark_id.
+# Neither of these params is exposed to the user; only the derived SDKBenchmarkStartRunParams is.
+class BenchmarkSelfStartRunParams(TypedDict, total=False):
metadata: Optional[Dict[str, str]]
"""User defined metadata to attach to the benchmark run for organization."""
@@ -23,3 +22,8 @@ class BenchmarkStartRunParams(TypedDict, total=False):
run_profile: Annotated[Optional[RunProfile], PropertyInfo(alias="runProfile")]
"""Runtime configuration to use for this benchmark run"""
+
+
+class BenchmarkStartRunParams(BenchmarkSelfStartRunParams, total=False):
+ benchmark_id: Required[str]
+ """ID of the Benchmark to run."""
diff --git a/src/runloop_api_client/types/benchmark_update_params.py b/src/runloop_api_client/types/benchmark_update_params.py
index 1291e3e38..ce9e8fb0c 100644
--- a/src/runloop_api_client/types/benchmark_update_params.py
+++ b/src/runloop_api_client/types/benchmark_update_params.py
@@ -3,7 +3,7 @@
from __future__ import annotations
from typing import Dict, Optional
-from typing_extensions import Required, TypedDict
+from typing_extensions import TypedDict
from .._types import SequenceNotStr
@@ -11,30 +11,32 @@
class BenchmarkUpdateParams(TypedDict, total=False):
- name: Required[str]
- """The name of the Benchmark. This must be unique."""
-
attribution: Optional[str]
- """Attribution information for the benchmark."""
+ """Attribution information for the benchmark. Pass in empty string to clear."""
description: Optional[str]
- """Detailed description of the benchmark."""
+ """Detailed description of the benchmark. Pass in empty string to clear."""
metadata: Optional[Dict[str, str]]
- """User defined metadata to attach to the benchmark for organization."""
+ """User defined metadata to attach to the benchmark. Pass in empty map to clear."""
+
+ name: Optional[str]
+ """The unique name of the Benchmark. Cannot be blank."""
required_environment_variables: Optional[SequenceNotStr[str]]
"""Environment variables required to run the benchmark.
- If any required variables are not supplied, the benchmark will fail to start
+ If any required variables are not supplied, the benchmark will fail to start.
+ Pass in empty list to clear.
"""
- required_secret_names: SequenceNotStr[str]
+ required_secret_names: Optional[SequenceNotStr[str]]
"""
Secrets required to run the benchmark with (environment variable name will be
mapped to your user secret by name). If any of these secrets are not
- provided or the mapping is incorrect, the benchmark will fail to start.
+ provided or the mapping is incorrect, the benchmark will fail to start. Pass in
+ empty list to clear.
"""
scenario_ids: Optional[SequenceNotStr[str]]
- """The Scenario IDs that make up the Benchmark."""
+ """The Scenario IDs that make up the Benchmark. Pass in empty list to clear."""
diff --git a/src/runloop_api_client/types/benchmark_update_scenarios_params.py b/src/runloop_api_client/types/benchmark_update_scenarios_params.py
new file mode 100644
index 000000000..2aca2b0d4
--- /dev/null
+++ b/src/runloop_api_client/types/benchmark_update_scenarios_params.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import TypedDict
+
+from .._types import SequenceNotStr
+
+__all__ = ["BenchmarkUpdateScenariosParams"]
+
+
+class BenchmarkUpdateScenariosParams(TypedDict, total=False):
+ scenarios_to_add: Optional[SequenceNotStr[str]]
+ """Scenario IDs to add to the Benchmark."""
+
+ scenarios_to_remove: Optional[SequenceNotStr[str]]
+ """Scenario IDs to remove from the Benchmark."""
diff --git a/src/runloop_api_client/types/benchmark_view.py b/src/runloop_api_client/types/benchmark_view.py
index 877c8fe26..4150847ac 100644
--- a/src/runloop_api_client/types/benchmark_view.py
+++ b/src/runloop_api_client/types/benchmark_view.py
@@ -10,6 +10,10 @@
class BenchmarkView(BaseModel):
+ """
+ A BenchmarkDefinitionView represents a grouped set of Scenarios that together form a Benchmark.
+ """
+
id: str
"""The ID of the Benchmark."""
diff --git a/src/runloop_api_client/types/benchmarks/run_list_params.py b/src/runloop_api_client/types/benchmarks/run_list_params.py
index f93695b2a..a75e1b592 100644
--- a/src/runloop_api_client/types/benchmarks/run_list_params.py
+++ b/src/runloop_api_client/types/benchmarks/run_list_params.py
@@ -7,12 +7,19 @@
__all__ = ["RunListParams"]
-class RunListParams(TypedDict, total=False):
- benchmark_id: str
- """The Benchmark ID to filter by."""
-
+# Split into separate params so that the OO SDK list_runs params can omit the benchmark_id.
+# Neither of these params is exposed to the user; only the derived SDKBenchmarkListRunsParams is.
+class RunSelfListParams(TypedDict, total=False):
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
+
+ name: str
+ """Filter by name"""
starting_after: str
"""Load the next page of data starting after the item with the given ID."""
+
+
+class RunListParams(RunSelfListParams, total=False):
+ benchmark_id: str
+ """The Benchmark ID to filter by."""
diff --git a/src/runloop_api_client/types/benchmarks/run_list_scenario_runs_params.py b/src/runloop_api_client/types/benchmarks/run_list_scenario_runs_params.py
index 241df1a1f..ddce6aa4a 100644
--- a/src/runloop_api_client/types/benchmarks/run_list_scenario_runs_params.py
+++ b/src/runloop_api_client/types/benchmarks/run_list_scenario_runs_params.py
@@ -9,7 +9,7 @@
class RunListScenarioRunsParams(TypedDict, total=False):
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
starting_after: str
"""Load the next page of data starting after the item with the given ID."""
diff --git a/src/runloop_api_client/types/blueprint_build_parameters.py b/src/runloop_api_client/types/blueprint_build_parameters.py
index 129a8047a..52ddfda7c 100644
--- a/src/runloop_api_client/types/blueprint_build_parameters.py
+++ b/src/runloop_api_client/types/blueprint_build_parameters.py
@@ -11,6 +11,8 @@
class BuildContext(BaseModel):
+ """A build context backed by an Object."""
+
object_id: str
"""The ID of an object, whose contents are to be used as a build context."""
@@ -18,6 +20,8 @@ class BuildContext(BaseModel):
class ServiceCredentials(BaseModel):
+ """The credentials of the container service."""
+
password: str
"""The password of the container service."""
diff --git a/src/runloop_api_client/types/blueprint_create_params.py b/src/runloop_api_client/types/blueprint_create_params.py
index d82de7f35..94156d2e9 100644
--- a/src/runloop_api_client/types/blueprint_create_params.py
+++ b/src/runloop_api_client/types/blueprint_create_params.py
@@ -71,6 +71,8 @@ class BlueprintCreateParams(TypedDict, total=False):
class BuildContext(TypedDict, total=False):
+ """A build context backed by an Object."""
+
object_id: Required[str]
"""The ID of an object, whose contents are to be used as a build context."""
@@ -78,6 +80,8 @@ class BuildContext(TypedDict, total=False):
class ServiceCredentials(TypedDict, total=False):
+ """The credentials of the container service."""
+
password: Required[str]
"""The password of the container service."""
diff --git a/src/runloop_api_client/types/blueprint_list_params.py b/src/runloop_api_client/types/blueprint_list_params.py
index b0a3ade62..f72de7d2f 100644
--- a/src/runloop_api_client/types/blueprint_list_params.py
+++ b/src/runloop_api_client/types/blueprint_list_params.py
@@ -9,10 +9,13 @@
class BlueprintListParams(TypedDict, total=False):
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
name: str
"""Filter by name"""
starting_after: str
"""Load the next page of data starting after the item with the given ID."""
+
+ status: str
+ """Filter by build status (queued, provisioning, building, failed, build_complete)"""
diff --git a/src/runloop_api_client/types/blueprint_list_public_params.py b/src/runloop_api_client/types/blueprint_list_public_params.py
index d6b11e78e..e0f224f32 100644
--- a/src/runloop_api_client/types/blueprint_list_public_params.py
+++ b/src/runloop_api_client/types/blueprint_list_public_params.py
@@ -9,10 +9,13 @@
class BlueprintListPublicParams(TypedDict, total=False):
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
name: str
"""Filter by name"""
starting_after: str
"""Load the next page of data starting after the item with the given ID."""
+
+ status: str
+ """Filter by build status (queued, provisioning, building, failed, build_complete)"""
diff --git a/src/runloop_api_client/types/blueprint_preview_params.py b/src/runloop_api_client/types/blueprint_preview_params.py
index 9f6c4d9bc..4269b734f 100644
--- a/src/runloop_api_client/types/blueprint_preview_params.py
+++ b/src/runloop_api_client/types/blueprint_preview_params.py
@@ -71,6 +71,8 @@ class BlueprintPreviewParams(TypedDict, total=False):
class BuildContext(TypedDict, total=False):
+ """A build context backed by an Object."""
+
object_id: Required[str]
"""The ID of an object, whose contents are to be used as a build context."""
@@ -78,6 +80,8 @@ class BuildContext(TypedDict, total=False):
class ServiceCredentials(TypedDict, total=False):
+ """The credentials of the container service."""
+
password: Required[str]
"""The password of the container service."""
diff --git a/src/runloop_api_client/types/blueprint_view.py b/src/runloop_api_client/types/blueprint_view.py
index 7a10d1686..851b09426 100644
--- a/src/runloop_api_client/types/blueprint_view.py
+++ b/src/runloop_api_client/types/blueprint_view.py
@@ -10,6 +10,8 @@
class ContainerizedServiceCredentials(BaseModel):
+ """The credentials of the container service."""
+
password: str
"""The password of the container service."""
@@ -41,6 +43,11 @@ class ContainerizedService(BaseModel):
class BlueprintView(BaseModel):
+ """Blueprints are ways to create customized starting points for Devboxes.
+
+    They allow environment setup to be cached, improving Devbox boot times.
+ """
+
id: str
"""The id of the Blueprint."""
diff --git a/src/runloop_api_client/types/devbox_list_disk_snapshots_params.py b/src/runloop_api_client/types/devbox_list_disk_snapshots_params.py
index 7ffcf5386..d26c3fbd8 100644
--- a/src/runloop_api_client/types/devbox_list_disk_snapshots_params.py
+++ b/src/runloop_api_client/types/devbox_list_disk_snapshots_params.py
@@ -14,7 +14,7 @@ class DevboxListDiskSnapshotsParams(TypedDict, total=False):
"""Devbox ID to filter by."""
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
metadata_key: Annotated[str, PropertyInfo(alias="metadata[key]")]
"""Filter snapshots by metadata key-value pair.
diff --git a/src/runloop_api_client/types/devbox_list_params.py b/src/runloop_api_client/types/devbox_list_params.py
index 066b2ed85..c508762da 100644
--- a/src/runloop_api_client/types/devbox_list_params.py
+++ b/src/runloop_api_client/types/devbox_list_params.py
@@ -9,7 +9,7 @@
class DevboxListParams(TypedDict, total=False):
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
starting_after: str
"""Load the next page of data starting after the item with the given ID."""
diff --git a/src/runloop_api_client/types/devbox_view.py b/src/runloop_api_client/types/devbox_view.py
index 007af6575..e2c9a28d8 100644
--- a/src/runloop_api_client/types/devbox_view.py
+++ b/src/runloop_api_client/types/devbox_view.py
@@ -31,6 +31,11 @@ class StateTransition(BaseModel):
class DevboxView(BaseModel):
+ """A Devbox represents a virtual development environment.
+
+ It is an isolated sandbox that can be given to agents and used to run arbitrary code such as AI generated code.
+ """
+
id: str
"""The ID of the Devbox."""
diff --git a/src/runloop_api_client/types/devboxes/browser_view.py b/src/runloop_api_client/types/devboxes/browser_view.py
index d6d377a28..4486d76ec 100644
--- a/src/runloop_api_client/types/devboxes/browser_view.py
+++ b/src/runloop_api_client/types/devboxes/browser_view.py
@@ -7,6 +7,10 @@
class BrowserView(BaseModel):
+ """
+    A Browser represents a managed implementation of a browser, like Chromium, on top of Devboxes. It includes the tunnel to the live screen and the underlying DevboxView.
+ """
+
connection_url: str
"""
The url to enable remote connection from browser automation tools like
diff --git a/src/runloop_api_client/types/devboxes/computer_create_params.py b/src/runloop_api_client/types/devboxes/computer_create_params.py
index febd5aef5..c2e32e035 100644
--- a/src/runloop_api_client/types/devboxes/computer_create_params.py
+++ b/src/runloop_api_client/types/devboxes/computer_create_params.py
@@ -17,6 +17,8 @@ class ComputerCreateParams(TypedDict, total=False):
class DisplayDimensions(TypedDict, total=False):
+ """Customize the dimensions of the computer display."""
+
display_height_px: Required[int]
"""The height of the display being controlled by the model in pixels."""
diff --git a/src/runloop_api_client/types/devboxes/computer_mouse_interaction_params.py b/src/runloop_api_client/types/devboxes/computer_mouse_interaction_params.py
index b28a0723f..a3a02279d 100644
--- a/src/runloop_api_client/types/devboxes/computer_mouse_interaction_params.py
+++ b/src/runloop_api_client/types/devboxes/computer_mouse_interaction_params.py
@@ -23,6 +23,11 @@ class ComputerMouseInteractionParams(TypedDict, total=False):
class Coordinate(TypedDict, total=False):
+ """
+    The x (pixels from the left) and y (pixels from the top) coordinates for the mouse to move or click-drag. Required only for
+    `action=mouse_move` or `action=left_click_drag`.
+ """
+
x: Required[int]
"""The x coordinate (pixels from the left) for the mouse to move or click-drag."""
diff --git a/src/runloop_api_client/types/devboxes/computer_view.py b/src/runloop_api_client/types/devboxes/computer_view.py
index 907629d54..4706d44a0 100644
--- a/src/runloop_api_client/types/devboxes/computer_view.py
+++ b/src/runloop_api_client/types/devboxes/computer_view.py
@@ -7,6 +7,10 @@
class ComputerView(BaseModel):
+ """
+    A Computer represents an implementation of Anthropic computer use on top of Devboxes. It includes the tunnel to the live screen and the underlying DevboxView.
+ """
+
devbox: DevboxView
"""The underlying devbox the computer setup is running on."""
diff --git a/src/runloop_api_client/types/devboxes/disk_snapshot_list_params.py b/src/runloop_api_client/types/devboxes/disk_snapshot_list_params.py
index 7b0f3454f..73e60f457 100644
--- a/src/runloop_api_client/types/devboxes/disk_snapshot_list_params.py
+++ b/src/runloop_api_client/types/devboxes/disk_snapshot_list_params.py
@@ -14,7 +14,7 @@ class DiskSnapshotListParams(TypedDict, total=False):
"""Devbox ID to filter by."""
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
metadata_key: Annotated[str, PropertyInfo(alias="metadata[key]")]
"""Filter snapshots by metadata key-value pair.
diff --git a/src/runloop_api_client/types/input_context.py b/src/runloop_api_client/types/input_context.py
index 5cc697db9..2daae5d45 100644
--- a/src/runloop_api_client/types/input_context.py
+++ b/src/runloop_api_client/types/input_context.py
@@ -8,6 +8,10 @@
class InputContext(BaseModel):
+ """
+ InputContextView specifies the problem statement along with all additional context for a Scenario.
+ """
+
problem_statement: str
"""The problem statement for the Scenario."""
diff --git a/src/runloop_api_client/types/input_context_param.py b/src/runloop_api_client/types/input_context_param.py
index 7f977ad65..b0b495c4d 100644
--- a/src/runloop_api_client/types/input_context_param.py
+++ b/src/runloop_api_client/types/input_context_param.py
@@ -9,6 +9,10 @@
class InputContextParam(TypedDict, total=False):
+ """
+ InputContextView specifies the problem statement along with all additional context for a Scenario.
+ """
+
problem_statement: Required[str]
"""The problem statement for the Scenario."""
diff --git a/src/runloop_api_client/types/inspection_source_param.py b/src/runloop_api_client/types/inspection_source_param.py
index ba9e7f397..0d1308d8e 100644
--- a/src/runloop_api_client/types/inspection_source_param.py
+++ b/src/runloop_api_client/types/inspection_source_param.py
@@ -9,6 +9,8 @@
class InspectionSourceParam(TypedDict, total=False):
+ """Use a RepositoryInspection a source of a Blueprint build."""
+
inspection_id: Required[str]
"""The ID of a repository inspection."""
diff --git a/src/runloop_api_client/types/object_download_url_view.py b/src/runloop_api_client/types/object_download_url_view.py
index eb35ac3db..d1e726ca5 100644
--- a/src/runloop_api_client/types/object_download_url_view.py
+++ b/src/runloop_api_client/types/object_download_url_view.py
@@ -6,5 +6,7 @@
class ObjectDownloadURLView(BaseModel):
+ """A response containing a presigned download URL for an Object."""
+
download_url: str
"""The presigned download URL for the Object."""
diff --git a/src/runloop_api_client/types/object_list_params.py b/src/runloop_api_client/types/object_list_params.py
index 084fac54d..eca1c7cdd 100644
--- a/src/runloop_api_client/types/object_list_params.py
+++ b/src/runloop_api_client/types/object_list_params.py
@@ -12,7 +12,7 @@ class ObjectListParams(TypedDict, total=False):
"""Filter storage objects by content type."""
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
name: str
"""Filter storage objects by name (partial match supported)."""
diff --git a/src/runloop_api_client/types/object_list_public_params.py b/src/runloop_api_client/types/object_list_public_params.py
index 19b18ba49..67475b263 100644
--- a/src/runloop_api_client/types/object_list_public_params.py
+++ b/src/runloop_api_client/types/object_list_public_params.py
@@ -12,7 +12,7 @@ class ObjectListPublicParams(TypedDict, total=False):
"""Filter storage objects by content type."""
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
name: str
"""Filter storage objects by name (partial match supported)."""
diff --git a/src/runloop_api_client/types/object_list_view.py b/src/runloop_api_client/types/object_list_view.py
index 049b1be81..cfd546c0c 100644
--- a/src/runloop_api_client/types/object_list_view.py
+++ b/src/runloop_api_client/types/object_list_view.py
@@ -9,6 +9,8 @@
class ObjectListView(BaseModel):
+ """A paginated list of Objects."""
+
has_more: bool
"""True if there are more results available beyond this page."""
diff --git a/src/runloop_api_client/types/object_view.py b/src/runloop_api_client/types/object_view.py
index 80aea62ed..d4ced655f 100644
--- a/src/runloop_api_client/types/object_view.py
+++ b/src/runloop_api_client/types/object_view.py
@@ -9,6 +9,8 @@
class ObjectView(BaseModel):
+ """An Object represents a stored data entity with metadata."""
+
id: str
"""The unique identifier of the Object."""
diff --git a/src/runloop_api_client/types/repository_connection_view.py b/src/runloop_api_client/types/repository_connection_view.py
index 74718de27..e126071e8 100644
--- a/src/runloop_api_client/types/repository_connection_view.py
+++ b/src/runloop_api_client/types/repository_connection_view.py
@@ -6,6 +6,8 @@
class RepositoryConnectionView(BaseModel):
+ """The ID of the Repository."""
+
id: str
"""The ID of the Repository."""
diff --git a/src/runloop_api_client/types/repository_inspection_details.py b/src/runloop_api_client/types/repository_inspection_details.py
index f83932d29..0870ce693 100644
--- a/src/runloop_api_client/types/repository_inspection_details.py
+++ b/src/runloop_api_client/types/repository_inspection_details.py
@@ -10,6 +10,8 @@
class WorkflowContextsActionsContext(BaseModel):
+ """Details about actions processing for this workflow."""
+
actions_skipped_unnecessary: List[str]
"""
Actions that were skipped because they were unnecessary (e.g., upload
@@ -26,6 +28,10 @@ class WorkflowContextsActionsContext(BaseModel):
class WorkflowContexts(BaseModel):
+ """
+ Workflow context containing file name and details about actions processing during inspection.
+ """
+
actions_context: WorkflowContextsActionsContext
"""Details about actions processing for this workflow."""
diff --git a/src/runloop_api_client/types/repository_list_params.py b/src/runloop_api_client/types/repository_list_params.py
index 91fd7f352..d5f7b248a 100644
--- a/src/runloop_api_client/types/repository_list_params.py
+++ b/src/runloop_api_client/types/repository_list_params.py
@@ -9,7 +9,7 @@
class RepositoryListParams(TypedDict, total=False):
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
name: str
"""Filter by repository name"""
diff --git a/src/runloop_api_client/types/repository_manifest_view.py b/src/runloop_api_client/types/repository_manifest_view.py
index 461722b59..acb862672 100644
--- a/src/runloop_api_client/types/repository_manifest_view.py
+++ b/src/runloop_api_client/types/repository_manifest_view.py
@@ -17,6 +17,8 @@
class ContainerConfig(BaseModel):
+ """Container configuration specifying the base image and setup commands."""
+
base_image_name: str
"""The name of the base image.
@@ -41,6 +43,10 @@ class Language(BaseModel):
class WorkspaceDevCommands(BaseModel):
+ """
+ Extracted common commands important to the developer life cycle like linting, testing, building, etc.
+ """
+
build: Optional[List[str]] = None
"""Build command (e.g. npm run build)."""
@@ -58,6 +64,10 @@ class WorkspaceDevCommands(BaseModel):
class Workspace(BaseModel):
+ """
+ A workspace is a buildable unit of code within a repository and often represents a deployable unit of code like a backend service or a frontend app.
+ """
+
build_tool: List[str]
"""Name of the build tool used (e.g. pip, npm)."""
@@ -96,6 +106,8 @@ class Workspace(BaseModel):
class ContainerizedServiceCredentials(BaseModel):
+ """The credentials of the container service."""
+
password: str
"""The password of the container service."""
@@ -127,6 +139,10 @@ class ContainerizedService(BaseModel):
class RepositoryManifestView(BaseModel):
+ """
+ The repository manifest contains container configuration and workspace definitions for a repository.
+ """
+
container_config: ContainerConfig
"""Container configuration specifying the base image and setup commands."""
diff --git a/src/runloop_api_client/types/scenario_environment.py b/src/runloop_api_client/types/scenario_environment.py
index 94e244df9..b6ac9b039 100644
--- a/src/runloop_api_client/types/scenario_environment.py
+++ b/src/runloop_api_client/types/scenario_environment.py
@@ -9,6 +9,10 @@
class ScenarioEnvironment(BaseModel):
+ """
+ ScenarioEnvironmentParameters specify the environment in which a Scenario will be run.
+ """
+
blueprint_id: Optional[str] = None
"""Use the blueprint with matching ID."""
diff --git a/src/runloop_api_client/types/scenario_environment_param.py b/src/runloop_api_client/types/scenario_environment_param.py
index 5069e5943..6a219d250 100644
--- a/src/runloop_api_client/types/scenario_environment_param.py
+++ b/src/runloop_api_client/types/scenario_environment_param.py
@@ -11,6 +11,10 @@
class ScenarioEnvironmentParam(TypedDict, total=False):
+ """
+ ScenarioEnvironmentParameters specify the environment in which a Scenario will be run.
+ """
+
blueprint_id: Optional[str]
"""Use the blueprint with matching ID."""
diff --git a/src/runloop_api_client/types/scenario_list_params.py b/src/runloop_api_client/types/scenario_list_params.py
index 917da6c94..45ff3a87b 100644
--- a/src/runloop_api_client/types/scenario_list_params.py
+++ b/src/runloop_api_client/types/scenario_list_params.py
@@ -12,10 +12,13 @@ class ScenarioListParams(TypedDict, total=False):
"""Filter scenarios by benchmark ID."""
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
name: str
"""Query for Scenarios with a given name."""
starting_after: str
"""Load the next page of data starting after the item with the given ID."""
+
+ validation_type: str
+ """Filter by validation type"""
diff --git a/src/runloop_api_client/types/scenario_list_public_params.py b/src/runloop_api_client/types/scenario_list_public_params.py
index 7f413a517..be7e40b8d 100644
--- a/src/runloop_api_client/types/scenario_list_public_params.py
+++ b/src/runloop_api_client/types/scenario_list_public_params.py
@@ -9,7 +9,7 @@
class ScenarioListPublicParams(TypedDict, total=False):
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
name: str
"""Query for Scenarios with a given name."""
diff --git a/src/runloop_api_client/types/scenario_run_view.py b/src/runloop_api_client/types/scenario_run_view.py
index 225e90a89..68d4c3573 100644
--- a/src/runloop_api_client/types/scenario_run_view.py
+++ b/src/runloop_api_client/types/scenario_run_view.py
@@ -10,6 +10,11 @@
class ScenarioRunView(BaseModel):
+ """A ScenarioRunView represents a single run of a Scenario on a Devbox.
+
+ When completed, the ScenarioRun will contain the final score and output of the run.
+ """
+
id: str
"""ID of the ScenarioRun."""
diff --git a/src/runloop_api_client/types/scenario_update_params.py b/src/runloop_api_client/types/scenario_update_params.py
index 908988961..9d0fc65e5 100644
--- a/src/runloop_api_client/types/scenario_update_params.py
+++ b/src/runloop_api_client/types/scenario_update_params.py
@@ -21,26 +21,29 @@ class ScenarioUpdateParams(TypedDict, total=False):
"""The input context for the Scenario."""
metadata: Optional[Dict[str, str]]
- """User defined metadata to attach to the scenario for organization."""
+ """User defined metadata to attach to the scenario. Pass in empty map to clear."""
name: Optional[str]
- """Name of the scenario."""
+ """Name of the scenario. Cannot be blank."""
reference_output: Optional[str]
"""A string representation of the reference output to solve the scenario.
Commonly can be the result of a git diff or a sequence of command actions to
- apply to the environment.
+ apply to the environment. Pass in empty string to clear.
"""
required_environment_variables: Optional[SequenceNotStr[str]]
- """Environment variables required to run the scenario."""
+ """Environment variables required to run the scenario.
+
+ Pass in empty list to clear.
+ """
required_secret_names: Optional[SequenceNotStr[str]]
- """Secrets required to run the scenario."""
+ """Secrets required to run the scenario. Pass in empty list to clear."""
scoring_contract: Optional[ScoringContractUpdateParam]
"""The scoring contract for the Scenario."""
validation_type: Optional[Literal["UNSPECIFIED", "FORWARD", "REVERSE", "EVALUATION"]]
- """Validation strategy."""
+ """Validation strategy. Pass in empty string to clear."""
diff --git a/src/runloop_api_client/types/scenario_view.py b/src/runloop_api_client/types/scenario_view.py
index 58c0dbb26..5c5ba0164 100644
--- a/src/runloop_api_client/types/scenario_view.py
+++ b/src/runloop_api_client/types/scenario_view.py
@@ -12,6 +12,10 @@
class ScenarioView(BaseModel):
+ """
+    A ScenarioView represents a repeatable AI coding evaluation test, complete with initial environment and scoring contract.
+ """
+
id: str
"""The ID of the Scenario."""
diff --git a/src/runloop_api_client/types/scenarios/run_list_params.py b/src/runloop_api_client/types/scenarios/run_list_params.py
index 17a2715c4..97eeb425a 100644
--- a/src/runloop_api_client/types/scenarios/run_list_params.py
+++ b/src/runloop_api_client/types/scenarios/run_list_params.py
@@ -8,11 +8,20 @@
class RunListParams(TypedDict, total=False):
+ benchmark_run_id: str
+ """Filter by benchmark run ID"""
+
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
+
+ name: str
+ """Filter by name"""
scenario_id: str
"""Filter runs associated to Scenario given ID"""
starting_after: str
"""Load the next page of data starting after the item with the given ID."""
+
+ state: str
+ """Filter by state"""
diff --git a/src/runloop_api_client/types/scenarios/scorer_create_response.py b/src/runloop_api_client/types/scenarios/scorer_create_response.py
index 376c50f70..2b6e665a1 100644
--- a/src/runloop_api_client/types/scenarios/scorer_create_response.py
+++ b/src/runloop_api_client/types/scenarios/scorer_create_response.py
@@ -6,6 +6,8 @@
class ScorerCreateResponse(BaseModel):
+ """A ScenarioScorerView represents a custom scoring function for a Scenario."""
+
id: str
"""ID for the scenario scorer."""
diff --git a/src/runloop_api_client/types/scenarios/scorer_list_params.py b/src/runloop_api_client/types/scenarios/scorer_list_params.py
index 0577a327e..f80e7f6ac 100644
--- a/src/runloop_api_client/types/scenarios/scorer_list_params.py
+++ b/src/runloop_api_client/types/scenarios/scorer_list_params.py
@@ -9,7 +9,7 @@
class ScorerListParams(TypedDict, total=False):
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
starting_after: str
"""Load the next page of data starting after the item with the given ID."""
diff --git a/src/runloop_api_client/types/scenarios/scorer_list_response.py b/src/runloop_api_client/types/scenarios/scorer_list_response.py
index bdbc9b9de..46eb8802e 100644
--- a/src/runloop_api_client/types/scenarios/scorer_list_response.py
+++ b/src/runloop_api_client/types/scenarios/scorer_list_response.py
@@ -6,6 +6,8 @@
class ScorerListResponse(BaseModel):
+ """A ScenarioScorerView represents a custom scoring function for a Scenario."""
+
id: str
"""ID for the scenario scorer."""
diff --git a/src/runloop_api_client/types/scenarios/scorer_retrieve_response.py b/src/runloop_api_client/types/scenarios/scorer_retrieve_response.py
index ab0f85231..a67cd35c0 100644
--- a/src/runloop_api_client/types/scenarios/scorer_retrieve_response.py
+++ b/src/runloop_api_client/types/scenarios/scorer_retrieve_response.py
@@ -6,6 +6,8 @@
class ScorerRetrieveResponse(BaseModel):
+ """A ScenarioScorerView represents a custom scoring function for a Scenario."""
+
id: str
"""ID for the scenario scorer."""
diff --git a/src/runloop_api_client/types/scenarios/scorer_update_response.py b/src/runloop_api_client/types/scenarios/scorer_update_response.py
index 60a1b5e4b..91e668d22 100644
--- a/src/runloop_api_client/types/scenarios/scorer_update_response.py
+++ b/src/runloop_api_client/types/scenarios/scorer_update_response.py
@@ -6,6 +6,8 @@
class ScorerUpdateResponse(BaseModel):
+ """A ScenarioScorerView represents a custom scoring function for a Scenario."""
+
id: str
"""ID for the scenario scorer."""
diff --git a/src/runloop_api_client/types/scoring_contract.py b/src/runloop_api_client/types/scoring_contract.py
index f19f5aa54..d3c646fda 100644
--- a/src/runloop_api_client/types/scoring_contract.py
+++ b/src/runloop_api_client/types/scoring_contract.py
@@ -9,5 +9,9 @@
class ScoringContract(BaseModel):
+ """
+    A ScoringContract specifies the set of scoring functions used to evaluate a Scenario.
+ """
+
scoring_function_parameters: List[ScoringFunction]
"""A list of scoring functions used to evaluate the Scenario."""
diff --git a/src/runloop_api_client/types/scoring_contract_param.py b/src/runloop_api_client/types/scoring_contract_param.py
index 4a68abb93..46f1b7b6d 100644
--- a/src/runloop_api_client/types/scoring_contract_param.py
+++ b/src/runloop_api_client/types/scoring_contract_param.py
@@ -11,5 +11,9 @@
class ScoringContractParam(TypedDict, total=False):
+ """
+    A ScoringContract specifies the set of scoring functions used to evaluate a Scenario.
+ """
+
scoring_function_parameters: Required[Iterable[ScoringFunctionParam]]
"""A list of scoring functions used to evaluate the Scenario."""
diff --git a/src/runloop_api_client/types/scoring_contract_result_view.py b/src/runloop_api_client/types/scoring_contract_result_view.py
index 823de83c4..85e1a42c4 100644
--- a/src/runloop_api_client/types/scoring_contract_result_view.py
+++ b/src/runloop_api_client/types/scoring_contract_result_view.py
@@ -9,6 +9,10 @@
class ScoringContractResultView(BaseModel):
+ """
+ A ScoringContractResultView represents the result of running all scoring functions on a given input context.
+ """
+
score: float
"""Total score for all scoring contracts. This will be a value between 0 and 1."""
diff --git a/src/runloop_api_client/types/scoring_function.py b/src/runloop_api_client/types/scoring_function.py
index ba4aea9e1..fe5d2a467 100644
--- a/src/runloop_api_client/types/scoring_function.py
+++ b/src/runloop_api_client/types/scoring_function.py
@@ -20,6 +20,8 @@
class ScorerAstGrepScoringFunction(BaseModel):
+ """AstGrepScoringFunction utilizes structured coach search for scoring."""
+
pattern: str
"""AST pattern to match.
@@ -37,6 +39,10 @@ class ScorerAstGrepScoringFunction(BaseModel):
class ScorerBashScriptScoringFunction(BaseModel):
+ """
+ BashScriptScoringFunction is a scoring function specified by a bash script that will be run in the context of your environment.
+ """
+
type: Literal["bash_script_scorer"]
bash_script: Optional[str] = None
@@ -48,6 +54,10 @@ class ScorerBashScriptScoringFunction(BaseModel):
class ScorerCommandScoringFunction(BaseModel):
+ """
+    CommandScoringFunction executes a single command and checks the result. The output of the command will be printed. Scoring passes if the command returns status code 0 and fails otherwise.
+ """
+
type: Literal["command_scorer"]
command: Optional[str] = None
@@ -55,6 +65,8 @@ class ScorerCommandScoringFunction(BaseModel):
class ScorerCustomScoringFunction(BaseModel):
+ """CustomScoringFunction is a custom, user defined scoring function."""
+
custom_scorer_type: str
"""Type of the scoring function, previously registered with Runloop."""
@@ -65,6 +77,10 @@ class ScorerCustomScoringFunction(BaseModel):
class ScorerPythonScriptScoringFunction(BaseModel):
+ """
+ PythonScriptScoringFunction will run a python script in the context of your environment as a ScoringFunction.
+ """
+
python_script: str
"""Python script to be run.
@@ -96,6 +112,10 @@ class ScorerTestBasedScoringFunctionTestFile(BaseModel):
class ScorerTestBasedScoringFunction(BaseModel):
+ """
+ TestBasedScoringFunction writes test files to disk and executes a test command to verify the solution.
+ """
+
type: Literal["test_based_scorer"]
test_command: Optional[str] = None
@@ -119,6 +139,8 @@ class ScorerTestBasedScoringFunction(BaseModel):
class ScoringFunction(BaseModel):
+ """ScoringFunction specifies a method of scoring a Scenario."""
+
name: str
"""Name of scoring function. Names must only contain ``[a-zA-Z0-9_-]``."""
diff --git a/src/runloop_api_client/types/scoring_function_param.py b/src/runloop_api_client/types/scoring_function_param.py
index f9b6b26c7..033101d52 100644
--- a/src/runloop_api_client/types/scoring_function_param.py
+++ b/src/runloop_api_client/types/scoring_function_param.py
@@ -19,6 +19,8 @@
class ScorerAstGrepScoringFunction(TypedDict, total=False):
+ """AstGrepScoringFunction utilizes structured coach search for scoring."""
+
pattern: Required[str]
"""AST pattern to match.
@@ -36,6 +38,10 @@ class ScorerAstGrepScoringFunction(TypedDict, total=False):
class ScorerBashScriptScoringFunction(TypedDict, total=False):
+ """
+ BashScriptScoringFunction is a scoring function specified by a bash script that will be run in the context of your environment.
+ """
+
type: Required[Literal["bash_script_scorer"]]
bash_script: str
@@ -47,6 +53,10 @@ class ScorerBashScriptScoringFunction(TypedDict, total=False):
class ScorerCommandScoringFunction(TypedDict, total=False):
+ """
+    CommandScoringFunction executes a single command and checks the result. The output of the command will be printed. Scoring passes if the command returns status code 0 and fails otherwise.
+ """
+
type: Required[Literal["command_scorer"]]
command: str
@@ -54,6 +64,8 @@ class ScorerCommandScoringFunction(TypedDict, total=False):
class ScorerCustomScoringFunction(TypedDict, total=False):
+ """CustomScoringFunction is a custom, user defined scoring function."""
+
custom_scorer_type: Required[str]
"""Type of the scoring function, previously registered with Runloop."""
@@ -64,6 +76,10 @@ class ScorerCustomScoringFunction(TypedDict, total=False):
class ScorerPythonScriptScoringFunction(TypedDict, total=False):
+ """
+ PythonScriptScoringFunction will run a python script in the context of your environment as a ScoringFunction.
+ """
+
python_script: Required[str]
"""Python script to be run.
@@ -95,6 +111,10 @@ class ScorerTestBasedScoringFunctionTestFile(TypedDict, total=False):
class ScorerTestBasedScoringFunction(TypedDict, total=False):
+ """
+ TestBasedScoringFunction writes test files to disk and executes a test command to verify the solution.
+ """
+
type: Required[Literal["test_based_scorer"]]
test_command: str
@@ -115,6 +135,8 @@ class ScorerTestBasedScoringFunction(TypedDict, total=False):
class ScoringFunctionParam(TypedDict, total=False):
+ """ScoringFunction specifies a method of scoring a Scenario."""
+
name: Required[str]
"""Name of scoring function. Names must only contain ``[a-zA-Z0-9_-]``."""
diff --git a/src/runloop_api_client/types/scoring_function_result_view.py b/src/runloop_api_client/types/scoring_function_result_view.py
index 8f782df11..4fe5b67cb 100644
--- a/src/runloop_api_client/types/scoring_function_result_view.py
+++ b/src/runloop_api_client/types/scoring_function_result_view.py
@@ -8,6 +8,10 @@
class ScoringFunctionResultView(BaseModel):
+ """
+ A ScoringFunctionResultView represents the result of running a single scoring function on a given input context.
+ """
+
output: str
"""Log output of the scoring function."""
diff --git a/src/runloop_api_client/types/secret_list_params.py b/src/runloop_api_client/types/secret_list_params.py
index 296a66b62..13d25bd7e 100644
--- a/src/runloop_api_client/types/secret_list_params.py
+++ b/src/runloop_api_client/types/secret_list_params.py
@@ -9,4 +9,4 @@
class SecretListParams(TypedDict, total=False):
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
diff --git a/src/runloop_api_client/types/secret_list_view.py b/src/runloop_api_client/types/secret_list_view.py
index d7feec9c5..4d66fa2e4 100644
--- a/src/runloop_api_client/types/secret_list_view.py
+++ b/src/runloop_api_client/types/secret_list_view.py
@@ -9,6 +9,8 @@
class SecretListView(BaseModel):
+ """A paginated list of Secrets."""
+
has_more: bool
"""True if there are more results available beyond this page."""
diff --git a/src/runloop_api_client/types/secret_view.py b/src/runloop_api_client/types/secret_view.py
index 1303f7bfd..bd1c8811e 100644
--- a/src/runloop_api_client/types/secret_view.py
+++ b/src/runloop_api_client/types/secret_view.py
@@ -6,6 +6,10 @@
class SecretView(BaseModel):
+ """
+ A Secret represents a key-value pair that can be securely stored and used in Devboxes as environment variables.
+ """
+
id: str
"""The unique identifier of the Secret."""
diff --git a/src/runloop_api_client/types/shared/agent_source.py b/src/runloop_api_client/types/shared/agent_source.py
index 25bcbbc1d..9282d6181 100644
--- a/src/runloop_api_client/types/shared/agent_source.py
+++ b/src/runloop_api_client/types/shared/agent_source.py
@@ -8,6 +8,8 @@
class Git(BaseModel):
+ """Git source configuration"""
+
repository: str
"""Git repository URL"""
@@ -19,20 +21,21 @@ class Git(BaseModel):
class Npm(BaseModel):
+ """NPM source configuration"""
+
package_name: str
"""NPM package name"""
agent_setup: Optional[List[str]] = None
"""Setup commands to run after installation"""
- npm_version: Optional[str] = None
- """NPM version constraint"""
-
registry_url: Optional[str] = None
"""NPM registry URL"""
class Object(BaseModel):
+ """Object store source configuration"""
+
object_id: str
"""Object ID"""
@@ -41,20 +44,21 @@ class Object(BaseModel):
class Pip(BaseModel):
+ """Pip source configuration"""
+
package_name: str
"""Pip package name"""
agent_setup: Optional[List[str]] = None
"""Setup commands to run after installation"""
- pip_version: Optional[str] = None
- """Pip version constraint"""
-
registry_url: Optional[str] = None
"""Pip registry URL"""
class AgentSource(BaseModel):
+ """Agent source configuration."""
+
type: str
"""Source type: npm, pip, object, or git"""
diff --git a/src/runloop_api_client/types/shared/launch_parameters.py b/src/runloop_api_client/types/shared/launch_parameters.py
index f70023d66..dc0ccfccd 100644
--- a/src/runloop_api_client/types/shared/launch_parameters.py
+++ b/src/runloop_api_client/types/shared/launch_parameters.py
@@ -10,6 +10,11 @@
class UserParameters(BaseModel):
+ """Specify the user for execution on Devbox.
+
+ If not set, default `user` will be used.
+ """
+
uid: int
"""User ID (UID) for the Linux user. Must be a non-negative integer."""
@@ -18,6 +23,10 @@ class UserParameters(BaseModel):
class LaunchParameters(BaseModel):
+ """
+    LaunchParameters enable you to customize the resources available to your Devbox as well as the environment setup that should be completed before the Devbox is marked as 'running'.
+ """
+
after_idle: Optional[AfterIdle] = None
"""Configure Devbox lifecycle based on idle activity.
diff --git a/src/runloop_api_client/types/shared_params/agent_source.py b/src/runloop_api_client/types/shared_params/agent_source.py
index 9f5a50845..7132414c8 100644
--- a/src/runloop_api_client/types/shared_params/agent_source.py
+++ b/src/runloop_api_client/types/shared_params/agent_source.py
@@ -11,6 +11,8 @@
class Git(TypedDict, total=False):
+ """Git source configuration"""
+
repository: Required[str]
"""Git repository URL"""
@@ -22,20 +24,21 @@ class Git(TypedDict, total=False):
class Npm(TypedDict, total=False):
+ """NPM source configuration"""
+
package_name: Required[str]
"""NPM package name"""
agent_setup: Optional[SequenceNotStr[str]]
"""Setup commands to run after installation"""
- npm_version: Optional[str]
- """NPM version constraint"""
-
registry_url: Optional[str]
"""NPM registry URL"""
class Object(TypedDict, total=False):
+ """Object store source configuration"""
+
object_id: Required[str]
"""Object ID"""
@@ -44,20 +47,21 @@ class Object(TypedDict, total=False):
class Pip(TypedDict, total=False):
+ """Pip source configuration"""
+
package_name: Required[str]
"""Pip package name"""
agent_setup: Optional[SequenceNotStr[str]]
"""Setup commands to run after installation"""
- pip_version: Optional[str]
- """Pip version constraint"""
-
registry_url: Optional[str]
"""Pip registry URL"""
class AgentSource(TypedDict, total=False):
+ """Agent source configuration."""
+
type: Required[str]
"""Source type: npm, pip, object, or git"""
diff --git a/src/runloop_api_client/types/shared_params/launch_parameters.py b/src/runloop_api_client/types/shared_params/launch_parameters.py
index f0fe87636..cd2a97ee4 100644
--- a/src/runloop_api_client/types/shared_params/launch_parameters.py
+++ b/src/runloop_api_client/types/shared_params/launch_parameters.py
@@ -12,6 +12,11 @@
class UserParameters(TypedDict, total=False):
+ """Specify the user for execution on Devbox.
+
+ If not set, default `user` will be used.
+ """
+
uid: Required[int]
"""User ID (UID) for the Linux user. Must be a non-negative integer."""
@@ -20,6 +25,10 @@ class UserParameters(TypedDict, total=False):
class LaunchParameters(TypedDict, total=False):
+ """
+    LaunchParameters enable you to customize the resources available to your Devbox as well as the environment setup that should be completed before the Devbox is marked as 'running'.
+ """
+
after_idle: Optional[AfterIdle]
"""Configure Devbox lifecycle based on idle activity.
diff --git a/tests/api_resources/benchmarks/test_runs.py b/tests/api_resources/benchmarks/test_runs.py
index a95855518..9ab74fc9a 100644
--- a/tests/api_resources/benchmarks/test_runs.py
+++ b/tests/api_resources/benchmarks/test_runs.py
@@ -66,6 +66,7 @@ def test_method_list_with_all_params(self, client: Runloop) -> None:
run = client.benchmarks.runs.list(
benchmark_id="benchmark_id",
limit=0,
+ name="name",
starting_after="starting_after",
)
assert_matches_type(SyncBenchmarkRunsCursorIDPage[BenchmarkRunView], run, path=["response"])
@@ -268,6 +269,7 @@ async def test_method_list_with_all_params(self, async_client: AsyncRunloop) ->
run = await async_client.benchmarks.runs.list(
benchmark_id="benchmark_id",
limit=0,
+ name="name",
starting_after="starting_after",
)
assert_matches_type(AsyncBenchmarkRunsCursorIDPage[BenchmarkRunView], run, path=["response"])
diff --git a/tests/api_resources/scenarios/test_runs.py b/tests/api_resources/scenarios/test_runs.py
index 7b981e9bb..f3ac8eb88 100644
--- a/tests/api_resources/scenarios/test_runs.py
+++ b/tests/api_resources/scenarios/test_runs.py
@@ -72,9 +72,12 @@ def test_method_list(self, client: Runloop) -> None:
@parametrize
def test_method_list_with_all_params(self, client: Runloop) -> None:
run = client.scenarios.runs.list(
+ benchmark_run_id="benchmark_run_id",
limit=0,
+ name="name",
scenario_id="scenario_id",
starting_after="starting_after",
+ state="state",
)
assert_matches_type(SyncBenchmarkRunsCursorIDPage[ScenarioRunView], run, path=["response"])
@@ -320,9 +323,12 @@ async def test_method_list(self, async_client: AsyncRunloop) -> None:
@parametrize
async def test_method_list_with_all_params(self, async_client: AsyncRunloop) -> None:
run = await async_client.scenarios.runs.list(
+ benchmark_run_id="benchmark_run_id",
limit=0,
+ name="name",
scenario_id="scenario_id",
starting_after="starting_after",
+ state="state",
)
assert_matches_type(AsyncBenchmarkRunsCursorIDPage[ScenarioRunView], run, path=["response"])
diff --git a/tests/api_resources/test_agents.py b/tests/api_resources/test_agents.py
index 6f8096491..693eec250 100644
--- a/tests/api_resources/test_agents.py
+++ b/tests/api_resources/test_agents.py
@@ -22,6 +22,7 @@ class TestAgents:
def test_method_create(self, client: Runloop) -> None:
agent = client.agents.create(
name="name",
+ version="version",
)
assert_matches_type(AgentView, agent, path=["response"])
@@ -29,6 +30,7 @@ def test_method_create(self, client: Runloop) -> None:
def test_method_create_with_all_params(self, client: Runloop) -> None:
agent = client.agents.create(
name="name",
+ version="version",
source={
"type": "type",
"git": {
@@ -39,7 +41,6 @@ def test_method_create_with_all_params(self, client: Runloop) -> None:
"npm": {
"package_name": "package_name",
"agent_setup": ["string"],
- "npm_version": "npm_version",
"registry_url": "registry_url",
},
"object": {
@@ -49,7 +50,6 @@ def test_method_create_with_all_params(self, client: Runloop) -> None:
"pip": {
"package_name": "package_name",
"agent_setup": ["string"],
- "pip_version": "pip_version",
"registry_url": "registry_url",
},
},
@@ -60,6 +60,7 @@ def test_method_create_with_all_params(self, client: Runloop) -> None:
def test_raw_response_create(self, client: Runloop) -> None:
response = client.agents.with_raw_response.create(
name="name",
+ version="version",
)
assert response.is_closed is True
@@ -71,6 +72,7 @@ def test_raw_response_create(self, client: Runloop) -> None:
def test_streaming_response_create(self, client: Runloop) -> None:
with client.agents.with_streaming_response.create(
name="name",
+ version="version",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -131,6 +133,7 @@ def test_method_list_with_all_params(self, client: Runloop) -> None:
name="name",
search="search",
starting_after="starting_after",
+ version="version",
)
assert_matches_type(SyncAgentsCursorIDPage[AgentView], agent, path=["response"])
@@ -164,6 +167,7 @@ class TestAsyncAgents:
async def test_method_create(self, async_client: AsyncRunloop) -> None:
agent = await async_client.agents.create(
name="name",
+ version="version",
)
assert_matches_type(AgentView, agent, path=["response"])
@@ -171,6 +175,7 @@ async def test_method_create(self, async_client: AsyncRunloop) -> None:
async def test_method_create_with_all_params(self, async_client: AsyncRunloop) -> None:
agent = await async_client.agents.create(
name="name",
+ version="version",
source={
"type": "type",
"git": {
@@ -181,7 +186,6 @@ async def test_method_create_with_all_params(self, async_client: AsyncRunloop) -
"npm": {
"package_name": "package_name",
"agent_setup": ["string"],
- "npm_version": "npm_version",
"registry_url": "registry_url",
},
"object": {
@@ -191,7 +195,6 @@ async def test_method_create_with_all_params(self, async_client: AsyncRunloop) -
"pip": {
"package_name": "package_name",
"agent_setup": ["string"],
- "pip_version": "pip_version",
"registry_url": "registry_url",
},
},
@@ -202,6 +205,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncRunloop) -
async def test_raw_response_create(self, async_client: AsyncRunloop) -> None:
response = await async_client.agents.with_raw_response.create(
name="name",
+ version="version",
)
assert response.is_closed is True
@@ -213,6 +217,7 @@ async def test_raw_response_create(self, async_client: AsyncRunloop) -> None:
async def test_streaming_response_create(self, async_client: AsyncRunloop) -> None:
async with async_client.agents.with_streaming_response.create(
name="name",
+ version="version",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -273,6 +278,7 @@ async def test_method_list_with_all_params(self, async_client: AsyncRunloop) ->
name="name",
search="search",
starting_after="starting_after",
+ version="version",
)
assert_matches_type(AsyncAgentsCursorIDPage[AgentView], agent, path=["response"])
diff --git a/tests/api_resources/test_benchmarks.py b/tests/api_resources/test_benchmarks.py
index 891756def..bb001a532 100644
--- a/tests/api_resources/test_benchmarks.py
+++ b/tests/api_resources/test_benchmarks.py
@@ -108,7 +108,6 @@ def test_path_params_retrieve(self, client: Runloop) -> None:
def test_method_update(self, client: Runloop) -> None:
benchmark = client.benchmarks.update(
id="id",
- name="name",
)
assert_matches_type(BenchmarkView, benchmark, path=["response"])
@@ -116,10 +115,10 @@ def test_method_update(self, client: Runloop) -> None:
def test_method_update_with_all_params(self, client: Runloop) -> None:
benchmark = client.benchmarks.update(
id="id",
- name="name",
attribution="attribution",
description="description",
metadata={"foo": "string"},
+ name="name",
required_environment_variables=["string"],
required_secret_names=["string"],
scenario_ids=["string"],
@@ -130,7 +129,6 @@ def test_method_update_with_all_params(self, client: Runloop) -> None:
def test_raw_response_update(self, client: Runloop) -> None:
response = client.benchmarks.with_raw_response.update(
id="id",
- name="name",
)
assert response.is_closed is True
@@ -142,7 +140,6 @@ def test_raw_response_update(self, client: Runloop) -> None:
def test_streaming_response_update(self, client: Runloop) -> None:
with client.benchmarks.with_streaming_response.update(
id="id",
- name="name",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -157,7 +154,6 @@ def test_path_params_update(self, client: Runloop) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `id` but received ''"):
client.benchmarks.with_raw_response.update(
id="",
- name="name",
)
@parametrize
@@ -169,6 +165,7 @@ def test_method_list(self, client: Runloop) -> None:
def test_method_list_with_all_params(self, client: Runloop) -> None:
benchmark = client.benchmarks.list(
limit=0,
+ name="name",
starting_after="starting_after",
)
assert_matches_type(SyncBenchmarksCursorIDPage[BenchmarkView], benchmark, path=["response"])
@@ -344,6 +341,53 @@ def test_streaming_response_start_run(self, client: Runloop) -> None:
assert cast(Any, response.is_closed) is True
+ @parametrize
+ def test_method_update_scenarios(self, client: Runloop) -> None:
+ benchmark = client.benchmarks.update_scenarios(
+ id="id",
+ )
+ assert_matches_type(BenchmarkView, benchmark, path=["response"])
+
+ @parametrize
+ def test_method_update_scenarios_with_all_params(self, client: Runloop) -> None:
+ benchmark = client.benchmarks.update_scenarios(
+ id="id",
+ scenarios_to_add=["string"],
+ scenarios_to_remove=["string"],
+ )
+ assert_matches_type(BenchmarkView, benchmark, path=["response"])
+
+ @parametrize
+ def test_raw_response_update_scenarios(self, client: Runloop) -> None:
+ response = client.benchmarks.with_raw_response.update_scenarios(
+ id="id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ benchmark = response.parse()
+ assert_matches_type(BenchmarkView, benchmark, path=["response"])
+
+ @parametrize
+ def test_streaming_response_update_scenarios(self, client: Runloop) -> None:
+ with client.benchmarks.with_streaming_response.update_scenarios(
+ id="id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ benchmark = response.parse()
+ assert_matches_type(BenchmarkView, benchmark, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_update_scenarios(self, client: Runloop) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `id` but received ''"):
+ client.benchmarks.with_raw_response.update_scenarios(
+ id="",
+ )
+
class TestAsyncBenchmarks:
parametrize = pytest.mark.parametrize(
@@ -436,7 +480,6 @@ async def test_path_params_retrieve(self, async_client: AsyncRunloop) -> None:
async def test_method_update(self, async_client: AsyncRunloop) -> None:
benchmark = await async_client.benchmarks.update(
id="id",
- name="name",
)
assert_matches_type(BenchmarkView, benchmark, path=["response"])
@@ -444,10 +487,10 @@ async def test_method_update(self, async_client: AsyncRunloop) -> None:
async def test_method_update_with_all_params(self, async_client: AsyncRunloop) -> None:
benchmark = await async_client.benchmarks.update(
id="id",
- name="name",
attribution="attribution",
description="description",
metadata={"foo": "string"},
+ name="name",
required_environment_variables=["string"],
required_secret_names=["string"],
scenario_ids=["string"],
@@ -458,7 +501,6 @@ async def test_method_update_with_all_params(self, async_client: AsyncRunloop) -
async def test_raw_response_update(self, async_client: AsyncRunloop) -> None:
response = await async_client.benchmarks.with_raw_response.update(
id="id",
- name="name",
)
assert response.is_closed is True
@@ -470,7 +512,6 @@ async def test_raw_response_update(self, async_client: AsyncRunloop) -> None:
async def test_streaming_response_update(self, async_client: AsyncRunloop) -> None:
async with async_client.benchmarks.with_streaming_response.update(
id="id",
- name="name",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -485,7 +526,6 @@ async def test_path_params_update(self, async_client: AsyncRunloop) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `id` but received ''"):
await async_client.benchmarks.with_raw_response.update(
id="",
- name="name",
)
@parametrize
@@ -497,6 +537,7 @@ async def test_method_list(self, async_client: AsyncRunloop) -> None:
async def test_method_list_with_all_params(self, async_client: AsyncRunloop) -> None:
benchmark = await async_client.benchmarks.list(
limit=0,
+ name="name",
starting_after="starting_after",
)
assert_matches_type(AsyncBenchmarksCursorIDPage[BenchmarkView], benchmark, path=["response"])
@@ -671,3 +712,50 @@ async def test_streaming_response_start_run(self, async_client: AsyncRunloop) ->
assert_matches_type(BenchmarkRunView, benchmark, path=["response"])
assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_method_update_scenarios(self, async_client: AsyncRunloop) -> None:
+ benchmark = await async_client.benchmarks.update_scenarios(
+ id="id",
+ )
+ assert_matches_type(BenchmarkView, benchmark, path=["response"])
+
+ @parametrize
+ async def test_method_update_scenarios_with_all_params(self, async_client: AsyncRunloop) -> None:
+ benchmark = await async_client.benchmarks.update_scenarios(
+ id="id",
+ scenarios_to_add=["string"],
+ scenarios_to_remove=["string"],
+ )
+ assert_matches_type(BenchmarkView, benchmark, path=["response"])
+
+ @parametrize
+ async def test_raw_response_update_scenarios(self, async_client: AsyncRunloop) -> None:
+ response = await async_client.benchmarks.with_raw_response.update_scenarios(
+ id="id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ benchmark = await response.parse()
+ assert_matches_type(BenchmarkView, benchmark, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_update_scenarios(self, async_client: AsyncRunloop) -> None:
+ async with async_client.benchmarks.with_streaming_response.update_scenarios(
+ id="id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ benchmark = await response.parse()
+ assert_matches_type(BenchmarkView, benchmark, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_update_scenarios(self, async_client: AsyncRunloop) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `id` but received ''"):
+ await async_client.benchmarks.with_raw_response.update_scenarios(
+ id="",
+ )
diff --git a/tests/api_resources/test_blueprints.py b/tests/api_resources/test_blueprints.py
index 4be6d1677..578e4dfb5 100644
--- a/tests/api_resources/test_blueprints.py
+++ b/tests/api_resources/test_blueprints.py
@@ -183,6 +183,7 @@ def test_method_list_with_all_params(self, client: Runloop) -> None:
limit=0,
name="name",
starting_after="starting_after",
+ status="status",
)
assert_matches_type(SyncBlueprintsCursorIDPage[BlueprintView], blueprint, path=["response"])
@@ -323,6 +324,7 @@ def test_method_list_public_with_all_params(self, client: Runloop) -> None:
limit=0,
name="name",
starting_after="starting_after",
+ status="status",
)
assert_matches_type(SyncBlueprintsCursorIDPage[BlueprintView], blueprint, path=["response"])
@@ -641,6 +643,7 @@ async def test_method_list_with_all_params(self, async_client: AsyncRunloop) ->
limit=0,
name="name",
starting_after="starting_after",
+ status="status",
)
assert_matches_type(AsyncBlueprintsCursorIDPage[BlueprintView], blueprint, path=["response"])
@@ -781,6 +784,7 @@ async def test_method_list_public_with_all_params(self, async_client: AsyncRunlo
limit=0,
name="name",
starting_after="starting_after",
+ status="status",
)
assert_matches_type(AsyncBlueprintsCursorIDPage[BlueprintView], blueprint, path=["response"])
diff --git a/tests/api_resources/test_scenarios.py b/tests/api_resources/test_scenarios.py
index b9dadb8b9..3345828c1 100644
--- a/tests/api_resources/test_scenarios.py
+++ b/tests/api_resources/test_scenarios.py
@@ -291,6 +291,7 @@ def test_method_list_with_all_params(self, client: Runloop) -> None:
limit=0,
name="name",
starting_after="starting_after",
+ validation_type="validation_type",
)
assert_matches_type(SyncScenariosCursorIDPage[ScenarioView], scenario, path=["response"])
@@ -696,6 +697,7 @@ async def test_method_list_with_all_params(self, async_client: AsyncRunloop) ->
limit=0,
name="name",
starting_after="starting_after",
+ validation_type="validation_type",
)
assert_matches_type(AsyncScenariosCursorIDPage[ScenarioView], scenario, path=["response"])
diff --git a/tests/sdk/async_devbox/test_core.py b/tests/sdk/async_devbox/test_core.py
index 5d3405c80..9925fa724 100644
--- a/tests/sdk/async_devbox/test_core.py
+++ b/tests/sdk/async_devbox/test_core.py
@@ -26,21 +26,21 @@ class TestAsyncDevbox:
def test_init(self, mock_async_client: AsyncMock) -> None:
"""Test AsyncDevbox initialization."""
- devbox = AsyncDevbox(mock_async_client, "dev_123")
- assert devbox.id == "dev_123"
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
+ assert devbox.id == "dbx_123"
def test_repr(self, mock_async_client: AsyncMock) -> None:
"""Test AsyncDevbox string representation."""
- devbox = AsyncDevbox(mock_async_client, "dev_123")
-        assert repr(devbox) == "<AsyncDevbox id=dev_123>"
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
+        assert repr(devbox) == "<AsyncDevbox id=dbx_123>"
@pytest.mark.asyncio
async def test_context_manager_enter_exit(self, mock_async_client: AsyncMock, devbox_view: MockDevboxView) -> None:
"""Test context manager behavior with successful shutdown."""
mock_async_client.devboxes.shutdown = AsyncMock(return_value=devbox_view)
- async with AsyncDevbox(mock_async_client, "dev_123") as devbox:
- assert devbox.id == "dev_123"
+ async with AsyncDevbox(mock_async_client, "dbx_123") as devbox:
+ assert devbox.id == "dbx_123"
call_kwargs = mock_async_client.devboxes.shutdown.call_args[1]
assert "timeout" not in call_kwargs
@@ -51,7 +51,7 @@ async def test_context_manager_exception_handling(self, mock_async_client: Async
mock_async_client.devboxes.shutdown = AsyncMock(side_effect=RuntimeError("Shutdown failed"))
with pytest.raises(ValueError, match="Test error"):
- async with AsyncDevbox(mock_async_client, "dev_123"):
+ async with AsyncDevbox(mock_async_client, "dbx_123"):
raise ValueError("Test error")
# Shutdown should be called even when body raises exception
@@ -62,7 +62,7 @@ async def test_get_info(self, mock_async_client: AsyncMock, devbox_view: MockDev
"""Test get_info method."""
mock_async_client.devboxes.retrieve = AsyncMock(return_value=devbox_view)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
result = await devbox.get_info(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -72,7 +72,7 @@ async def test_get_info(self, mock_async_client: AsyncMock, devbox_view: MockDev
assert result == devbox_view
mock_async_client.devboxes.retrieve.assert_called_once_with(
- "dev_123",
+ "dbx_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
@@ -85,12 +85,12 @@ async def test_await_running(self, mock_async_client: AsyncMock, devbox_view: Mo
mock_async_client.devboxes.await_running = AsyncMock(return_value=devbox_view)
polling_config = PollingConfig(timeout_seconds=60.0)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
result = await devbox.await_running(polling_config=polling_config)
assert result == devbox_view
mock_async_client.devboxes.await_running.assert_called_once_with(
- "dev_123",
+ "dbx_123",
polling_config=polling_config,
)
@@ -100,12 +100,12 @@ async def test_await_suspended(self, mock_async_client: AsyncMock, devbox_view:
mock_async_client.devboxes.await_suspended = AsyncMock(return_value=devbox_view)
polling_config = PollingConfig(timeout_seconds=60.0)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
result = await devbox.await_suspended(polling_config=polling_config)
assert result == devbox_view
mock_async_client.devboxes.await_suspended.assert_called_once_with(
- "dev_123",
+ "dbx_123",
polling_config=polling_config,
)
@@ -114,7 +114,7 @@ async def test_shutdown(self, mock_async_client: AsyncMock, devbox_view: MockDev
"""Test shutdown method."""
mock_async_client.devboxes.shutdown = AsyncMock(return_value=devbox_view)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
result = await devbox.shutdown(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -125,7 +125,7 @@ async def test_shutdown(self, mock_async_client: AsyncMock, devbox_view: MockDev
assert result == devbox_view
mock_async_client.devboxes.shutdown.assert_called_once_with(
- "dev_123",
+ "dbx_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
@@ -138,7 +138,7 @@ async def test_suspend(self, mock_async_client: AsyncMock, devbox_view: MockDevb
"""Test suspend method."""
mock_async_client.devboxes.suspend = AsyncMock(return_value=devbox_view)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
result = await devbox.suspend(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -149,7 +149,7 @@ async def test_suspend(self, mock_async_client: AsyncMock, devbox_view: MockDevb
assert result == devbox_view
mock_async_client.devboxes.suspend.assert_called_once_with(
- "dev_123",
+ "dbx_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
@@ -162,7 +162,7 @@ async def test_resume(self, mock_async_client: AsyncMock, devbox_view: MockDevbo
"""Test resume method."""
mock_async_client.devboxes.resume = AsyncMock(return_value=devbox_view)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
result = await devbox.resume(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -173,7 +173,7 @@ async def test_resume(self, mock_async_client: AsyncMock, devbox_view: MockDevbo
assert result == devbox_view
mock_async_client.devboxes.resume.assert_called_once_with(
- "dev_123",
+ "dbx_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
@@ -186,7 +186,7 @@ async def test_keep_alive(self, mock_async_client: AsyncMock) -> None:
"""Test keep_alive method."""
mock_async_client.devboxes.keep_alive = AsyncMock(return_value=object())
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
result = await devbox.keep_alive(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -197,7 +197,7 @@ async def test_keep_alive(self, mock_async_client: AsyncMock) -> None:
assert result is not None # Verify return value is propagated
mock_async_client.devboxes.keep_alive.assert_called_once_with(
- "dev_123",
+ "dbx_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
@@ -208,13 +208,13 @@ async def test_keep_alive(self, mock_async_client: AsyncMock) -> None:
@pytest.mark.asyncio
async def test_snapshot_disk(self, mock_async_client: AsyncMock) -> None:
"""Test snapshot_disk waits for completion."""
- snapshot_data = SimpleNamespace(id="snap_123")
+ snapshot_data = SimpleNamespace(id="snp_123")
snapshot_status = SimpleNamespace(status="completed")
mock_async_client.devboxes.snapshot_disk_async = AsyncMock(return_value=snapshot_data)
mock_async_client.devboxes.disk_snapshots.await_completed = AsyncMock(return_value=snapshot_status)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
polling_config = PollingConfig(timeout_seconds=60.0)
snapshot = await devbox.snapshot_disk(
name="test-snapshot",
@@ -223,7 +223,7 @@ async def test_snapshot_disk(self, mock_async_client: AsyncMock) -> None:
extra_headers={"X-Custom": "value"},
)
- assert snapshot.id == "snap_123"
+ assert snapshot.id == "snp_123"
mock_async_client.devboxes.snapshot_disk_async.assert_called_once()
call_kwargs = mock_async_client.devboxes.snapshot_disk_async.call_args[1]
assert "commit_message" not in call_kwargs
@@ -240,17 +240,17 @@ async def test_snapshot_disk(self, mock_async_client: AsyncMock) -> None:
@pytest.mark.asyncio
async def test_snapshot_disk_async(self, mock_async_client: AsyncMock) -> None:
"""Test snapshot_disk_async returns immediately."""
- snapshot_data = SimpleNamespace(id="snap_123")
+ snapshot_data = SimpleNamespace(id="snp_123")
mock_async_client.devboxes.snapshot_disk_async = AsyncMock(return_value=snapshot_data)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
snapshot = await devbox.snapshot_disk_async(
name="test-snapshot",
metadata={"key": "value"},
extra_headers={"X-Custom": "value"},
)
- assert snapshot.id == "snap_123"
+ assert snapshot.id == "snp_123"
mock_async_client.devboxes.snapshot_disk_async.assert_called_once()
call_kwargs = mock_async_client.devboxes.snapshot_disk_async.call_args[1]
assert "commit_message" not in call_kwargs
@@ -265,7 +265,7 @@ async def test_close(self, mock_async_client: AsyncMock, devbox_view: MockDevbox
"""Test close method calls shutdown."""
mock_async_client.devboxes.shutdown = AsyncMock(return_value=devbox_view)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
await devbox.close()
mock_async_client.devboxes.shutdown.assert_called_once()
@@ -274,21 +274,21 @@ async def test_close(self, mock_async_client: AsyncMock, devbox_view: MockDevbox
def test_cmd_property(self, mock_async_client: AsyncMock) -> None:
"""Test cmd property returns AsyncCommandInterface."""
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
cmd = devbox.cmd
assert isinstance(cmd, AsyncCommandInterface)
assert cmd._devbox is devbox
def test_file_property(self, mock_async_client: AsyncMock) -> None:
"""Test file property returns AsyncFileInterface."""
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
file_interface = devbox.file
assert isinstance(file_interface, AsyncFileInterface)
assert file_interface._devbox is devbox
def test_net_property(self, mock_async_client: AsyncMock) -> None:
"""Test net property returns AsyncNetworkInterface."""
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
net = devbox.net
assert isinstance(net, AsyncNetworkInterface)
assert net._devbox is devbox
diff --git a/tests/sdk/async_devbox/test_edge_cases.py b/tests/sdk/async_devbox/test_edge_cases.py
index fa5b89c7a..94d9e661b 100644
--- a/tests/sdk/async_devbox/test_edge_cases.py
+++ b/tests/sdk/async_devbox/test_edge_cases.py
@@ -21,6 +21,6 @@ async def test_async_network_error(self, mock_async_client: AsyncMock) -> None:
"""Test handling of network errors in async."""
mock_async_client.devboxes.retrieve = AsyncMock(side_effect=httpx.NetworkError("Connection failed"))
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
with pytest.raises(httpx.NetworkError):
await devbox.get_info()
diff --git a/tests/sdk/async_devbox/test_interfaces.py b/tests/sdk/async_devbox/test_interfaces.py
index bcb2a306b..52c439c22 100644
--- a/tests/sdk/async_devbox/test_interfaces.py
+++ b/tests/sdk/async_devbox/test_interfaces.py
@@ -27,7 +27,7 @@ async def test_exec_without_callbacks(
mock_async_client.devboxes.execute_async = AsyncMock(return_value=execution_view)
mock_async_client.devboxes.executions.await_completed = AsyncMock(return_value=execution_view)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
result = await devbox.cmd.exec("echo hello")
assert result.exit_code == 0
@@ -42,13 +42,13 @@ async def test_exec_without_callbacks(
async def test_exec_with_stdout_callback(self, mock_async_client: AsyncMock, mock_async_stream: AsyncMock) -> None:
"""Test exec with stdout callback."""
execution_async = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
)
execution_completed = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="output",
@@ -61,7 +61,7 @@ async def test_exec_with_stdout_callback(self, mock_async_client: AsyncMock, moc
stdout_calls: list[str] = []
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
result = await devbox.cmd.exec("echo hello", stdout=stdout_calls.append)
assert result.exit_code == 0
@@ -73,19 +73,19 @@ async def test_exec_async_returns_execution(
) -> None:
"""Test exec_async returns AsyncExecution object."""
execution_async = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
)
mock_async_client.devboxes.execute_async = AsyncMock(return_value=execution_async)
mock_async_client.devboxes.executions.stream_stdout_updates = AsyncMock(return_value=mock_async_stream)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
execution = await devbox.cmd.exec_async("long-running command")
- assert execution.execution_id == "exec_123"
- assert execution.devbox_id == "dev_123"
+ assert execution.execution_id == "exn_123"
+ assert execution.devbox_id == "dbx_123"
mock_async_client.devboxes.execute_async.assert_called_once()
@@ -97,7 +97,7 @@ async def test_read(self, mock_async_client: AsyncMock) -> None:
"""Test file read."""
mock_async_client.devboxes.read_file_contents = AsyncMock(return_value="file content")
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
result = await devbox.file.read(file_path="/path/to/file")
assert result == "file content"
@@ -109,7 +109,7 @@ async def test_write_string(self, mock_async_client: AsyncMock) -> None:
execution_detail = SimpleNamespace()
mock_async_client.devboxes.write_file_contents = AsyncMock(return_value=execution_detail)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
result = await devbox.file.write(file_path="/path/to/file", contents="content")
assert result == execution_detail
@@ -121,7 +121,7 @@ async def test_write_bytes(self, mock_async_client: AsyncMock) -> None:
execution_detail = SimpleNamespace()
mock_async_client.devboxes.write_file_contents = AsyncMock(return_value=execution_detail)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
result = await devbox.file.write(file_path="/path/to/file", contents="content")
assert result == execution_detail
@@ -134,7 +134,7 @@ async def test_download(self, mock_async_client: AsyncMock) -> None:
mock_response.read = AsyncMock(return_value=b"file content")
mock_async_client.devboxes.download_file = AsyncMock(return_value=mock_response)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
result = await devbox.file.download(path="/path/to/file")
assert result == b"file content"
@@ -146,7 +146,7 @@ async def test_upload(self, mock_async_client: AsyncMock, tmp_path: Path) -> Non
execution_detail = SimpleNamespace()
mock_async_client.devboxes.upload_file = AsyncMock(return_value=execution_detail)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
# Create a temporary file for upload
temp_file = tmp_path / "test_file.txt"
temp_file.write_text("test content")
@@ -166,7 +166,7 @@ async def test_create_ssh_key(self, mock_async_client: AsyncMock) -> None:
ssh_key_response = SimpleNamespace(public_key="ssh-rsa ...")
mock_async_client.devboxes.create_ssh_key = AsyncMock(return_value=ssh_key_response)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
result = await devbox.net.create_ssh_key(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -184,7 +184,7 @@ async def test_create_tunnel(self, mock_async_client: AsyncMock) -> None:
tunnel_view = SimpleNamespace(tunnel_id="tunnel_123")
mock_async_client.devboxes.create_tunnel = AsyncMock(return_value=tunnel_view)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
result = await devbox.net.create_tunnel(
port=8080,
extra_headers={"X-Custom": "value"},
@@ -202,7 +202,7 @@ async def test_remove_tunnel(self, mock_async_client: AsyncMock) -> None:
"""Test remove tunnel."""
mock_async_client.devboxes.remove_tunnel = AsyncMock(return_value=object())
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
result = await devbox.net.remove_tunnel(
port=8080,
extra_headers={"X-Custom": "value"},
diff --git a/tests/sdk/async_devbox/test_streaming.py b/tests/sdk/async_devbox/test_streaming.py
index cd33a8f26..3bb3e1a7b 100644
--- a/tests/sdk/async_devbox/test_streaming.py
+++ b/tests/sdk/async_devbox/test_streaming.py
@@ -25,8 +25,8 @@ class TestAsyncDevboxStreaming:
def test_start_streaming_no_callbacks(self, mock_async_client: AsyncMock) -> None:
"""Test _start_streaming returns None when no callbacks."""
- devbox = AsyncDevbox(mock_async_client, "dev_123")
- result = devbox._start_streaming("exec_123", stdout=None, stderr=None, output=None)
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
+ result = devbox._start_streaming("exn_123", stdout=None, stderr=None, output=None)
assert result is None
@pytest.mark.asyncio
@@ -46,9 +46,9 @@ async def async_iter():
mock_async_client.devboxes.executions.stream_stdout_updates = AsyncMock(return_value=mock_async_stream)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
stdout_calls: list[str] = []
- result = devbox._start_streaming("exec_123", stdout=stdout_calls.append, stderr=None, output=None)
+ result = devbox._start_streaming("exn_123", stdout=stdout_calls.append, stderr=None, output=None)
assert result is not None
assert isinstance(result, _AsyncStreamingGroup)
@@ -76,9 +76,9 @@ async def async_iter():
mock_async_client.devboxes.executions.stream_stderr_updates = AsyncMock(return_value=mock_async_stream)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
stderr_calls: list[str] = []
- result = devbox._start_streaming("exec_123", stdout=None, stderr=stderr_calls.append, output=None)
+ result = devbox._start_streaming("exn_123", stdout=None, stderr=stderr_calls.append, output=None)
assert result is not None
assert isinstance(result, _AsyncStreamingGroup)
@@ -107,9 +107,9 @@ async def async_iter():
mock_async_client.devboxes.executions.stream_stdout_updates = AsyncMock(return_value=mock_async_stream)
mock_async_client.devboxes.executions.stream_stderr_updates = AsyncMock(return_value=mock_async_stream)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
output_calls: list[str] = []
- result = devbox._start_streaming("exec_123", stdout=None, stderr=None, output=output_calls.append)
+ result = devbox._start_streaming("exn_123", stdout=None, stderr=None, output=output_calls.append)
assert result is not None
assert isinstance(result, _AsyncStreamingGroup)
@@ -136,7 +136,7 @@ async def async_iter() -> AsyncIterator[SimpleNamespace]:
mock_async_stream.__aenter__ = AsyncMock(return_value=mock_async_stream)
mock_async_stream.__aexit__ = AsyncMock(return_value=None)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
calls: list[str] = []
async def stream_factory() -> AsyncStream[ExecutionUpdateChunk]:
@@ -166,7 +166,7 @@ async def async_iter() -> AsyncIterator[SimpleNamespace]:
mock_async_stream.__aenter__ = AsyncMock(return_value=mock_async_stream)
mock_async_stream.__aexit__ = AsyncMock(return_value=None)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
calls: list[str] = []
async def stream_factory() -> AsyncStream[ExecutionUpdateChunk]:
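
For reference, the async-stream fixtures these tests patch all follow one protocol: an async context manager that is also an async iterator of update chunks. A minimal standalone sketch of that protocol, assuming each chunk carries its text in an `output` attribute (the real `ExecutionUpdateChunk` field name may differ):

import asyncio
from types import SimpleNamespace


class FakeUpdateStream:
    """Stand-in for the object returned by stream_stdout_updates()."""

    def __init__(self, chunks: list[str]) -> None:
        self._chunks = chunks

    async def __aenter__(self) -> "FakeUpdateStream":
        return self

    async def __aexit__(self, *exc: object) -> None:
        return None

    async def __aiter__(self):
        for text in self._chunks:
            # Chunk shape is an assumption; the real field may differ.
            yield SimpleNamespace(output=text)


async def main() -> None:
    # Mirrors what a streaming task does: drain the stream, invoke the callback.
    stdout_calls: list[str] = []
    async with FakeUpdateStream(["line 1", "line 2"]) as stream:
        async for chunk in stream:
            stdout_calls.append(chunk.output)
    assert stdout_calls == ["line 1", "line 2"]


asyncio.run(main())
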
diff --git a/tests/sdk/conftest.py b/tests/sdk/conftest.py
index c5546fe55..f22b542c6 100644
--- a/tests/sdk/conftest.py
+++ b/tests/sdk/conftest.py
@@ -15,13 +15,17 @@
# Test ID constants
TEST_IDS = {
- "devbox": "dev_123",
- "execution": "exec_123",
- "snapshot": "snap_123",
- "blueprint": "bp_123",
+ "devbox": "dbx_123",
+ "execution": "exn_123",
+ "snapshot": "snp_123",
+ "blueprint": "bpt_123",
"object": "obj_123",
- "scorer": "scorer_123",
- "agent": "agent_123",
+ "scorer": "sco_123",
+ "agent": "agt_123",
+ "scenario": "scn_123",
+ "scenario_run": "scr_123",
+ "benchmark": "bmd_123",
+ "benchmark_run": "bmr_123",
}
# Test URL constants
@@ -42,7 +46,7 @@
class MockDevboxView:
"""Mock DevboxView for testing."""
- id: str = "dev_123"
+ id: str = TEST_IDS["devbox"]
status: str = "running"
name: str = "test-devbox"
@@ -51,8 +55,8 @@ class MockDevboxView:
class MockExecutionView:
"""Mock DevboxAsyncExecutionDetailView for testing."""
- execution_id: str = "exec_123"
- devbox_id: str = "dev_123"
+ execution_id: str = TEST_IDS["execution"]
+ devbox_id: str = TEST_IDS["devbox"]
status: str = "completed"
exit_status: int = 0
stdout: str = "output"
@@ -65,7 +69,7 @@ class MockExecutionView:
class MockSnapshotView:
"""Mock DevboxSnapshotView for testing."""
- id: str = "snap_123"
+ id: str = TEST_IDS["snapshot"]
status: str = "completed"
name: str = "test-snapshot"
@@ -74,7 +78,7 @@ class MockSnapshotView:
class MockBlueprintView:
"""Mock BlueprintView for testing."""
- id: str = "bp_123"
+ id: str = TEST_IDS["blueprint"]
status: str = "built"
name: str = "test-blueprint"
@@ -83,7 +87,7 @@ class MockBlueprintView:
class MockObjectView:
"""Mock ObjectView for testing."""
- id: str = "obj_123"
+ id: str = TEST_IDS["object"]
upload_url: str = "https://upload.example.com/obj_123"
name: str = "test-object"
@@ -92,7 +96,7 @@ class MockObjectView:
class MockScorerView:
"""Mock ScorerView for testing."""
- id: str = "scorer_123"
+ id: str = TEST_IDS["scorer"]
bash_script: str = "echo 'score=1.0'"
type: str = "test_scorer"
@@ -101,7 +105,7 @@ class MockScorerView:
class MockAgentView:
"""Mock AgentView for testing."""
- id: str = "agent_123"
+ id: str = TEST_IDS["agent"]
name: str = "test-agent"
create_time_ms: int = 1234567890000
is_public: bool = False
@@ -112,7 +116,7 @@ class MockAgentView:
class MockScenarioView:
"""Mock ScenarioView for testing."""
- id: str = "scn_123"
+ id: str = TEST_IDS["scenario"]
name: str = "test-scenario"
metadata: Dict[str, str] = field(default_factory=dict)
@@ -121,14 +125,48 @@ class MockScenarioView:
class MockScenarioRunView:
"""Mock ScenarioRunView for testing."""
- id: str = "run_123"
- devbox_id: str = "dev_123"
- scenario_id: str = "scn_123"
+ id: str = TEST_IDS["scenario_run"]
+ devbox_id: str = TEST_IDS["devbox"]
+ scenario_id: str = TEST_IDS["scenario"]
state: str = "running"
metadata: Dict[str, str] = field(default_factory=dict)
scoring_contract_result: object = None
+@dataclass
+class MockBenchmarkView:
+ """Mock BenchmarkView for testing."""
+
+ id: str = TEST_IDS["benchmark"]
+ name: str = "test-benchmark"
+ metadata: Dict[str, str] = field(default_factory=dict)
+ scenario_ids: list[str] = field(default_factory=list)
+
+
+@dataclass
+class MockBenchmarkRunView:
+ """Mock BenchmarkRunView for testing."""
+
+ id: str = TEST_IDS["benchmark_run"]
+ benchmark_id: str = TEST_IDS["benchmark"]
+ state: str = "running"
+ metadata: Dict[str, str] = field(default_factory=dict)
+ start_time_ms: int = 1234567890000
+ duration_ms: int | None = None
+ score: float | None = None
+
+
+class AsyncIterableMock:
+ """A simple async iterable mock for testing paginated responses."""
+
+ def __init__(self, items: list[Any]) -> None:
+ self._items = items
+
+ async def __aiter__(self):
+ for item in self._items:
+ yield item
+
+
def create_mock_httpx_client(methods: dict[str, Any] | None = None) -> AsyncMock:
"""
Create a mock httpx.AsyncClient with proper context manager setup.
@@ -237,6 +275,18 @@ def scenario_run_view() -> MockScenarioRunView:
return MockScenarioRunView()
+@pytest.fixture
+def benchmark_view() -> MockBenchmarkView:
+ """Create a mock BenchmarkView."""
+ return MockBenchmarkView()
+
+
+@pytest.fixture
+def benchmark_run_view() -> MockBenchmarkRunView:
+ """Create a mock BenchmarkRunView."""
+ return MockBenchmarkRunView()
+
+
@pytest.fixture
def mock_httpx_response() -> Mock:
"""Create a mock httpx.Response."""
diff --git a/tests/sdk/devbox/test_core.py b/tests/sdk/devbox/test_core.py
index b482e030b..c12b02485 100644
--- a/tests/sdk/devbox/test_core.py
+++ b/tests/sdk/devbox/test_core.py
@@ -29,20 +29,20 @@ class TestDevbox:
def test_init(self, mock_client: Mock) -> None:
"""Test Devbox initialization."""
- devbox = Devbox(mock_client, "dev_123")
- assert devbox.id == "dev_123"
+ devbox = Devbox(mock_client, "dbx_123")
+ assert devbox.id == "dbx_123"
def test_repr(self, mock_client: Mock) -> None:
"""Test Devbox string representation."""
- devbox = Devbox(mock_client, "dev_123")
- assert repr(devbox) == "<Devbox dev_123>"
+ devbox = Devbox(mock_client, "dbx_123")
+ assert repr(devbox) == "<Devbox dbx_123>"
def test_context_manager_enter_exit(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
"""Test context manager behavior with successful shutdown."""
mock_client.devboxes.shutdown.return_value = devbox_view
- with Devbox(mock_client, "dev_123") as devbox:
- assert devbox.id == "dev_123"
+ with Devbox(mock_client, "dbx_123") as devbox:
+ assert devbox.id == "dbx_123"
call_kwargs = mock_client.devboxes.shutdown.call_args[1]
assert "timeout" not in call_kwargs
@@ -52,7 +52,7 @@ def test_context_manager_exception_handling(self, mock_client: Mock) -> None:
mock_client.devboxes.shutdown.side_effect = RuntimeError("Shutdown failed")
with pytest.raises(ValueError, match="Test error"):
- with Devbox(mock_client, "dev_123"):
+ with Devbox(mock_client, "dbx_123"):
raise ValueError("Test error")
# Shutdown should be called even when body raises exception
@@ -62,7 +62,7 @@ def test_get_info(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
"""Test get_info method."""
mock_client.devboxes.retrieve.return_value = devbox_view
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.get_info(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -72,7 +72,7 @@ def test_get_info(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
assert result == devbox_view
mock_client.devboxes.retrieve.assert_called_once_with(
- "dev_123",
+ "dbx_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
@@ -84,12 +84,12 @@ def test_await_running(self, mock_client: Mock, devbox_view: MockDevboxView) ->
mock_client.devboxes.await_running.return_value = devbox_view
polling_config = PollingConfig(timeout_seconds=60.0)
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.await_running(polling_config=polling_config)
assert result == devbox_view
mock_client.devboxes.await_running.assert_called_once_with(
- "dev_123",
+ "dbx_123",
polling_config=polling_config,
)
@@ -98,12 +98,12 @@ def test_await_suspended(self, mock_client: Mock, devbox_view: MockDevboxView) -
mock_client.devboxes.await_suspended.return_value = devbox_view
polling_config = PollingConfig(timeout_seconds=60.0)
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.await_suspended(polling_config=polling_config)
assert result == devbox_view
mock_client.devboxes.await_suspended.assert_called_once_with(
- "dev_123",
+ "dbx_123",
polling_config=polling_config,
)
@@ -111,7 +111,7 @@ def test_shutdown(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
"""Test shutdown method."""
mock_client.devboxes.shutdown.return_value = devbox_view
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.shutdown(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -122,7 +122,7 @@ def test_shutdown(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
assert result == devbox_view
mock_client.devboxes.shutdown.assert_called_once_with(
- "dev_123",
+ "dbx_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
@@ -136,7 +136,7 @@ def test_suspend(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
mock_client.devboxes.await_suspended.return_value = devbox_view
polling_config = PollingConfig(timeout_seconds=60.0)
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.suspend(
polling_config=polling_config,
extra_headers={"X-Custom": "value"},
@@ -148,7 +148,7 @@ def test_suspend(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
assert result == devbox_view
mock_client.devboxes.suspend.assert_called_once_with(
- "dev_123",
+ "dbx_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
@@ -156,7 +156,7 @@ def test_suspend(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
idempotency_key="key-123",
)
mock_client.devboxes.await_suspended.assert_called_once_with(
- "dev_123",
+ "dbx_123",
polling_config=polling_config,
)
@@ -166,7 +166,7 @@ def test_resume(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
mock_client.devboxes.await_running.return_value = devbox_view
polling_config = PollingConfig(timeout_seconds=60.0)
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.resume(
polling_config=polling_config,
extra_headers={"X-Custom": "value"},
@@ -178,7 +178,7 @@ def test_resume(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
assert result == devbox_view
mock_client.devboxes.resume.assert_called_once_with(
- "dev_123",
+ "dbx_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
@@ -186,7 +186,7 @@ def test_resume(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
idempotency_key="key-123",
)
mock_client.devboxes.await_running.assert_called_once_with(
- "dev_123",
+ "dbx_123",
polling_config=polling_config,
)
@@ -194,7 +194,7 @@ def test_keep_alive(self, mock_client: Mock) -> None:
"""Test keep_alive method."""
mock_client.devboxes.keep_alive.return_value = object()
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.keep_alive(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -205,7 +205,7 @@ def test_keep_alive(self, mock_client: Mock) -> None:
assert result is not None # Verify return value is propagated
mock_client.devboxes.keep_alive.assert_called_once_with(
- "dev_123",
+ "dbx_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
@@ -215,13 +215,13 @@ def test_keep_alive(self, mock_client: Mock) -> None:
def test_snapshot_disk(self, mock_client: Mock) -> None:
"""Test snapshot_disk waits for completion."""
- snapshot_data = SimpleNamespace(id="snap_123")
+ snapshot_data = SimpleNamespace(id="snp_123")
snapshot_status = SimpleNamespace(status="completed")
mock_client.devboxes.snapshot_disk_async.return_value = snapshot_data
mock_client.devboxes.disk_snapshots.await_completed.return_value = snapshot_status
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
polling_config = PollingConfig(timeout_seconds=60.0)
snapshot = devbox.snapshot_disk(
name="test-snapshot",
@@ -230,7 +230,7 @@ def test_snapshot_disk(self, mock_client: Mock) -> None:
extra_headers={"X-Custom": "value"},
)
- assert snapshot.id == "snap_123"
+ assert snapshot.id == "snp_123"
call_kwargs = mock_client.devboxes.snapshot_disk_async.call_args[1]
assert "commit_message" not in call_kwargs or call_kwargs["commit_message"] in (omit, None)
assert call_kwargs["metadata"] == {"key": "value"}
@@ -244,17 +244,17 @@ def test_snapshot_disk(self, mock_client: Mock) -> None:
def test_snapshot_disk_async(self, mock_client: Mock) -> None:
"""Test snapshot_disk_async returns immediately."""
- snapshot_data = SimpleNamespace(id="snap_123")
+ snapshot_data = SimpleNamespace(id="snp_123")
mock_client.devboxes.snapshot_disk_async.return_value = snapshot_data
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
snapshot = devbox.snapshot_disk_async(
name="test-snapshot",
metadata={"key": "value"},
extra_headers={"X-Custom": "value"},
)
- assert snapshot.id == "snap_123"
+ assert snapshot.id == "snp_123"
call_kwargs = mock_client.devboxes.snapshot_disk_async.call_args[1]
assert "commit_message" not in call_kwargs or call_kwargs["commit_message"] in (omit, None)
assert call_kwargs["metadata"] == {"key": "value"}
@@ -270,7 +270,7 @@ def test_close(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
"""Test close method calls shutdown."""
mock_client.devboxes.shutdown.return_value = devbox_view
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
devbox.close()
call_kwargs = mock_client.devboxes.shutdown.call_args[1]
@@ -278,21 +278,21 @@ def test_close(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
def test_cmd_property(self, mock_client: Mock) -> None:
"""Test cmd property returns CommandInterface."""
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
cmd = devbox.cmd
assert isinstance(cmd, CommandInterface)
assert cmd._devbox is devbox
def test_file_property(self, mock_client: Mock) -> None:
"""Test file property returns FileInterface."""
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
file_interface = devbox.file
assert isinstance(file_interface, FileInterface)
assert file_interface._devbox is devbox
def test_net_property(self, mock_client: Mock) -> None:
"""Test net property returns NetworkInterface."""
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
net = devbox.net
assert isinstance(net, NetworkInterface)
assert net._devbox is devbox
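
These hunks leave the lifecycle contract unchanged: leaving a `with Devbox(...)` block calls shutdown exactly once, even when the body raises, and a failing shutdown must not mask the body's exception. A hedged standalone reduction of that contract (`DevboxSketch` is hypothetical; it reproduces only what the tests assert):

from unittest.mock import Mock


class DevboxSketch:
    """Hypothetical reduction of Devbox's context-manager behavior, per the tests."""

    def __init__(self, client, devbox_id: str) -> None:
        self._client = client
        self.id = devbox_id

    def __enter__(self) -> "DevboxSketch":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        try:
            # Best-effort shutdown on exit, even when the body raised.
            self._client.devboxes.shutdown(self.id)
        except Exception:
            # A failing shutdown must not mask the body's exception.
            pass


client = Mock()
try:
    with DevboxSketch(client, "dbx_123"):
        raise ValueError("boom")
except ValueError:
    pass

client.devboxes.shutdown.assert_called_once_with("dbx_123")
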
diff --git a/tests/sdk/devbox/test_edge_cases.py b/tests/sdk/devbox/test_edge_cases.py
index ff2491f66..23341f0c0 100644
--- a/tests/sdk/devbox/test_edge_cases.py
+++ b/tests/sdk/devbox/test_edge_cases.py
@@ -31,7 +31,7 @@ def test_network_error(self, mock_client: Mock) -> None:
"""Test handling of network errors."""
mock_client.devboxes.retrieve.side_effect = httpx.NetworkError("Connection failed")
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
with pytest.raises(httpx.NetworkError):
devbox.get_info()
@@ -50,7 +50,7 @@ def test_api_error(self, mock_client: Mock, status_code: int, message: str) -> N
mock_client.devboxes.retrieve.side_effect = error
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
with pytest.raises(APIStatusError):
devbox.get_info()
@@ -58,7 +58,7 @@ def test_timeout_error(self, mock_client: Mock) -> None:
"""Test handling of timeout errors."""
mock_client.devboxes.retrieve.side_effect = httpx.TimeoutException("Request timed out")
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
with pytest.raises(httpx.TimeoutException):
devbox.get_info(timeout=1.0)
@@ -68,19 +68,19 @@ class TestDevboxEdgeCases:
def test_empty_responses(self, mock_client: Mock) -> None:
"""Test handling of empty responses."""
- empty_view = SimpleNamespace(id="dev_123", status="", name="")
+ empty_view = SimpleNamespace(id="dbx_123", status="", name="")
mock_client.devboxes.retrieve.return_value = empty_view
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.get_info()
assert result == empty_view
def test_none_values(self, mock_client: Mock) -> None:
"""Test handling of None values."""
- view_with_none = SimpleNamespace(id="dev_123", status=None, name=None)
+ view_with_none = SimpleNamespace(id="dbx_123", status=None, name=None)
mock_client.devboxes.retrieve.return_value = view_with_none
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.get_info()
assert result.status is None
assert result.name is None
@@ -89,9 +89,9 @@ def test_concurrent_operations(
self, mock_client: Mock, thread_cleanup: tuple[list[threading.Thread], list[threading.Event]]
) -> None:
"""Test concurrent operations."""
- mock_client.devboxes.retrieve.return_value = SimpleNamespace(id="dev_123", status="running")
+ mock_client.devboxes.retrieve.return_value = SimpleNamespace(id="dbx_123", status="running")
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
results: list[DevboxView] = []
def get_info() -> None:
@@ -118,13 +118,13 @@ def test_context_manager_vs_manual_cleanup(self, mock_client: Mock, devbox_view:
mock_client.devboxes.shutdown.return_value = devbox_view
# Context manager approach (Pythonic)
- with Devbox(mock_client, "dev_123"):
+ with Devbox(mock_client, "dbx_123"):
pass
mock_client.devboxes.shutdown.assert_called_once()
# Manual cleanup (TypeScript-like)
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
devbox.shutdown()
assert mock_client.devboxes.shutdown.call_count == 2
diff --git a/tests/sdk/devbox/test_interfaces.py b/tests/sdk/devbox/test_interfaces.py
index a8ca574ba..66ef8fa7b 100644
--- a/tests/sdk/devbox/test_interfaces.py
+++ b/tests/sdk/devbox/test_interfaces.py
@@ -24,7 +24,7 @@ def test_exec_without_callbacks(self, mock_client: Mock, execution_view: MockExe
mock_client.devboxes.execute_async.return_value = execution_view
mock_client.devboxes.executions.await_completed.return_value = execution_view
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.cmd.exec("echo hello")
assert result.exit_code == 0
@@ -38,13 +38,13 @@ def test_exec_without_callbacks(self, mock_client: Mock, execution_view: MockExe
def test_exec_with_stdout_callback(self, mock_client: Mock, mock_stream: Mock) -> None:
"""Test exec with stdout callback."""
execution_async = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
)
execution_completed = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="output",
@@ -57,7 +57,7 @@ def test_exec_with_stdout_callback(self, mock_client: Mock, mock_stream: Mock) -
stdout_calls: list[str] = []
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.cmd.exec("echo hello", stdout=stdout_calls.append)
assert result.exit_code == 0
@@ -67,13 +67,13 @@ def test_exec_with_stdout_callback(self, mock_client: Mock, mock_stream: Mock) -
def test_exec_with_stderr_callback(self, mock_client: Mock, mock_stream: Mock) -> None:
"""Test exec with stderr callback."""
execution_async = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
)
execution_completed = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="",
@@ -86,7 +86,7 @@ def test_exec_with_stderr_callback(self, mock_client: Mock, mock_stream: Mock) -
stderr_calls: list[str] = []
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.cmd.exec("echo hello", stderr=stderr_calls.append)
assert result.exit_code == 0
@@ -95,13 +95,13 @@ def test_exec_with_stderr_callback(self, mock_client: Mock, mock_stream: Mock) -
def test_exec_with_output_callback(self, mock_client: Mock, mock_stream: Mock) -> None:
"""Test exec with output callback."""
execution_async = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
)
execution_completed = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="output",
@@ -115,7 +115,7 @@ def test_exec_with_output_callback(self, mock_client: Mock, mock_stream: Mock) -
output_calls: list[str] = []
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.cmd.exec("echo hello", output=output_calls.append)
assert result.exit_code == 0
@@ -124,13 +124,13 @@ def test_exec_with_output_callback(self, mock_client: Mock, mock_stream: Mock) -
def test_exec_with_all_callbacks(self, mock_client: Mock, mock_stream: Mock) -> None:
"""Test exec with all callbacks."""
execution_async = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
)
execution_completed = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="output",
@@ -146,7 +146,7 @@ def test_exec_with_all_callbacks(self, mock_client: Mock, mock_stream: Mock) ->
stderr_calls: list[str] = []
output_calls: list[str] = []
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.cmd.exec(
"echo hello",
stdout=stdout_calls.append,
@@ -160,19 +160,19 @@ def test_exec_with_all_callbacks(self, mock_client: Mock, mock_stream: Mock) ->
def test_exec_async_returns_execution(self, mock_client: Mock, mock_stream: Mock) -> None:
"""Test exec_async returns Execution object."""
execution_async = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
)
mock_client.devboxes.execute_async.return_value = execution_async
mock_client.devboxes.executions.stream_stdout_updates.return_value = mock_stream
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
execution = devbox.cmd.exec_async("long-running command")
- assert execution.execution_id == "exec_123"
- assert execution.devbox_id == "dev_123"
+ assert execution.execution_id == "exn_123"
+ assert execution.devbox_id == "dbx_123"
mock_client.devboxes.execute_async.assert_called_once()
@@ -183,7 +183,7 @@ def test_read(self, mock_client: Mock) -> None:
"""Test file read."""
mock_client.devboxes.read_file_contents.return_value = "file content"
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.file.read(file_path="/path/to/file")
assert result == "file content"
@@ -196,7 +196,7 @@ def test_write_string(self, mock_client: Mock) -> None:
execution_detail = SimpleNamespace()
mock_client.devboxes.write_file_contents.return_value = execution_detail
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.file.write(file_path="/path/to/file", contents="content")
assert result == execution_detail
@@ -210,7 +210,7 @@ def test_write_bytes(self, mock_client: Mock) -> None:
execution_detail = SimpleNamespace()
mock_client.devboxes.write_file_contents.return_value = execution_detail
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.file.write(file_path="/path/to/file", contents="content")
assert result == execution_detail
@@ -225,7 +225,7 @@ def test_download(self, mock_client: Mock) -> None:
mock_response.read.return_value = b"file content"
mock_client.devboxes.download_file.return_value = mock_response
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.file.download(path="/path/to/file")
assert result == b"file content"
@@ -238,7 +238,7 @@ def test_upload(self, mock_client: Mock, tmp_path: Path) -> None:
execution_detail = SimpleNamespace()
mock_client.devboxes.upload_file.return_value = execution_detail
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
# Create a temporary file for upload
temp_file = tmp_path / "test_file.txt"
temp_file.write_text("test content")
@@ -260,7 +260,7 @@ def test_create_ssh_key(self, mock_client: Mock) -> None:
ssh_key_response = SimpleNamespace(public_key="ssh-rsa ...")
mock_client.devboxes.create_ssh_key.return_value = ssh_key_response
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.net.create_ssh_key(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -271,7 +271,7 @@ def test_create_ssh_key(self, mock_client: Mock) -> None:
assert result == ssh_key_response
mock_client.devboxes.create_ssh_key.assert_called_once_with(
- "dev_123",
+ "dbx_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
@@ -284,7 +284,7 @@ def test_create_tunnel(self, mock_client: Mock) -> None:
tunnel_view = SimpleNamespace(port=8080)
mock_client.devboxes.create_tunnel.return_value = tunnel_view
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.net.create_tunnel(
port=8080,
extra_headers={"X-Custom": "value"},
@@ -296,7 +296,7 @@ def test_create_tunnel(self, mock_client: Mock) -> None:
assert result == tunnel_view
mock_client.devboxes.create_tunnel.assert_called_once_with(
- "dev_123",
+ "dbx_123",
port=8080,
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -309,7 +309,7 @@ def test_remove_tunnel(self, mock_client: Mock) -> None:
"""Test remove tunnel."""
mock_client.devboxes.remove_tunnel.return_value = object()
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.net.remove_tunnel(
port=8080,
extra_headers={"X-Custom": "value"},
@@ -321,7 +321,7 @@ def test_remove_tunnel(self, mock_client: Mock) -> None:
assert result is not None # Verify return value is propagated
mock_client.devboxes.remove_tunnel.assert_called_once_with(
- "dev_123",
+ "dbx_123",
port=8080,
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
diff --git a/tests/sdk/devbox/test_streaming.py b/tests/sdk/devbox/test_streaming.py
index 4550b94a2..6d44a4e5e 100644
--- a/tests/sdk/devbox/test_streaming.py
+++ b/tests/sdk/devbox/test_streaming.py
@@ -26,17 +26,17 @@ class TestDevboxStreaming:
def test_start_streaming_no_callbacks(self, mock_client: Mock) -> None:
"""Test _start_streaming returns None when no callbacks."""
- devbox = Devbox(mock_client, "dev_123")
- result = devbox._start_streaming("exec_123", stdout=None, stderr=None, output=None)
+ devbox = Devbox(mock_client, "dbx_123")
+ result = devbox._start_streaming("exn_123", stdout=None, stderr=None, output=None)
assert result is None
def test_start_streaming_stdout_only(self, mock_client: Mock, mock_stream: Mock) -> None:
"""Test _start_streaming with stdout callback only."""
mock_client.devboxes.executions.stream_stdout_updates.return_value = mock_stream
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
stdout_calls: list[str] = []
- result = devbox._start_streaming("exec_123", stdout=stdout_calls.append, stderr=None, output=None)
+ result = devbox._start_streaming("exn_123", stdout=stdout_calls.append, stderr=None, output=None)
assert result is not None
assert isinstance(result, _StreamingGroup)
@@ -47,9 +47,9 @@ def test_start_streaming_stderr_only(self, mock_client: Mock, mock_stream: Mock)
"""Test _start_streaming with stderr callback only."""
mock_client.devboxes.executions.stream_stderr_updates.return_value = mock_stream
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
stderr_calls: list[str] = []
- result = devbox._start_streaming("exec_123", stdout=None, stderr=stderr_calls.append, output=None)
+ result = devbox._start_streaming("exn_123", stdout=None, stderr=stderr_calls.append, output=None)
assert result is not None
assert isinstance(result, _StreamingGroup)
@@ -61,9 +61,9 @@ def test_start_streaming_output_only(self, mock_client: Mock, mock_stream: Mock)
mock_client.devboxes.executions.stream_stdout_updates.return_value = mock_stream
mock_client.devboxes.executions.stream_stderr_updates.return_value = mock_stream
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
output_calls: list[str] = []
- result = devbox._start_streaming("exec_123", stdout=None, stderr=None, output=output_calls.append)
+ result = devbox._start_streaming("exn_123", stdout=None, stderr=None, output=output_calls.append)
assert result is not None
assert isinstance(result, _StreamingGroup)
@@ -74,12 +74,12 @@ def test_start_streaming_all_callbacks(self, mock_client: Mock, mock_stream: Moc
mock_client.devboxes.executions.stream_stdout_updates.return_value = mock_stream
mock_client.devboxes.executions.stream_stderr_updates.return_value = mock_stream
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
stdout_calls: list[str] = []
stderr_calls: list[str] = []
output_calls: list[str] = []
result = devbox._start_streaming(
- "exec_123",
+ "exn_123",
stdout=stdout_calls.append,
stderr=stderr_calls.append,
output=output_calls.append,
@@ -104,7 +104,7 @@ def test_spawn_stream_thread(
mock_stream.__enter__ = Mock(return_value=mock_stream)
mock_stream.__exit__ = Mock(return_value=None)
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
stop_event = threading.Event()
calls: list[str] = []
@@ -147,7 +147,7 @@ def test_spawn_stream_thread_stop_event(
mock_stream.__enter__ = Mock(return_value=mock_stream)
mock_stream.__exit__ = Mock(return_value=None)
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
stop_event = threading.Event()
calls: list[str] = []
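
The `_spawn_stream_thread` tests above hinge on a `threading.Event` that lets the reader thread exit before the stream is exhausted. A self-contained version of that stop-event pattern (names are illustrative, not the SDK's):

import threading
from queue import Empty, Queue


def spawn_stream_thread(chunks: "Queue[str]", callback, stop_event: threading.Event) -> threading.Thread:
    """Drain chunks into callback until stop_event is set (illustrative helper)."""

    def run() -> None:
        while not stop_event.is_set():
            try:
                callback(chunks.get(timeout=0.05))
            except Empty:
                continue

    thread = threading.Thread(target=run, daemon=True)
    thread.start()
    return thread


calls: list[str] = []
consumed = threading.Event()


def callback(chunk: str) -> None:
    calls.append(chunk)
    consumed.set()


stop = threading.Event()
queue: "Queue[str]" = Queue()
thread = spawn_stream_thread(queue, callback, stop)

queue.put("chunk 1")
consumed.wait(timeout=1.0)  # let the thread consume the chunk
stop.set()                  # then ask it to exit early
thread.join(timeout=1.0)
assert calls == ["chunk 1"]
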
diff --git a/tests/sdk/test_agent.py b/tests/sdk/test_agent.py
index 7580b44d8..f1bbb083d 100644
--- a/tests/sdk/test_agent.py
+++ b/tests/sdk/test_agent.py
@@ -13,19 +13,19 @@ class TestAgent:
def test_init(self, mock_client: Mock) -> None:
"""Test Agent initialization."""
- agent = Agent(mock_client, "agent_123")
- assert agent.id == "agent_123"
+ agent = Agent(mock_client, "agt_123")
+ assert agent.id == "agt_123"
def test_repr(self, mock_client: Mock) -> None:
"""Test Agent string representation."""
- agent = Agent(mock_client, "agent_123")
- assert repr(agent) == "<Agent agent_123>"
+ agent = Agent(mock_client, "agt_123")
+ assert repr(agent) == "<Agent agt_123>"
def test_get_info(self, mock_client: Mock, agent_view: MockAgentView) -> None:
"""Test get_info method."""
mock_client.agents.retrieve.return_value = agent_view
- agent = Agent(mock_client, "agent_123")
+ agent = Agent(mock_client, "agt_123")
result = agent.get_info(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -35,7 +35,7 @@ def test_get_info(self, mock_client: Mock, agent_view: MockAgentView) -> None:
assert result == agent_view
mock_client.agents.retrieve.assert_called_once_with(
- "agent_123",
+ "agt_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
diff --git a/tests/sdk/test_async_agent.py b/tests/sdk/test_async_agent.py
index a2bb9496c..be7efa845 100644
--- a/tests/sdk/test_async_agent.py
+++ b/tests/sdk/test_async_agent.py
@@ -15,20 +15,20 @@ class TestAsyncAgent:
def test_init(self, mock_async_client: AsyncMock) -> None:
"""Test AsyncAgent initialization."""
- agent = AsyncAgent(mock_async_client, "agent_123")
- assert agent.id == "agent_123"
+ agent = AsyncAgent(mock_async_client, "agt_123")
+ assert agent.id == "agt_123"
def test_repr(self, mock_async_client: AsyncMock) -> None:
"""Test AsyncAgent string representation."""
- agent = AsyncAgent(mock_async_client, "agent_123")
- assert repr(agent) == "<AsyncAgent agent_123>"
+ agent = AsyncAgent(mock_async_client, "agt_123")
+ assert repr(agent) == "<AsyncAgent agt_123>"
@pytest.mark.asyncio
async def test_get_info(self, mock_async_client: AsyncMock, agent_view: MockAgentView) -> None:
"""Test get_info method."""
mock_async_client.agents.retrieve = AsyncMock(return_value=agent_view)
- agent = AsyncAgent(mock_async_client, "agent_123")
+ agent = AsyncAgent(mock_async_client, "agt_123")
result = await agent.get_info(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -38,7 +38,7 @@ async def test_get_info(self, mock_async_client: AsyncMock, agent_view: MockAgen
assert result == agent_view
mock_async_client.agents.retrieve.assert_called_once_with(
- "agent_123",
+ "agt_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
diff --git a/tests/sdk/test_async_benchmark.py b/tests/sdk/test_async_benchmark.py
new file mode 100644
index 000000000..d7d72daad
--- /dev/null
+++ b/tests/sdk/test_async_benchmark.py
@@ -0,0 +1,130 @@
+"""Comprehensive tests for async AsyncBenchmark class."""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+from unittest.mock import AsyncMock
+
+from tests.sdk.conftest import MockBenchmarkView, MockBenchmarkRunView
+from runloop_api_client.sdk.async_benchmark import AsyncBenchmark
+from runloop_api_client.sdk.async_benchmark_run import AsyncBenchmarkRun
+
+
+class TestAsyncBenchmark:
+ """Tests for AsyncBenchmark class."""
+
+ def test_init(self, mock_async_client: AsyncMock) -> None:
+ """Test AsyncBenchmark initialization."""
+ benchmark = AsyncBenchmark(mock_async_client, "bmd_123")
+ assert benchmark.id == "bmd_123"
+ assert repr(benchmark) == "<AsyncBenchmark bmd_123>"
+
+ async def test_get_info(self, mock_async_client: AsyncMock, benchmark_view: MockBenchmarkView) -> None:
+ """Test get_info method."""
+ mock_async_client.benchmarks.retrieve = AsyncMock(return_value=benchmark_view)
+
+ benchmark = AsyncBenchmark(mock_async_client, "bmd_123")
+ result = await benchmark.get_info()
+
+ assert result == benchmark_view
+ mock_async_client.benchmarks.retrieve.assert_awaited_once_with("bmd_123")
+
+ async def test_update(self, mock_async_client: AsyncMock, benchmark_view: MockBenchmarkView) -> None:
+ """Test update method."""
+ benchmark_view.name = "updated-name"
+ mock_async_client.benchmarks.update = AsyncMock(return_value=benchmark_view)
+
+ benchmark = AsyncBenchmark(mock_async_client, "bmd_123")
+ result = await benchmark.update(name="updated-name")
+
+ assert result == benchmark_view
+ mock_async_client.benchmarks.update.assert_awaited_once_with("bmd_123", name="updated-name")
+
+ async def test_start_run(self, mock_async_client: AsyncMock, benchmark_run_view: MockBenchmarkRunView) -> None:
+ """Test start_run method."""
+ mock_async_client.benchmarks.start_run = AsyncMock(return_value=benchmark_run_view)
+
+ benchmark = AsyncBenchmark(mock_async_client, "bmd_123")
+ result = await benchmark.start_run(run_name="test-run", metadata={"key": "value"})
+
+ assert isinstance(result, AsyncBenchmarkRun)
+ assert result.id == benchmark_run_view.id
+ assert result.benchmark_id == benchmark_run_view.benchmark_id
+ mock_async_client.benchmarks.start_run.assert_awaited_once_with(
+ benchmark_id="bmd_123", run_name="test-run", metadata={"key": "value"}
+ )
+
+ async def test_add_scenarios(self, mock_async_client: AsyncMock, benchmark_view: MockBenchmarkView) -> None:
+ """Test add_scenarios method."""
+ benchmark_view.scenario_ids = ["scn_001", "scn_002"]
+ mock_async_client.benchmarks.update_scenarios = AsyncMock(return_value=benchmark_view)
+
+ benchmark = AsyncBenchmark(mock_async_client, "bmd_123")
+ result = await benchmark.add_scenarios(["scn_001", "scn_002"])
+
+ assert result == benchmark_view
+ mock_async_client.benchmarks.update_scenarios.assert_awaited_once_with(
+ "bmd_123", scenarios_to_add=["scn_001", "scn_002"]
+ )
+
+ async def test_remove_scenarios(self, mock_async_client: AsyncMock, benchmark_view: MockBenchmarkView) -> None:
+ """Test remove_scenarios method."""
+ mock_async_client.benchmarks.update_scenarios = AsyncMock(return_value=benchmark_view)
+
+ benchmark = AsyncBenchmark(mock_async_client, "bmd_123")
+ result = await benchmark.remove_scenarios(["scn_001"])
+
+ assert result == benchmark_view
+ mock_async_client.benchmarks.update_scenarios.assert_awaited_once_with(
+ "bmd_123", scenarios_to_remove=["scn_001"]
+ )
+
+ async def test_list_runs_single(
+ self, mock_async_client: AsyncMock, benchmark_run_view: MockBenchmarkRunView
+ ) -> None:
+ """Test list_runs method with single result."""
+ page = SimpleNamespace(runs=[benchmark_run_view])
+ mock_async_client.benchmarks.runs.list = AsyncMock(return_value=page)
+
+ benchmark = AsyncBenchmark(mock_async_client, "bmd_123")
+ result = await benchmark.list_runs()
+
+ assert len(result) == 1
+ assert isinstance(result[0], AsyncBenchmarkRun)
+ assert result[0].id == benchmark_run_view.id
+ assert result[0].benchmark_id == benchmark_run_view.benchmark_id
+ mock_async_client.benchmarks.runs.list.assert_awaited_once_with(benchmark_id="bmd_123")
+
+ async def test_list_runs_multiple(self, mock_async_client: AsyncMock) -> None:
+ """Test list_runs method with multiple results."""
+ run_view1 = MockBenchmarkRunView(id="bmr_001")
+ run_view2 = MockBenchmarkRunView(id="bmr_002")
+ page = SimpleNamespace(runs=[run_view1, run_view2])
+ mock_async_client.benchmarks.runs.list = AsyncMock(return_value=page)
+
+ benchmark = AsyncBenchmark(mock_async_client, "bmd_123")
+ result = await benchmark.list_runs()
+
+ assert len(result) == 2
+ assert isinstance(result[0], AsyncBenchmarkRun)
+ assert isinstance(result[1], AsyncBenchmarkRun)
+ assert result[0].id == run_view1.id
+ assert result[0].benchmark_id == run_view1.benchmark_id
+ assert result[1].id == run_view2.id
+ assert result[1].benchmark_id == run_view2.benchmark_id
+ mock_async_client.benchmarks.runs.list.assert_awaited_once_with(benchmark_id="bmd_123")
+
+ async def test_list_runs_with_params(
+ self, mock_async_client: AsyncMock, benchmark_run_view: MockBenchmarkRunView
+ ) -> None:
+ """Test list_runs method with filtering parameters."""
+ page = SimpleNamespace(runs=[benchmark_run_view])
+ mock_async_client.benchmarks.runs.list = AsyncMock(return_value=page)
+
+ benchmark = AsyncBenchmark(mock_async_client, "bmd_123")
+ result = await benchmark.list_runs(limit=10, name="test-run")
+
+ assert len(result) == 1
+ mock_async_client.benchmarks.runs.list.assert_awaited_once_with(
+ benchmark_id="bmd_123", limit=10, name="test-run"
+ )
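
Taken together, these assertions pin down the wrapper's shape: `start_run` posts with `benchmark_id=` and wraps the returned view in an `AsyncBenchmarkRun`, while `list_runs` maps every view in `page.runs` into one. A hedged reduction of `list_runs` (the real class lives at `runloop_api_client.sdk.async_benchmark`; these sketch classes reproduce only what the tests check):

import asyncio
from types import SimpleNamespace
from unittest.mock import AsyncMock


class AsyncBenchmarkRunSketch:
    """Hypothetical value object exposing the attributes the tests read."""

    def __init__(self, client, run_id: str, benchmark_id: str) -> None:
        self._client = client
        self.id = run_id
        self.benchmark_id = benchmark_id


class AsyncBenchmarkSketch:
    """Hypothetical reduction of AsyncBenchmark.list_runs, per the assertions above."""

    def __init__(self, client, benchmark_id: str) -> None:
        self._client = client
        self.id = benchmark_id

    async def list_runs(self, **params):
        page = await self._client.benchmarks.runs.list(benchmark_id=self.id, **params)
        return [
            AsyncBenchmarkRunSketch(self._client, run.id, run.benchmark_id)
            for run in page.runs
        ]


async def main() -> None:
    client = AsyncMock()
    view = SimpleNamespace(id="bmr_001", benchmark_id="bmd_123")
    client.benchmarks.runs.list = AsyncMock(return_value=SimpleNamespace(runs=[view]))

    runs = await AsyncBenchmarkSketch(client, "bmd_123").list_runs(limit=10)
    assert runs[0].id == "bmr_001"
    assert runs[0].benchmark_id == "bmd_123"
    client.benchmarks.runs.list.assert_awaited_once_with(benchmark_id="bmd_123", limit=10)


asyncio.run(main())
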
diff --git a/tests/sdk/test_async_benchmark_run.py b/tests/sdk/test_async_benchmark_run.py
new file mode 100644
index 000000000..dd6e230d2
--- /dev/null
+++ b/tests/sdk/test_async_benchmark_run.py
@@ -0,0 +1,120 @@
+"""Comprehensive tests for async AsyncBenchmarkRun class."""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+from unittest.mock import AsyncMock
+
+from tests.sdk.conftest import MockScenarioRunView, MockBenchmarkRunView
+from runloop_api_client.sdk.async_scenario_run import AsyncScenarioRun
+from runloop_api_client.sdk.async_benchmark_run import AsyncBenchmarkRun
+
+
+class TestAsyncBenchmarkRun:
+ """Tests for AsyncBenchmarkRun class."""
+
+ def test_init(self, mock_async_client: AsyncMock) -> None:
+ """Test AsyncBenchmarkRun initialization."""
+ run = AsyncBenchmarkRun(mock_async_client, "bmr_123", "bmd_123")
+ assert run.id == "bmr_123"
+ assert run.benchmark_id == "bmd_123"
+
+ def test_repr(self, mock_async_client: AsyncMock) -> None:
+ """Test AsyncBenchmarkRun string representation."""
+ run = AsyncBenchmarkRun(mock_async_client, "bmr_123", "bmd_123")
+ assert repr(run) == "<AsyncBenchmarkRun bmr_123>"
+
+ async def test_get_info(self, mock_async_client: AsyncMock, benchmark_run_view: MockBenchmarkRunView) -> None:
+ """Test get_info method."""
+ mock_async_client.benchmarks.runs.retrieve = AsyncMock(return_value=benchmark_run_view)
+
+ run = AsyncBenchmarkRun(mock_async_client, "bmr_123", "bmd_123")
+ result = await run.get_info()
+
+ assert result == benchmark_run_view
+ mock_async_client.benchmarks.runs.retrieve.assert_awaited_once_with("bmr_123")
+
+ async def test_cancel(self, mock_async_client: AsyncMock, benchmark_run_view: MockBenchmarkRunView) -> None:
+ """Test cancel method."""
+ benchmark_run_view.state = "canceled"
+ mock_async_client.benchmarks.runs.cancel = AsyncMock(return_value=benchmark_run_view)
+
+ run = AsyncBenchmarkRun(mock_async_client, "bmr_123", "bmd_123")
+ result = await run.cancel()
+
+ assert result == benchmark_run_view
+ assert result.state == "canceled"
+ mock_async_client.benchmarks.runs.cancel.assert_awaited_once_with("bmr_123")
+
+ async def test_complete(self, mock_async_client: AsyncMock, benchmark_run_view: MockBenchmarkRunView) -> None:
+ """Test complete method."""
+ benchmark_run_view.state = "completed"
+ mock_async_client.benchmarks.runs.complete = AsyncMock(return_value=benchmark_run_view)
+
+ run = AsyncBenchmarkRun(mock_async_client, "bmr_123", "bmd_123")
+ result = await run.complete()
+
+ assert result == benchmark_run_view
+ assert result.state == "completed"
+ mock_async_client.benchmarks.runs.complete.assert_awaited_once_with("bmr_123")
+
+ async def test_list_scenario_runs_empty(self, mock_async_client: AsyncMock) -> None:
+ """Test list_scenario_runs method with empty results."""
+ page = SimpleNamespace(runs=[])
+ mock_async_client.benchmarks.runs.list_scenario_runs = AsyncMock(return_value=page)
+
+ run = AsyncBenchmarkRun(mock_async_client, "bmr_123", "bmd_123")
+ result = await run.list_scenario_runs()
+
+ assert len(result) == 0
+ mock_async_client.benchmarks.runs.list_scenario_runs.assert_awaited_once_with("bmr_123")
+
+ async def test_list_scenario_runs_single(
+ self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView
+ ) -> None:
+ """Test list_scenario_runs method with single result."""
+ page = SimpleNamespace(runs=[scenario_run_view])
+ mock_async_client.benchmarks.runs.list_scenario_runs = AsyncMock(return_value=page)
+
+ run = AsyncBenchmarkRun(mock_async_client, "bmr_123", "bmd_123")
+ result = await run.list_scenario_runs()
+
+ assert len(result) == 1
+ assert isinstance(result[0], AsyncScenarioRun)
+ assert result[0].id == scenario_run_view.id
+ assert result[0].devbox_id == scenario_run_view.devbox_id
+ mock_async_client.benchmarks.runs.list_scenario_runs.assert_awaited_once_with("bmr_123")
+
+ async def test_list_scenario_runs_multiple(self, mock_async_client: AsyncMock) -> None:
+ """Test list_scenario_runs method with multiple results."""
+ scenario_run_view1 = MockScenarioRunView(id="scr_001", devbox_id="dev_001")
+ scenario_run_view2 = MockScenarioRunView(id="scr_002", devbox_id="dev_002")
+ page = SimpleNamespace(runs=[scenario_run_view1, scenario_run_view2])
+ mock_async_client.benchmarks.runs.list_scenario_runs = AsyncMock(return_value=page)
+
+ run = AsyncBenchmarkRun(mock_async_client, "bmr_123", "bmd_123")
+ result = await run.list_scenario_runs()
+
+ assert len(result) == 2
+ assert isinstance(result[0], AsyncScenarioRun)
+ assert isinstance(result[1], AsyncScenarioRun)
+ assert result[0].id == "scr_001"
+ assert result[1].id == "scr_002"
+ mock_async_client.benchmarks.runs.list_scenario_runs.assert_awaited_once_with("bmr_123")
+
+ async def test_list_scenario_runs_with_params(
+ self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView
+ ) -> None:
+ """Test list_scenario_runs method with filtering parameters."""
+ page = SimpleNamespace(runs=[scenario_run_view])
+ mock_async_client.benchmarks.runs.list_scenario_runs = AsyncMock(return_value=page)
+
+ run = AsyncBenchmarkRun(mock_async_client, "bmr_123", "bmd_123")
+ result = await run.list_scenario_runs(limit=10, state="completed")
+
+ assert len(result) == 1
+ assert isinstance(result[0], AsyncScenarioRun)
+ assert result[0].id == scenario_run_view.id
+ mock_async_client.benchmarks.runs.list_scenario_runs.assert_awaited_once_with(
+ "bmr_123", limit=10, state="completed"
+ )
diff --git a/tests/sdk/test_async_blueprint.py b/tests/sdk/test_async_blueprint.py
index 75901a445..4c7de1e22 100644
--- a/tests/sdk/test_async_blueprint.py
+++ b/tests/sdk/test_async_blueprint.py
@@ -16,20 +16,20 @@ class TestAsyncBlueprint:
def test_init(self, mock_async_client: AsyncMock) -> None:
"""Test AsyncBlueprint initialization."""
- blueprint = AsyncBlueprint(mock_async_client, "bp_123")
- assert blueprint.id == "bp_123"
+ blueprint = AsyncBlueprint(mock_async_client, "bpt_123")
+ assert blueprint.id == "bpt_123"
def test_repr(self, mock_async_client: AsyncMock) -> None:
"""Test AsyncBlueprint string representation."""
- blueprint = AsyncBlueprint(mock_async_client, "bp_123")
- assert repr(blueprint) == "<AsyncBlueprint bp_123>"
+ blueprint = AsyncBlueprint(mock_async_client, "bpt_123")
+ assert repr(blueprint) == "<AsyncBlueprint bpt_123>"
@pytest.mark.asyncio
async def test_get_info(self, mock_async_client: AsyncMock, blueprint_view: MockBlueprintView) -> None:
"""Test get_info method."""
mock_async_client.blueprints.retrieve = AsyncMock(return_value=blueprint_view)
- blueprint = AsyncBlueprint(mock_async_client, "bp_123")
+ blueprint = AsyncBlueprint(mock_async_client, "bpt_123")
result = await blueprint.get_info(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -46,7 +46,7 @@ async def test_logs(self, mock_async_client: AsyncMock) -> None:
logs_view = SimpleNamespace(logs=[])
mock_async_client.blueprints.logs = AsyncMock(return_value=logs_view)
- blueprint = AsyncBlueprint(mock_async_client, "bp_123")
+ blueprint = AsyncBlueprint(mock_async_client, "bpt_123")
result = await blueprint.logs(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -62,7 +62,7 @@ async def test_delete(self, mock_async_client: AsyncMock) -> None:
"""Test delete method."""
mock_async_client.blueprints.delete = AsyncMock(return_value=object())
- blueprint = AsyncBlueprint(mock_async_client, "bp_123")
+ blueprint = AsyncBlueprint(mock_async_client, "bpt_123")
result = await blueprint.delete(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -78,7 +78,7 @@ async def test_create_devbox(self, mock_async_client: AsyncMock, devbox_view: Mo
"""Test create_devbox method."""
mock_async_client.devboxes.create_and_await_running = AsyncMock(return_value=devbox_view)
- blueprint = AsyncBlueprint(mock_async_client, "bp_123")
+ blueprint = AsyncBlueprint(mock_async_client, "bpt_123")
devbox = await blueprint.create_devbox(
name="test-devbox",
metadata={"key": "value"},
@@ -86,5 +86,5 @@ async def test_create_devbox(self, mock_async_client: AsyncMock, devbox_view: Mo
extra_headers={"X-Custom": "value"},
)
- assert devbox.id == "dev_123"
+ assert devbox.id == "dbx_123"
mock_async_client.devboxes.create_and_await_running.assert_awaited_once()
diff --git a/tests/sdk/test_async_execution.py b/tests/sdk/test_async_execution.py
index 06629cf63..f05633263 100644
--- a/tests/sdk/test_async_execution.py
+++ b/tests/sdk/test_async_execution.py
@@ -91,9 +91,9 @@ class TestAsyncExecution:
def test_init(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None:
"""Test AsyncExecution initialization."""
- execution = AsyncExecution(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
- assert execution.execution_id == "exec_123"
- assert execution.devbox_id == "dev_123"
+ execution = AsyncExecution(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
+ assert execution.execution_id == "exn_123"
+ assert execution.devbox_id == "dbx_123"
assert execution._initial_result == execution_view
@pytest.mark.asyncio
@@ -113,19 +113,19 @@ async def task() -> None:
async_task_cleanup.extend(tasks)
streaming_group = _AsyncStreamingGroup(tasks)
- execution = AsyncExecution(mock_async_client, "dev_123", execution_view, streaming_group) # type: ignore[arg-type]
+ execution = AsyncExecution(mock_async_client, "dbx_123", execution_view, streaming_group) # type: ignore[arg-type]
assert execution._streaming_group is streaming_group
def test_properties(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None:
"""Test AsyncExecution properties."""
- execution = AsyncExecution(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
- assert execution.execution_id == "exec_123"
- assert execution.devbox_id == "dev_123"
+ execution = AsyncExecution(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
+ assert execution.execution_id == "exn_123"
+ assert execution.devbox_id == "dbx_123"
def test_repr(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None:
"""Test AsyncExecution repr formatting."""
- execution = AsyncExecution(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
- assert repr(execution) == "<AsyncExecution exec_123>"
+ execution = AsyncExecution(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
+ assert repr(execution) == "<AsyncExecution exn_123>"
@pytest.mark.asyncio
async def test_result_already_completed(
@@ -134,14 +134,14 @@ async def test_result_already_completed(
"""Test result when execution is already completed."""
mock_async_client.devboxes.wait_for_command = AsyncMock(return_value=execution_view)
- execution = AsyncExecution(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
+ execution = AsyncExecution(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
result = await execution.result()
assert result.exit_code == 0
assert await result.stdout(num_lines=10) == "output"
mock_async_client.devboxes.wait_for_command.assert_awaited_once_with(
- "exec_123",
- devbox_id="dev_123",
+ "exn_123",
+ devbox_id="dbx_123",
statuses=["completed"],
)
@@ -149,13 +149,13 @@ async def test_result_already_completed(
async def test_result_needs_polling(self, mock_async_client: AsyncMock) -> None:
"""Test result when execution needs polling."""
running_execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
)
completed_execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="output",
@@ -166,14 +166,14 @@ async def test_result_needs_polling(self, mock_async_client: AsyncMock) -> None:
mock_async_client.devboxes.wait_for_command = AsyncMock(return_value=completed_execution)
- execution = AsyncExecution(mock_async_client, "dev_123", running_execution) # type: ignore[arg-type]
+ execution = AsyncExecution(mock_async_client, "dbx_123", running_execution) # type: ignore[arg-type]
result = await execution.result()
assert result.exit_code == 0
assert await result.stdout(num_lines=10) == "output"
mock_async_client.devboxes.wait_for_command.assert_awaited_once_with(
- "exec_123",
- devbox_id="dev_123",
+ "exn_123",
+ devbox_id="dbx_123",
statuses=["completed"],
)
@@ -181,13 +181,13 @@ async def test_result_needs_polling(self, mock_async_client: AsyncMock) -> None:
async def test_result_with_streaming_group(self, mock_async_client: AsyncMock) -> None:
"""Test result with streaming group cleanup."""
running_execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
)
completed_execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="output",
@@ -202,7 +202,7 @@ async def task() -> None:
tasks = [asyncio.create_task(task())]
streaming_group = _AsyncStreamingGroup(tasks)
- execution = AsyncExecution(mock_async_client, "dev_123", running_execution, streaming_group) # type: ignore[arg-type]
+ execution = AsyncExecution(mock_async_client, "dbx_123", running_execution, streaming_group) # type: ignore[arg-type]
result = await execution.result()
assert result.exit_code == 0
@@ -213,8 +213,8 @@ async def task() -> None:
async def test_result_passes_options(self, mock_async_client: AsyncMock) -> None:
"""Ensure result forwards options to wait_for_command."""
execution_view = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="output",
@@ -223,12 +223,12 @@ async def test_result_passes_options(self, mock_async_client: AsyncMock) -> None
mock_async_client.devboxes.wait_for_command = AsyncMock(return_value=execution_view)
- execution = AsyncExecution(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
+ execution = AsyncExecution(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
await execution.result(timeout=30.0, idempotency_key="abc123")
mock_async_client.devboxes.wait_for_command.assert_awaited_once_with(
- "exec_123",
- devbox_id="dev_123",
+ "exn_123",
+ devbox_id="dbx_123",
statuses=["completed"],
timeout=30.0,
idempotency_key="abc123",
@@ -238,20 +238,20 @@ async def test_result_passes_options(self, mock_async_client: AsyncMock) -> None
async def test_get_state(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None:
"""Test get_state method."""
updated_execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
)
mock_async_client.devboxes.executions.retrieve = AsyncMock(return_value=updated_execution)
- execution = AsyncExecution(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
+ execution = AsyncExecution(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
result = await execution.get_state()
assert result == updated_execution
assert execution._initial_result == execution_view
mock_async_client.devboxes.executions.retrieve.assert_awaited_once_with(
- "exec_123",
- devbox_id="dev_123",
+ "exn_123",
+ devbox_id="dbx_123",
)
@pytest.mark.asyncio
@@ -259,10 +259,10 @@ async def test_kill(self, mock_async_client: AsyncMock, execution_view: MockExec
"""Test kill method."""
mock_async_client.devboxes.executions.kill = AsyncMock(return_value=None)
- execution = AsyncExecution(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
+ execution = AsyncExecution(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
await execution.kill()
mock_async_client.devboxes.executions.kill.assert_awaited_once_with(
- "exec_123",
- devbox_id="dev_123",
+ "exn_123",
+ devbox_id="dbx_123",
)
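
The assertions above pin down the forwarding contract of AsyncExecution: result() delegates to devboxes.wait_for_command with the execution and devbox IDs and passes any request options straight through. A minimal sketch of the asserted call shape, using the placeholder IDs from these tests:

    result = await execution.result(timeout=30.0, idempotency_key="abc123")
    # Equivalent underlying call, as mocked above:
    # await client.devboxes.wait_for_command(
    #     "exn_123", devbox_id="dbx_123", statuses=["completed"],
    #     timeout=30.0, idempotency_key="abc123",
    # )
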
diff --git a/tests/sdk/test_async_execution_result.py b/tests/sdk/test_async_execution_result.py
index cf8a23caa..58802cc4f 100644
--- a/tests/sdk/test_async_execution_result.py
+++ b/tests/sdk/test_async_execution_result.py
@@ -16,31 +16,31 @@ class TestAsyncExecutionResult:
def test_init(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None:
"""Test AsyncExecutionResult initialization."""
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
# Verify via public API
- assert result.devbox_id == "dev_123"
- assert result.execution_id == "exec_123"
+ assert result.devbox_id == "dbx_123"
+ assert result.execution_id == "exn_123"
def test_devbox_id_property(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None:
"""Test devbox_id property."""
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
- assert result.devbox_id == "dev_123"
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
+ assert result.devbox_id == "dbx_123"
def test_execution_id_property(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None:
"""Test execution_id property."""
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
- assert result.execution_id == "exec_123"
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
+ assert result.execution_id == "exn_123"
def test_exit_code_property(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None:
"""Test exit_code property."""
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
assert result.exit_code == 0
def test_exit_code_none(self, mock_async_client: AsyncMock) -> None:
"""Test exit_code property when exit_status is None."""
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
exit_status=None,
stdout="",
@@ -48,19 +48,19 @@ def test_exit_code_none(self, mock_async_client: AsyncMock) -> None:
stdout_truncated=False,
stderr_truncated=False,
)
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type]
assert result.exit_code is None
def test_success_property(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None:
"""Test success property."""
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
assert result.success is True
def test_success_false(self, mock_async_client: AsyncMock) -> None:
"""Test success property when exit code is non-zero."""
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=1,
stdout="",
@@ -68,19 +68,19 @@ def test_success_false(self, mock_async_client: AsyncMock) -> None:
stdout_truncated=False,
stderr_truncated=False,
)
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type]
assert result.success is False
def test_failed_property(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None:
"""Test failed property when exit code is zero."""
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
assert result.failed is False
def test_failed_true(self, mock_async_client: AsyncMock) -> None:
"""Test failed property when exit code is non-zero."""
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=1,
stdout="",
@@ -88,14 +88,14 @@ def test_failed_true(self, mock_async_client: AsyncMock) -> None:
stdout_truncated=False,
stderr_truncated=False,
)
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type]
assert result.failed is True
def test_failed_none(self, mock_async_client: AsyncMock) -> None:
"""Test failed property when exit_status is None."""
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
exit_status=None,
stdout="",
@@ -103,13 +103,13 @@ def test_failed_none(self, mock_async_client: AsyncMock) -> None:
stdout_truncated=False,
stderr_truncated=False,
)
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type]
assert result.failed is False
@pytest.mark.asyncio
async def test_stdout(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None:
"""Test stdout method."""
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
assert await result.stdout() == "output"
assert await result.stdout(num_lines=10) == "output"
@@ -117,8 +117,8 @@ async def test_stdout(self, mock_async_client: AsyncMock, execution_view: MockEx
async def test_stdout_empty(self, mock_async_client: AsyncMock) -> None:
"""Test stdout method when stdout is None."""
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout=None,
@@ -126,15 +126,15 @@ async def test_stdout_empty(self, mock_async_client: AsyncMock) -> None:
stdout_truncated=False,
stderr_truncated=False,
)
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type]
assert await result.stdout() == ""
@pytest.mark.asyncio
async def test_stderr(self, mock_async_client: AsyncMock) -> None:
"""Test stderr method."""
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=1,
stdout="",
@@ -142,19 +142,19 @@ async def test_stderr(self, mock_async_client: AsyncMock) -> None:
stdout_truncated=False,
stderr_truncated=False,
)
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type]
assert await result.stderr() == "error message"
assert await result.stderr(num_lines=20) == "error message"
@pytest.mark.asyncio
async def test_stderr_empty(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None:
"""Test stderr method when stderr is None."""
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
assert await result.stderr() == ""
def test_result_property(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None:
"""Test result property."""
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
assert result.result == execution_view
@pytest.mark.asyncio
@@ -176,8 +176,8 @@ async def mock_iter():
mock_async_client.devboxes.executions.stream_stdout_updates = AsyncMock(return_value=mock_async_stream)
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="partial",
@@ -185,13 +185,13 @@ async def mock_iter():
stdout_truncated=True,
stderr_truncated=False,
)
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type]
# Should stream full output
output = await result.stdout()
assert output == "line1\nline2\nline3\n"
mock_async_client.devboxes.executions.stream_stdout_updates.assert_awaited_once_with(
- "exec_123", devbox_id="dev_123"
+ "exn_123", devbox_id="dbx_123"
)
@pytest.mark.asyncio
@@ -212,8 +212,8 @@ async def mock_iter():
mock_async_client.devboxes.executions.stream_stderr_updates = AsyncMock(return_value=mock_async_stream)
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="",
@@ -221,13 +221,13 @@ async def mock_iter():
stdout_truncated=False,
stderr_truncated=True,
)
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type]
# Should stream full output
output = await result.stderr()
assert output == "error1\nerror2\n"
mock_async_client.devboxes.executions.stream_stderr_updates.assert_awaited_once_with(
- "exec_123", devbox_id="dev_123"
+ "exn_123", devbox_id="dbx_123"
)
@pytest.mark.asyncio
@@ -248,8 +248,8 @@ async def mock_iter():
mock_async_client.devboxes.executions.stream_stdout_updates = AsyncMock(return_value=mock_async_stream)
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="line1\n",
@@ -257,7 +257,7 @@ async def mock_iter():
stdout_truncated=True,
stderr_truncated=False,
)
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type]
# Should stream and return last 2 lines
output = await result.stdout(num_lines=2)
@@ -267,8 +267,8 @@ async def mock_iter():
async def test_stdout_no_streaming_when_not_truncated(self, mock_async_client: AsyncMock) -> None:
"""Test stdout doesn't stream when not truncated."""
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="complete output",
@@ -276,7 +276,7 @@ async def test_stdout_no_streaming_when_not_truncated(self, mock_async_client: A
stdout_truncated=False,
stderr_truncated=False,
)
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type]
# Should return existing output without streaming
output = await result.stdout()
@@ -286,8 +286,8 @@ async def test_stdout_no_streaming_when_not_truncated(self, mock_async_client: A
async def test_stdout_with_num_lines_no_truncation(self, mock_async_client: AsyncMock) -> None:
"""Test stdout with num_lines when not truncated."""
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="line1\nline2\nline3\nline4\nline5",
@@ -295,7 +295,7 @@ async def test_stdout_with_num_lines_no_truncation(self, mock_async_client: Asyn
stdout_truncated=False,
stderr_truncated=False,
)
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type]
# Should return last 2 lines without streaming
output = await result.stdout(num_lines=2)
@@ -303,7 +303,7 @@ async def test_stdout_with_num_lines_no_truncation(self, mock_async_client: Asyn
def test_count_non_empty_lines(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None:
"""Test the _count_non_empty_lines helper method."""
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
# Test various input strings
assert result._count_non_empty_lines("") == 0
@@ -315,7 +315,7 @@ def test_count_non_empty_lines(self, mock_async_client: AsyncMock, execution_vie
def test_get_last_n_lines(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None:
"""Test the _get_last_n_lines helper method."""
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
# Test various scenarios
assert result._get_last_n_lines("", 5) == ""
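
The truncation tests above fix the streaming fallback for AsyncExecutionResult: stored output is returned as-is unless it was truncated, in which case the full stream is re-fetched. A sketch of that logic under the same mocked API surface (the helper name read_stdout and the _client attribute are assumptions, not the SDK's actual internals):

    async def read_stdout(result, num_lines=None):
        view = result.result
        if not view.stdout_truncated:
            # Complete output was captured inline; no streaming needed.
            text = view.stdout or ""
        else:
            # Output was truncated; re-stream the full stdout from the API.
            stream = await result._client.devboxes.executions.stream_stdout_updates(
                view.execution_id, devbox_id=view.devbox_id
            )
            text = "".join([chunk async for chunk in stream])
        if num_lines is not None:
            # Keep only the trailing num_lines lines, as stdout(num_lines=...) does.
            text = "\n".join(text.splitlines()[-num_lines:])
        return text
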
diff --git a/tests/sdk/test_async_ops.py b/tests/sdk/test_async_ops.py
index f8a16e1c0..7e36e938d 100644
--- a/tests/sdk/test_async_ops.py
+++ b/tests/sdk/test_async_ops.py
@@ -17,6 +17,7 @@
MockScorerView,
MockScenarioView,
MockSnapshotView,
+ MockBenchmarkView,
MockBlueprintView,
create_mock_httpx_response,
)
@@ -27,12 +28,14 @@
AsyncAgentOps,
AsyncScenario,
AsyncSnapshot,
+ AsyncBenchmark,
AsyncBlueprint,
AsyncDevboxOps,
AsyncScorerOps,
AsyncRunloopSDK,
AsyncScenarioOps,
AsyncSnapshotOps,
+ AsyncBenchmarkOps,
AsyncBlueprintOps,
AsyncStorageObject,
AsyncStorageObjectOps,
@@ -56,7 +59,7 @@ async def test_create(self, mock_async_client: AsyncMock, devbox_view: MockDevbo
)
assert isinstance(devbox, AsyncDevbox)
- assert devbox.id == "dev_123"
+ assert devbox.id == "dbx_123"
mock_async_client.devboxes.create_and_await_running.assert_awaited_once()
@pytest.mark.asyncio
@@ -66,13 +69,13 @@ async def test_create_from_blueprint_id(self, mock_async_client: AsyncMock, devb
ops = AsyncDevboxOps(mock_async_client)
devbox = await ops.create_from_blueprint_id(
- "bp_123",
+ "bpt_123",
name="test-devbox",
)
assert isinstance(devbox, AsyncDevbox)
call_kwargs = mock_async_client.devboxes.create_and_await_running.call_args[1]
- assert call_kwargs["blueprint_id"] == "bp_123"
+ assert call_kwargs["blueprint_id"] == "bpt_123"
@pytest.mark.asyncio
async def test_create_from_blueprint_name(self, mock_async_client: AsyncMock, devbox_view: MockDevboxView) -> None:
@@ -96,21 +99,21 @@ async def test_create_from_snapshot(self, mock_async_client: AsyncMock, devbox_v
ops = AsyncDevboxOps(mock_async_client)
devbox = await ops.create_from_snapshot(
- "snap_123",
+ "snp_123",
name="test-devbox",
)
assert isinstance(devbox, AsyncDevbox)
call_kwargs = mock_async_client.devboxes.create_and_await_running.call_args[1]
- assert call_kwargs["snapshot_id"] == "snap_123"
+ assert call_kwargs["snapshot_id"] == "snp_123"
def test_from_id(self, mock_async_client: AsyncMock) -> None:
"""Test from_id method."""
ops = AsyncDevboxOps(mock_async_client)
- devbox = ops.from_id("dev_123")
+ devbox = ops.from_id("dbx_123")
assert isinstance(devbox, AsyncDevbox)
- assert devbox.id == "dev_123"
+ assert devbox.id == "dbx_123"
# Verify from_id does not wait for running status
if hasattr(mock_async_client.devboxes, "await_running"):
assert not mock_async_client.devboxes.await_running.called
@@ -142,7 +145,7 @@ async def test_list_single(self, mock_async_client: AsyncMock, devbox_view: Mock
assert len(devboxes) == 1
assert isinstance(devboxes[0], AsyncDevbox)
- assert devboxes[0].id == "dev_123"
+ assert devboxes[0].id == "dbx_123"
mock_async_client.devboxes.list.assert_awaited_once()
@pytest.mark.asyncio
@@ -174,7 +177,7 @@ async def test_list_empty(self, mock_async_client: AsyncMock) -> None:
mock_async_client.devboxes.disk_snapshots.list = AsyncMock(return_value=page)
ops = AsyncSnapshotOps(mock_async_client)
- snapshots = await ops.list(devbox_id="dev_123", limit=10)
+ snapshots = await ops.list(devbox_id="dbx_123", limit=10)
assert len(snapshots) == 0
mock_async_client.devboxes.disk_snapshots.list.assert_awaited_once()
@@ -187,14 +190,14 @@ async def test_list_single(self, mock_async_client: AsyncMock, snapshot_view: Mo
ops = AsyncSnapshotOps(mock_async_client)
snapshots = await ops.list(
- devbox_id="dev_123",
+ devbox_id="dbx_123",
limit=10,
starting_after="snap_000",
)
assert len(snapshots) == 1
assert isinstance(snapshots[0], AsyncSnapshot)
- assert snapshots[0].id == "snap_123"
+ assert snapshots[0].id == "snp_123"
mock_async_client.devboxes.disk_snapshots.list.assert_awaited_once()
@pytest.mark.asyncio
@@ -206,7 +209,7 @@ async def test_list_multiple(self, mock_async_client: AsyncMock) -> None:
mock_async_client.devboxes.disk_snapshots.list = AsyncMock(return_value=page)
ops = AsyncSnapshotOps(mock_async_client)
- snapshots = await ops.list(devbox_id="dev_123", limit=10)
+ snapshots = await ops.list(devbox_id="dbx_123", limit=10)
assert len(snapshots) == 2
assert isinstance(snapshots[0], AsyncSnapshot)
@@ -218,10 +221,10 @@ async def test_list_multiple(self, mock_async_client: AsyncMock) -> None:
def test_from_id(self, mock_async_client: AsyncMock) -> None:
"""Test from_id method."""
ops = AsyncSnapshotOps(mock_async_client)
- snapshot = ops.from_id("snap_123")
+ snapshot = ops.from_id("snp_123")
assert isinstance(snapshot, AsyncSnapshot)
- assert snapshot.id == "snap_123"
+ assert snapshot.id == "snp_123"
class TestAsyncBlueprintOps:
@@ -239,16 +242,16 @@ async def test_create(self, mock_async_client: AsyncMock, blueprint_view: MockBl
)
assert isinstance(blueprint, AsyncBlueprint)
- assert blueprint.id == "bp_123"
+ assert blueprint.id == "bpt_123"
mock_async_client.blueprints.create_and_await_build_complete.assert_awaited_once()
def test_from_id(self, mock_async_client: AsyncMock) -> None:
"""Test from_id method."""
ops = AsyncBlueprintOps(mock_async_client)
- blueprint = ops.from_id("bp_123")
+ blueprint = ops.from_id("bpt_123")
assert isinstance(blueprint, AsyncBlueprint)
- assert blueprint.id == "bp_123"
+ assert blueprint.id == "bpt_123"
@pytest.mark.asyncio
async def test_list_empty(self, mock_async_client: AsyncMock) -> None:
@@ -277,7 +280,7 @@ async def test_list_single(self, mock_async_client: AsyncMock, blueprint_view: M
assert len(blueprints) == 1
assert isinstance(blueprints[0], AsyncBlueprint)
- assert blueprints[0].id == "bp_123"
+ assert blueprints[0].id == "bpt_123"
mock_async_client.blueprints.list.assert_awaited_once()
@pytest.mark.asyncio
@@ -712,16 +715,16 @@ async def test_create(self, mock_async_client: AsyncMock, scorer_view: MockScore
)
assert isinstance(scorer, AsyncScorer)
- assert scorer.id == "scorer_123"
+ assert scorer.id == "sco_123"
mock_async_client.scenarios.scorers.create.assert_awaited_once()
def test_from_id(self, mock_async_client: AsyncMock) -> None:
"""Test from_id method."""
ops = AsyncScorerOps(mock_async_client)
- scorer = ops.from_id("scorer_123")
+ scorer = ops.from_id("sco_123")
assert isinstance(scorer, AsyncScorer)
- assert scorer.id == "scorer_123"
+ assert scorer.id == "sco_123"
@pytest.mark.asyncio
async def test_list_empty(self, mock_async_client: AsyncMock) -> None:
@@ -756,7 +759,7 @@ async def async_iter():
assert len(scorers) == 1
assert isinstance(scorers[0], AsyncScorer)
- assert scorers[0].id == "scorer_123"
+ assert scorers[0].id == "sco_123"
mock_async_client.scenarios.scorers.list.assert_awaited_once()
@pytest.mark.asyncio
@@ -793,19 +796,20 @@ async def test_create(self, mock_async_client: AsyncMock, agent_view: MockAgentV
client = AsyncAgentOps(mock_async_client)
agent = await client.create(
name="test-agent",
+ version="1.2.3",
)
assert isinstance(agent, AsyncAgent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_async_client.agents.create.assert_called_once()
def test_from_id(self, mock_async_client: AsyncMock) -> None:
"""Test from_id method."""
client = AsyncAgentOps(mock_async_client)
- agent = client.from_id("agent_123")
+ agent = client.from_id("agt_123")
assert isinstance(agent, AsyncAgent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
@pytest.mark.asyncio
async def test_list(self, mock_async_client: AsyncMock) -> None:
@@ -901,10 +905,11 @@ async def test_create_from_npm(self, mock_async_client: AsyncMock, agent_view: M
agent = await client.create_from_npm(
name="test-agent",
package_name="@runloop/example-agent",
+ version="1.2.3",
)
assert isinstance(agent, AsyncAgent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_async_client.agents.create.assert_awaited_once_with(
source={
"type": "npm",
@@ -913,6 +918,7 @@ async def test_create_from_npm(self, mock_async_client: AsyncMock, agent_view: M
},
},
name="test-agent",
+ version="1.2.3",
)
@pytest.mark.asyncio
@@ -926,25 +932,25 @@ async def test_create_from_npm_with_all_options(
agent = await client.create_from_npm(
name="test-agent",
package_name="@runloop/example-agent",
- npm_version="1.2.3",
registry_url="https://registry.example.com",
agent_setup=["npm install", "npm run setup"],
+ version="1.2.3",
extra_headers={"X-Custom": "header"},
)
assert isinstance(agent, AsyncAgent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_async_client.agents.create.assert_awaited_once_with(
source={
"type": "npm",
"npm": {
"package_name": "@runloop/example-agent",
- "npm_version": "1.2.3",
"registry_url": "https://registry.example.com",
"agent_setup": ["npm install", "npm run setup"],
},
},
name="test-agent",
+ version="1.2.3",
extra_headers={"X-Custom": "header"},
)
@@ -957,6 +963,7 @@ async def test_create_from_npm_raises_when_source_provided(self, mock_async_clie
await client.create_from_npm(
name="test-agent",
package_name="@runloop/example-agent",
+ version="1.2.3",
source={"type": "git", "git": {"repository": "https://github.com/example/repo"}},
)
@@ -969,10 +976,11 @@ async def test_create_from_pip(self, mock_async_client: AsyncMock, agent_view: M
agent = await client.create_from_pip(
name="test-agent",
package_name="runloop-example-agent",
+ version="1.2.3",
)
assert isinstance(agent, AsyncAgent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_async_client.agents.create.assert_awaited_once_with(
source={
"type": "pip",
@@ -981,6 +989,7 @@ async def test_create_from_pip(self, mock_async_client: AsyncMock, agent_view: M
},
},
name="test-agent",
+ version="1.2.3",
)
@pytest.mark.asyncio
@@ -994,24 +1003,24 @@ async def test_create_from_pip_with_all_options(
agent = await client.create_from_pip(
name="test-agent",
package_name="runloop-example-agent",
- pip_version="1.2.3",
registry_url="https://pypi.example.com",
agent_setup=["pip install extra-deps"],
+ version="1.2.3",
)
assert isinstance(agent, AsyncAgent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_async_client.agents.create.assert_awaited_once_with(
source={
"type": "pip",
"pip": {
"package_name": "runloop-example-agent",
- "pip_version": "1.2.3",
"registry_url": "https://pypi.example.com",
"agent_setup": ["pip install extra-deps"],
},
},
name="test-agent",
+ version="1.2.3",
)
@pytest.mark.asyncio
@@ -1023,10 +1032,11 @@ async def test_create_from_git(self, mock_async_client: AsyncMock, agent_view: M
agent = await client.create_from_git(
name="test-agent",
repository="https://github.com/example/agent-repo",
+ version="1.2.3",
)
assert isinstance(agent, AsyncAgent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_async_client.agents.create.assert_awaited_once_with(
source={
"type": "git",
@@ -1035,6 +1045,7 @@ async def test_create_from_git(self, mock_async_client: AsyncMock, agent_view: M
},
},
name="test-agent",
+ version="1.2.3",
)
@pytest.mark.asyncio
@@ -1050,10 +1061,11 @@ async def test_create_from_git_with_all_options(
repository="https://github.com/example/agent-repo",
ref="develop",
agent_setup=["npm install", "npm run build"],
+ version="1.2.3",
)
assert isinstance(agent, AsyncAgent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_async_client.agents.create.assert_awaited_once_with(
source={
"type": "git",
@@ -1064,6 +1076,7 @@ async def test_create_from_git_with_all_options(
},
},
name="test-agent",
+ version="1.2.3",
)
@pytest.mark.asyncio
@@ -1075,10 +1088,11 @@ async def test_create_from_object(self, mock_async_client: AsyncMock, agent_view
agent = await client.create_from_object(
name="test-agent",
object_id="obj_123",
+ version="1.2.3",
)
assert isinstance(agent, AsyncAgent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_async_client.agents.create.assert_awaited_once_with(
source={
"type": "object",
@@ -1087,6 +1101,7 @@ async def test_create_from_object(self, mock_async_client: AsyncMock, agent_view
},
},
name="test-agent",
+ version="1.2.3",
)
@pytest.mark.asyncio
@@ -1101,10 +1116,11 @@ async def test_create_from_object_with_agent_setup(
name="test-agent",
object_id="obj_123",
agent_setup=["chmod +x setup.sh", "./setup.sh"],
+ version="1.2.3",
)
assert isinstance(agent, AsyncAgent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_async_client.agents.create.assert_awaited_once_with(
source={
"type": "object",
@@ -1114,6 +1130,7 @@ async def test_create_from_object_with_agent_setup(
},
},
name="test-agent",
+ version="1.2.3",
)
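
Every agent-creation hunk above makes the same API change: the package version moves out of the source-specific payload (npm_version / pip_version) and becomes a top-level version argument. A hedged sketch of the new call shape, with placeholder values:

    agent = await runloop.agent.create_from_npm(
        name="test-agent",
        package_name="@runloop/example-agent",
        version="1.2.3",                              # formerly npm_version in source["npm"]
        registry_url="https://registry.example.com",  # still passed inside the source payload
    )
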
@@ -1186,6 +1203,62 @@ async def async_iter():
mock_async_client.scenarios.list.assert_awaited_once()
+class TestAsyncBenchmarkOps:
+ """Tests for AsyncBenchmarkOps class."""
+
+ @pytest.mark.asyncio
+ async def test_create(self, mock_async_client: AsyncMock, benchmark_view: MockBenchmarkView) -> None:
+ """Test create method."""
+ mock_async_client.benchmarks.create = AsyncMock(return_value=benchmark_view)
+
+ ops = AsyncBenchmarkOps(mock_async_client)
+ benchmark = await ops.create(name="test-benchmark", scenario_ids=["scn_001", "scn_002"])
+
+ assert isinstance(benchmark, AsyncBenchmark)
+ assert benchmark.id == "bmd_123"
+ mock_async_client.benchmarks.create.assert_awaited_once_with(
+ name="test-benchmark", scenario_ids=["scn_001", "scn_002"]
+ )
+
+ def test_from_id(self, mock_async_client: AsyncMock) -> None:
+ """Test from_id method."""
+ ops = AsyncBenchmarkOps(mock_async_client)
+ benchmark = ops.from_id("bmd_123")
+
+ assert isinstance(benchmark, AsyncBenchmark)
+ assert benchmark.id == "bmd_123"
+
+ @pytest.mark.asyncio
+ async def test_list_multiple(self, mock_async_client: AsyncMock) -> None:
+ """Test list method with multiple results."""
+ benchmark_view1 = MockBenchmarkView(id="bmd_001", name="benchmark-1")
+ benchmark_view2 = MockBenchmarkView(id="bmd_002", name="benchmark-2")
+ page = SimpleNamespace(benchmarks=[benchmark_view1, benchmark_view2])
+ mock_async_client.benchmarks.list = AsyncMock(return_value=page)
+
+ ops = AsyncBenchmarkOps(mock_async_client)
+ benchmarks = await ops.list(limit=10)
+
+ assert len(benchmarks) == 2
+ assert isinstance(benchmarks[0], AsyncBenchmark)
+ assert isinstance(benchmarks[1], AsyncBenchmark)
+ assert benchmarks[0].id == "bmd_001"
+ assert benchmarks[1].id == "bmd_002"
+ mock_async_client.benchmarks.list.assert_awaited_once_with(limit=10)
+
+ @pytest.mark.asyncio
+ async def test_list_with_name_filter(self, mock_async_client: AsyncMock, benchmark_view: MockBenchmarkView) -> None:
+ """Test list method with name filter."""
+ page = SimpleNamespace(benchmarks=[benchmark_view])
+ mock_async_client.benchmarks.list = AsyncMock(return_value=page)
+
+ ops = AsyncBenchmarkOps(mock_async_client)
+ benchmarks = await ops.list(name="test-benchmark", limit=10)
+
+ assert len(benchmarks) == 1
+ mock_async_client.benchmarks.list.assert_awaited_once_with(name="test-benchmark", limit=10)
+
+
class TestAsyncRunloopSDK:
"""Tests for AsyncRunloopSDK class."""
@@ -1194,6 +1267,7 @@ def test_init(self) -> None:
runloop = AsyncRunloopSDK(bearer_token="test-token")
assert runloop.api is not None
assert isinstance(runloop.agent, AsyncAgentOps)
+ assert isinstance(runloop.benchmark, AsyncBenchmarkOps)
assert isinstance(runloop.devbox, AsyncDevboxOps)
assert isinstance(runloop.scorer, AsyncScorerOps)
assert isinstance(runloop.snapshot, AsyncSnapshotOps)
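
TestAsyncBenchmarkOps above documents the new benchmark surface on the async SDK. A usage sketch inferred from the mocked calls (the bearer token and IDs are placeholders; only the methods exercised by these tests are shown):

    import asyncio
    from runloop_api_client.sdk import AsyncRunloopSDK

    async def main() -> None:
        runloop = AsyncRunloopSDK(bearer_token="rl_...")
        # Create a benchmark from existing scenarios.
        benchmark = await runloop.benchmark.create(
            name="test-benchmark", scenario_ids=["scn_001", "scn_002"]
        )
        # Wrap a known ID without a network call.
        same = runloop.benchmark.from_id(benchmark.id)
        # List benchmarks, optionally filtered by name.
        benchmarks = await runloop.benchmark.list(name="test-benchmark", limit=10)
        print(same.id, len(benchmarks))

    asyncio.run(main())
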
diff --git a/tests/sdk/test_async_scenario.py b/tests/sdk/test_async_scenario.py
index 22a8f457a..cffca9b82 100644
--- a/tests/sdk/test_async_scenario.py
+++ b/tests/sdk/test_async_scenario.py
@@ -99,8 +99,8 @@ async def test_run_async(self, mock_async_client: AsyncMock, scenario_run_view:
scenario = AsyncScenario(mock_async_client, "scn_123")
run = await scenario.run_async(run_name="test-run")
- assert run.id == "run_123"
- assert run.devbox_id == "dev_123"
+ assert run.id == "scr_123"
+ assert run.devbox_id == "dbx_123"
mock_async_client.scenarios.start_run.assert_awaited_once_with(
scenario_id="scn_123",
run_name="test-run",
@@ -113,8 +113,8 @@ async def test_run(self, mock_async_client: AsyncMock, scenario_run_view: MockSc
scenario = AsyncScenario(mock_async_client, "scn_123")
run = await scenario.run(run_name="test-run")
- assert run.id == "run_123"
- assert run.devbox_id == "dev_123"
+ assert run.id == "scr_123"
+ assert run.devbox_id == "dbx_123"
mock_async_client.scenarios.start_run_and_await_env_ready.assert_awaited_once_with(
scenario_id="scn_123",
run_name="test-run",
diff --git a/tests/sdk/test_async_scenario_run.py b/tests/sdk/test_async_scenario_run.py
index 010ad6cbb..c034524a0 100644
--- a/tests/sdk/test_async_scenario_run.py
+++ b/tests/sdk/test_async_scenario_run.py
@@ -15,31 +15,31 @@ class TestAsyncScenarioRun:
def test_init(self, mock_async_client: AsyncMock) -> None:
"""Test AsyncScenarioRun initialization."""
- run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123")
- assert run.id == "run_123"
- assert run.devbox_id == "dev_123"
+ run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123")
+ assert run.id == "scr_123"
+ assert run.devbox_id == "dbx_123"
def test_repr(self, mock_async_client: AsyncMock) -> None:
"""Test AsyncScenarioRun string representation."""
- run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123")
- assert repr(run) == "<AsyncScenarioRun id=run_123 devbox_id=dev_123>"
+ run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123")
+ assert repr(run) == "<AsyncScenarioRun id=scr_123 devbox_id=dbx_123>"
def test_devbox_property(self, mock_async_client: AsyncMock) -> None:
"""Test devbox property returns AsyncDevbox wrapper."""
- run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123")
+ run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123")
devbox = run.devbox
- assert devbox.id == "dev_123"
+ assert devbox.id == "dbx_123"
async def test_get_info(self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView) -> None:
"""Test get_info method."""
mock_async_client.scenarios.runs.retrieve = AsyncMock(return_value=scenario_run_view)
- run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123")
+ run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123")
result = await run.get_info()
assert result == scenario_run_view
- mock_async_client.scenarios.runs.retrieve.assert_awaited_once_with("run_123")
+ mock_async_client.scenarios.runs.retrieve.assert_awaited_once_with("scr_123")
async def test_await_env_ready(
self,
@@ -51,10 +51,10 @@ async def test_await_env_ready(
mock_async_client.devboxes.await_running = AsyncMock(return_value=devbox_view)
mock_async_client.scenarios.runs.retrieve = AsyncMock(return_value=scenario_run_view)
- run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123")
+ run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123")
result = await run.await_env_ready()
- mock_async_client.devboxes.await_running.assert_awaited_once_with("dev_123", polling_config=None)
+ mock_async_client.devboxes.await_running.assert_awaited_once_with("dbx_123", polling_config=None)
assert result == scenario_run_view
async def test_score(self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView) -> None:
@@ -62,33 +62,33 @@ async def test_score(self, mock_async_client: AsyncMock, scenario_run_view: Mock
scenario_run_view.state = "scoring"
mock_async_client.scenarios.runs.score = AsyncMock(return_value=scenario_run_view)
- run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123")
+ run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123")
result = await run.score()
assert result == scenario_run_view
- mock_async_client.scenarios.runs.score.assert_awaited_once_with("run_123")
+ mock_async_client.scenarios.runs.score.assert_awaited_once_with("scr_123")
async def test_await_scored(self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView) -> None:
"""Test await_scored method."""
scenario_run_view.state = "scored"
mock_async_client.scenarios.runs.await_scored = AsyncMock(return_value=scenario_run_view)
- run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123")
+ run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123")
result = await run.await_scored()
assert result == scenario_run_view
- mock_async_client.scenarios.runs.await_scored.assert_awaited_once_with("run_123")
+ mock_async_client.scenarios.runs.await_scored.assert_awaited_once_with("scr_123")
async def test_score_and_await(self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView) -> None:
"""Test score_and_await method."""
scenario_run_view.state = "scored"
mock_async_client.scenarios.runs.score_and_await = AsyncMock(return_value=scenario_run_view)
- run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123")
+ run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123")
result = await run.score_and_await()
assert result == scenario_run_view
- mock_async_client.scenarios.runs.score_and_await.assert_awaited_once_with("run_123")
+ mock_async_client.scenarios.runs.score_and_await.assert_awaited_once_with("scr_123")
async def test_score_and_complete(
self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView
@@ -97,33 +97,33 @@ async def test_score_and_complete(
scenario_run_view.state = "completed"
mock_async_client.scenarios.runs.score_and_complete = AsyncMock(return_value=scenario_run_view)
- run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123")
+ run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123")
result = await run.score_and_complete()
assert result == scenario_run_view
- mock_async_client.scenarios.runs.score_and_complete.assert_awaited_once_with("run_123")
+ mock_async_client.scenarios.runs.score_and_complete.assert_awaited_once_with("scr_123")
async def test_complete(self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView) -> None:
"""Test complete method."""
scenario_run_view.state = "completed"
mock_async_client.scenarios.runs.complete = AsyncMock(return_value=scenario_run_view)
- run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123")
+ run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123")
result = await run.complete()
assert result == scenario_run_view
- mock_async_client.scenarios.runs.complete.assert_awaited_once_with("run_123")
+ mock_async_client.scenarios.runs.complete.assert_awaited_once_with("scr_123")
async def test_cancel(self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView) -> None:
"""Test cancel method."""
scenario_run_view.state = "canceled"
mock_async_client.scenarios.runs.cancel = AsyncMock(return_value=scenario_run_view)
- run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123")
+ run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123")
result = await run.cancel()
assert result == scenario_run_view
- mock_async_client.scenarios.runs.cancel.assert_awaited_once_with("run_123")
+ mock_async_client.scenarios.runs.cancel.assert_awaited_once_with("scr_123")
async def test_download_logs(self, mock_async_client: AsyncMock, tmp_path: Path) -> None:
"""Test download_logs method writes to file."""
@@ -131,11 +131,11 @@ async def test_download_logs(self, mock_async_client: AsyncMock, tmp_path: Path)
mock_response.write_to_file = AsyncMock()
mock_async_client.scenarios.runs.download_logs = AsyncMock(return_value=mock_response)
- run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123")
+ run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123")
output_path = tmp_path / "logs.zip"
await run.download_logs(output_path)
- mock_async_client.scenarios.runs.download_logs.assert_awaited_once_with("run_123")
+ mock_async_client.scenarios.runs.download_logs.assert_awaited_once_with("scr_123")
mock_response.write_to_file.assert_awaited_once_with(output_path)
async def test_get_score_when_scored(self, mock_async_client: AsyncMock) -> None:
@@ -144,19 +144,19 @@ async def test_get_score_when_scored(self, mock_async_client: AsyncMock) -> None
run_view = MockScenarioRunView(state="scored", scoring_contract_result=scoring_result)
mock_async_client.scenarios.runs.retrieve = AsyncMock(return_value=run_view)
- run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123")
+ run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123")
result = await run.get_score()
assert result == scoring_result
- mock_async_client.scenarios.runs.retrieve.assert_awaited_once_with("run_123")
+ mock_async_client.scenarios.runs.retrieve.assert_awaited_once_with("scr_123")
async def test_get_score_when_not_scored(self, mock_async_client: AsyncMock) -> None:
"""Test get_score returns None when not scored."""
run_view = MockScenarioRunView(state="running", scoring_contract_result=None)
mock_async_client.scenarios.runs.retrieve = AsyncMock(return_value=run_view)
- run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123")
+ run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123")
result = await run.get_score()
assert result is None
- mock_async_client.scenarios.runs.retrieve.assert_awaited_once_with("run_123")
+ mock_async_client.scenarios.runs.retrieve.assert_awaited_once_with("scr_123")
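
Taken together, the tests above trace the scenario-run lifecycle: wait for the backing devbox, score, then finalize. A compact sketch of that flow, assuming run came from AsyncScenario.run(...):

    await run.await_env_ready()           # blocks until the devbox (dbx_*) is running
    scored = await run.score_and_await()  # submits scoring and waits for "scored"
    result = await run.get_score()        # scoring_contract_result, or None if unscored
    await run.complete()                  # transitions the run to "completed"
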
diff --git a/tests/sdk/test_async_scorer.py b/tests/sdk/test_async_scorer.py
index a3eeea884..253ae9585 100644
--- a/tests/sdk/test_async_scorer.py
+++ b/tests/sdk/test_async_scorer.py
@@ -16,20 +16,20 @@ class TestAsyncScorer:
def test_init(self, mock_async_client: AsyncMock) -> None:
"""Test AsyncScorer initialization."""
- scorer = AsyncScorer(mock_async_client, "scorer_123")
- assert scorer.id == "scorer_123"
+ scorer = AsyncScorer(mock_async_client, "sco_123")
+ assert scorer.id == "sco_123"
def test_repr(self, mock_async_client: AsyncMock) -> None:
"""Test AsyncScorer string representation."""
- scorer = AsyncScorer(mock_async_client, "scorer_123")
- assert repr(scorer) == "<AsyncScorer id=scorer_123>"
+ scorer = AsyncScorer(mock_async_client, "sco_123")
+ assert repr(scorer) == "<AsyncScorer id=sco_123>"
@pytest.mark.asyncio
async def test_get_info(self, mock_async_client: AsyncMock, scorer_view: MockScorerView) -> None:
"""Test get_info method."""
mock_async_client.scenarios.scorers.retrieve = AsyncMock(return_value=scorer_view)
- scorer = AsyncScorer(mock_async_client, "scorer_123")
+ scorer = AsyncScorer(mock_async_client, "sco_123")
result = await scorer.get_info()
assert result == scorer_view
@@ -38,10 +38,10 @@ async def test_get_info(self, mock_async_client: AsyncMock, scorer_view: MockSco
@pytest.mark.asyncio
async def test_update(self, mock_async_client: AsyncMock) -> None:
"""Test update method."""
- update_response = SimpleNamespace(id="scorer_123", type="updated_scorer", bash_script="echo 'score=1.0'")
+ update_response = SimpleNamespace(id="sco_123", type="updated_scorer", bash_script="echo 'score=1.0'")
mock_async_client.scenarios.scorers.update = AsyncMock(return_value=update_response)
- scorer = AsyncScorer(mock_async_client, "scorer_123")
+ scorer = AsyncScorer(mock_async_client, "sco_123")
result = await scorer.update(
type="updated_scorer",
bash_script="echo 'score=1.0'",
@@ -60,7 +60,7 @@ async def test_validate(self, mock_async_client: AsyncMock) -> None:
)
mock_async_client.scenarios.scorers.validate = AsyncMock(return_value=validate_response)
- scorer = AsyncScorer(mock_async_client, "scorer_123")
+ scorer = AsyncScorer(mock_async_client, "sco_123")
result = await scorer.validate(
scoring_context={"test": "context"},
)
diff --git a/tests/sdk/test_async_snapshot.py b/tests/sdk/test_async_snapshot.py
index a7b946c11..e9dca48bc 100644
--- a/tests/sdk/test_async_snapshot.py
+++ b/tests/sdk/test_async_snapshot.py
@@ -17,20 +17,20 @@ class TestAsyncSnapshot:
def test_init(self, mock_async_client: AsyncMock) -> None:
"""Test AsyncSnapshot initialization."""
- snapshot = AsyncSnapshot(mock_async_client, "snap_123")
- assert snapshot.id == "snap_123"
+ snapshot = AsyncSnapshot(mock_async_client, "snp_123")
+ assert snapshot.id == "snp_123"
def test_repr(self, mock_async_client: AsyncMock) -> None:
"""Test AsyncSnapshot string representation."""
- snapshot = AsyncSnapshot(mock_async_client, "snap_123")
- assert repr(snapshot) == "<AsyncSnapshot id=snap_123>"
+ snapshot = AsyncSnapshot(mock_async_client, "snp_123")
+ assert repr(snapshot) == "<AsyncSnapshot id=snp_123>"
@pytest.mark.asyncio
async def test_get_info(self, mock_async_client: AsyncMock, snapshot_view: MockSnapshotView) -> None:
"""Test get_info method."""
mock_async_client.devboxes.disk_snapshots.query_status = AsyncMock(return_value=snapshot_view)
- snapshot = AsyncSnapshot(mock_async_client, "snap_123")
+ snapshot = AsyncSnapshot(mock_async_client, "snp_123")
result = await snapshot.get_info(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -44,10 +44,10 @@ async def test_get_info(self, mock_async_client: AsyncMock, snapshot_view: MockS
@pytest.mark.asyncio
async def test_update(self, mock_async_client: AsyncMock) -> None:
"""Test update method."""
- updated_snapshot = SimpleNamespace(id="snap_123", name="updated-name")
+ updated_snapshot = SimpleNamespace(id="snp_123", name="updated-name")
mock_async_client.devboxes.disk_snapshots.update = AsyncMock(return_value=updated_snapshot)
- snapshot = AsyncSnapshot(mock_async_client, "snap_123")
+ snapshot = AsyncSnapshot(mock_async_client, "snp_123")
result = await snapshot.update(
commit_message="Update message",
metadata={"key": "value"},
@@ -67,7 +67,7 @@ async def test_delete(self, mock_async_client: AsyncMock) -> None:
"""Test delete method."""
mock_async_client.devboxes.disk_snapshots.delete = AsyncMock(return_value=object())
- snapshot = AsyncSnapshot(mock_async_client, "snap_123")
+ snapshot = AsyncSnapshot(mock_async_client, "snp_123")
result = await snapshot.delete(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -85,7 +85,7 @@ async def test_await_completed(self, mock_async_client: AsyncMock, snapshot_view
mock_async_client.devboxes.disk_snapshots.await_completed = AsyncMock(return_value=snapshot_view)
polling_config = PollingConfig(timeout_seconds=60.0)
- snapshot = AsyncSnapshot(mock_async_client, "snap_123")
+ snapshot = AsyncSnapshot(mock_async_client, "snp_123")
result = await snapshot.await_completed(
polling_config=polling_config,
extra_headers={"X-Custom": "value"},
@@ -102,7 +102,7 @@ async def test_create_devbox(self, mock_async_client: AsyncMock, devbox_view: Mo
"""Test create_devbox method."""
mock_async_client.devboxes.create_and_await_running = AsyncMock(return_value=devbox_view)
- snapshot = AsyncSnapshot(mock_async_client, "snap_123")
+ snapshot = AsyncSnapshot(mock_async_client, "snp_123")
devbox = await snapshot.create_devbox(
name="test-devbox",
metadata={"key": "value"},
@@ -110,5 +110,5 @@ async def test_create_devbox(self, mock_async_client: AsyncMock, devbox_view: Mo
extra_headers={"X-Custom": "value"},
)
- assert devbox.id == "dev_123"
+ assert devbox.id == "dbx_123"
mock_async_client.devboxes.create_and_await_running.assert_awaited_once()
diff --git a/tests/sdk/test_benchmark.py b/tests/sdk/test_benchmark.py
new file mode 100644
index 000000000..1f4f12751
--- /dev/null
+++ b/tests/sdk/test_benchmark.py
@@ -0,0 +1,122 @@
+"""Comprehensive tests for sync Benchmark class."""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+from unittest.mock import Mock
+
+from tests.sdk.conftest import MockBenchmarkView, MockBenchmarkRunView
+from runloop_api_client.sdk.benchmark import Benchmark
+from runloop_api_client.sdk.benchmark_run import BenchmarkRun
+
+
+class TestBenchmark:
+ """Tests for Benchmark class."""
+
+ def test_init(self, mock_client: Mock) -> None:
+ """Test Benchmark initialization."""
+ benchmark = Benchmark(mock_client, "bmd_123")
+ assert benchmark.id == "bmd_123"
+ assert repr(benchmark) == "<Benchmark id=bmd_123>"
+
+ def test_get_info(self, mock_client: Mock, benchmark_view: MockBenchmarkView) -> None:
+ """Test get_info method."""
+ mock_client.benchmarks.retrieve.return_value = benchmark_view
+
+ benchmark = Benchmark(mock_client, "bmd_123")
+ result = benchmark.get_info()
+
+ assert result == benchmark_view
+ mock_client.benchmarks.retrieve.assert_called_once_with("bmd_123")
+
+ def test_update(self, mock_client: Mock, benchmark_view: MockBenchmarkView) -> None:
+ """Test update method."""
+ benchmark_view.name = "updated-name"
+ mock_client.benchmarks.update.return_value = benchmark_view
+
+ benchmark = Benchmark(mock_client, "bmd_123")
+ result = benchmark.update(name="updated-name")
+
+ assert result == benchmark_view
+ mock_client.benchmarks.update.assert_called_once_with("bmd_123", name="updated-name")
+
+ def test_start_run(self, mock_client: Mock, benchmark_run_view: MockBenchmarkRunView) -> None:
+ """Test start_run method."""
+ mock_client.benchmarks.start_run.return_value = benchmark_run_view
+
+ benchmark = Benchmark(mock_client, "bmd_123")
+ result = benchmark.start_run(run_name="test-run", metadata={"key": "value"})
+
+ assert isinstance(result, BenchmarkRun)
+ assert result.id == benchmark_run_view.id
+ assert result.benchmark_id == benchmark_run_view.benchmark_id
+ mock_client.benchmarks.start_run.assert_called_once_with(
+ benchmark_id="bmd_123", run_name="test-run", metadata={"key": "value"}
+ )
+
+ def test_add_scenarios(self, mock_client: Mock, benchmark_view: MockBenchmarkView) -> None:
+ """Test add_scenarios method."""
+ benchmark_view.scenario_ids = ["scn_001", "scn_002"]
+ mock_client.benchmarks.update_scenarios.return_value = benchmark_view
+
+ benchmark = Benchmark(mock_client, "bmd_123")
+ result = benchmark.add_scenarios(["scn_001", "scn_002"])
+
+ assert result == benchmark_view
+ mock_client.benchmarks.update_scenarios.assert_called_once_with(
+ "bmd_123", scenarios_to_add=["scn_001", "scn_002"]
+ )
+
+ def test_remove_scenarios(self, mock_client: Mock, benchmark_view: MockBenchmarkView) -> None:
+ """Test remove_scenarios method."""
+ mock_client.benchmarks.update_scenarios.return_value = benchmark_view
+
+ benchmark = Benchmark(mock_client, "bmd_123")
+ result = benchmark.remove_scenarios(["scn_001"])
+
+ assert result == benchmark_view
+ mock_client.benchmarks.update_scenarios.assert_called_once_with("bmd_123", scenarios_to_remove=["scn_001"])
+
+ def test_list_runs_single(self, mock_client: Mock, benchmark_run_view: MockBenchmarkRunView) -> None:
+ """Test list_runs method with single result."""
+ page = SimpleNamespace(runs=[benchmark_run_view])
+ mock_client.benchmarks.runs.list.return_value = page
+
+ benchmark = Benchmark(mock_client, "bmd_123")
+ result = benchmark.list_runs()
+
+ assert len(result) == 1
+ assert isinstance(result[0], BenchmarkRun)
+ assert result[0].id == benchmark_run_view.id
+ assert result[0].benchmark_id == benchmark_run_view.benchmark_id
+ mock_client.benchmarks.runs.list.assert_called_once_with(benchmark_id="bmd_123")
+
+ def test_list_runs_multiple(self, mock_client: Mock) -> None:
+ """Test list_runs method with multiple results."""
+ run_view1 = MockBenchmarkRunView(id="bmr_001")
+ run_view2 = MockBenchmarkRunView(id="bmr_002")
+ page = SimpleNamespace(runs=[run_view1, run_view2])
+ mock_client.benchmarks.runs.list.return_value = page
+
+ benchmark = Benchmark(mock_client, "bmd_123")
+ result = benchmark.list_runs()
+
+ assert len(result) == 2
+ assert isinstance(result[0], BenchmarkRun)
+ assert isinstance(result[1], BenchmarkRun)
+ assert result[0].id == run_view1.id
+ assert result[0].benchmark_id == run_view1.benchmark_id
+ assert result[1].id == run_view2.id
+ assert result[1].benchmark_id == run_view2.benchmark_id
+ mock_client.benchmarks.runs.list.assert_called_once_with(benchmark_id="bmd_123")
+
+ def test_list_runs_with_params(self, mock_client: Mock, benchmark_run_view: MockBenchmarkRunView) -> None:
+ """Test list_runs method with filtering parameters."""
+ page = SimpleNamespace(runs=[benchmark_run_view])
+ mock_client.benchmarks.runs.list.return_value = page
+
+ benchmark = Benchmark(mock_client, "bmd_123")
+ result = benchmark.list_runs(limit=10, name="test-run")
+
+ assert len(result) == 1
+ mock_client.benchmarks.runs.list.assert_called_once_with(benchmark_id="bmd_123", limit=10, name="test-run")
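
The new test_benchmark.py pins down the sync Benchmark wrapper. A usage sketch based only on the calls mocked above (the sync entry point name RunloopSDK is an assumption mirroring AsyncRunloopSDK, and all IDs are placeholders):

    from runloop_api_client.sdk import RunloopSDK

    runloop = RunloopSDK(bearer_token="rl_...")
    benchmark = runloop.benchmark.from_id("bmd_123")
    benchmark.add_scenarios(["scn_001", "scn_002"])   # delegates to benchmarks.update_scenarios
    run = benchmark.start_run(run_name="nightly")     # returns a BenchmarkRun wrapper
    for past_run in benchmark.list_runs(limit=10):
        print(past_run.id, past_run.benchmark_id)
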
diff --git a/tests/sdk/test_benchmark_run.py b/tests/sdk/test_benchmark_run.py
new file mode 100644
index 000000000..e7a826a90
--- /dev/null
+++ b/tests/sdk/test_benchmark_run.py
@@ -0,0 +1,114 @@
+"""Comprehensive tests for sync BenchmarkRun class."""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+from unittest.mock import Mock
+
+from tests.sdk.conftest import MockScenarioRunView, MockBenchmarkRunView
+from runloop_api_client.sdk.scenario_run import ScenarioRun
+from runloop_api_client.sdk.benchmark_run import BenchmarkRun
+
+
+class TestBenchmarkRun:
+ """Tests for BenchmarkRun class."""
+
+ def test_init(self, mock_client: Mock) -> None:
+ """Test BenchmarkRun initialization."""
+ run = BenchmarkRun(mock_client, "bmr_123", "bmd_123")
+ assert run.id == "bmr_123"
+ assert run.benchmark_id == "bmd_123"
+
+ def test_repr(self, mock_client: Mock) -> None:
+ """Test BenchmarkRun string representation."""
+ run = BenchmarkRun(mock_client, "bmr_123", "bmd_123")
+ assert repr(run) == "<BenchmarkRun id=bmr_123 benchmark_id=bmd_123>"
+
+ def test_get_info(self, mock_client: Mock, benchmark_run_view: MockBenchmarkRunView) -> None:
+ """Test get_info method."""
+ mock_client.benchmarks.runs.retrieve.return_value = benchmark_run_view
+
+ run = BenchmarkRun(mock_client, "bmr_123", "bmd_123")
+ result = run.get_info()
+
+ assert result == benchmark_run_view
+ mock_client.benchmarks.runs.retrieve.assert_called_once_with("bmr_123")
+
+ def test_cancel(self, mock_client: Mock, benchmark_run_view: MockBenchmarkRunView) -> None:
+ """Test cancel method."""
+ benchmark_run_view.state = "canceled"
+ mock_client.benchmarks.runs.cancel.return_value = benchmark_run_view
+
+ run = BenchmarkRun(mock_client, "bmr_123", "bmd_123")
+ result = run.cancel()
+
+ assert result == benchmark_run_view
+ assert result.state == "canceled"
+ mock_client.benchmarks.runs.cancel.assert_called_once_with("bmr_123")
+
+ def test_complete(self, mock_client: Mock, benchmark_run_view: MockBenchmarkRunView) -> None:
+ """Test complete method."""
+ benchmark_run_view.state = "completed"
+ mock_client.benchmarks.runs.complete.return_value = benchmark_run_view
+
+ run = BenchmarkRun(mock_client, "bmr_123", "bmd_123")
+ result = run.complete()
+
+ assert result == benchmark_run_view
+ assert result.state == "completed"
+ mock_client.benchmarks.runs.complete.assert_called_once_with("bmr_123")
+
+ def test_list_scenario_runs_empty(self, mock_client: Mock) -> None:
+ """Test list_scenario_runs method with empty results."""
+ page = SimpleNamespace(runs=[])
+ mock_client.benchmarks.runs.list_scenario_runs.return_value = page
+
+ run = BenchmarkRun(mock_client, "bmr_123", "bmd_123")
+ result = run.list_scenario_runs()
+
+ assert len(result) == 0
+ mock_client.benchmarks.runs.list_scenario_runs.assert_called_once_with("bmr_123")
+
+ def test_list_scenario_runs_single(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None:
+ """Test list_scenario_runs method with single result."""
+ page = SimpleNamespace(runs=[scenario_run_view])
+ mock_client.benchmarks.runs.list_scenario_runs.return_value = page
+
+ run = BenchmarkRun(mock_client, "bmr_123", "bmd_123")
+ result = run.list_scenario_runs()
+
+ assert len(result) == 1
+ assert isinstance(result[0], ScenarioRun)
+ assert result[0].id == scenario_run_view.id
+ assert result[0].devbox_id == scenario_run_view.devbox_id
+ mock_client.benchmarks.runs.list_scenario_runs.assert_called_once_with("bmr_123")
+
+ def test_list_scenario_runs_multiple(self, mock_client: Mock) -> None:
+ """Test list_scenario_runs method with multiple results."""
+ scenario_run_view1 = MockScenarioRunView(id="scr_001", devbox_id="dev_001")
+ scenario_run_view2 = MockScenarioRunView(id="scr_002", devbox_id="dev_002")
+ page = SimpleNamespace(runs=[scenario_run_view1, scenario_run_view2])
+ mock_client.benchmarks.runs.list_scenario_runs.return_value = page
+
+ run = BenchmarkRun(mock_client, "bmr_123", "bmd_123")
+ result = run.list_scenario_runs()
+
+ assert len(result) == 2
+ assert isinstance(result[0], ScenarioRun)
+ assert isinstance(result[1], ScenarioRun)
+ assert result[0].id == "scr_001"
+ assert result[1].id == "scr_002"
+ mock_client.benchmarks.runs.list_scenario_runs.assert_called_once_with("bmr_123")
+
+ def test_list_scenario_runs_with_params(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None:
+ """Test list_scenario_runs method with filtering parameters."""
+ page = SimpleNamespace(runs=[scenario_run_view])
+ mock_client.benchmarks.runs.list_scenario_runs.return_value = page
+
+ run = BenchmarkRun(mock_client, "bmr_123", "bmd_123")
+ result = run.list_scenario_runs(limit=10, state="completed")
+
+ assert len(result) == 1
+ assert isinstance(result[0], ScenarioRun)
+ assert result[0].id == scenario_run_view.id
+ mock_client.benchmarks.runs.list_scenario_runs.assert_called_once_with("bmr_123", limit=10, state="completed")
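
BenchmarkRun fans out to per-scenario runs, as the list_scenario_runs tests above show. A short sketch of inspecting those child runs (IDs and the state filter value are placeholders):

    # `run` is a BenchmarkRun; each element is a ScenarioRun wrapper.
    for scenario_run in run.list_scenario_runs(state="completed", limit=50):
        info = scenario_run.get_info()
        print(scenario_run.id, scenario_run.devbox_id, info.state)
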
diff --git a/tests/sdk/test_blueprint.py b/tests/sdk/test_blueprint.py
index 2c6bc6580..40cbed3f6 100644
--- a/tests/sdk/test_blueprint.py
+++ b/tests/sdk/test_blueprint.py
@@ -14,19 +14,19 @@ class TestBlueprint:
def test_init(self, mock_client: Mock) -> None:
"""Test Blueprint initialization."""
- blueprint = Blueprint(mock_client, "bp_123")
- assert blueprint.id == "bp_123"
+ blueprint = Blueprint(mock_client, "bpt_123")
+ assert blueprint.id == "bpt_123"
def test_repr(self, mock_client: Mock) -> None:
"""Test Blueprint string representation."""
- blueprint = Blueprint(mock_client, "bp_123")
- assert repr(blueprint) == "<Blueprint id=bp_123>"
+ blueprint = Blueprint(mock_client, "bpt_123")
+ assert repr(blueprint) == "<Blueprint id=bpt_123>"
def test_get_info(self, mock_client: Mock, blueprint_view: MockBlueprintView) -> None:
"""Test get_info method."""
mock_client.blueprints.retrieve.return_value = blueprint_view
- blueprint = Blueprint(mock_client, "bp_123")
+ blueprint = Blueprint(mock_client, "bpt_123")
result = blueprint.get_info(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -36,7 +36,7 @@ def test_get_info(self, mock_client: Mock, blueprint_view: MockBlueprintView) ->
assert result == blueprint_view
mock_client.blueprints.retrieve.assert_called_once_with(
- "bp_123",
+ "bpt_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
@@ -48,7 +48,7 @@ def test_logs(self, mock_client: Mock) -> None:
logs_view = SimpleNamespace(logs=[])
mock_client.blueprints.logs.return_value = logs_view
- blueprint = Blueprint(mock_client, "bp_123")
+ blueprint = Blueprint(mock_client, "bpt_123")
result = blueprint.logs(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -58,7 +58,7 @@ def test_logs(self, mock_client: Mock) -> None:
assert result == logs_view
mock_client.blueprints.logs.assert_called_once_with(
- "bp_123",
+ "bpt_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
@@ -69,7 +69,7 @@ def test_delete(self, mock_client: Mock) -> None:
"""Test delete method."""
mock_client.blueprints.delete.return_value = object()
- blueprint = Blueprint(mock_client, "bp_123")
+ blueprint = Blueprint(mock_client, "bpt_123")
result = blueprint.delete(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -79,7 +79,7 @@ def test_delete(self, mock_client: Mock) -> None:
assert result is not None # Verify return value is propagated
mock_client.blueprints.delete.assert_called_once_with(
- "bp_123",
+ "bpt_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
@@ -90,7 +90,7 @@ def test_create_devbox(self, mock_client: Mock, devbox_view: MockDevboxView) ->
"""Test create_devbox method."""
mock_client.devboxes.create_and_await_running.return_value = devbox_view
- blueprint = Blueprint(mock_client, "bp_123")
+ blueprint = Blueprint(mock_client, "bpt_123")
devbox = blueprint.create_devbox(
name="test-devbox",
metadata={"key": "value"},
@@ -98,9 +98,9 @@ def test_create_devbox(self, mock_client: Mock, devbox_view: MockDevboxView) ->
extra_headers={"X-Custom": "value"},
)
- assert devbox.id == "dev_123"
+ assert devbox.id == "dbx_123"
mock_client.devboxes.create_and_await_running.assert_called_once()
call_kwargs = mock_client.devboxes.create_and_await_running.call_args[1]
- assert call_kwargs["blueprint_id"] == "bp_123"
+ assert call_kwargs["blueprint_id"] == "bpt_123"
assert call_kwargs["name"] == "test-devbox"
assert call_kwargs["metadata"] == {"key": "value"}
diff --git a/tests/sdk/test_execution.py b/tests/sdk/test_execution.py
index 63b244d0e..249d670bf 100644
--- a/tests/sdk/test_execution.py
+++ b/tests/sdk/test_execution.py
@@ -83,9 +83,9 @@ class TestExecution:
def test_init(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test Execution initialization."""
- execution = Execution(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
- assert execution.execution_id == "exec_123"
- assert execution.devbox_id == "dev_123"
+ execution = Execution(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
+ assert execution.execution_id == "exn_123"
+ assert execution.devbox_id == "dbx_123"
assert execution._initial_result == execution_view
def test_init_with_streaming_group(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
@@ -94,46 +94,46 @@ def test_init_with_streaming_group(self, mock_client: Mock, execution_view: Mock
stop_event = threading.Event()
streaming_group = _StreamingGroup(threads, stop_event)
- execution = Execution(mock_client, "dev_123", execution_view, streaming_group) # type: ignore[arg-type]
+ execution = Execution(mock_client, "dbx_123", execution_view, streaming_group) # type: ignore[arg-type]
assert execution._streaming_group is streaming_group
def test_properties(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test Execution properties."""
- execution = Execution(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
- assert execution.execution_id == "exec_123"
- assert execution.devbox_id == "dev_123"
+ execution = Execution(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
+ assert execution.execution_id == "exn_123"
+ assert execution.devbox_id == "dbx_123"
def test_repr(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test Execution repr formatting."""
- execution = Execution(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
- assert repr(execution) == "<Execution execution_id='exec_123' devbox_id='dev_123'>"
+ execution = Execution(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
+ assert repr(execution) == "<Execution execution_id='exn_123' devbox_id='dbx_123'>"
def test_result_already_completed(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test result delegates to wait_for_command when already completed."""
mock_client.devboxes = Mock()
mock_client.devboxes.wait_for_command.return_value = execution_view
- execution = Execution(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
+ execution = Execution(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
result = execution.result()
assert result.exit_code == 0
assert result.stdout(num_lines=10) == "output"
mock_client.devboxes.wait_for_command.assert_called_once_with(
- "exec_123",
- devbox_id="dev_123",
+ "exn_123",
+ devbox_id="dbx_123",
statuses=["completed"],
)
def test_result_needs_polling(self, mock_client: Mock) -> None:
"""Test result when execution needs to poll for completion."""
running_execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
)
completed_execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="output",
@@ -145,27 +145,27 @@ def test_result_needs_polling(self, mock_client: Mock) -> None:
mock_client.devboxes = Mock()
mock_client.devboxes.wait_for_command.return_value = completed_execution
- execution = Execution(mock_client, "dev_123", running_execution) # type: ignore[arg-type]
+ execution = Execution(mock_client, "dbx_123", running_execution) # type: ignore[arg-type]
result = execution.result()
assert result.exit_code == 0
assert result.stdout(num_lines=10) == "output"
mock_client.devboxes.wait_for_command.assert_called_once_with(
- "exec_123",
- devbox_id="dev_123",
+ "exn_123",
+ devbox_id="dbx_123",
statuses=["completed"],
)
def test_result_with_streaming_group(self, mock_client: Mock) -> None:
"""Test result waits for streaming group to finish."""
running_execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
)
completed_execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="output",
@@ -180,7 +180,7 @@ def test_result_with_streaming_group(self, mock_client: Mock) -> None:
thread.start()
streaming_group = _StreamingGroup([thread], stop_event)
- execution = Execution(mock_client, "dev_123", running_execution, streaming_group) # type: ignore[arg-type]
+ execution = Execution(mock_client, "dbx_123", running_execution, streaming_group) # type: ignore[arg-type]
result = execution.result()
assert result.exit_code == 0
@@ -190,8 +190,8 @@ def test_result_with_streaming_group(self, mock_client: Mock) -> None:
def test_result_passes_options(self, mock_client: Mock) -> None:
"""Ensure options are forwarded to wait_for_command."""
execution_view = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="output",
@@ -201,12 +201,12 @@ def test_result_passes_options(self, mock_client: Mock) -> None:
mock_client.devboxes = Mock()
mock_client.devboxes.wait_for_command.return_value = execution_view
- execution = Execution(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
+ execution = Execution(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
execution.result(timeout=30.0, idempotency_key="abc123")
mock_client.devboxes.wait_for_command.assert_called_once_with(
- "exec_123",
- devbox_id="dev_123",
+ "exn_123",
+ devbox_id="dbx_123",
statuses=["completed"],
timeout=30.0,
idempotency_key="abc123",
@@ -215,31 +215,31 @@ def test_result_passes_options(self, mock_client: Mock) -> None:
def test_get_state(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test get_state method."""
updated_execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
)
mock_client.devboxes.executions = Mock()
mock_client.devboxes.executions.retrieve.return_value = updated_execution
- execution = Execution(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
+ execution = Execution(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
result = execution.get_state()
assert result == updated_execution
assert execution._initial_result == execution_view
mock_client.devboxes.executions.retrieve.assert_called_once_with(
- "exec_123",
- devbox_id="dev_123",
+ "exn_123",
+ devbox_id="dbx_123",
)
def test_kill(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test kill method."""
mock_client.devboxes.executions.kill.return_value = None
- execution = Execution(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
+ execution = Execution(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
execution.kill()
mock_client.devboxes.executions.kill.assert_called_once_with(
- "exec_123",
- devbox_id="dev_123",
+ "exn_123",
+ devbox_id="dbx_123",
)
diff --git a/tests/sdk/test_execution_result.py b/tests/sdk/test_execution_result.py
index 689b108d5..7bc4fbfef 100644
--- a/tests/sdk/test_execution_result.py
+++ b/tests/sdk/test_execution_result.py
@@ -14,31 +14,31 @@ class TestExecutionResult:
def test_init(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test ExecutionResult initialization."""
- result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
# Verify via public API
- assert result.devbox_id == "dev_123"
- assert result.execution_id == "exec_123"
+ assert result.devbox_id == "dbx_123"
+ assert result.execution_id == "exn_123"
def test_devbox_id_property(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test devbox_id property."""
- result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
- assert result.devbox_id == "dev_123"
+ result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
+ assert result.devbox_id == "dbx_123"
def test_execution_id_property(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test execution_id property."""
- result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
- assert result.execution_id == "exec_123"
+ result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
+ assert result.execution_id == "exn_123"
def test_exit_code_property(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test exit_code property."""
- result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
assert result.exit_code == 0
def test_exit_code_none(self, mock_client: Mock) -> None:
"""Test exit_code property when exit_status is None."""
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
exit_status=None,
stdout="",
@@ -46,19 +46,19 @@ def test_exit_code_none(self, mock_client: Mock) -> None:
stdout_truncated=False,
stderr_truncated=False,
)
- result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution) # type: ignore[arg-type]
assert result.exit_code is None
def test_success_property(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test success property."""
- result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
assert result.success is True
def test_success_false(self, mock_client: Mock) -> None:
"""Test success property when exit code is non-zero."""
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=1,
stdout="",
@@ -66,19 +66,19 @@ def test_success_false(self, mock_client: Mock) -> None:
stdout_truncated=False,
stderr_truncated=False,
)
- result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution) # type: ignore[arg-type]
assert result.success is False
def test_failed_property(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test failed property when exit code is zero."""
- result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
assert result.failed is False
def test_failed_true(self, mock_client: Mock) -> None:
"""Test failed property when exit code is non-zero."""
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=1,
stdout="",
@@ -86,14 +86,14 @@ def test_failed_true(self, mock_client: Mock) -> None:
stdout_truncated=False,
stderr_truncated=False,
)
- result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution) # type: ignore[arg-type]
assert result.failed is True
def test_failed_none(self, mock_client: Mock) -> None:
"""Test failed property when exit_status is None."""
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
exit_status=None,
stdout="",
@@ -101,20 +101,20 @@ def test_failed_none(self, mock_client: Mock) -> None:
stdout_truncated=False,
stderr_truncated=False,
)
- result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution) # type: ignore[arg-type]
assert result.failed is False
def test_stdout(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test stdout method."""
- result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
assert result.stdout() == "output"
assert result.stdout(num_lines=10) == "output"
def test_stdout_empty(self, mock_client: Mock) -> None:
"""Test stdout method when stdout is None."""
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout=None,
@@ -122,14 +122,14 @@ def test_stdout_empty(self, mock_client: Mock) -> None:
stdout_truncated=False,
stderr_truncated=False,
)
- result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution) # type: ignore[arg-type]
assert result.stdout() == ""
def test_stderr(self, mock_client: Mock) -> None:
"""Test stderr method."""
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=1,
stdout="",
@@ -137,18 +137,18 @@ def test_stderr(self, mock_client: Mock) -> None:
stdout_truncated=False,
stderr_truncated=False,
)
- result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution) # type: ignore[arg-type]
assert result.stderr() == "error message"
assert result.stderr(num_lines=20) == "error message"
def test_stderr_empty(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test stderr method when stderr is None."""
- result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
assert result.stderr() == ""
def test_result_property(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test result property."""
- result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
assert result.result == execution_view
def test_stdout_with_truncation_and_streaming(self, mock_client: Mock, mock_stream: Mock) -> None:
@@ -165,8 +165,8 @@ def test_stdout_with_truncation_and_streaming(self, mock_client: Mock, mock_stre
mock_client.devboxes.executions.stream_stdout_updates = Mock(return_value=mock_stream)
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="partial",
@@ -174,12 +174,12 @@ def test_stdout_with_truncation_and_streaming(self, mock_client: Mock, mock_stre
stdout_truncated=True,
stderr_truncated=False,
)
- result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution) # type: ignore[arg-type]
# Should stream full output
output = result.stdout()
assert output == "line1\nline2\nline3\n"
- mock_client.devboxes.executions.stream_stdout_updates.assert_called_once_with("exec_123", devbox_id="dev_123")
+ mock_client.devboxes.executions.stream_stdout_updates.assert_called_once_with("exn_123", devbox_id="dbx_123")
def test_stderr_with_truncation_and_streaming(self, mock_client: Mock, mock_stream: Mock) -> None:
"""Test stderr streams full output when truncated."""
@@ -194,8 +194,8 @@ def test_stderr_with_truncation_and_streaming(self, mock_client: Mock, mock_stre
mock_client.devboxes.executions.stream_stderr_updates = Mock(return_value=mock_stream)
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="",
@@ -203,12 +203,12 @@ def test_stderr_with_truncation_and_streaming(self, mock_client: Mock, mock_stre
stdout_truncated=False,
stderr_truncated=True,
)
- result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution) # type: ignore[arg-type]
# Should stream full output
output = result.stderr()
assert output == "error1\nerror2\n"
- mock_client.devboxes.executions.stream_stderr_updates.assert_called_once_with("exec_123", devbox_id="dev_123")
+ mock_client.devboxes.executions.stream_stderr_updates.assert_called_once_with("exn_123", devbox_id="dbx_123")
def test_stdout_with_num_lines_when_truncated(self, mock_client: Mock, mock_stream: Mock) -> None:
"""Test stdout with num_lines parameter when truncated."""
@@ -223,8 +223,8 @@ def test_stdout_with_num_lines_when_truncated(self, mock_client: Mock, mock_stre
mock_client.devboxes.executions.stream_stdout_updates = Mock(return_value=mock_stream)
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="line1\n",
@@ -232,7 +232,7 @@ def test_stdout_with_num_lines_when_truncated(self, mock_client: Mock, mock_stre
stdout_truncated=True,
stderr_truncated=False,
)
- result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution) # type: ignore[arg-type]
# Should stream and return last 2 lines
output = result.stdout(num_lines=2)
@@ -241,8 +241,8 @@ def test_stdout_with_num_lines_when_truncated(self, mock_client: Mock, mock_stre
def test_stdout_no_streaming_when_not_truncated(self, mock_client: Mock) -> None:
"""Test stdout doesn't stream when not truncated."""
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="complete output",
@@ -250,7 +250,7 @@ def test_stdout_no_streaming_when_not_truncated(self, mock_client: Mock) -> None
stdout_truncated=False,
stderr_truncated=False,
)
- result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution) # type: ignore[arg-type]
# Should return existing output without streaming
output = result.stdout()
@@ -259,8 +259,8 @@ def test_stdout_no_streaming_when_not_truncated(self, mock_client: Mock) -> None
def test_stdout_with_num_lines_no_truncation(self, mock_client: Mock) -> None:
"""Test stdout with num_lines when not truncated."""
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="line1\nline2\nline3\nline4\nline5",
@@ -268,7 +268,7 @@ def test_stdout_with_num_lines_no_truncation(self, mock_client: Mock) -> None:
stdout_truncated=False,
stderr_truncated=False,
)
- result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution) # type: ignore[arg-type]
# Should return last 2 lines without streaming
output = result.stdout(num_lines=2)
@@ -276,7 +276,7 @@ def test_stdout_with_num_lines_no_truncation(self, mock_client: Mock) -> None:
def test_count_non_empty_lines(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test the _count_non_empty_lines helper method."""
- result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
# Test various input strings
assert result._count_non_empty_lines("") == 0
@@ -288,7 +288,7 @@ def test_count_non_empty_lines(self, mock_client: Mock, execution_view: MockExec
def test_get_last_n_lines(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test the _get_last_n_lines helper method."""
- result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
# Test various scenarios
assert result._get_last_n_lines("", 5) == ""
diff --git a/tests/sdk/test_ops.py b/tests/sdk/test_ops.py
index 7ac503933..af54776af 100644
--- a/tests/sdk/test_ops.py
+++ b/tests/sdk/test_ops.py
@@ -17,6 +17,7 @@
MockScorerView,
MockScenarioView,
MockSnapshotView,
+ MockBenchmarkView,
MockBlueprintView,
create_mock_httpx_response,
)
@@ -27,12 +28,14 @@
AgentOps,
Scenario,
Snapshot,
+ Benchmark,
Blueprint,
DevboxOps,
ScorerOps,
RunloopSDK,
ScenarioOps,
SnapshotOps,
+ BenchmarkOps,
BlueprintOps,
StorageObject,
StorageObjectOps,
@@ -55,7 +58,7 @@ def test_create(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
)
assert isinstance(devbox, Devbox)
- assert devbox.id == "dev_123"
+ assert devbox.id == "dbx_123"
mock_client.devboxes.create_and_await_running.assert_called_once()
def test_create_from_blueprint_id(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
@@ -64,15 +67,15 @@ def test_create_from_blueprint_id(self, mock_client: Mock, devbox_view: MockDevb
ops = DevboxOps(mock_client)
devbox = ops.create_from_blueprint_id(
- "bp_123",
+ "bpt_123",
name="test-devbox",
metadata={"key": "value"},
)
assert isinstance(devbox, Devbox)
- assert devbox.id == "dev_123"
+ assert devbox.id == "dbx_123"
call_kwargs = mock_client.devboxes.create_and_await_running.call_args[1]
- assert call_kwargs["blueprint_id"] == "bp_123"
+ assert call_kwargs["blueprint_id"] == "bpt_123"
def test_create_from_blueprint_name(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
"""Test create_from_blueprint_name method."""
@@ -94,24 +97,24 @@ def test_create_from_snapshot(self, mock_client: Mock, devbox_view: MockDevboxVi
ops = DevboxOps(mock_client)
devbox = ops.create_from_snapshot(
- "snap_123",
+ "snp_123",
name="test-devbox",
)
assert isinstance(devbox, Devbox)
call_kwargs = mock_client.devboxes.create_and_await_running.call_args[1]
- assert call_kwargs["snapshot_id"] == "snap_123"
+ assert call_kwargs["snapshot_id"] == "snp_123"
def test_from_id(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
"""Test from_id method waits for running."""
mock_client.devboxes.await_running.return_value = devbox_view
ops = DevboxOps(mock_client)
- devbox = ops.from_id("dev_123")
+ devbox = ops.from_id("dbx_123")
assert isinstance(devbox, Devbox)
- assert devbox.id == "dev_123"
- mock_client.devboxes.await_running.assert_called_once_with("dev_123")
+ assert devbox.id == "dbx_123"
+ mock_client.devboxes.await_running.assert_called_once_with("dbx_123")
def test_list_empty(self, mock_client: Mock) -> None:
"""Test list method with empty results."""
@@ -138,7 +141,7 @@ def test_list_single(self, mock_client: Mock, devbox_view: MockDevboxView) -> No
assert len(devboxes) == 1
assert isinstance(devboxes[0], Devbox)
- assert devboxes[0].id == "dev_123"
+ assert devboxes[0].id == "dbx_123"
mock_client.devboxes.list.assert_called_once()
def test_list_multiple(self, mock_client: Mock) -> None:
@@ -168,7 +171,7 @@ def test_list_empty(self, mock_client: Mock) -> None:
mock_client.devboxes.disk_snapshots.list.return_value = page
ops = SnapshotOps(mock_client)
- snapshots = ops.list(devbox_id="dev_123", limit=10)
+ snapshots = ops.list(devbox_id="dbx_123", limit=10)
assert len(snapshots) == 0
mock_client.devboxes.disk_snapshots.list.assert_called_once()
@@ -180,14 +183,14 @@ def test_list_single(self, mock_client: Mock, snapshot_view: MockSnapshotView) -
ops = SnapshotOps(mock_client)
snapshots = ops.list(
- devbox_id="dev_123",
+ devbox_id="dbx_123",
limit=10,
starting_after="snap_000",
)
assert len(snapshots) == 1
assert isinstance(snapshots[0], Snapshot)
- assert snapshots[0].id == "snap_123"
+ assert snapshots[0].id == "snp_123"
mock_client.devboxes.disk_snapshots.list.assert_called_once()
def test_list_multiple(self, mock_client: Mock) -> None:
@@ -198,7 +201,7 @@ def test_list_multiple(self, mock_client: Mock) -> None:
mock_client.devboxes.disk_snapshots.list.return_value = page
ops = SnapshotOps(mock_client)
- snapshots = ops.list(devbox_id="dev_123", limit=10)
+ snapshots = ops.list(devbox_id="dbx_123", limit=10)
assert len(snapshots) == 2
assert isinstance(snapshots[0], Snapshot)
@@ -210,10 +213,10 @@ def test_list_multiple(self, mock_client: Mock) -> None:
def test_from_id(self, mock_client: Mock) -> None:
"""Test from_id method."""
ops = SnapshotOps(mock_client)
- snapshot = ops.from_id("snap_123")
+ snapshot = ops.from_id("snp_123")
assert isinstance(snapshot, Snapshot)
- assert snapshot.id == "snap_123"
+ assert snapshot.id == "snp_123"
class TestBlueprintOps:
@@ -230,16 +233,16 @@ def test_create(self, mock_client: Mock, blueprint_view: MockBlueprintView) -> N
)
assert isinstance(blueprint, Blueprint)
- assert blueprint.id == "bp_123"
+ assert blueprint.id == "bpt_123"
mock_client.blueprints.create_and_await_build_complete.assert_called_once()
def test_from_id(self, mock_client: Mock) -> None:
"""Test from_id method."""
ops = BlueprintOps(mock_client)
- blueprint = ops.from_id("bp_123")
+ blueprint = ops.from_id("bpt_123")
assert isinstance(blueprint, Blueprint)
- assert blueprint.id == "bp_123"
+ assert blueprint.id == "bpt_123"
def test_list_empty(self, mock_client: Mock) -> None:
"""Test list method with empty results."""
@@ -266,7 +269,7 @@ def test_list_single(self, mock_client: Mock, blueprint_view: MockBlueprintView)
assert len(blueprints) == 1
assert isinstance(blueprints[0], Blueprint)
- assert blueprints[0].id == "bp_123"
+ assert blueprints[0].id == "bpt_123"
mock_client.blueprints.list.assert_called_once()
def test_list_multiple(self, mock_client: Mock) -> None:
@@ -660,16 +663,16 @@ def test_create(self, mock_client: Mock, scorer_view: MockScorerView) -> None:
)
assert isinstance(scorer, Scorer)
- assert scorer.id == "scorer_123"
+ assert scorer.id == "sco_123"
mock_client.scenarios.scorers.create.assert_called_once()
def test_from_id(self, mock_client: Mock) -> None:
"""Test from_id method."""
ops = ScorerOps(mock_client)
- scorer = ops.from_id("scorer_123")
+ scorer = ops.from_id("sco_123")
assert isinstance(scorer, Scorer)
- assert scorer.id == "scorer_123"
+ assert scorer.id == "sco_123"
def test_list_empty(self, mock_client: Mock) -> None:
"""Test list method with empty results."""
@@ -693,7 +696,7 @@ def test_list_single(self, mock_client: Mock, scorer_view: MockScorerView) -> No
assert len(scorers) == 1
assert isinstance(scorers[0], Scorer)
- assert scorers[0].id == "scorer_123"
+ assert scorers[0].id == "sco_123"
mock_client.scenarios.scorers.list.assert_called_once()
def test_list_multiple(self, mock_client: Mock) -> None:
@@ -723,19 +726,20 @@ def test_create(self, mock_client: Mock, agent_view: MockAgentView) -> None:
client = AgentOps(mock_client)
agent = client.create(
name="test-agent",
+ version="1.2.3",
)
assert isinstance(agent, Agent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_client.agents.create.assert_called_once()
def test_from_id(self, mock_client: Mock) -> None:
"""Test from_id method."""
client = AgentOps(mock_client)
- agent = client.from_id("agent_123")
+ agent = client.from_id("agt_123")
assert isinstance(agent, Agent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
def test_list(self, mock_client: Mock) -> None:
"""Test list method."""
@@ -817,10 +821,11 @@ def test_create_from_npm(self, mock_client: Mock, agent_view: MockAgentView) ->
agent = client.create_from_npm(
name="test-agent",
package_name="@runloop/example-agent",
+ version="1.2.3",
)
assert isinstance(agent, Agent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_client.agents.create.assert_called_once_with(
source={
"type": "npm",
@@ -829,6 +834,7 @@ def test_create_from_npm(self, mock_client: Mock, agent_view: MockAgentView) ->
},
},
name="test-agent",
+ version="1.2.3",
)
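+
+ # Note: the per-source `npm_version` (and `pip_version`) argument is replaced
+ # by the top-level `version` parameter, which these tests appear to pass
+ # uniformly for npm, pip, git, and object sources (see the parallel edits below).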
def test_create_from_npm_with_all_options(self, mock_client: Mock, agent_view: MockAgentView) -> None:
@@ -838,26 +844,26 @@ def test_create_from_npm_with_all_options(self, mock_client: Mock, agent_view: M
client = AgentOps(mock_client)
agent = client.create_from_npm(
package_name="@runloop/example-agent",
- npm_version="1.2.3",
registry_url="https://registry.example.com",
agent_setup=["npm install", "npm run setup"],
name="test-agent",
+ version="1.2.3",
extra_headers={"X-Custom": "header"},
)
assert isinstance(agent, Agent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_client.agents.create.assert_called_once_with(
source={
"type": "npm",
"npm": {
"package_name": "@runloop/example-agent",
- "npm_version": "1.2.3",
"registry_url": "https://registry.example.com",
"agent_setup": ["npm install", "npm run setup"],
},
},
name="test-agent",
+ version="1.2.3",
extra_headers={"X-Custom": "header"},
)
@@ -869,6 +875,7 @@ def test_create_from_npm_raises_when_source_provided(self, mock_client: Mock) ->
client.create_from_npm(
package_name="@runloop/example-agent",
name="test-agent",
+ version="1.2.3",
source={"type": "git", "git": {"repository": "https://github.com/example/repo"}},
)
@@ -880,10 +887,11 @@ def test_create_from_pip(self, mock_client: Mock, agent_view: MockAgentView) ->
agent = client.create_from_pip(
package_name="runloop-example-agent",
name="test-agent",
+ version="1.2.3",
)
assert isinstance(agent, Agent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_client.agents.create.assert_called_once_with(
source={
"type": "pip",
@@ -892,6 +900,7 @@ def test_create_from_pip(self, mock_client: Mock, agent_view: MockAgentView) ->
},
},
name="test-agent",
+ version="1.2.3",
)
def test_create_from_pip_with_all_options(self, mock_client: Mock, agent_view: MockAgentView) -> None:
@@ -901,25 +910,25 @@ def test_create_from_pip_with_all_options(self, mock_client: Mock, agent_view: M
client = AgentOps(mock_client)
agent = client.create_from_pip(
package_name="runloop-example-agent",
- pip_version="1.2.3",
registry_url="https://pypi.example.com",
agent_setup=["pip install extra-deps"],
name="test-agent",
+ version="1.2.3",
)
assert isinstance(agent, Agent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_client.agents.create.assert_called_once_with(
source={
"type": "pip",
"pip": {
"package_name": "runloop-example-agent",
- "pip_version": "1.2.3",
"registry_url": "https://pypi.example.com",
"agent_setup": ["pip install extra-deps"],
},
},
name="test-agent",
+ version="1.2.3",
)
def test_create_from_git(self, mock_client: Mock, agent_view: MockAgentView) -> None:
@@ -930,10 +939,11 @@ def test_create_from_git(self, mock_client: Mock, agent_view: MockAgentView) ->
agent = client.create_from_git(
repository="https://github.com/example/agent-repo",
name="test-agent",
+ version="1.2.3",
)
assert isinstance(agent, Agent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_client.agents.create.assert_called_once_with(
source={
"type": "git",
@@ -942,6 +952,7 @@ def test_create_from_git(self, mock_client: Mock, agent_view: MockAgentView) ->
},
},
name="test-agent",
+ version="1.2.3",
)
def test_create_from_git_with_all_options(self, mock_client: Mock, agent_view: MockAgentView) -> None:
@@ -954,10 +965,11 @@ def test_create_from_git_with_all_options(self, mock_client: Mock, agent_view: M
ref="develop",
agent_setup=["npm install", "npm run build"],
name="test-agent",
+ version="1.2.3",
)
assert isinstance(agent, Agent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_client.agents.create.assert_called_once_with(
source={
"type": "git",
@@ -968,6 +980,7 @@ def test_create_from_git_with_all_options(self, mock_client: Mock, agent_view: M
},
},
name="test-agent",
+ version="1.2.3",
)
def test_create_from_object(self, mock_client: Mock, agent_view: MockAgentView) -> None:
@@ -978,10 +991,11 @@ def test_create_from_object(self, mock_client: Mock, agent_view: MockAgentView)
agent = client.create_from_object(
object_id="obj_123",
name="test-agent",
+ version="1.2.3",
)
assert isinstance(agent, Agent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_client.agents.create.assert_called_once_with(
source={
"type": "object",
@@ -990,6 +1004,7 @@ def test_create_from_object(self, mock_client: Mock, agent_view: MockAgentView)
},
},
name="test-agent",
+ version="1.2.3",
)
def test_create_from_object_with_agent_setup(self, mock_client: Mock, agent_view: MockAgentView) -> None:
@@ -1001,10 +1016,11 @@ def test_create_from_object_with_agent_setup(self, mock_client: Mock, agent_view
object_id="obj_123",
agent_setup=["chmod +x setup.sh", "./setup.sh"],
name="test-agent",
+ version="1.2.3",
)
assert isinstance(agent, Agent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_client.agents.create.assert_called_once_with(
source={
"type": "object",
@@ -1014,6 +1030,7 @@ def test_create_from_object_with_agent_setup(self, mock_client: Mock, agent_view
},
},
name="test-agent",
+ version="1.2.3",
)
@@ -1071,6 +1088,59 @@ def test_list_multiple(self, mock_client: Mock) -> None:
mock_client.scenarios.list.assert_called_once()
+class TestBenchmarkOps:
+ """Tests for BenchmarkOps class."""
+
+ def test_create(self, mock_client: Mock, benchmark_view: MockBenchmarkView) -> None:
+ """Test create method."""
+ mock_client.benchmarks.create.return_value = benchmark_view
+
+ ops = BenchmarkOps(mock_client)
+ benchmark = ops.create(name="test-benchmark", scenario_ids=["scn_001", "scn_002"])
+
+ assert isinstance(benchmark, Benchmark)
+ assert benchmark.id == "bmd_123"
+ mock_client.benchmarks.create.assert_called_once_with(
+ name="test-benchmark", scenario_ids=["scn_001", "scn_002"]
+ )
+
+ def test_from_id(self, mock_client: Mock) -> None:
+ """Test from_id method."""
+ ops = BenchmarkOps(mock_client)
+ benchmark = ops.from_id("bmd_123")
+
+ assert isinstance(benchmark, Benchmark)
+ assert benchmark.id == "bmd_123"
+
+ def test_list_multiple(self, mock_client: Mock) -> None:
+ """Test list method with multiple results."""
+ benchmark_view1 = MockBenchmarkView(id="bmd_001", name="benchmark-1")
+ benchmark_view2 = MockBenchmarkView(id="bmd_002", name="benchmark-2")
+ page = SimpleNamespace(benchmarks=[benchmark_view1, benchmark_view2])
+ mock_client.benchmarks.list.return_value = page
+
+ ops = BenchmarkOps(mock_client)
+ benchmarks = ops.list(limit=10)
+
+ assert len(benchmarks) == 2
+ assert isinstance(benchmarks[0], Benchmark)
+ assert isinstance(benchmarks[1], Benchmark)
+ assert benchmarks[0].id == "bmd_001"
+ assert benchmarks[1].id == "bmd_002"
+ mock_client.benchmarks.list.assert_called_once_with(limit=10)
+
+ def test_list_with_name_filter(self, mock_client: Mock, benchmark_view: MockBenchmarkView) -> None:
+ """Test list method with name filter."""
+ page = SimpleNamespace(benchmarks=[benchmark_view])
+ mock_client.benchmarks.list.return_value = page
+
+ ops = BenchmarkOps(mock_client)
+ benchmarks = ops.list(name="test-benchmark", limit=10)
+
+ assert len(benchmarks) == 1
+ mock_client.benchmarks.list.assert_called_once_with(name="test-benchmark", limit=10)
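+
+ # Illustrative sketch only (a comment, not an executed test): assuming the
+ # sync Benchmark mirrors the AsyncBenchmark.start_run used in the smoketests
+ # below, the ops surface composes roughly as:
+ #   sdk = RunloopSDK(bearer_token="...")
+ #   benchmark = sdk.benchmark.create(name="nightly", scenario_ids=["scn_001"])
+ #   run = benchmark.start_run(run_name="nightly-run")
+ #   scenario_runs = run.list_scenario_runs(state="completed")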
+
+
class TestRunloopSDK:
"""Tests for RunloopSDK class."""
@@ -1079,6 +1149,7 @@ def test_init(self) -> None:
runloop = RunloopSDK(bearer_token="test-token")
assert runloop.api is not None
assert isinstance(runloop.agent, AgentOps)
+ assert isinstance(runloop.benchmark, BenchmarkOps)
assert isinstance(runloop.devbox, DevboxOps)
assert isinstance(runloop.scorer, ScorerOps)
assert isinstance(runloop.snapshot, SnapshotOps)
diff --git a/tests/sdk/test_scenario.py b/tests/sdk/test_scenario.py
index 3504c1714..e3aa5f1c8 100644
--- a/tests/sdk/test_scenario.py
+++ b/tests/sdk/test_scenario.py
@@ -99,8 +99,8 @@ def test_run_async(self, mock_client: Mock, scenario_run_view: MockScenarioRunVi
scenario = Scenario(mock_client, "scn_123")
run = scenario.run_async(run_name="test-run")
- assert run.id == "run_123"
- assert run.devbox_id == "dev_123"
+ assert run.id == "scr_123"
+ assert run.devbox_id == "dbx_123"
mock_client.scenarios.start_run.assert_called_once_with(
scenario_id="scn_123",
run_name="test-run",
@@ -113,8 +113,8 @@ def test_run(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) ->
scenario = Scenario(mock_client, "scn_123")
run = scenario.run(run_name="test-run")
- assert run.id == "run_123"
- assert run.devbox_id == "dev_123"
+ assert run.id == "scr_123"
+ assert run.devbox_id == "dbx_123"
mock_client.scenarios.start_run_and_await_env_ready.assert_called_once_with(
scenario_id="scn_123",
run_name="test-run",
diff --git a/tests/sdk/test_scenario_run.py b/tests/sdk/test_scenario_run.py
index 54ea6e89b..339e365f8 100644
--- a/tests/sdk/test_scenario_run.py
+++ b/tests/sdk/test_scenario_run.py
@@ -15,31 +15,31 @@ class TestScenarioRun:
def test_init(self, mock_client: Mock) -> None:
"""Test ScenarioRun initialization."""
- run = ScenarioRun(mock_client, "run_123", "dev_123")
- assert run.id == "run_123"
- assert run.devbox_id == "dev_123"
+ run = ScenarioRun(mock_client, "scr_123", "dbx_123")
+ assert run.id == "scr_123"
+ assert run.devbox_id == "dbx_123"
def test_repr(self, mock_client: Mock) -> None:
"""Test ScenarioRun string representation."""
- run = ScenarioRun(mock_client, "run_123", "dev_123")
- assert repr(run) == "<ScenarioRun id='run_123' devbox_id='dev_123'>"
+ run = ScenarioRun(mock_client, "scr_123", "dbx_123")
+ assert repr(run) == "<ScenarioRun id='scr_123' devbox_id='dbx_123'>"
def test_devbox_property(self, mock_client: Mock) -> None:
"""Test devbox property returns Devbox wrapper."""
- run = ScenarioRun(mock_client, "run_123", "dev_123")
+ run = ScenarioRun(mock_client, "scr_123", "dbx_123")
devbox = run.devbox
- assert devbox.id == "dev_123"
+ assert devbox.id == "dbx_123"
def test_get_info(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None:
"""Test get_info method."""
mock_client.scenarios.runs.retrieve.return_value = scenario_run_view
- run = ScenarioRun(mock_client, "run_123", "dev_123")
+ run = ScenarioRun(mock_client, "scr_123", "dbx_123")
result = run.get_info()
assert result == scenario_run_view
- mock_client.scenarios.runs.retrieve.assert_called_once_with("run_123")
+ mock_client.scenarios.runs.retrieve.assert_called_once_with("scr_123")
def test_await_env_ready(
self, mock_client: Mock, scenario_run_view: MockScenarioRunView, devbox_view: MockDevboxView
@@ -48,10 +48,10 @@ def test_await_env_ready(
mock_client.devboxes.await_running.return_value = devbox_view
mock_client.scenarios.runs.retrieve.return_value = scenario_run_view
- run = ScenarioRun(mock_client, "run_123", "dev_123")
+ run = ScenarioRun(mock_client, "scr_123", "dbx_123")
result = run.await_env_ready()
- mock_client.devboxes.await_running.assert_called_once_with("dev_123", polling_config=None)
+ mock_client.devboxes.await_running.assert_called_once_with("dbx_123", polling_config=None)
assert result == scenario_run_view
def test_score(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None:
@@ -59,66 +59,66 @@ def test_score(self, mock_client: Mock, scenario_run_view: MockScenarioRunView)
scenario_run_view.state = "scoring"
mock_client.scenarios.runs.score.return_value = scenario_run_view
- run = ScenarioRun(mock_client, "run_123", "dev_123")
+ run = ScenarioRun(mock_client, "scr_123", "dbx_123")
result = run.score()
assert result == scenario_run_view
- mock_client.scenarios.runs.score.assert_called_once_with("run_123")
+ mock_client.scenarios.runs.score.assert_called_once_with("scr_123")
def test_await_scored(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None:
"""Test await_scored method."""
scenario_run_view.state = "scored"
mock_client.scenarios.runs.await_scored.return_value = scenario_run_view
- run = ScenarioRun(mock_client, "run_123", "dev_123")
+ run = ScenarioRun(mock_client, "scr_123", "dbx_123")
result = run.await_scored()
assert result == scenario_run_view
- mock_client.scenarios.runs.await_scored.assert_called_once_with("run_123")
+ mock_client.scenarios.runs.await_scored.assert_called_once_with("scr_123")
def test_score_and_await(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None:
"""Test score_and_await method."""
scenario_run_view.state = "scored"
mock_client.scenarios.runs.score_and_await.return_value = scenario_run_view
- run = ScenarioRun(mock_client, "run_123", "dev_123")
+ run = ScenarioRun(mock_client, "scr_123", "dbx_123")
result = run.score_and_await()
assert result == scenario_run_view
- mock_client.scenarios.runs.score_and_await.assert_called_once_with("run_123")
+ mock_client.scenarios.runs.score_and_await.assert_called_once_with("scr_123")
def test_score_and_complete(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None:
"""Test score_and_complete method."""
scenario_run_view.state = "completed"
mock_client.scenarios.runs.score_and_complete.return_value = scenario_run_view
- run = ScenarioRun(mock_client, "run_123", "dev_123")
+ run = ScenarioRun(mock_client, "scr_123", "dbx_123")
result = run.score_and_complete()
assert result == scenario_run_view
- mock_client.scenarios.runs.score_and_complete.assert_called_once_with("run_123")
+ mock_client.scenarios.runs.score_and_complete.assert_called_once_with("scr_123")
def test_complete(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None:
"""Test complete method."""
scenario_run_view.state = "completed"
mock_client.scenarios.runs.complete.return_value = scenario_run_view
- run = ScenarioRun(mock_client, "run_123", "dev_123")
+ run = ScenarioRun(mock_client, "scr_123", "dbx_123")
result = run.complete()
assert result == scenario_run_view
- mock_client.scenarios.runs.complete.assert_called_once_with("run_123")
+ mock_client.scenarios.runs.complete.assert_called_once_with("scr_123")
def test_cancel(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None:
"""Test cancel method."""
scenario_run_view.state = "canceled"
mock_client.scenarios.runs.cancel.return_value = scenario_run_view
- run = ScenarioRun(mock_client, "run_123", "dev_123")
+ run = ScenarioRun(mock_client, "scr_123", "dbx_123")
result = run.cancel()
assert result == scenario_run_view
- mock_client.scenarios.runs.cancel.assert_called_once_with("run_123")
+ mock_client.scenarios.runs.cancel.assert_called_once_with("scr_123")
def test_download_logs(self, mock_client: Mock, tmp_path: Path) -> None:
"""Test download_logs method writes to file."""
@@ -126,11 +126,11 @@ def test_download_logs(self, mock_client: Mock, tmp_path: Path) -> None:
mock_response.write_to_file = Mock()
mock_client.scenarios.runs.download_logs.return_value = mock_response
- run = ScenarioRun(mock_client, "run_123", "dev_123")
+ run = ScenarioRun(mock_client, "scr_123", "dbx_123")
output_path = tmp_path / "logs.zip"
run.download_logs(output_path)
- mock_client.scenarios.runs.download_logs.assert_called_once_with("run_123")
+ mock_client.scenarios.runs.download_logs.assert_called_once_with("scr_123")
mock_response.write_to_file.assert_called_once_with(output_path)
def test_get_score_when_scored(self, mock_client: Mock) -> None:
@@ -139,19 +139,19 @@ def test_get_score_when_scored(self, mock_client: Mock) -> None:
run_view = MockScenarioRunView(state="scored", scoring_contract_result=scoring_result)
mock_client.scenarios.runs.retrieve.return_value = run_view
- run = ScenarioRun(mock_client, "run_123", "dev_123")
+ run = ScenarioRun(mock_client, "scr_123", "dbx_123")
result = run.get_score()
assert result == scoring_result
- mock_client.scenarios.runs.retrieve.assert_called_once_with("run_123")
+ mock_client.scenarios.runs.retrieve.assert_called_once_with("scr_123")
def test_get_score_when_not_scored(self, mock_client: Mock) -> None:
"""Test get_score returns None when not scored."""
run_view = MockScenarioRunView(state="running", scoring_contract_result=None)
mock_client.scenarios.runs.retrieve.return_value = run_view
- run = ScenarioRun(mock_client, "run_123", "dev_123")
+ run = ScenarioRun(mock_client, "scr_123", "dbx_123")
result = run.get_score()
assert result is None
- mock_client.scenarios.runs.retrieve.assert_called_once_with("run_123")
+ mock_client.scenarios.runs.retrieve.assert_called_once_with("scr_123")
diff --git a/tests/sdk/test_scorer.py b/tests/sdk/test_scorer.py
index 761a487cb..91b430db0 100644
--- a/tests/sdk/test_scorer.py
+++ b/tests/sdk/test_scorer.py
@@ -14,30 +14,30 @@ class TestScorer:
def test_init(self, mock_client: Mock) -> None:
"""Test Scorer initialization."""
- scorer = Scorer(mock_client, "scorer_123")
- assert scorer.id == "scorer_123"
+ scorer = Scorer(mock_client, "sco_123")
+ assert scorer.id == "sco_123"
def test_repr(self, mock_client: Mock) -> None:
"""Test Scorer string representation."""
- scorer = Scorer(mock_client, "scorer_123")
- assert repr(scorer) == "<Scorer id='scorer_123'>"
+ scorer = Scorer(mock_client, "sco_123")
+ assert repr(scorer) == "<Scorer id='sco_123'>"
def test_get_info(self, mock_client: Mock, scorer_view: MockScorerView) -> None:
"""Test get_info method."""
mock_client.scenarios.scorers.retrieve.return_value = scorer_view
- scorer = Scorer(mock_client, "scorer_123")
+ scorer = Scorer(mock_client, "sco_123")
result = scorer.get_info()
assert result == scorer_view
- mock_client.scenarios.scorers.retrieve.assert_called_once_with("scorer_123")
+ mock_client.scenarios.scorers.retrieve.assert_called_once_with("sco_123")
def test_update(self, mock_client: Mock) -> None:
"""Test update method."""
- update_response = SimpleNamespace(id="scorer_123", type="updated_scorer", bash_script="echo 'score=1.0'")
+ update_response = SimpleNamespace(id="sco_123", type="updated_scorer", bash_script="echo 'score=1.0'")
mock_client.scenarios.scorers.update.return_value = update_response
- scorer = Scorer(mock_client, "scorer_123")
+ scorer = Scorer(mock_client, "sco_123")
result = scorer.update(
type="updated_scorer",
bash_script="echo 'score=1.0'",
@@ -45,7 +45,7 @@ def test_update(self, mock_client: Mock) -> None:
assert result == update_response
mock_client.scenarios.scorers.update.assert_called_once_with(
- "scorer_123",
+ "sco_123",
type="updated_scorer",
bash_script="echo 'score=1.0'",
)
@@ -59,13 +59,13 @@ def test_validate(self, mock_client: Mock) -> None:
)
mock_client.scenarios.scorers.validate.return_value = validate_response
- scorer = Scorer(mock_client, "scorer_123")
+ scorer = Scorer(mock_client, "sco_123")
result = scorer.validate(
scoring_context={"test": "context"},
)
assert result == validate_response
mock_client.scenarios.scorers.validate.assert_called_once_with(
- "scorer_123",
+ "sco_123",
scoring_context={"test": "context"},
)
diff --git a/tests/sdk/test_snapshot.py b/tests/sdk/test_snapshot.py
index 383e812cc..4b066e29a 100644
--- a/tests/sdk/test_snapshot.py
+++ b/tests/sdk/test_snapshot.py
@@ -15,19 +15,19 @@ class TestSnapshot:
def test_init(self, mock_client: Mock) -> None:
"""Test Snapshot initialization."""
- snapshot = Snapshot(mock_client, "snap_123")
- assert snapshot.id == "snap_123"
+ snapshot = Snapshot(mock_client, "snp_123")
+ assert snapshot.id == "snp_123"
def test_repr(self, mock_client: Mock) -> None:
"""Test Snapshot string representation."""
- snapshot = Snapshot(mock_client, "snap_123")
- assert repr(snapshot) == "<Snapshot id='snap_123'>"
+ snapshot = Snapshot(mock_client, "snp_123")
+ assert repr(snapshot) == "<Snapshot id='snp_123'>"
def test_get_info(self, mock_client: Mock, snapshot_view: MockSnapshotView) -> None:
"""Test get_info method."""
mock_client.devboxes.disk_snapshots.query_status.return_value = snapshot_view
- snapshot = Snapshot(mock_client, "snap_123")
+ snapshot = Snapshot(mock_client, "snp_123")
result = snapshot.get_info(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -37,7 +37,7 @@ def test_get_info(self, mock_client: Mock, snapshot_view: MockSnapshotView) -> N
assert result == snapshot_view
mock_client.devboxes.disk_snapshots.query_status.assert_called_once_with(
- "snap_123",
+ "snp_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
@@ -46,10 +46,10 @@ def test_get_info(self, mock_client: Mock, snapshot_view: MockSnapshotView) -> N
def test_update(self, mock_client: Mock) -> None:
"""Test update method."""
- updated_snapshot = SimpleNamespace(id="snap_123", name="updated-name")
+ updated_snapshot = SimpleNamespace(id="snp_123", name="updated-name")
mock_client.devboxes.disk_snapshots.update.return_value = updated_snapshot
- snapshot = Snapshot(mock_client, "snap_123")
+ snapshot = Snapshot(mock_client, "snp_123")
result = snapshot.update(
commit_message="Update message",
metadata={"key": "value"},
@@ -63,7 +63,7 @@ def test_update(self, mock_client: Mock) -> None:
assert result == updated_snapshot
mock_client.devboxes.disk_snapshots.update.assert_called_once_with(
- "snap_123",
+ "snp_123",
commit_message="Update message",
metadata={"key": "value"},
name="updated-name",
@@ -78,7 +78,7 @@ def test_delete(self, mock_client: Mock) -> None:
"""Test delete method."""
mock_client.devboxes.disk_snapshots.delete.return_value = object()
- snapshot = Snapshot(mock_client, "snap_123")
+ snapshot = Snapshot(mock_client, "snp_123")
result = snapshot.delete(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -89,7 +89,7 @@ def test_delete(self, mock_client: Mock) -> None:
assert result is not None # Verify return value is propagated
mock_client.devboxes.disk_snapshots.delete.assert_called_once_with(
- "snap_123",
+ "snp_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
@@ -102,7 +102,7 @@ def test_await_completed(self, mock_client: Mock, snapshot_view: MockSnapshotVie
mock_client.devboxes.disk_snapshots.await_completed.return_value = snapshot_view
polling_config = PollingConfig(timeout_seconds=60.0)
- snapshot = Snapshot(mock_client, "snap_123")
+ snapshot = Snapshot(mock_client, "snp_123")
result = snapshot.await_completed(
polling_config=polling_config,
extra_headers={"X-Custom": "value"},
@@ -113,7 +113,7 @@ def test_await_completed(self, mock_client: Mock, snapshot_view: MockSnapshotVie
assert result == snapshot_view
mock_client.devboxes.disk_snapshots.await_completed.assert_called_once_with(
- "snap_123",
+ "snp_123",
polling_config=polling_config,
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -125,7 +125,7 @@ def test_create_devbox(self, mock_client: Mock, devbox_view: MockDevboxView) ->
"""Test create_devbox method."""
mock_client.devboxes.create_and_await_running.return_value = devbox_view
- snapshot = Snapshot(mock_client, "snap_123")
+ snapshot = Snapshot(mock_client, "snp_123")
devbox = snapshot.create_devbox(
name="test-devbox",
metadata={"key": "value"},
@@ -133,9 +133,9 @@ def test_create_devbox(self, mock_client: Mock, devbox_view: MockDevboxView) ->
extra_headers={"X-Custom": "value"},
)
- assert devbox.id == "dev_123"
+ assert devbox.id == "dbx_123"
mock_client.devboxes.create_and_await_running.assert_called_once()
call_kwargs = mock_client.devboxes.create_and_await_running.call_args[1]
- assert call_kwargs["snapshot_id"] == "snap_123"
+ assert call_kwargs["snapshot_id"] == "snp_123"
assert call_kwargs["name"] == "test-devbox"
assert call_kwargs["metadata"] == {"key": "value"}
diff --git a/tests/smoketests/sdk/test_agent.py b/tests/smoketests/sdk/test_agent.py
index deb659087..7ddfb6f70 100644
--- a/tests/smoketests/sdk/test_agent.py
+++ b/tests/smoketests/sdk/test_agent.py
@@ -12,6 +12,7 @@
THIRTY_SECOND_TIMEOUT = 30
TWO_MINUTE_TIMEOUT = 120
+AGENT_VERSION = "1.2.3"
class TestAgentLifecycle:
@@ -23,6 +24,7 @@ def test_agent_create_basic(self, sdk_client: RunloopSDK) -> None:
name = unique_name("sdk-agent-test-basic")
agent = sdk_client.agent.create(
name=name,
+ version=AGENT_VERSION,
source={
"type": "npm",
"npm": {
@@ -52,6 +54,7 @@ def test_agent_get_info(self, sdk_client: RunloopSDK) -> None:
name = unique_name("sdk-agent-test-info")
agent = sdk_client.agent.create(
name=name,
+ version=AGENT_VERSION,
source={
"type": "npm",
"npm": {
@@ -90,6 +93,7 @@ def test_get_agent_by_id(self, sdk_client: RunloopSDK) -> None:
# Create an agent
created = sdk_client.agent.create(
name=unique_name("sdk-agent-test-retrieve"),
+ version=AGENT_VERSION,
source={
"type": "npm",
"npm": {
@@ -121,9 +125,15 @@ def test_list_multiple_agents(self, sdk_client: RunloopSDK) -> None:
}
# Create multiple agents
- agent1 = sdk_client.agent.create(name=unique_name("sdk-agent-test-list-1"), source=source_config)
- agent2 = sdk_client.agent.create(name=unique_name("sdk-agent-test-list-2"), source=source_config)
- agent3 = sdk_client.agent.create(name=unique_name("sdk-agent-test-list-3"), source=source_config)
+ agent1 = sdk_client.agent.create(
+ name=unique_name("sdk-agent-test-list-1"), source=source_config, version=AGENT_VERSION
+ )
+ agent2 = sdk_client.agent.create(
+ name=unique_name("sdk-agent-test-list-2"), source=source_config, version=AGENT_VERSION
+ )
+ agent3 = sdk_client.agent.create(
+ name=unique_name("sdk-agent-test-list-3"), source=source_config, version=AGENT_VERSION
+ )
try:
# List agents
@@ -153,6 +163,7 @@ def test_agent_with_source_npm(self, sdk_client: RunloopSDK) -> None:
agent = sdk_client.agent.create(
name=name,
+ version=AGENT_VERSION,
source={
"type": "npm",
"npm": {
@@ -178,6 +189,7 @@ def test_agent_with_source_git(self, sdk_client: RunloopSDK) -> None:
agent = sdk_client.agent.create(
name=name,
+ version=AGENT_VERSION,
source={
"type": "git",
"git": {
diff --git a/tests/smoketests/sdk/test_async_agent.py b/tests/smoketests/sdk/test_async_agent.py
index fb9d17b42..36129605f 100644
--- a/tests/smoketests/sdk/test_async_agent.py
+++ b/tests/smoketests/sdk/test_async_agent.py
@@ -12,6 +12,7 @@
THIRTY_SECOND_TIMEOUT = 30
TWO_MINUTE_TIMEOUT = 120
+AGENT_VERSION = "1.2.3"
class TestAsyncAgentLifecycle:
@@ -23,6 +24,7 @@ async def test_agent_create_basic(self, async_sdk_client: AsyncRunloopSDK) -> No
name = unique_name("sdk-async-agent-test-basic")
agent = await async_sdk_client.agent.create(
name=name,
+ version=AGENT_VERSION,
source={
"type": "npm",
"npm": {
@@ -52,6 +54,7 @@ async def test_agent_get_info(self, async_sdk_client: AsyncRunloopSDK) -> None:
name = unique_name("sdk-async-agent-test-info")
agent = await async_sdk_client.agent.create(
name=name,
+ version=AGENT_VERSION,
source={
"type": "npm",
"npm": {
@@ -90,6 +93,7 @@ async def test_get_agent_by_id(self, async_sdk_client: AsyncRunloopSDK) -> None:
# Create an agent
created = await async_sdk_client.agent.create(
name=unique_name("sdk-async-agent-test-retrieve"),
+ version=AGENT_VERSION,
source={
"type": "npm",
"npm": {
@@ -122,13 +126,13 @@ async def test_list_multiple_agents(self, async_sdk_client: AsyncRunloopSDK) ->
# Create multiple agents
agent1 = await async_sdk_client.agent.create(
- name=unique_name("sdk-async-agent-test-list-1"), source=source_config
+ name=unique_name("sdk-async-agent-test-list-1"), source=source_config, version=AGENT_VERSION
)
agent2 = await async_sdk_client.agent.create(
- name=unique_name("sdk-async-agent-test-list-2"), source=source_config
+ name=unique_name("sdk-async-agent-test-list-2"), source=source_config, version=AGENT_VERSION
)
agent3 = await async_sdk_client.agent.create(
- name=unique_name("sdk-async-agent-test-list-3"), source=source_config
+ name=unique_name("sdk-async-agent-test-list-3"), source=source_config, version=AGENT_VERSION
)
try:
@@ -159,6 +163,7 @@ async def test_agent_with_source_npm(self, async_sdk_client: AsyncRunloopSDK) ->
agent = await async_sdk_client.agent.create(
name=name,
+ version=AGENT_VERSION,
source={
"type": "npm",
"npm": {
@@ -184,6 +189,7 @@ async def test_agent_with_source_git(self, async_sdk_client: AsyncRunloopSDK) ->
agent = await async_sdk_client.agent.create(
name=name,
+ version=AGENT_VERSION,
source={
"type": "git",
"git": {
diff --git a/tests/smoketests/sdk/test_async_benchmark.py b/tests/smoketests/sdk/test_async_benchmark.py
new file mode 100644
index 000000000..7316355a6
--- /dev/null
+++ b/tests/smoketests/sdk/test_async_benchmark.py
@@ -0,0 +1,192 @@
+"""Asynchronous SDK smoke tests for AsyncBenchmark operations.
+
+These tests validate the AsyncBenchmark class against the real API.
+We create a dedicated smoketest benchmark and scenarios with consistent names
+so that resources are reused across test runs (since there's no delete endpoint).
+"""
+
+from __future__ import annotations
+
+from typing import List, Tuple
+
+import pytest
+
+from runloop_api_client import AsyncRunloopSDK
+from runloop_api_client.sdk import AsyncScenario, AsyncBenchmark, AsyncScenarioRun, AsyncBenchmarkRun
+
+pytestmark = [pytest.mark.smoketest]
+
+TWO_MINUTE_TIMEOUT = 120
+
+# Consistent names for smoketest resources
+SMOKETEST_BENCHMARK_NAME = "sdk-smoketest-benchmark"
+SMOKETEST_SCENARIO_1_NAME = "sdk-smoketest-scenario-1"
+SMOKETEST_SCENARIO_2_NAME = "sdk-smoketest-scenario-2"
+
+
+async def get_or_create_scenario(
+ async_sdk_client: AsyncRunloopSDK,
+ name: str,
+ problem_statement: str,
+) -> AsyncScenario:
+ """Get an existing scenario by name or create a new one."""
+ # Check if scenario already exists
+ scenarios = await async_sdk_client.scenario.list(name=name, limit=1)
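+ # list() may return a paginated iterator rather than a plain list, so loop and take the first match if any.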
+ for scenario in scenarios:
+ # Return the first matching scenario
+ return scenario
+
+ # Create a new scenario using the SDK builder
+ return await (
+ async_sdk_client.scenario.builder(name)
+ .with_problem_statement(problem_statement)
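+ # "exit 0" makes the scorer pass unconditionally, keeping the smoketest deterministic.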
+ .add_shell_command_scorer("pass-scorer", command="exit 0")
+ .push()
+ )
+
+
+async def get_or_create_benchmark(
+ async_sdk_client: AsyncRunloopSDK,
+ name: str,
+ scenario_ids: List[str],
+) -> AsyncBenchmark:
+ """Get an existing benchmark by name or create a new one."""
+ # Check if benchmark already exists
+ benchmarks = await async_sdk_client.benchmark.list(name=name, limit=1)
+ for benchmark in benchmarks:
+ # Return the first matching benchmark
+ return benchmark
+
+ # Create a new benchmark
+ return await async_sdk_client.benchmark.create(
+ name=name,
+ scenario_ids=scenario_ids,
+ description="Smoketest benchmark for SDK testing",
+ )
+
+
+@pytest.fixture(scope="module")
+async def smoketest_benchmark(
+ async_sdk_client: AsyncRunloopSDK,
+) -> Tuple[AsyncBenchmark, List[str]]:
+ """Create or retrieve the smoketest benchmark and scenario IDs."""
+ # Create or get scenarios
+ scenario_1 = await get_or_create_scenario(
+ async_sdk_client,
+ SMOKETEST_SCENARIO_1_NAME,
+ "Smoketest scenario 1 - basic validation",
+ )
+ scenario_2 = await get_or_create_scenario(
+ async_sdk_client,
+ SMOKETEST_SCENARIO_2_NAME,
+ "Smoketest scenario 2 - basic validation",
+ )
+
+ scenario_ids = [scenario_1.id, scenario_2.id]
+
+ # Create or get benchmark
+ benchmark = await get_or_create_benchmark(
+ async_sdk_client,
+ SMOKETEST_BENCHMARK_NAME,
+ scenario_ids,
+ )
+
+ return benchmark, scenario_ids
+
+
+class TestAsyncBenchmarkRun:
+ """Test AsyncBenchmark run operations."""
+
+ @pytest.mark.timeout(TWO_MINUTE_TIMEOUT)
+ async def test_benchmark_run_and_cancel(
+ self,
+ async_sdk_client: AsyncRunloopSDK,
+ smoketest_benchmark: Tuple[AsyncBenchmark, List[str]],
+ ) -> None:
+ """Test starting and canceling a benchmark run.
+
+ This test:
+ 1. Uses the smoketest benchmark fixture
+ 2. Starts a new benchmark run via the AsyncBenchmark class
+ 3. Starts a scenario run for each scenario and checks list_scenario_runs()
+ 4. Cancels the scenario runs and the benchmark run
+ """
+ benchmark, scenario_ids = smoketest_benchmark
+
+ # Start a run
+ run = await benchmark.start_run(run_name="sdk-smoketest-async-benchmark-run")
+ scenario_runs: List[AsyncScenarioRun] = []
+
+ try:
+ assert isinstance(run, AsyncBenchmarkRun)
+ assert run.id is not None
+ assert run.benchmark_id == benchmark.id
+
+ # Get run info
+ info = await run.get_info()
+ assert info.id == run.id
+ assert info.state == "running"
+
+ # Start a scenario run for each scenario in the benchmark
+ for scenario_id in scenario_ids:
+ scenario = async_sdk_client.scenario.from_id(scenario_id)
+ scenario_runs.append(
+ await scenario.run_async(
+ benchmark_run_id=run.id, run_name="sdk-smoketest-async-benchmark-run-scenario"
+ )
+ )
+
+ benchmark_scenario_runs = await run.list_scenario_runs()
+ assert isinstance(benchmark_scenario_runs, list)
+ assert len(benchmark_scenario_runs) == len(scenario_runs)
+ for scenario_run in benchmark_scenario_runs:
+ assert isinstance(scenario_run, AsyncScenarioRun)
+ # Compare against the runs we started; don't reuse the outer loop variable
+ # name here, or each run would be compared with itself.
+ assert any(
+ started.id == scenario_run.id and started.devbox_id == scenario_run.devbox_id
+ for started in scenario_runs
+ )
+
+ # Cancel the scenario runs
+ for scenario_run in scenario_runs:
+ scenario_result = await scenario_run.cancel()
+ assert scenario_result.state in ["canceled", "completed"]
+
+ # Cancel the benchmark run
+ result = await run.cancel()
+ assert result.state in ["canceled", "completed"]
+
+ except Exception:
+ # Ensure cleanup on any error
+ for scenario_run in scenario_runs:
+ await scenario_run.cancel()
+ await run.cancel()
+ raise
+
+
+class TestAsyncBenchmarkListRuns:
+ """Test AsyncBenchmark list_runs operations."""
+
+ @pytest.mark.timeout(TWO_MINUTE_TIMEOUT)
+ async def test_list_runs(
+ self,
+ smoketest_benchmark: Tuple[AsyncBenchmark, List[str]],
+ ) -> None:
+ """Test listing benchmark runs.
+
+ This test:
+ 1. Uses the smoketest benchmark fixture
+ 2. Lists its runs
+ 3. Validates returned objects are AsyncBenchmarkRun instances
+ """
+ benchmark, _ = smoketest_benchmark
+
+ runs = await benchmark.list_runs()
+ assert isinstance(runs, list)
+ if not runs:
+ pytest.skip("No runs available to test")
+
+ # Verify returned items are AsyncBenchmarkRun objects
+ for run in runs:
+ assert isinstance(run, AsyncBenchmarkRun)
+ assert run.id is not None
+ assert run.benchmark_id == benchmark.id
diff --git a/tests/smoketests/sdk/test_benchmark.py b/tests/smoketests/sdk/test_benchmark.py
new file mode 100644
index 000000000..2dfe5bb6c
--- /dev/null
+++ b/tests/smoketests/sdk/test_benchmark.py
@@ -0,0 +1,190 @@
+"""Synchronous SDK smoke tests for Benchmark operations.
+
+These tests validate the Benchmark class against the real API.
+We create a dedicated smoketest benchmark and scenarios with consistent names
+so that resources are reused across test runs (since there's no delete endpoint).
+"""
+
+from __future__ import annotations
+
+from typing import List, Tuple
+
+import pytest
+
+from runloop_api_client import RunloopSDK
+from runloop_api_client.sdk import Scenario, Benchmark, ScenarioRun, BenchmarkRun
+
+pytestmark = [pytest.mark.smoketest]
+
+TWO_MINUTE_TIMEOUT = 120
+
+# Consistent names for smoketest resources
+SMOKETEST_BENCHMARK_NAME = "sdk-smoketest-benchmark"
+SMOKETEST_SCENARIO_1_NAME = "sdk-smoketest-scenario-1"
+SMOKETEST_SCENARIO_2_NAME = "sdk-smoketest-scenario-2"
+
+
+def get_or_create_scenario(
+ sdk_client: RunloopSDK,
+ name: str,
+ problem_statement: str,
+) -> Scenario:
+ """Get an existing scenario by name or create a new one."""
+ # Check if scenario already exists
+ scenarios = sdk_client.scenario.list(name=name, limit=1)
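+ # list() may return a paginated iterator rather than a plain list, so loop and take the first match if any.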
+ for scenario in scenarios:
+ # Return the first matching scenario
+ return scenario
+
+ # Create a new scenario using the SDK builder
+ return (
+ sdk_client.scenario.builder(name)
+ .with_problem_statement(problem_statement)
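+ # "exit 0" makes the scorer pass unconditionally, keeping the smoketest deterministic.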
+ .add_shell_command_scorer("pass-scorer", command="exit 0")
+ .push()
+ )
+
+
+def get_or_create_benchmark(
+ sdk_client: RunloopSDK,
+ name: str,
+ scenario_ids: List[str],
+) -> Benchmark:
+ """Get an existing benchmark by name or create a new one."""
+ # Check if benchmark already exists
+ benchmarks = sdk_client.benchmark.list(name=name, limit=1)
+ for benchmark in benchmarks:
+ # Return the first matching benchmark
+ return benchmark
+
+ # Create a new benchmark
+ return sdk_client.benchmark.create(
+ name=name,
+ scenario_ids=scenario_ids,
+ description="Smoketest benchmark for SDK testing",
+ )
+
+
+@pytest.fixture(scope="module")
+def smoketest_benchmark(
+ sdk_client: RunloopSDK,
+) -> Tuple[Benchmark, List[str]]:
+ """Create or retrieve the smoketest benchmark and scenarios."""
+ # Create or get scenarios
+ scenario_1 = get_or_create_scenario(
+ sdk_client,
+ SMOKETEST_SCENARIO_1_NAME,
+ "Smoketest scenario 1 - basic validation",
+ )
+ scenario_2 = get_or_create_scenario(
+ sdk_client,
+ SMOKETEST_SCENARIO_2_NAME,
+ "Smoketest scenario 2 - basic validation",
+ )
+
+ scenario_ids = [scenario_1.id, scenario_2.id]
+
+ # Create or get benchmark
+ benchmark = get_or_create_benchmark(
+ sdk_client,
+ SMOKETEST_BENCHMARK_NAME,
+ scenario_ids,
+ )
+
+ return benchmark, scenario_ids
+
+
+class TestBenchmarkRun:
+ """Test Benchmark run operations."""
+
+ @pytest.mark.timeout(TWO_MINUTE_TIMEOUT)
+ def test_benchmark_run_lifecycle(
+ self,
+ sdk_client: RunloopSDK,
+ smoketest_benchmark: Tuple[Benchmark, List[str]],
+ ) -> None:
+ """Test starting and canceling a benchmark run.
+
+ This test:
+ 1. Uses the smoketest benchmark fixture
+ 2. Starts a new benchmark run via the Benchmark class
+ 3. Starts a scenario run for each scenario and checks list_scenario_runs()
+ 4. Cancels the scenario runs and the benchmark run
+ """
+ benchmark, scenario_ids = smoketest_benchmark
+
+ # Start a run
+ run = benchmark.start_run(run_name="sdk-smoketest-benchmark-run")
+ scenario_runs: List[ScenarioRun] = []
+
+ try:
+ assert isinstance(run, BenchmarkRun)
+ assert run.id is not None
+ assert run.benchmark_id == benchmark.id
+
+ # Get run info
+ info = run.get_info()
+ assert info.id == run.id
+ assert info.state == "running"
+
+ # Start a scenario run for each scenario in the benchmark
+ for scenario_id in scenario_ids:
+ scenario = sdk_client.scenario.from_id(scenario_id)
+ scenario_runs.append(
+ scenario.run(benchmark_run_id=run.id, run_name="sdk-smoketest-benchmark-run-scenario")
+ )
+
+ benchmark_scenario_runs = run.list_scenario_runs()
+ assert isinstance(benchmark_scenario_runs, list)
+ assert len(benchmark_scenario_runs) == len(scenario_runs)
+ for scenario_run in benchmark_scenario_runs:
+ assert isinstance(scenario_run, ScenarioRun)
+ # Compare against the runs we started; don't reuse the outer loop variable
+ # name here, or each run would be compared with itself.
+ assert any(
+ started.id == scenario_run.id and started.devbox_id == scenario_run.devbox_id
+ for started in scenario_runs
+ )
+
+ # Cancel the scenario runs
+ for scenario_run in scenario_runs:
+ scenario_result = scenario_run.cancel()
+ assert scenario_result.state in ["canceled", "completed"]
+
+ # Cancel the benchmark run
+ result = run.cancel()
+ assert result.state in ["canceled", "completed"]
+
+ except Exception:
+ # Ensure cleanup on any error
+ for scenario_run in scenario_runs:
+ scenario_run.cancel()
+ run.cancel()
+ raise
+
+
+class TestBenchmarkListRuns:
+ """Test Benchmark list_runs operations."""
+
+ @pytest.mark.timeout(TWO_MINUTE_TIMEOUT)
+ def test_list_runs(
+ self,
+ smoketest_benchmark: Tuple[Benchmark, List[str]],
+ ) -> None:
+ """Test listing benchmark runs.
+
+ This test:
+ 1. Uses the smoketest benchmark fixture
+ 2. Lists its runs
+ 3. Validates returned objects are BenchmarkRun instances
+ """
+ benchmark, _ = smoketest_benchmark
+
+ runs = benchmark.list_runs()
+ assert isinstance(runs, list)
+ if not runs:
+ pytest.skip("No runs available to test")
+
+ # Verify returned items are BenchmarkRun objects
+ for run in runs:
+ assert isinstance(run, BenchmarkRun)
+ assert run.id is not None
+ assert run.benchmark_id == benchmark.id
diff --git a/tests/smoketests/test_snapshots.py b/tests/smoketests/test_snapshots.py
index 71b592320..0fc43ca23 100644
--- a/tests/smoketests/test_snapshots.py
+++ b/tests/smoketests/test_snapshots.py
@@ -31,7 +31,7 @@ def _cleanup(client: Runloop) -> Iterator[None]: # pyright: ignore[reportUnused
_snapshot_id = None
-@pytest.mark.timeout(30)
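+# Snapshotting a devbox can take well over 30 seconds, so allow up to two minutes.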
+@pytest.mark.timeout(120)
def test_snapshot_devbox(client: Runloop) -> None:
global _devbox_id, _snapshot_id
created = client.devboxes.create_and_await_running(