diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index d0ab6645f..2a8f4ffdd 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "1.2.0"
+ ".": "1.3.0"
}
\ No newline at end of file
diff --git a/.stats.yml b/.stats.yml
index ca5e82df9..aa9206944 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 97
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/runloop-ai%2Frunloop-f2df3524e4b99c38b634c334d098aa2c7d543d5ea0f49c4dd8f4d92723b81b94.yml
-openapi_spec_hash: c377abec5716d1d6c5b01a527a5bfdfb
-config_hash: 2363f563f42501d2b1587a4f64bdccaf
+configured_endpoints: 98
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/runloop-ai%2Frunloop-5271153bd2f82579803953bd2fa1b9ea6466c979118804f64379fb14e9a9c436.yml
+openapi_spec_hash: 95ac224a4b0f10e9ba6129a86746c9d4
+config_hash: cb8534d20a68a49b92726bedd50f8bb1
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0165349d7..1d1ed6b23 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,42 @@
# Changelog
+## 1.3.0 (2025-12-20)
+
+Full Changelog: [v1.2.0...v1.3.0](https://github.com/runloopai/api-client-python/compare/v1.2.0...v1.3.0)
+
+### ⚠ BREAKING CHANGES
+
+* remove support for pydantic-v1, pydantic-v2 is now default ([#710](https://github.com/runloopai/api-client-python/issues/710))
+
+### Features
+
+* **benchmarks:** add `update_scenarios` method to benchmarks resource ([71ec221](https://github.com/runloopai/api-client-python/commit/71ec221f1d0cad7aac33c0299d3f8b1aa97d0741))
+* **devbox:** added stdin streaming endpoint ([83ae56a](https://github.com/runloopai/api-client-python/commit/83ae56a22a9c1d4528719321b9565731532191f2))
+* **scenarios:** add scenario builder to sdk ([#706](https://github.com/runloopai/api-client-python/issues/706)) ([2d41a15](https://github.com/runloopai/api-client-python/commit/2d41a15b4455ed8d7f6a8063cf19b82d51edeef8))
+* **sdk:** add Benchmark and AsyncBenchmark classes ([#714](https://github.com/runloopai/api-client-python/issues/714)) ([8909d8a](https://github.com/runloopai/api-client-python/commit/8909d8aabfc2f1c80ff74b636225b42cac6725ff))
+* **sdk:** add BenchmarkOps and AsyncBenchmarkOps to SDK ([#716](https://github.com/runloopai/api-client-python/issues/716)) ([9b434d9](https://github.com/runloopai/api-client-python/commit/9b434d9bc7ebdcea2b156689403d853a932f0d9e))
+* **sdk:** add BenchmarkRun and AsyncBenchmarkRun classes ([#712](https://github.com/runloopai/api-client-python/issues/712)) ([6aa83e2](https://github.com/runloopai/api-client-python/commit/6aa83e2a6c8a55694435bd2b707340770f0a326a))
+
+
+### Bug Fixes
+
+* **benchmarks:** `update()` for benchmarks and scenarios replaces all provided fields and does not modify unspecified fields ([#6702](https://github.com/runloopai/api-client-python/issues/6702)) ([cfd04b6](https://github.com/runloopai/api-client-python/commit/cfd04b6e7781534fd0e775e1b00793ad53814a47))
+* **types:** allow pyright to infer TypedDict types within SequenceNotStr ([3241717](https://github.com/runloopai/api-client-python/commit/32417177128b5f5d90b852a5460fe6823198cf9b))
+* use async_to_httpx_files in patch method ([88f8fb9](https://github.com/runloopai/api-client-python/commit/88f8fb92e1d48ff6f95833a7ee1e376bef76e0e1))
+
+
+### Chores
+
+* add documentation url to pypi project page ([#711](https://github.com/runloopai/api-client-python/issues/711)) ([7afb327](https://github.com/runloopai/api-client-python/commit/7afb32731842ebee4f479837959ccac856bd5e85))
+* add missing docstrings ([a198632](https://github.com/runloopai/api-client-python/commit/a198632f6a3936bcf5b5b4f4e6324461c4853893))
+* **internal:** add missing files argument to base client ([b7065e2](https://github.com/runloopai/api-client-python/commit/b7065e204d00f853bcac75637680dc198346a804))
+* remove support for pydantic-v1, pydantic-v2 is now default ([#710](https://github.com/runloopai/api-client-python/issues/710)) ([fb3cc3d](https://github.com/runloopai/api-client-python/commit/fb3cc3d354d4279542cd20f44857f2ec28be7082))
+
+
+### Documentation
+
+* reformat sidebar and index pages to be more transparent, add favicon to browser tab ([#715](https://github.com/runloopai/api-client-python/issues/715)) ([1161b8f](https://github.com/runloopai/api-client-python/commit/1161b8fbe8d78dc572e0310da009e1bcc7dec36f))
+
## 1.2.0 (2025-12-09)
Full Changelog: [v1.1.0...v1.2.0](https://github.com/runloopai/api-client-python/compare/v1.1.0...v1.2.0)
diff --git a/api.md b/api.md
index 17cc5978f..83c2c8b60 100644
--- a/api.md
+++ b/api.md
@@ -20,6 +20,8 @@ from runloop_api_client.types import (
BenchmarkCreateParameters,
BenchmarkRunListView,
BenchmarkRunView,
+ BenchmarkScenarioUpdateParameters,
+ BenchmarkUpdateParameters,
BenchmarkView,
ScenarioDefinitionListView,
StartBenchmarkRunParameters,
@@ -35,6 +37,7 @@ Methods:
- client.benchmarks.definitions(id, \*\*params) -> ScenarioDefinitionListView
- client.benchmarks.list_public(\*\*params) -> SyncBenchmarksCursorIDPage[BenchmarkView]
- client.benchmarks.start_run(\*\*params) -> BenchmarkRunView
+- client.benchmarks.update_scenarios(id, \*\*params) -> BenchmarkView
## Runs
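Usage sketch for the new endpoint — a minimal example, assuming the top-level `Runloop` client with an API key in the environment; the IDs are hypothetical placeholders:

from runloop_api_client import Runloop

client = Runloop()  # assumes RUNLOOP_API_KEY is set in the environment

# Add and remove Scenario IDs in a single call; both arguments are optional
# and the updated BenchmarkView is returned.
benchmark = client.benchmarks.update_scenarios(
    "bmd_123",                        # hypothetical Benchmark ID
    scenarios_to_add=["scn_new"],     # hypothetical Scenario IDs
    scenarios_to_remove=["scn_old"],
)
print(benchmark.name)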
diff --git a/pyproject.toml b/pyproject.toml
index 2c90fa53a..5007d5e66 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "runloop_api_client"
-version = "1.2.0"
+version = "1.3.0"
description = "The official Python library for the runloop API"
dynamic = ["readme"]
license = "MIT"
@@ -15,7 +15,7 @@ dependencies = [
"anyio>=3.5.0, <5",
"distro>=1.7.0, <2",
"sniffio",
- "uuid-utils>=0.11.0",
+ "uuid-utils>=0.11.0",
]
requires-python = ">= 3.9"
diff --git a/requirements-dev.lock b/requirements-dev.lock
index b9f3f2862..c48025dbf 100644
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -94,7 +94,7 @@ python-dateutil==2.9.0.post0 ; python_full_version < '3.10'
# via time-machine
respx==0.22.0
rich==14.2.0
-ruff==0.14.8
+ruff==0.14.9
six==1.17.0 ; python_full_version < '3.10'
# via python-dateutil
sniffio==1.3.1
diff --git a/src/runloop_api_client/_base_client.py b/src/runloop_api_client/_base_client.py
index f639d4201..5c05c86c5 100644
--- a/src/runloop_api_client/_base_client.py
+++ b/src/runloop_api_client/_base_client.py
@@ -1247,9 +1247,12 @@ def patch(
*,
cast_to: Type[ResponseT],
body: Body | None = None,
+ files: RequestFiles | None = None,
options: RequestOptions = {},
) -> ResponseT:
- opts = FinalRequestOptions.construct(method="patch", url=path, json_data=body, **options)
+ opts = FinalRequestOptions.construct(
+ method="patch", url=path, json_data=body, files=to_httpx_files(files), **options
+ )
return self.request(cast_to, opts)
def put(
@@ -1767,9 +1770,12 @@ async def patch(
*,
cast_to: Type[ResponseT],
body: Body | None = None,
+ files: RequestFiles | None = None,
options: RequestOptions = {},
) -> ResponseT:
- opts = FinalRequestOptions.construct(method="patch", url=path, json_data=body, **options)
+ opts = FinalRequestOptions.construct(
+ method="patch", url=path, json_data=body, files=await async_to_httpx_files(files), **options
+ )
return await self.request(cast_to, opts)
async def put(
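For context, a hedged sketch of how a generated resource method could route multipart data through the new `files` argument — the method name, endpoint, and types below are illustrative, not part of this diff:

# Hypothetical resource method; `files` is converted via to_httpx_files()
# in the sync client (async_to_httpx_files() in the async client) before
# being attached to FinalRequestOptions.
def update_attachment(self, id: str, *, file: FileTypes) -> ObjectView:
    return self._patch(
        f"/v1/objects/{id}",        # hypothetical endpoint
        body={"purpose": "update"},
        files=[("file", file)],
        options={},
        cast_to=ObjectView,
    )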
diff --git a/src/runloop_api_client/_types.py b/src/runloop_api_client/_types.py
index a2a12e84e..31df93064 100644
--- a/src/runloop_api_client/_types.py
+++ b/src/runloop_api_client/_types.py
@@ -243,6 +243,9 @@ class HttpxSendArgs(TypedDict, total=False):
if TYPE_CHECKING:
# This works because str.__contains__ does not accept object (either in typeshed or at runtime)
# https://github.com/hauntsaninja/useful_types/blob/5e9710f3875107d068e7679fd7fec9cfab0eff3b/useful_types/__init__.py#L285
+ #
+ # Note: index() and count() methods are intentionally omitted to allow pyright to properly
+ # infer TypedDict types when dict literals are used in lists assigned to SequenceNotStr.
class SequenceNotStr(Protocol[_T_co]):
@overload
def __getitem__(self, index: SupportsIndex, /) -> _T_co: ...
@@ -251,8 +254,6 @@ def __getitem__(self, index: slice, /) -> Sequence[_T_co]: ...
def __contains__(self, value: object, /) -> bool: ...
def __len__(self) -> int: ...
def __iter__(self) -> Iterator[_T_co]: ...
- def index(self, value: Any, start: int = 0, stop: int = ..., /) -> int: ...
- def count(self, value: Any, /) -> int: ...
def __reversed__(self) -> Iterator[_T_co]: ...
else:
# just point this to a normal `Sequence` at runtime to avoid having to special case
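The effect of dropping `index()` and `count()` can be checked with a small pyright snippet; the `LaunchParams` TypedDict and `configure` function are illustrative only:

from typing_extensions import TypedDict

from runloop_api_client._types import SequenceNotStr

class LaunchParams(TypedDict, total=False):
    name: str
    port: int

def configure(params: SequenceNotStr[LaunchParams]) -> None: ...

# pyright now infers the dict literal as LaunchParams instead of failing to
# match the protocol; a bare str is still rejected because
# str.__contains__ does not accept object.
configure([{"name": "web", "port": 8080}])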
diff --git a/src/runloop_api_client/_version.py b/src/runloop_api_client/_version.py
index e13ec2fd5..c746bdc5e 100644
--- a/src/runloop_api_client/_version.py
+++ b/src/runloop_api_client/_version.py
@@ -1,4 +1,4 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
__title__ = "runloop_api_client"
-__version__ = "1.2.0" # x-release-please-version
+__version__ = "1.3.0" # x-release-please-version
diff --git a/src/runloop_api_client/resources/agents.py b/src/runloop_api_client/resources/agents.py
index 6ff202d74..9ac9f8c02 100644
--- a/src/runloop_api_client/resources/agents.py
+++ b/src/runloop_api_client/resources/agents.py
@@ -49,6 +49,7 @@ def create(
self,
*,
name: str,
+ version: str,
source: Optional[AgentSource] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -66,6 +67,8 @@ def create(
Args:
name: The name of the Agent.
+ version: The version of the Agent. Must be a semver string (e.g., '2.0.65') or a SHA.
+
source: The source configuration for the Agent.
extra_headers: Send extra headers
@@ -83,6 +86,7 @@ def create(
body=maybe_transform(
{
"name": name,
+ "version": version,
"source": source,
},
agent_create_params.AgentCreateParams,
@@ -138,6 +142,7 @@ def list(
name: str | Omit = omit,
search: str | Omit = omit,
starting_after: str | Omit = omit,
+ version: str | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -151,7 +156,7 @@ def list(
Args:
is_public: Filter agents by public visibility.
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
name: Filter agents by name (partial match supported).
@@ -159,6 +164,8 @@ def list(
starting_after: Load the next page of data starting after the item with the given ID.
+ version: Filter by version. Use 'latest' to get the most recently created agent.
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -182,6 +189,7 @@ def list(
"name": name,
"search": search,
"starting_after": starting_after,
+ "version": version,
},
agent_list_params.AgentListParams,
),
@@ -214,6 +222,7 @@ async def create(
self,
*,
name: str,
+ version: str,
source: Optional[AgentSource] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -231,6 +240,8 @@ async def create(
Args:
name: The name of the Agent.
+ version: The version of the Agent. Must be a semver string (e.g., '2.0.65') or a SHA.
+
source: The source configuration for the Agent.
extra_headers: Send extra headers
@@ -248,6 +259,7 @@ async def create(
body=await async_maybe_transform(
{
"name": name,
+ "version": version,
"source": source,
},
agent_create_params.AgentCreateParams,
@@ -303,6 +315,7 @@ def list(
name: str | Omit = omit,
search: str | Omit = omit,
starting_after: str | Omit = omit,
+ version: str | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -316,7 +329,7 @@ def list(
Args:
is_public: Filter agents by public visibility.
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
name: Filter agents by name (partial match supported).
@@ -324,6 +337,8 @@ def list(
starting_after: Load the next page of data starting after the item with the given ID.
+ version: Filter by version. Use 'latest' to get the most recently created agent.
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -347,6 +362,7 @@ def list(
"name": name,
"search": search,
"starting_after": starting_after,
+ "version": version,
},
agent_list_params.AgentListParams,
),
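Taken together: `create()` now requires a version and `list()` can filter on it. A short sketch, assuming the `Runloop` client with an API key in the environment; the agent name is a placeholder:

from runloop_api_client import Runloop

client = Runloop()

# `version` is now required: a semver string or a SHA.
agent = client.agents.create(name="my-agent", version="2.0.65")

# New `version` filter; "latest" returns the most recently created agent.
page = client.agents.list(name="my-agent", version="latest", limit=1)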
diff --git a/src/runloop_api_client/resources/benchmarks/benchmarks.py b/src/runloop_api_client/resources/benchmarks/benchmarks.py
index c30885e9e..9d9a30b5d 100644
--- a/src/runloop_api_client/resources/benchmarks/benchmarks.py
+++ b/src/runloop_api_client/resources/benchmarks/benchmarks.py
@@ -21,6 +21,7 @@
benchmark_start_run_params,
benchmark_definitions_params,
benchmark_list_public_params,
+ benchmark_update_scenarios_params,
)
from ..._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
from ..._utils import maybe_transform, async_maybe_transform
@@ -88,16 +89,16 @@ def create(
Create a Benchmark with a set of Scenarios.
Args:
- name: The name of the Benchmark. This must be unique.
+ name: The unique name of the Benchmark.
attribution: Attribution information for the benchmark.
description: Detailed description of the benchmark.
- metadata: User defined metadata to attach to the benchmark for organization.
+ metadata: User defined metadata to attach to the benchmark.
required_environment_variables: Environment variables required to run the benchmark. If any required variables
- are not supplied, the benchmark will fail to start
+ are not supplied, the benchmark will fail to start.
required_secret_names: Secrets required to run the benchmark (each environment variable name will be
mapped to your user secret by name). If any of these secrets are not
@@ -176,12 +177,12 @@ def update(
self,
id: str,
*,
- name: str,
attribution: Optional[str] | Omit = omit,
description: Optional[str] | Omit = omit,
metadata: Optional[Dict[str, str]] | Omit = omit,
+ name: Optional[str] | Omit = omit,
required_environment_variables: Optional[SequenceNotStr[str]] | Omit = omit,
- required_secret_names: SequenceNotStr[str] | Omit = omit,
+ required_secret_names: Optional[SequenceNotStr[str]] | Omit = omit,
scenario_ids: Optional[SequenceNotStr[str]] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -191,26 +192,30 @@ def update(
timeout: float | httpx.Timeout | None | NotGiven = not_given,
idempotency_key: str | None = None,
) -> BenchmarkView:
- """
- Update a Benchmark with a set of Scenarios.
+ """Update a Benchmark.
+
+ Fields that are null will preserve the existing value.
+ Fields that are provided (including empty values) will replace the existing
+ value entirely.
Args:
- name: The name of the Benchmark. This must be unique.
+ attribution: Attribution information for the benchmark. Pass in empty string to clear.
- attribution: Attribution information for the benchmark.
+ description: Detailed description of the benchmark. Pass in empty string to clear.
- description: Detailed description of the benchmark.
+ metadata: User defined metadata to attach to the benchmark. Pass in empty map to clear.
- metadata: User defined metadata to attach to the benchmark for organization.
+ name: The unique name of the Benchmark. Cannot be blank.
required_environment_variables: Environment variables required to run the benchmark. If any required variables
- are not supplied, the benchmark will fail to start
+ are not supplied, the benchmark will fail to start. Pass in empty list to clear.
required_secret_names: Secrets required to run the benchmark (each environment variable name will be
mapped to your user secret by name). If any of these secrets are not
- provided or the mapping is incorrect, the benchmark will fail to start.
+ provided or the mapping is incorrect, the benchmark will fail to start. Pass in
+ empty list to clear.
- scenario_ids: The Scenario IDs that make up the Benchmark.
+ scenario_ids: The Scenario IDs that make up the Benchmark. Pass in empty list to clear.
extra_headers: Send extra headers
@@ -228,10 +233,10 @@ def update(
f"/v1/benchmarks/{id}",
body=maybe_transform(
{
- "name": name,
"attribution": attribution,
"description": description,
"metadata": metadata,
+ "name": name,
"required_environment_variables": required_environment_variables,
"required_secret_names": required_secret_names,
"scenario_ids": scenario_ids,
@@ -252,6 +257,7 @@ def list(
self,
*,
limit: int | Omit = omit,
+ name: str | Omit = omit,
starting_after: str | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -264,7 +270,9 @@ def list(
List all Benchmarks matching filter.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
+
+ name: Filter by name
starting_after: Load the next page of data starting after the item with the given ID.
@@ -287,6 +295,7 @@ def list(
query=maybe_transform(
{
"limit": limit,
+ "name": name,
"starting_after": starting_after,
},
benchmark_list_params.BenchmarkListParams,
@@ -312,7 +321,7 @@ def definitions(
Get scenario definitions for a previously created Benchmark.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
starting_after: Load the next page of data starting after the item with the given ID.
@@ -360,7 +369,7 @@ def list_public(
List all public benchmarks matching filter.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
starting_after: Load the next page of data starting after the item with the given ID.
@@ -449,6 +458,59 @@ def start_run(
cast_to=BenchmarkRunView,
)
+ def update_scenarios(
+ self,
+ id: str,
+ *,
+ scenarios_to_add: Optional[SequenceNotStr[str]] | Omit = omit,
+ scenarios_to_remove: Optional[SequenceNotStr[str]] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ idempotency_key: str | None = None,
+ ) -> BenchmarkView:
+ """
+ Add and/or remove Scenario IDs from an existing Benchmark.
+
+ Args:
+ scenarios_to_add: Scenario IDs to add to the Benchmark.
+
+ scenarios_to_remove: Scenario IDs to remove from the Benchmark.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+
+ idempotency_key: Specify a custom idempotency key for this request
+ """
+ if not id:
+ raise ValueError(f"Expected a non-empty value for `id` but received {id!r}")
+ return self._post(
+ f"/v1/benchmarks/{id}/scenarios",
+ body=maybe_transform(
+ {
+ "scenarios_to_add": scenarios_to_add,
+ "scenarios_to_remove": scenarios_to_remove,
+ },
+ benchmark_update_scenarios_params.BenchmarkUpdateScenariosParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ idempotency_key=idempotency_key,
+ ),
+ cast_to=BenchmarkView,
+ )
+
class AsyncBenchmarksResource(AsyncAPIResource):
@cached_property
@@ -496,16 +558,16 @@ async def create(
Create a Benchmark with a set of Scenarios.
Args:
- name: The name of the Benchmark. This must be unique.
+ name: The unique name of the Benchmark.
attribution: Attribution information for the benchmark.
description: Detailed description of the benchmark.
- metadata: User defined metadata to attach to the benchmark for organization.
+ metadata: User defined metadata to attach to the benchmark.
required_environment_variables: Environment variables required to run the benchmark. If any required variables
- are not supplied, the benchmark will fail to start
+ are not supplied, the benchmark will fail to start.
required_secret_names: Secrets required to run the benchmark (each environment variable name will be
mapped to your user secret by name). If any of these secrets are not
@@ -584,12 +646,12 @@ async def update(
self,
id: str,
*,
- name: str,
attribution: Optional[str] | Omit = omit,
description: Optional[str] | Omit = omit,
metadata: Optional[Dict[str, str]] | Omit = omit,
+ name: Optional[str] | Omit = omit,
required_environment_variables: Optional[SequenceNotStr[str]] | Omit = omit,
- required_secret_names: SequenceNotStr[str] | Omit = omit,
+ required_secret_names: Optional[SequenceNotStr[str]] | Omit = omit,
scenario_ids: Optional[SequenceNotStr[str]] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -599,26 +661,30 @@ async def update(
timeout: float | httpx.Timeout | None | NotGiven = not_given,
idempotency_key: str | None = None,
) -> BenchmarkView:
- """
- Update a Benchmark with a set of Scenarios.
+ """Update a Benchmark.
+
+ Fields that are null will preserve the existing value.
+ Fields that are provided (including empty values) will replace the existing
+ value entirely.
Args:
- name: The name of the Benchmark. This must be unique.
+ attribution: Attribution information for the benchmark. Pass in empty string to clear.
- attribution: Attribution information for the benchmark.
+ description: Detailed description of the benchmark. Pass in empty string to clear.
- description: Detailed description of the benchmark.
+ metadata: User defined metadata to attach to the benchmark. Pass in empty map to clear.
- metadata: User defined metadata to attach to the benchmark for organization.
+ name: The unique name of the Benchmark. Cannot be blank.
required_environment_variables: Environment variables required to run the benchmark. If any required variables
- are not supplied, the benchmark will fail to start
+ are not supplied, the benchmark will fail to start. Pass in empty list to clear.
required_secret_names: Secrets required to run the benchmark (each environment variable name will be
mapped to your user secret by name). If any of these secrets are not
- provided or the mapping is incorrect, the benchmark will fail to start.
+ provided or the mapping is incorrect, the benchmark will fail to start. Pass in
+ empty list to clear.
- scenario_ids: The Scenario IDs that make up the Benchmark.
+ scenario_ids: The Scenario IDs that make up the Benchmark. Pass in empty list to clear.
extra_headers: Send extra headers
@@ -636,10 +702,10 @@ async def update(
f"/v1/benchmarks/{id}",
body=await async_maybe_transform(
{
- "name": name,
"attribution": attribution,
"description": description,
"metadata": metadata,
+ "name": name,
"required_environment_variables": required_environment_variables,
"required_secret_names": required_secret_names,
"scenario_ids": scenario_ids,
@@ -660,6 +726,7 @@ def list(
self,
*,
limit: int | Omit = omit,
+ name: str | Omit = omit,
starting_after: str | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -672,7 +739,9 @@ def list(
List all Benchmarks matching filter.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
+
+ name: Filter by name
starting_after: Load the next page of data starting after the item with the given ID.
@@ -695,6 +764,7 @@ def list(
query=maybe_transform(
{
"limit": limit,
+ "name": name,
"starting_after": starting_after,
},
benchmark_list_params.BenchmarkListParams,
@@ -720,7 +790,7 @@ async def definitions(
Get scenario definitions for a previously created Benchmark.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
starting_after: Load the next page of data starting after the item with the given ID.
@@ -768,7 +838,7 @@ def list_public(
List all public benchmarks matching filter.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
starting_after: Load the next page of data starting after the item with the given ID.
@@ -857,6 +927,59 @@ async def start_run(
cast_to=BenchmarkRunView,
)
+ async def update_scenarios(
+ self,
+ id: str,
+ *,
+ scenarios_to_add: Optional[SequenceNotStr[str]] | Omit = omit,
+ scenarios_to_remove: Optional[SequenceNotStr[str]] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ idempotency_key: str | None = None,
+ ) -> BenchmarkView:
+ """
+ Add and/or remove Scenario IDs from an existing Benchmark.
+
+ Args:
+ scenarios_to_add: Scenario IDs to add to the Benchmark.
+
+ scenarios_to_remove: Scenario IDs to remove from the Benchmark.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+
+ idempotency_key: Specify a custom idempotency key for this request
+ """
+ if not id:
+ raise ValueError(f"Expected a non-empty value for `id` but received {id!r}")
+ return await self._post(
+ f"/v1/benchmarks/{id}/scenarios",
+ body=await async_maybe_transform(
+ {
+ "scenarios_to_add": scenarios_to_add,
+ "scenarios_to_remove": scenarios_to_remove,
+ },
+ benchmark_update_scenarios_params.BenchmarkUpdateScenariosParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ idempotency_key=idempotency_key,
+ ),
+ cast_to=BenchmarkView,
+ )
+
class BenchmarksResourceWithRawResponse:
def __init__(self, benchmarks: BenchmarksResource) -> None:
@@ -883,6 +1006,9 @@ def __init__(self, benchmarks: BenchmarksResource) -> None:
self.start_run = to_raw_response_wrapper(
benchmarks.start_run,
)
+ self.update_scenarios = to_raw_response_wrapper(
+ benchmarks.update_scenarios,
+ )
@cached_property
def runs(self) -> RunsResourceWithRawResponse:
@@ -914,6 +1040,9 @@ def __init__(self, benchmarks: AsyncBenchmarksResource) -> None:
self.start_run = async_to_raw_response_wrapper(
benchmarks.start_run,
)
+ self.update_scenarios = async_to_raw_response_wrapper(
+ benchmarks.update_scenarios,
+ )
@cached_property
def runs(self) -> AsyncRunsResourceWithRawResponse:
@@ -945,6 +1074,9 @@ def __init__(self, benchmarks: BenchmarksResource) -> None:
self.start_run = to_streamed_response_wrapper(
benchmarks.start_run,
)
+ self.update_scenarios = to_streamed_response_wrapper(
+ benchmarks.update_scenarios,
+ )
@cached_property
def runs(self) -> RunsResourceWithStreamingResponse:
@@ -976,6 +1108,9 @@ def __init__(self, benchmarks: AsyncBenchmarksResource) -> None:
self.start_run = async_to_streamed_response_wrapper(
benchmarks.start_run,
)
+ self.update_scenarios = async_to_streamed_response_wrapper(
+ benchmarks.update_scenarios,
+ )
@cached_property
def runs(self) -> AsyncRunsResourceWithStreamingResponse:
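The practical upshot of the new `update()` semantics — omit to preserve, pass an empty value to clear — next to the scenario delta endpoint; a sketch with hypothetical IDs, reusing a `client` built as in the earlier examples:

# Omitted fields keep their existing values; empty values clear them, and
# an explicit None (null) also preserves, per the docstring.
client.benchmarks.update(
    "bmd_123",
    description="",   # empty string clears the description
    metadata={},      # empty map clears metadata
)

# For incremental membership changes, the delta endpoint avoids resending
# the full scenario_ids list.
client.benchmarks.update_scenarios("bmd_123", scenarios_to_add=["scn_new"])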
diff --git a/src/runloop_api_client/resources/benchmarks/runs.py b/src/runloop_api_client/resources/benchmarks/runs.py
index 6d69d160b..cdab6fd30 100644
--- a/src/runloop_api_client/resources/benchmarks/runs.py
+++ b/src/runloop_api_client/resources/benchmarks/runs.py
@@ -83,6 +83,7 @@ def list(
*,
benchmark_id: str | Omit = omit,
limit: int | Omit = omit,
+ name: str | Omit = omit,
starting_after: str | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -97,7 +98,9 @@ def list(
Args:
benchmark_id: The Benchmark ID to filter by.
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
+
+ name: Filter by name
starting_after: Load the next page of data starting after the item with the given ID.
@@ -121,6 +124,7 @@ def list(
{
"benchmark_id": benchmark_id,
"limit": limit,
+ "name": name,
"starting_after": starting_after,
},
run_list_params.RunListParams,
@@ -227,7 +231,7 @@ def list_scenario_runs(
List started scenario runs for a benchmark run.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
starting_after: Load the next page of data starting after the item with the given ID.
@@ -322,6 +326,7 @@ def list(
*,
benchmark_id: str | Omit = omit,
limit: int | Omit = omit,
+ name: str | Omit = omit,
starting_after: str | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -336,7 +341,9 @@ def list(
Args:
benchmark_id: The Benchmark ID to filter by.
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
+
+ name: Filter by name
starting_after: Load the next page of data starting after the item with the given ID.
@@ -360,6 +367,7 @@ def list(
{
"benchmark_id": benchmark_id,
"limit": limit,
+ "name": name,
"starting_after": starting_after,
},
run_list_params.RunListParams,
@@ -466,7 +474,7 @@ def list_scenario_runs(
List started scenario runs for a benchmark run.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
starting_after: Load the next page of data starting after the item with the given ID.
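A one-liner exercising the new run filter (the name value is illustrative; `client` as in the earlier sketches):

runs = client.benchmarks.runs.list(benchmark_id="bmd_123", name="nightly", limit=50)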
diff --git a/src/runloop_api_client/resources/blueprints.py b/src/runloop_api_client/resources/blueprints.py
index 8cc04c2e3..818365271 100644
--- a/src/runloop_api_client/resources/blueprints.py
+++ b/src/runloop_api_client/resources/blueprints.py
@@ -389,6 +389,7 @@ def list(
limit: int | Omit = omit,
name: str | Omit = omit,
starting_after: str | Omit = omit,
+ status: str | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -400,12 +401,14 @@ def list(
List all Blueprints or filter by name.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
name: Filter by name
starting_after: Load the next page of data starting after the item with the given ID.
+ status: Filter by build status (queued, provisioning, building, failed, build_complete)
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -427,6 +430,7 @@ def list(
"limit": limit,
"name": name,
"starting_after": starting_after,
+ "status": status,
},
blueprint_list_params.BlueprintListParams,
),
@@ -558,6 +562,7 @@ def list_public(
limit: int | Omit = omit,
name: str | Omit = omit,
starting_after: str | Omit = omit,
+ status: str | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -569,12 +574,14 @@ def list_public(
List all public Blueprints that are available to all users.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
name: Filter by name
starting_after: Load the next page of data starting after the item with the given ID.
+ status: Filter by build status (queued, provisioning, building, failed, build_complete)
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -596,6 +603,7 @@ def list_public(
"limit": limit,
"name": name,
"starting_after": starting_after,
+ "status": status,
},
blueprint_list_public_params.BlueprintListPublicParams,
),
@@ -1028,6 +1036,7 @@ def list(
limit: int | Omit = omit,
name: str | Omit = omit,
starting_after: str | Omit = omit,
+ status: str | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -1039,12 +1048,14 @@ def list(
List all Blueprints or filter by name.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
name: Filter by name
starting_after: Load the next page of data starting after the item with the given ID.
+ status: Filter by build status (queued, provisioning, building, failed, build_complete)
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -1066,6 +1077,7 @@ def list(
"limit": limit,
"name": name,
"starting_after": starting_after,
+ "status": status,
},
blueprint_list_params.BlueprintListParams,
),
@@ -1197,6 +1209,7 @@ def list_public(
limit: int | Omit = omit,
name: str | Omit = omit,
starting_after: str | Omit = omit,
+ status: str | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -1208,12 +1221,14 @@ def list_public(
List all public Blueprints that are available to all users.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
name: Filter by name
starting_after: Load the next page of data starting after the item with the given ID.
+ status: Filter by build status (queued, provisioning, building, failed, build_complete)
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -1235,6 +1250,7 @@ def list_public(
"limit": limit,
"name": name,
"starting_after": starting_after,
+ "status": status,
},
blueprint_list_public_params.BlueprintListPublicParams,
),
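Sketch of the new `status` filter, using the build states enumerated in the docstring (`client` as above):

complete = client.blueprints.list(status="build_complete")
failed_public = client.blueprints.list_public(status="failed", limit=100)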
diff --git a/src/runloop_api_client/resources/devboxes/devboxes.py b/src/runloop_api_client/resources/devboxes/devboxes.py
index fc13c722d..dc7b1b492 100644
--- a/src/runloop_api_client/resources/devboxes/devboxes.py
+++ b/src/runloop_api_client/resources/devboxes/devboxes.py
@@ -558,7 +558,7 @@ def list(
List all Devboxes while optionally filtering by status.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
starting_after: Load the next page of data starting after the item with the given ID.
@@ -1102,7 +1102,7 @@ def list_disk_snapshots(
Args:
devbox_id: Devbox ID to filter by.
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
metadata_key: Filter snapshots by metadata key-value pair. Can be used multiple times for
different keys.
@@ -2093,7 +2093,7 @@ def list(
List all Devboxes while optionally filtering by status.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
starting_after: Load the next page of data starting after the item with the given ID.
@@ -2638,7 +2638,7 @@ def list_disk_snapshots(
Args:
devbox_id: Devbox ID to filter by.
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
metadata_key: Filter snapshots by metadata key-value pair. Can be used multiple times for
different keys.
diff --git a/src/runloop_api_client/resources/devboxes/disk_snapshots.py b/src/runloop_api_client/resources/devboxes/disk_snapshots.py
index 0e3530374..b896adbb6 100644
--- a/src/runloop_api_client/resources/devboxes/disk_snapshots.py
+++ b/src/runloop_api_client/resources/devboxes/disk_snapshots.py
@@ -130,7 +130,7 @@ def list(
Args:
devbox_id: Devbox ID to filter by.
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
metadata_key: Filter snapshots by metadata key-value pair. Can be used multiple times for
different keys.
@@ -381,7 +381,7 @@ def list(
Args:
devbox_id: Devbox ID to filter by.
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
metadata_key: Filter snapshots by metadata key-value pair. Can be used multiple times for
different keys.
diff --git a/src/runloop_api_client/resources/objects.py b/src/runloop_api_client/resources/objects.py
index 4d7d2e0a3..409d5f6f3 100644
--- a/src/runloop_api_client/resources/objects.py
+++ b/src/runloop_api_client/resources/objects.py
@@ -162,7 +162,7 @@ def list(
Args:
content_type: Filter storage objects by content type.
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
name: Filter storage objects by name (partial match supported).
@@ -352,7 +352,7 @@ def list_public(
Args:
content_type: Filter storage objects by content type.
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
name: Filter storage objects by name (partial match supported).
@@ -530,7 +530,7 @@ def list(
Args:
content_type: Filter storage objects by content type.
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
name: Filter storage objects by name (partial match supported).
@@ -720,7 +720,7 @@ def list_public(
Args:
content_type: Filter storage objects by content type.
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
name: Filter storage objects by name (partial match supported).
diff --git a/src/runloop_api_client/resources/repositories.py b/src/runloop_api_client/resources/repositories.py
index e2b238750..a22075540 100644
--- a/src/runloop_api_client/resources/repositories.py
+++ b/src/runloop_api_client/resources/repositories.py
@@ -163,7 +163,7 @@ def list(
List all available repository connections.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
name: Filter by repository name
@@ -542,7 +542,7 @@ def list(
List all available repository connections.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
name: Filter by repository name
diff --git a/src/runloop_api_client/resources/scenarios/runs.py b/src/runloop_api_client/resources/scenarios/runs.py
index a6a16a5a0..3ea9a960f 100644
--- a/src/runloop_api_client/resources/scenarios/runs.py
+++ b/src/runloop_api_client/resources/scenarios/runs.py
@@ -89,9 +89,12 @@ def retrieve(
def list(
self,
*,
+ benchmark_run_id: str | Omit = omit,
limit: int | Omit = omit,
+ name: str | Omit = omit,
scenario_id: str | Omit = omit,
starting_after: str | Omit = omit,
+ state: str | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -103,12 +106,18 @@ def list(
List all ScenarioRuns matching filter.
Args:
- limit: The limit of items to return. Default is 20.
+ benchmark_run_id: Filter by benchmark run ID
+
+ limit: The limit of items to return. Default is 20. Max is 5000.
+
+ name: Filter by name
scenario_id: Filter runs associated with the Scenario with the given ID
starting_after: Load the next page of data starting after the item with the given ID.
+ state: Filter by state
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -127,9 +136,12 @@ def list(
timeout=timeout,
query=maybe_transform(
{
+ "benchmark_run_id": benchmark_run_id,
"limit": limit,
+ "name": name,
"scenario_id": scenario_id,
"starting_after": starting_after,
+ "state": state,
},
run_list_params.RunListParams,
),
@@ -497,9 +509,12 @@ async def retrieve(
def list(
self,
*,
+ benchmark_run_id: str | Omit = omit,
limit: int | Omit = omit,
+ name: str | Omit = omit,
scenario_id: str | Omit = omit,
starting_after: str | Omit = omit,
+ state: str | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -511,12 +526,18 @@ def list(
List all ScenarioRuns matching filter.
Args:
- limit: The limit of items to return. Default is 20.
+ benchmark_run_id: Filter by benchmark run ID
+
+ limit: The limit of items to return. Default is 20. Max is 5000.
+
+ name: Filter by name
scenario_id: Filter runs associated with the Scenario with the given ID
starting_after: Load the next page of data starting after the item with the given ID.
+ state: Filter by state
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -535,9 +556,12 @@ def list(
timeout=timeout,
query=maybe_transform(
{
+ "benchmark_run_id": benchmark_run_id,
"limit": limit,
+ "name": name,
"scenario_id": scenario_id,
"starting_after": starting_after,
+ "state": state,
},
run_list_params.RunListParams,
),
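Combining the new scenario-run filters — the state value is illustrative, since this diff does not enumerate the allowed states (`client` as above):

runs = client.scenarios.runs.list(
    benchmark_run_id="bmr_123",  # hypothetical BenchmarkRun ID
    name="eval",
    state="completed",
    limit=100,
)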
diff --git a/src/runloop_api_client/resources/scenarios/scenarios.py b/src/runloop_api_client/resources/scenarios/scenarios.py
index 6b7c729f4..bd961a285 100644
--- a/src/runloop_api_client/resources/scenarios/scenarios.py
+++ b/src/runloop_api_client/resources/scenarios/scenarios.py
@@ -221,31 +221,32 @@ def update(
timeout: float | httpx.Timeout | None | NotGiven = not_given,
idempotency_key: str | None = None,
) -> ScenarioView:
- """
- Update a Scenario, a repeatable AI coding evaluation test that defines the
- starting environment as well as evaluation success criteria. Only provided
- fields will be updated.
+ """Update a Scenario.
+
+ Fields that are null will preserve the existing value. Fields
+ that are provided (including empty values) will replace the existing value
+ entirely.
Args:
environment_parameters: The Environment in which the Scenario will run.
input_context: The input context for the Scenario.
- metadata: User defined metadata to attach to the scenario for organization.
+ metadata: User defined metadata to attach to the scenario. Pass in empty map to clear.
- name: Name of the scenario.
+ name: Name of the scenario. Cannot be blank.
reference_output: A string representation of the reference output to solve the scenario. Commonly
can be the result of a git diff or a sequence of command actions to apply to the
- environment.
+ environment. Pass in empty string to clear.
- required_environment_variables: Environment variables required to run the scenario.
+ required_environment_variables: Environment variables required to run the scenario. Pass in empty list to clear.
- required_secret_names: Secrets required to run the scenario.
+ required_secret_names: Secrets required to run the scenario. Pass in empty list to clear.
scoring_contract: The scoring contract for the Scenario.
- validation_type: Validation strategy.
+ validation_type: Validation strategy. Pass in empty string to clear.
extra_headers: Send extra headers
@@ -292,6 +293,7 @@ def list(
limit: int | Omit = omit,
name: str | Omit = omit,
starting_after: str | Omit = omit,
+ validation_type: str | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -305,12 +307,14 @@ def list(
Args:
benchmark_id: Filter scenarios by benchmark ID.
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
name: Query for Scenarios with a given name.
starting_after: Load the next page of data starting after the item with the given ID.
+ validation_type: Filter by validation type
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -333,6 +337,7 @@ def list(
"limit": limit,
"name": name,
"starting_after": starting_after,
+ "validation_type": validation_type,
},
scenario_list_params.ScenarioListParams,
),
@@ -357,7 +362,7 @@ def list_public(
List all public scenarios matching filter.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
name: Query for Scenarios with a given name.
@@ -678,31 +683,32 @@ async def update(
timeout: float | httpx.Timeout | None | NotGiven = not_given,
idempotency_key: str | None = None,
) -> ScenarioView:
- """
- Update a Scenario, a repeatable AI coding evaluation test that defines the
- starting environment as well as evaluation success criteria. Only provided
- fields will be updated.
+ """Update a Scenario.
+
+ Fields that are null will preserve the existing value. Fields
+ that are provided (including empty values) will replace the existing value
+ entirely.
Args:
environment_parameters: The Environment in which the Scenario will run.
input_context: The input context for the Scenario.
- metadata: User defined metadata to attach to the scenario for organization.
+ metadata: User defined metadata to attach to the scenario. Pass in empty map to clear.
- name: Name of the scenario.
+ name: Name of the scenario. Cannot be blank.
reference_output: A string representation of the reference output to solve the scenario. Commonly
can be the result of a git diff or a sequence of command actions to apply to the
- environment.
+ environment. Pass in empty string to clear.
- required_environment_variables: Environment variables required to run the scenario.
+ required_environment_variables: Environment variables required to run the scenario. Pass in empty list to clear.
- required_secret_names: Secrets required to run the scenario.
+ required_secret_names: Secrets required to run the scenario. Pass in empty list to clear.
scoring_contract: The scoring contract for the Scenario.
- validation_type: Validation strategy.
+ validation_type: Validation strategy. Pass in empty string to clear.
extra_headers: Send extra headers
@@ -749,6 +755,7 @@ def list(
limit: int | Omit = omit,
name: str | Omit = omit,
starting_after: str | Omit = omit,
+ validation_type: str | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -762,12 +769,14 @@ def list(
Args:
benchmark_id: Filter scenarios by benchmark ID.
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
name: Query for Scenarios with a given name.
starting_after: Load the next page of data starting after the item with the given ID.
+ validation_type: Filter by validation type
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -790,6 +799,7 @@ def list(
"limit": limit,
"name": name,
"starting_after": starting_after,
+ "validation_type": validation_type,
},
scenario_list_params.ScenarioListParams,
),
@@ -814,7 +824,7 @@ def list_public(
List all public scenarios matching filter.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
name: Query for Scenarios with a given name.
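Scenarios get the same replace-vs-preserve `update()` semantics as benchmarks, plus a `validation_type` list filter; a sketch with hypothetical IDs and an illustrative validation type (`client` as above):

client.scenarios.update(
    "scn_123",
    reference_output="",        # empty string clears
    required_secret_names=[],   # empty list clears
)

scenarios = client.scenarios.list(validation_type="manual")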
diff --git a/src/runloop_api_client/resources/scenarios/scorers.py b/src/runloop_api_client/resources/scenarios/scorers.py
index 5b083f8e4..9e5d5e198 100644
--- a/src/runloop_api_client/resources/scenarios/scorers.py
+++ b/src/runloop_api_client/resources/scenarios/scorers.py
@@ -201,7 +201,7 @@ def list(
List all Scenario Scorers matching filter.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
starting_after: Load the next page of data starting after the item with the given ID.
@@ -460,7 +460,7 @@ def list(
List all Scenario Scorers matching filter.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
starting_after: Load the next page of data starting after the item with the given ID.
diff --git a/src/runloop_api_client/resources/secrets.py b/src/runloop_api_client/resources/secrets.py
index 8e170fca2..892557497 100644
--- a/src/runloop_api_client/resources/secrets.py
+++ b/src/runloop_api_client/resources/secrets.py
@@ -160,7 +160,7 @@ def list(
for security reasons.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
extra_headers: Send extra headers
@@ -363,7 +363,7 @@ async def list(
for security reasons.
Args:
- limit: The limit of items to return. Default is 20.
+ limit: The limit of items to return. Default is 20. Max is 5000.
extra_headers: Send extra headers
diff --git a/src/runloop_api_client/sdk/__init__.py b/src/runloop_api_client/sdk/__init__.py
index 5773b9d53..610017b79 100644
--- a/src/runloop_api_client/sdk/__init__.py
+++ b/src/runloop_api_client/sdk/__init__.py
@@ -5,7 +5,17 @@
from __future__ import annotations
-from .sync import AgentOps, DevboxOps, ScorerOps, RunloopSDK, ScenarioOps, SnapshotOps, BlueprintOps, StorageObjectOps
+from .sync import (
+ AgentOps,
+ DevboxOps,
+ ScorerOps,
+ RunloopSDK,
+ ScenarioOps,
+ SnapshotOps,
+ BenchmarkOps,
+ BlueprintOps,
+ StorageObjectOps,
+)
from .agent import Agent
from ._types import ScenarioPreview
from .async_ import (
@@ -15,6 +25,7 @@
AsyncRunloopSDK,
AsyncScenarioOps,
AsyncSnapshotOps,
+ AsyncBenchmarkOps,
AsyncBlueprintOps,
AsyncStorageObjectOps,
)
@@ -22,20 +33,24 @@
from .scorer import Scorer
from .scenario import Scenario
from .snapshot import Snapshot
+from .benchmark import Benchmark
from .blueprint import Blueprint
from .execution import Execution
from .async_agent import AsyncAgent
from .async_devbox import AsyncDevbox, AsyncNamedShell
from .async_scorer import AsyncScorer
from .scenario_run import ScenarioRun
+from .benchmark_run import BenchmarkRun
from .async_scenario import AsyncScenario
from .async_snapshot import AsyncSnapshot
from .storage_object import StorageObject
+from .async_benchmark import AsyncBenchmark
from .async_blueprint import AsyncBlueprint
from .async_execution import AsyncExecution
from .execution_result import ExecutionResult
from .scenario_builder import ScenarioBuilder
from .async_scenario_run import AsyncScenarioRun
+from .async_benchmark_run import AsyncBenchmarkRun
from .async_storage_object import AsyncStorageObject
from .async_execution_result import AsyncExecutionResult
from .async_scenario_builder import AsyncScenarioBuilder
@@ -47,6 +62,8 @@
# Management interfaces
"AgentOps",
"AsyncAgentOps",
+ "BenchmarkOps",
+ "AsyncBenchmarkOps",
"DevboxOps",
"AsyncDevboxOps",
"BlueprintOps",
@@ -62,6 +79,10 @@
# Resource classes
"Agent",
"AsyncAgent",
+ "Benchmark",
+ "AsyncBenchmark",
+ "BenchmarkRun",
+ "AsyncBenchmarkRun",
"Devbox",
"AsyncDevbox",
"Execution",
diff --git a/src/runloop_api_client/sdk/_types.py b/src/runloop_api_client/sdk/_types.py
index be09f6eed..c3024b4ca 100644
--- a/src/runloop_api_client/sdk/_types.py
+++ b/src/runloop_api_client/sdk/_types.py
@@ -1,32 +1,41 @@
from typing import Union, Callable, Optional
from typing_extensions import TypedDict
+from ..types import (
+ InputContext,
+ ScenarioView,
+ AgentListParams,
+ DevboxListParams,
+ ObjectListParams,
+ AgentCreateParams,
+ DevboxCreateParams,
+ ObjectCreateParams,
+ ScenarioListParams,
+ BenchmarkListParams,
+ BlueprintListParams,
+ ObjectDownloadParams,
+ ScenarioUpdateParams,
+ BenchmarkCreateParams,
+ BenchmarkUpdateParams,
+ BlueprintCreateParams,
+ DevboxUploadFileParams,
+ DevboxCreateTunnelParams,
+ DevboxDownloadFileParams,
+ DevboxRemoveTunnelParams,
+ DevboxSnapshotDiskParams,
+ DevboxReadFileContentsParams,
+ DevboxWriteFileContentsParams,
+)
from .._types import Body, Query, Headers, Timeout, NotGiven
from ..lib.polling import PollingConfig
from ..types.devboxes import DiskSnapshotListParams, DiskSnapshotUpdateParams
from ..types.scenarios import ScorerListParams, ScorerCreateParams, ScorerUpdateParams, ScorerValidateParams
-from ..types.input_context import InputContext
-from ..types.scenario_view import ScenarioView
-from ..types.agent_list_params import AgentListParams
-from ..types.devbox_list_params import DevboxListParams
-from ..types.object_list_params import ObjectListParams
-from ..types.agent_create_params import AgentCreateParams
-from ..types.devbox_create_params import DevboxCreateParams, DevboxBaseCreateParams
-from ..types.object_create_params import ObjectCreateParams
-from ..types.scenario_list_params import ScenarioListParams
-from ..types.blueprint_list_params import BlueprintListParams
-from ..types.object_download_params import ObjectDownloadParams
-from ..types.scenario_update_params import ScenarioUpdateParams
-from ..types.blueprint_create_params import BlueprintCreateParams
-from ..types.devbox_upload_file_params import DevboxUploadFileParams
+from ..types.benchmarks import RunListScenarioRunsParams
+from ..types.devbox_create_params import DevboxBaseCreateParams
from ..types.scenario_start_run_params import ScenarioStartRunBaseParams
-from ..types.devbox_create_tunnel_params import DevboxCreateTunnelParams
-from ..types.devbox_download_file_params import DevboxDownloadFileParams
+from ..types.benchmark_start_run_params import BenchmarkSelfStartRunParams
+from ..types.benchmarks.run_list_params import RunSelfListParams
from ..types.devbox_execute_async_params import DevboxNiceExecuteAsyncParams
-from ..types.devbox_remove_tunnel_params import DevboxRemoveTunnelParams
-from ..types.devbox_snapshot_disk_params import DevboxSnapshotDiskParams
-from ..types.devbox_read_file_contents_params import DevboxReadFileContentsParams
-from ..types.devbox_write_file_contents_params import DevboxWriteFileContentsParams
LogCallback = Callable[[str], None]
@@ -203,3 +212,27 @@ class ScenarioPreview(ScenarioView):
input_context: InputContextPreview # type: ignore[assignment]
"""The input context for the Scenario."""
+
+
+class SDKBenchmarkCreateParams(BenchmarkCreateParams, LongRequestOptions):
+ pass
+
+
+class SDKBenchmarkListParams(BenchmarkListParams, BaseRequestOptions):
+ pass
+
+
+class SDKBenchmarkUpdateParams(BenchmarkUpdateParams, LongRequestOptions):
+ pass
+
+
+class SDKBenchmarkStartRunParams(BenchmarkSelfStartRunParams, LongRequestOptions):
+ pass
+
+
+class SDKBenchmarkListRunsParams(RunSelfListParams, BaseRequestOptions):
+ pass
+
+
+class SDKBenchmarkRunListScenarioRunsParams(RunListScenarioRunsParams, BaseRequestOptions):
+ pass
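These param classes just merge the API TypedDicts with the request-option TypedDicts so SDK methods accept both through one **params mapping; a hedged sketch (the exact option field names defined by BaseRequestOptions are assumed, not shown in this diff):

from runloop_api_client.sdk._types import SDKBenchmarkListParams

# API fields (limit, name, starting_after) and request options share one dict.
params: SDKBenchmarkListParams = {"name": "nightly", "limit": 20}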
diff --git a/src/runloop_api_client/sdk/async_.py b/src/runloop_api_client/sdk/async_.py
index 4bcd08fc1..6e6e828ff 100644
--- a/src/runloop_api_client/sdk/async_.py
+++ b/src/runloop_api_client/sdk/async_.py
@@ -21,7 +21,9 @@
SDKObjectCreateParams,
SDKScenarioListParams,
SDKScorerCreateParams,
+ SDKBenchmarkListParams,
SDKBlueprintListParams,
+ SDKBenchmarkCreateParams,
SDKBlueprintCreateParams,
SDKDiskSnapshotListParams,
SDKDevboxCreateFromImageParams,
@@ -34,6 +36,7 @@
from .async_scorer import AsyncScorer
from .async_scenario import AsyncScenario
from .async_snapshot import AsyncSnapshot
+from .async_benchmark import AsyncBenchmark
from .async_blueprint import AsyncBlueprint
from ..lib.context_loader import TarFilter, build_directory_tar
from .async_storage_object import AsyncStorageObject
@@ -599,7 +602,6 @@ async def create_from_npm(
self,
*,
package_name: str,
- npm_version: Optional[str] = None,
registry_url: Optional[str] = None,
agent_setup: Optional[list[str]] = None,
**params: Unpack[SDKAgentCreateParams],
@@ -608,8 +610,6 @@ async def create_from_npm(
:param package_name: NPM package name
:type package_name: str
- :param npm_version: NPM version constraint, defaults to None
- :type npm_version: Optional[str], optional
:param registry_url: NPM registry URL, defaults to None
:type registry_url: Optional[str], optional
:param agent_setup: Setup commands to run after installation, defaults to None
@@ -625,8 +625,6 @@ async def create_from_npm(
)
npm_config: Npm = {"package_name": package_name}
- if npm_version is not None:
- npm_config["npm_version"] = npm_version
if registry_url is not None:
npm_config["registry_url"] = registry_url
if agent_setup is not None:
@@ -639,7 +637,6 @@ async def create_from_pip(
self,
*,
package_name: str,
- pip_version: Optional[str] = None,
registry_url: Optional[str] = None,
agent_setup: Optional[list[str]] = None,
**params: Unpack[SDKAgentCreateParams],
@@ -648,8 +645,6 @@ async def create_from_pip(
:param package_name: Pip package name
:type package_name: str
- :param pip_version: Pip version constraint, defaults to None
- :type pip_version: Optional[str], optional
:param registry_url: Pip registry URL, defaults to None
:type registry_url: Optional[str], optional
:param agent_setup: Setup commands to run after installation, defaults to None
@@ -665,8 +660,6 @@ async def create_from_pip(
)
pip_config: Pip = {"package_name": package_name}
- if pip_version is not None:
- pip_config["pip_version"] = pip_version
if registry_url is not None:
pip_config["registry_url"] = registry_url
if agent_setup is not None:
@@ -825,6 +818,55 @@ async def list(self, **params: Unpack[SDKScenarioListParams]) -> list[AsyncScena
return [AsyncScenario(self._client, item.id) async for item in page]
+class AsyncBenchmarkOps:
+ """Manage benchmarks (async). Access via ``runloop.benchmark``.
+
+ Example:
+ >>> runloop = AsyncRunloopSDK()
+ >>> benchmarks = await runloop.benchmark.list()
+ >>> benchmark = runloop.benchmark.from_id("bmd_xxx")
+ >>> run = await benchmark.start_run(run_name="evaluation-v1")
+ """
+
+ def __init__(self, client: AsyncRunloop) -> None:
+ """Initialize AsyncBenchmarkOps.
+
+ :param client: AsyncRunloop client instance
+ :type client: AsyncRunloop
+ """
+ self._client = client
+
+ async def create(self, **params: Unpack[SDKBenchmarkCreateParams]) -> AsyncBenchmark:
+ """Create a new benchmark.
+
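+        Example (illustrative sketch; the benchmark name is a placeholder):
+            >>> benchmark = await runloop.benchmark.create(name="my-benchmark")
+            >>> print(benchmark.id)
+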
+ :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkCreateParams` for available parameters
+ :return: The newly created benchmark
+ :rtype: AsyncBenchmark
+ """
+ response = await self._client.benchmarks.create(**params)
+ return AsyncBenchmark(self._client, response.id)
+
+ def from_id(self, benchmark_id: str) -> AsyncBenchmark:
+ """Get an AsyncBenchmark instance for an existing benchmark ID.
+
+ :param benchmark_id: ID of the benchmark
+ :type benchmark_id: str
+ :return: AsyncBenchmark instance for the given ID
+ :rtype: AsyncBenchmark
+ """
+ return AsyncBenchmark(self._client, benchmark_id)
+
+ async def list(self, **params: Unpack[SDKBenchmarkListParams]) -> list[AsyncBenchmark]:
+ """List all benchmarks, optionally filtered by parameters.
+
+ :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkListParams` for available parameters
+ :return: List of benchmarks
+ :rtype: list[AsyncBenchmark]
+ """
+ page = await self._client.benchmarks.list(**params)
+ return [AsyncBenchmark(self._client, item.id) for item in page.benchmarks]
+
+
class AsyncRunloopSDK:
"""High-level asynchronous entry point for the Runloop SDK.
@@ -836,6 +878,8 @@ class AsyncRunloopSDK:
:vartype api: AsyncRunloop
:ivar agent: High-level async interface for agent management.
:vartype agent: AsyncAgentOps
+ :ivar benchmark: High-level async interface for benchmark management
+ :vartype benchmark: AsyncBenchmarkOps
:ivar devbox: High-level async interface for devbox management
:vartype devbox: AsyncDevboxOps
:ivar blueprint: High-level async interface for blueprint management
@@ -859,6 +903,7 @@ class AsyncRunloopSDK:
api: AsyncRunloop
agent: AsyncAgentOps
+ benchmark: AsyncBenchmarkOps
devbox: AsyncDevboxOps
blueprint: AsyncBlueprintOps
scenario: AsyncScenarioOps
@@ -905,6 +950,7 @@ def __init__(
)
self.agent = AsyncAgentOps(self.api)
+ self.benchmark = AsyncBenchmarkOps(self.api)
self.devbox = AsyncDevboxOps(self.api)
self.blueprint = AsyncBlueprintOps(self.api)
self.scenario = AsyncScenarioOps(self.api)
diff --git a/src/runloop_api_client/sdk/async_benchmark.py b/src/runloop_api_client/sdk/async_benchmark.py
new file mode 100644
index 000000000..63443e37b
--- /dev/null
+++ b/src/runloop_api_client/sdk/async_benchmark.py
@@ -0,0 +1,164 @@
+"""AsyncBenchmark resource class for asynchronous operations."""
+
+from __future__ import annotations
+
+from typing import List
+from typing_extensions import Unpack, override
+
+from ..types import BenchmarkView
+from ._types import (
+ BaseRequestOptions,
+ LongRequestOptions,
+ SDKBenchmarkUpdateParams,
+ SDKBenchmarkListRunsParams,
+ SDKBenchmarkStartRunParams,
+)
+from .._types import SequenceNotStr
+from .._client import AsyncRunloop
+from .async_benchmark_run import AsyncBenchmarkRun
+
+
+class AsyncBenchmark:
+ """A benchmark for evaluating agent performance across scenarios (async).
+
+ Provides async methods for retrieving benchmark details, updating the benchmark,
+ managing scenarios, and starting benchmark runs. Obtain instances via
+ ``runloop.benchmark.from_id()`` or ``runloop.benchmark.list()``.
+
+ Example:
+ >>> benchmark = runloop.benchmark.from_id("bmd_xxx")
+ >>> info = await benchmark.get_info()
+ >>> run = await benchmark.start_run(run_name="evaluation-v1")
+ >>> for scenario_id in info.scenario_ids:
+        ...     scenario = runloop.scenario.from_id(scenario_id)
+ ... scenario_run = await scenario.run(benchmark_run_id=run.id, run_name="evaluation-v1")
+ """
+
+ def __init__(self, client: AsyncRunloop, benchmark_id: str) -> None:
+ """Create an AsyncBenchmark instance.
+
+ :param client: AsyncRunloop client instance
+ :type client: AsyncRunloop
+ :param benchmark_id: Benchmark ID
+ :type benchmark_id: str
+ """
+ self._client = client
+ self._id = benchmark_id
+
+ @override
+ def __repr__(self) -> str:
+ return f""
+
+ @property
+ def id(self) -> str:
+ """Return the benchmark ID.
+
+ :return: Unique benchmark ID
+ :rtype: str
+ """
+ return self._id
+
+ async def get_info(
+ self,
+ **options: Unpack[BaseRequestOptions],
+ ) -> BenchmarkView:
+ """Retrieve current benchmark details.
+
+ :param options: See :typeddict:`~runloop_api_client.sdk._types.BaseRequestOptions` for available options
+ :return: Current benchmark info
+ :rtype: BenchmarkView
+ """
+ return await self._client.benchmarks.retrieve(
+ self._id,
+ **options,
+ )
+
+ async def update(
+ self,
+ **params: Unpack[SDKBenchmarkUpdateParams],
+ ) -> BenchmarkView:
+ """Update the benchmark.
+
+ Only provided fields will be updated.
+
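+        Example (illustrative sketch; the description value is a placeholder):
+            >>> info = await benchmark.update(description="Updated description")
+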
+ :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkUpdateParams` for available parameters
+ :return: Updated benchmark info
+ :rtype: BenchmarkView
+ """
+ return await self._client.benchmarks.update(
+ self._id,
+ **params,
+ )
+
+ async def start_run(
+ self,
+ **params: Unpack[SDKBenchmarkStartRunParams],
+ ) -> AsyncBenchmarkRun:
+ """Start a new benchmark run.
+
+ Creates a new benchmark run and returns an AsyncBenchmarkRun instance for
+ managing the run lifecycle.
+
+ :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkStartRunParams` for available parameters
+ :return: AsyncBenchmarkRun instance for managing the run
+ :rtype: AsyncBenchmarkRun
+ """
+ run_view = await self._client.benchmarks.start_run(
+ benchmark_id=self._id,
+ **params,
+ )
+ return AsyncBenchmarkRun(self._client, run_view.id, run_view.benchmark_id)
+
+ async def add_scenarios(
+ self,
+ scenario_ids: SequenceNotStr[str],
+ **options: Unpack[LongRequestOptions],
+ ) -> BenchmarkView:
+ """Add scenarios to the benchmark.
+
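+        Example (illustrative sketch; the scenario IDs are placeholders):
+            >>> info = await benchmark.add_scenarios(["scn_123", "scn_456"])
+            >>> print(info.scenario_ids)
+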
+ :param scenario_ids: List of scenario IDs to add
+ :type scenario_ids: SequenceNotStr[str]
+ :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options
+ :return: Updated benchmark info
+ :rtype: BenchmarkView
+ """
+ return await self._client.benchmarks.update_scenarios(
+ self._id,
+ scenarios_to_add=scenario_ids,
+ **options,
+ )
+
+ async def remove_scenarios(
+ self,
+ scenario_ids: SequenceNotStr[str],
+ **options: Unpack[LongRequestOptions],
+ ) -> BenchmarkView:
+ """Remove scenarios from the benchmark.
+
+ :param scenario_ids: List of scenario IDs to remove
+ :type scenario_ids: SequenceNotStr[str]
+ :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options
+ :return: Updated benchmark info
+ :rtype: BenchmarkView
+ """
+ return await self._client.benchmarks.update_scenarios(
+ self._id,
+ scenarios_to_remove=scenario_ids,
+ **options,
+ )
+
+ async def list_runs(
+ self,
+ **params: Unpack[SDKBenchmarkListRunsParams],
+ ) -> List[AsyncBenchmarkRun]:
+ """List all runs for this benchmark.
+
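+        Example (illustrative sketch):
+            >>> runs = await benchmark.list_runs(limit=10)
+            >>> [run.id for run in runs]
+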
+ :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkListRunsParams` for available parameters
+ :return: List of async benchmark runs
+ :rtype: List[AsyncBenchmarkRun]
+ """
+ page = await self._client.benchmarks.runs.list(
+ benchmark_id=self._id,
+ **params,
+ )
+ return [AsyncBenchmarkRun(self._client, run.id, run.benchmark_id) for run in page.runs]
diff --git a/src/runloop_api_client/sdk/async_benchmark_run.py b/src/runloop_api_client/sdk/async_benchmark_run.py
new file mode 100644
index 000000000..f498d1408
--- /dev/null
+++ b/src/runloop_api_client/sdk/async_benchmark_run.py
@@ -0,0 +1,127 @@
+"""AsyncBenchmarkRun resource class for asynchronous operations."""
+
+from __future__ import annotations
+
+from typing import List
+from typing_extensions import Unpack, override
+
+from ..types import BenchmarkRunView
+from ._types import BaseRequestOptions, LongRequestOptions, SDKBenchmarkRunListScenarioRunsParams
+from .._client import AsyncRunloop
+from .async_scenario_run import AsyncScenarioRun
+
+
+class AsyncBenchmarkRun:
+ """A benchmark run for evaluating agent performance across scenarios (async).
+
+ Provides async methods for monitoring run status, managing the run lifecycle,
+ and accessing scenario run results. Obtain instances via
+ ``benchmark.start_run()`` or ``benchmark.list_runs()``.
+
+ Example:
+        >>> benchmark = runloop.benchmark.from_id("bmd_xxx")
+ >>> run = await benchmark.start_run(run_name="evaluation-v1")
+ >>> info = await run.get_info()
+ >>> scenario_runs = await run.list_scenario_runs()
+ """
+
+ def __init__(self, client: AsyncRunloop, run_id: str, benchmark_id: str) -> None:
+ """Create an AsyncBenchmarkRun instance.
+
+ :param client: AsyncRunloop client instance
+ :type client: AsyncRunloop
+ :param run_id: Benchmark run ID
+ :type run_id: str
+ :param benchmark_id: Parent benchmark ID
+ :type benchmark_id: str
+ """
+ self._client = client
+ self._id = run_id
+ self._benchmark_id = benchmark_id
+
+ @override
+ def __repr__(self) -> str:
+ return f""
+
+ @property
+ def id(self) -> str:
+ """Return the benchmark run ID.
+
+ :return: Unique benchmark run ID
+ :rtype: str
+ """
+ return self._id
+
+ @property
+ def benchmark_id(self) -> str:
+ """Return the parent benchmark ID.
+
+ :return: Parent benchmark ID
+ :rtype: str
+ """
+ return self._benchmark_id
+
+ async def get_info(
+ self,
+ **options: Unpack[BaseRequestOptions],
+ ) -> BenchmarkRunView:
+ """Retrieve current benchmark run status and metadata.
+
+ :param options: See :typeddict:`~runloop_api_client.sdk._types.BaseRequestOptions` for available options
+ :return: Current benchmark run state info
+ :rtype: BenchmarkRunView
+ """
+ return await self._client.benchmarks.runs.retrieve(
+ self._id,
+ **options,
+ )
+
+ async def cancel(
+ self,
+ **options: Unpack[LongRequestOptions],
+ ) -> BenchmarkRunView:
+ """Cancel the benchmark run.
+
+ Stops all running scenarios and marks the run as canceled.
+
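+        Example (illustrative sketch; assumes ``run`` was obtained via ``benchmark.start_run()``):
+            >>> info = await run.cancel()
+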
+ :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options
+ :return: Updated benchmark run state
+ :rtype: BenchmarkRunView
+ """
+ return await self._client.benchmarks.runs.cancel(
+ self._id,
+ **options,
+ )
+
+ async def complete(
+ self,
+ **options: Unpack[LongRequestOptions],
+ ) -> BenchmarkRunView:
+ """Complete the benchmark run.
+
+ Marks the run as completed. Call this after all scenarios have finished.
+
+ :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options
+ :return: Completed benchmark run state
+ :rtype: BenchmarkRunView
+ """
+ return await self._client.benchmarks.runs.complete(
+ self._id,
+ **options,
+ )
+
+ async def list_scenario_runs(
+ self,
+ **params: Unpack[SDKBenchmarkRunListScenarioRunsParams],
+ ) -> List[AsyncScenarioRun]:
+ """List all scenario runs for this benchmark run.
+
+ :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkRunListScenarioRunsParams` for available parameters
+ :return: List of async scenario run objects
+ :rtype: List[AsyncScenarioRun]
+ """
+ page = await self._client.benchmarks.runs.list_scenario_runs(
+ self._id,
+ **params,
+ )
+ return [AsyncScenarioRun(self._client, run.id, run.devbox_id) for run in page.runs]
diff --git a/src/runloop_api_client/sdk/benchmark.py b/src/runloop_api_client/sdk/benchmark.py
new file mode 100644
index 000000000..7e8ed826d
--- /dev/null
+++ b/src/runloop_api_client/sdk/benchmark.py
@@ -0,0 +1,164 @@
+"""Benchmark resource class for synchronous operations."""
+
+from __future__ import annotations
+
+from typing import List
+from typing_extensions import Unpack, override
+
+from ..types import BenchmarkView
+from ._types import (
+ BaseRequestOptions,
+ LongRequestOptions,
+ SDKBenchmarkUpdateParams,
+ SDKBenchmarkListRunsParams,
+ SDKBenchmarkStartRunParams,
+)
+from .._types import SequenceNotStr
+from .._client import Runloop
+from .benchmark_run import BenchmarkRun
+
+
+class Benchmark:
+ """A benchmark for evaluating agent performance across scenarios.
+
+ Provides methods for retrieving benchmark details, updating the benchmark,
+ managing scenarios, and starting benchmark runs. Obtain instances via
+ ``runloop.benchmark.from_id()`` or ``runloop.benchmark.list()``.
+
+ Example:
+ >>> benchmark = runloop.benchmark.from_id("bmd_xxx")
+ >>> info = benchmark.get_info()
+ >>> run = benchmark.start_run(run_name="evaluation-v1")
+ >>> for scenario_id in info.scenario_ids:
+ ... scenario = runloop.scenario.from_id(scenario_id)
+ ... scenario_run = scenario.run(benchmark_run_id=run.id, run_name="evaluation-v1")
+ """
+
+ def __init__(self, client: Runloop, benchmark_id: str) -> None:
+ """Create a Benchmark instance.
+
+ :param client: Runloop client instance
+ :type client: Runloop
+ :param benchmark_id: Benchmark ID
+ :type benchmark_id: str
+ """
+ self._client = client
+ self._id = benchmark_id
+
+ @override
+ def __repr__(self) -> str:
+ return f""
+
+ @property
+ def id(self) -> str:
+ """Return the benchmark ID.
+
+ :return: Unique benchmark ID
+ :rtype: str
+ """
+ return self._id
+
+ def get_info(
+ self,
+ **options: Unpack[BaseRequestOptions],
+ ) -> BenchmarkView:
+ """Retrieve current benchmark details.
+
+ :param options: See :typeddict:`~runloop_api_client.sdk._types.BaseRequestOptions` for available options
+ :return: Current benchmark info
+ :rtype: BenchmarkView
+ """
+ return self._client.benchmarks.retrieve(
+ self._id,
+ **options,
+ )
+
+ def update(
+ self,
+ **params: Unpack[SDKBenchmarkUpdateParams],
+ ) -> BenchmarkView:
+ """Update the benchmark.
+
+ Only provided fields will be updated.
+
+ :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkUpdateParams` for available parameters
+ :return: Updated benchmark info
+ :rtype: BenchmarkView
+ """
+ return self._client.benchmarks.update(
+ self._id,
+ **params,
+ )
+
+ def start_run(
+ self,
+ **params: Unpack[SDKBenchmarkStartRunParams],
+ ) -> BenchmarkRun:
+ """Start a new benchmark run.
+
+ Creates a new benchmark run and returns a BenchmarkRun instance for
+ managing the run lifecycle.
+
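+        Example (illustrative sketch; the run name is a placeholder):
+            >>> run = benchmark.start_run(run_name="evaluation-v1")
+            >>> print(run.id, run.benchmark_id)
+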
+ :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkStartRunParams` for available parameters
+ :return: BenchmarkRun instance for managing the run
+ :rtype: BenchmarkRun
+ """
+ run_view = self._client.benchmarks.start_run(
+ benchmark_id=self._id,
+ **params,
+ )
+ return BenchmarkRun(self._client, run_view.id, run_view.benchmark_id)
+
+ def add_scenarios(
+ self,
+ scenario_ids: SequenceNotStr[str],
+ **options: Unpack[LongRequestOptions],
+ ) -> BenchmarkView:
+ """Add scenarios to the benchmark.
+
+ :param scenario_ids: List of scenario IDs to add
+ :type scenario_ids: SequenceNotStr[str]
+ :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options
+ :return: Updated benchmark info
+ :rtype: BenchmarkView
+ """
+ return self._client.benchmarks.update_scenarios(
+ self._id,
+ scenarios_to_add=scenario_ids,
+ **options,
+ )
+
+ def remove_scenarios(
+ self,
+ scenario_ids: SequenceNotStr[str],
+ **options: Unpack[LongRequestOptions],
+ ) -> BenchmarkView:
+ """Remove scenarios from the benchmark.
+
+ :param scenario_ids: List of scenario IDs to remove
+ :type scenario_ids: SequenceNotStr[str]
+ :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options
+ :return: Updated benchmark info
+ :rtype: BenchmarkView
+ """
+ return self._client.benchmarks.update_scenarios(
+ self._id,
+ scenarios_to_remove=scenario_ids,
+ **options,
+ )
+
+ def list_runs(
+ self,
+ **params: Unpack[SDKBenchmarkListRunsParams],
+ ) -> List[BenchmarkRun]:
+ """List all runs for this benchmark.
+
+ :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkListRunsParams` for available parameters
+ :return: List of benchmark runs
+ :rtype: List[BenchmarkRun]
+ """
+ page = self._client.benchmarks.runs.list(
+ benchmark_id=self._id,
+ **params,
+ )
+ return [BenchmarkRun(self._client, run.id, run.benchmark_id) for run in page.runs]
diff --git a/src/runloop_api_client/sdk/benchmark_run.py b/src/runloop_api_client/sdk/benchmark_run.py
new file mode 100644
index 000000000..10da7ba05
--- /dev/null
+++ b/src/runloop_api_client/sdk/benchmark_run.py
@@ -0,0 +1,127 @@
+"""BenchmarkRun resource class for synchronous operations."""
+
+from __future__ import annotations
+
+from typing import List
+from typing_extensions import Unpack, override
+
+from ..types import BenchmarkRunView
+from ._types import BaseRequestOptions, LongRequestOptions, SDKBenchmarkRunListScenarioRunsParams
+from .._client import Runloop
+from .scenario_run import ScenarioRun
+
+
+class BenchmarkRun:
+ """A benchmark run for evaluating agent performance across scenarios.
+
+ Provides methods for monitoring run status, managing the run lifecycle,
+ and accessing scenario run results. Obtain instances via
+ ``benchmark.start_run()`` or ``benchmark.list_runs()``.
+
+ Example:
+        >>> benchmark = runloop.benchmark.from_id("bmd_xxx")
+ >>> run = benchmark.start_run(run_name="evaluation-v1")
+ >>> info = run.get_info()
+ >>> scenario_runs = run.list_scenario_runs()
+ """
+
+ def __init__(self, client: Runloop, run_id: str, benchmark_id: str) -> None:
+ """Create a BenchmarkRun instance.
+
+ :param client: Runloop client instance
+ :type client: Runloop
+ :param run_id: Benchmark run ID
+ :type run_id: str
+ :param benchmark_id: Parent benchmark ID
+ :type benchmark_id: str
+ """
+ self._client = client
+ self._id = run_id
+ self._benchmark_id = benchmark_id
+
+ @override
+ def __repr__(self) -> str:
+ return f""
+
+ @property
+ def id(self) -> str:
+ """Return the benchmark run ID.
+
+ :return: Unique benchmark run ID
+ :rtype: str
+ """
+ return self._id
+
+ @property
+ def benchmark_id(self) -> str:
+ """Return the parent benchmark ID.
+
+ :return: Parent benchmark ID
+ :rtype: str
+ """
+ return self._benchmark_id
+
+ def get_info(
+ self,
+ **options: Unpack[BaseRequestOptions],
+ ) -> BenchmarkRunView:
+ """Retrieve current benchmark run status and metadata.
+
+ :param options: See :typeddict:`~runloop_api_client.sdk._types.BaseRequestOptions` for available options
+ :return: Current benchmark run state info
+ :rtype: BenchmarkRunView
+ """
+ return self._client.benchmarks.runs.retrieve(
+ self._id,
+ **options,
+ )
+
+ def cancel(
+ self,
+ **options: Unpack[LongRequestOptions],
+ ) -> BenchmarkRunView:
+ """Cancel the benchmark run.
+
+ Stops all running scenarios and marks the run as canceled.
+
+ :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options
+ :return: Updated benchmark run state
+ :rtype: BenchmarkRunView
+ """
+ return self._client.benchmarks.runs.cancel(
+ self._id,
+ **options,
+ )
+
+ def complete(
+ self,
+ **options: Unpack[LongRequestOptions],
+ ) -> BenchmarkRunView:
+ """Complete the benchmark run.
+
+ Marks the run as completed. Call this after all scenarios have finished.
+
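+        Example (illustrative sketch; assumes all scenario runs have finished):
+            >>> info = run.complete()
+            >>> print(info.id)
+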
+ :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options
+ :return: Completed benchmark run state
+ :rtype: BenchmarkRunView
+ """
+ return self._client.benchmarks.runs.complete(
+ self._id,
+ **options,
+ )
+
+ def list_scenario_runs(
+ self,
+ **params: Unpack[SDKBenchmarkRunListScenarioRunsParams],
+ ) -> List[ScenarioRun]:
+ """List all scenario runs for this benchmark run.
+
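+        Example (illustrative sketch):
+            >>> scenario_runs = run.list_scenario_runs(limit=50)
+            >>> [scenario_run.id for scenario_run in scenario_runs]
+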
+ :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkRunListScenarioRunsParams` for available parameters
+ :return: List of scenario run objects
+ :rtype: List[ScenarioRun]
+ """
+ page = self._client.benchmarks.runs.list_scenario_runs(
+ self._id,
+ **params,
+ )
+ return [ScenarioRun(self._client, run.id, run.devbox_id) for run in page.runs]
diff --git a/src/runloop_api_client/sdk/sync.py b/src/runloop_api_client/sdk/sync.py
index f215c8116..d83eb5a6e 100644
--- a/src/runloop_api_client/sdk/sync.py
+++ b/src/runloop_api_client/sdk/sync.py
@@ -21,7 +21,9 @@
SDKObjectCreateParams,
SDKScenarioListParams,
SDKScorerCreateParams,
+ SDKBenchmarkListParams,
SDKBlueprintListParams,
+ SDKBenchmarkCreateParams,
SDKBlueprintCreateParams,
SDKDiskSnapshotListParams,
SDKDevboxCreateFromImageParams,
@@ -33,6 +35,7 @@
from ._helpers import detect_content_type
from .scenario import Scenario
from .snapshot import Snapshot
+from .benchmark import Benchmark
from .blueprint import Blueprint
from .storage_object import StorageObject
from .scenario_builder import ScenarioBuilder
@@ -594,7 +597,6 @@ def create_from_npm(
self,
*,
package_name: str,
- npm_version: Optional[str] = None,
registry_url: Optional[str] = None,
agent_setup: Optional[list[str]] = None,
**params: Unpack[SDKAgentCreateParams],
@@ -603,13 +605,11 @@ def create_from_npm(
Example:
>>> agent = runloop.agent.create_from_npm(
- ... name="my-npm-agent", package_name="@runloop/example-agent", npm_version="^1.0.0"
+ ... name="my-npm-agent", package_name="@runloop/example-agent", version="1.0.0"
... )
:param package_name: NPM package name
:type package_name: str
- :param npm_version: NPM version constraint, defaults to None
- :type npm_version: Optional[str], optional
:param registry_url: NPM registry URL, defaults to None
:type registry_url: Optional[str], optional
:param agent_setup: Setup commands to run after installation, defaults to None
@@ -625,8 +625,6 @@ def create_from_npm(
)
npm_config: Npm = {"package_name": package_name}
- if npm_version is not None:
- npm_config["npm_version"] = npm_version
if registry_url is not None:
npm_config["registry_url"] = registry_url
if agent_setup is not None:
@@ -639,7 +637,6 @@ def create_from_pip(
self,
*,
package_name: str,
- pip_version: Optional[str] = None,
registry_url: Optional[str] = None,
agent_setup: Optional[list[str]] = None,
**params: Unpack[SDKAgentCreateParams],
@@ -648,13 +645,11 @@ def create_from_pip(
Example:
>>> agent = runloop.agent.create_from_pip(
- ... name="my-pip-agent", package_name="runloop-example-agent", pip_version=">=1.0.0"
+ ... name="my-pip-agent", package_name="runloop-example-agent", version="1.0.0"
... )
:param package_name: Pip package name
:type package_name: str
- :param pip_version: Pip version constraint, defaults to None
- :type pip_version: Optional[str], optional
:param registry_url: Pip registry URL, defaults to None
:type registry_url: Optional[str], optional
:param agent_setup: Setup commands to run after installation, defaults to None
@@ -670,8 +665,6 @@ def create_from_pip(
)
pip_config: Pip = {"package_name": package_name}
- if pip_version is not None:
- pip_config["pip_version"] = pip_version
if registry_url is not None:
pip_config["registry_url"] = registry_url
if agent_setup is not None:
@@ -696,6 +689,7 @@ def create_from_git(
... repository="https://github.com/user/agent-repo",
... ref="main",
... agent_setup=["npm install", "npm run build"],
+ ... version="1.0.0",
... )
:param repository: Git repository URL
@@ -737,7 +731,10 @@ def create_from_object(
>>> obj = runloop.storage_object.upload_from_dir("./my-agent")
>>> # Then create agent from the object
>>> agent = runloop.agent.create_from_object(
- ... name="my-object-agent", object_id=obj.id, agent_setup=["chmod +x setup.sh", "./setup.sh"]
+ ... name="my-object-agent",
+ ... object_id=obj.id,
+ ... agent_setup=["chmod +x setup.sh", "./setup.sh"],
+ ... version="1.0.0",
... )
:param object_id: Storage object ID
@@ -846,6 +843,55 @@ def list(self, **params: Unpack[SDKScenarioListParams]) -> list[Scenario]:
return [Scenario(self._client, item.id) for item in page]
+class BenchmarkOps:
+ """Manage benchmarks. Access via ``runloop.benchmark``.
+
+ Example:
+ >>> runloop = RunloopSDK()
+ >>> benchmarks = runloop.benchmark.list()
+ >>> benchmark = runloop.benchmark.from_id("bmd_xxx")
+ >>> run = benchmark.start_run(run_name="evaluation-v1")
+ """
+
+ def __init__(self, client: Runloop) -> None:
+ """Initialize BenchmarkOps.
+
+ :param client: Runloop client instance
+ :type client: Runloop
+ """
+ self._client = client
+
+ def create(self, **params: Unpack[SDKBenchmarkCreateParams]) -> Benchmark:
+ """Create a new benchmark.
+
+ :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkCreateParams` for available parameters
+ :return: The newly created benchmark
+ :rtype: Benchmark
+ """
+ response = self._client.benchmarks.create(**params)
+ return Benchmark(self._client, response.id)
+
+ def from_id(self, benchmark_id: str) -> Benchmark:
+ """Get a Benchmark instance for an existing benchmark ID.
+
+ :param benchmark_id: ID of the benchmark
+ :type benchmark_id: str
+ :return: Benchmark instance for the given ID
+ :rtype: Benchmark
+ """
+ return Benchmark(self._client, benchmark_id)
+
+ def list(self, **params: Unpack[SDKBenchmarkListParams]) -> list[Benchmark]:
+ """List all benchmarks, optionally filtered by parameters.
+
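+        Example (illustrative sketch; the name filter is a placeholder):
+            >>> benchmarks = runloop.benchmark.list(name="my-benchmark", limit=10)
+            >>> [benchmark.id for benchmark in benchmarks]
+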
+ :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkListParams` for available parameters
+ :return: List of benchmarks
+ :rtype: list[Benchmark]
+ """
+ page = self._client.benchmarks.list(**params)
+ return [Benchmark(self._client, item.id) for item in page.benchmarks]
+
+
class RunloopSDK:
"""High-level synchronous entry point for the Runloop SDK.
@@ -857,6 +903,8 @@ class RunloopSDK:
:vartype api: Runloop
:ivar agent: High-level interface for agent management.
:vartype agent: AgentOps
+ :ivar benchmark: High-level interface for benchmark management
+ :vartype benchmark: BenchmarkOps
:ivar devbox: High-level interface for devbox management
:vartype devbox: DevboxOps
:ivar blueprint: High-level interface for blueprint management
@@ -880,6 +928,7 @@ class RunloopSDK:
api: Runloop
agent: AgentOps
+ benchmark: BenchmarkOps
devbox: DevboxOps
blueprint: BlueprintOps
scenario: ScenarioOps
@@ -926,6 +975,7 @@ def __init__(
)
self.agent = AgentOps(self.api)
+ self.benchmark = BenchmarkOps(self.api)
self.devbox = DevboxOps(self.api)
self.blueprint = BlueprintOps(self.api)
self.scenario = ScenarioOps(self.api)
diff --git a/src/runloop_api_client/types/__init__.py b/src/runloop_api_client/types/__init__.py
index 6856d9670..6afd070a3 100644
--- a/src/runloop_api_client/types/__init__.py
+++ b/src/runloop_api_client/types/__init__.py
@@ -97,6 +97,7 @@
from .repository_connection_list_view import RepositoryConnectionListView as RepositoryConnectionListView
from .repository_inspection_list_view import RepositoryInspectionListView as RepositoryInspectionListView
from .devbox_read_file_contents_params import DevboxReadFileContentsParams as DevboxReadFileContentsParams
+from .benchmark_update_scenarios_params import BenchmarkUpdateScenariosParams as BenchmarkUpdateScenariosParams
from .devbox_list_disk_snapshots_params import DevboxListDiskSnapshotsParams as DevboxListDiskSnapshotsParams
from .devbox_snapshot_disk_async_params import DevboxSnapshotDiskAsyncParams as DevboxSnapshotDiskAsyncParams
from .devbox_write_file_contents_params import DevboxWriteFileContentsParams as DevboxWriteFileContentsParams
diff --git a/src/runloop_api_client/types/agent_create_params.py b/src/runloop_api_client/types/agent_create_params.py
index 1a3372e7e..3c2deff2a 100644
--- a/src/runloop_api_client/types/agent_create_params.py
+++ b/src/runloop_api_client/types/agent_create_params.py
@@ -14,5 +14,8 @@ class AgentCreateParams(TypedDict, total=False):
name: Required[str]
"""The name of the Agent."""
+ version: Required[str]
+ """The version of the Agent. Must be a semver string (e.g., '2.0.65') or a SHA."""
+
source: Optional[AgentSource]
"""The source configuration for the Agent."""
diff --git a/src/runloop_api_client/types/agent_list_params.py b/src/runloop_api_client/types/agent_list_params.py
index a3199190b..3df89fc25 100644
--- a/src/runloop_api_client/types/agent_list_params.py
+++ b/src/runloop_api_client/types/agent_list_params.py
@@ -12,7 +12,7 @@ class AgentListParams(TypedDict, total=False):
"""Filter agents by public visibility."""
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
name: str
"""Filter agents by name (partial match supported)."""
@@ -22,3 +22,6 @@ class AgentListParams(TypedDict, total=False):
starting_after: str
"""Load the next page of data starting after the item with the given ID."""
+
+ version: str
+ """Filter by version. Use 'latest' to get the most recently created agent."""
diff --git a/src/runloop_api_client/types/agent_list_view.py b/src/runloop_api_client/types/agent_list_view.py
index c2a7be455..bfb1560e1 100644
--- a/src/runloop_api_client/types/agent_list_view.py
+++ b/src/runloop_api_client/types/agent_list_view.py
@@ -9,6 +9,8 @@
class AgentListView(BaseModel):
+ """A paginated list of Agents."""
+
agents: List[AgentView]
"""The list of Agents."""
diff --git a/src/runloop_api_client/types/agent_view.py b/src/runloop_api_client/types/agent_view.py
index 77e56d1b8..23b1f68ff 100644
--- a/src/runloop_api_client/types/agent_view.py
+++ b/src/runloop_api_client/types/agent_view.py
@@ -9,6 +9,8 @@
class AgentView(BaseModel):
+ """An Agent represents a registered AI agent entity."""
+
id: str
"""The unique identifier of the Agent."""
@@ -21,5 +23,8 @@ class AgentView(BaseModel):
name: str
"""The name of the Agent."""
+ version: str
+ """The version of the Agent. A semver string (e.g., '2.0.65') or a SHA."""
+
source: Optional[AgentSource] = None
"""The source configuration for the Agent."""
diff --git a/src/runloop_api_client/types/benchmark_create_params.py b/src/runloop_api_client/types/benchmark_create_params.py
index 1aec35f5f..36f7b95a9 100644
--- a/src/runloop_api_client/types/benchmark_create_params.py
+++ b/src/runloop_api_client/types/benchmark_create_params.py
@@ -12,7 +12,7 @@
class BenchmarkCreateParams(TypedDict, total=False):
name: Required[str]
- """The name of the Benchmark. This must be unique."""
+ """The unique name of the Benchmark."""
attribution: Optional[str]
"""Attribution information for the benchmark."""
@@ -21,12 +21,12 @@ class BenchmarkCreateParams(TypedDict, total=False):
"""Detailed description of the benchmark."""
metadata: Optional[Dict[str, str]]
- """User defined metadata to attach to the benchmark for organization."""
+ """User defined metadata to attach to the benchmark."""
required_environment_variables: Optional[SequenceNotStr[str]]
"""Environment variables required to run the benchmark.
- If any required variables are not supplied, the benchmark will fail to start
+ If any required variables are not supplied, the benchmark will fail to start.
"""
required_secret_names: SequenceNotStr[str]
diff --git a/src/runloop_api_client/types/benchmark_definitions_params.py b/src/runloop_api_client/types/benchmark_definitions_params.py
index f92d57d76..97caff125 100644
--- a/src/runloop_api_client/types/benchmark_definitions_params.py
+++ b/src/runloop_api_client/types/benchmark_definitions_params.py
@@ -9,7 +9,7 @@
class BenchmarkDefinitionsParams(TypedDict, total=False):
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
starting_after: str
"""Load the next page of data starting after the item with the given ID."""
diff --git a/src/runloop_api_client/types/benchmark_list_params.py b/src/runloop_api_client/types/benchmark_list_params.py
index 51b2b1320..4e8b0c78b 100644
--- a/src/runloop_api_client/types/benchmark_list_params.py
+++ b/src/runloop_api_client/types/benchmark_list_params.py
@@ -9,7 +9,10 @@
class BenchmarkListParams(TypedDict, total=False):
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
+
+ name: str
+ """Filter by name"""
starting_after: str
"""Load the next page of data starting after the item with the given ID."""
diff --git a/src/runloop_api_client/types/benchmark_list_public_params.py b/src/runloop_api_client/types/benchmark_list_public_params.py
index c5081922d..6dec4283b 100644
--- a/src/runloop_api_client/types/benchmark_list_public_params.py
+++ b/src/runloop_api_client/types/benchmark_list_public_params.py
@@ -9,7 +9,7 @@
class BenchmarkListPublicParams(TypedDict, total=False):
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
starting_after: str
"""Load the next page of data starting after the item with the given ID."""
diff --git a/src/runloop_api_client/types/benchmark_run_view.py b/src/runloop_api_client/types/benchmark_run_view.py
index 00dd98fc2..07fd4c022 100644
--- a/src/runloop_api_client/types/benchmark_run_view.py
+++ b/src/runloop_api_client/types/benchmark_run_view.py
@@ -9,6 +9,10 @@
class BenchmarkRunView(BaseModel):
+ """
+ A BenchmarkRunView represents a run of a complete set of Scenarios, organized under a Benchmark.
+ """
+
id: str
"""The ID of the BenchmarkRun."""
diff --git a/src/runloop_api_client/types/benchmark_start_run_params.py b/src/runloop_api_client/types/benchmark_start_run_params.py
index 7655ff5ad..edd65ca7c 100644
--- a/src/runloop_api_client/types/benchmark_start_run_params.py
+++ b/src/runloop_api_client/types/benchmark_start_run_params.py
@@ -11,10 +11,9 @@
__all__ = ["BenchmarkStartRunParams"]
-class BenchmarkStartRunParams(TypedDict, total=False):
- benchmark_id: Required[str]
- """ID of the Benchmark to run."""
-
+# Split into separate params so that the OO SDK start_run params can omit the benchmark_id.
+# Neither of these params is exposed to the user; only the derived SDKBenchmarkStartRunParams is.
+class BenchmarkSelfStartRunParams(TypedDict, total=False):
metadata: Optional[Dict[str, str]]
"""User defined metadata to attach to the benchmark run for organization."""
@@ -23,3 +22,8 @@ class BenchmarkStartRunParams(TypedDict, total=False):
run_profile: Annotated[Optional[RunProfile], PropertyInfo(alias="runProfile")]
"""Runtime configuration to use for this benchmark run"""
+
+
+class BenchmarkStartRunParams(BenchmarkSelfStartRunParams, total=False):
+ benchmark_id: Required[str]
+ """ID of the Benchmark to run."""
diff --git a/src/runloop_api_client/types/benchmark_update_params.py b/src/runloop_api_client/types/benchmark_update_params.py
index 1291e3e38..ce9e8fb0c 100644
--- a/src/runloop_api_client/types/benchmark_update_params.py
+++ b/src/runloop_api_client/types/benchmark_update_params.py
@@ -3,7 +3,7 @@
from __future__ import annotations
from typing import Dict, Optional
-from typing_extensions import Required, TypedDict
+from typing_extensions import TypedDict
from .._types import SequenceNotStr
@@ -11,30 +11,32 @@
class BenchmarkUpdateParams(TypedDict, total=False):
- name: Required[str]
- """The name of the Benchmark. This must be unique."""
-
attribution: Optional[str]
- """Attribution information for the benchmark."""
+ """Attribution information for the benchmark. Pass in empty string to clear."""
description: Optional[str]
- """Detailed description of the benchmark."""
+ """Detailed description of the benchmark. Pass in empty string to clear."""
metadata: Optional[Dict[str, str]]
- """User defined metadata to attach to the benchmark for organization."""
+ """User defined metadata to attach to the benchmark. Pass in empty map to clear."""
+
+ name: Optional[str]
+ """The unique name of the Benchmark. Cannot be blank."""
required_environment_variables: Optional[SequenceNotStr[str]]
"""Environment variables required to run the benchmark.
- If any required variables are not supplied, the benchmark will fail to start
+ If any required variables are not supplied, the benchmark will fail to start.
+ Pass in empty list to clear.
"""
- required_secret_names: SequenceNotStr[str]
+ required_secret_names: Optional[SequenceNotStr[str]]
"""
Secrets required to run the benchmark with (environment variable name will be
mapped to your user secret by name). If any of these secrets are not
- provided or the mapping is incorrect, the benchmark will fail to start.
+ provided or the mapping is incorrect, the benchmark will fail to start. Pass in
+ empty list to clear.
"""
scenario_ids: Optional[SequenceNotStr[str]]
- """The Scenario IDs that make up the Benchmark."""
+ """The Scenario IDs that make up the Benchmark. Pass in empty list to clear."""
diff --git a/src/runloop_api_client/types/benchmark_update_scenarios_params.py b/src/runloop_api_client/types/benchmark_update_scenarios_params.py
new file mode 100644
index 000000000..2aca2b0d4
--- /dev/null
+++ b/src/runloop_api_client/types/benchmark_update_scenarios_params.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import TypedDict
+
+from .._types import SequenceNotStr
+
+__all__ = ["BenchmarkUpdateScenariosParams"]
+
+
+class BenchmarkUpdateScenariosParams(TypedDict, total=False):
+ scenarios_to_add: Optional[SequenceNotStr[str]]
+ """Scenario IDs to add to the Benchmark."""
+
+ scenarios_to_remove: Optional[SequenceNotStr[str]]
+ """Scenario IDs to remove from the Benchmark."""
diff --git a/src/runloop_api_client/types/benchmark_view.py b/src/runloop_api_client/types/benchmark_view.py
index 877c8fe26..4150847ac 100644
--- a/src/runloop_api_client/types/benchmark_view.py
+++ b/src/runloop_api_client/types/benchmark_view.py
@@ -10,6 +10,10 @@
class BenchmarkView(BaseModel):
+ """
+ A BenchmarkDefinitionView represents a grouped set of Scenarios that together form a Benchmark.
+ """
+
id: str
"""The ID of the Benchmark."""
diff --git a/src/runloop_api_client/types/benchmarks/run_list_params.py b/src/runloop_api_client/types/benchmarks/run_list_params.py
index f93695b2a..a75e1b592 100644
--- a/src/runloop_api_client/types/benchmarks/run_list_params.py
+++ b/src/runloop_api_client/types/benchmarks/run_list_params.py
@@ -7,12 +7,19 @@
__all__ = ["RunListParams"]
-class RunListParams(TypedDict, total=False):
- benchmark_id: str
- """The Benchmark ID to filter by."""
-
+# Split into separate params so that the OO SDK list_runs params can omit the benchmark_id.
+# Neither of these params is exposed to the user; only the derived SDKBenchmarkListRunsParams is.
+class RunSelfListParams(TypedDict, total=False):
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
+
+ name: str
+ """Filter by name"""
starting_after: str
"""Load the next page of data starting after the item with the given ID."""
+
+
+class RunListParams(RunSelfListParams, total=False):
+ benchmark_id: str
+ """The Benchmark ID to filter by."""
diff --git a/src/runloop_api_client/types/benchmarks/run_list_scenario_runs_params.py b/src/runloop_api_client/types/benchmarks/run_list_scenario_runs_params.py
index 241df1a1f..ddce6aa4a 100644
--- a/src/runloop_api_client/types/benchmarks/run_list_scenario_runs_params.py
+++ b/src/runloop_api_client/types/benchmarks/run_list_scenario_runs_params.py
@@ -9,7 +9,7 @@
class RunListScenarioRunsParams(TypedDict, total=False):
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
starting_after: str
"""Load the next page of data starting after the item with the given ID."""
diff --git a/src/runloop_api_client/types/blueprint_build_parameters.py b/src/runloop_api_client/types/blueprint_build_parameters.py
index 129a8047a..52ddfda7c 100644
--- a/src/runloop_api_client/types/blueprint_build_parameters.py
+++ b/src/runloop_api_client/types/blueprint_build_parameters.py
@@ -11,6 +11,8 @@
class BuildContext(BaseModel):
+ """A build context backed by an Object."""
+
object_id: str
"""The ID of an object, whose contents are to be used as a build context."""
@@ -18,6 +20,8 @@ class BuildContext(BaseModel):
class ServiceCredentials(BaseModel):
+ """The credentials of the container service."""
+
password: str
"""The password of the container service."""
diff --git a/src/runloop_api_client/types/blueprint_create_params.py b/src/runloop_api_client/types/blueprint_create_params.py
index d82de7f35..94156d2e9 100644
--- a/src/runloop_api_client/types/blueprint_create_params.py
+++ b/src/runloop_api_client/types/blueprint_create_params.py
@@ -71,6 +71,8 @@ class BlueprintCreateParams(TypedDict, total=False):
class BuildContext(TypedDict, total=False):
+ """A build context backed by an Object."""
+
object_id: Required[str]
"""The ID of an object, whose contents are to be used as a build context."""
@@ -78,6 +80,8 @@ class BuildContext(TypedDict, total=False):
class ServiceCredentials(TypedDict, total=False):
+ """The credentials of the container service."""
+
password: Required[str]
"""The password of the container service."""
diff --git a/src/runloop_api_client/types/blueprint_list_params.py b/src/runloop_api_client/types/blueprint_list_params.py
index b0a3ade62..f72de7d2f 100644
--- a/src/runloop_api_client/types/blueprint_list_params.py
+++ b/src/runloop_api_client/types/blueprint_list_params.py
@@ -9,10 +9,13 @@
class BlueprintListParams(TypedDict, total=False):
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
name: str
"""Filter by name"""
starting_after: str
"""Load the next page of data starting after the item with the given ID."""
+
+ status: str
+ """Filter by build status (queued, provisioning, building, failed, build_complete)"""
diff --git a/src/runloop_api_client/types/blueprint_list_public_params.py b/src/runloop_api_client/types/blueprint_list_public_params.py
index d6b11e78e..e0f224f32 100644
--- a/src/runloop_api_client/types/blueprint_list_public_params.py
+++ b/src/runloop_api_client/types/blueprint_list_public_params.py
@@ -9,10 +9,13 @@
class BlueprintListPublicParams(TypedDict, total=False):
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
name: str
"""Filter by name"""
starting_after: str
"""Load the next page of data starting after the item with the given ID."""
+
+ status: str
+ """Filter by build status (queued, provisioning, building, failed, build_complete)"""
diff --git a/src/runloop_api_client/types/blueprint_preview_params.py b/src/runloop_api_client/types/blueprint_preview_params.py
index 9f6c4d9bc..4269b734f 100644
--- a/src/runloop_api_client/types/blueprint_preview_params.py
+++ b/src/runloop_api_client/types/blueprint_preview_params.py
@@ -71,6 +71,8 @@ class BlueprintPreviewParams(TypedDict, total=False):
class BuildContext(TypedDict, total=False):
+ """A build context backed by an Object."""
+
object_id: Required[str]
"""The ID of an object, whose contents are to be used as a build context."""
@@ -78,6 +80,8 @@ class BuildContext(TypedDict, total=False):
class ServiceCredentials(TypedDict, total=False):
+ """The credentials of the container service."""
+
password: Required[str]
"""The password of the container service."""
diff --git a/src/runloop_api_client/types/blueprint_view.py b/src/runloop_api_client/types/blueprint_view.py
index 7a10d1686..851b09426 100644
--- a/src/runloop_api_client/types/blueprint_view.py
+++ b/src/runloop_api_client/types/blueprint_view.py
@@ -10,6 +10,8 @@
class ContainerizedServiceCredentials(BaseModel):
+ """The credentials of the container service."""
+
password: str
"""The password of the container service."""
@@ -41,6 +43,11 @@ class ContainerizedService(BaseModel):
class BlueprintView(BaseModel):
+ """Blueprints are ways to create customized starting points for Devboxes.
+
+    They allow environment setup to be cached, improving Devbox boot times.
+ """
+
id: str
"""The id of the Blueprint."""
diff --git a/src/runloop_api_client/types/devbox_list_disk_snapshots_params.py b/src/runloop_api_client/types/devbox_list_disk_snapshots_params.py
index 7ffcf5386..d26c3fbd8 100644
--- a/src/runloop_api_client/types/devbox_list_disk_snapshots_params.py
+++ b/src/runloop_api_client/types/devbox_list_disk_snapshots_params.py
@@ -14,7 +14,7 @@ class DevboxListDiskSnapshotsParams(TypedDict, total=False):
"""Devbox ID to filter by."""
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
metadata_key: Annotated[str, PropertyInfo(alias="metadata[key]")]
"""Filter snapshots by metadata key-value pair.
diff --git a/src/runloop_api_client/types/devbox_list_params.py b/src/runloop_api_client/types/devbox_list_params.py
index 066b2ed85..c508762da 100644
--- a/src/runloop_api_client/types/devbox_list_params.py
+++ b/src/runloop_api_client/types/devbox_list_params.py
@@ -9,7 +9,7 @@
class DevboxListParams(TypedDict, total=False):
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
starting_after: str
"""Load the next page of data starting after the item with the given ID."""
diff --git a/src/runloop_api_client/types/devbox_view.py b/src/runloop_api_client/types/devbox_view.py
index 007af6575..e2c9a28d8 100644
--- a/src/runloop_api_client/types/devbox_view.py
+++ b/src/runloop_api_client/types/devbox_view.py
@@ -31,6 +31,11 @@ class StateTransition(BaseModel):
class DevboxView(BaseModel):
+ """A Devbox represents a virtual development environment.
+
+ It is an isolated sandbox that can be given to agents and used to run arbitrary code such as AI generated code.
+ """
+
id: str
"""The ID of the Devbox."""
diff --git a/src/runloop_api_client/types/devboxes/browser_view.py b/src/runloop_api_client/types/devboxes/browser_view.py
index d6d377a28..4486d76ec 100644
--- a/src/runloop_api_client/types/devboxes/browser_view.py
+++ b/src/runloop_api_client/types/devboxes/browser_view.py
@@ -7,6 +7,10 @@
class BrowserView(BaseModel):
+ """
+    A Browser represents a managed implementation of a browser, like Chromium, on top of Devboxes. It includes the tunnel to the live screen and the underlying DevboxView.
+ """
+
connection_url: str
"""
The url to enable remote connection from browser automation tools like
diff --git a/src/runloop_api_client/types/devboxes/computer_create_params.py b/src/runloop_api_client/types/devboxes/computer_create_params.py
index febd5aef5..c2e32e035 100644
--- a/src/runloop_api_client/types/devboxes/computer_create_params.py
+++ b/src/runloop_api_client/types/devboxes/computer_create_params.py
@@ -17,6 +17,8 @@ class ComputerCreateParams(TypedDict, total=False):
class DisplayDimensions(TypedDict, total=False):
+ """Customize the dimensions of the computer display."""
+
display_height_px: Required[int]
"""The height of the display being controlled by the model in pixels."""
diff --git a/src/runloop_api_client/types/devboxes/computer_mouse_interaction_params.py b/src/runloop_api_client/types/devboxes/computer_mouse_interaction_params.py
index b28a0723f..a3a02279d 100644
--- a/src/runloop_api_client/types/devboxes/computer_mouse_interaction_params.py
+++ b/src/runloop_api_client/types/devboxes/computer_mouse_interaction_params.py
@@ -23,6 +23,11 @@ class ComputerMouseInteractionParams(TypedDict, total=False):
class Coordinate(TypedDict, total=False):
+ """
+    The x (pixels from the left) and y (pixels from the top) coordinates for the mouse to move or click-drag. Required only for
+    `action=mouse_move` or `action=left_click_drag`.
+ """
+
x: Required[int]
"""The x coordinate (pixels from the left) for the mouse to move or click-drag."""
diff --git a/src/runloop_api_client/types/devboxes/computer_view.py b/src/runloop_api_client/types/devboxes/computer_view.py
index 907629d54..4706d44a0 100644
--- a/src/runloop_api_client/types/devboxes/computer_view.py
+++ b/src/runloop_api_client/types/devboxes/computer_view.py
@@ -7,6 +7,10 @@
class ComputerView(BaseModel):
+ """
+    A Computer represents an implementation of Anthropic computer use on top of Devboxes. It includes the tunnel to the live screen and the underlying DevboxView.
+ """
+
devbox: DevboxView
"""The underlying devbox the computer setup is running on."""
diff --git a/src/runloop_api_client/types/devboxes/disk_snapshot_list_params.py b/src/runloop_api_client/types/devboxes/disk_snapshot_list_params.py
index 7b0f3454f..73e60f457 100644
--- a/src/runloop_api_client/types/devboxes/disk_snapshot_list_params.py
+++ b/src/runloop_api_client/types/devboxes/disk_snapshot_list_params.py
@@ -14,7 +14,7 @@ class DiskSnapshotListParams(TypedDict, total=False):
"""Devbox ID to filter by."""
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
metadata_key: Annotated[str, PropertyInfo(alias="metadata[key]")]
"""Filter snapshots by metadata key-value pair.
diff --git a/src/runloop_api_client/types/input_context.py b/src/runloop_api_client/types/input_context.py
index 5cc697db9..2daae5d45 100644
--- a/src/runloop_api_client/types/input_context.py
+++ b/src/runloop_api_client/types/input_context.py
@@ -8,6 +8,10 @@
class InputContext(BaseModel):
+ """
+ InputContextView specifies the problem statement along with all additional context for a Scenario.
+ """
+
problem_statement: str
"""The problem statement for the Scenario."""
diff --git a/src/runloop_api_client/types/input_context_param.py b/src/runloop_api_client/types/input_context_param.py
index 7f977ad65..b0b495c4d 100644
--- a/src/runloop_api_client/types/input_context_param.py
+++ b/src/runloop_api_client/types/input_context_param.py
@@ -9,6 +9,10 @@
class InputContextParam(TypedDict, total=False):
+ """
+ InputContextView specifies the problem statement along with all additional context for a Scenario.
+ """
+
problem_statement: Required[str]
"""The problem statement for the Scenario."""
diff --git a/src/runloop_api_client/types/inspection_source_param.py b/src/runloop_api_client/types/inspection_source_param.py
index ba9e7f397..0d1308d8e 100644
--- a/src/runloop_api_client/types/inspection_source_param.py
+++ b/src/runloop_api_client/types/inspection_source_param.py
@@ -9,6 +9,8 @@
class InspectionSourceParam(TypedDict, total=False):
+ """Use a RepositoryInspection a source of a Blueprint build."""
+
inspection_id: Required[str]
"""The ID of a repository inspection."""
diff --git a/src/runloop_api_client/types/object_download_url_view.py b/src/runloop_api_client/types/object_download_url_view.py
index eb35ac3db..d1e726ca5 100644
--- a/src/runloop_api_client/types/object_download_url_view.py
+++ b/src/runloop_api_client/types/object_download_url_view.py
@@ -6,5 +6,7 @@
class ObjectDownloadURLView(BaseModel):
+ """A response containing a presigned download URL for an Object."""
+
download_url: str
"""The presigned download URL for the Object."""
diff --git a/src/runloop_api_client/types/object_list_params.py b/src/runloop_api_client/types/object_list_params.py
index 084fac54d..eca1c7cdd 100644
--- a/src/runloop_api_client/types/object_list_params.py
+++ b/src/runloop_api_client/types/object_list_params.py
@@ -12,7 +12,7 @@ class ObjectListParams(TypedDict, total=False):
"""Filter storage objects by content type."""
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
name: str
"""Filter storage objects by name (partial match supported)."""
diff --git a/src/runloop_api_client/types/object_list_public_params.py b/src/runloop_api_client/types/object_list_public_params.py
index 19b18ba49..67475b263 100644
--- a/src/runloop_api_client/types/object_list_public_params.py
+++ b/src/runloop_api_client/types/object_list_public_params.py
@@ -12,7 +12,7 @@ class ObjectListPublicParams(TypedDict, total=False):
"""Filter storage objects by content type."""
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
name: str
"""Filter storage objects by name (partial match supported)."""
diff --git a/src/runloop_api_client/types/object_list_view.py b/src/runloop_api_client/types/object_list_view.py
index 049b1be81..cfd546c0c 100644
--- a/src/runloop_api_client/types/object_list_view.py
+++ b/src/runloop_api_client/types/object_list_view.py
@@ -9,6 +9,8 @@
class ObjectListView(BaseModel):
+ """A paginated list of Objects."""
+
has_more: bool
"""True if there are more results available beyond this page."""
diff --git a/src/runloop_api_client/types/object_view.py b/src/runloop_api_client/types/object_view.py
index 80aea62ed..d4ced655f 100644
--- a/src/runloop_api_client/types/object_view.py
+++ b/src/runloop_api_client/types/object_view.py
@@ -9,6 +9,8 @@
class ObjectView(BaseModel):
+ """An Object represents a stored data entity with metadata."""
+
id: str
"""The unique identifier of the Object."""
diff --git a/src/runloop_api_client/types/repository_connection_view.py b/src/runloop_api_client/types/repository_connection_view.py
index 74718de27..e126071e8 100644
--- a/src/runloop_api_client/types/repository_connection_view.py
+++ b/src/runloop_api_client/types/repository_connection_view.py
@@ -6,6 +6,8 @@
class RepositoryConnectionView(BaseModel):
+ """The ID of the Repository."""
+
id: str
"""The ID of the Repository."""
diff --git a/src/runloop_api_client/types/repository_inspection_details.py b/src/runloop_api_client/types/repository_inspection_details.py
index f83932d29..0870ce693 100644
--- a/src/runloop_api_client/types/repository_inspection_details.py
+++ b/src/runloop_api_client/types/repository_inspection_details.py
@@ -10,6 +10,8 @@
class WorkflowContextsActionsContext(BaseModel):
+ """Details about actions processing for this workflow."""
+
actions_skipped_unnecessary: List[str]
"""
Actions that were skipped because they were unnecessary (e.g., upload
@@ -26,6 +28,10 @@ class WorkflowContextsActionsContext(BaseModel):
class WorkflowContexts(BaseModel):
+ """
+ Workflow context containing file name and details about actions processing during inspection.
+ """
+
actions_context: WorkflowContextsActionsContext
"""Details about actions processing for this workflow."""
diff --git a/src/runloop_api_client/types/repository_list_params.py b/src/runloop_api_client/types/repository_list_params.py
index 91fd7f352..d5f7b248a 100644
--- a/src/runloop_api_client/types/repository_list_params.py
+++ b/src/runloop_api_client/types/repository_list_params.py
@@ -9,7 +9,7 @@
class RepositoryListParams(TypedDict, total=False):
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
name: str
"""Filter by repository name"""
diff --git a/src/runloop_api_client/types/repository_manifest_view.py b/src/runloop_api_client/types/repository_manifest_view.py
index 461722b59..acb862672 100644
--- a/src/runloop_api_client/types/repository_manifest_view.py
+++ b/src/runloop_api_client/types/repository_manifest_view.py
@@ -17,6 +17,8 @@
class ContainerConfig(BaseModel):
+ """Container configuration specifying the base image and setup commands."""
+
base_image_name: str
"""The name of the base image.
@@ -41,6 +43,10 @@ class Language(BaseModel):
class WorkspaceDevCommands(BaseModel):
+ """
+ Extracted common commands important to the developer life cycle like linting, testing, building, etc.
+ """
+
build: Optional[List[str]] = None
"""Build command (e.g. npm run build)."""
@@ -58,6 +64,10 @@ class WorkspaceDevCommands(BaseModel):
class Workspace(BaseModel):
+ """
+ A workspace is a buildable unit of code within a repository and often represents a deployable unit of code like a backend service or a frontend app.
+ """
+
build_tool: List[str]
"""Name of the build tool used (e.g. pip, npm)."""
@@ -96,6 +106,8 @@ class Workspace(BaseModel):
class ContainerizedServiceCredentials(BaseModel):
+ """The credentials of the container service."""
+
password: str
"""The password of the container service."""
@@ -127,6 +139,10 @@ class ContainerizedService(BaseModel):
class RepositoryManifestView(BaseModel):
+ """
+ The repository manifest contains container configuration and workspace definitions for a repository.
+ """
+
container_config: ContainerConfig
"""Container configuration specifying the base image and setup commands."""
diff --git a/src/runloop_api_client/types/scenario_environment.py b/src/runloop_api_client/types/scenario_environment.py
index 94e244df9..b6ac9b039 100644
--- a/src/runloop_api_client/types/scenario_environment.py
+++ b/src/runloop_api_client/types/scenario_environment.py
@@ -9,6 +9,10 @@
class ScenarioEnvironment(BaseModel):
+ """
+ ScenarioEnvironmentParameters specify the environment in which a Scenario will be run.
+ """
+
blueprint_id: Optional[str] = None
"""Use the blueprint with matching ID."""
diff --git a/src/runloop_api_client/types/scenario_environment_param.py b/src/runloop_api_client/types/scenario_environment_param.py
index 5069e5943..6a219d250 100644
--- a/src/runloop_api_client/types/scenario_environment_param.py
+++ b/src/runloop_api_client/types/scenario_environment_param.py
@@ -11,6 +11,10 @@
class ScenarioEnvironmentParam(TypedDict, total=False):
+ """
+ ScenarioEnvironmentParameters specify the environment in which a Scenario will be run.
+ """
+
blueprint_id: Optional[str]
"""Use the blueprint with matching ID."""
diff --git a/src/runloop_api_client/types/scenario_list_params.py b/src/runloop_api_client/types/scenario_list_params.py
index 917da6c94..45ff3a87b 100644
--- a/src/runloop_api_client/types/scenario_list_params.py
+++ b/src/runloop_api_client/types/scenario_list_params.py
@@ -12,10 +12,13 @@ class ScenarioListParams(TypedDict, total=False):
"""Filter scenarios by benchmark ID."""
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
name: str
"""Query for Scenarios with a given name."""
starting_after: str
"""Load the next page of data starting after the item with the given ID."""
+
+ validation_type: str
+ """Filter by validation type"""
diff --git a/src/runloop_api_client/types/scenario_list_public_params.py b/src/runloop_api_client/types/scenario_list_public_params.py
index 7f413a517..be7e40b8d 100644
--- a/src/runloop_api_client/types/scenario_list_public_params.py
+++ b/src/runloop_api_client/types/scenario_list_public_params.py
@@ -9,7 +9,7 @@
class ScenarioListPublicParams(TypedDict, total=False):
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
name: str
"""Query for Scenarios with a given name."""
diff --git a/src/runloop_api_client/types/scenario_run_view.py b/src/runloop_api_client/types/scenario_run_view.py
index 225e90a89..68d4c3573 100644
--- a/src/runloop_api_client/types/scenario_run_view.py
+++ b/src/runloop_api_client/types/scenario_run_view.py
@@ -10,6 +10,11 @@
class ScenarioRunView(BaseModel):
+ """A ScenarioRunView represents a single run of a Scenario on a Devbox.
+
+ When completed, the ScenarioRun will contain the final score and output of the run.
+ """
+
id: str
"""ID of the ScenarioRun."""
diff --git a/src/runloop_api_client/types/scenario_update_params.py b/src/runloop_api_client/types/scenario_update_params.py
index 908988961..9d0fc65e5 100644
--- a/src/runloop_api_client/types/scenario_update_params.py
+++ b/src/runloop_api_client/types/scenario_update_params.py
@@ -21,26 +21,29 @@ class ScenarioUpdateParams(TypedDict, total=False):
"""The input context for the Scenario."""
metadata: Optional[Dict[str, str]]
- """User defined metadata to attach to the scenario for organization."""
+ """User defined metadata to attach to the scenario. Pass in empty map to clear."""
name: Optional[str]
- """Name of the scenario."""
+ """Name of the scenario. Cannot be blank."""
reference_output: Optional[str]
"""A string representation of the reference output to solve the scenario.
Commonly can be the result of a git diff or a sequence of command actions to
- apply to the environment.
+ apply to the environment. Pass in empty string to clear.
"""
required_environment_variables: Optional[SequenceNotStr[str]]
- """Environment variables required to run the scenario."""
+ """Environment variables required to run the scenario.
+
+ Pass in empty list to clear.
+ """
required_secret_names: Optional[SequenceNotStr[str]]
- """Secrets required to run the scenario."""
+ """Secrets required to run the scenario. Pass in empty list to clear."""
scoring_contract: Optional[ScoringContractUpdateParam]
"""The scoring contract for the Scenario."""
validation_type: Optional[Literal["UNSPECIFIED", "FORWARD", "REVERSE", "EVALUATION"]]
- """Validation strategy."""
+ """Validation strategy. Pass in empty string to clear."""
diff --git a/src/runloop_api_client/types/scenario_view.py b/src/runloop_api_client/types/scenario_view.py
index 58c0dbb26..5c5ba0164 100644
--- a/src/runloop_api_client/types/scenario_view.py
+++ b/src/runloop_api_client/types/scenario_view.py
@@ -12,6 +12,10 @@
class ScenarioView(BaseModel):
+ """
+    A ScenarioView represents a repeatable AI coding evaluation test, complete with initial environment and scoring contract.
+ """
+
id: str
"""The ID of the Scenario."""
diff --git a/src/runloop_api_client/types/scenarios/run_list_params.py b/src/runloop_api_client/types/scenarios/run_list_params.py
index 17a2715c4..97eeb425a 100644
--- a/src/runloop_api_client/types/scenarios/run_list_params.py
+++ b/src/runloop_api_client/types/scenarios/run_list_params.py
@@ -8,11 +8,20 @@
class RunListParams(TypedDict, total=False):
+ benchmark_run_id: str
+ """Filter by benchmark run ID"""
+
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
+
+ name: str
+ """Filter by name"""
scenario_id: str
"""Filter runs associated to Scenario given ID"""
starting_after: str
"""Load the next page of data starting after the item with the given ID."""
+
+ state: str
+ """Filter by state"""
diff --git a/src/runloop_api_client/types/scenarios/scorer_create_response.py b/src/runloop_api_client/types/scenarios/scorer_create_response.py
index 376c50f70..2b6e665a1 100644
--- a/src/runloop_api_client/types/scenarios/scorer_create_response.py
+++ b/src/runloop_api_client/types/scenarios/scorer_create_response.py
@@ -6,6 +6,8 @@
class ScorerCreateResponse(BaseModel):
+ """A ScenarioScorerView represents a custom scoring function for a Scenario."""
+
id: str
"""ID for the scenario scorer."""
diff --git a/src/runloop_api_client/types/scenarios/scorer_list_params.py b/src/runloop_api_client/types/scenarios/scorer_list_params.py
index 0577a327e..f80e7f6ac 100644
--- a/src/runloop_api_client/types/scenarios/scorer_list_params.py
+++ b/src/runloop_api_client/types/scenarios/scorer_list_params.py
@@ -9,7 +9,7 @@
class ScorerListParams(TypedDict, total=False):
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
starting_after: str
"""Load the next page of data starting after the item with the given ID."""
diff --git a/src/runloop_api_client/types/scenarios/scorer_list_response.py b/src/runloop_api_client/types/scenarios/scorer_list_response.py
index bdbc9b9de..46eb8802e 100644
--- a/src/runloop_api_client/types/scenarios/scorer_list_response.py
+++ b/src/runloop_api_client/types/scenarios/scorer_list_response.py
@@ -6,6 +6,8 @@
class ScorerListResponse(BaseModel):
+ """A ScenarioScorerView represents a custom scoring function for a Scenario."""
+
id: str
"""ID for the scenario scorer."""
diff --git a/src/runloop_api_client/types/scenarios/scorer_retrieve_response.py b/src/runloop_api_client/types/scenarios/scorer_retrieve_response.py
index ab0f85231..a67cd35c0 100644
--- a/src/runloop_api_client/types/scenarios/scorer_retrieve_response.py
+++ b/src/runloop_api_client/types/scenarios/scorer_retrieve_response.py
@@ -6,6 +6,8 @@
class ScorerRetrieveResponse(BaseModel):
+ """A ScenarioScorerView represents a custom scoring function for a Scenario."""
+
id: str
"""ID for the scenario scorer."""
diff --git a/src/runloop_api_client/types/scenarios/scorer_update_response.py b/src/runloop_api_client/types/scenarios/scorer_update_response.py
index 60a1b5e4b..91e668d22 100644
--- a/src/runloop_api_client/types/scenarios/scorer_update_response.py
+++ b/src/runloop_api_client/types/scenarios/scorer_update_response.py
@@ -6,6 +6,8 @@
class ScorerUpdateResponse(BaseModel):
+ """A ScenarioScorerView represents a custom scoring function for a Scenario."""
+
id: str
"""ID for the scenario scorer."""
diff --git a/src/runloop_api_client/types/scoring_contract.py b/src/runloop_api_client/types/scoring_contract.py
index f19f5aa54..d3c646fda 100644
--- a/src/runloop_api_client/types/scoring_contract.py
+++ b/src/runloop_api_client/types/scoring_contract.py
@@ -9,5 +9,9 @@
class ScoringContract(BaseModel):
+ """
+    A ScoringContract specifies the set of scoring functions used to evaluate a Scenario.
+ """
+
scoring_function_parameters: List[ScoringFunction]
"""A list of scoring functions used to evaluate the Scenario."""
diff --git a/src/runloop_api_client/types/scoring_contract_param.py b/src/runloop_api_client/types/scoring_contract_param.py
index 4a68abb93..46f1b7b6d 100644
--- a/src/runloop_api_client/types/scoring_contract_param.py
+++ b/src/runloop_api_client/types/scoring_contract_param.py
@@ -11,5 +11,9 @@
class ScoringContractParam(TypedDict, total=False):
+ """
+    A ScoringContract specifies the set of scoring functions used to evaluate a Scenario.
+ """
+
scoring_function_parameters: Required[Iterable[ScoringFunctionParam]]
"""A list of scoring functions used to evaluate the Scenario."""
diff --git a/src/runloop_api_client/types/scoring_contract_result_view.py b/src/runloop_api_client/types/scoring_contract_result_view.py
index 823de83c4..85e1a42c4 100644
--- a/src/runloop_api_client/types/scoring_contract_result_view.py
+++ b/src/runloop_api_client/types/scoring_contract_result_view.py
@@ -9,6 +9,10 @@
class ScoringContractResultView(BaseModel):
+ """
+ A ScoringContractResultView represents the result of running all scoring functions on a given input context.
+ """
+
score: float
"""Total score for all scoring contracts. This will be a value between 0 and 1."""
diff --git a/src/runloop_api_client/types/scoring_function.py b/src/runloop_api_client/types/scoring_function.py
index ba4aea9e1..fe5d2a467 100644
--- a/src/runloop_api_client/types/scoring_function.py
+++ b/src/runloop_api_client/types/scoring_function.py
@@ -20,6 +20,8 @@
class ScorerAstGrepScoringFunction(BaseModel):
+ """AstGrepScoringFunction utilizes structured coach search for scoring."""
+
pattern: str
"""AST pattern to match.
@@ -37,6 +39,10 @@ class ScorerAstGrepScoringFunction(BaseModel):
class ScorerBashScriptScoringFunction(BaseModel):
+ """
+ BashScriptScoringFunction is a scoring function specified by a bash script that will be run in the context of your environment.
+ """
+
type: Literal["bash_script_scorer"]
bash_script: Optional[str] = None
@@ -48,6 +54,10 @@ class ScorerBashScriptScoringFunction(BaseModel):
class ScorerCommandScoringFunction(BaseModel):
+ """
+    CommandScoringFunction executes a single command and checks the result. The output of the command will be printed. Scoring passes if the command returns status code 0 and fails otherwise.
+ """
+
type: Literal["command_scorer"]
command: Optional[str] = None
@@ -55,6 +65,8 @@ class ScorerCommandScoringFunction(BaseModel):
class ScorerCustomScoringFunction(BaseModel):
+ """CustomScoringFunction is a custom, user defined scoring function."""
+
custom_scorer_type: str
"""Type of the scoring function, previously registered with Runloop."""
@@ -65,6 +77,10 @@ class ScorerCustomScoringFunction(BaseModel):
class ScorerPythonScriptScoringFunction(BaseModel):
+ """
+ PythonScriptScoringFunction will run a python script in the context of your environment as a ScoringFunction.
+ """
+
python_script: str
"""Python script to be run.
@@ -96,6 +112,10 @@ class ScorerTestBasedScoringFunctionTestFile(BaseModel):
class ScorerTestBasedScoringFunction(BaseModel):
+ """
+ TestBasedScoringFunction writes test files to disk and executes a test command to verify the solution.
+ """
+
type: Literal["test_based_scorer"]
test_command: Optional[str] = None
@@ -119,6 +139,8 @@ class ScorerTestBasedScoringFunction(BaseModel):
class ScoringFunction(BaseModel):
+ """ScoringFunction specifies a method of scoring a Scenario."""
+
name: str
"""Name of scoring function. Names must only contain ``[a-zA-Z0-9_-]``."""
diff --git a/src/runloop_api_client/types/scoring_function_param.py b/src/runloop_api_client/types/scoring_function_param.py
index f9b6b26c7..033101d52 100644
--- a/src/runloop_api_client/types/scoring_function_param.py
+++ b/src/runloop_api_client/types/scoring_function_param.py
@@ -19,6 +19,8 @@
class ScorerAstGrepScoringFunction(TypedDict, total=False):
+ """AstGrepScoringFunction utilizes structured coach search for scoring."""
+
pattern: Required[str]
"""AST pattern to match.
@@ -36,6 +38,10 @@ class ScorerAstGrepScoringFunction(TypedDict, total=False):
class ScorerBashScriptScoringFunction(TypedDict, total=False):
+ """
+ BashScriptScoringFunction is a scoring function specified by a bash script that will be run in the context of your environment.
+ """
+
type: Required[Literal["bash_script_scorer"]]
bash_script: str
@@ -47,6 +53,10 @@ class ScorerBashScriptScoringFunction(TypedDict, total=False):
class ScorerCommandScoringFunction(TypedDict, total=False):
+ """
+    CommandScoringFunction executes a single command and checks the result. The output of the command will be printed. Scoring passes if the command returns status code 0 and fails otherwise.
+ """
+
type: Required[Literal["command_scorer"]]
command: str
@@ -54,6 +64,8 @@ class ScorerCommandScoringFunction(TypedDict, total=False):
class ScorerCustomScoringFunction(TypedDict, total=False):
+ """CustomScoringFunction is a custom, user defined scoring function."""
+
custom_scorer_type: Required[str]
"""Type of the scoring function, previously registered with Runloop."""
@@ -64,6 +76,10 @@ class ScorerCustomScoringFunction(TypedDict, total=False):
class ScorerPythonScriptScoringFunction(TypedDict, total=False):
+ """
+ PythonScriptScoringFunction will run a python script in the context of your environment as a ScoringFunction.
+ """
+
python_script: Required[str]
"""Python script to be run.
@@ -95,6 +111,10 @@ class ScorerTestBasedScoringFunctionTestFile(TypedDict, total=False):
class ScorerTestBasedScoringFunction(TypedDict, total=False):
+ """
+ TestBasedScoringFunction writes test files to disk and executes a test command to verify the solution.
+ """
+
type: Required[Literal["test_based_scorer"]]
test_command: str
@@ -115,6 +135,8 @@ class ScorerTestBasedScoringFunction(TypedDict, total=False):
class ScoringFunctionParam(TypedDict, total=False):
+ """ScoringFunction specifies a method of scoring a Scenario."""
+
name: Required[str]
"""Name of scoring function. Names must only contain ``[a-zA-Z0-9_-]``."""
diff --git a/src/runloop_api_client/types/scoring_function_result_view.py b/src/runloop_api_client/types/scoring_function_result_view.py
index 8f782df11..4fe5b67cb 100644
--- a/src/runloop_api_client/types/scoring_function_result_view.py
+++ b/src/runloop_api_client/types/scoring_function_result_view.py
@@ -8,6 +8,10 @@
class ScoringFunctionResultView(BaseModel):
+ """
+ A ScoringFunctionResultView represents the result of running a single scoring function on a given input context.
+ """
+
output: str
"""Log output of the scoring function."""
diff --git a/src/runloop_api_client/types/secret_list_params.py b/src/runloop_api_client/types/secret_list_params.py
index 296a66b62..13d25bd7e 100644
--- a/src/runloop_api_client/types/secret_list_params.py
+++ b/src/runloop_api_client/types/secret_list_params.py
@@ -9,4 +9,4 @@
class SecretListParams(TypedDict, total=False):
limit: int
- """The limit of items to return. Default is 20."""
+ """The limit of items to return. Default is 20. Max is 5000."""
diff --git a/src/runloop_api_client/types/secret_list_view.py b/src/runloop_api_client/types/secret_list_view.py
index d7feec9c5..4d66fa2e4 100644
--- a/src/runloop_api_client/types/secret_list_view.py
+++ b/src/runloop_api_client/types/secret_list_view.py
@@ -9,6 +9,8 @@
class SecretListView(BaseModel):
+ """A paginated list of Secrets."""
+
has_more: bool
"""True if there are more results available beyond this page."""
diff --git a/src/runloop_api_client/types/secret_view.py b/src/runloop_api_client/types/secret_view.py
index 1303f7bfd..bd1c8811e 100644
--- a/src/runloop_api_client/types/secret_view.py
+++ b/src/runloop_api_client/types/secret_view.py
@@ -6,6 +6,10 @@
class SecretView(BaseModel):
+ """
+ A Secret represents a key-value pair that can be securely stored and used in Devboxes as environment variables.
+ """
+
id: str
"""The unique identifier of the Secret."""
diff --git a/src/runloop_api_client/types/shared/agent_source.py b/src/runloop_api_client/types/shared/agent_source.py
index 25bcbbc1d..9282d6181 100644
--- a/src/runloop_api_client/types/shared/agent_source.py
+++ b/src/runloop_api_client/types/shared/agent_source.py
@@ -8,6 +8,8 @@
class Git(BaseModel):
+ """Git source configuration"""
+
repository: str
"""Git repository URL"""
@@ -19,20 +21,21 @@ class Git(BaseModel):
class Npm(BaseModel):
+ """NPM source configuration"""
+
package_name: str
"""NPM package name"""
agent_setup: Optional[List[str]] = None
"""Setup commands to run after installation"""
- npm_version: Optional[str] = None
- """NPM version constraint"""
-
registry_url: Optional[str] = None
"""NPM registry URL"""
class Object(BaseModel):
+ """Object store source configuration"""
+
object_id: str
"""Object ID"""
@@ -41,20 +44,21 @@ class Object(BaseModel):
class Pip(BaseModel):
+ """Pip source configuration"""
+
package_name: str
"""Pip package name"""
agent_setup: Optional[List[str]] = None
"""Setup commands to run after installation"""
- pip_version: Optional[str] = None
- """Pip version constraint"""
-
registry_url: Optional[str] = None
"""Pip registry URL"""
class AgentSource(BaseModel):
+ """Agent source configuration."""
+
type: str
"""Source type: npm, pip, object, or git"""
diff --git a/src/runloop_api_client/types/shared/launch_parameters.py b/src/runloop_api_client/types/shared/launch_parameters.py
index f70023d66..dc0ccfccd 100644
--- a/src/runloop_api_client/types/shared/launch_parameters.py
+++ b/src/runloop_api_client/types/shared/launch_parameters.py
@@ -10,6 +10,11 @@
class UserParameters(BaseModel):
+ """Specify the user for execution on Devbox.
+
+ If not set, default `user` will be used.
+ """
+
uid: int
"""User ID (UID) for the Linux user. Must be a non-negative integer."""
@@ -18,6 +23,10 @@ class UserParameters(BaseModel):
class LaunchParameters(BaseModel):
+ """
+    LaunchParameters enable you to customize the resources available to your Devbox as well as the environment setup that should be completed before the Devbox is marked as 'running'.
+ """
+
after_idle: Optional[AfterIdle] = None
"""Configure Devbox lifecycle based on idle activity.
diff --git a/src/runloop_api_client/types/shared_params/agent_source.py b/src/runloop_api_client/types/shared_params/agent_source.py
index 9f5a50845..7132414c8 100644
--- a/src/runloop_api_client/types/shared_params/agent_source.py
+++ b/src/runloop_api_client/types/shared_params/agent_source.py
@@ -11,6 +11,8 @@
class Git(TypedDict, total=False):
+ """Git source configuration"""
+
repository: Required[str]
"""Git repository URL"""
@@ -22,20 +24,21 @@ class Git(TypedDict, total=False):
class Npm(TypedDict, total=False):
+ """NPM source configuration"""
+
package_name: Required[str]
"""NPM package name"""
agent_setup: Optional[SequenceNotStr[str]]
"""Setup commands to run after installation"""
- npm_version: Optional[str]
- """NPM version constraint"""
-
registry_url: Optional[str]
"""NPM registry URL"""
class Object(TypedDict, total=False):
+ """Object store source configuration"""
+
object_id: Required[str]
"""Object ID"""
@@ -44,20 +47,21 @@ class Object(TypedDict, total=False):
class Pip(TypedDict, total=False):
+ """Pip source configuration"""
+
package_name: Required[str]
"""Pip package name"""
agent_setup: Optional[SequenceNotStr[str]]
"""Setup commands to run after installation"""
- pip_version: Optional[str]
- """Pip version constraint"""
-
registry_url: Optional[str]
"""Pip registry URL"""
class AgentSource(TypedDict, total=False):
+ """Agent source configuration."""
+
type: Required[str]
"""Source type: npm, pip, object, or git"""
diff --git a/src/runloop_api_client/types/shared_params/launch_parameters.py b/src/runloop_api_client/types/shared_params/launch_parameters.py
index f0fe87636..cd2a97ee4 100644
--- a/src/runloop_api_client/types/shared_params/launch_parameters.py
+++ b/src/runloop_api_client/types/shared_params/launch_parameters.py
@@ -12,6 +12,11 @@
class UserParameters(TypedDict, total=False):
+ """Specify the user for execution on Devbox.
+
+ If not set, default `user` will be used.
+ """
+
uid: Required[int]
"""User ID (UID) for the Linux user. Must be a non-negative integer."""
@@ -20,6 +25,10 @@ class UserParameters(TypedDict, total=False):
class LaunchParameters(TypedDict, total=False):
+ """
+    LaunchParameters enable you to customize the resources available to your Devbox as well as the environment setup that should be completed before the Devbox is marked as 'running'.
+ """
+
after_idle: Optional[AfterIdle]
"""Configure Devbox lifecycle based on idle activity.
diff --git a/tests/api_resources/benchmarks/test_runs.py b/tests/api_resources/benchmarks/test_runs.py
index a95855518..9ab74fc9a 100644
--- a/tests/api_resources/benchmarks/test_runs.py
+++ b/tests/api_resources/benchmarks/test_runs.py
@@ -66,6 +66,7 @@ def test_method_list_with_all_params(self, client: Runloop) -> None:
run = client.benchmarks.runs.list(
benchmark_id="benchmark_id",
limit=0,
+ name="name",
starting_after="starting_after",
)
assert_matches_type(SyncBenchmarkRunsCursorIDPage[BenchmarkRunView], run, path=["response"])
@@ -268,6 +269,7 @@ async def test_method_list_with_all_params(self, async_client: AsyncRunloop) ->
run = await async_client.benchmarks.runs.list(
benchmark_id="benchmark_id",
limit=0,
+ name="name",
starting_after="starting_after",
)
assert_matches_type(AsyncBenchmarkRunsCursorIDPage[BenchmarkRunView], run, path=["response"])
diff --git a/tests/api_resources/scenarios/test_runs.py b/tests/api_resources/scenarios/test_runs.py
index 7b981e9bb..f3ac8eb88 100644
--- a/tests/api_resources/scenarios/test_runs.py
+++ b/tests/api_resources/scenarios/test_runs.py
@@ -72,9 +72,12 @@ def test_method_list(self, client: Runloop) -> None:
@parametrize
def test_method_list_with_all_params(self, client: Runloop) -> None:
run = client.scenarios.runs.list(
+ benchmark_run_id="benchmark_run_id",
limit=0,
+ name="name",
scenario_id="scenario_id",
starting_after="starting_after",
+ state="state",
)
assert_matches_type(SyncBenchmarkRunsCursorIDPage[ScenarioRunView], run, path=["response"])
@@ -320,9 +323,12 @@ async def test_method_list(self, async_client: AsyncRunloop) -> None:
@parametrize
async def test_method_list_with_all_params(self, async_client: AsyncRunloop) -> None:
run = await async_client.scenarios.runs.list(
+ benchmark_run_id="benchmark_run_id",
limit=0,
+ name="name",
scenario_id="scenario_id",
starting_after="starting_after",
+ state="state",
)
assert_matches_type(AsyncBenchmarkRunsCursorIDPage[ScenarioRunView], run, path=["response"])
diff --git a/tests/api_resources/test_agents.py b/tests/api_resources/test_agents.py
index 6f8096491..693eec250 100644
--- a/tests/api_resources/test_agents.py
+++ b/tests/api_resources/test_agents.py
@@ -22,6 +22,7 @@ class TestAgents:
def test_method_create(self, client: Runloop) -> None:
agent = client.agents.create(
name="name",
+ version="version",
)
assert_matches_type(AgentView, agent, path=["response"])
@@ -29,6 +30,7 @@ def test_method_create(self, client: Runloop) -> None:
def test_method_create_with_all_params(self, client: Runloop) -> None:
agent = client.agents.create(
name="name",
+ version="version",
source={
"type": "type",
"git": {
@@ -39,7 +41,6 @@ def test_method_create_with_all_params(self, client: Runloop) -> None:
"npm": {
"package_name": "package_name",
"agent_setup": ["string"],
- "npm_version": "npm_version",
"registry_url": "registry_url",
},
"object": {
@@ -49,7 +50,6 @@ def test_method_create_with_all_params(self, client: Runloop) -> None:
"pip": {
"package_name": "package_name",
"agent_setup": ["string"],
- "pip_version": "pip_version",
"registry_url": "registry_url",
},
},
@@ -60,6 +60,7 @@ def test_method_create_with_all_params(self, client: Runloop) -> None:
def test_raw_response_create(self, client: Runloop) -> None:
response = client.agents.with_raw_response.create(
name="name",
+ version="version",
)
assert response.is_closed is True
@@ -71,6 +72,7 @@ def test_raw_response_create(self, client: Runloop) -> None:
def test_streaming_response_create(self, client: Runloop) -> None:
with client.agents.with_streaming_response.create(
name="name",
+ version="version",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -131,6 +133,7 @@ def test_method_list_with_all_params(self, client: Runloop) -> None:
name="name",
search="search",
starting_after="starting_after",
+ version="version",
)
assert_matches_type(SyncAgentsCursorIDPage[AgentView], agent, path=["response"])
@@ -164,6 +167,7 @@ class TestAsyncAgents:
async def test_method_create(self, async_client: AsyncRunloop) -> None:
agent = await async_client.agents.create(
name="name",
+ version="version",
)
assert_matches_type(AgentView, agent, path=["response"])
@@ -171,6 +175,7 @@ async def test_method_create(self, async_client: AsyncRunloop) -> None:
async def test_method_create_with_all_params(self, async_client: AsyncRunloop) -> None:
agent = await async_client.agents.create(
name="name",
+ version="version",
source={
"type": "type",
"git": {
@@ -181,7 +186,6 @@ async def test_method_create_with_all_params(self, async_client: AsyncRunloop) -
"npm": {
"package_name": "package_name",
"agent_setup": ["string"],
- "npm_version": "npm_version",
"registry_url": "registry_url",
},
"object": {
@@ -191,7 +195,6 @@ async def test_method_create_with_all_params(self, async_client: AsyncRunloop) -
"pip": {
"package_name": "package_name",
"agent_setup": ["string"],
- "pip_version": "pip_version",
"registry_url": "registry_url",
},
},
@@ -202,6 +205,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncRunloop) -
async def test_raw_response_create(self, async_client: AsyncRunloop) -> None:
response = await async_client.agents.with_raw_response.create(
name="name",
+ version="version",
)
assert response.is_closed is True
@@ -213,6 +217,7 @@ async def test_raw_response_create(self, async_client: AsyncRunloop) -> None:
async def test_streaming_response_create(self, async_client: AsyncRunloop) -> None:
async with async_client.agents.with_streaming_response.create(
name="name",
+ version="version",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -273,6 +278,7 @@ async def test_method_list_with_all_params(self, async_client: AsyncRunloop) ->
name="name",
search="search",
starting_after="starting_after",
+ version="version",
)
assert_matches_type(AsyncAgentsCursorIDPage[AgentView], agent, path=["response"])
diff --git a/tests/api_resources/test_benchmarks.py b/tests/api_resources/test_benchmarks.py
index 891756def..bb001a532 100644
--- a/tests/api_resources/test_benchmarks.py
+++ b/tests/api_resources/test_benchmarks.py
@@ -108,7 +108,6 @@ def test_path_params_retrieve(self, client: Runloop) -> None:
def test_method_update(self, client: Runloop) -> None:
benchmark = client.benchmarks.update(
id="id",
- name="name",
)
assert_matches_type(BenchmarkView, benchmark, path=["response"])
@@ -116,10 +115,10 @@ def test_method_update(self, client: Runloop) -> None:
def test_method_update_with_all_params(self, client: Runloop) -> None:
benchmark = client.benchmarks.update(
id="id",
- name="name",
attribution="attribution",
description="description",
metadata={"foo": "string"},
+ name="name",
required_environment_variables=["string"],
required_secret_names=["string"],
scenario_ids=["string"],
@@ -130,7 +129,6 @@ def test_method_update_with_all_params(self, client: Runloop) -> None:
def test_raw_response_update(self, client: Runloop) -> None:
response = client.benchmarks.with_raw_response.update(
id="id",
- name="name",
)
assert response.is_closed is True
@@ -142,7 +140,6 @@ def test_raw_response_update(self, client: Runloop) -> None:
def test_streaming_response_update(self, client: Runloop) -> None:
with client.benchmarks.with_streaming_response.update(
id="id",
- name="name",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -157,7 +154,6 @@ def test_path_params_update(self, client: Runloop) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `id` but received ''"):
client.benchmarks.with_raw_response.update(
id="",
- name="name",
)
@parametrize
@@ -169,6 +165,7 @@ def test_method_list(self, client: Runloop) -> None:
def test_method_list_with_all_params(self, client: Runloop) -> None:
benchmark = client.benchmarks.list(
limit=0,
+ name="name",
starting_after="starting_after",
)
assert_matches_type(SyncBenchmarksCursorIDPage[BenchmarkView], benchmark, path=["response"])
@@ -344,6 +341,53 @@ def test_streaming_response_start_run(self, client: Runloop) -> None:
assert cast(Any, response.is_closed) is True
+ @parametrize
+ def test_method_update_scenarios(self, client: Runloop) -> None:
+ benchmark = client.benchmarks.update_scenarios(
+ id="id",
+ )
+ assert_matches_type(BenchmarkView, benchmark, path=["response"])
+
+ @parametrize
+ def test_method_update_scenarios_with_all_params(self, client: Runloop) -> None:
+ benchmark = client.benchmarks.update_scenarios(
+ id="id",
+ scenarios_to_add=["string"],
+ scenarios_to_remove=["string"],
+ )
+ assert_matches_type(BenchmarkView, benchmark, path=["response"])
+
+ @parametrize
+ def test_raw_response_update_scenarios(self, client: Runloop) -> None:
+ response = client.benchmarks.with_raw_response.update_scenarios(
+ id="id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ benchmark = response.parse()
+ assert_matches_type(BenchmarkView, benchmark, path=["response"])
+
+ @parametrize
+ def test_streaming_response_update_scenarios(self, client: Runloop) -> None:
+ with client.benchmarks.with_streaming_response.update_scenarios(
+ id="id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ benchmark = response.parse()
+ assert_matches_type(BenchmarkView, benchmark, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_update_scenarios(self, client: Runloop) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `id` but received ''"):
+ client.benchmarks.with_raw_response.update_scenarios(
+ id="",
+ )
+
class TestAsyncBenchmarks:
parametrize = pytest.mark.parametrize(
@@ -436,7 +480,6 @@ async def test_path_params_retrieve(self, async_client: AsyncRunloop) -> None:
async def test_method_update(self, async_client: AsyncRunloop) -> None:
benchmark = await async_client.benchmarks.update(
id="id",
- name="name",
)
assert_matches_type(BenchmarkView, benchmark, path=["response"])
@@ -444,10 +487,10 @@ async def test_method_update(self, async_client: AsyncRunloop) -> None:
async def test_method_update_with_all_params(self, async_client: AsyncRunloop) -> None:
benchmark = await async_client.benchmarks.update(
id="id",
- name="name",
attribution="attribution",
description="description",
metadata={"foo": "string"},
+ name="name",
required_environment_variables=["string"],
required_secret_names=["string"],
scenario_ids=["string"],
@@ -458,7 +501,6 @@ async def test_method_update_with_all_params(self, async_client: AsyncRunloop) -
async def test_raw_response_update(self, async_client: AsyncRunloop) -> None:
response = await async_client.benchmarks.with_raw_response.update(
id="id",
- name="name",
)
assert response.is_closed is True
@@ -470,7 +512,6 @@ async def test_raw_response_update(self, async_client: AsyncRunloop) -> None:
async def test_streaming_response_update(self, async_client: AsyncRunloop) -> None:
async with async_client.benchmarks.with_streaming_response.update(
id="id",
- name="name",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -485,7 +526,6 @@ async def test_path_params_update(self, async_client: AsyncRunloop) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `id` but received ''"):
await async_client.benchmarks.with_raw_response.update(
id="",
- name="name",
)
@parametrize
@@ -497,6 +537,7 @@ async def test_method_list(self, async_client: AsyncRunloop) -> None:
async def test_method_list_with_all_params(self, async_client: AsyncRunloop) -> None:
benchmark = await async_client.benchmarks.list(
limit=0,
+ name="name",
starting_after="starting_after",
)
assert_matches_type(AsyncBenchmarksCursorIDPage[BenchmarkView], benchmark, path=["response"])
@@ -671,3 +712,50 @@ async def test_streaming_response_start_run(self, async_client: AsyncRunloop) ->
assert_matches_type(BenchmarkRunView, benchmark, path=["response"])
assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_method_update_scenarios(self, async_client: AsyncRunloop) -> None:
+ benchmark = await async_client.benchmarks.update_scenarios(
+ id="id",
+ )
+ assert_matches_type(BenchmarkView, benchmark, path=["response"])
+
+ @parametrize
+ async def test_method_update_scenarios_with_all_params(self, async_client: AsyncRunloop) -> None:
+ benchmark = await async_client.benchmarks.update_scenarios(
+ id="id",
+ scenarios_to_add=["string"],
+ scenarios_to_remove=["string"],
+ )
+ assert_matches_type(BenchmarkView, benchmark, path=["response"])
+
+ @parametrize
+ async def test_raw_response_update_scenarios(self, async_client: AsyncRunloop) -> None:
+ response = await async_client.benchmarks.with_raw_response.update_scenarios(
+ id="id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ benchmark = await response.parse()
+ assert_matches_type(BenchmarkView, benchmark, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_update_scenarios(self, async_client: AsyncRunloop) -> None:
+ async with async_client.benchmarks.with_streaming_response.update_scenarios(
+ id="id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ benchmark = await response.parse()
+ assert_matches_type(BenchmarkView, benchmark, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_update_scenarios(self, async_client: AsyncRunloop) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `id` but received ''"):
+ await async_client.benchmarks.with_raw_response.update_scenarios(
+ id="",
+ )
diff --git a/tests/api_resources/test_blueprints.py b/tests/api_resources/test_blueprints.py
index 4be6d1677..578e4dfb5 100644
--- a/tests/api_resources/test_blueprints.py
+++ b/tests/api_resources/test_blueprints.py
@@ -183,6 +183,7 @@ def test_method_list_with_all_params(self, client: Runloop) -> None:
limit=0,
name="name",
starting_after="starting_after",
+ status="status",
)
assert_matches_type(SyncBlueprintsCursorIDPage[BlueprintView], blueprint, path=["response"])
@@ -323,6 +324,7 @@ def test_method_list_public_with_all_params(self, client: Runloop) -> None:
limit=0,
name="name",
starting_after="starting_after",
+ status="status",
)
assert_matches_type(SyncBlueprintsCursorIDPage[BlueprintView], blueprint, path=["response"])
@@ -641,6 +643,7 @@ async def test_method_list_with_all_params(self, async_client: AsyncRunloop) ->
limit=0,
name="name",
starting_after="starting_after",
+ status="status",
)
assert_matches_type(AsyncBlueprintsCursorIDPage[BlueprintView], blueprint, path=["response"])
@@ -781,6 +784,7 @@ async def test_method_list_public_with_all_params(self, async_client: AsyncRunlo
limit=0,
name="name",
starting_after="starting_after",
+ status="status",
)
assert_matches_type(AsyncBlueprintsCursorIDPage[BlueprintView], blueprint, path=["response"])
diff --git a/tests/api_resources/test_scenarios.py b/tests/api_resources/test_scenarios.py
index b9dadb8b9..3345828c1 100644
--- a/tests/api_resources/test_scenarios.py
+++ b/tests/api_resources/test_scenarios.py
@@ -291,6 +291,7 @@ def test_method_list_with_all_params(self, client: Runloop) -> None:
limit=0,
name="name",
starting_after="starting_after",
+ validation_type="validation_type",
)
assert_matches_type(SyncScenariosCursorIDPage[ScenarioView], scenario, path=["response"])
@@ -696,6 +697,7 @@ async def test_method_list_with_all_params(self, async_client: AsyncRunloop) ->
limit=0,
name="name",
starting_after="starting_after",
+ validation_type="validation_type",
)
assert_matches_type(AsyncScenariosCursorIDPage[ScenarioView], scenario, path=["response"])
diff --git a/tests/sdk/async_devbox/test_core.py b/tests/sdk/async_devbox/test_core.py
index 5d3405c80..9925fa724 100644
--- a/tests/sdk/async_devbox/test_core.py
+++ b/tests/sdk/async_devbox/test_core.py
@@ -26,21 +26,21 @@ class TestAsyncDevbox:
def test_init(self, mock_async_client: AsyncMock) -> None:
"""Test AsyncDevbox initialization."""
- devbox = AsyncDevbox(mock_async_client, "dev_123")
- assert devbox.id == "dev_123"
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
+ assert devbox.id == "dbx_123"
def test_repr(self, mock_async_client: AsyncMock) -> None:
"""Test AsyncDevbox string representation."""
- devbox = AsyncDevbox(mock_async_client, "dev_123")
-        assert repr(devbox) == "<AsyncDevbox id=dev_123>"
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
+        assert repr(devbox) == "<AsyncDevbox id=dbx_123>"
@pytest.mark.asyncio
async def test_context_manager_enter_exit(self, mock_async_client: AsyncMock, devbox_view: MockDevboxView) -> None:
"""Test context manager behavior with successful shutdown."""
mock_async_client.devboxes.shutdown = AsyncMock(return_value=devbox_view)
- async with AsyncDevbox(mock_async_client, "dev_123") as devbox:
- assert devbox.id == "dev_123"
+ async with AsyncDevbox(mock_async_client, "dbx_123") as devbox:
+ assert devbox.id == "dbx_123"
call_kwargs = mock_async_client.devboxes.shutdown.call_args[1]
assert "timeout" not in call_kwargs
@@ -51,7 +51,7 @@ async def test_context_manager_exception_handling(self, mock_async_client: Async
mock_async_client.devboxes.shutdown = AsyncMock(side_effect=RuntimeError("Shutdown failed"))
with pytest.raises(ValueError, match="Test error"):
- async with AsyncDevbox(mock_async_client, "dev_123"):
+ async with AsyncDevbox(mock_async_client, "dbx_123"):
raise ValueError("Test error")
# Shutdown should be called even when body raises exception
@@ -62,7 +62,7 @@ async def test_get_info(self, mock_async_client: AsyncMock, devbox_view: MockDev
"""Test get_info method."""
mock_async_client.devboxes.retrieve = AsyncMock(return_value=devbox_view)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
result = await devbox.get_info(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -72,7 +72,7 @@ async def test_get_info(self, mock_async_client: AsyncMock, devbox_view: MockDev
assert result == devbox_view
mock_async_client.devboxes.retrieve.assert_called_once_with(
- "dev_123",
+ "dbx_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
@@ -85,12 +85,12 @@ async def test_await_running(self, mock_async_client: AsyncMock, devbox_view: Mo
mock_async_client.devboxes.await_running = AsyncMock(return_value=devbox_view)
polling_config = PollingConfig(timeout_seconds=60.0)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
result = await devbox.await_running(polling_config=polling_config)
assert result == devbox_view
mock_async_client.devboxes.await_running.assert_called_once_with(
- "dev_123",
+ "dbx_123",
polling_config=polling_config,
)
@@ -100,12 +100,12 @@ async def test_await_suspended(self, mock_async_client: AsyncMock, devbox_view:
mock_async_client.devboxes.await_suspended = AsyncMock(return_value=devbox_view)
polling_config = PollingConfig(timeout_seconds=60.0)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
result = await devbox.await_suspended(polling_config=polling_config)
assert result == devbox_view
mock_async_client.devboxes.await_suspended.assert_called_once_with(
- "dev_123",
+ "dbx_123",
polling_config=polling_config,
)
@@ -114,7 +114,7 @@ async def test_shutdown(self, mock_async_client: AsyncMock, devbox_view: MockDev
"""Test shutdown method."""
mock_async_client.devboxes.shutdown = AsyncMock(return_value=devbox_view)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
result = await devbox.shutdown(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -125,7 +125,7 @@ async def test_shutdown(self, mock_async_client: AsyncMock, devbox_view: MockDev
assert result == devbox_view
mock_async_client.devboxes.shutdown.assert_called_once_with(
- "dev_123",
+ "dbx_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
@@ -138,7 +138,7 @@ async def test_suspend(self, mock_async_client: AsyncMock, devbox_view: MockDevb
"""Test suspend method."""
mock_async_client.devboxes.suspend = AsyncMock(return_value=devbox_view)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
result = await devbox.suspend(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -149,7 +149,7 @@ async def test_suspend(self, mock_async_client: AsyncMock, devbox_view: MockDevb
assert result == devbox_view
mock_async_client.devboxes.suspend.assert_called_once_with(
- "dev_123",
+ "dbx_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
@@ -162,7 +162,7 @@ async def test_resume(self, mock_async_client: AsyncMock, devbox_view: MockDevbo
"""Test resume method."""
mock_async_client.devboxes.resume = AsyncMock(return_value=devbox_view)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
result = await devbox.resume(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -173,7 +173,7 @@ async def test_resume(self, mock_async_client: AsyncMock, devbox_view: MockDevbo
assert result == devbox_view
mock_async_client.devboxes.resume.assert_called_once_with(
- "dev_123",
+ "dbx_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
@@ -186,7 +186,7 @@ async def test_keep_alive(self, mock_async_client: AsyncMock) -> None:
"""Test keep_alive method."""
mock_async_client.devboxes.keep_alive = AsyncMock(return_value=object())
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
result = await devbox.keep_alive(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -197,7 +197,7 @@ async def test_keep_alive(self, mock_async_client: AsyncMock) -> None:
assert result is not None # Verify return value is propagated
mock_async_client.devboxes.keep_alive.assert_called_once_with(
- "dev_123",
+ "dbx_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
@@ -208,13 +208,13 @@ async def test_keep_alive(self, mock_async_client: AsyncMock) -> None:
@pytest.mark.asyncio
async def test_snapshot_disk(self, mock_async_client: AsyncMock) -> None:
"""Test snapshot_disk waits for completion."""
- snapshot_data = SimpleNamespace(id="snap_123")
+ snapshot_data = SimpleNamespace(id="snp_123")
snapshot_status = SimpleNamespace(status="completed")
mock_async_client.devboxes.snapshot_disk_async = AsyncMock(return_value=snapshot_data)
mock_async_client.devboxes.disk_snapshots.await_completed = AsyncMock(return_value=snapshot_status)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
polling_config = PollingConfig(timeout_seconds=60.0)
snapshot = await devbox.snapshot_disk(
name="test-snapshot",
@@ -223,7 +223,7 @@ async def test_snapshot_disk(self, mock_async_client: AsyncMock) -> None:
extra_headers={"X-Custom": "value"},
)
- assert snapshot.id == "snap_123"
+ assert snapshot.id == "snp_123"
mock_async_client.devboxes.snapshot_disk_async.assert_called_once()
call_kwargs = mock_async_client.devboxes.snapshot_disk_async.call_args[1]
assert "commit_message" not in call_kwargs
@@ -240,17 +240,17 @@ async def test_snapshot_disk(self, mock_async_client: AsyncMock) -> None:
@pytest.mark.asyncio
async def test_snapshot_disk_async(self, mock_async_client: AsyncMock) -> None:
"""Test snapshot_disk_async returns immediately."""
- snapshot_data = SimpleNamespace(id="snap_123")
+ snapshot_data = SimpleNamespace(id="snp_123")
mock_async_client.devboxes.snapshot_disk_async = AsyncMock(return_value=snapshot_data)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
snapshot = await devbox.snapshot_disk_async(
name="test-snapshot",
metadata={"key": "value"},
extra_headers={"X-Custom": "value"},
)
- assert snapshot.id == "snap_123"
+ assert snapshot.id == "snp_123"
mock_async_client.devboxes.snapshot_disk_async.assert_called_once()
call_kwargs = mock_async_client.devboxes.snapshot_disk_async.call_args[1]
assert "commit_message" not in call_kwargs
@@ -265,7 +265,7 @@ async def test_close(self, mock_async_client: AsyncMock, devbox_view: MockDevbox
"""Test close method calls shutdown."""
mock_async_client.devboxes.shutdown = AsyncMock(return_value=devbox_view)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
await devbox.close()
mock_async_client.devboxes.shutdown.assert_called_once()
@@ -274,21 +274,21 @@ async def test_close(self, mock_async_client: AsyncMock, devbox_view: MockDevbox
def test_cmd_property(self, mock_async_client: AsyncMock) -> None:
"""Test cmd property returns AsyncCommandInterface."""
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
cmd = devbox.cmd
assert isinstance(cmd, AsyncCommandInterface)
assert cmd._devbox is devbox
def test_file_property(self, mock_async_client: AsyncMock) -> None:
"""Test file property returns AsyncFileInterface."""
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
file_interface = devbox.file
assert isinstance(file_interface, AsyncFileInterface)
assert file_interface._devbox is devbox
def test_net_property(self, mock_async_client: AsyncMock) -> None:
"""Test net property returns AsyncNetworkInterface."""
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
net = devbox.net
assert isinstance(net, AsyncNetworkInterface)
assert net._devbox is devbox
diff --git a/tests/sdk/async_devbox/test_edge_cases.py b/tests/sdk/async_devbox/test_edge_cases.py
index fa5b89c7a..94d9e661b 100644
--- a/tests/sdk/async_devbox/test_edge_cases.py
+++ b/tests/sdk/async_devbox/test_edge_cases.py
@@ -21,6 +21,6 @@ async def test_async_network_error(self, mock_async_client: AsyncMock) -> None:
"""Test handling of network errors in async."""
mock_async_client.devboxes.retrieve = AsyncMock(side_effect=httpx.NetworkError("Connection failed"))
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
with pytest.raises(httpx.NetworkError):
await devbox.get_info()
diff --git a/tests/sdk/async_devbox/test_interfaces.py b/tests/sdk/async_devbox/test_interfaces.py
index bcb2a306b..52c439c22 100644
--- a/tests/sdk/async_devbox/test_interfaces.py
+++ b/tests/sdk/async_devbox/test_interfaces.py
@@ -27,7 +27,7 @@ async def test_exec_without_callbacks(
mock_async_client.devboxes.execute_async = AsyncMock(return_value=execution_view)
mock_async_client.devboxes.executions.await_completed = AsyncMock(return_value=execution_view)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
result = await devbox.cmd.exec("echo hello")
assert result.exit_code == 0
@@ -42,13 +42,13 @@ async def test_exec_without_callbacks(
async def test_exec_with_stdout_callback(self, mock_async_client: AsyncMock, mock_async_stream: AsyncMock) -> None:
"""Test exec with stdout callback."""
execution_async = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
)
execution_completed = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="output",
@@ -61,7 +61,7 @@ async def test_exec_with_stdout_callback(self, mock_async_client: AsyncMock, moc
stdout_calls: list[str] = []
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
result = await devbox.cmd.exec("echo hello", stdout=stdout_calls.append)
assert result.exit_code == 0
@@ -73,19 +73,19 @@ async def test_exec_async_returns_execution(
) -> None:
"""Test exec_async returns AsyncExecution object."""
execution_async = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
)
mock_async_client.devboxes.execute_async = AsyncMock(return_value=execution_async)
mock_async_client.devboxes.executions.stream_stdout_updates = AsyncMock(return_value=mock_async_stream)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
execution = await devbox.cmd.exec_async("long-running command")
- assert execution.execution_id == "exec_123"
- assert execution.devbox_id == "dev_123"
+ assert execution.execution_id == "exn_123"
+ assert execution.devbox_id == "dbx_123"
mock_async_client.devboxes.execute_async.assert_called_once()
@@ -97,7 +97,7 @@ async def test_read(self, mock_async_client: AsyncMock) -> None:
"""Test file read."""
mock_async_client.devboxes.read_file_contents = AsyncMock(return_value="file content")
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
result = await devbox.file.read(file_path="/path/to/file")
assert result == "file content"
@@ -109,7 +109,7 @@ async def test_write_string(self, mock_async_client: AsyncMock) -> None:
execution_detail = SimpleNamespace()
mock_async_client.devboxes.write_file_contents = AsyncMock(return_value=execution_detail)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
result = await devbox.file.write(file_path="/path/to/file", contents="content")
assert result == execution_detail
@@ -121,7 +121,7 @@ async def test_write_bytes(self, mock_async_client: AsyncMock) -> None:
execution_detail = SimpleNamespace()
mock_async_client.devboxes.write_file_contents = AsyncMock(return_value=execution_detail)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
result = await devbox.file.write(file_path="/path/to/file", contents="content")
assert result == execution_detail
@@ -134,7 +134,7 @@ async def test_download(self, mock_async_client: AsyncMock) -> None:
mock_response.read = AsyncMock(return_value=b"file content")
mock_async_client.devboxes.download_file = AsyncMock(return_value=mock_response)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
result = await devbox.file.download(path="/path/to/file")
assert result == b"file content"
@@ -146,7 +146,7 @@ async def test_upload(self, mock_async_client: AsyncMock, tmp_path: Path) -> Non
execution_detail = SimpleNamespace()
mock_async_client.devboxes.upload_file = AsyncMock(return_value=execution_detail)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
# Create a temporary file for upload
temp_file = tmp_path / "test_file.txt"
temp_file.write_text("test content")
@@ -166,7 +166,7 @@ async def test_create_ssh_key(self, mock_async_client: AsyncMock) -> None:
ssh_key_response = SimpleNamespace(public_key="ssh-rsa ...")
mock_async_client.devboxes.create_ssh_key = AsyncMock(return_value=ssh_key_response)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
result = await devbox.net.create_ssh_key(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -184,7 +184,7 @@ async def test_create_tunnel(self, mock_async_client: AsyncMock) -> None:
tunnel_view = SimpleNamespace(tunnel_id="tunnel_123")
mock_async_client.devboxes.create_tunnel = AsyncMock(return_value=tunnel_view)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
result = await devbox.net.create_tunnel(
port=8080,
extra_headers={"X-Custom": "value"},
@@ -202,7 +202,7 @@ async def test_remove_tunnel(self, mock_async_client: AsyncMock) -> None:
"""Test remove tunnel."""
mock_async_client.devboxes.remove_tunnel = AsyncMock(return_value=object())
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
result = await devbox.net.remove_tunnel(
port=8080,
extra_headers={"X-Custom": "value"},
diff --git a/tests/sdk/async_devbox/test_streaming.py b/tests/sdk/async_devbox/test_streaming.py
index cd33a8f26..3bb3e1a7b 100644
--- a/tests/sdk/async_devbox/test_streaming.py
+++ b/tests/sdk/async_devbox/test_streaming.py
@@ -25,8 +25,8 @@ class TestAsyncDevboxStreaming:
def test_start_streaming_no_callbacks(self, mock_async_client: AsyncMock) -> None:
"""Test _start_streaming returns None when no callbacks."""
- devbox = AsyncDevbox(mock_async_client, "dev_123")
- result = devbox._start_streaming("exec_123", stdout=None, stderr=None, output=None)
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
+ result = devbox._start_streaming("exn_123", stdout=None, stderr=None, output=None)
assert result is None
@pytest.mark.asyncio
@@ -46,9 +46,9 @@ async def async_iter():
mock_async_client.devboxes.executions.stream_stdout_updates = AsyncMock(return_value=mock_async_stream)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
stdout_calls: list[str] = []
- result = devbox._start_streaming("exec_123", stdout=stdout_calls.append, stderr=None, output=None)
+ result = devbox._start_streaming("exn_123", stdout=stdout_calls.append, stderr=None, output=None)
assert result is not None
assert isinstance(result, _AsyncStreamingGroup)
@@ -76,9 +76,9 @@ async def async_iter():
mock_async_client.devboxes.executions.stream_stderr_updates = AsyncMock(return_value=mock_async_stream)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
stderr_calls: list[str] = []
- result = devbox._start_streaming("exec_123", stdout=None, stderr=stderr_calls.append, output=None)
+ result = devbox._start_streaming("exn_123", stdout=None, stderr=stderr_calls.append, output=None)
assert result is not None
assert isinstance(result, _AsyncStreamingGroup)
@@ -107,9 +107,9 @@ async def async_iter():
mock_async_client.devboxes.executions.stream_stdout_updates = AsyncMock(return_value=mock_async_stream)
mock_async_client.devboxes.executions.stream_stderr_updates = AsyncMock(return_value=mock_async_stream)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
output_calls: list[str] = []
- result = devbox._start_streaming("exec_123", stdout=None, stderr=None, output=output_calls.append)
+ result = devbox._start_streaming("exn_123", stdout=None, stderr=None, output=output_calls.append)
assert result is not None
assert isinstance(result, _AsyncStreamingGroup)
@@ -136,7 +136,7 @@ async def async_iter() -> AsyncIterator[SimpleNamespace]:
mock_async_stream.__aenter__ = AsyncMock(return_value=mock_async_stream)
mock_async_stream.__aexit__ = AsyncMock(return_value=None)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
calls: list[str] = []
async def stream_factory() -> AsyncStream[ExecutionUpdateChunk]:
@@ -166,7 +166,7 @@ async def async_iter() -> AsyncIterator[SimpleNamespace]:
mock_async_stream.__aenter__ = AsyncMock(return_value=mock_async_stream)
mock_async_stream.__aexit__ = AsyncMock(return_value=None)
- devbox = AsyncDevbox(mock_async_client, "dev_123")
+ devbox = AsyncDevbox(mock_async_client, "dbx_123")
calls: list[str] = []
async def stream_factory() -> AsyncStream[ExecutionUpdateChunk]:
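
For reference, the async-stream fixtures these tests patch all follow one protocol: an async context manager that is also an async iterator of update chunks. A minimal standalone sketch of that protocol, assuming each chunk carries its text in an `output` attribute (the real `ExecutionUpdateChunk` field name may differ):

import asyncio
from types import SimpleNamespace


class FakeUpdateStream:
    """Stand-in for the object returned by stream_stdout_updates()."""

    def __init__(self, chunks: list[str]) -> None:
        self._chunks = chunks

    async def __aenter__(self) -> "FakeUpdateStream":
        return self

    async def __aexit__(self, *exc: object) -> None:
        return None

    async def __aiter__(self):
        for text in self._chunks:
            # Chunk shape is an assumption; the real field may differ.
            yield SimpleNamespace(output=text)


async def main() -> None:
    # Mirrors what a streaming task does: drain the stream, invoke the callback.
    stdout_calls: list[str] = []
    async with FakeUpdateStream(["line 1", "line 2"]) as stream:
        async for chunk in stream:
            stdout_calls.append(chunk.output)
    assert stdout_calls == ["line 1", "line 2"]


asyncio.run(main())
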
diff --git a/tests/sdk/conftest.py b/tests/sdk/conftest.py
index c5546fe55..f22b542c6 100644
--- a/tests/sdk/conftest.py
+++ b/tests/sdk/conftest.py
@@ -15,13 +15,17 @@
# Test ID constants
TEST_IDS = {
- "devbox": "dev_123",
- "execution": "exec_123",
- "snapshot": "snap_123",
- "blueprint": "bp_123",
+ "devbox": "dbx_123",
+ "execution": "exn_123",
+ "snapshot": "snp_123",
+ "blueprint": "bpt_123",
"object": "obj_123",
- "scorer": "scorer_123",
- "agent": "agent_123",
+ "scorer": "sco_123",
+ "agent": "agt_123",
+ "scenario": "scn_123",
+ "scenario_run": "scr_123",
+ "benchmark": "bmd_123",
+ "benchmark_run": "bmr_123",
}
# Test URL constants
@@ -42,7 +46,7 @@
class MockDevboxView:
"""Mock DevboxView for testing."""
- id: str = "dev_123"
+ id: str = TEST_IDS["devbox"]
status: str = "running"
name: str = "test-devbox"
@@ -51,8 +55,8 @@ class MockDevboxView:
class MockExecutionView:
"""Mock DevboxAsyncExecutionDetailView for testing."""
- execution_id: str = "exec_123"
- devbox_id: str = "dev_123"
+ execution_id: str = TEST_IDS["execution"]
+ devbox_id: str = TEST_IDS["devbox"]
status: str = "completed"
exit_status: int = 0
stdout: str = "output"
@@ -65,7 +69,7 @@ class MockExecutionView:
class MockSnapshotView:
"""Mock DevboxSnapshotView for testing."""
- id: str = "snap_123"
+ id: str = TEST_IDS["snapshot"]
status: str = "completed"
name: str = "test-snapshot"
@@ -74,7 +78,7 @@ class MockSnapshotView:
class MockBlueprintView:
"""Mock BlueprintView for testing."""
- id: str = "bp_123"
+ id: str = TEST_IDS["blueprint"]
status: str = "built"
name: str = "test-blueprint"
@@ -83,7 +87,7 @@ class MockBlueprintView:
class MockObjectView:
"""Mock ObjectView for testing."""
- id: str = "obj_123"
+ id: str = TEST_IDS["object"]
upload_url: str = "https://upload.example.com/obj_123"
name: str = "test-object"
@@ -92,7 +96,7 @@ class MockObjectView:
class MockScorerView:
"""Mock ScorerView for testing."""
- id: str = "scorer_123"
+ id: str = TEST_IDS["scorer"]
bash_script: str = "echo 'score=1.0'"
type: str = "test_scorer"
@@ -101,7 +105,7 @@ class MockScorerView:
class MockAgentView:
"""Mock AgentView for testing."""
- id: str = "agent_123"
+ id: str = TEST_IDS["agent"]
name: str = "test-agent"
create_time_ms: int = 1234567890000
is_public: bool = False
@@ -112,7 +116,7 @@ class MockAgentView:
class MockScenarioView:
"""Mock ScenarioView for testing."""
- id: str = "scn_123"
+ id: str = TEST_IDS["scenario"]
name: str = "test-scenario"
metadata: Dict[str, str] = field(default_factory=dict)
@@ -121,14 +125,48 @@ class MockScenarioView:
class MockScenarioRunView:
"""Mock ScenarioRunView for testing."""
- id: str = "run_123"
- devbox_id: str = "dev_123"
- scenario_id: str = "scn_123"
+ id: str = TEST_IDS["scenario_run"]
+ devbox_id: str = TEST_IDS["devbox"]
+ scenario_id: str = TEST_IDS["scenario"]
state: str = "running"
metadata: Dict[str, str] = field(default_factory=dict)
scoring_contract_result: object = None
+@dataclass
+class MockBenchmarkView:
+ """Mock BenchmarkView for testing."""
+
+ id: str = TEST_IDS["benchmark"]
+ name: str = "test-benchmark"
+ metadata: Dict[str, str] = field(default_factory=dict)
+ scenario_ids: list[str] = field(default_factory=list)
+
+
+@dataclass
+class MockBenchmarkRunView:
+ """Mock BenchmarkRunView for testing."""
+
+ id: str = TEST_IDS["benchmark_run"]
+ benchmark_id: str = TEST_IDS["benchmark"]
+ state: str = "running"
+ metadata: Dict[str, str] = field(default_factory=dict)
+ start_time_ms: int = 1234567890000
+ duration_ms: int | None = None
+ score: float | None = None
+
+
+class AsyncIterableMock:
+ """A simple async iterable mock for testing paginated responses."""
+
+ def __init__(self, items: list[Any]) -> None:
+ self._items = items
+
+ async def __aiter__(self):
+ for item in self._items:
+ yield item
+
+
def create_mock_httpx_client(methods: dict[str, Any] | None = None) -> AsyncMock:
"""
Create a mock httpx.AsyncClient with proper context manager setup.
@@ -237,6 +275,18 @@ def scenario_run_view() -> MockScenarioRunView:
return MockScenarioRunView()
+@pytest.fixture
+def benchmark_view() -> MockBenchmarkView:
+ """Create a mock BenchmarkView."""
+ return MockBenchmarkView()
+
+
+@pytest.fixture
+def benchmark_run_view() -> MockBenchmarkRunView:
+ """Create a mock BenchmarkRunView."""
+ return MockBenchmarkRunView()
+
+
@pytest.fixture
def mock_httpx_response() -> Mock:
"""Create a mock httpx.Response."""
diff --git a/tests/sdk/devbox/test_core.py b/tests/sdk/devbox/test_core.py
index b482e030b..c12b02485 100644
--- a/tests/sdk/devbox/test_core.py
+++ b/tests/sdk/devbox/test_core.py
@@ -29,20 +29,20 @@ class TestDevbox:
def test_init(self, mock_client: Mock) -> None:
"""Test Devbox initialization."""
- devbox = Devbox(mock_client, "dev_123")
- assert devbox.id == "dev_123"
+ devbox = Devbox(mock_client, "dbx_123")
+ assert devbox.id == "dbx_123"
def test_repr(self, mock_client: Mock) -> None:
"""Test Devbox string representation."""
- devbox = Devbox(mock_client, "dev_123")
- assert repr(devbox) == "<Devbox dev_123>"
+ devbox = Devbox(mock_client, "dbx_123")
+ assert repr(devbox) == "<Devbox dbx_123>"
def test_context_manager_enter_exit(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
"""Test context manager behavior with successful shutdown."""
mock_client.devboxes.shutdown.return_value = devbox_view
- with Devbox(mock_client, "dev_123") as devbox:
- assert devbox.id == "dev_123"
+ with Devbox(mock_client, "dbx_123") as devbox:
+ assert devbox.id == "dbx_123"
call_kwargs = mock_client.devboxes.shutdown.call_args[1]
assert "timeout" not in call_kwargs
@@ -52,7 +52,7 @@ def test_context_manager_exception_handling(self, mock_client: Mock) -> None:
mock_client.devboxes.shutdown.side_effect = RuntimeError("Shutdown failed")
with pytest.raises(ValueError, match="Test error"):
- with Devbox(mock_client, "dev_123"):
+ with Devbox(mock_client, "dbx_123"):
raise ValueError("Test error")
# Shutdown should be called even when body raises exception
@@ -62,7 +62,7 @@ def test_get_info(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
"""Test get_info method."""
mock_client.devboxes.retrieve.return_value = devbox_view
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.get_info(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -72,7 +72,7 @@ def test_get_info(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
assert result == devbox_view
mock_client.devboxes.retrieve.assert_called_once_with(
- "dev_123",
+ "dbx_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
@@ -84,12 +84,12 @@ def test_await_running(self, mock_client: Mock, devbox_view: MockDevboxView) ->
mock_client.devboxes.await_running.return_value = devbox_view
polling_config = PollingConfig(timeout_seconds=60.0)
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.await_running(polling_config=polling_config)
assert result == devbox_view
mock_client.devboxes.await_running.assert_called_once_with(
- "dev_123",
+ "dbx_123",
polling_config=polling_config,
)
@@ -98,12 +98,12 @@ def test_await_suspended(self, mock_client: Mock, devbox_view: MockDevboxView) -
mock_client.devboxes.await_suspended.return_value = devbox_view
polling_config = PollingConfig(timeout_seconds=60.0)
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.await_suspended(polling_config=polling_config)
assert result == devbox_view
mock_client.devboxes.await_suspended.assert_called_once_with(
- "dev_123",
+ "dbx_123",
polling_config=polling_config,
)
@@ -111,7 +111,7 @@ def test_shutdown(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
"""Test shutdown method."""
mock_client.devboxes.shutdown.return_value = devbox_view
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.shutdown(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -122,7 +122,7 @@ def test_shutdown(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
assert result == devbox_view
mock_client.devboxes.shutdown.assert_called_once_with(
- "dev_123",
+ "dbx_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
@@ -136,7 +136,7 @@ def test_suspend(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
mock_client.devboxes.await_suspended.return_value = devbox_view
polling_config = PollingConfig(timeout_seconds=60.0)
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.suspend(
polling_config=polling_config,
extra_headers={"X-Custom": "value"},
@@ -148,7 +148,7 @@ def test_suspend(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
assert result == devbox_view
mock_client.devboxes.suspend.assert_called_once_with(
- "dev_123",
+ "dbx_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
@@ -156,7 +156,7 @@ def test_suspend(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
idempotency_key="key-123",
)
mock_client.devboxes.await_suspended.assert_called_once_with(
- "dev_123",
+ "dbx_123",
polling_config=polling_config,
)
@@ -166,7 +166,7 @@ def test_resume(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
mock_client.devboxes.await_running.return_value = devbox_view
polling_config = PollingConfig(timeout_seconds=60.0)
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.resume(
polling_config=polling_config,
extra_headers={"X-Custom": "value"},
@@ -178,7 +178,7 @@ def test_resume(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
assert result == devbox_view
mock_client.devboxes.resume.assert_called_once_with(
- "dev_123",
+ "dbx_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
@@ -186,7 +186,7 @@ def test_resume(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
idempotency_key="key-123",
)
mock_client.devboxes.await_running.assert_called_once_with(
- "dev_123",
+ "dbx_123",
polling_config=polling_config,
)
@@ -194,7 +194,7 @@ def test_keep_alive(self, mock_client: Mock) -> None:
"""Test keep_alive method."""
mock_client.devboxes.keep_alive.return_value = object()
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.keep_alive(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -205,7 +205,7 @@ def test_keep_alive(self, mock_client: Mock) -> None:
assert result is not None # Verify return value is propagated
mock_client.devboxes.keep_alive.assert_called_once_with(
- "dev_123",
+ "dbx_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
@@ -215,13 +215,13 @@ def test_keep_alive(self, mock_client: Mock) -> None:
def test_snapshot_disk(self, mock_client: Mock) -> None:
"""Test snapshot_disk waits for completion."""
- snapshot_data = SimpleNamespace(id="snap_123")
+ snapshot_data = SimpleNamespace(id="snp_123")
snapshot_status = SimpleNamespace(status="completed")
mock_client.devboxes.snapshot_disk_async.return_value = snapshot_data
mock_client.devboxes.disk_snapshots.await_completed.return_value = snapshot_status
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
polling_config = PollingConfig(timeout_seconds=60.0)
snapshot = devbox.snapshot_disk(
name="test-snapshot",
@@ -230,7 +230,7 @@ def test_snapshot_disk(self, mock_client: Mock) -> None:
extra_headers={"X-Custom": "value"},
)
- assert snapshot.id == "snap_123"
+ assert snapshot.id == "snp_123"
call_kwargs = mock_client.devboxes.snapshot_disk_async.call_args[1]
assert "commit_message" not in call_kwargs or call_kwargs["commit_message"] in (omit, None)
assert call_kwargs["metadata"] == {"key": "value"}
@@ -244,17 +244,17 @@ def test_snapshot_disk(self, mock_client: Mock) -> None:
def test_snapshot_disk_async(self, mock_client: Mock) -> None:
"""Test snapshot_disk_async returns immediately."""
- snapshot_data = SimpleNamespace(id="snap_123")
+ snapshot_data = SimpleNamespace(id="snp_123")
mock_client.devboxes.snapshot_disk_async.return_value = snapshot_data
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
snapshot = devbox.snapshot_disk_async(
name="test-snapshot",
metadata={"key": "value"},
extra_headers={"X-Custom": "value"},
)
- assert snapshot.id == "snap_123"
+ assert snapshot.id == "snp_123"
call_kwargs = mock_client.devboxes.snapshot_disk_async.call_args[1]
assert "commit_message" not in call_kwargs or call_kwargs["commit_message"] in (omit, None)
assert call_kwargs["metadata"] == {"key": "value"}
@@ -270,7 +270,7 @@ def test_close(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
"""Test close method calls shutdown."""
mock_client.devboxes.shutdown.return_value = devbox_view
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
devbox.close()
call_kwargs = mock_client.devboxes.shutdown.call_args[1]
@@ -278,21 +278,21 @@ def test_close(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
def test_cmd_property(self, mock_client: Mock) -> None:
"""Test cmd property returns CommandInterface."""
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
cmd = devbox.cmd
assert isinstance(cmd, CommandInterface)
assert cmd._devbox is devbox
def test_file_property(self, mock_client: Mock) -> None:
"""Test file property returns FileInterface."""
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
file_interface = devbox.file
assert isinstance(file_interface, FileInterface)
assert file_interface._devbox is devbox
def test_net_property(self, mock_client: Mock) -> None:
"""Test net property returns NetworkInterface."""
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
net = devbox.net
assert isinstance(net, NetworkInterface)
assert net._devbox is devbox
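
These hunks leave the lifecycle contract unchanged: leaving a `with Devbox(...)` block calls shutdown exactly once, even when the body raises, and a failing shutdown must not mask the body's exception. A hedged standalone reduction of that contract (`DevboxSketch` is hypothetical; it reproduces only what the tests assert):

from unittest.mock import Mock


class DevboxSketch:
    """Hypothetical reduction of Devbox's context-manager behavior, per the tests."""

    def __init__(self, client, devbox_id: str) -> None:
        self._client = client
        self.id = devbox_id

    def __enter__(self) -> "DevboxSketch":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        try:
            # Best-effort shutdown on exit, even when the body raised.
            self._client.devboxes.shutdown(self.id)
        except Exception:
            # A failing shutdown must not mask the body's exception.
            pass


client = Mock()
try:
    with DevboxSketch(client, "dbx_123"):
        raise ValueError("boom")
except ValueError:
    pass

client.devboxes.shutdown.assert_called_once_with("dbx_123")
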
diff --git a/tests/sdk/devbox/test_edge_cases.py b/tests/sdk/devbox/test_edge_cases.py
index ff2491f66..23341f0c0 100644
--- a/tests/sdk/devbox/test_edge_cases.py
+++ b/tests/sdk/devbox/test_edge_cases.py
@@ -31,7 +31,7 @@ def test_network_error(self, mock_client: Mock) -> None:
"""Test handling of network errors."""
mock_client.devboxes.retrieve.side_effect = httpx.NetworkError("Connection failed")
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
with pytest.raises(httpx.NetworkError):
devbox.get_info()
@@ -50,7 +50,7 @@ def test_api_error(self, mock_client: Mock, status_code: int, message: str) -> N
mock_client.devboxes.retrieve.side_effect = error
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
with pytest.raises(APIStatusError):
devbox.get_info()
@@ -58,7 +58,7 @@ def test_timeout_error(self, mock_client: Mock) -> None:
"""Test handling of timeout errors."""
mock_client.devboxes.retrieve.side_effect = httpx.TimeoutException("Request timed out")
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
with pytest.raises(httpx.TimeoutException):
devbox.get_info(timeout=1.0)
@@ -68,19 +68,19 @@ class TestDevboxEdgeCases:
def test_empty_responses(self, mock_client: Mock) -> None:
"""Test handling of empty responses."""
- empty_view = SimpleNamespace(id="dev_123", status="", name="")
+ empty_view = SimpleNamespace(id="dbx_123", status="", name="")
mock_client.devboxes.retrieve.return_value = empty_view
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.get_info()
assert result == empty_view
def test_none_values(self, mock_client: Mock) -> None:
"""Test handling of None values."""
- view_with_none = SimpleNamespace(id="dev_123", status=None, name=None)
+ view_with_none = SimpleNamespace(id="dbx_123", status=None, name=None)
mock_client.devboxes.retrieve.return_value = view_with_none
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.get_info()
assert result.status is None
assert result.name is None
@@ -89,9 +89,9 @@ def test_concurrent_operations(
self, mock_client: Mock, thread_cleanup: tuple[list[threading.Thread], list[threading.Event]]
) -> None:
"""Test concurrent operations."""
- mock_client.devboxes.retrieve.return_value = SimpleNamespace(id="dev_123", status="running")
+ mock_client.devboxes.retrieve.return_value = SimpleNamespace(id="dbx_123", status="running")
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
results: list[DevboxView] = []
def get_info() -> None:
@@ -118,13 +118,13 @@ def test_context_manager_vs_manual_cleanup(self, mock_client: Mock, devbox_view:
mock_client.devboxes.shutdown.return_value = devbox_view
# Context manager approach (Pythonic)
- with Devbox(mock_client, "dev_123"):
+ with Devbox(mock_client, "dbx_123"):
pass
mock_client.devboxes.shutdown.assert_called_once()
# Manual cleanup (TypeScript-like)
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
devbox.shutdown()
assert mock_client.devboxes.shutdown.call_count == 2
diff --git a/tests/sdk/devbox/test_interfaces.py b/tests/sdk/devbox/test_interfaces.py
index a8ca574ba..66ef8fa7b 100644
--- a/tests/sdk/devbox/test_interfaces.py
+++ b/tests/sdk/devbox/test_interfaces.py
@@ -24,7 +24,7 @@ def test_exec_without_callbacks(self, mock_client: Mock, execution_view: MockExe
mock_client.devboxes.execute_async.return_value = execution_view
mock_client.devboxes.executions.await_completed.return_value = execution_view
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.cmd.exec("echo hello")
assert result.exit_code == 0
@@ -38,13 +38,13 @@ def test_exec_without_callbacks(self, mock_client: Mock, execution_view: MockExe
def test_exec_with_stdout_callback(self, mock_client: Mock, mock_stream: Mock) -> None:
"""Test exec with stdout callback."""
execution_async = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
)
execution_completed = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="output",
@@ -57,7 +57,7 @@ def test_exec_with_stdout_callback(self, mock_client: Mock, mock_stream: Mock) -
stdout_calls: list[str] = []
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.cmd.exec("echo hello", stdout=stdout_calls.append)
assert result.exit_code == 0
@@ -67,13 +67,13 @@ def test_exec_with_stdout_callback(self, mock_client: Mock, mock_stream: Mock) -
def test_exec_with_stderr_callback(self, mock_client: Mock, mock_stream: Mock) -> None:
"""Test exec with stderr callback."""
execution_async = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
)
execution_completed = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="",
@@ -86,7 +86,7 @@ def test_exec_with_stderr_callback(self, mock_client: Mock, mock_stream: Mock) -
stderr_calls: list[str] = []
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.cmd.exec("echo hello", stderr=stderr_calls.append)
assert result.exit_code == 0
@@ -95,13 +95,13 @@ def test_exec_with_stderr_callback(self, mock_client: Mock, mock_stream: Mock) -
def test_exec_with_output_callback(self, mock_client: Mock, mock_stream: Mock) -> None:
"""Test exec with output callback."""
execution_async = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
)
execution_completed = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="output",
@@ -115,7 +115,7 @@ def test_exec_with_output_callback(self, mock_client: Mock, mock_stream: Mock) -
output_calls: list[str] = []
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.cmd.exec("echo hello", output=output_calls.append)
assert result.exit_code == 0
@@ -124,13 +124,13 @@ def test_exec_with_output_callback(self, mock_client: Mock, mock_stream: Mock) -
def test_exec_with_all_callbacks(self, mock_client: Mock, mock_stream: Mock) -> None:
"""Test exec with all callbacks."""
execution_async = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
)
execution_completed = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="output",
@@ -146,7 +146,7 @@ def test_exec_with_all_callbacks(self, mock_client: Mock, mock_stream: Mock) ->
stderr_calls: list[str] = []
output_calls: list[str] = []
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.cmd.exec(
"echo hello",
stdout=stdout_calls.append,
@@ -160,19 +160,19 @@ def test_exec_with_all_callbacks(self, mock_client: Mock, mock_stream: Mock) ->
def test_exec_async_returns_execution(self, mock_client: Mock, mock_stream: Mock) -> None:
"""Test exec_async returns Execution object."""
execution_async = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
)
mock_client.devboxes.execute_async.return_value = execution_async
mock_client.devboxes.executions.stream_stdout_updates.return_value = mock_stream
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
execution = devbox.cmd.exec_async("long-running command")
- assert execution.execution_id == "exec_123"
- assert execution.devbox_id == "dev_123"
+ assert execution.execution_id == "exn_123"
+ assert execution.devbox_id == "dbx_123"
mock_client.devboxes.execute_async.assert_called_once()
@@ -183,7 +183,7 @@ def test_read(self, mock_client: Mock) -> None:
"""Test file read."""
mock_client.devboxes.read_file_contents.return_value = "file content"
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.file.read(file_path="/path/to/file")
assert result == "file content"
@@ -196,7 +196,7 @@ def test_write_string(self, mock_client: Mock) -> None:
execution_detail = SimpleNamespace()
mock_client.devboxes.write_file_contents.return_value = execution_detail
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.file.write(file_path="/path/to/file", contents="content")
assert result == execution_detail
@@ -210,7 +210,7 @@ def test_write_bytes(self, mock_client: Mock) -> None:
execution_detail = SimpleNamespace()
mock_client.devboxes.write_file_contents.return_value = execution_detail
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.file.write(file_path="/path/to/file", contents="content")
assert result == execution_detail
@@ -225,7 +225,7 @@ def test_download(self, mock_client: Mock) -> None:
mock_response.read.return_value = b"file content"
mock_client.devboxes.download_file.return_value = mock_response
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.file.download(path="/path/to/file")
assert result == b"file content"
@@ -238,7 +238,7 @@ def test_upload(self, mock_client: Mock, tmp_path: Path) -> None:
execution_detail = SimpleNamespace()
mock_client.devboxes.upload_file.return_value = execution_detail
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
# Create a temporary file for upload
temp_file = tmp_path / "test_file.txt"
temp_file.write_text("test content")
@@ -260,7 +260,7 @@ def test_create_ssh_key(self, mock_client: Mock) -> None:
ssh_key_response = SimpleNamespace(public_key="ssh-rsa ...")
mock_client.devboxes.create_ssh_key.return_value = ssh_key_response
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.net.create_ssh_key(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -271,7 +271,7 @@ def test_create_ssh_key(self, mock_client: Mock) -> None:
assert result == ssh_key_response
mock_client.devboxes.create_ssh_key.assert_called_once_with(
- "dev_123",
+ "dbx_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
@@ -284,7 +284,7 @@ def test_create_tunnel(self, mock_client: Mock) -> None:
tunnel_view = SimpleNamespace(port=8080)
mock_client.devboxes.create_tunnel.return_value = tunnel_view
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.net.create_tunnel(
port=8080,
extra_headers={"X-Custom": "value"},
@@ -296,7 +296,7 @@ def test_create_tunnel(self, mock_client: Mock) -> None:
assert result == tunnel_view
mock_client.devboxes.create_tunnel.assert_called_once_with(
- "dev_123",
+ "dbx_123",
port=8080,
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -309,7 +309,7 @@ def test_remove_tunnel(self, mock_client: Mock) -> None:
"""Test remove tunnel."""
mock_client.devboxes.remove_tunnel.return_value = object()
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
result = devbox.net.remove_tunnel(
port=8080,
extra_headers={"X-Custom": "value"},
@@ -321,7 +321,7 @@ def test_remove_tunnel(self, mock_client: Mock) -> None:
assert result is not None # Verify return value is propagated
mock_client.devboxes.remove_tunnel.assert_called_once_with(
- "dev_123",
+ "dbx_123",
port=8080,
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
diff --git a/tests/sdk/devbox/test_streaming.py b/tests/sdk/devbox/test_streaming.py
index 4550b94a2..6d44a4e5e 100644
--- a/tests/sdk/devbox/test_streaming.py
+++ b/tests/sdk/devbox/test_streaming.py
@@ -26,17 +26,17 @@ class TestDevboxStreaming:
def test_start_streaming_no_callbacks(self, mock_client: Mock) -> None:
"""Test _start_streaming returns None when no callbacks."""
- devbox = Devbox(mock_client, "dev_123")
- result = devbox._start_streaming("exec_123", stdout=None, stderr=None, output=None)
+ devbox = Devbox(mock_client, "dbx_123")
+ result = devbox._start_streaming("exn_123", stdout=None, stderr=None, output=None)
assert result is None
def test_start_streaming_stdout_only(self, mock_client: Mock, mock_stream: Mock) -> None:
"""Test _start_streaming with stdout callback only."""
mock_client.devboxes.executions.stream_stdout_updates.return_value = mock_stream
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
stdout_calls: list[str] = []
- result = devbox._start_streaming("exec_123", stdout=stdout_calls.append, stderr=None, output=None)
+ result = devbox._start_streaming("exn_123", stdout=stdout_calls.append, stderr=None, output=None)
assert result is not None
assert isinstance(result, _StreamingGroup)
@@ -47,9 +47,9 @@ def test_start_streaming_stderr_only(self, mock_client: Mock, mock_stream: Mock)
"""Test _start_streaming with stderr callback only."""
mock_client.devboxes.executions.stream_stderr_updates.return_value = mock_stream
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
stderr_calls: list[str] = []
- result = devbox._start_streaming("exec_123", stdout=None, stderr=stderr_calls.append, output=None)
+ result = devbox._start_streaming("exn_123", stdout=None, stderr=stderr_calls.append, output=None)
assert result is not None
assert isinstance(result, _StreamingGroup)
@@ -61,9 +61,9 @@ def test_start_streaming_output_only(self, mock_client: Mock, mock_stream: Mock)
mock_client.devboxes.executions.stream_stdout_updates.return_value = mock_stream
mock_client.devboxes.executions.stream_stderr_updates.return_value = mock_stream
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
output_calls: list[str] = []
- result = devbox._start_streaming("exec_123", stdout=None, stderr=None, output=output_calls.append)
+ result = devbox._start_streaming("exn_123", stdout=None, stderr=None, output=output_calls.append)
assert result is not None
assert isinstance(result, _StreamingGroup)
@@ -74,12 +74,12 @@ def test_start_streaming_all_callbacks(self, mock_client: Mock, mock_stream: Moc
mock_client.devboxes.executions.stream_stdout_updates.return_value = mock_stream
mock_client.devboxes.executions.stream_stderr_updates.return_value = mock_stream
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
stdout_calls: list[str] = []
stderr_calls: list[str] = []
output_calls: list[str] = []
result = devbox._start_streaming(
- "exec_123",
+ "exn_123",
stdout=stdout_calls.append,
stderr=stderr_calls.append,
output=output_calls.append,
@@ -104,7 +104,7 @@ def test_spawn_stream_thread(
mock_stream.__enter__ = Mock(return_value=mock_stream)
mock_stream.__exit__ = Mock(return_value=None)
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
stop_event = threading.Event()
calls: list[str] = []
@@ -147,7 +147,7 @@ def test_spawn_stream_thread_stop_event(
mock_stream.__enter__ = Mock(return_value=mock_stream)
mock_stream.__exit__ = Mock(return_value=None)
- devbox = Devbox(mock_client, "dev_123")
+ devbox = Devbox(mock_client, "dbx_123")
stop_event = threading.Event()
calls: list[str] = []
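
The `_spawn_stream_thread` tests above hinge on a `threading.Event` that lets the reader thread exit before the stream is exhausted. A self-contained version of that stop-event pattern (names are illustrative, not the SDK's):

import threading
from queue import Empty, Queue


def spawn_stream_thread(chunks: "Queue[str]", callback, stop_event: threading.Event) -> threading.Thread:
    """Drain chunks into callback until stop_event is set (illustrative helper)."""

    def run() -> None:
        while not stop_event.is_set():
            try:
                callback(chunks.get(timeout=0.05))
            except Empty:
                continue

    thread = threading.Thread(target=run, daemon=True)
    thread.start()
    return thread


calls: list[str] = []
consumed = threading.Event()


def callback(chunk: str) -> None:
    calls.append(chunk)
    consumed.set()


stop = threading.Event()
queue: "Queue[str]" = Queue()
thread = spawn_stream_thread(queue, callback, stop)

queue.put("chunk 1")
consumed.wait(timeout=1.0)  # let the thread consume the chunk
stop.set()                  # then ask it to exit early
thread.join(timeout=1.0)
assert calls == ["chunk 1"]
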
diff --git a/tests/sdk/test_agent.py b/tests/sdk/test_agent.py
index 7580b44d8..f1bbb083d 100644
--- a/tests/sdk/test_agent.py
+++ b/tests/sdk/test_agent.py
@@ -13,19 +13,19 @@ class TestAgent:
def test_init(self, mock_client: Mock) -> None:
"""Test Agent initialization."""
- agent = Agent(mock_client, "agent_123")
- assert agent.id == "agent_123"
+ agent = Agent(mock_client, "agt_123")
+ assert agent.id == "agt_123"
def test_repr(self, mock_client: Mock) -> None:
"""Test Agent string representation."""
- agent = Agent(mock_client, "agent_123")
- assert repr(agent) == "<Agent agent_123>"
+ agent = Agent(mock_client, "agt_123")
+ assert repr(agent) == "<Agent agt_123>"
def test_get_info(self, mock_client: Mock, agent_view: MockAgentView) -> None:
"""Test get_info method."""
mock_client.agents.retrieve.return_value = agent_view
- agent = Agent(mock_client, "agent_123")
+ agent = Agent(mock_client, "agt_123")
result = agent.get_info(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -35,7 +35,7 @@ def test_get_info(self, mock_client: Mock, agent_view: MockAgentView) -> None:
assert result == agent_view
mock_client.agents.retrieve.assert_called_once_with(
- "agent_123",
+ "agt_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
diff --git a/tests/sdk/test_async_agent.py b/tests/sdk/test_async_agent.py
index a2bb9496c..be7efa845 100644
--- a/tests/sdk/test_async_agent.py
+++ b/tests/sdk/test_async_agent.py
@@ -15,20 +15,20 @@ class TestAsyncAgent:
def test_init(self, mock_async_client: AsyncMock) -> None:
"""Test AsyncAgent initialization."""
- agent = AsyncAgent(mock_async_client, "agent_123")
- assert agent.id == "agent_123"
+ agent = AsyncAgent(mock_async_client, "agt_123")
+ assert agent.id == "agt_123"
def test_repr(self, mock_async_client: AsyncMock) -> None:
"""Test AsyncAgent string representation."""
- agent = AsyncAgent(mock_async_client, "agent_123")
- assert repr(agent) == "<AsyncAgent agent_123>"
+ agent = AsyncAgent(mock_async_client, "agt_123")
+ assert repr(agent) == "<AsyncAgent agt_123>"
@pytest.mark.asyncio
async def test_get_info(self, mock_async_client: AsyncMock, agent_view: MockAgentView) -> None:
"""Test get_info method."""
mock_async_client.agents.retrieve = AsyncMock(return_value=agent_view)
- agent = AsyncAgent(mock_async_client, "agent_123")
+ agent = AsyncAgent(mock_async_client, "agt_123")
result = await agent.get_info(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -38,7 +38,7 @@ async def test_get_info(self, mock_async_client: AsyncMock, agent_view: MockAgen
assert result == agent_view
mock_async_client.agents.retrieve.assert_called_once_with(
- "agent_123",
+ "agt_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
diff --git a/tests/sdk/test_async_benchmark.py b/tests/sdk/test_async_benchmark.py
new file mode 100644
index 000000000..d7d72daad
--- /dev/null
+++ b/tests/sdk/test_async_benchmark.py
@@ -0,0 +1,130 @@
+"""Comprehensive tests for async AsyncBenchmark class."""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+from unittest.mock import AsyncMock
+
+from tests.sdk.conftest import MockBenchmarkView, MockBenchmarkRunView
+from runloop_api_client.sdk.async_benchmark import AsyncBenchmark
+from runloop_api_client.sdk.async_benchmark_run import AsyncBenchmarkRun
+
+
+class TestAsyncBenchmark:
+ """Tests for AsyncBenchmark class."""
+
+ def test_init(self, mock_async_client: AsyncMock) -> None:
+ """Test AsyncBenchmark initialization."""
+ benchmark = AsyncBenchmark(mock_async_client, "bmd_123")
+ assert benchmark.id == "bmd_123"
+ assert repr(benchmark) == "<AsyncBenchmark bmd_123>"
+
+ async def test_get_info(self, mock_async_client: AsyncMock, benchmark_view: MockBenchmarkView) -> None:
+ """Test get_info method."""
+ mock_async_client.benchmarks.retrieve = AsyncMock(return_value=benchmark_view)
+
+ benchmark = AsyncBenchmark(mock_async_client, "bmd_123")
+ result = await benchmark.get_info()
+
+ assert result == benchmark_view
+ mock_async_client.benchmarks.retrieve.assert_awaited_once_with("bmd_123")
+
+ async def test_update(self, mock_async_client: AsyncMock, benchmark_view: MockBenchmarkView) -> None:
+ """Test update method."""
+ benchmark_view.name = "updated-name"
+ mock_async_client.benchmarks.update = AsyncMock(return_value=benchmark_view)
+
+ benchmark = AsyncBenchmark(mock_async_client, "bmd_123")
+ result = await benchmark.update(name="updated-name")
+
+ assert result == benchmark_view
+ mock_async_client.benchmarks.update.assert_awaited_once_with("bmd_123", name="updated-name")
+
+ async def test_start_run(self, mock_async_client: AsyncMock, benchmark_run_view: MockBenchmarkRunView) -> None:
+ """Test start_run method."""
+ mock_async_client.benchmarks.start_run = AsyncMock(return_value=benchmark_run_view)
+
+ benchmark = AsyncBenchmark(mock_async_client, "bmd_123")
+ result = await benchmark.start_run(run_name="test-run", metadata={"key": "value"})
+
+ assert isinstance(result, AsyncBenchmarkRun)
+ assert result.id == benchmark_run_view.id
+ assert result.benchmark_id == benchmark_run_view.benchmark_id
+ mock_async_client.benchmarks.start_run.assert_awaited_once_with(
+ benchmark_id="bmd_123", run_name="test-run", metadata={"key": "value"}
+ )
+
+ async def test_add_scenarios(self, mock_async_client: AsyncMock, benchmark_view: MockBenchmarkView) -> None:
+ """Test add_scenarios method."""
+ benchmark_view.scenario_ids = ["scn_001", "scn_002"]
+ mock_async_client.benchmarks.update_scenarios = AsyncMock(return_value=benchmark_view)
+
+ benchmark = AsyncBenchmark(mock_async_client, "bmd_123")
+ result = await benchmark.add_scenarios(["scn_001", "scn_002"])
+
+ assert result == benchmark_view
+ mock_async_client.benchmarks.update_scenarios.assert_awaited_once_with(
+ "bmd_123", scenarios_to_add=["scn_001", "scn_002"]
+ )
+
+ async def test_remove_scenarios(self, mock_async_client: AsyncMock, benchmark_view: MockBenchmarkView) -> None:
+ """Test remove_scenarios method."""
+ mock_async_client.benchmarks.update_scenarios = AsyncMock(return_value=benchmark_view)
+
+ benchmark = AsyncBenchmark(mock_async_client, "bmd_123")
+ result = await benchmark.remove_scenarios(["scn_001"])
+
+ assert result == benchmark_view
+ mock_async_client.benchmarks.update_scenarios.assert_awaited_once_with(
+ "bmd_123", scenarios_to_remove=["scn_001"]
+ )
+
+ async def test_list_runs_single(
+ self, mock_async_client: AsyncMock, benchmark_run_view: MockBenchmarkRunView
+ ) -> None:
+ """Test list_runs method with single result."""
+ page = SimpleNamespace(runs=[benchmark_run_view])
+ mock_async_client.benchmarks.runs.list = AsyncMock(return_value=page)
+
+ benchmark = AsyncBenchmark(mock_async_client, "bmd_123")
+ result = await benchmark.list_runs()
+
+ assert len(result) == 1
+ assert isinstance(result[0], AsyncBenchmarkRun)
+ assert result[0].id == benchmark_run_view.id
+ assert result[0].benchmark_id == benchmark_run_view.benchmark_id
+ mock_async_client.benchmarks.runs.list.assert_awaited_once_with(benchmark_id="bmd_123")
+
+ async def test_list_runs_multiple(self, mock_async_client: AsyncMock) -> None:
+ """Test list_runs method with multiple results."""
+ run_view1 = MockBenchmarkRunView(id="bmr_001")
+ run_view2 = MockBenchmarkRunView(id="bmr_002")
+ page = SimpleNamespace(runs=[run_view1, run_view2])
+ mock_async_client.benchmarks.runs.list = AsyncMock(return_value=page)
+
+ benchmark = AsyncBenchmark(mock_async_client, "bmd_123")
+ result = await benchmark.list_runs()
+
+ assert len(result) == 2
+ assert isinstance(result[0], AsyncBenchmarkRun)
+ assert isinstance(result[1], AsyncBenchmarkRun)
+ assert result[0].id == run_view1.id
+ assert result[0].benchmark_id == run_view1.benchmark_id
+ assert result[1].id == run_view2.id
+ assert result[1].benchmark_id == run_view2.benchmark_id
+ mock_async_client.benchmarks.runs.list.assert_awaited_once_with(benchmark_id="bmd_123")
+
+ async def test_list_runs_with_params(
+ self, mock_async_client: AsyncMock, benchmark_run_view: MockBenchmarkRunView
+ ) -> None:
+ """Test list_runs method with filtering parameters."""
+ page = SimpleNamespace(runs=[benchmark_run_view])
+ mock_async_client.benchmarks.runs.list = AsyncMock(return_value=page)
+
+ benchmark = AsyncBenchmark(mock_async_client, "bmd_123")
+ result = await benchmark.list_runs(limit=10, name="test-run")
+
+ assert len(result) == 1
+ mock_async_client.benchmarks.runs.list.assert_awaited_once_with(
+ benchmark_id="bmd_123", limit=10, name="test-run"
+ )
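
Taken together, these assertions pin down the wrapper's shape: `start_run` posts with `benchmark_id=` and wraps the returned view in an `AsyncBenchmarkRun`, while `list_runs` maps every view in `page.runs` into one. A hedged reduction of `list_runs` (the real class lives at `runloop_api_client.sdk.async_benchmark`; these sketch classes reproduce only what the tests check):

import asyncio
from types import SimpleNamespace
from unittest.mock import AsyncMock


class AsyncBenchmarkRunSketch:
    """Hypothetical value object exposing the attributes the tests read."""

    def __init__(self, client, run_id: str, benchmark_id: str) -> None:
        self._client = client
        self.id = run_id
        self.benchmark_id = benchmark_id


class AsyncBenchmarkSketch:
    """Hypothetical reduction of AsyncBenchmark.list_runs, per the assertions above."""

    def __init__(self, client, benchmark_id: str) -> None:
        self._client = client
        self.id = benchmark_id

    async def list_runs(self, **params):
        page = await self._client.benchmarks.runs.list(benchmark_id=self.id, **params)
        return [
            AsyncBenchmarkRunSketch(self._client, run.id, run.benchmark_id)
            for run in page.runs
        ]


async def main() -> None:
    client = AsyncMock()
    view = SimpleNamespace(id="bmr_001", benchmark_id="bmd_123")
    client.benchmarks.runs.list = AsyncMock(return_value=SimpleNamespace(runs=[view]))

    runs = await AsyncBenchmarkSketch(client, "bmd_123").list_runs(limit=10)
    assert runs[0].id == "bmr_001"
    assert runs[0].benchmark_id == "bmd_123"
    client.benchmarks.runs.list.assert_awaited_once_with(benchmark_id="bmd_123", limit=10)


asyncio.run(main())
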
diff --git a/tests/sdk/test_async_benchmark_run.py b/tests/sdk/test_async_benchmark_run.py
new file mode 100644
index 000000000..dd6e230d2
--- /dev/null
+++ b/tests/sdk/test_async_benchmark_run.py
@@ -0,0 +1,120 @@
+"""Comprehensive tests for async AsyncBenchmarkRun class."""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+from unittest.mock import AsyncMock
+
+from tests.sdk.conftest import MockScenarioRunView, MockBenchmarkRunView
+from runloop_api_client.sdk.async_scenario_run import AsyncScenarioRun
+from runloop_api_client.sdk.async_benchmark_run import AsyncBenchmarkRun
+
+
+class TestAsyncBenchmarkRun:
+ """Tests for AsyncBenchmarkRun class."""
+
+ def test_init(self, mock_async_client: AsyncMock) -> None:
+ """Test AsyncBenchmarkRun initialization."""
+ run = AsyncBenchmarkRun(mock_async_client, "bmr_123", "bmd_123")
+ assert run.id == "bmr_123"
+ assert run.benchmark_id == "bmd_123"
+
+ def test_repr(self, mock_async_client: AsyncMock) -> None:
+ """Test AsyncBenchmarkRun string representation."""
+ run = AsyncBenchmarkRun(mock_async_client, "bmr_123", "bmd_123")
+ assert repr(run) == "<AsyncBenchmarkRun bmr_123>"
+
+ async def test_get_info(self, mock_async_client: AsyncMock, benchmark_run_view: MockBenchmarkRunView) -> None:
+ """Test get_info method."""
+ mock_async_client.benchmarks.runs.retrieve = AsyncMock(return_value=benchmark_run_view)
+
+ run = AsyncBenchmarkRun(mock_async_client, "bmr_123", "bmd_123")
+ result = await run.get_info()
+
+ assert result == benchmark_run_view
+ mock_async_client.benchmarks.runs.retrieve.assert_awaited_once_with("bmr_123")
+
+ async def test_cancel(self, mock_async_client: AsyncMock, benchmark_run_view: MockBenchmarkRunView) -> None:
+ """Test cancel method."""
+ benchmark_run_view.state = "canceled"
+ mock_async_client.benchmarks.runs.cancel = AsyncMock(return_value=benchmark_run_view)
+
+ run = AsyncBenchmarkRun(mock_async_client, "bmr_123", "bmd_123")
+ result = await run.cancel()
+
+ assert result == benchmark_run_view
+ assert result.state == "canceled"
+ mock_async_client.benchmarks.runs.cancel.assert_awaited_once_with("bmr_123")
+
+ async def test_complete(self, mock_async_client: AsyncMock, benchmark_run_view: MockBenchmarkRunView) -> None:
+ """Test complete method."""
+ benchmark_run_view.state = "completed"
+ mock_async_client.benchmarks.runs.complete = AsyncMock(return_value=benchmark_run_view)
+
+ run = AsyncBenchmarkRun(mock_async_client, "bmr_123", "bmd_123")
+ result = await run.complete()
+
+ assert result == benchmark_run_view
+ assert result.state == "completed"
+ mock_async_client.benchmarks.runs.complete.assert_awaited_once_with("bmr_123")
+
+ async def test_list_scenario_runs_empty(self, mock_async_client: AsyncMock) -> None:
+ """Test list_scenario_runs method with empty results."""
+ page = SimpleNamespace(runs=[])
+ mock_async_client.benchmarks.runs.list_scenario_runs = AsyncMock(return_value=page)
+
+ run = AsyncBenchmarkRun(mock_async_client, "bmr_123", "bmd_123")
+ result = await run.list_scenario_runs()
+
+ assert len(result) == 0
+ mock_async_client.benchmarks.runs.list_scenario_runs.assert_awaited_once_with("bmr_123")
+
+ async def test_list_scenario_runs_single(
+ self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView
+ ) -> None:
+ """Test list_scenario_runs method with single result."""
+ page = SimpleNamespace(runs=[scenario_run_view])
+ mock_async_client.benchmarks.runs.list_scenario_runs = AsyncMock(return_value=page)
+
+ run = AsyncBenchmarkRun(mock_async_client, "bmr_123", "bmd_123")
+ result = await run.list_scenario_runs()
+
+ assert len(result) == 1
+ assert isinstance(result[0], AsyncScenarioRun)
+ assert result[0].id == scenario_run_view.id
+ assert result[0].devbox_id == scenario_run_view.devbox_id
+ mock_async_client.benchmarks.runs.list_scenario_runs.assert_awaited_once_with("bmr_123")
+
+ async def test_list_scenario_runs_multiple(self, mock_async_client: AsyncMock) -> None:
+ """Test list_scenario_runs method with multiple results."""
+ scenario_run_view1 = MockScenarioRunView(id="scr_001", devbox_id="dev_001")
+ scenario_run_view2 = MockScenarioRunView(id="scr_002", devbox_id="dev_002")
+ page = SimpleNamespace(runs=[scenario_run_view1, scenario_run_view2])
+ mock_async_client.benchmarks.runs.list_scenario_runs = AsyncMock(return_value=page)
+
+ run = AsyncBenchmarkRun(mock_async_client, "bmr_123", "bmd_123")
+ result = await run.list_scenario_runs()
+
+ assert len(result) == 2
+ assert isinstance(result[0], AsyncScenarioRun)
+ assert isinstance(result[1], AsyncScenarioRun)
+ assert result[0].id == "scr_001"
+ assert result[1].id == "scr_002"
+ mock_async_client.benchmarks.runs.list_scenario_runs.assert_awaited_once_with("bmr_123")
+
+ async def test_list_scenario_runs_with_params(
+ self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView
+ ) -> None:
+ """Test list_scenario_runs method with filtering parameters."""
+ page = SimpleNamespace(runs=[scenario_run_view])
+ mock_async_client.benchmarks.runs.list_scenario_runs = AsyncMock(return_value=page)
+
+ run = AsyncBenchmarkRun(mock_async_client, "bmr_123", "bmd_123")
+ result = await run.list_scenario_runs(limit=10, state="completed")
+
+ assert len(result) == 1
+ assert isinstance(result[0], AsyncScenarioRun)
+ assert result[0].id == scenario_run_view.id
+ mock_async_client.benchmarks.runs.list_scenario_runs.assert_awaited_once_with(
+ "bmr_123", limit=10, state="completed"
+ )
diff --git a/tests/sdk/test_async_blueprint.py b/tests/sdk/test_async_blueprint.py
index 75901a445..4c7de1e22 100644
--- a/tests/sdk/test_async_blueprint.py
+++ b/tests/sdk/test_async_blueprint.py
@@ -16,20 +16,20 @@ class TestAsyncBlueprint:
def test_init(self, mock_async_client: AsyncMock) -> None:
"""Test AsyncBlueprint initialization."""
- blueprint = AsyncBlueprint(mock_async_client, "bp_123")
- assert blueprint.id == "bp_123"
+ blueprint = AsyncBlueprint(mock_async_client, "bpt_123")
+ assert blueprint.id == "bpt_123"
def test_repr(self, mock_async_client: AsyncMock) -> None:
"""Test AsyncBlueprint string representation."""
- blueprint = AsyncBlueprint(mock_async_client, "bp_123")
- assert repr(blueprint) == "<AsyncBlueprint bp_123>"
+ blueprint = AsyncBlueprint(mock_async_client, "bpt_123")
+ assert repr(blueprint) == "<AsyncBlueprint bpt_123>"
@pytest.mark.asyncio
async def test_get_info(self, mock_async_client: AsyncMock, blueprint_view: MockBlueprintView) -> None:
"""Test get_info method."""
mock_async_client.blueprints.retrieve = AsyncMock(return_value=blueprint_view)
- blueprint = AsyncBlueprint(mock_async_client, "bp_123")
+ blueprint = AsyncBlueprint(mock_async_client, "bpt_123")
result = await blueprint.get_info(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -46,7 +46,7 @@ async def test_logs(self, mock_async_client: AsyncMock) -> None:
logs_view = SimpleNamespace(logs=[])
mock_async_client.blueprints.logs = AsyncMock(return_value=logs_view)
- blueprint = AsyncBlueprint(mock_async_client, "bp_123")
+ blueprint = AsyncBlueprint(mock_async_client, "bpt_123")
result = await blueprint.logs(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -62,7 +62,7 @@ async def test_delete(self, mock_async_client: AsyncMock) -> None:
"""Test delete method."""
mock_async_client.blueprints.delete = AsyncMock(return_value=object())
- blueprint = AsyncBlueprint(mock_async_client, "bp_123")
+ blueprint = AsyncBlueprint(mock_async_client, "bpt_123")
result = await blueprint.delete(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -78,7 +78,7 @@ async def test_create_devbox(self, mock_async_client: AsyncMock, devbox_view: Mo
"""Test create_devbox method."""
mock_async_client.devboxes.create_and_await_running = AsyncMock(return_value=devbox_view)
- blueprint = AsyncBlueprint(mock_async_client, "bp_123")
+ blueprint = AsyncBlueprint(mock_async_client, "bpt_123")
devbox = await blueprint.create_devbox(
name="test-devbox",
metadata={"key": "value"},
@@ -86,5 +86,5 @@ async def test_create_devbox(self, mock_async_client: AsyncMock, devbox_view: Mo
extra_headers={"X-Custom": "value"},
)
- assert devbox.id == "dev_123"
+ assert devbox.id == "dbx_123"
mock_async_client.devboxes.create_and_await_running.assert_awaited_once()
diff --git a/tests/sdk/test_async_execution.py b/tests/sdk/test_async_execution.py
index 06629cf63..f05633263 100644
--- a/tests/sdk/test_async_execution.py
+++ b/tests/sdk/test_async_execution.py
@@ -91,9 +91,9 @@ class TestAsyncExecution:
def test_init(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None:
"""Test AsyncExecution initialization."""
- execution = AsyncExecution(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
- assert execution.execution_id == "exec_123"
- assert execution.devbox_id == "dev_123"
+ execution = AsyncExecution(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
+ assert execution.execution_id == "exn_123"
+ assert execution.devbox_id == "dbx_123"
assert execution._initial_result == execution_view
@pytest.mark.asyncio
@@ -113,19 +113,19 @@ async def task() -> None:
async_task_cleanup.extend(tasks)
streaming_group = _AsyncStreamingGroup(tasks)
- execution = AsyncExecution(mock_async_client, "dev_123", execution_view, streaming_group) # type: ignore[arg-type]
+ execution = AsyncExecution(mock_async_client, "dbx_123", execution_view, streaming_group) # type: ignore[arg-type]
assert execution._streaming_group is streaming_group
def test_properties(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None:
"""Test AsyncExecution properties."""
- execution = AsyncExecution(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
- assert execution.execution_id == "exec_123"
- assert execution.devbox_id == "dev_123"
+ execution = AsyncExecution(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
+ assert execution.execution_id == "exn_123"
+ assert execution.devbox_id == "dbx_123"
def test_repr(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None:
"""Test AsyncExecution repr formatting."""
- execution = AsyncExecution(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
- assert repr(execution) == "<AsyncExecution exec_123>"
+ execution = AsyncExecution(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
+ assert repr(execution) == "<AsyncExecution exn_123>"
@pytest.mark.asyncio
async def test_result_already_completed(
@@ -134,14 +134,14 @@ async def test_result_already_completed(
"""Test result when execution is already completed."""
mock_async_client.devboxes.wait_for_command = AsyncMock(return_value=execution_view)
- execution = AsyncExecution(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
+ execution = AsyncExecution(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
result = await execution.result()
assert result.exit_code == 0
assert await result.stdout(num_lines=10) == "output"
mock_async_client.devboxes.wait_for_command.assert_awaited_once_with(
- "exec_123",
- devbox_id="dev_123",
+ "exn_123",
+ devbox_id="dbx_123",
statuses=["completed"],
)
@@ -149,13 +149,13 @@ async def test_result_already_completed(
async def test_result_needs_polling(self, mock_async_client: AsyncMock) -> None:
"""Test result when execution needs polling."""
running_execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
)
completed_execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="output",
@@ -166,14 +166,14 @@ async def test_result_needs_polling(self, mock_async_client: AsyncMock) -> None:
mock_async_client.devboxes.wait_for_command = AsyncMock(return_value=completed_execution)
- execution = AsyncExecution(mock_async_client, "dev_123", running_execution) # type: ignore[arg-type]
+ execution = AsyncExecution(mock_async_client, "dbx_123", running_execution) # type: ignore[arg-type]
result = await execution.result()
assert result.exit_code == 0
assert await result.stdout(num_lines=10) == "output"
mock_async_client.devboxes.wait_for_command.assert_awaited_once_with(
- "exec_123",
- devbox_id="dev_123",
+ "exn_123",
+ devbox_id="dbx_123",
statuses=["completed"],
)
@@ -181,13 +181,13 @@ async def test_result_needs_polling(self, mock_async_client: AsyncMock) -> None:
async def test_result_with_streaming_group(self, mock_async_client: AsyncMock) -> None:
"""Test result with streaming group cleanup."""
running_execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
)
completed_execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="output",
@@ -202,7 +202,7 @@ async def task() -> None:
tasks = [asyncio.create_task(task())]
streaming_group = _AsyncStreamingGroup(tasks)
- execution = AsyncExecution(mock_async_client, "dev_123", running_execution, streaming_group) # type: ignore[arg-type]
+ execution = AsyncExecution(mock_async_client, "dbx_123", running_execution, streaming_group) # type: ignore[arg-type]
result = await execution.result()
assert result.exit_code == 0
@@ -213,8 +213,8 @@ async def task() -> None:
async def test_result_passes_options(self, mock_async_client: AsyncMock) -> None:
"""Ensure result forwards options to wait_for_command."""
execution_view = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="output",
@@ -223,12 +223,12 @@ async def test_result_passes_options(self, mock_async_client: AsyncMock) -> None
mock_async_client.devboxes.wait_for_command = AsyncMock(return_value=execution_view)
- execution = AsyncExecution(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
+ execution = AsyncExecution(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
await execution.result(timeout=30.0, idempotency_key="abc123")
mock_async_client.devboxes.wait_for_command.assert_awaited_once_with(
- "exec_123",
- devbox_id="dev_123",
+ "exn_123",
+ devbox_id="dbx_123",
statuses=["completed"],
timeout=30.0,
idempotency_key="abc123",
@@ -238,20 +238,20 @@ async def test_result_passes_options(self, mock_async_client: AsyncMock) -> None
async def test_get_state(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None:
"""Test get_state method."""
updated_execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
)
mock_async_client.devboxes.executions.retrieve = AsyncMock(return_value=updated_execution)
- execution = AsyncExecution(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
+ execution = AsyncExecution(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
result = await execution.get_state()
assert result == updated_execution
assert execution._initial_result == execution_view
mock_async_client.devboxes.executions.retrieve.assert_awaited_once_with(
- "exec_123",
- devbox_id="dev_123",
+ "exn_123",
+ devbox_id="dbx_123",
)
@pytest.mark.asyncio
@@ -259,10 +259,10 @@ async def test_kill(self, mock_async_client: AsyncMock, execution_view: MockExec
"""Test kill method."""
mock_async_client.devboxes.executions.kill = AsyncMock(return_value=None)
- execution = AsyncExecution(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
+ execution = AsyncExecution(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
await execution.kill()
mock_async_client.devboxes.executions.kill.assert_awaited_once_with(
- "exec_123",
- devbox_id="dev_123",
+ "exn_123",
+ devbox_id="dbx_123",
)
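
The assertions above pin down the forwarding contract of AsyncExecution: result() delegates to devboxes.wait_for_command with the execution and devbox IDs and passes any request options straight through. A minimal sketch of the asserted call shape, using the placeholder IDs from these tests:

    result = await execution.result(timeout=30.0, idempotency_key="abc123")
    # Equivalent underlying call, as mocked above:
    # await client.devboxes.wait_for_command(
    #     "exn_123", devbox_id="dbx_123", statuses=["completed"],
    #     timeout=30.0, idempotency_key="abc123",
    # )
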
diff --git a/tests/sdk/test_async_execution_result.py b/tests/sdk/test_async_execution_result.py
index cf8a23caa..58802cc4f 100644
--- a/tests/sdk/test_async_execution_result.py
+++ b/tests/sdk/test_async_execution_result.py
@@ -16,31 +16,31 @@ class TestAsyncExecutionResult:
def test_init(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None:
"""Test AsyncExecutionResult initialization."""
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
# Verify via public API
- assert result.devbox_id == "dev_123"
- assert result.execution_id == "exec_123"
+ assert result.devbox_id == "dbx_123"
+ assert result.execution_id == "exn_123"
def test_devbox_id_property(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None:
"""Test devbox_id property."""
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
- assert result.devbox_id == "dev_123"
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
+ assert result.devbox_id == "dbx_123"
def test_execution_id_property(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None:
"""Test execution_id property."""
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
- assert result.execution_id == "exec_123"
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
+ assert result.execution_id == "exn_123"
def test_exit_code_property(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None:
"""Test exit_code property."""
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
assert result.exit_code == 0
def test_exit_code_none(self, mock_async_client: AsyncMock) -> None:
"""Test exit_code property when exit_status is None."""
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
exit_status=None,
stdout="",
@@ -48,19 +48,19 @@ def test_exit_code_none(self, mock_async_client: AsyncMock) -> None:
stdout_truncated=False,
stderr_truncated=False,
)
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type]
assert result.exit_code is None
def test_success_property(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None:
"""Test success property."""
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
assert result.success is True
def test_success_false(self, mock_async_client: AsyncMock) -> None:
"""Test success property when exit code is non-zero."""
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=1,
stdout="",
@@ -68,19 +68,19 @@ def test_success_false(self, mock_async_client: AsyncMock) -> None:
stdout_truncated=False,
stderr_truncated=False,
)
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type]
assert result.success is False
def test_failed_property(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None:
"""Test failed property when exit code is zero."""
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
assert result.failed is False
def test_failed_true(self, mock_async_client: AsyncMock) -> None:
"""Test failed property when exit code is non-zero."""
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=1,
stdout="",
@@ -88,14 +88,14 @@ def test_failed_true(self, mock_async_client: AsyncMock) -> None:
stdout_truncated=False,
stderr_truncated=False,
)
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type]
assert result.failed is True
def test_failed_none(self, mock_async_client: AsyncMock) -> None:
"""Test failed property when exit_status is None."""
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
exit_status=None,
stdout="",
@@ -103,13 +103,13 @@ def test_failed_none(self, mock_async_client: AsyncMock) -> None:
stdout_truncated=False,
stderr_truncated=False,
)
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type]
assert result.failed is False
@pytest.mark.asyncio
async def test_stdout(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None:
"""Test stdout method."""
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
assert await result.stdout() == "output"
assert await result.stdout(num_lines=10) == "output"
@@ -117,8 +117,8 @@ async def test_stdout(self, mock_async_client: AsyncMock, execution_view: MockEx
async def test_stdout_empty(self, mock_async_client: AsyncMock) -> None:
"""Test stdout method when stdout is None."""
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout=None,
@@ -126,15 +126,15 @@ async def test_stdout_empty(self, mock_async_client: AsyncMock) -> None:
stdout_truncated=False,
stderr_truncated=False,
)
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type]
assert await result.stdout() == ""
@pytest.mark.asyncio
async def test_stderr(self, mock_async_client: AsyncMock) -> None:
"""Test stderr method."""
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=1,
stdout="",
@@ -142,19 +142,19 @@ async def test_stderr(self, mock_async_client: AsyncMock) -> None:
stdout_truncated=False,
stderr_truncated=False,
)
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type]
assert await result.stderr() == "error message"
assert await result.stderr(num_lines=20) == "error message"
@pytest.mark.asyncio
async def test_stderr_empty(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None:
"""Test stderr method when stderr is None."""
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
assert await result.stderr() == ""
def test_result_property(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None:
"""Test result property."""
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
assert result.result == execution_view
@pytest.mark.asyncio
@@ -176,8 +176,8 @@ async def mock_iter():
mock_async_client.devboxes.executions.stream_stdout_updates = AsyncMock(return_value=mock_async_stream)
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="partial",
@@ -185,13 +185,13 @@ async def mock_iter():
stdout_truncated=True,
stderr_truncated=False,
)
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type]
# Should stream full output
output = await result.stdout()
assert output == "line1\nline2\nline3\n"
mock_async_client.devboxes.executions.stream_stdout_updates.assert_awaited_once_with(
- "exec_123", devbox_id="dev_123"
+ "exn_123", devbox_id="dbx_123"
)
@pytest.mark.asyncio
@@ -212,8 +212,8 @@ async def mock_iter():
mock_async_client.devboxes.executions.stream_stderr_updates = AsyncMock(return_value=mock_async_stream)
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="",
@@ -221,13 +221,13 @@ async def mock_iter():
stdout_truncated=False,
stderr_truncated=True,
)
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type]
# Should stream full output
output = await result.stderr()
assert output == "error1\nerror2\n"
mock_async_client.devboxes.executions.stream_stderr_updates.assert_awaited_once_with(
- "exec_123", devbox_id="dev_123"
+ "exn_123", devbox_id="dbx_123"
)
@pytest.mark.asyncio
@@ -248,8 +248,8 @@ async def mock_iter():
mock_async_client.devboxes.executions.stream_stdout_updates = AsyncMock(return_value=mock_async_stream)
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="line1\n",
@@ -257,7 +257,7 @@ async def mock_iter():
stdout_truncated=True,
stderr_truncated=False,
)
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type]
# Should stream and return last 2 lines
output = await result.stdout(num_lines=2)
@@ -267,8 +267,8 @@ async def mock_iter():
async def test_stdout_no_streaming_when_not_truncated(self, mock_async_client: AsyncMock) -> None:
"""Test stdout doesn't stream when not truncated."""
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="complete output",
@@ -276,7 +276,7 @@ async def test_stdout_no_streaming_when_not_truncated(self, mock_async_client: A
stdout_truncated=False,
stderr_truncated=False,
)
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type]
# Should return existing output without streaming
output = await result.stdout()
@@ -286,8 +286,8 @@ async def test_stdout_no_streaming_when_not_truncated(self, mock_async_client: A
async def test_stdout_with_num_lines_no_truncation(self, mock_async_client: AsyncMock) -> None:
"""Test stdout with num_lines when not truncated."""
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="line1\nline2\nline3\nline4\nline5",
@@ -295,7 +295,7 @@ async def test_stdout_with_num_lines_no_truncation(self, mock_async_client: Asyn
stdout_truncated=False,
stderr_truncated=False,
)
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type]
# Should return last 2 lines without streaming
output = await result.stdout(num_lines=2)
@@ -303,7 +303,7 @@ async def test_stdout_with_num_lines_no_truncation(self, mock_async_client: Asyn
def test_count_non_empty_lines(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None:
"""Test the _count_non_empty_lines helper method."""
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
# Test various input strings
assert result._count_non_empty_lines("") == 0
@@ -315,7 +315,7 @@ def test_count_non_empty_lines(self, mock_async_client: AsyncMock, execution_vie
def test_get_last_n_lines(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None:
"""Test the _get_last_n_lines helper method."""
- result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type]
# Test various scenarios
assert result._get_last_n_lines("", 5) == ""
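
The truncation tests above fix the streaming fallback for AsyncExecutionResult: stored output is returned as-is unless it was truncated, in which case the full stream is re-fetched. A sketch of that logic under the same mocked API surface (the helper name read_stdout and the _client attribute are assumptions, not the SDK's actual internals):

    async def read_stdout(result, num_lines=None):
        view = result.result
        if not view.stdout_truncated:
            # Complete output was captured inline; no streaming needed.
            text = view.stdout or ""
        else:
            # Output was truncated; re-stream the full stdout from the API.
            stream = await result._client.devboxes.executions.stream_stdout_updates(
                view.execution_id, devbox_id=view.devbox_id
            )
            text = "".join([chunk async for chunk in stream])
        if num_lines is not None:
            # Keep only the trailing num_lines lines, as stdout(num_lines=...) does.
            text = "\n".join(text.splitlines()[-num_lines:])
        return text
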
diff --git a/tests/sdk/test_async_ops.py b/tests/sdk/test_async_ops.py
index f8a16e1c0..7e36e938d 100644
--- a/tests/sdk/test_async_ops.py
+++ b/tests/sdk/test_async_ops.py
@@ -17,6 +17,7 @@
MockScorerView,
MockScenarioView,
MockSnapshotView,
+ MockBenchmarkView,
MockBlueprintView,
create_mock_httpx_response,
)
@@ -27,12 +28,14 @@
AsyncAgentOps,
AsyncScenario,
AsyncSnapshot,
+ AsyncBenchmark,
AsyncBlueprint,
AsyncDevboxOps,
AsyncScorerOps,
AsyncRunloopSDK,
AsyncScenarioOps,
AsyncSnapshotOps,
+ AsyncBenchmarkOps,
AsyncBlueprintOps,
AsyncStorageObject,
AsyncStorageObjectOps,
@@ -56,7 +59,7 @@ async def test_create(self, mock_async_client: AsyncMock, devbox_view: MockDevbo
)
assert isinstance(devbox, AsyncDevbox)
- assert devbox.id == "dev_123"
+ assert devbox.id == "dbx_123"
mock_async_client.devboxes.create_and_await_running.assert_awaited_once()
@pytest.mark.asyncio
@@ -66,13 +69,13 @@ async def test_create_from_blueprint_id(self, mock_async_client: AsyncMock, devb
ops = AsyncDevboxOps(mock_async_client)
devbox = await ops.create_from_blueprint_id(
- "bp_123",
+ "bpt_123",
name="test-devbox",
)
assert isinstance(devbox, AsyncDevbox)
call_kwargs = mock_async_client.devboxes.create_and_await_running.call_args[1]
- assert call_kwargs["blueprint_id"] == "bp_123"
+ assert call_kwargs["blueprint_id"] == "bpt_123"
@pytest.mark.asyncio
async def test_create_from_blueprint_name(self, mock_async_client: AsyncMock, devbox_view: MockDevboxView) -> None:
@@ -96,21 +99,21 @@ async def test_create_from_snapshot(self, mock_async_client: AsyncMock, devbox_v
ops = AsyncDevboxOps(mock_async_client)
devbox = await ops.create_from_snapshot(
- "snap_123",
+ "snp_123",
name="test-devbox",
)
assert isinstance(devbox, AsyncDevbox)
call_kwargs = mock_async_client.devboxes.create_and_await_running.call_args[1]
- assert call_kwargs["snapshot_id"] == "snap_123"
+ assert call_kwargs["snapshot_id"] == "snp_123"
def test_from_id(self, mock_async_client: AsyncMock) -> None:
"""Test from_id method."""
ops = AsyncDevboxOps(mock_async_client)
- devbox = ops.from_id("dev_123")
+ devbox = ops.from_id("dbx_123")
assert isinstance(devbox, AsyncDevbox)
- assert devbox.id == "dev_123"
+ assert devbox.id == "dbx_123"
# Verify from_id does not wait for running status
if hasattr(mock_async_client.devboxes, "await_running"):
assert not mock_async_client.devboxes.await_running.called
@@ -142,7 +145,7 @@ async def test_list_single(self, mock_async_client: AsyncMock, devbox_view: Mock
assert len(devboxes) == 1
assert isinstance(devboxes[0], AsyncDevbox)
- assert devboxes[0].id == "dev_123"
+ assert devboxes[0].id == "dbx_123"
mock_async_client.devboxes.list.assert_awaited_once()
@pytest.mark.asyncio
@@ -174,7 +177,7 @@ async def test_list_empty(self, mock_async_client: AsyncMock) -> None:
mock_async_client.devboxes.disk_snapshots.list = AsyncMock(return_value=page)
ops = AsyncSnapshotOps(mock_async_client)
- snapshots = await ops.list(devbox_id="dev_123", limit=10)
+ snapshots = await ops.list(devbox_id="dbx_123", limit=10)
assert len(snapshots) == 0
mock_async_client.devboxes.disk_snapshots.list.assert_awaited_once()
@@ -187,14 +190,14 @@ async def test_list_single(self, mock_async_client: AsyncMock, snapshot_view: Mo
ops = AsyncSnapshotOps(mock_async_client)
snapshots = await ops.list(
- devbox_id="dev_123",
+ devbox_id="dbx_123",
limit=10,
starting_after="snap_000",
)
assert len(snapshots) == 1
assert isinstance(snapshots[0], AsyncSnapshot)
- assert snapshots[0].id == "snap_123"
+ assert snapshots[0].id == "snp_123"
mock_async_client.devboxes.disk_snapshots.list.assert_awaited_once()
@pytest.mark.asyncio
@@ -206,7 +209,7 @@ async def test_list_multiple(self, mock_async_client: AsyncMock) -> None:
mock_async_client.devboxes.disk_snapshots.list = AsyncMock(return_value=page)
ops = AsyncSnapshotOps(mock_async_client)
- snapshots = await ops.list(devbox_id="dev_123", limit=10)
+ snapshots = await ops.list(devbox_id="dbx_123", limit=10)
assert len(snapshots) == 2
assert isinstance(snapshots[0], AsyncSnapshot)
@@ -218,10 +221,10 @@ async def test_list_multiple(self, mock_async_client: AsyncMock) -> None:
def test_from_id(self, mock_async_client: AsyncMock) -> None:
"""Test from_id method."""
ops = AsyncSnapshotOps(mock_async_client)
- snapshot = ops.from_id("snap_123")
+ snapshot = ops.from_id("snp_123")
assert isinstance(snapshot, AsyncSnapshot)
- assert snapshot.id == "snap_123"
+ assert snapshot.id == "snp_123"
class TestAsyncBlueprintOps:
@@ -239,16 +242,16 @@ async def test_create(self, mock_async_client: AsyncMock, blueprint_view: MockBl
)
assert isinstance(blueprint, AsyncBlueprint)
- assert blueprint.id == "bp_123"
+ assert blueprint.id == "bpt_123"
mock_async_client.blueprints.create_and_await_build_complete.assert_awaited_once()
def test_from_id(self, mock_async_client: AsyncMock) -> None:
"""Test from_id method."""
ops = AsyncBlueprintOps(mock_async_client)
- blueprint = ops.from_id("bp_123")
+ blueprint = ops.from_id("bpt_123")
assert isinstance(blueprint, AsyncBlueprint)
- assert blueprint.id == "bp_123"
+ assert blueprint.id == "bpt_123"
@pytest.mark.asyncio
async def test_list_empty(self, mock_async_client: AsyncMock) -> None:
@@ -277,7 +280,7 @@ async def test_list_single(self, mock_async_client: AsyncMock, blueprint_view: M
assert len(blueprints) == 1
assert isinstance(blueprints[0], AsyncBlueprint)
- assert blueprints[0].id == "bp_123"
+ assert blueprints[0].id == "bpt_123"
mock_async_client.blueprints.list.assert_awaited_once()
@pytest.mark.asyncio
@@ -712,16 +715,16 @@ async def test_create(self, mock_async_client: AsyncMock, scorer_view: MockScore
)
assert isinstance(scorer, AsyncScorer)
- assert scorer.id == "scorer_123"
+ assert scorer.id == "sco_123"
mock_async_client.scenarios.scorers.create.assert_awaited_once()
def test_from_id(self, mock_async_client: AsyncMock) -> None:
"""Test from_id method."""
ops = AsyncScorerOps(mock_async_client)
- scorer = ops.from_id("scorer_123")
+ scorer = ops.from_id("sco_123")
assert isinstance(scorer, AsyncScorer)
- assert scorer.id == "scorer_123"
+ assert scorer.id == "sco_123"
@pytest.mark.asyncio
async def test_list_empty(self, mock_async_client: AsyncMock) -> None:
@@ -756,7 +759,7 @@ async def async_iter():
assert len(scorers) == 1
assert isinstance(scorers[0], AsyncScorer)
- assert scorers[0].id == "scorer_123"
+ assert scorers[0].id == "sco_123"
mock_async_client.scenarios.scorers.list.assert_awaited_once()
@pytest.mark.asyncio
@@ -793,19 +796,20 @@ async def test_create(self, mock_async_client: AsyncMock, agent_view: MockAgentV
client = AsyncAgentOps(mock_async_client)
agent = await client.create(
name="test-agent",
+ version="1.2.3",
)
assert isinstance(agent, AsyncAgent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_async_client.agents.create.assert_called_once()
def test_from_id(self, mock_async_client: AsyncMock) -> None:
"""Test from_id method."""
client = AsyncAgentOps(mock_async_client)
- agent = client.from_id("agent_123")
+ agent = client.from_id("agt_123")
assert isinstance(agent, AsyncAgent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
@pytest.mark.asyncio
async def test_list(self, mock_async_client: AsyncMock) -> None:
@@ -901,10 +905,11 @@ async def test_create_from_npm(self, mock_async_client: AsyncMock, agent_view: M
agent = await client.create_from_npm(
name="test-agent",
package_name="@runloop/example-agent",
+ version="1.2.3",
)
assert isinstance(agent, AsyncAgent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_async_client.agents.create.assert_awaited_once_with(
source={
"type": "npm",
@@ -913,6 +918,7 @@ async def test_create_from_npm(self, mock_async_client: AsyncMock, agent_view: M
},
},
name="test-agent",
+ version="1.2.3",
)
@pytest.mark.asyncio
@@ -926,25 +932,25 @@ async def test_create_from_npm_with_all_options(
agent = await client.create_from_npm(
name="test-agent",
package_name="@runloop/example-agent",
- npm_version="1.2.3",
registry_url="https://registry.example.com",
agent_setup=["npm install", "npm run setup"],
+ version="1.2.3",
extra_headers={"X-Custom": "header"},
)
assert isinstance(agent, AsyncAgent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_async_client.agents.create.assert_awaited_once_with(
source={
"type": "npm",
"npm": {
"package_name": "@runloop/example-agent",
- "npm_version": "1.2.3",
"registry_url": "https://registry.example.com",
"agent_setup": ["npm install", "npm run setup"],
},
},
name="test-agent",
+ version="1.2.3",
extra_headers={"X-Custom": "header"},
)
@@ -957,6 +963,7 @@ async def test_create_from_npm_raises_when_source_provided(self, mock_async_clie
await client.create_from_npm(
name="test-agent",
package_name="@runloop/example-agent",
+ version="1.2.3",
source={"type": "git", "git": {"repository": "https://github.com/example/repo"}},
)
@@ -969,10 +976,11 @@ async def test_create_from_pip(self, mock_async_client: AsyncMock, agent_view: M
agent = await client.create_from_pip(
name="test-agent",
package_name="runloop-example-agent",
+ version="1.2.3",
)
assert isinstance(agent, AsyncAgent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_async_client.agents.create.assert_awaited_once_with(
source={
"type": "pip",
@@ -981,6 +989,7 @@ async def test_create_from_pip(self, mock_async_client: AsyncMock, agent_view: M
},
},
name="test-agent",
+ version="1.2.3",
)
@pytest.mark.asyncio
@@ -994,24 +1003,24 @@ async def test_create_from_pip_with_all_options(
agent = await client.create_from_pip(
name="test-agent",
package_name="runloop-example-agent",
- pip_version="1.2.3",
registry_url="https://pypi.example.com",
agent_setup=["pip install extra-deps"],
+ version="1.2.3",
)
assert isinstance(agent, AsyncAgent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_async_client.agents.create.assert_awaited_once_with(
source={
"type": "pip",
"pip": {
"package_name": "runloop-example-agent",
- "pip_version": "1.2.3",
"registry_url": "https://pypi.example.com",
"agent_setup": ["pip install extra-deps"],
},
},
name="test-agent",
+ version="1.2.3",
)
@pytest.mark.asyncio
@@ -1023,10 +1032,11 @@ async def test_create_from_git(self, mock_async_client: AsyncMock, agent_view: M
agent = await client.create_from_git(
name="test-agent",
repository="https://github.com/example/agent-repo",
+ version="1.2.3",
)
assert isinstance(agent, AsyncAgent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_async_client.agents.create.assert_awaited_once_with(
source={
"type": "git",
@@ -1035,6 +1045,7 @@ async def test_create_from_git(self, mock_async_client: AsyncMock, agent_view: M
},
},
name="test-agent",
+ version="1.2.3",
)
@pytest.mark.asyncio
@@ -1050,10 +1061,11 @@ async def test_create_from_git_with_all_options(
repository="https://github.com/example/agent-repo",
ref="develop",
agent_setup=["npm install", "npm run build"],
+ version="1.2.3",
)
assert isinstance(agent, AsyncAgent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_async_client.agents.create.assert_awaited_once_with(
source={
"type": "git",
@@ -1064,6 +1076,7 @@ async def test_create_from_git_with_all_options(
},
},
name="test-agent",
+ version="1.2.3",
)
@pytest.mark.asyncio
@@ -1075,10 +1088,11 @@ async def test_create_from_object(self, mock_async_client: AsyncMock, agent_view
agent = await client.create_from_object(
name="test-agent",
object_id="obj_123",
+ version="1.2.3",
)
assert isinstance(agent, AsyncAgent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_async_client.agents.create.assert_awaited_once_with(
source={
"type": "object",
@@ -1087,6 +1101,7 @@ async def test_create_from_object(self, mock_async_client: AsyncMock, agent_view
},
},
name="test-agent",
+ version="1.2.3",
)
@pytest.mark.asyncio
@@ -1101,10 +1116,11 @@ async def test_create_from_object_with_agent_setup(
name="test-agent",
object_id="obj_123",
agent_setup=["chmod +x setup.sh", "./setup.sh"],
+ version="1.2.3",
)
assert isinstance(agent, AsyncAgent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_async_client.agents.create.assert_awaited_once_with(
source={
"type": "object",
@@ -1114,6 +1130,7 @@ async def test_create_from_object_with_agent_setup(
},
},
name="test-agent",
+ version="1.2.3",
)
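
Every agent-creation hunk above makes the same API change: the package version moves out of the source-specific payload (npm_version / pip_version) and becomes a top-level version argument. A hedged sketch of the new call shape, with placeholder values:

    agent = await runloop.agent.create_from_npm(
        name="test-agent",
        package_name="@runloop/example-agent",
        version="1.2.3",                              # formerly npm_version in source["npm"]
        registry_url="https://registry.example.com",  # still passed inside the source payload
    )
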
@@ -1186,6 +1203,62 @@ async def async_iter():
mock_async_client.scenarios.list.assert_awaited_once()
+class TestAsyncBenchmarkOps:
+ """Tests for AsyncBenchmarkOps class."""
+
+ @pytest.mark.asyncio
+ async def test_create(self, mock_async_client: AsyncMock, benchmark_view: MockBenchmarkView) -> None:
+ """Test create method."""
+ mock_async_client.benchmarks.create = AsyncMock(return_value=benchmark_view)
+
+ ops = AsyncBenchmarkOps(mock_async_client)
+ benchmark = await ops.create(name="test-benchmark", scenario_ids=["scn_001", "scn_002"])
+
+ assert isinstance(benchmark, AsyncBenchmark)
+ assert benchmark.id == "bmd_123"
+ mock_async_client.benchmarks.create.assert_awaited_once_with(
+ name="test-benchmark", scenario_ids=["scn_001", "scn_002"]
+ )
+
+ def test_from_id(self, mock_async_client: AsyncMock) -> None:
+ """Test from_id method."""
+ ops = AsyncBenchmarkOps(mock_async_client)
+ benchmark = ops.from_id("bmd_123")
+
+ assert isinstance(benchmark, AsyncBenchmark)
+ assert benchmark.id == "bmd_123"
+
+ @pytest.mark.asyncio
+ async def test_list_multiple(self, mock_async_client: AsyncMock) -> None:
+ """Test list method with multiple results."""
+ benchmark_view1 = MockBenchmarkView(id="bmd_001", name="benchmark-1")
+ benchmark_view2 = MockBenchmarkView(id="bmd_002", name="benchmark-2")
+ page = SimpleNamespace(benchmarks=[benchmark_view1, benchmark_view2])
+ mock_async_client.benchmarks.list = AsyncMock(return_value=page)
+
+ ops = AsyncBenchmarkOps(mock_async_client)
+ benchmarks = await ops.list(limit=10)
+
+ assert len(benchmarks) == 2
+ assert isinstance(benchmarks[0], AsyncBenchmark)
+ assert isinstance(benchmarks[1], AsyncBenchmark)
+ assert benchmarks[0].id == "bmd_001"
+ assert benchmarks[1].id == "bmd_002"
+ mock_async_client.benchmarks.list.assert_awaited_once_with(limit=10)
+
+ @pytest.mark.asyncio
+ async def test_list_with_name_filter(self, mock_async_client: AsyncMock, benchmark_view: MockBenchmarkView) -> None:
+ """Test list method with name filter."""
+ page = SimpleNamespace(benchmarks=[benchmark_view])
+ mock_async_client.benchmarks.list = AsyncMock(return_value=page)
+
+ ops = AsyncBenchmarkOps(mock_async_client)
+ benchmarks = await ops.list(name="test-benchmark", limit=10)
+
+ assert len(benchmarks) == 1
+ mock_async_client.benchmarks.list.assert_awaited_once_with(name="test-benchmark", limit=10)
+
+
class TestAsyncRunloopSDK:
"""Tests for AsyncRunloopSDK class."""
@@ -1194,6 +1267,7 @@ def test_init(self) -> None:
runloop = AsyncRunloopSDK(bearer_token="test-token")
assert runloop.api is not None
assert isinstance(runloop.agent, AsyncAgentOps)
+ assert isinstance(runloop.benchmark, AsyncBenchmarkOps)
assert isinstance(runloop.devbox, AsyncDevboxOps)
assert isinstance(runloop.scorer, AsyncScorerOps)
assert isinstance(runloop.snapshot, AsyncSnapshotOps)
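
TestAsyncBenchmarkOps above documents the new benchmark surface on the async SDK. A usage sketch inferred from the mocked calls (the bearer token and IDs are placeholders; only the methods exercised by these tests are shown):

    import asyncio
    from runloop_api_client.sdk import AsyncRunloopSDK

    async def main() -> None:
        runloop = AsyncRunloopSDK(bearer_token="rl_...")
        # Create a benchmark from existing scenarios.
        benchmark = await runloop.benchmark.create(
            name="test-benchmark", scenario_ids=["scn_001", "scn_002"]
        )
        # Wrap a known ID without a network call.
        same = runloop.benchmark.from_id(benchmark.id)
        # List benchmarks, optionally filtered by name.
        benchmarks = await runloop.benchmark.list(name="test-benchmark", limit=10)
        print(same.id, len(benchmarks))

    asyncio.run(main())
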
diff --git a/tests/sdk/test_async_scenario.py b/tests/sdk/test_async_scenario.py
index 22a8f457a..cffca9b82 100644
--- a/tests/sdk/test_async_scenario.py
+++ b/tests/sdk/test_async_scenario.py
@@ -99,8 +99,8 @@ async def test_run_async(self, mock_async_client: AsyncMock, scenario_run_view:
scenario = AsyncScenario(mock_async_client, "scn_123")
run = await scenario.run_async(run_name="test-run")
- assert run.id == "run_123"
- assert run.devbox_id == "dev_123"
+ assert run.id == "scr_123"
+ assert run.devbox_id == "dbx_123"
mock_async_client.scenarios.start_run.assert_awaited_once_with(
scenario_id="scn_123",
run_name="test-run",
@@ -113,8 +113,8 @@ async def test_run(self, mock_async_client: AsyncMock, scenario_run_view: MockSc
scenario = AsyncScenario(mock_async_client, "scn_123")
run = await scenario.run(run_name="test-run")
- assert run.id == "run_123"
- assert run.devbox_id == "dev_123"
+ assert run.id == "scr_123"
+ assert run.devbox_id == "dbx_123"
mock_async_client.scenarios.start_run_and_await_env_ready.assert_awaited_once_with(
scenario_id="scn_123",
run_name="test-run",
diff --git a/tests/sdk/test_async_scenario_run.py b/tests/sdk/test_async_scenario_run.py
index 010ad6cbb..c034524a0 100644
--- a/tests/sdk/test_async_scenario_run.py
+++ b/tests/sdk/test_async_scenario_run.py
@@ -15,31 +15,31 @@ class TestAsyncScenarioRun:
def test_init(self, mock_async_client: AsyncMock) -> None:
"""Test AsyncScenarioRun initialization."""
- run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123")
- assert run.id == "run_123"
- assert run.devbox_id == "dev_123"
+ run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123")
+ assert run.id == "scr_123"
+ assert run.devbox_id == "dbx_123"
def test_repr(self, mock_async_client: AsyncMock) -> None:
"""Test AsyncScenarioRun string representation."""
- run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123")
- assert repr(run) == "<AsyncScenarioRun id=run_123 devbox_id=dev_123>"
+ run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123")
+ assert repr(run) == "<AsyncScenarioRun id=scr_123 devbox_id=dbx_123>"
def test_devbox_property(self, mock_async_client: AsyncMock) -> None:
"""Test devbox property returns AsyncDevbox wrapper."""
- run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123")
+ run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123")
devbox = run.devbox
- assert devbox.id == "dev_123"
+ assert devbox.id == "dbx_123"
async def test_get_info(self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView) -> None:
"""Test get_info method."""
mock_async_client.scenarios.runs.retrieve = AsyncMock(return_value=scenario_run_view)
- run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123")
+ run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123")
result = await run.get_info()
assert result == scenario_run_view
- mock_async_client.scenarios.runs.retrieve.assert_awaited_once_with("run_123")
+ mock_async_client.scenarios.runs.retrieve.assert_awaited_once_with("scr_123")
async def test_await_env_ready(
self,
@@ -51,10 +51,10 @@ async def test_await_env_ready(
mock_async_client.devboxes.await_running = AsyncMock(return_value=devbox_view)
mock_async_client.scenarios.runs.retrieve = AsyncMock(return_value=scenario_run_view)
- run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123")
+ run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123")
result = await run.await_env_ready()
- mock_async_client.devboxes.await_running.assert_awaited_once_with("dev_123", polling_config=None)
+ mock_async_client.devboxes.await_running.assert_awaited_once_with("dbx_123", polling_config=None)
assert result == scenario_run_view
async def test_score(self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView) -> None:
@@ -62,33 +62,33 @@ async def test_score(self, mock_async_client: AsyncMock, scenario_run_view: Mock
scenario_run_view.state = "scoring"
mock_async_client.scenarios.runs.score = AsyncMock(return_value=scenario_run_view)
- run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123")
+ run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123")
result = await run.score()
assert result == scenario_run_view
- mock_async_client.scenarios.runs.score.assert_awaited_once_with("run_123")
+ mock_async_client.scenarios.runs.score.assert_awaited_once_with("scr_123")
async def test_await_scored(self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView) -> None:
"""Test await_scored method."""
scenario_run_view.state = "scored"
mock_async_client.scenarios.runs.await_scored = AsyncMock(return_value=scenario_run_view)
- run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123")
+ run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123")
result = await run.await_scored()
assert result == scenario_run_view
- mock_async_client.scenarios.runs.await_scored.assert_awaited_once_with("run_123")
+ mock_async_client.scenarios.runs.await_scored.assert_awaited_once_with("scr_123")
async def test_score_and_await(self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView) -> None:
"""Test score_and_await method."""
scenario_run_view.state = "scored"
mock_async_client.scenarios.runs.score_and_await = AsyncMock(return_value=scenario_run_view)
- run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123")
+ run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123")
result = await run.score_and_await()
assert result == scenario_run_view
- mock_async_client.scenarios.runs.score_and_await.assert_awaited_once_with("run_123")
+ mock_async_client.scenarios.runs.score_and_await.assert_awaited_once_with("scr_123")
async def test_score_and_complete(
self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView
@@ -97,33 +97,33 @@ async def test_score_and_complete(
scenario_run_view.state = "completed"
mock_async_client.scenarios.runs.score_and_complete = AsyncMock(return_value=scenario_run_view)
- run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123")
+ run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123")
result = await run.score_and_complete()
assert result == scenario_run_view
- mock_async_client.scenarios.runs.score_and_complete.assert_awaited_once_with("run_123")
+ mock_async_client.scenarios.runs.score_and_complete.assert_awaited_once_with("scr_123")
async def test_complete(self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView) -> None:
"""Test complete method."""
scenario_run_view.state = "completed"
mock_async_client.scenarios.runs.complete = AsyncMock(return_value=scenario_run_view)
- run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123")
+ run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123")
result = await run.complete()
assert result == scenario_run_view
- mock_async_client.scenarios.runs.complete.assert_awaited_once_with("run_123")
+ mock_async_client.scenarios.runs.complete.assert_awaited_once_with("scr_123")
async def test_cancel(self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView) -> None:
"""Test cancel method."""
scenario_run_view.state = "canceled"
mock_async_client.scenarios.runs.cancel = AsyncMock(return_value=scenario_run_view)
- run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123")
+ run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123")
result = await run.cancel()
assert result == scenario_run_view
- mock_async_client.scenarios.runs.cancel.assert_awaited_once_with("run_123")
+ mock_async_client.scenarios.runs.cancel.assert_awaited_once_with("scr_123")
async def test_download_logs(self, mock_async_client: AsyncMock, tmp_path: Path) -> None:
"""Test download_logs method writes to file."""
@@ -131,11 +131,11 @@ async def test_download_logs(self, mock_async_client: AsyncMock, tmp_path: Path)
mock_response.write_to_file = AsyncMock()
mock_async_client.scenarios.runs.download_logs = AsyncMock(return_value=mock_response)
- run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123")
+ run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123")
output_path = tmp_path / "logs.zip"
await run.download_logs(output_path)
- mock_async_client.scenarios.runs.download_logs.assert_awaited_once_with("run_123")
+ mock_async_client.scenarios.runs.download_logs.assert_awaited_once_with("scr_123")
mock_response.write_to_file.assert_awaited_once_with(output_path)
async def test_get_score_when_scored(self, mock_async_client: AsyncMock) -> None:
@@ -144,19 +144,19 @@ async def test_get_score_when_scored(self, mock_async_client: AsyncMock) -> None
run_view = MockScenarioRunView(state="scored", scoring_contract_result=scoring_result)
mock_async_client.scenarios.runs.retrieve = AsyncMock(return_value=run_view)
- run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123")
+ run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123")
result = await run.get_score()
assert result == scoring_result
- mock_async_client.scenarios.runs.retrieve.assert_awaited_once_with("run_123")
+ mock_async_client.scenarios.runs.retrieve.assert_awaited_once_with("scr_123")
async def test_get_score_when_not_scored(self, mock_async_client: AsyncMock) -> None:
"""Test get_score returns None when not scored."""
run_view = MockScenarioRunView(state="running", scoring_contract_result=None)
mock_async_client.scenarios.runs.retrieve = AsyncMock(return_value=run_view)
- run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123")
+ run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123")
result = await run.get_score()
assert result is None
- mock_async_client.scenarios.runs.retrieve.assert_awaited_once_with("run_123")
+ mock_async_client.scenarios.runs.retrieve.assert_awaited_once_with("scr_123")
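
Taken together, the tests above trace the scenario-run lifecycle: wait for the backing devbox, score, then finalize. A compact sketch of that flow, assuming run came from AsyncScenario.run(...):

    await run.await_env_ready()           # blocks until the devbox (dbx_*) is running
    scored = await run.score_and_await()  # submits scoring and waits for "scored"
    result = await run.get_score()        # scoring_contract_result, or None if unscored
    await run.complete()                  # transitions the run to "completed"
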
diff --git a/tests/sdk/test_async_scorer.py b/tests/sdk/test_async_scorer.py
index a3eeea884..253ae9585 100644
--- a/tests/sdk/test_async_scorer.py
+++ b/tests/sdk/test_async_scorer.py
@@ -16,20 +16,20 @@ class TestAsyncScorer:
def test_init(self, mock_async_client: AsyncMock) -> None:
"""Test AsyncScorer initialization."""
- scorer = AsyncScorer(mock_async_client, "scorer_123")
- assert scorer.id == "scorer_123"
+ scorer = AsyncScorer(mock_async_client, "sco_123")
+ assert scorer.id == "sco_123"
def test_repr(self, mock_async_client: AsyncMock) -> None:
"""Test AsyncScorer string representation."""
- scorer = AsyncScorer(mock_async_client, "scorer_123")
- assert repr(scorer) == "<AsyncScorer id=scorer_123>"
+ scorer = AsyncScorer(mock_async_client, "sco_123")
+ assert repr(scorer) == "<AsyncScorer id=sco_123>"
@pytest.mark.asyncio
async def test_get_info(self, mock_async_client: AsyncMock, scorer_view: MockScorerView) -> None:
"""Test get_info method."""
mock_async_client.scenarios.scorers.retrieve = AsyncMock(return_value=scorer_view)
- scorer = AsyncScorer(mock_async_client, "scorer_123")
+ scorer = AsyncScorer(mock_async_client, "sco_123")
result = await scorer.get_info()
assert result == scorer_view
@@ -38,10 +38,10 @@ async def test_get_info(self, mock_async_client: AsyncMock, scorer_view: MockSco
@pytest.mark.asyncio
async def test_update(self, mock_async_client: AsyncMock) -> None:
"""Test update method."""
- update_response = SimpleNamespace(id="scorer_123", type="updated_scorer", bash_script="echo 'score=1.0'")
+ update_response = SimpleNamespace(id="sco_123", type="updated_scorer", bash_script="echo 'score=1.0'")
mock_async_client.scenarios.scorers.update = AsyncMock(return_value=update_response)
- scorer = AsyncScorer(mock_async_client, "scorer_123")
+ scorer = AsyncScorer(mock_async_client, "sco_123")
result = await scorer.update(
type="updated_scorer",
bash_script="echo 'score=1.0'",
@@ -60,7 +60,7 @@ async def test_validate(self, mock_async_client: AsyncMock) -> None:
)
mock_async_client.scenarios.scorers.validate = AsyncMock(return_value=validate_response)
- scorer = AsyncScorer(mock_async_client, "scorer_123")
+ scorer = AsyncScorer(mock_async_client, "sco_123")
result = await scorer.validate(
scoring_context={"test": "context"},
)
diff --git a/tests/sdk/test_async_snapshot.py b/tests/sdk/test_async_snapshot.py
index a7b946c11..e9dca48bc 100644
--- a/tests/sdk/test_async_snapshot.py
+++ b/tests/sdk/test_async_snapshot.py
@@ -17,20 +17,20 @@ class TestAsyncSnapshot:
def test_init(self, mock_async_client: AsyncMock) -> None:
"""Test AsyncSnapshot initialization."""
- snapshot = AsyncSnapshot(mock_async_client, "snap_123")
- assert snapshot.id == "snap_123"
+ snapshot = AsyncSnapshot(mock_async_client, "snp_123")
+ assert snapshot.id == "snp_123"
def test_repr(self, mock_async_client: AsyncMock) -> None:
"""Test AsyncSnapshot string representation."""
- snapshot = AsyncSnapshot(mock_async_client, "snap_123")
- assert repr(snapshot) == "<AsyncSnapshot id=snap_123>"
+ snapshot = AsyncSnapshot(mock_async_client, "snp_123")
+ assert repr(snapshot) == "<AsyncSnapshot id=snp_123>"
@pytest.mark.asyncio
async def test_get_info(self, mock_async_client: AsyncMock, snapshot_view: MockSnapshotView) -> None:
"""Test get_info method."""
mock_async_client.devboxes.disk_snapshots.query_status = AsyncMock(return_value=snapshot_view)
- snapshot = AsyncSnapshot(mock_async_client, "snap_123")
+ snapshot = AsyncSnapshot(mock_async_client, "snp_123")
result = await snapshot.get_info(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -44,10 +44,10 @@ async def test_get_info(self, mock_async_client: AsyncMock, snapshot_view: MockS
@pytest.mark.asyncio
async def test_update(self, mock_async_client: AsyncMock) -> None:
"""Test update method."""
- updated_snapshot = SimpleNamespace(id="snap_123", name="updated-name")
+ updated_snapshot = SimpleNamespace(id="snp_123", name="updated-name")
mock_async_client.devboxes.disk_snapshots.update = AsyncMock(return_value=updated_snapshot)
- snapshot = AsyncSnapshot(mock_async_client, "snap_123")
+ snapshot = AsyncSnapshot(mock_async_client, "snp_123")
result = await snapshot.update(
commit_message="Update message",
metadata={"key": "value"},
@@ -67,7 +67,7 @@ async def test_delete(self, mock_async_client: AsyncMock) -> None:
"""Test delete method."""
mock_async_client.devboxes.disk_snapshots.delete = AsyncMock(return_value=object())
- snapshot = AsyncSnapshot(mock_async_client, "snap_123")
+ snapshot = AsyncSnapshot(mock_async_client, "snp_123")
result = await snapshot.delete(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -85,7 +85,7 @@ async def test_await_completed(self, mock_async_client: AsyncMock, snapshot_view
mock_async_client.devboxes.disk_snapshots.await_completed = AsyncMock(return_value=snapshot_view)
polling_config = PollingConfig(timeout_seconds=60.0)
- snapshot = AsyncSnapshot(mock_async_client, "snap_123")
+ snapshot = AsyncSnapshot(mock_async_client, "snp_123")
result = await snapshot.await_completed(
polling_config=polling_config,
extra_headers={"X-Custom": "value"},
@@ -102,7 +102,7 @@ async def test_create_devbox(self, mock_async_client: AsyncMock, devbox_view: Mo
"""Test create_devbox method."""
mock_async_client.devboxes.create_and_await_running = AsyncMock(return_value=devbox_view)
- snapshot = AsyncSnapshot(mock_async_client, "snap_123")
+ snapshot = AsyncSnapshot(mock_async_client, "snp_123")
devbox = await snapshot.create_devbox(
name="test-devbox",
metadata={"key": "value"},
@@ -110,5 +110,5 @@ async def test_create_devbox(self, mock_async_client: AsyncMock, devbox_view: Mo
extra_headers={"X-Custom": "value"},
)
- assert devbox.id == "dev_123"
+ assert devbox.id == "dbx_123"
mock_async_client.devboxes.create_and_await_running.assert_awaited_once()
diff --git a/tests/sdk/test_benchmark.py b/tests/sdk/test_benchmark.py
new file mode 100644
index 000000000..1f4f12751
--- /dev/null
+++ b/tests/sdk/test_benchmark.py
@@ -0,0 +1,122 @@
+"""Comprehensive tests for sync Benchmark class."""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+from unittest.mock import Mock
+
+from tests.sdk.conftest import MockBenchmarkView, MockBenchmarkRunView
+from runloop_api_client.sdk.benchmark import Benchmark
+from runloop_api_client.sdk.benchmark_run import BenchmarkRun
+
+
+class TestBenchmark:
+ """Tests for Benchmark class."""
+
+ def test_init(self, mock_client: Mock) -> None:
+ """Test Benchmark initialization."""
+ benchmark = Benchmark(mock_client, "bmd_123")
+ assert benchmark.id == "bmd_123"
+ assert repr(benchmark) == "<Benchmark id=bmd_123>"
+
+ def test_get_info(self, mock_client: Mock, benchmark_view: MockBenchmarkView) -> None:
+ """Test get_info method."""
+ mock_client.benchmarks.retrieve.return_value = benchmark_view
+
+ benchmark = Benchmark(mock_client, "bmd_123")
+ result = benchmark.get_info()
+
+ assert result == benchmark_view
+ mock_client.benchmarks.retrieve.assert_called_once_with("bmd_123")
+
+ def test_update(self, mock_client: Mock, benchmark_view: MockBenchmarkView) -> None:
+ """Test update method."""
+ benchmark_view.name = "updated-name"
+ mock_client.benchmarks.update.return_value = benchmark_view
+
+ benchmark = Benchmark(mock_client, "bmd_123")
+ result = benchmark.update(name="updated-name")
+
+ assert result == benchmark_view
+ mock_client.benchmarks.update.assert_called_once_with("bmd_123", name="updated-name")
+
+ def test_start_run(self, mock_client: Mock, benchmark_run_view: MockBenchmarkRunView) -> None:
+ """Test start_run method."""
+ mock_client.benchmarks.start_run.return_value = benchmark_run_view
+
+ benchmark = Benchmark(mock_client, "bmd_123")
+ result = benchmark.start_run(run_name="test-run", metadata={"key": "value"})
+
+ assert isinstance(result, BenchmarkRun)
+ assert result.id == benchmark_run_view.id
+ assert result.benchmark_id == benchmark_run_view.benchmark_id
+ mock_client.benchmarks.start_run.assert_called_once_with(
+ benchmark_id="bmd_123", run_name="test-run", metadata={"key": "value"}
+ )
+
+ def test_add_scenarios(self, mock_client: Mock, benchmark_view: MockBenchmarkView) -> None:
+ """Test add_scenarios method."""
+ benchmark_view.scenario_ids = ["scn_001", "scn_002"]
+ mock_client.benchmarks.update_scenarios.return_value = benchmark_view
+
+ benchmark = Benchmark(mock_client, "bmd_123")
+ result = benchmark.add_scenarios(["scn_001", "scn_002"])
+
+ assert result == benchmark_view
+ mock_client.benchmarks.update_scenarios.assert_called_once_with(
+ "bmd_123", scenarios_to_add=["scn_001", "scn_002"]
+ )
+
+ def test_remove_scenarios(self, mock_client: Mock, benchmark_view: MockBenchmarkView) -> None:
+ """Test remove_scenarios method."""
+ mock_client.benchmarks.update_scenarios.return_value = benchmark_view
+
+ benchmark = Benchmark(mock_client, "bmd_123")
+ result = benchmark.remove_scenarios(["scn_001"])
+
+ assert result == benchmark_view
+ mock_client.benchmarks.update_scenarios.assert_called_once_with("bmd_123", scenarios_to_remove=["scn_001"])
+
+ def test_list_runs_single(self, mock_client: Mock, benchmark_run_view: MockBenchmarkRunView) -> None:
+ """Test list_runs method with single result."""
+ page = SimpleNamespace(runs=[benchmark_run_view])
+ mock_client.benchmarks.runs.list.return_value = page
+
+ benchmark = Benchmark(mock_client, "bmd_123")
+ result = benchmark.list_runs()
+
+ assert len(result) == 1
+ assert isinstance(result[0], BenchmarkRun)
+ assert result[0].id == benchmark_run_view.id
+ assert result[0].benchmark_id == benchmark_run_view.benchmark_id
+ mock_client.benchmarks.runs.list.assert_called_once_with(benchmark_id="bmd_123")
+
+ def test_list_runs_multiple(self, mock_client: Mock) -> None:
+ """Test list_runs method with multiple results."""
+ run_view1 = MockBenchmarkRunView(id="bmr_001")
+ run_view2 = MockBenchmarkRunView(id="bmr_002")
+ page = SimpleNamespace(runs=[run_view1, run_view2])
+ mock_client.benchmarks.runs.list.return_value = page
+
+ benchmark = Benchmark(mock_client, "bmd_123")
+ result = benchmark.list_runs()
+
+ assert len(result) == 2
+ assert isinstance(result[0], BenchmarkRun)
+ assert isinstance(result[1], BenchmarkRun)
+ assert result[0].id == run_view1.id
+ assert result[0].benchmark_id == run_view1.benchmark_id
+ assert result[1].id == run_view2.id
+ assert result[1].benchmark_id == run_view2.benchmark_id
+ mock_client.benchmarks.runs.list.assert_called_once_with(benchmark_id="bmd_123")
+
+ def test_list_runs_with_params(self, mock_client: Mock, benchmark_run_view: MockBenchmarkRunView) -> None:
+ """Test list_runs method with filtering parameters."""
+ page = SimpleNamespace(runs=[benchmark_run_view])
+ mock_client.benchmarks.runs.list.return_value = page
+
+ benchmark = Benchmark(mock_client, "bmd_123")
+ result = benchmark.list_runs(limit=10, name="test-run")
+
+ assert len(result) == 1
+ mock_client.benchmarks.runs.list.assert_called_once_with(benchmark_id="bmd_123", limit=10, name="test-run")
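
The new test_benchmark.py pins down the sync Benchmark wrapper. A usage sketch based only on the calls mocked above (the sync entry point name RunloopSDK is an assumption mirroring AsyncRunloopSDK, and all IDs are placeholders):

    from runloop_api_client.sdk import RunloopSDK

    runloop = RunloopSDK(bearer_token="rl_...")
    benchmark = runloop.benchmark.from_id("bmd_123")
    benchmark.add_scenarios(["scn_001", "scn_002"])   # delegates to benchmarks.update_scenarios
    run = benchmark.start_run(run_name="nightly")     # returns a BenchmarkRun wrapper
    for past_run in benchmark.list_runs(limit=10):
        print(past_run.id, past_run.benchmark_id)
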
diff --git a/tests/sdk/test_benchmark_run.py b/tests/sdk/test_benchmark_run.py
new file mode 100644
index 000000000..e7a826a90
--- /dev/null
+++ b/tests/sdk/test_benchmark_run.py
@@ -0,0 +1,114 @@
+"""Comprehensive tests for sync BenchmarkRun class."""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+from unittest.mock import Mock
+
+from tests.sdk.conftest import MockScenarioRunView, MockBenchmarkRunView
+from runloop_api_client.sdk.scenario_run import ScenarioRun
+from runloop_api_client.sdk.benchmark_run import BenchmarkRun
+
+
+class TestBenchmarkRun:
+ """Tests for BenchmarkRun class."""
+
+ def test_init(self, mock_client: Mock) -> None:
+ """Test BenchmarkRun initialization."""
+ run = BenchmarkRun(mock_client, "bmr_123", "bmd_123")
+ assert run.id == "bmr_123"
+ assert run.benchmark_id == "bmd_123"
+
+ def test_repr(self, mock_client: Mock) -> None:
+ """Test BenchmarkRun string representation."""
+ run = BenchmarkRun(mock_client, "bmr_123", "bmd_123")
+ assert repr(run) == "<BenchmarkRun id=bmr_123 benchmark_id=bmd_123>"
+
+ def test_get_info(self, mock_client: Mock, benchmark_run_view: MockBenchmarkRunView) -> None:
+ """Test get_info method."""
+ mock_client.benchmarks.runs.retrieve.return_value = benchmark_run_view
+
+ run = BenchmarkRun(mock_client, "bmr_123", "bmd_123")
+ result = run.get_info()
+
+ assert result == benchmark_run_view
+ mock_client.benchmarks.runs.retrieve.assert_called_once_with("bmr_123")
+
+ def test_cancel(self, mock_client: Mock, benchmark_run_view: MockBenchmarkRunView) -> None:
+ """Test cancel method."""
+ benchmark_run_view.state = "canceled"
+ mock_client.benchmarks.runs.cancel.return_value = benchmark_run_view
+
+ run = BenchmarkRun(mock_client, "bmr_123", "bmd_123")
+ result = run.cancel()
+
+ assert result == benchmark_run_view
+ assert result.state == "canceled"
+ mock_client.benchmarks.runs.cancel.assert_called_once_with("bmr_123")
+
+ def test_complete(self, mock_client: Mock, benchmark_run_view: MockBenchmarkRunView) -> None:
+ """Test complete method."""
+ benchmark_run_view.state = "completed"
+ mock_client.benchmarks.runs.complete.return_value = benchmark_run_view
+
+ run = BenchmarkRun(mock_client, "bmr_123", "bmd_123")
+ result = run.complete()
+
+ assert result == benchmark_run_view
+ assert result.state == "completed"
+ mock_client.benchmarks.runs.complete.assert_called_once_with("bmr_123")
+
+ def test_list_scenario_runs_empty(self, mock_client: Mock) -> None:
+ """Test list_scenario_runs method with empty results."""
+ page = SimpleNamespace(runs=[])
+ mock_client.benchmarks.runs.list_scenario_runs.return_value = page
+
+ run = BenchmarkRun(mock_client, "bmr_123", "bmd_123")
+ result = run.list_scenario_runs()
+
+ assert len(result) == 0
+ mock_client.benchmarks.runs.list_scenario_runs.assert_called_once_with("bmr_123")
+
+ def test_list_scenario_runs_single(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None:
+ """Test list_scenario_runs method with single result."""
+ page = SimpleNamespace(runs=[scenario_run_view])
+ mock_client.benchmarks.runs.list_scenario_runs.return_value = page
+
+ run = BenchmarkRun(mock_client, "bmr_123", "bmd_123")
+ result = run.list_scenario_runs()
+
+ assert len(result) == 1
+ assert isinstance(result[0], ScenarioRun)
+ assert result[0].id == scenario_run_view.id
+ assert result[0].devbox_id == scenario_run_view.devbox_id
+ mock_client.benchmarks.runs.list_scenario_runs.assert_called_once_with("bmr_123")
+
+ def test_list_scenario_runs_multiple(self, mock_client: Mock) -> None:
+ """Test list_scenario_runs method with multiple results."""
+ scenario_run_view1 = MockScenarioRunView(id="scr_001", devbox_id="dev_001")
+ scenario_run_view2 = MockScenarioRunView(id="scr_002", devbox_id="dev_002")
+ page = SimpleNamespace(runs=[scenario_run_view1, scenario_run_view2])
+ mock_client.benchmarks.runs.list_scenario_runs.return_value = page
+
+ run = BenchmarkRun(mock_client, "bmr_123", "bmd_123")
+ result = run.list_scenario_runs()
+
+ assert len(result) == 2
+ assert isinstance(result[0], ScenarioRun)
+ assert isinstance(result[1], ScenarioRun)
+ assert result[0].id == "scr_001"
+ assert result[1].id == "scr_002"
+ mock_client.benchmarks.runs.list_scenario_runs.assert_called_once_with("bmr_123")
+
+ def test_list_scenario_runs_with_params(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None:
+ """Test list_scenario_runs method with filtering parameters."""
+ page = SimpleNamespace(runs=[scenario_run_view])
+ mock_client.benchmarks.runs.list_scenario_runs.return_value = page
+
+ run = BenchmarkRun(mock_client, "bmr_123", "bmd_123")
+ result = run.list_scenario_runs(limit=10, state="completed")
+
+ assert len(result) == 1
+ assert isinstance(result[0], ScenarioRun)
+ assert result[0].id == scenario_run_view.id
+ mock_client.benchmarks.runs.list_scenario_runs.assert_called_once_with("bmr_123", limit=10, state="completed")
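
BenchmarkRun fans out to per-scenario runs, as the list_scenario_runs tests above show. A short sketch of inspecting those child runs (IDs and the state filter value are placeholders):

    # `run` is a BenchmarkRun; each element is a ScenarioRun wrapper.
    for scenario_run in run.list_scenario_runs(state="completed", limit=50):
        info = scenario_run.get_info()
        print(scenario_run.id, scenario_run.devbox_id, info.state)
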
diff --git a/tests/sdk/test_blueprint.py b/tests/sdk/test_blueprint.py
index 2c6bc6580..40cbed3f6 100644
--- a/tests/sdk/test_blueprint.py
+++ b/tests/sdk/test_blueprint.py
@@ -14,19 +14,19 @@ class TestBlueprint:
def test_init(self, mock_client: Mock) -> None:
"""Test Blueprint initialization."""
- blueprint = Blueprint(mock_client, "bp_123")
- assert blueprint.id == "bp_123"
+ blueprint = Blueprint(mock_client, "bpt_123")
+ assert blueprint.id == "bpt_123"
def test_repr(self, mock_client: Mock) -> None:
"""Test Blueprint string representation."""
- blueprint = Blueprint(mock_client, "bp_123")
- assert repr(blueprint) == "<Blueprint id=bp_123>"
+ blueprint = Blueprint(mock_client, "bpt_123")
+ assert repr(blueprint) == "<Blueprint id=bpt_123>"
def test_get_info(self, mock_client: Mock, blueprint_view: MockBlueprintView) -> None:
"""Test get_info method."""
mock_client.blueprints.retrieve.return_value = blueprint_view
- blueprint = Blueprint(mock_client, "bp_123")
+ blueprint = Blueprint(mock_client, "bpt_123")
result = blueprint.get_info(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -36,7 +36,7 @@ def test_get_info(self, mock_client: Mock, blueprint_view: MockBlueprintView) ->
assert result == blueprint_view
mock_client.blueprints.retrieve.assert_called_once_with(
- "bp_123",
+ "bpt_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
@@ -48,7 +48,7 @@ def test_logs(self, mock_client: Mock) -> None:
logs_view = SimpleNamespace(logs=[])
mock_client.blueprints.logs.return_value = logs_view
- blueprint = Blueprint(mock_client, "bp_123")
+ blueprint = Blueprint(mock_client, "bpt_123")
result = blueprint.logs(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -58,7 +58,7 @@ def test_logs(self, mock_client: Mock) -> None:
assert result == logs_view
mock_client.blueprints.logs.assert_called_once_with(
- "bp_123",
+ "bpt_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
@@ -69,7 +69,7 @@ def test_delete(self, mock_client: Mock) -> None:
"""Test delete method."""
mock_client.blueprints.delete.return_value = object()
- blueprint = Blueprint(mock_client, "bp_123")
+ blueprint = Blueprint(mock_client, "bpt_123")
result = blueprint.delete(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -79,7 +79,7 @@ def test_delete(self, mock_client: Mock) -> None:
assert result is not None # Verify return value is propagated
mock_client.blueprints.delete.assert_called_once_with(
- "bp_123",
+ "bpt_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
@@ -90,7 +90,7 @@ def test_create_devbox(self, mock_client: Mock, devbox_view: MockDevboxView) ->
"""Test create_devbox method."""
mock_client.devboxes.create_and_await_running.return_value = devbox_view
- blueprint = Blueprint(mock_client, "bp_123")
+ blueprint = Blueprint(mock_client, "bpt_123")
devbox = blueprint.create_devbox(
name="test-devbox",
metadata={"key": "value"},
@@ -98,9 +98,9 @@ def test_create_devbox(self, mock_client: Mock, devbox_view: MockDevboxView) ->
extra_headers={"X-Custom": "value"},
)
- assert devbox.id == "dev_123"
+ assert devbox.id == "dbx_123"
mock_client.devboxes.create_and_await_running.assert_called_once()
call_kwargs = mock_client.devboxes.create_and_await_running.call_args[1]
- assert call_kwargs["blueprint_id"] == "bp_123"
+ assert call_kwargs["blueprint_id"] == "bpt_123"
assert call_kwargs["name"] == "test-devbox"
assert call_kwargs["metadata"] == {"key": "value"}
diff --git a/tests/sdk/test_execution.py b/tests/sdk/test_execution.py
index 63b244d0e..249d670bf 100644
--- a/tests/sdk/test_execution.py
+++ b/tests/sdk/test_execution.py
@@ -83,9 +83,9 @@ class TestExecution:
def test_init(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test Execution initialization."""
- execution = Execution(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
- assert execution.execution_id == "exec_123"
- assert execution.devbox_id == "dev_123"
+ execution = Execution(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
+ assert execution.execution_id == "exn_123"
+ assert execution.devbox_id == "dbx_123"
assert execution._initial_result == execution_view
def test_init_with_streaming_group(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
@@ -94,46 +94,46 @@ def test_init_with_streaming_group(self, mock_client: Mock, execution_view: Mock
stop_event = threading.Event()
streaming_group = _StreamingGroup(threads, stop_event)
- execution = Execution(mock_client, "dev_123", execution_view, streaming_group) # type: ignore[arg-type]
+ execution = Execution(mock_client, "dbx_123", execution_view, streaming_group) # type: ignore[arg-type]
assert execution._streaming_group is streaming_group
def test_properties(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test Execution properties."""
- execution = Execution(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
- assert execution.execution_id == "exec_123"
- assert execution.devbox_id == "dev_123"
+ execution = Execution(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
+ assert execution.execution_id == "exn_123"
+ assert execution.devbox_id == "dbx_123"
def test_repr(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test Execution repr formatting."""
- execution = Execution(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
- assert repr(execution) == "<Execution execution_id='exec_123' devbox_id='dev_123'>"
+ execution = Execution(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
+ assert repr(execution) == "<Execution execution_id='exn_123' devbox_id='dbx_123'>"
def test_result_already_completed(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test result delegates to wait_for_command when already completed."""
mock_client.devboxes = Mock()
mock_client.devboxes.wait_for_command.return_value = execution_view
- execution = Execution(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
+ execution = Execution(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
result = execution.result()
assert result.exit_code == 0
assert result.stdout(num_lines=10) == "output"
mock_client.devboxes.wait_for_command.assert_called_once_with(
- "exec_123",
- devbox_id="dev_123",
+ "exn_123",
+ devbox_id="dbx_123",
statuses=["completed"],
)
def test_result_needs_polling(self, mock_client: Mock) -> None:
"""Test result when execution needs to poll for completion."""
running_execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
)
completed_execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="output",
@@ -145,27 +145,27 @@ def test_result_needs_polling(self, mock_client: Mock) -> None:
mock_client.devboxes = Mock()
mock_client.devboxes.wait_for_command.return_value = completed_execution
- execution = Execution(mock_client, "dev_123", running_execution) # type: ignore[arg-type]
+ execution = Execution(mock_client, "dbx_123", running_execution) # type: ignore[arg-type]
result = execution.result()
assert result.exit_code == 0
assert result.stdout(num_lines=10) == "output"
mock_client.devboxes.wait_for_command.assert_called_once_with(
- "exec_123",
- devbox_id="dev_123",
+ "exn_123",
+ devbox_id="dbx_123",
statuses=["completed"],
)
def test_result_with_streaming_group(self, mock_client: Mock) -> None:
"""Test result waits for streaming group to finish."""
running_execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
)
completed_execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="output",
@@ -180,7 +180,7 @@ def test_result_with_streaming_group(self, mock_client: Mock) -> None:
thread.start()
streaming_group = _StreamingGroup([thread], stop_event)
- execution = Execution(mock_client, "dev_123", running_execution, streaming_group) # type: ignore[arg-type]
+ execution = Execution(mock_client, "dbx_123", running_execution, streaming_group) # type: ignore[arg-type]
result = execution.result()
assert result.exit_code == 0
@@ -190,8 +190,8 @@ def test_result_with_streaming_group(self, mock_client: Mock) -> None:
def test_result_passes_options(self, mock_client: Mock) -> None:
"""Ensure options are forwarded to wait_for_command."""
execution_view = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="output",
@@ -201,12 +201,12 @@ def test_result_passes_options(self, mock_client: Mock) -> None:
mock_client.devboxes = Mock()
mock_client.devboxes.wait_for_command.return_value = execution_view
- execution = Execution(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
+ execution = Execution(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
execution.result(timeout=30.0, idempotency_key="abc123")
mock_client.devboxes.wait_for_command.assert_called_once_with(
- "exec_123",
- devbox_id="dev_123",
+ "exn_123",
+ devbox_id="dbx_123",
statuses=["completed"],
timeout=30.0,
idempotency_key="abc123",
@@ -215,31 +215,31 @@ def test_result_passes_options(self, mock_client: Mock) -> None:
def test_get_state(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test get_state method."""
updated_execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
)
mock_client.devboxes.executions = Mock()
mock_client.devboxes.executions.retrieve.return_value = updated_execution
- execution = Execution(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
+ execution = Execution(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
result = execution.get_state()
assert result == updated_execution
assert execution._initial_result == execution_view
mock_client.devboxes.executions.retrieve.assert_called_once_with(
- "exec_123",
- devbox_id="dev_123",
+ "exn_123",
+ devbox_id="dbx_123",
)
def test_kill(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test kill method."""
mock_client.devboxes.executions.kill.return_value = None
- execution = Execution(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
+ execution = Execution(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
execution.kill()
mock_client.devboxes.executions.kill.assert_called_once_with(
- "exec_123",
- devbox_id="dev_123",
+ "exn_123",
+ devbox_id="dbx_123",
)
diff --git a/tests/sdk/test_execution_result.py b/tests/sdk/test_execution_result.py
index 689b108d5..7bc4fbfef 100644
--- a/tests/sdk/test_execution_result.py
+++ b/tests/sdk/test_execution_result.py
@@ -14,31 +14,31 @@ class TestExecutionResult:
def test_init(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test ExecutionResult initialization."""
- result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
# Verify via public API
- assert result.devbox_id == "dev_123"
- assert result.execution_id == "exec_123"
+ assert result.devbox_id == "dbx_123"
+ assert result.execution_id == "exn_123"
def test_devbox_id_property(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test devbox_id property."""
- result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
- assert result.devbox_id == "dev_123"
+ result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
+ assert result.devbox_id == "dbx_123"
def test_execution_id_property(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test execution_id property."""
- result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
- assert result.execution_id == "exec_123"
+ result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
+ assert result.execution_id == "exn_123"
def test_exit_code_property(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test exit_code property."""
- result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
assert result.exit_code == 0
def test_exit_code_none(self, mock_client: Mock) -> None:
"""Test exit_code property when exit_status is None."""
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
exit_status=None,
stdout="",
@@ -46,19 +46,19 @@ def test_exit_code_none(self, mock_client: Mock) -> None:
stdout_truncated=False,
stderr_truncated=False,
)
- result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution) # type: ignore[arg-type]
assert result.exit_code is None
def test_success_property(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test success property."""
- result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
assert result.success is True
def test_success_false(self, mock_client: Mock) -> None:
"""Test success property when exit code is non-zero."""
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=1,
stdout="",
@@ -66,19 +66,19 @@ def test_success_false(self, mock_client: Mock) -> None:
stdout_truncated=False,
stderr_truncated=False,
)
- result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution) # type: ignore[arg-type]
assert result.success is False
def test_failed_property(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test failed property when exit code is zero."""
- result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
assert result.failed is False
def test_failed_true(self, mock_client: Mock) -> None:
"""Test failed property when exit code is non-zero."""
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=1,
stdout="",
@@ -86,14 +86,14 @@ def test_failed_true(self, mock_client: Mock) -> None:
stdout_truncated=False,
stderr_truncated=False,
)
- result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution) # type: ignore[arg-type]
assert result.failed is True
def test_failed_none(self, mock_client: Mock) -> None:
"""Test failed property when exit_status is None."""
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="running",
exit_status=None,
stdout="",
@@ -101,20 +101,20 @@ def test_failed_none(self, mock_client: Mock) -> None:
stdout_truncated=False,
stderr_truncated=False,
)
- result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution) # type: ignore[arg-type]
assert result.failed is False
def test_stdout(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test stdout method."""
- result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
assert result.stdout() == "output"
assert result.stdout(num_lines=10) == "output"
def test_stdout_empty(self, mock_client: Mock) -> None:
"""Test stdout method when stdout is None."""
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout=None,
@@ -122,14 +122,14 @@ def test_stdout_empty(self, mock_client: Mock) -> None:
stdout_truncated=False,
stderr_truncated=False,
)
- result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution) # type: ignore[arg-type]
assert result.stdout() == ""
def test_stderr(self, mock_client: Mock) -> None:
"""Test stderr method."""
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=1,
stdout="",
@@ -137,18 +137,18 @@ def test_stderr(self, mock_client: Mock) -> None:
stdout_truncated=False,
stderr_truncated=False,
)
- result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution) # type: ignore[arg-type]
assert result.stderr() == "error message"
assert result.stderr(num_lines=20) == "error message"
def test_stderr_empty(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test stderr method when stderr is None."""
- result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
assert result.stderr() == ""
def test_result_property(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test result property."""
- result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
assert result.result == execution_view
def test_stdout_with_truncation_and_streaming(self, mock_client: Mock, mock_stream: Mock) -> None:
@@ -165,8 +165,8 @@ def test_stdout_with_truncation_and_streaming(self, mock_client: Mock, mock_stre
mock_client.devboxes.executions.stream_stdout_updates = Mock(return_value=mock_stream)
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="partial",
@@ -174,12 +174,12 @@ def test_stdout_with_truncation_and_streaming(self, mock_client: Mock, mock_stre
stdout_truncated=True,
stderr_truncated=False,
)
- result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution) # type: ignore[arg-type]
# Should stream full output
output = result.stdout()
assert output == "line1\nline2\nline3\n"
- mock_client.devboxes.executions.stream_stdout_updates.assert_called_once_with("exec_123", devbox_id="dev_123")
+ mock_client.devboxes.executions.stream_stdout_updates.assert_called_once_with("exn_123", devbox_id="dbx_123")
def test_stderr_with_truncation_and_streaming(self, mock_client: Mock, mock_stream: Mock) -> None:
"""Test stderr streams full output when truncated."""
@@ -194,8 +194,8 @@ def test_stderr_with_truncation_and_streaming(self, mock_client: Mock, mock_stre
mock_client.devboxes.executions.stream_stderr_updates = Mock(return_value=mock_stream)
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="",
@@ -203,12 +203,12 @@ def test_stderr_with_truncation_and_streaming(self, mock_client: Mock, mock_stre
stdout_truncated=False,
stderr_truncated=True,
)
- result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution) # type: ignore[arg-type]
# Should stream full output
output = result.stderr()
assert output == "error1\nerror2\n"
- mock_client.devboxes.executions.stream_stderr_updates.assert_called_once_with("exec_123", devbox_id="dev_123")
+ mock_client.devboxes.executions.stream_stderr_updates.assert_called_once_with("exn_123", devbox_id="dbx_123")
def test_stdout_with_num_lines_when_truncated(self, mock_client: Mock, mock_stream: Mock) -> None:
"""Test stdout with num_lines parameter when truncated."""
@@ -223,8 +223,8 @@ def test_stdout_with_num_lines_when_truncated(self, mock_client: Mock, mock_stre
mock_client.devboxes.executions.stream_stdout_updates = Mock(return_value=mock_stream)
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="line1\n",
@@ -232,7 +232,7 @@ def test_stdout_with_num_lines_when_truncated(self, mock_client: Mock, mock_stre
stdout_truncated=True,
stderr_truncated=False,
)
- result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution) # type: ignore[arg-type]
# Should stream and return last 2 lines
output = result.stdout(num_lines=2)
@@ -241,8 +241,8 @@ def test_stdout_with_num_lines_when_truncated(self, mock_client: Mock, mock_stre
def test_stdout_no_streaming_when_not_truncated(self, mock_client: Mock) -> None:
"""Test stdout doesn't stream when not truncated."""
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="complete output",
@@ -250,7 +250,7 @@ def test_stdout_no_streaming_when_not_truncated(self, mock_client: Mock) -> None
stdout_truncated=False,
stderr_truncated=False,
)
- result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution) # type: ignore[arg-type]
# Should return existing output without streaming
output = result.stdout()
@@ -259,8 +259,8 @@ def test_stdout_no_streaming_when_not_truncated(self, mock_client: Mock) -> None
def test_stdout_with_num_lines_no_truncation(self, mock_client: Mock) -> None:
"""Test stdout with num_lines when not truncated."""
execution = SimpleNamespace(
- execution_id="exec_123",
- devbox_id="dev_123",
+ execution_id="exn_123",
+ devbox_id="dbx_123",
status="completed",
exit_status=0,
stdout="line1\nline2\nline3\nline4\nline5",
@@ -268,7 +268,7 @@ def test_stdout_with_num_lines_no_truncation(self, mock_client: Mock) -> None:
stdout_truncated=False,
stderr_truncated=False,
)
- result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution) # type: ignore[arg-type]
# Should return last 2 lines without streaming
output = result.stdout(num_lines=2)
@@ -276,7 +276,7 @@ def test_stdout_with_num_lines_no_truncation(self, mock_client: Mock) -> None:
def test_count_non_empty_lines(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test the _count_non_empty_lines helper method."""
- result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
# Test various input strings
assert result._count_non_empty_lines("") == 0
@@ -288,7 +288,7 @@ def test_count_non_empty_lines(self, mock_client: Mock, execution_view: MockExec
def test_get_last_n_lines(self, mock_client: Mock, execution_view: MockExecutionView) -> None:
"""Test the _get_last_n_lines helper method."""
- result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type]
+ result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type]
# Test various scenarios
assert result._get_last_n_lines("", 5) == ""
diff --git a/tests/sdk/test_ops.py b/tests/sdk/test_ops.py
index 7ac503933..af54776af 100644
--- a/tests/sdk/test_ops.py
+++ b/tests/sdk/test_ops.py
@@ -17,6 +17,7 @@
MockScorerView,
MockScenarioView,
MockSnapshotView,
+ MockBenchmarkView,
MockBlueprintView,
create_mock_httpx_response,
)
@@ -27,12 +28,14 @@
AgentOps,
Scenario,
Snapshot,
+ Benchmark,
Blueprint,
DevboxOps,
ScorerOps,
RunloopSDK,
ScenarioOps,
SnapshotOps,
+ BenchmarkOps,
BlueprintOps,
StorageObject,
StorageObjectOps,
@@ -55,7 +58,7 @@ def test_create(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
)
assert isinstance(devbox, Devbox)
- assert devbox.id == "dev_123"
+ assert devbox.id == "dbx_123"
mock_client.devboxes.create_and_await_running.assert_called_once()
def test_create_from_blueprint_id(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
@@ -64,15 +67,15 @@ def test_create_from_blueprint_id(self, mock_client: Mock, devbox_view: MockDevb
ops = DevboxOps(mock_client)
devbox = ops.create_from_blueprint_id(
- "bp_123",
+ "bpt_123",
name="test-devbox",
metadata={"key": "value"},
)
assert isinstance(devbox, Devbox)
- assert devbox.id == "dev_123"
+ assert devbox.id == "dbx_123"
call_kwargs = mock_client.devboxes.create_and_await_running.call_args[1]
- assert call_kwargs["blueprint_id"] == "bp_123"
+ assert call_kwargs["blueprint_id"] == "bpt_123"
def test_create_from_blueprint_name(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
"""Test create_from_blueprint_name method."""
@@ -94,24 +97,24 @@ def test_create_from_snapshot(self, mock_client: Mock, devbox_view: MockDevboxVi
ops = DevboxOps(mock_client)
devbox = ops.create_from_snapshot(
- "snap_123",
+ "snp_123",
name="test-devbox",
)
assert isinstance(devbox, Devbox)
call_kwargs = mock_client.devboxes.create_and_await_running.call_args[1]
- assert call_kwargs["snapshot_id"] == "snap_123"
+ assert call_kwargs["snapshot_id"] == "snp_123"
def test_from_id(self, mock_client: Mock, devbox_view: MockDevboxView) -> None:
"""Test from_id method waits for running."""
mock_client.devboxes.await_running.return_value = devbox_view
ops = DevboxOps(mock_client)
- devbox = ops.from_id("dev_123")
+ devbox = ops.from_id("dbx_123")
assert isinstance(devbox, Devbox)
- assert devbox.id == "dev_123"
- mock_client.devboxes.await_running.assert_called_once_with("dev_123")
+ assert devbox.id == "dbx_123"
+ mock_client.devboxes.await_running.assert_called_once_with("dbx_123")
def test_list_empty(self, mock_client: Mock) -> None:
"""Test list method with empty results."""
@@ -138,7 +141,7 @@ def test_list_single(self, mock_client: Mock, devbox_view: MockDevboxView) -> No
assert len(devboxes) == 1
assert isinstance(devboxes[0], Devbox)
- assert devboxes[0].id == "dev_123"
+ assert devboxes[0].id == "dbx_123"
mock_client.devboxes.list.assert_called_once()
def test_list_multiple(self, mock_client: Mock) -> None:
@@ -168,7 +171,7 @@ def test_list_empty(self, mock_client: Mock) -> None:
mock_client.devboxes.disk_snapshots.list.return_value = page
ops = SnapshotOps(mock_client)
- snapshots = ops.list(devbox_id="dev_123", limit=10)
+ snapshots = ops.list(devbox_id="dbx_123", limit=10)
assert len(snapshots) == 0
mock_client.devboxes.disk_snapshots.list.assert_called_once()
@@ -180,14 +183,14 @@ def test_list_single(self, mock_client: Mock, snapshot_view: MockSnapshotView) -
ops = SnapshotOps(mock_client)
snapshots = ops.list(
- devbox_id="dev_123",
+ devbox_id="dbx_123",
limit=10,
starting_after="snap_000",
)
assert len(snapshots) == 1
assert isinstance(snapshots[0], Snapshot)
- assert snapshots[0].id == "snap_123"
+ assert snapshots[0].id == "snp_123"
mock_client.devboxes.disk_snapshots.list.assert_called_once()
def test_list_multiple(self, mock_client: Mock) -> None:
@@ -198,7 +201,7 @@ def test_list_multiple(self, mock_client: Mock) -> None:
mock_client.devboxes.disk_snapshots.list.return_value = page
ops = SnapshotOps(mock_client)
- snapshots = ops.list(devbox_id="dev_123", limit=10)
+ snapshots = ops.list(devbox_id="dbx_123", limit=10)
assert len(snapshots) == 2
assert isinstance(snapshots[0], Snapshot)
@@ -210,10 +213,10 @@ def test_list_multiple(self, mock_client: Mock) -> None:
def test_from_id(self, mock_client: Mock) -> None:
"""Test from_id method."""
ops = SnapshotOps(mock_client)
- snapshot = ops.from_id("snap_123")
+ snapshot = ops.from_id("snp_123")
assert isinstance(snapshot, Snapshot)
- assert snapshot.id == "snap_123"
+ assert snapshot.id == "snp_123"
class TestBlueprintOps:
@@ -230,16 +233,16 @@ def test_create(self, mock_client: Mock, blueprint_view: MockBlueprintView) -> N
)
assert isinstance(blueprint, Blueprint)
- assert blueprint.id == "bp_123"
+ assert blueprint.id == "bpt_123"
mock_client.blueprints.create_and_await_build_complete.assert_called_once()
def test_from_id(self, mock_client: Mock) -> None:
"""Test from_id method."""
ops = BlueprintOps(mock_client)
- blueprint = ops.from_id("bp_123")
+ blueprint = ops.from_id("bpt_123")
assert isinstance(blueprint, Blueprint)
- assert blueprint.id == "bp_123"
+ assert blueprint.id == "bpt_123"
def test_list_empty(self, mock_client: Mock) -> None:
"""Test list method with empty results."""
@@ -266,7 +269,7 @@ def test_list_single(self, mock_client: Mock, blueprint_view: MockBlueprintView)
assert len(blueprints) == 1
assert isinstance(blueprints[0], Blueprint)
- assert blueprints[0].id == "bp_123"
+ assert blueprints[0].id == "bpt_123"
mock_client.blueprints.list.assert_called_once()
def test_list_multiple(self, mock_client: Mock) -> None:
@@ -660,16 +663,16 @@ def test_create(self, mock_client: Mock, scorer_view: MockScorerView) -> None:
)
assert isinstance(scorer, Scorer)
- assert scorer.id == "scorer_123"
+ assert scorer.id == "sco_123"
mock_client.scenarios.scorers.create.assert_called_once()
def test_from_id(self, mock_client: Mock) -> None:
"""Test from_id method."""
ops = ScorerOps(mock_client)
- scorer = ops.from_id("scorer_123")
+ scorer = ops.from_id("sco_123")
assert isinstance(scorer, Scorer)
- assert scorer.id == "scorer_123"
+ assert scorer.id == "sco_123"
def test_list_empty(self, mock_client: Mock) -> None:
"""Test list method with empty results."""
@@ -693,7 +696,7 @@ def test_list_single(self, mock_client: Mock, scorer_view: MockScorerView) -> No
assert len(scorers) == 1
assert isinstance(scorers[0], Scorer)
- assert scorers[0].id == "scorer_123"
+ assert scorers[0].id == "sco_123"
mock_client.scenarios.scorers.list.assert_called_once()
def test_list_multiple(self, mock_client: Mock) -> None:
@@ -723,19 +726,20 @@ def test_create(self, mock_client: Mock, agent_view: MockAgentView) -> None:
client = AgentOps(mock_client)
agent = client.create(
name="test-agent",
+ version="1.2.3",
)
assert isinstance(agent, Agent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_client.agents.create.assert_called_once()
def test_from_id(self, mock_client: Mock) -> None:
"""Test from_id method."""
client = AgentOps(mock_client)
- agent = client.from_id("agent_123")
+ agent = client.from_id("agt_123")
assert isinstance(agent, Agent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
def test_list(self, mock_client: Mock) -> None:
"""Test list method."""
@@ -817,10 +821,11 @@ def test_create_from_npm(self, mock_client: Mock, agent_view: MockAgentView) ->
agent = client.create_from_npm(
name="test-agent",
package_name="@runloop/example-agent",
+ version="1.2.3",
)
assert isinstance(agent, Agent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_client.agents.create.assert_called_once_with(
source={
"type": "npm",
@@ -829,6 +834,7 @@ def test_create_from_npm(self, mock_client: Mock, agent_view: MockAgentView) ->
},
},
name="test-agent",
+ version="1.2.3",
)
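+
+ # Note: the per-source `npm_version` (and `pip_version`) argument is replaced
+ # by the top-level `version` parameter, which these tests appear to pass
+ # uniformly for npm, pip, git, and object sources (see the parallel edits below).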
def test_create_from_npm_with_all_options(self, mock_client: Mock, agent_view: MockAgentView) -> None:
@@ -838,26 +844,26 @@ def test_create_from_npm_with_all_options(self, mock_client: Mock, agent_view: M
client = AgentOps(mock_client)
agent = client.create_from_npm(
package_name="@runloop/example-agent",
- npm_version="1.2.3",
registry_url="https://registry.example.com",
agent_setup=["npm install", "npm run setup"],
name="test-agent",
+ version="1.2.3",
extra_headers={"X-Custom": "header"},
)
assert isinstance(agent, Agent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_client.agents.create.assert_called_once_with(
source={
"type": "npm",
"npm": {
"package_name": "@runloop/example-agent",
- "npm_version": "1.2.3",
"registry_url": "https://registry.example.com",
"agent_setup": ["npm install", "npm run setup"],
},
},
name="test-agent",
+ version="1.2.3",
extra_headers={"X-Custom": "header"},
)
@@ -869,6 +875,7 @@ def test_create_from_npm_raises_when_source_provided(self, mock_client: Mock) ->
client.create_from_npm(
package_name="@runloop/example-agent",
name="test-agent",
+ version="1.2.3",
source={"type": "git", "git": {"repository": "https://github.com/example/repo"}},
)
@@ -880,10 +887,11 @@ def test_create_from_pip(self, mock_client: Mock, agent_view: MockAgentView) ->
agent = client.create_from_pip(
package_name="runloop-example-agent",
name="test-agent",
+ version="1.2.3",
)
assert isinstance(agent, Agent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_client.agents.create.assert_called_once_with(
source={
"type": "pip",
@@ -892,6 +900,7 @@ def test_create_from_pip(self, mock_client: Mock, agent_view: MockAgentView) ->
},
},
name="test-agent",
+ version="1.2.3",
)
def test_create_from_pip_with_all_options(self, mock_client: Mock, agent_view: MockAgentView) -> None:
@@ -901,25 +910,25 @@ def test_create_from_pip_with_all_options(self, mock_client: Mock, agent_view: M
client = AgentOps(mock_client)
agent = client.create_from_pip(
package_name="runloop-example-agent",
- pip_version="1.2.3",
registry_url="https://pypi.example.com",
agent_setup=["pip install extra-deps"],
name="test-agent",
+ version="1.2.3",
)
assert isinstance(agent, Agent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_client.agents.create.assert_called_once_with(
source={
"type": "pip",
"pip": {
"package_name": "runloop-example-agent",
- "pip_version": "1.2.3",
"registry_url": "https://pypi.example.com",
"agent_setup": ["pip install extra-deps"],
},
},
name="test-agent",
+ version="1.2.3",
)
def test_create_from_git(self, mock_client: Mock, agent_view: MockAgentView) -> None:
@@ -930,10 +939,11 @@ def test_create_from_git(self, mock_client: Mock, agent_view: MockAgentView) ->
agent = client.create_from_git(
repository="https://github.com/example/agent-repo",
name="test-agent",
+ version="1.2.3",
)
assert isinstance(agent, Agent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_client.agents.create.assert_called_once_with(
source={
"type": "git",
@@ -942,6 +952,7 @@ def test_create_from_git(self, mock_client: Mock, agent_view: MockAgentView) ->
},
},
name="test-agent",
+ version="1.2.3",
)
def test_create_from_git_with_all_options(self, mock_client: Mock, agent_view: MockAgentView) -> None:
@@ -954,10 +965,11 @@ def test_create_from_git_with_all_options(self, mock_client: Mock, agent_view: M
ref="develop",
agent_setup=["npm install", "npm run build"],
name="test-agent",
+ version="1.2.3",
)
assert isinstance(agent, Agent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_client.agents.create.assert_called_once_with(
source={
"type": "git",
@@ -968,6 +980,7 @@ def test_create_from_git_with_all_options(self, mock_client: Mock, agent_view: M
},
},
name="test-agent",
+ version="1.2.3",
)
def test_create_from_object(self, mock_client: Mock, agent_view: MockAgentView) -> None:
@@ -978,10 +991,11 @@ def test_create_from_object(self, mock_client: Mock, agent_view: MockAgentView)
agent = client.create_from_object(
object_id="obj_123",
name="test-agent",
+ version="1.2.3",
)
assert isinstance(agent, Agent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_client.agents.create.assert_called_once_with(
source={
"type": "object",
@@ -990,6 +1004,7 @@ def test_create_from_object(self, mock_client: Mock, agent_view: MockAgentView)
},
},
name="test-agent",
+ version="1.2.3",
)
def test_create_from_object_with_agent_setup(self, mock_client: Mock, agent_view: MockAgentView) -> None:
@@ -1001,10 +1016,11 @@ def test_create_from_object_with_agent_setup(self, mock_client: Mock, agent_view
object_id="obj_123",
agent_setup=["chmod +x setup.sh", "./setup.sh"],
name="test-agent",
+ version="1.2.3",
)
assert isinstance(agent, Agent)
- assert agent.id == "agent_123"
+ assert agent.id == "agt_123"
mock_client.agents.create.assert_called_once_with(
source={
"type": "object",
@@ -1014,6 +1030,7 @@ def test_create_from_object_with_agent_setup(self, mock_client: Mock, agent_view
},
},
name="test-agent",
+ version="1.2.3",
)
@@ -1071,6 +1088,59 @@ def test_list_multiple(self, mock_client: Mock) -> None:
mock_client.scenarios.list.assert_called_once()
+class TestBenchmarkOps:
+ """Tests for BenchmarkOps class."""
+
+ def test_create(self, mock_client: Mock, benchmark_view: MockBenchmarkView) -> None:
+ """Test create method."""
+ mock_client.benchmarks.create.return_value = benchmark_view
+
+ ops = BenchmarkOps(mock_client)
+ benchmark = ops.create(name="test-benchmark", scenario_ids=["scn_001", "scn_002"])
+
+ assert isinstance(benchmark, Benchmark)
+ assert benchmark.id == "bmd_123"
+ mock_client.benchmarks.create.assert_called_once_with(
+ name="test-benchmark", scenario_ids=["scn_001", "scn_002"]
+ )
+
+ def test_from_id(self, mock_client: Mock) -> None:
+ """Test from_id method."""
+ ops = BenchmarkOps(mock_client)
+ benchmark = ops.from_id("bmd_123")
+
+ assert isinstance(benchmark, Benchmark)
+ assert benchmark.id == "bmd_123"
+
+ def test_list_multiple(self, mock_client: Mock) -> None:
+ """Test list method with multiple results."""
+ benchmark_view1 = MockBenchmarkView(id="bmd_001", name="benchmark-1")
+ benchmark_view2 = MockBenchmarkView(id="bmd_002", name="benchmark-2")
+ page = SimpleNamespace(benchmarks=[benchmark_view1, benchmark_view2])
+ mock_client.benchmarks.list.return_value = page
+
+ ops = BenchmarkOps(mock_client)
+ benchmarks = ops.list(limit=10)
+
+ assert len(benchmarks) == 2
+ assert isinstance(benchmarks[0], Benchmark)
+ assert isinstance(benchmarks[1], Benchmark)
+ assert benchmarks[0].id == "bmd_001"
+ assert benchmarks[1].id == "bmd_002"
+ mock_client.benchmarks.list.assert_called_once_with(limit=10)
+
+ def test_list_with_name_filter(self, mock_client: Mock, benchmark_view: MockBenchmarkView) -> None:
+ """Test list method with name filter."""
+ page = SimpleNamespace(benchmarks=[benchmark_view])
+ mock_client.benchmarks.list.return_value = page
+
+ ops = BenchmarkOps(mock_client)
+ benchmarks = ops.list(name="test-benchmark", limit=10)
+
+ assert len(benchmarks) == 1
+ mock_client.benchmarks.list.assert_called_once_with(name="test-benchmark", limit=10)
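+
+ # Illustrative sketch only (a comment, not an executed test): assuming the
+ # sync Benchmark mirrors the AsyncBenchmark.start_run used in the smoketests
+ # below, the ops surface composes roughly as:
+ #   sdk = RunloopSDK(bearer_token="...")
+ #   benchmark = sdk.benchmark.create(name="nightly", scenario_ids=["scn_001"])
+ #   run = benchmark.start_run(run_name="nightly-run")
+ #   scenario_runs = run.list_scenario_runs(state="completed")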
+
+
class TestRunloopSDK:
"""Tests for RunloopSDK class."""
@@ -1079,6 +1149,7 @@ def test_init(self) -> None:
runloop = RunloopSDK(bearer_token="test-token")
assert runloop.api is not None
assert isinstance(runloop.agent, AgentOps)
+ assert isinstance(runloop.benchmark, BenchmarkOps)
assert isinstance(runloop.devbox, DevboxOps)
assert isinstance(runloop.scorer, ScorerOps)
assert isinstance(runloop.snapshot, SnapshotOps)
diff --git a/tests/sdk/test_scenario.py b/tests/sdk/test_scenario.py
index 3504c1714..e3aa5f1c8 100644
--- a/tests/sdk/test_scenario.py
+++ b/tests/sdk/test_scenario.py
@@ -99,8 +99,8 @@ def test_run_async(self, mock_client: Mock, scenario_run_view: MockScenarioRunVi
scenario = Scenario(mock_client, "scn_123")
run = scenario.run_async(run_name="test-run")
- assert run.id == "run_123"
- assert run.devbox_id == "dev_123"
+ assert run.id == "scr_123"
+ assert run.devbox_id == "dbx_123"
mock_client.scenarios.start_run.assert_called_once_with(
scenario_id="scn_123",
run_name="test-run",
@@ -113,8 +113,8 @@ def test_run(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) ->
scenario = Scenario(mock_client, "scn_123")
run = scenario.run(run_name="test-run")
- assert run.id == "run_123"
- assert run.devbox_id == "dev_123"
+ assert run.id == "scr_123"
+ assert run.devbox_id == "dbx_123"
mock_client.scenarios.start_run_and_await_env_ready.assert_called_once_with(
scenario_id="scn_123",
run_name="test-run",
diff --git a/tests/sdk/test_scenario_run.py b/tests/sdk/test_scenario_run.py
index 54ea6e89b..339e365f8 100644
--- a/tests/sdk/test_scenario_run.py
+++ b/tests/sdk/test_scenario_run.py
@@ -15,31 +15,31 @@ class TestScenarioRun:
def test_init(self, mock_client: Mock) -> None:
"""Test ScenarioRun initialization."""
- run = ScenarioRun(mock_client, "run_123", "dev_123")
- assert run.id == "run_123"
- assert run.devbox_id == "dev_123"
+ run = ScenarioRun(mock_client, "scr_123", "dbx_123")
+ assert run.id == "scr_123"
+ assert run.devbox_id == "dbx_123"
def test_repr(self, mock_client: Mock) -> None:
"""Test ScenarioRun string representation."""
- run = ScenarioRun(mock_client, "run_123", "dev_123")
- assert repr(run) == "<ScenarioRun id='run_123' devbox_id='dev_123'>"
+ run = ScenarioRun(mock_client, "scr_123", "dbx_123")
+ assert repr(run) == "<ScenarioRun id='scr_123' devbox_id='dbx_123'>"
def test_devbox_property(self, mock_client: Mock) -> None:
"""Test devbox property returns Devbox wrapper."""
- run = ScenarioRun(mock_client, "run_123", "dev_123")
+ run = ScenarioRun(mock_client, "scr_123", "dbx_123")
devbox = run.devbox
- assert devbox.id == "dev_123"
+ assert devbox.id == "dbx_123"
def test_get_info(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None:
"""Test get_info method."""
mock_client.scenarios.runs.retrieve.return_value = scenario_run_view
- run = ScenarioRun(mock_client, "run_123", "dev_123")
+ run = ScenarioRun(mock_client, "scr_123", "dbx_123")
result = run.get_info()
assert result == scenario_run_view
- mock_client.scenarios.runs.retrieve.assert_called_once_with("run_123")
+ mock_client.scenarios.runs.retrieve.assert_called_once_with("scr_123")
def test_await_env_ready(
self, mock_client: Mock, scenario_run_view: MockScenarioRunView, devbox_view: MockDevboxView
@@ -48,10 +48,10 @@ def test_await_env_ready(
mock_client.devboxes.await_running.return_value = devbox_view
mock_client.scenarios.runs.retrieve.return_value = scenario_run_view
- run = ScenarioRun(mock_client, "run_123", "dev_123")
+ run = ScenarioRun(mock_client, "scr_123", "dbx_123")
result = run.await_env_ready()
- mock_client.devboxes.await_running.assert_called_once_with("dev_123", polling_config=None)
+ mock_client.devboxes.await_running.assert_called_once_with("dbx_123", polling_config=None)
assert result == scenario_run_view
def test_score(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None:
@@ -59,66 +59,66 @@ def test_score(self, mock_client: Mock, scenario_run_view: MockScenarioRunView)
scenario_run_view.state = "scoring"
mock_client.scenarios.runs.score.return_value = scenario_run_view
- run = ScenarioRun(mock_client, "run_123", "dev_123")
+ run = ScenarioRun(mock_client, "scr_123", "dbx_123")
result = run.score()
assert result == scenario_run_view
- mock_client.scenarios.runs.score.assert_called_once_with("run_123")
+ mock_client.scenarios.runs.score.assert_called_once_with("scr_123")
def test_await_scored(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None:
"""Test await_scored method."""
scenario_run_view.state = "scored"
mock_client.scenarios.runs.await_scored.return_value = scenario_run_view
- run = ScenarioRun(mock_client, "run_123", "dev_123")
+ run = ScenarioRun(mock_client, "scr_123", "dbx_123")
result = run.await_scored()
assert result == scenario_run_view
- mock_client.scenarios.runs.await_scored.assert_called_once_with("run_123")
+ mock_client.scenarios.runs.await_scored.assert_called_once_with("scr_123")
def test_score_and_await(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None:
"""Test score_and_await method."""
scenario_run_view.state = "scored"
mock_client.scenarios.runs.score_and_await.return_value = scenario_run_view
- run = ScenarioRun(mock_client, "run_123", "dev_123")
+ run = ScenarioRun(mock_client, "scr_123", "dbx_123")
result = run.score_and_await()
assert result == scenario_run_view
- mock_client.scenarios.runs.score_and_await.assert_called_once_with("run_123")
+ mock_client.scenarios.runs.score_and_await.assert_called_once_with("scr_123")
def test_score_and_complete(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None:
"""Test score_and_complete method."""
scenario_run_view.state = "completed"
mock_client.scenarios.runs.score_and_complete.return_value = scenario_run_view
- run = ScenarioRun(mock_client, "run_123", "dev_123")
+ run = ScenarioRun(mock_client, "scr_123", "dbx_123")
result = run.score_and_complete()
assert result == scenario_run_view
- mock_client.scenarios.runs.score_and_complete.assert_called_once_with("run_123")
+ mock_client.scenarios.runs.score_and_complete.assert_called_once_with("scr_123")
def test_complete(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None:
"""Test complete method."""
scenario_run_view.state = "completed"
mock_client.scenarios.runs.complete.return_value = scenario_run_view
- run = ScenarioRun(mock_client, "run_123", "dev_123")
+ run = ScenarioRun(mock_client, "scr_123", "dbx_123")
result = run.complete()
assert result == scenario_run_view
- mock_client.scenarios.runs.complete.assert_called_once_with("run_123")
+ mock_client.scenarios.runs.complete.assert_called_once_with("scr_123")
def test_cancel(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None:
"""Test cancel method."""
scenario_run_view.state = "canceled"
mock_client.scenarios.runs.cancel.return_value = scenario_run_view
- run = ScenarioRun(mock_client, "run_123", "dev_123")
+ run = ScenarioRun(mock_client, "scr_123", "dbx_123")
result = run.cancel()
assert result == scenario_run_view
- mock_client.scenarios.runs.cancel.assert_called_once_with("run_123")
+ mock_client.scenarios.runs.cancel.assert_called_once_with("scr_123")
def test_download_logs(self, mock_client: Mock, tmp_path: Path) -> None:
"""Test download_logs method writes to file."""
@@ -126,11 +126,11 @@ def test_download_logs(self, mock_client: Mock, tmp_path: Path) -> None:
mock_response.write_to_file = Mock()
mock_client.scenarios.runs.download_logs.return_value = mock_response
- run = ScenarioRun(mock_client, "run_123", "dev_123")
+ run = ScenarioRun(mock_client, "scr_123", "dbx_123")
output_path = tmp_path / "logs.zip"
run.download_logs(output_path)
- mock_client.scenarios.runs.download_logs.assert_called_once_with("run_123")
+ mock_client.scenarios.runs.download_logs.assert_called_once_with("scr_123")
mock_response.write_to_file.assert_called_once_with(output_path)
def test_get_score_when_scored(self, mock_client: Mock) -> None:
@@ -139,19 +139,19 @@ def test_get_score_when_scored(self, mock_client: Mock) -> None:
run_view = MockScenarioRunView(state="scored", scoring_contract_result=scoring_result)
mock_client.scenarios.runs.retrieve.return_value = run_view
- run = ScenarioRun(mock_client, "run_123", "dev_123")
+ run = ScenarioRun(mock_client, "scr_123", "dbx_123")
result = run.get_score()
assert result == scoring_result
- mock_client.scenarios.runs.retrieve.assert_called_once_with("run_123")
+ mock_client.scenarios.runs.retrieve.assert_called_once_with("scr_123")
def test_get_score_when_not_scored(self, mock_client: Mock) -> None:
"""Test get_score returns None when not scored."""
run_view = MockScenarioRunView(state="running", scoring_contract_result=None)
mock_client.scenarios.runs.retrieve.return_value = run_view
- run = ScenarioRun(mock_client, "run_123", "dev_123")
+ run = ScenarioRun(mock_client, "scr_123", "dbx_123")
result = run.get_score()
assert result is None
- mock_client.scenarios.runs.retrieve.assert_called_once_with("run_123")
+ mock_client.scenarios.runs.retrieve.assert_called_once_with("scr_123")
diff --git a/tests/sdk/test_scorer.py b/tests/sdk/test_scorer.py
index 761a487cb..91b430db0 100644
--- a/tests/sdk/test_scorer.py
+++ b/tests/sdk/test_scorer.py
@@ -14,30 +14,30 @@ class TestScorer:
def test_init(self, mock_client: Mock) -> None:
"""Test Scorer initialization."""
- scorer = Scorer(mock_client, "scorer_123")
- assert scorer.id == "scorer_123"
+ scorer = Scorer(mock_client, "sco_123")
+ assert scorer.id == "sco_123"
def test_repr(self, mock_client: Mock) -> None:
"""Test Scorer string representation."""
- scorer = Scorer(mock_client, "scorer_123")
- assert repr(scorer) == "<Scorer id='scorer_123'>"
+ scorer = Scorer(mock_client, "sco_123")
+ assert repr(scorer) == "<Scorer id='sco_123'>"
def test_get_info(self, mock_client: Mock, scorer_view: MockScorerView) -> None:
"""Test get_info method."""
mock_client.scenarios.scorers.retrieve.return_value = scorer_view
- scorer = Scorer(mock_client, "scorer_123")
+ scorer = Scorer(mock_client, "sco_123")
result = scorer.get_info()
assert result == scorer_view
- mock_client.scenarios.scorers.retrieve.assert_called_once_with("scorer_123")
+ mock_client.scenarios.scorers.retrieve.assert_called_once_with("sco_123")
def test_update(self, mock_client: Mock) -> None:
"""Test update method."""
- update_response = SimpleNamespace(id="scorer_123", type="updated_scorer", bash_script="echo 'score=1.0'")
+ update_response = SimpleNamespace(id="sco_123", type="updated_scorer", bash_script="echo 'score=1.0'")
mock_client.scenarios.scorers.update.return_value = update_response
- scorer = Scorer(mock_client, "scorer_123")
+ scorer = Scorer(mock_client, "sco_123")
result = scorer.update(
type="updated_scorer",
bash_script="echo 'score=1.0'",
@@ -45,7 +45,7 @@ def test_update(self, mock_client: Mock) -> None:
assert result == update_response
mock_client.scenarios.scorers.update.assert_called_once_with(
- "scorer_123",
+ "sco_123",
type="updated_scorer",
bash_script="echo 'score=1.0'",
)
@@ -59,13 +59,13 @@ def test_validate(self, mock_client: Mock) -> None:
)
mock_client.scenarios.scorers.validate.return_value = validate_response
- scorer = Scorer(mock_client, "scorer_123")
+ scorer = Scorer(mock_client, "sco_123")
result = scorer.validate(
scoring_context={"test": "context"},
)
assert result == validate_response
mock_client.scenarios.scorers.validate.assert_called_once_with(
- "scorer_123",
+ "sco_123",
scoring_context={"test": "context"},
)
diff --git a/tests/sdk/test_snapshot.py b/tests/sdk/test_snapshot.py
index 383e812cc..4b066e29a 100644
--- a/tests/sdk/test_snapshot.py
+++ b/tests/sdk/test_snapshot.py
@@ -15,19 +15,19 @@ class TestSnapshot:
def test_init(self, mock_client: Mock) -> None:
"""Test Snapshot initialization."""
- snapshot = Snapshot(mock_client, "snap_123")
- assert snapshot.id == "snap_123"
+ snapshot = Snapshot(mock_client, "snp_123")
+ assert snapshot.id == "snp_123"
def test_repr(self, mock_client: Mock) -> None:
"""Test Snapshot string representation."""
- snapshot = Snapshot(mock_client, "snap_123")
- assert repr(snapshot) == "<Snapshot id='snap_123'>"
+ snapshot = Snapshot(mock_client, "snp_123")
+ assert repr(snapshot) == "<Snapshot id='snp_123'>"
def test_get_info(self, mock_client: Mock, snapshot_view: MockSnapshotView) -> None:
"""Test get_info method."""
mock_client.devboxes.disk_snapshots.query_status.return_value = snapshot_view
- snapshot = Snapshot(mock_client, "snap_123")
+ snapshot = Snapshot(mock_client, "snp_123")
result = snapshot.get_info(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -37,7 +37,7 @@ def test_get_info(self, mock_client: Mock, snapshot_view: MockSnapshotView) -> N
assert result == snapshot_view
mock_client.devboxes.disk_snapshots.query_status.assert_called_once_with(
- "snap_123",
+ "snp_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
@@ -46,10 +46,10 @@ def test_get_info(self, mock_client: Mock, snapshot_view: MockSnapshotView) -> N
def test_update(self, mock_client: Mock) -> None:
"""Test update method."""
- updated_snapshot = SimpleNamespace(id="snap_123", name="updated-name")
+ updated_snapshot = SimpleNamespace(id="snp_123", name="updated-name")
mock_client.devboxes.disk_snapshots.update.return_value = updated_snapshot
- snapshot = Snapshot(mock_client, "snap_123")
+ snapshot = Snapshot(mock_client, "snp_123")
result = snapshot.update(
commit_message="Update message",
metadata={"key": "value"},
@@ -63,7 +63,7 @@ def test_update(self, mock_client: Mock) -> None:
assert result == updated_snapshot
mock_client.devboxes.disk_snapshots.update.assert_called_once_with(
- "snap_123",
+ "snp_123",
commit_message="Update message",
metadata={"key": "value"},
name="updated-name",
@@ -78,7 +78,7 @@ def test_delete(self, mock_client: Mock) -> None:
"""Test delete method."""
mock_client.devboxes.disk_snapshots.delete.return_value = object()
- snapshot = Snapshot(mock_client, "snap_123")
+ snapshot = Snapshot(mock_client, "snp_123")
result = snapshot.delete(
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -89,7 +89,7 @@ def test_delete(self, mock_client: Mock) -> None:
assert result is not None # Verify return value is propagated
mock_client.devboxes.disk_snapshots.delete.assert_called_once_with(
- "snap_123",
+ "snp_123",
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
extra_body={"key": "value"},
@@ -102,7 +102,7 @@ def test_await_completed(self, mock_client: Mock, snapshot_view: MockSnapshotVie
mock_client.devboxes.disk_snapshots.await_completed.return_value = snapshot_view
polling_config = PollingConfig(timeout_seconds=60.0)
- snapshot = Snapshot(mock_client, "snap_123")
+ snapshot = Snapshot(mock_client, "snp_123")
result = snapshot.await_completed(
polling_config=polling_config,
extra_headers={"X-Custom": "value"},
@@ -113,7 +113,7 @@ def test_await_completed(self, mock_client: Mock, snapshot_view: MockSnapshotVie
assert result == snapshot_view
mock_client.devboxes.disk_snapshots.await_completed.assert_called_once_with(
- "snap_123",
+ "snp_123",
polling_config=polling_config,
extra_headers={"X-Custom": "value"},
extra_query={"param": "value"},
@@ -125,7 +125,7 @@ def test_create_devbox(self, mock_client: Mock, devbox_view: MockDevboxView) ->
"""Test create_devbox method."""
mock_client.devboxes.create_and_await_running.return_value = devbox_view
- snapshot = Snapshot(mock_client, "snap_123")
+ snapshot = Snapshot(mock_client, "snp_123")
devbox = snapshot.create_devbox(
name="test-devbox",
metadata={"key": "value"},
@@ -133,9 +133,9 @@ def test_create_devbox(self, mock_client: Mock, devbox_view: MockDevboxView) ->
extra_headers={"X-Custom": "value"},
)
- assert devbox.id == "dev_123"
+ assert devbox.id == "dbx_123"
mock_client.devboxes.create_and_await_running.assert_called_once()
call_kwargs = mock_client.devboxes.create_and_await_running.call_args[1]
- assert call_kwargs["snapshot_id"] == "snap_123"
+ assert call_kwargs["snapshot_id"] == "snp_123"
assert call_kwargs["name"] == "test-devbox"
assert call_kwargs["metadata"] == {"key": "value"}
diff --git a/tests/smoketests/sdk/test_agent.py b/tests/smoketests/sdk/test_agent.py
index deb659087..7ddfb6f70 100644
--- a/tests/smoketests/sdk/test_agent.py
+++ b/tests/smoketests/sdk/test_agent.py
@@ -12,6 +12,7 @@
THIRTY_SECOND_TIMEOUT = 30
TWO_MINUTE_TIMEOUT = 120
+AGENT_VERSION = "1.2.3"
class TestAgentLifecycle:
@@ -23,6 +24,7 @@ def test_agent_create_basic(self, sdk_client: RunloopSDK) -> None:
name = unique_name("sdk-agent-test-basic")
agent = sdk_client.agent.create(
name=name,
+ version=AGENT_VERSION,
source={
"type": "npm",
"npm": {
@@ -52,6 +54,7 @@ def test_agent_get_info(self, sdk_client: RunloopSDK) -> None:
name = unique_name("sdk-agent-test-info")
agent = sdk_client.agent.create(
name=name,
+ version=AGENT_VERSION,
source={
"type": "npm",
"npm": {
@@ -90,6 +93,7 @@ def test_get_agent_by_id(self, sdk_client: RunloopSDK) -> None:
# Create an agent
created = sdk_client.agent.create(
name=unique_name("sdk-agent-test-retrieve"),
+ version=AGENT_VERSION,
source={
"type": "npm",
"npm": {
@@ -121,9 +125,15 @@ def test_list_multiple_agents(self, sdk_client: RunloopSDK) -> None:
}
# Create multiple agents
- agent1 = sdk_client.agent.create(name=unique_name("sdk-agent-test-list-1"), source=source_config)
- agent2 = sdk_client.agent.create(name=unique_name("sdk-agent-test-list-2"), source=source_config)
- agent3 = sdk_client.agent.create(name=unique_name("sdk-agent-test-list-3"), source=source_config)
+ agent1 = sdk_client.agent.create(
+ name=unique_name("sdk-agent-test-list-1"), source=source_config, version=AGENT_VERSION
+ )
+ agent2 = sdk_client.agent.create(
+ name=unique_name("sdk-agent-test-list-2"), source=source_config, version=AGENT_VERSION
+ )
+ agent3 = sdk_client.agent.create(
+ name=unique_name("sdk-agent-test-list-3"), source=source_config, version=AGENT_VERSION
+ )
try:
# List agents
@@ -153,6 +163,7 @@ def test_agent_with_source_npm(self, sdk_client: RunloopSDK) -> None:
agent = sdk_client.agent.create(
name=name,
+ version=AGENT_VERSION,
source={
"type": "npm",
"npm": {
@@ -178,6 +189,7 @@ def test_agent_with_source_git(self, sdk_client: RunloopSDK) -> None:
agent = sdk_client.agent.create(
name=name,
+ version=AGENT_VERSION,
source={
"type": "git",
"git": {
diff --git a/tests/smoketests/sdk/test_async_agent.py b/tests/smoketests/sdk/test_async_agent.py
index fb9d17b42..36129605f 100644
--- a/tests/smoketests/sdk/test_async_agent.py
+++ b/tests/smoketests/sdk/test_async_agent.py
@@ -12,6 +12,7 @@
THIRTY_SECOND_TIMEOUT = 30
TWO_MINUTE_TIMEOUT = 120
+AGENT_VERSION = "1.2.3"
class TestAsyncAgentLifecycle:
@@ -23,6 +24,7 @@ async def test_agent_create_basic(self, async_sdk_client: AsyncRunloopSDK) -> No
name = unique_name("sdk-async-agent-test-basic")
agent = await async_sdk_client.agent.create(
name=name,
+ version=AGENT_VERSION,
source={
"type": "npm",
"npm": {
@@ -52,6 +54,7 @@ async def test_agent_get_info(self, async_sdk_client: AsyncRunloopSDK) -> None:
name = unique_name("sdk-async-agent-test-info")
agent = await async_sdk_client.agent.create(
name=name,
+ version=AGENT_VERSION,
source={
"type": "npm",
"npm": {
@@ -90,6 +93,7 @@ async def test_get_agent_by_id(self, async_sdk_client: AsyncRunloopSDK) -> None:
# Create an agent
created = await async_sdk_client.agent.create(
name=unique_name("sdk-async-agent-test-retrieve"),
+ version=AGENT_VERSION,
source={
"type": "npm",
"npm": {
@@ -122,13 +126,13 @@ async def test_list_multiple_agents(self, async_sdk_client: AsyncRunloopSDK) ->
# Create multiple agents
agent1 = await async_sdk_client.agent.create(
- name=unique_name("sdk-async-agent-test-list-1"), source=source_config
+ name=unique_name("sdk-async-agent-test-list-1"), source=source_config, version=AGENT_VERSION
)
agent2 = await async_sdk_client.agent.create(
- name=unique_name("sdk-async-agent-test-list-2"), source=source_config
+ name=unique_name("sdk-async-agent-test-list-2"), source=source_config, version=AGENT_VERSION
)
agent3 = await async_sdk_client.agent.create(
- name=unique_name("sdk-async-agent-test-list-3"), source=source_config
+ name=unique_name("sdk-async-agent-test-list-3"), source=source_config, version=AGENT_VERSION
)
try:
@@ -159,6 +163,7 @@ async def test_agent_with_source_npm(self, async_sdk_client: AsyncRunloopSDK) ->
agent = await async_sdk_client.agent.create(
name=name,
+ version=AGENT_VERSION,
source={
"type": "npm",
"npm": {
@@ -184,6 +189,7 @@ async def test_agent_with_source_git(self, async_sdk_client: AsyncRunloopSDK) ->
agent = await async_sdk_client.agent.create(
name=name,
+ version=AGENT_VERSION,
source={
"type": "git",
"git": {
diff --git a/tests/smoketests/sdk/test_async_benchmark.py b/tests/smoketests/sdk/test_async_benchmark.py
new file mode 100644
index 000000000..7316355a6
--- /dev/null
+++ b/tests/smoketests/sdk/test_async_benchmark.py
@@ -0,0 +1,192 @@
+"""Asynchronous SDK smoke tests for AsyncBenchmark operations.
+
+These tests validate the AsyncBenchmark class against the real API.
+We create a dedicated smoketest benchmark and scenarios with consistent names
+so that resources are reused across test runs (since there's no delete endpoint).
+"""
+
+from __future__ import annotations
+
+from typing import List, Tuple
+
+import pytest
+
+from runloop_api_client import AsyncRunloopSDK
+from runloop_api_client.sdk import AsyncScenario, AsyncBenchmark, AsyncScenarioRun, AsyncBenchmarkRun
+
+pytestmark = [pytest.mark.smoketest]
+
+TWO_MINUTE_TIMEOUT = 120
+
+# Consistent names for smoketest resources
+SMOKETEST_BENCHMARK_NAME = "sdk-smoketest-benchmark"
+SMOKETEST_SCENARIO_1_NAME = "sdk-smoketest-scenario-1"
+SMOKETEST_SCENARIO_2_NAME = "sdk-smoketest-scenario-2"
+
+
+async def get_or_create_scenario(
+ async_sdk_client: AsyncRunloopSDK,
+ name: str,
+ problem_statement: str,
+) -> AsyncScenario:
+ """Get an existing scenario by name or create a new one."""
+ # Check if scenario already exists
+ scenarios = await async_sdk_client.scenario.list(name=name, limit=1)
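+ # list() may return a paginated iterator rather than a plain list, so loop and take the first match if any.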
+ for scenario in scenarios:
+ # Return the first matching scenario
+ return scenario
+
+ # Create a new scenario using the SDK builder
+ return await (
+ async_sdk_client.scenario.builder(name)
+ .with_problem_statement(problem_statement)
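+ # "exit 0" makes the scorer pass unconditionally, keeping the smoketest deterministic.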
+ .add_shell_command_scorer("pass-scorer", command="exit 0")
+ .push()
+ )
+
+
+async def get_or_create_benchmark(
+ async_sdk_client: AsyncRunloopSDK,
+ name: str,
+ scenario_ids: List[str],
+) -> AsyncBenchmark:
+ """Get an existing benchmark by name or create a new one."""
+ # Check if benchmark already exists
+ benchmarks = await async_sdk_client.benchmark.list(name=name, limit=1)
+ for benchmark in benchmarks:
+ # Return the first matching benchmark
+ return benchmark
+
+ # Create a new benchmark
+ return await async_sdk_client.benchmark.create(
+ name=name,
+ scenario_ids=scenario_ids,
+ description="Smoketest benchmark for SDK testing",
+ )
+
+
+@pytest.fixture(scope="module")
+async def smoketest_benchmark(
+ async_sdk_client: AsyncRunloopSDK,
+) -> Tuple[AsyncBenchmark, List[str]]:
+ """Create or retrieve the smoketest benchmark and scenario IDs."""
+ # Create or get scenarios
+ scenario_1 = await get_or_create_scenario(
+ async_sdk_client,
+ SMOKETEST_SCENARIO_1_NAME,
+ "Smoketest scenario 1 - basic validation",
+ )
+ scenario_2 = await get_or_create_scenario(
+ async_sdk_client,
+ SMOKETEST_SCENARIO_2_NAME,
+ "Smoketest scenario 2 - basic validation",
+ )
+
+ scenario_ids = [scenario_1.id, scenario_2.id]
+
+ # Create or get benchmark
+ benchmark = await get_or_create_benchmark(
+ async_sdk_client,
+ SMOKETEST_BENCHMARK_NAME,
+ scenario_ids,
+ )
+
+ return benchmark, scenario_ids
+
+
+class TestAsyncBenchmarkRun:
+ """Test AsyncBenchmark run operations."""
+
+ @pytest.mark.timeout(TWO_MINUTE_TIMEOUT)
+ async def test_benchmark_run_and_cancel(
+ self,
+ async_sdk_client: AsyncRunloopSDK,
+ smoketest_benchmark: Tuple[AsyncBenchmark, List[str]],
+ ) -> None:
+ """Test starting and canceling a benchmark run.
+
+ This test:
+ 1. Uses the smoketest benchmark fixture
+ 2. Starts a new benchmark run via the AsyncBenchmark class
+ 3. Starts a scenario run for each scenario and checks list_scenario_runs()
+ 4. Cancels the scenario runs and the benchmark run
+ """
+ benchmark, scenario_ids = smoketest_benchmark
+
+ # Start a run
+ run = await benchmark.start_run(run_name="sdk-smoketest-async-benchmark-run")
+ scenario_runs: List[AsyncScenarioRun] = []
+
+ try:
+ assert isinstance(run, AsyncBenchmarkRun)
+ assert run.id is not None
+ assert run.benchmark_id == benchmark.id
+
+ # Get run info
+ info = await run.get_info()
+ assert info.id == run.id
+ assert info.state == "running"
+
+ # Start a scenario run for each scenario in the benchmark
+ for scenario_id in scenario_ids:
+ scenario = async_sdk_client.scenario.from_id(scenario_id)
+ scenario_runs.append(
+ await scenario.run_async(
+ benchmark_run_id=run.id, run_name="sdk-smoketest-async-benchmark-run-scenario"
+ )
+ )
+
+ benchmark_scenario_runs = await run.list_scenario_runs()
+ assert isinstance(benchmark_scenario_runs, list)
+ assert len(benchmark_scenario_runs) == len(scenario_runs)
+ for scenario_run in benchmark_scenario_runs:
+ assert isinstance(scenario_run, AsyncScenarioRun)
+ # Compare against the runs we started; don't reuse the outer loop variable
+ # name here, or each run would be compared with itself.
+ assert any(
+ started.id == scenario_run.id and started.devbox_id == scenario_run.devbox_id
+ for started in scenario_runs
+ )
+
+ # Cancel the scenario runs
+ for scenario_run in scenario_runs:
+ scenario_result = await scenario_run.cancel()
+ assert scenario_result.state in ["canceled", "completed"]
+
+ # Cancel the benchmark run
+ result = await run.cancel()
+ assert result.state in ["canceled", "completed"]
+
+ except Exception:
+ # Ensure cleanup on any error
+ for scenario_run in scenario_runs:
+ await scenario_run.cancel()
+ await run.cancel()
+ raise
+
+
+class TestAsyncBenchmarkListRuns:
+ """Test AsyncBenchmark list_runs operations."""
+
+ @pytest.mark.timeout(TWO_MINUTE_TIMEOUT)
+ async def test_list_runs(
+ self,
+ smoketest_benchmark: Tuple[AsyncBenchmark, List[str]],
+ ) -> None:
+ """Test listing benchmark runs.
+
+ This test:
+ 1. Uses the smoketest benchmark fixture
+ 2. Lists its runs
+ 3. Validates returned objects are AsyncBenchmarkRun instances
+ """
+ benchmark, _ = smoketest_benchmark
+
+ runs = await benchmark.list_runs()
+ assert isinstance(runs, list)
+ if not runs:
+ pytest.skip("No runs available to test")
+
+ # Verify returned items are AsyncBenchmarkRun objects
+ for run in runs:
+ assert isinstance(run, AsyncBenchmarkRun)
+ assert run.id is not None
+ assert run.benchmark_id == benchmark.id
diff --git a/tests/smoketests/sdk/test_benchmark.py b/tests/smoketests/sdk/test_benchmark.py
new file mode 100644
index 000000000..2dfe5bb6c
--- /dev/null
+++ b/tests/smoketests/sdk/test_benchmark.py
@@ -0,0 +1,190 @@
+"""Synchronous SDK smoke tests for Benchmark operations.
+
+These tests validate the Benchmark class against the real API.
+We create a dedicated smoketest benchmark and scenarios with consistent names
+so that resources are reused across test runs (since there's no delete endpoint).
+"""
+
+from __future__ import annotations
+
+from typing import List, Tuple
+
+import pytest
+
+from runloop_api_client import RunloopSDK
+from runloop_api_client.sdk import Scenario, Benchmark, ScenarioRun, BenchmarkRun
+
+pytestmark = [pytest.mark.smoketest]
+
+TWO_MINUTE_TIMEOUT = 120
+
+# Consistent names for smoketest resources
+SMOKETEST_BENCHMARK_NAME = "sdk-smoketest-benchmark"
+SMOKETEST_SCENARIO_1_NAME = "sdk-smoketest-scenario-1"
+SMOKETEST_SCENARIO_2_NAME = "sdk-smoketest-scenario-2"
+
+
+def get_or_create_scenario(
+ sdk_client: RunloopSDK,
+ name: str,
+ problem_statement: str,
+) -> Scenario:
+ """Get an existing scenario by name or create a new one."""
+ # Check if scenario already exists
+ scenarios = sdk_client.scenario.list(name=name, limit=1)
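+ # list() may return a paginated iterator rather than a plain list, so loop and take the first match if any.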
+ for scenario in scenarios:
+ # Return the first matching scenario
+ return scenario
+
+ # Create a new scenario using the SDK builder
+ return (
+ sdk_client.scenario.builder(name)
+ .with_problem_statement(problem_statement)
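+ # "exit 0" makes the scorer pass unconditionally, keeping the smoketest deterministic.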
+ .add_shell_command_scorer("pass-scorer", command="exit 0")
+ .push()
+ )
+
+
+def get_or_create_benchmark(
+ sdk_client: RunloopSDK,
+ name: str,
+ scenario_ids: List[str],
+) -> Benchmark:
+ """Get an existing benchmark by name or create a new one."""
+ # Check if benchmark already exists
+ benchmarks = sdk_client.benchmark.list(name=name, limit=1)
+ for benchmark in benchmarks:
+ # Return the first matching benchmark
+ return benchmark
+
+ # Create a new benchmark
+ return sdk_client.benchmark.create(
+ name=name,
+ scenario_ids=scenario_ids,
+ description="Smoketest benchmark for SDK testing",
+ )
+
+
+@pytest.fixture(scope="module")
+def smoketest_benchmark(
+ sdk_client: RunloopSDK,
+) -> Tuple[Benchmark, List[str]]:
+ """Create or retrieve the smoketest benchmark and scenarios."""
+ # Create or get scenarios
+ scenario_1 = get_or_create_scenario(
+ sdk_client,
+ SMOKETEST_SCENARIO_1_NAME,
+ "Smoketest scenario 1 - basic validation",
+ )
+ scenario_2 = get_or_create_scenario(
+ sdk_client,
+ SMOKETEST_SCENARIO_2_NAME,
+ "Smoketest scenario 2 - basic validation",
+ )
+
+ scenario_ids = [scenario_1.id, scenario_2.id]
+
+ # Create or get benchmark
+ benchmark = get_or_create_benchmark(
+ sdk_client,
+ SMOKETEST_BENCHMARK_NAME,
+ scenario_ids,
+ )
+
+ return benchmark, scenario_ids
+
+
+class TestBenchmarkRun:
+ """Test Benchmark run operations."""
+
+ @pytest.mark.timeout(TWO_MINUTE_TIMEOUT)
+ def test_benchmark_run_lifecycle(
+ self,
+ sdk_client: RunloopSDK,
+ smoketest_benchmark: Tuple[Benchmark, List[str]],
+ ) -> None:
+ """Test starting and canceling a benchmark run.
+
+ This test:
+ 1. Uses the smoketest benchmark fixture
+ 2. Starts a new benchmark run via the Benchmark class
+ 3. Starts a scenario run for each scenario and checks list_scenario_runs()
+ 4. Cancels the scenario runs and the benchmark run
+ """
+ benchmark, scenario_ids = smoketest_benchmark
+
+ # Start a run
+ run = benchmark.start_run(run_name="sdk-smoketest-benchmark-run")
+ scenario_runs: List[ScenarioRun] = []
+
+ try:
+ assert isinstance(run, BenchmarkRun)
+ assert run.id is not None
+ assert run.benchmark_id == benchmark.id
+
+ # Get run info
+ info = run.get_info()
+ assert info.id == run.id
+ assert info.state == "running"
+
+ # Start a scenario run for each scenario in the benchmark
+ for scenario_id in scenario_ids:
+ scenario = sdk_client.scenario.from_id(scenario_id)
+ scenario_runs.append(
+ scenario.run(benchmark_run_id=run.id, run_name="sdk-smoketest-benchmark-run-scenario")
+ )
+
+ benchmark_scenario_runs = run.list_scenario_runs()
+ assert isinstance(benchmark_scenario_runs, list)
+ assert len(benchmark_scenario_runs) == len(scenario_runs)
+ for scenario_run in benchmark_scenario_runs:
+ assert isinstance(scenario_run, ScenarioRun)
+ # Compare against the runs we started; don't reuse the outer loop variable
+ # name here, or each run would be compared with itself.
+ assert any(
+ started.id == scenario_run.id and started.devbox_id == scenario_run.devbox_id
+ for started in scenario_runs
+ )
+
+ # Cancel the scenario runs
+ for scenario_run in scenario_runs:
+ scenario_result = scenario_run.cancel()
+ assert scenario_result.state in ["canceled", "completed"]
+
+ # Cancel the benchmark run
+ result = run.cancel()
+ assert result.state in ["canceled", "completed"]
+
+ except Exception:
+ # Ensure cleanup on any error
+ for scenario_run in scenario_runs:
+ scenario_run.cancel()
+ run.cancel()
+ raise
+
+
+class TestBenchmarkListRuns:
+ """Test Benchmark list_runs operations."""
+
+ @pytest.mark.timeout(TWO_MINUTE_TIMEOUT)
+ def test_list_runs(
+ self,
+ smoketest_benchmark: Tuple[Benchmark, List[str]],
+ ) -> None:
+ """Test listing benchmark runs.
+
+ This test:
+ 1. Uses the smoketest benchmark fixture
+ 2. Lists its runs
+ 3. Validates returned objects are BenchmarkRun instances
+ """
+ benchmark, _ = smoketest_benchmark
+
+ runs = benchmark.list_runs()
+ assert isinstance(runs, list)
+ if not runs:
+ pytest.skip("No runs available to test")
+
+ # Verify returned items are BenchmarkRun objects
+ for run in runs:
+ assert isinstance(run, BenchmarkRun)
+ assert run.id is not None
+ assert run.benchmark_id == benchmark.id
diff --git a/tests/smoketests/test_snapshots.py b/tests/smoketests/test_snapshots.py
index 71b592320..0fc43ca23 100644
--- a/tests/smoketests/test_snapshots.py
+++ b/tests/smoketests/test_snapshots.py
@@ -31,7 +31,7 @@ def _cleanup(client: Runloop) -> Iterator[None]: # pyright: ignore[reportUnused
_snapshot_id = None
-@pytest.mark.timeout(30)
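+# Snapshotting a devbox can take well over 30 seconds, so allow up to two minutes.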
+@pytest.mark.timeout(120)
def test_snapshot_devbox(client: Runloop) -> None:
global _devbox_id, _snapshot_id
created = client.devboxes.create_and_await_running(