diff --git a/.release-please-manifest.json b/.release-please-manifest.json index d0ab6645f..2a8f4ffdd 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "1.2.0" + ".": "1.3.0" } \ No newline at end of file diff --git a/.stats.yml b/.stats.yml index ca5e82df9..aa9206944 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ -configured_endpoints: 97 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/runloop-ai%2Frunloop-f2df3524e4b99c38b634c334d098aa2c7d543d5ea0f49c4dd8f4d92723b81b94.yml -openapi_spec_hash: c377abec5716d1d6c5b01a527a5bfdfb -config_hash: 2363f563f42501d2b1587a4f64bdccaf +configured_endpoints: 98 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/runloop-ai%2Frunloop-5271153bd2f82579803953bd2fa1b9ea6466c979118804f64379fb14e9a9c436.yml +openapi_spec_hash: 95ac224a4b0f10e9ba6129a86746c9d4 +config_hash: cb8534d20a68a49b92726bedd50f8bb1 diff --git a/CHANGELOG.md b/CHANGELOG.md index 0165349d7..1d1ed6b23 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,42 @@ # Changelog +## 1.3.0 (2025-12-20) + +Full Changelog: [v1.2.0...v1.3.0](https://github.com/runloopai/api-client-python/compare/v1.2.0...v1.3.0) + +### ⚠ BREAKING CHANGES + +* remove support for pydantic-v1, pydantic-v2 is now default ([#710](https://github.com/runloopai/api-client-python/issues/710)) + +### Features + +* **benchmarks:** add `update_scenarios` method to benchmarks resource ([71ec221](https://github.com/runloopai/api-client-python/commit/71ec221f1d0cad7aac33c0299d3f8b1aa97d0741)) +* **devbox:** added stdin streaming endpoint ([83ae56a](https://github.com/runloopai/api-client-python/commit/83ae56a22a9c1d4528719321b9565731532191f2)) +* **scenarios:** add scenario builder to sdk ([#706](https://github.com/runloopai/api-client-python/issues/706)) ([2d41a15](https://github.com/runloopai/api-client-python/commit/2d41a15b4455ed8d7f6a8063cf19b82d51edeef8)) +* **sdk:** add Benchmark and AsyncBenchmark classes ([#714](https://github.com/runloopai/api-client-python/issues/714)) ([8909d8a](https://github.com/runloopai/api-client-python/commit/8909d8aabfc2f1c80ff74b636225b42cac6725ff)) +* **sdk:** add BenchmarkOps and AsyncBenchmarkOps to SDK ([#716](https://github.com/runloopai/api-client-python/issues/716)) ([9b434d9](https://github.com/runloopai/api-client-python/commit/9b434d9bc7ebdcea2b156689403d853a932f0d9e)) +* **sdk:** add BenchmarkRun and AsyncBenchmarkRun classes ([#712](https://github.com/runloopai/api-client-python/issues/712)) ([6aa83e2](https://github.com/runloopai/api-client-python/commit/6aa83e2a6c8a55694435bd2b707340770f0a326a)) + + +### Bug Fixes + +* **benchmarks:** `update()` for benchmarks and scenarios replaces all provided fields and does not modify unspecified fields ([#6702](https://github.com/runloopai/api-client-python/issues/6702)) ([cfd04b6](https://github.com/runloopai/api-client-python/commit/cfd04b6e7781534fd0e775e1b00793ad53814a47)) +* **types:** allow pyright to infer TypedDict types within SequenceNotStr ([3241717](https://github.com/runloopai/api-client-python/commit/32417177128b5f5d90b852a5460fe6823198cf9b)) +* use async_to_httpx_files in patch method ([88f8fb9](https://github.com/runloopai/api-client-python/commit/88f8fb92e1d48ff6f95833a7ee1e376bef76e0e1)) + + +### Chores + +* add documentation url to pypi project page ([#711](https://github.com/runloopai/api-client-python/issues/711)) 
([7afb327](https://github.com/runloopai/api-client-python/commit/7afb32731842ebee4f479837959ccac856bd5e85)) +* add missing docstrings ([a198632](https://github.com/runloopai/api-client-python/commit/a198632f6a3936bcf5b5b4f4e6324461c4853893)) +* **internal:** add missing files argument to base client ([b7065e2](https://github.com/runloopai/api-client-python/commit/b7065e204d00f853bcac75637680dc198346a804)) +* remove support for pydantic-v1, pydantic-v2 is now default ([#710](https://github.com/runloopai/api-client-python/issues/710)) ([fb3cc3d](https://github.com/runloopai/api-client-python/commit/fb3cc3d354d4279542cd20f44857f2ec28be7082)) + + +### Documentation + +* reformat sidebar and index pages to be more transparent, add favicon to browser tab ([#715](https://github.com/runloopai/api-client-python/issues/715)) ([1161b8f](https://github.com/runloopai/api-client-python/commit/1161b8fbe8d78dc572e0310da009e1bcc7dec36f)) + ## 1.2.0 (2025-12-09) Full Changelog: [v1.1.0...v1.2.0](https://github.com/runloopai/api-client-python/compare/v1.1.0...v1.2.0) diff --git a/api.md b/api.md index 17cc5978f..83c2c8b60 100644 --- a/api.md +++ b/api.md @@ -20,6 +20,8 @@ from runloop_api_client.types import ( BenchmarkCreateParameters, BenchmarkRunListView, BenchmarkRunView, + BenchmarkScenarioUpdateParameters, + BenchmarkUpdateParameters, BenchmarkView, ScenarioDefinitionListView, StartBenchmarkRunParameters, @@ -35,6 +37,7 @@ Methods: - client.benchmarks.definitions(id, \*\*params) -> ScenarioDefinitionListView - client.benchmarks.list_public(\*\*params) -> SyncBenchmarksCursorIDPage[BenchmarkView] - client.benchmarks.start_run(\*\*params) -> BenchmarkRunView +- client.benchmarks.update_scenarios(id, \*\*params) -> BenchmarkView ## Runs diff --git a/pyproject.toml b/pyproject.toml index 2c90fa53a..5007d5e66 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "runloop_api_client" -version = "1.2.0" +version = "1.3.0" description = "The official Python library for the runloop API" dynamic = ["readme"] license = "MIT" @@ -15,7 +15,7 @@ dependencies = [ "anyio>=3.5.0, <5", "distro>=1.7.0, <2", "sniffio", - "uuid-utils>=0.11.0", + "uuid-utils>=0.11.0", ] requires-python = ">= 3.9" diff --git a/requirements-dev.lock b/requirements-dev.lock index b9f3f2862..c48025dbf 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -94,7 +94,7 @@ python-dateutil==2.9.0.post0 ; python_full_version < '3.10' # via time-machine respx==0.22.0 rich==14.2.0 -ruff==0.14.8 +ruff==0.14.9 six==1.17.0 ; python_full_version < '3.10' # via python-dateutil sniffio==1.3.1 diff --git a/src/runloop_api_client/_base_client.py b/src/runloop_api_client/_base_client.py index f639d4201..5c05c86c5 100644 --- a/src/runloop_api_client/_base_client.py +++ b/src/runloop_api_client/_base_client.py @@ -1247,9 +1247,12 @@ def patch( *, cast_to: Type[ResponseT], body: Body | None = None, + files: RequestFiles | None = None, options: RequestOptions = {}, ) -> ResponseT: - opts = FinalRequestOptions.construct(method="patch", url=path, json_data=body, **options) + opts = FinalRequestOptions.construct( + method="patch", url=path, json_data=body, files=to_httpx_files(files), **options + ) return self.request(cast_to, opts) def put( @@ -1767,9 +1770,12 @@ async def patch( *, cast_to: Type[ResponseT], body: Body | None = None, + files: RequestFiles | None = None, options: RequestOptions = {}, ) -> ResponseT: - opts = FinalRequestOptions.construct(method="patch", url=path, json_data=body, **options) + opts = 
FinalRequestOptions.construct( + method="patch", url=path, json_data=body, files=await async_to_httpx_files(files), **options + ) return await self.request(cast_to, opts) async def put( diff --git a/src/runloop_api_client/_types.py b/src/runloop_api_client/_types.py index a2a12e84e..31df93064 100644 --- a/src/runloop_api_client/_types.py +++ b/src/runloop_api_client/_types.py @@ -243,6 +243,9 @@ class HttpxSendArgs(TypedDict, total=False): if TYPE_CHECKING: # This works because str.__contains__ does not accept object (either in typeshed or at runtime) # https://github.com/hauntsaninja/useful_types/blob/5e9710f3875107d068e7679fd7fec9cfab0eff3b/useful_types/__init__.py#L285 + # + # Note: index() and count() methods are intentionally omitted to allow pyright to properly + # infer TypedDict types when dict literals are used in lists assigned to SequenceNotStr. class SequenceNotStr(Protocol[_T_co]): @overload def __getitem__(self, index: SupportsIndex, /) -> _T_co: ... @@ -251,8 +254,6 @@ def __getitem__(self, index: slice, /) -> Sequence[_T_co]: ... def __contains__(self, value: object, /) -> bool: ... def __len__(self) -> int: ... def __iter__(self) -> Iterator[_T_co]: ... - def index(self, value: Any, start: int = 0, stop: int = ..., /) -> int: ... - def count(self, value: Any, /) -> int: ... def __reversed__(self) -> Iterator[_T_co]: ... else: # just point this to a normal `Sequence` at runtime to avoid having to special case diff --git a/src/runloop_api_client/_version.py b/src/runloop_api_client/_version.py index e13ec2fd5..c746bdc5e 100644 --- a/src/runloop_api_client/_version.py +++ b/src/runloop_api_client/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "runloop_api_client" -__version__ = "1.2.0" # x-release-please-version +__version__ = "1.3.0" # x-release-please-version diff --git a/src/runloop_api_client/resources/agents.py b/src/runloop_api_client/resources/agents.py index 6ff202d74..9ac9f8c02 100644 --- a/src/runloop_api_client/resources/agents.py +++ b/src/runloop_api_client/resources/agents.py @@ -49,6 +49,7 @@ def create( self, *, name: str, + version: str, source: Optional[AgentSource] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -66,6 +67,8 @@ def create( Args: name: The name of the Agent. + version: The version of the Agent. Must be a semver string (e.g., '2.0.65') or a SHA. + source: The source configuration for the Agent. extra_headers: Send extra headers @@ -83,6 +86,7 @@ def create( body=maybe_transform( { "name": name, + "version": version, "source": source, }, agent_create_params.AgentCreateParams, @@ -138,6 +142,7 @@ def list( name: str | Omit = omit, search: str | Omit = omit, starting_after: str | Omit = omit, + version: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -151,7 +156,7 @@ def list( Args: is_public: Filter agents by public visibility. - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. name: Filter agents by name (partial match supported). 
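A minimal usage sketch of the new agent `version` support shown in the agents.py hunks above and below (not part of the diff itself; the client construction and the name/version strings are illustrative, and the sketch assumes API credentials are configured in the environment):

from runloop_api_client import Runloop

client = Runloop()  # assumes credentials are picked up from the environment

# `version` is now a required create parameter: a semver string (e.g., '2.0.65') or a SHA.
agent = client.agents.create(name="example-agent", version="2.0.65")

# `version` is also a new list filter; per the docstring, passing 'latest'
# returns the most recently created agent.
for item in client.agents.list(version="latest"):
    print(item.id)
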
@@ -159,6 +164,8 @@ def list( starting_after: Load the next page of data starting after the item with the given ID. + version: Filter by version. Use 'latest' to get the most recently created agent. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -182,6 +189,7 @@ def list( "name": name, "search": search, "starting_after": starting_after, + "version": version, }, agent_list_params.AgentListParams, ), @@ -214,6 +222,7 @@ async def create( self, *, name: str, + version: str, source: Optional[AgentSource] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -231,6 +240,8 @@ async def create( Args: name: The name of the Agent. + version: The version of the Agent. Must be a semver string (e.g., '2.0.65') or a SHA. + source: The source configuration for the Agent. extra_headers: Send extra headers @@ -248,6 +259,7 @@ async def create( body=await async_maybe_transform( { "name": name, + "version": version, "source": source, }, agent_create_params.AgentCreateParams, @@ -303,6 +315,7 @@ def list( name: str | Omit = omit, search: str | Omit = omit, starting_after: str | Omit = omit, + version: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -316,7 +329,7 @@ def list( Args: is_public: Filter agents by public visibility. - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. name: Filter agents by name (partial match supported). @@ -324,6 +337,8 @@ def list( starting_after: Load the next page of data starting after the item with the given ID. + version: Filter by version. Use 'latest' to get the most recently created agent. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -347,6 +362,7 @@ def list( "name": name, "search": search, "starting_after": starting_after, + "version": version, }, agent_list_params.AgentListParams, ), diff --git a/src/runloop_api_client/resources/benchmarks/benchmarks.py b/src/runloop_api_client/resources/benchmarks/benchmarks.py index c30885e9e..9d9a30b5d 100644 --- a/src/runloop_api_client/resources/benchmarks/benchmarks.py +++ b/src/runloop_api_client/resources/benchmarks/benchmarks.py @@ -21,6 +21,7 @@ benchmark_start_run_params, benchmark_definitions_params, benchmark_list_public_params, + benchmark_update_scenarios_params, ) from ..._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given from ..._utils import maybe_transform, async_maybe_transform @@ -88,16 +89,16 @@ def create( Create a Benchmark with a set of Scenarios. Args: - name: The name of the Benchmark. This must be unique. + name: The unique name of the Benchmark. attribution: Attribution information for the benchmark. description: Detailed description of the benchmark. - metadata: User defined metadata to attach to the benchmark for organization. + metadata: User defined metadata to attach to the benchmark. required_environment_variables: Environment variables required to run the benchmark. 
If any required variables - are not supplied, the benchmark will fail to start + are not supplied, the benchmark will fail to start. required_secret_names: Secrets required to run the benchmark with (environment variable name will be mapped to the your user secret by name). If any of these secrets are not @@ -176,12 +177,12 @@ def update( self, id: str, *, - name: str, attribution: Optional[str] | Omit = omit, description: Optional[str] | Omit = omit, metadata: Optional[Dict[str, str]] | Omit = omit, + name: Optional[str] | Omit = omit, required_environment_variables: Optional[SequenceNotStr[str]] | Omit = omit, - required_secret_names: SequenceNotStr[str] | Omit = omit, + required_secret_names: Optional[SequenceNotStr[str]] | Omit = omit, scenario_ids: Optional[SequenceNotStr[str]] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -191,26 +192,30 @@ def update( timeout: float | httpx.Timeout | None | NotGiven = not_given, idempotency_key: str | None = None, ) -> BenchmarkView: - """ - Update a Benchmark with a set of Scenarios. + """Update a Benchmark. + + Fields that are null will preserve the existing value. + Fields that are provided (including empty values) will replace the existing + value entirely. Args: - name: The name of the Benchmark. This must be unique. + attribution: Attribution information for the benchmark. Pass in empty string to clear. - attribution: Attribution information for the benchmark. + description: Detailed description of the benchmark. Pass in empty string to clear. - description: Detailed description of the benchmark. + metadata: User defined metadata to attach to the benchmark. Pass in empty map to clear. - metadata: User defined metadata to attach to the benchmark for organization. + name: The unique name of the Benchmark. Cannot be blank. required_environment_variables: Environment variables required to run the benchmark. If any required variables - are not supplied, the benchmark will fail to start + are not supplied, the benchmark will fail to start. Pass in empty list to clear. required_secret_names: Secrets required to run the benchmark with (environment variable name will be mapped to the your user secret by name). If any of these secrets are not - provided or the mapping is incorrect, the benchmark will fail to start. + provided or the mapping is incorrect, the benchmark will fail to start. Pass in + empty list to clear. - scenario_ids: The Scenario IDs that make up the Benchmark. + scenario_ids: The Scenario IDs that make up the Benchmark. Pass in empty list to clear. extra_headers: Send extra headers @@ -228,10 +233,10 @@ def update( f"/v1/benchmarks/{id}", body=maybe_transform( { - "name": name, "attribution": attribution, "description": description, "metadata": metadata, + "name": name, "required_environment_variables": required_environment_variables, "required_secret_names": required_secret_names, "scenario_ids": scenario_ids, @@ -252,6 +257,7 @@ def list( self, *, limit: int | Omit = omit, + name: str | Omit = omit, starting_after: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -264,7 +270,9 @@ def list( List all Benchmarks matching filter. 
Args: - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. + + name: Filter by name starting_after: Load the next page of data starting after the item with the given ID. @@ -287,6 +295,7 @@ def list( query=maybe_transform( { "limit": limit, + "name": name, "starting_after": starting_after, }, benchmark_list_params.BenchmarkListParams, @@ -312,7 +321,7 @@ def definitions( Get scenario definitions for a previously created Benchmark. Args: - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. starting_after: Load the next page of data starting after the item with the given ID. @@ -360,7 +369,7 @@ def list_public( List all public benchmarks matching filter. Args: - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. starting_after: Load the next page of data starting after the item with the given ID. @@ -449,6 +458,59 @@ def start_run( cast_to=BenchmarkRunView, ) + def update_scenarios( + self, + id: str, + *, + scenarios_to_add: Optional[SequenceNotStr[str]] | Omit = omit, + scenarios_to_remove: Optional[SequenceNotStr[str]] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + idempotency_key: str | None = None, + ) -> BenchmarkView: + """ + Add and/or remove Scenario IDs from an existing Benchmark. + + Args: + scenarios_to_add: Scenario IDs to add to the Benchmark. + + scenarios_to_remove: Scenario IDs to remove from the Benchmark. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + + idempotency_key: Specify a custom idempotency key for this request + """ + if not id: + raise ValueError(f"Expected a non-empty value for `id` but received {id!r}") + return self._post( + f"/v1/benchmarks/{id}/scenarios", + body=maybe_transform( + { + "scenarios_to_add": scenarios_to_add, + "scenarios_to_remove": scenarios_to_remove, + }, + benchmark_update_scenarios_params.BenchmarkUpdateScenariosParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + idempotency_key=idempotency_key, + ), + cast_to=BenchmarkView, + ) + class AsyncBenchmarksResource(AsyncAPIResource): @cached_property @@ -496,16 +558,16 @@ async def create( Create a Benchmark with a set of Scenarios. Args: - name: The name of the Benchmark. This must be unique. + name: The unique name of the Benchmark. attribution: Attribution information for the benchmark. description: Detailed description of the benchmark. - metadata: User defined metadata to attach to the benchmark for organization. + metadata: User defined metadata to attach to the benchmark. required_environment_variables: Environment variables required to run the benchmark. If any required variables - are not supplied, the benchmark will fail to start + are not supplied, the benchmark will fail to start. 
required_secret_names: Secrets required to run the benchmark with (environment variable name will be mapped to the your user secret by name). If any of these secrets are not @@ -584,12 +646,12 @@ async def update( self, id: str, *, - name: str, attribution: Optional[str] | Omit = omit, description: Optional[str] | Omit = omit, metadata: Optional[Dict[str, str]] | Omit = omit, + name: Optional[str] | Omit = omit, required_environment_variables: Optional[SequenceNotStr[str]] | Omit = omit, - required_secret_names: SequenceNotStr[str] | Omit = omit, + required_secret_names: Optional[SequenceNotStr[str]] | Omit = omit, scenario_ids: Optional[SequenceNotStr[str]] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -599,26 +661,30 @@ async def update( timeout: float | httpx.Timeout | None | NotGiven = not_given, idempotency_key: str | None = None, ) -> BenchmarkView: - """ - Update a Benchmark with a set of Scenarios. + """Update a Benchmark. + + Fields that are null will preserve the existing value. + Fields that are provided (including empty values) will replace the existing + value entirely. Args: - name: The name of the Benchmark. This must be unique. + attribution: Attribution information for the benchmark. Pass in empty string to clear. - attribution: Attribution information for the benchmark. + description: Detailed description of the benchmark. Pass in empty string to clear. - description: Detailed description of the benchmark. + metadata: User defined metadata to attach to the benchmark. Pass in empty map to clear. - metadata: User defined metadata to attach to the benchmark for organization. + name: The unique name of the Benchmark. Cannot be blank. required_environment_variables: Environment variables required to run the benchmark. If any required variables - are not supplied, the benchmark will fail to start + are not supplied, the benchmark will fail to start. Pass in empty list to clear. required_secret_names: Secrets required to run the benchmark with (environment variable name will be mapped to the your user secret by name). If any of these secrets are not - provided or the mapping is incorrect, the benchmark will fail to start. + provided or the mapping is incorrect, the benchmark will fail to start. Pass in + empty list to clear. - scenario_ids: The Scenario IDs that make up the Benchmark. + scenario_ids: The Scenario IDs that make up the Benchmark. Pass in empty list to clear. extra_headers: Send extra headers @@ -636,10 +702,10 @@ async def update( f"/v1/benchmarks/{id}", body=await async_maybe_transform( { - "name": name, "attribution": attribution, "description": description, "metadata": metadata, + "name": name, "required_environment_variables": required_environment_variables, "required_secret_names": required_secret_names, "scenario_ids": scenario_ids, @@ -660,6 +726,7 @@ def list( self, *, limit: int | Omit = omit, + name: str | Omit = omit, starting_after: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -672,7 +739,9 @@ def list( List all Benchmarks matching filter. Args: - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. 
+ + name: Filter by name starting_after: Load the next page of data starting after the item with the given ID. @@ -695,6 +764,7 @@ def list( query=maybe_transform( { "limit": limit, + "name": name, "starting_after": starting_after, }, benchmark_list_params.BenchmarkListParams, @@ -720,7 +790,7 @@ async def definitions( Get scenario definitions for a previously created Benchmark. Args: - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. starting_after: Load the next page of data starting after the item with the given ID. @@ -768,7 +838,7 @@ def list_public( List all public benchmarks matching filter. Args: - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. starting_after: Load the next page of data starting after the item with the given ID. @@ -857,6 +927,59 @@ async def start_run( cast_to=BenchmarkRunView, ) + async def update_scenarios( + self, + id: str, + *, + scenarios_to_add: Optional[SequenceNotStr[str]] | Omit = omit, + scenarios_to_remove: Optional[SequenceNotStr[str]] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + idempotency_key: str | None = None, + ) -> BenchmarkView: + """ + Add and/or remove Scenario IDs from an existing Benchmark. + + Args: + scenarios_to_add: Scenario IDs to add to the Benchmark. + + scenarios_to_remove: Scenario IDs to remove from the Benchmark. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + + idempotency_key: Specify a custom idempotency key for this request + """ + if not id: + raise ValueError(f"Expected a non-empty value for `id` but received {id!r}") + return await self._post( + f"/v1/benchmarks/{id}/scenarios", + body=await async_maybe_transform( + { + "scenarios_to_add": scenarios_to_add, + "scenarios_to_remove": scenarios_to_remove, + }, + benchmark_update_scenarios_params.BenchmarkUpdateScenariosParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + idempotency_key=idempotency_key, + ), + cast_to=BenchmarkView, + ) + class BenchmarksResourceWithRawResponse: def __init__(self, benchmarks: BenchmarksResource) -> None: @@ -883,6 +1006,9 @@ def __init__(self, benchmarks: BenchmarksResource) -> None: self.start_run = to_raw_response_wrapper( benchmarks.start_run, ) + self.update_scenarios = to_raw_response_wrapper( + benchmarks.update_scenarios, + ) @cached_property def runs(self) -> RunsResourceWithRawResponse: @@ -914,6 +1040,9 @@ def __init__(self, benchmarks: AsyncBenchmarksResource) -> None: self.start_run = async_to_raw_response_wrapper( benchmarks.start_run, ) + self.update_scenarios = async_to_raw_response_wrapper( + benchmarks.update_scenarios, + ) @cached_property def runs(self) -> AsyncRunsResourceWithRawResponse: @@ -945,6 +1074,9 @@ def __init__(self, benchmarks: BenchmarksResource) -> None: self.start_run = to_streamed_response_wrapper( benchmarks.start_run, ) + self.update_scenarios = to_streamed_response_wrapper( + benchmarks.update_scenarios, + ) @cached_property def runs(self) -> RunsResourceWithStreamingResponse: @@ -976,6 +1108,9 @@ def __init__(self, benchmarks: AsyncBenchmarksResource) -> None: self.start_run = async_to_streamed_response_wrapper( benchmarks.start_run, ) + self.update_scenarios = async_to_streamed_response_wrapper( + benchmarks.update_scenarios, + ) @cached_property def runs(self) -> AsyncRunsResourceWithStreamingResponse: diff --git a/src/runloop_api_client/resources/benchmarks/runs.py b/src/runloop_api_client/resources/benchmarks/runs.py index 6d69d160b..cdab6fd30 100644 --- a/src/runloop_api_client/resources/benchmarks/runs.py +++ b/src/runloop_api_client/resources/benchmarks/runs.py @@ -83,6 +83,7 @@ def list( *, benchmark_id: str | Omit = omit, limit: int | Omit = omit, + name: str | Omit = omit, starting_after: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -97,7 +98,9 @@ def list( Args: benchmark_id: The Benchmark ID to filter by. - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. + + name: Filter by name starting_after: Load the next page of data starting after the item with the given ID. @@ -121,6 +124,7 @@ def list( { "benchmark_id": benchmark_id, "limit": limit, + "name": name, "starting_after": starting_after, }, run_list_params.RunListParams, @@ -227,7 +231,7 @@ def list_scenario_runs( List started scenario runs for a benchmark run. Args: - limit: The limit of items to return. Default is 20. 
+ limit: The limit of items to return. Default is 20. Max is 5000. starting_after: Load the next page of data starting after the item with the given ID. @@ -322,6 +326,7 @@ def list( *, benchmark_id: str | Omit = omit, limit: int | Omit = omit, + name: str | Omit = omit, starting_after: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -336,7 +341,9 @@ def list( Args: benchmark_id: The Benchmark ID to filter by. - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. + + name: Filter by name starting_after: Load the next page of data starting after the item with the given ID. @@ -360,6 +367,7 @@ def list( { "benchmark_id": benchmark_id, "limit": limit, + "name": name, "starting_after": starting_after, }, run_list_params.RunListParams, @@ -466,7 +474,7 @@ def list_scenario_runs( List started scenario runs for a benchmark run. Args: - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. starting_after: Load the next page of data starting after the item with the given ID. diff --git a/src/runloop_api_client/resources/blueprints.py b/src/runloop_api_client/resources/blueprints.py index 8cc04c2e3..818365271 100644 --- a/src/runloop_api_client/resources/blueprints.py +++ b/src/runloop_api_client/resources/blueprints.py @@ -389,6 +389,7 @@ def list( limit: int | Omit = omit, name: str | Omit = omit, starting_after: str | Omit = omit, + status: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -400,12 +401,14 @@ def list( List all Blueprints or filter by name. Args: - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. name: Filter by name starting_after: Load the next page of data starting after the item with the given ID. + status: Filter by build status (queued, provisioning, building, failed, build_complete) + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -427,6 +430,7 @@ def list( "limit": limit, "name": name, "starting_after": starting_after, + "status": status, }, blueprint_list_params.BlueprintListParams, ), @@ -558,6 +562,7 @@ def list_public( limit: int | Omit = omit, name: str | Omit = omit, starting_after: str | Omit = omit, + status: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -569,12 +574,14 @@ def list_public( List all public Blueprints that are available to all users. Args: - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. name: Filter by name starting_after: Load the next page of data starting after the item with the given ID. 
+ status: Filter by build status (queued, provisioning, building, failed, build_complete) + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -596,6 +603,7 @@ def list_public( "limit": limit, "name": name, "starting_after": starting_after, + "status": status, }, blueprint_list_public_params.BlueprintListPublicParams, ), @@ -1028,6 +1036,7 @@ def list( limit: int | Omit = omit, name: str | Omit = omit, starting_after: str | Omit = omit, + status: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -1039,12 +1048,14 @@ def list( List all Blueprints or filter by name. Args: - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. name: Filter by name starting_after: Load the next page of data starting after the item with the given ID. + status: Filter by build status (queued, provisioning, building, failed, build_complete) + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -1066,6 +1077,7 @@ def list( "limit": limit, "name": name, "starting_after": starting_after, + "status": status, }, blueprint_list_params.BlueprintListParams, ), @@ -1197,6 +1209,7 @@ def list_public( limit: int | Omit = omit, name: str | Omit = omit, starting_after: str | Omit = omit, + status: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -1208,12 +1221,14 @@ def list_public( List all public Blueprints that are available to all users. Args: - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. name: Filter by name starting_after: Load the next page of data starting after the item with the given ID. + status: Filter by build status (queued, provisioning, building, failed, build_complete) + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -1235,6 +1250,7 @@ def list_public( "limit": limit, "name": name, "starting_after": starting_after, + "status": status, }, blueprint_list_public_params.BlueprintListPublicParams, ), diff --git a/src/runloop_api_client/resources/devboxes/devboxes.py b/src/runloop_api_client/resources/devboxes/devboxes.py index fc13c722d..dc7b1b492 100644 --- a/src/runloop_api_client/resources/devboxes/devboxes.py +++ b/src/runloop_api_client/resources/devboxes/devboxes.py @@ -558,7 +558,7 @@ def list( List all Devboxes while optionally filtering by status. Args: - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. starting_after: Load the next page of data starting after the item with the given ID. @@ -1102,7 +1102,7 @@ def list_disk_snapshots( Args: devbox_id: Devbox ID to filter by. - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. metadata_key: Filter snapshots by metadata key-value pair. Can be used multiple times for different keys. @@ -2093,7 +2093,7 @@ def list( List all Devboxes while optionally filtering by status. 
Args: - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. starting_after: Load the next page of data starting after the item with the given ID. @@ -2638,7 +2638,7 @@ def list_disk_snapshots( Args: devbox_id: Devbox ID to filter by. - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. metadata_key: Filter snapshots by metadata key-value pair. Can be used multiple times for different keys. diff --git a/src/runloop_api_client/resources/devboxes/disk_snapshots.py b/src/runloop_api_client/resources/devboxes/disk_snapshots.py index 0e3530374..b896adbb6 100644 --- a/src/runloop_api_client/resources/devboxes/disk_snapshots.py +++ b/src/runloop_api_client/resources/devboxes/disk_snapshots.py @@ -130,7 +130,7 @@ def list( Args: devbox_id: Devbox ID to filter by. - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. metadata_key: Filter snapshots by metadata key-value pair. Can be used multiple times for different keys. @@ -381,7 +381,7 @@ def list( Args: devbox_id: Devbox ID to filter by. - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. metadata_key: Filter snapshots by metadata key-value pair. Can be used multiple times for different keys. diff --git a/src/runloop_api_client/resources/objects.py b/src/runloop_api_client/resources/objects.py index 4d7d2e0a3..409d5f6f3 100644 --- a/src/runloop_api_client/resources/objects.py +++ b/src/runloop_api_client/resources/objects.py @@ -162,7 +162,7 @@ def list( Args: content_type: Filter storage objects by content type. - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. name: Filter storage objects by name (partial match supported). @@ -352,7 +352,7 @@ def list_public( Args: content_type: Filter storage objects by content type. - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. name: Filter storage objects by name (partial match supported). @@ -530,7 +530,7 @@ def list( Args: content_type: Filter storage objects by content type. - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. name: Filter storage objects by name (partial match supported). @@ -720,7 +720,7 @@ def list_public( Args: content_type: Filter storage objects by content type. - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. name: Filter storage objects by name (partial match supported). diff --git a/src/runloop_api_client/resources/repositories.py b/src/runloop_api_client/resources/repositories.py index e2b238750..a22075540 100644 --- a/src/runloop_api_client/resources/repositories.py +++ b/src/runloop_api_client/resources/repositories.py @@ -163,7 +163,7 @@ def list( List all available repository connections. Args: - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. name: Filter by repository name @@ -542,7 +542,7 @@ def list( List all available repository connections. Args: - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. 
name: Filter by repository name diff --git a/src/runloop_api_client/resources/scenarios/runs.py b/src/runloop_api_client/resources/scenarios/runs.py index a6a16a5a0..3ea9a960f 100644 --- a/src/runloop_api_client/resources/scenarios/runs.py +++ b/src/runloop_api_client/resources/scenarios/runs.py @@ -89,9 +89,12 @@ def retrieve( def list( self, *, + benchmark_run_id: str | Omit = omit, limit: int | Omit = omit, + name: str | Omit = omit, scenario_id: str | Omit = omit, starting_after: str | Omit = omit, + state: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -103,12 +106,18 @@ def list( List all ScenarioRuns matching filter. Args: - limit: The limit of items to return. Default is 20. + benchmark_run_id: Filter by benchmark run ID + + limit: The limit of items to return. Default is 20. Max is 5000. + + name: Filter by name scenario_id: Filter runs associated to Scenario given ID starting_after: Load the next page of data starting after the item with the given ID. + state: Filter by state + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -127,9 +136,12 @@ def list( timeout=timeout, query=maybe_transform( { + "benchmark_run_id": benchmark_run_id, "limit": limit, + "name": name, "scenario_id": scenario_id, "starting_after": starting_after, + "state": state, }, run_list_params.RunListParams, ), @@ -497,9 +509,12 @@ async def retrieve( def list( self, *, + benchmark_run_id: str | Omit = omit, limit: int | Omit = omit, + name: str | Omit = omit, scenario_id: str | Omit = omit, starting_after: str | Omit = omit, + state: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -511,12 +526,18 @@ def list( List all ScenarioRuns matching filter. Args: - limit: The limit of items to return. Default is 20. + benchmark_run_id: Filter by benchmark run ID + + limit: The limit of items to return. Default is 20. Max is 5000. + + name: Filter by name scenario_id: Filter runs associated to Scenario given ID starting_after: Load the next page of data starting after the item with the given ID. + state: Filter by state + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -535,9 +556,12 @@ def list( timeout=timeout, query=maybe_transform( { + "benchmark_run_id": benchmark_run_id, "limit": limit, + "name": name, "scenario_id": scenario_id, "starting_after": starting_after, + "state": state, }, run_list_params.RunListParams, ), diff --git a/src/runloop_api_client/resources/scenarios/scenarios.py b/src/runloop_api_client/resources/scenarios/scenarios.py index 6b7c729f4..bd961a285 100644 --- a/src/runloop_api_client/resources/scenarios/scenarios.py +++ b/src/runloop_api_client/resources/scenarios/scenarios.py @@ -221,31 +221,32 @@ def update( timeout: float | httpx.Timeout | None | NotGiven = not_given, idempotency_key: str | None = None, ) -> ScenarioView: - """ - Update a Scenario, a repeatable AI coding evaluation test that defines the - starting environment as well as evaluation success criteria. Only provided - fields will be updated. + """Update a Scenario. 
+ + Fields that are null will preserve the existing value. Fields + that are provided (including empty values) will replace the existing value + entirely. Args: environment_parameters: The Environment in which the Scenario will run. input_context: The input context for the Scenario. - metadata: User defined metadata to attach to the scenario for organization. + metadata: User defined metadata to attach to the scenario. Pass in empty map to clear. - name: Name of the scenario. + name: Name of the scenario. Cannot be blank. reference_output: A string representation of the reference output to solve the scenario. Commonly can be the result of a git diff or a sequence of command actions to apply to the - environment. + environment. Pass in empty string to clear. - required_environment_variables: Environment variables required to run the scenario. + required_environment_variables: Environment variables required to run the scenario. Pass in empty list to clear. - required_secret_names: Secrets required to run the scenario. + required_secret_names: Secrets required to run the scenario. Pass in empty list to clear. scoring_contract: The scoring contract for the Scenario. - validation_type: Validation strategy. + validation_type: Validation strategy. Pass in empty string to clear. extra_headers: Send extra headers @@ -292,6 +293,7 @@ def list( limit: int | Omit = omit, name: str | Omit = omit, starting_after: str | Omit = omit, + validation_type: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -305,12 +307,14 @@ def list( Args: benchmark_id: Filter scenarios by benchmark ID. - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. name: Query for Scenarios with a given name. starting_after: Load the next page of data starting after the item with the given ID. + validation_type: Filter by validation type + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -333,6 +337,7 @@ def list( "limit": limit, "name": name, "starting_after": starting_after, + "validation_type": validation_type, }, scenario_list_params.ScenarioListParams, ), @@ -357,7 +362,7 @@ def list_public( List all public scenarios matching filter. Args: - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. name: Query for Scenarios with a given name. @@ -678,31 +683,32 @@ async def update( timeout: float | httpx.Timeout | None | NotGiven = not_given, idempotency_key: str | None = None, ) -> ScenarioView: - """ - Update a Scenario, a repeatable AI coding evaluation test that defines the - starting environment as well as evaluation success criteria. Only provided - fields will be updated. + """Update a Scenario. + + Fields that are null will preserve the existing value. Fields + that are provided (including empty values) will replace the existing value + entirely. Args: environment_parameters: The Environment in which the Scenario will run. input_context: The input context for the Scenario. - metadata: User defined metadata to attach to the scenario for organization. + metadata: User defined metadata to attach to the scenario. Pass in empty map to clear. - name: Name of the scenario. + name: Name of the scenario. Cannot be blank. 
reference_output: A string representation of the reference output to solve the scenario. Commonly can be the result of a git diff or a sequence of command actions to apply to the - environment. + environment. Pass in empty string to clear. - required_environment_variables: Environment variables required to run the scenario. + required_environment_variables: Environment variables required to run the scenario. Pass in empty list to clear. - required_secret_names: Secrets required to run the scenario. + required_secret_names: Secrets required to run the scenario. Pass in empty list to clear. scoring_contract: The scoring contract for the Scenario. - validation_type: Validation strategy. + validation_type: Validation strategy. Pass in empty string to clear. extra_headers: Send extra headers @@ -749,6 +755,7 @@ def list( limit: int | Omit = omit, name: str | Omit = omit, starting_after: str | Omit = omit, + validation_type: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -762,12 +769,14 @@ def list( Args: benchmark_id: Filter scenarios by benchmark ID. - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. name: Query for Scenarios with a given name. starting_after: Load the next page of data starting after the item with the given ID. + validation_type: Filter by validation type + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -790,6 +799,7 @@ def list( "limit": limit, "name": name, "starting_after": starting_after, + "validation_type": validation_type, }, scenario_list_params.ScenarioListParams, ), @@ -814,7 +824,7 @@ def list_public( List all public scenarios matching filter. Args: - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. name: Query for Scenarios with a given name. diff --git a/src/runloop_api_client/resources/scenarios/scorers.py b/src/runloop_api_client/resources/scenarios/scorers.py index 5b083f8e4..9e5d5e198 100644 --- a/src/runloop_api_client/resources/scenarios/scorers.py +++ b/src/runloop_api_client/resources/scenarios/scorers.py @@ -201,7 +201,7 @@ def list( List all Scenario Scorers matching filter. Args: - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. starting_after: Load the next page of data starting after the item with the given ID. @@ -460,7 +460,7 @@ def list( List all Scenario Scorers matching filter. Args: - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. starting_after: Load the next page of data starting after the item with the given ID. diff --git a/src/runloop_api_client/resources/secrets.py b/src/runloop_api_client/resources/secrets.py index 8e170fca2..892557497 100644 --- a/src/runloop_api_client/resources/secrets.py +++ b/src/runloop_api_client/resources/secrets.py @@ -160,7 +160,7 @@ def list( for security reasons. Args: - limit: The limit of items to return. Default is 20. + limit: The limit of items to return. Default is 20. Max is 5000. extra_headers: Send extra headers @@ -363,7 +363,7 @@ async def list( for security reasons. Args: - limit: The limit of items to return. Default is 20. 
+ limit: The limit of items to return. Default is 20. Max is 5000. extra_headers: Send extra headers diff --git a/src/runloop_api_client/sdk/__init__.py b/src/runloop_api_client/sdk/__init__.py index 5773b9d53..610017b79 100644 --- a/src/runloop_api_client/sdk/__init__.py +++ b/src/runloop_api_client/sdk/__init__.py @@ -5,7 +5,17 @@ from __future__ import annotations -from .sync import AgentOps, DevboxOps, ScorerOps, RunloopSDK, ScenarioOps, SnapshotOps, BlueprintOps, StorageObjectOps +from .sync import ( + AgentOps, + DevboxOps, + ScorerOps, + RunloopSDK, + ScenarioOps, + SnapshotOps, + BenchmarkOps, + BlueprintOps, + StorageObjectOps, +) from .agent import Agent from ._types import ScenarioPreview from .async_ import ( @@ -15,6 +25,7 @@ AsyncRunloopSDK, AsyncScenarioOps, AsyncSnapshotOps, + AsyncBenchmarkOps, AsyncBlueprintOps, AsyncStorageObjectOps, ) @@ -22,20 +33,24 @@ from .scorer import Scorer from .scenario import Scenario from .snapshot import Snapshot +from .benchmark import Benchmark from .blueprint import Blueprint from .execution import Execution from .async_agent import AsyncAgent from .async_devbox import AsyncDevbox, AsyncNamedShell from .async_scorer import AsyncScorer from .scenario_run import ScenarioRun +from .benchmark_run import BenchmarkRun from .async_scenario import AsyncScenario from .async_snapshot import AsyncSnapshot from .storage_object import StorageObject +from .async_benchmark import AsyncBenchmark from .async_blueprint import AsyncBlueprint from .async_execution import AsyncExecution from .execution_result import ExecutionResult from .scenario_builder import ScenarioBuilder from .async_scenario_run import AsyncScenarioRun +from .async_benchmark_run import AsyncBenchmarkRun from .async_storage_object import AsyncStorageObject from .async_execution_result import AsyncExecutionResult from .async_scenario_builder import AsyncScenarioBuilder @@ -47,6 +62,8 @@ # Management interfaces "AgentOps", "AsyncAgentOps", + "BenchmarkOps", + "AsyncBenchmarkOps", "DevboxOps", "AsyncDevboxOps", "BlueprintOps", @@ -62,6 +79,10 @@ # Resource classes "Agent", "AsyncAgent", + "Benchmark", + "AsyncBenchmark", + "BenchmarkRun", + "AsyncBenchmarkRun", "Devbox", "AsyncDevbox", "Execution", diff --git a/src/runloop_api_client/sdk/_types.py b/src/runloop_api_client/sdk/_types.py index be09f6eed..c3024b4ca 100644 --- a/src/runloop_api_client/sdk/_types.py +++ b/src/runloop_api_client/sdk/_types.py @@ -1,32 +1,41 @@ from typing import Union, Callable, Optional from typing_extensions import TypedDict +from ..types import ( + InputContext, + ScenarioView, + AgentListParams, + DevboxListParams, + ObjectListParams, + AgentCreateParams, + DevboxCreateParams, + ObjectCreateParams, + ScenarioListParams, + BenchmarkListParams, + BlueprintListParams, + ObjectDownloadParams, + ScenarioUpdateParams, + BenchmarkCreateParams, + BenchmarkUpdateParams, + BlueprintCreateParams, + DevboxUploadFileParams, + DevboxCreateTunnelParams, + DevboxDownloadFileParams, + DevboxRemoveTunnelParams, + DevboxSnapshotDiskParams, + DevboxReadFileContentsParams, + DevboxWriteFileContentsParams, +) from .._types import Body, Query, Headers, Timeout, NotGiven from ..lib.polling import PollingConfig from ..types.devboxes import DiskSnapshotListParams, DiskSnapshotUpdateParams from ..types.scenarios import ScorerListParams, ScorerCreateParams, ScorerUpdateParams, ScorerValidateParams -from ..types.input_context import InputContext -from ..types.scenario_view import ScenarioView -from ..types.agent_list_params import 
AgentListParams -from ..types.devbox_list_params import DevboxListParams -from ..types.object_list_params import ObjectListParams -from ..types.agent_create_params import AgentCreateParams -from ..types.devbox_create_params import DevboxCreateParams, DevboxBaseCreateParams -from ..types.object_create_params import ObjectCreateParams -from ..types.scenario_list_params import ScenarioListParams -from ..types.blueprint_list_params import BlueprintListParams -from ..types.object_download_params import ObjectDownloadParams -from ..types.scenario_update_params import ScenarioUpdateParams -from ..types.blueprint_create_params import BlueprintCreateParams -from ..types.devbox_upload_file_params import DevboxUploadFileParams +from ..types.benchmarks import RunListScenarioRunsParams +from ..types.devbox_create_params import DevboxBaseCreateParams from ..types.scenario_start_run_params import ScenarioStartRunBaseParams -from ..types.devbox_create_tunnel_params import DevboxCreateTunnelParams -from ..types.devbox_download_file_params import DevboxDownloadFileParams +from ..types.benchmark_start_run_params import BenchmarkSelfStartRunParams +from ..types.benchmarks.run_list_params import RunSelfListParams from ..types.devbox_execute_async_params import DevboxNiceExecuteAsyncParams -from ..types.devbox_remove_tunnel_params import DevboxRemoveTunnelParams -from ..types.devbox_snapshot_disk_params import DevboxSnapshotDiskParams -from ..types.devbox_read_file_contents_params import DevboxReadFileContentsParams -from ..types.devbox_write_file_contents_params import DevboxWriteFileContentsParams LogCallback = Callable[[str], None] @@ -203,3 +212,27 @@ class ScenarioPreview(ScenarioView): input_context: InputContextPreview # type: ignore[assignment] """The input context for the Scenario.""" + + +class SDKBenchmarkCreateParams(BenchmarkCreateParams, LongRequestOptions): + pass + + +class SDKBenchmarkListParams(BenchmarkListParams, BaseRequestOptions): + pass + + +class SDKBenchmarkUpdateParams(BenchmarkUpdateParams, LongRequestOptions): + pass + + +class SDKBenchmarkStartRunParams(BenchmarkSelfStartRunParams, LongRequestOptions): + pass + + +class SDKBenchmarkListRunsParams(RunSelfListParams, BaseRequestOptions): + pass + + +class SDKBenchmarkRunListScenarioRunsParams(RunListScenarioRunsParams, BaseRequestOptions): + pass diff --git a/src/runloop_api_client/sdk/async_.py b/src/runloop_api_client/sdk/async_.py index 4bcd08fc1..6e6e828ff 100644 --- a/src/runloop_api_client/sdk/async_.py +++ b/src/runloop_api_client/sdk/async_.py @@ -21,7 +21,9 @@ SDKObjectCreateParams, SDKScenarioListParams, SDKScorerCreateParams, + SDKBenchmarkListParams, SDKBlueprintListParams, + SDKBenchmarkCreateParams, SDKBlueprintCreateParams, SDKDiskSnapshotListParams, SDKDevboxCreateFromImageParams, @@ -34,6 +36,7 @@ from .async_scorer import AsyncScorer from .async_scenario import AsyncScenario from .async_snapshot import AsyncSnapshot +from .async_benchmark import AsyncBenchmark from .async_blueprint import AsyncBlueprint from ..lib.context_loader import TarFilter, build_directory_tar from .async_storage_object import AsyncStorageObject @@ -599,7 +602,6 @@ async def create_from_npm( self, *, package_name: str, - npm_version: Optional[str] = None, registry_url: Optional[str] = None, agent_setup: Optional[list[str]] = None, **params: Unpack[SDKAgentCreateParams], @@ -608,8 +610,6 @@ async def create_from_npm( :param package_name: NPM package name :type package_name: str - :param npm_version: NPM version constraint, defaults to None - 
:type npm_version: Optional[str], optional :param registry_url: NPM registry URL, defaults to None :type registry_url: Optional[str], optional :param agent_setup: Setup commands to run after installation, defaults to None @@ -625,8 +625,6 @@ async def create_from_npm( ) npm_config: Npm = {"package_name": package_name} - if npm_version is not None: - npm_config["npm_version"] = npm_version if registry_url is not None: npm_config["registry_url"] = registry_url if agent_setup is not None: @@ -639,7 +637,6 @@ async def create_from_pip( self, *, package_name: str, - pip_version: Optional[str] = None, registry_url: Optional[str] = None, agent_setup: Optional[list[str]] = None, **params: Unpack[SDKAgentCreateParams], @@ -648,8 +645,6 @@ async def create_from_pip( :param package_name: Pip package name :type package_name: str - :param pip_version: Pip version constraint, defaults to None - :type pip_version: Optional[str], optional :param registry_url: Pip registry URL, defaults to None :type registry_url: Optional[str], optional :param agent_setup: Setup commands to run after installation, defaults to None @@ -665,8 +660,6 @@ async def create_from_pip( ) pip_config: Pip = {"package_name": package_name} - if pip_version is not None: - pip_config["pip_version"] = pip_version if registry_url is not None: pip_config["registry_url"] = registry_url if agent_setup is not None: @@ -825,6 +818,55 @@ async def list(self, **params: Unpack[SDKScenarioListParams]) -> list[AsyncScena return [AsyncScenario(self._client, item.id) async for item in page] +class AsyncBenchmarkOps: + """Manage benchmarks (async). Access via ``runloop.benchmark``. + + Example: + >>> runloop = AsyncRunloopSDK() + >>> benchmarks = await runloop.benchmark.list() + >>> benchmark = runloop.benchmark.from_id("bmd_xxx") + >>> run = await benchmark.start_run(run_name="evaluation-v1") + """ + + def __init__(self, client: AsyncRunloop) -> None: + """Initialize AsyncBenchmarkOps. + + :param client: AsyncRunloop client instance + :type client: AsyncRunloop + """ + self._client = client + + async def create(self, **params: Unpack[SDKBenchmarkCreateParams]) -> AsyncBenchmark: + """Create a new benchmark. + + :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkCreateParams` for available parameters + :return: The newly created benchmark + :rtype: AsyncBenchmark + """ + response = await self._client.benchmarks.create(**params) + return AsyncBenchmark(self._client, response.id) + + def from_id(self, benchmark_id: str) -> AsyncBenchmark: + """Get an AsyncBenchmark instance for an existing benchmark ID. + + :param benchmark_id: ID of the benchmark + :type benchmark_id: str + :return: AsyncBenchmark instance for the given ID + :rtype: AsyncBenchmark + """ + return AsyncBenchmark(self._client, benchmark_id) + + async def list(self, **params: Unpack[SDKBenchmarkListParams]) -> list[AsyncBenchmark]: + """List all benchmarks, optionally filtered by parameters. + + :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkListParams` for available parameters + :return: List of benchmarks + :rtype: list[AsyncBenchmark] + """ + page = await self._client.benchmarks.list(**params) + return [AsyncBenchmark(self._client, item.id) for item in page.benchmarks] + + class AsyncRunloopSDK: """High-level asynchronous entry point for the Runloop SDK. @@ -836,6 +878,8 @@ class AsyncRunloopSDK: :vartype api: AsyncRunloop :ivar agent: High-level async interface for agent management. 
:vartype agent: AsyncAgentOps + :ivar benchmark: High-level async interface for benchmark management + :vartype benchmark: AsyncBenchmarkOps :ivar devbox: High-level async interface for devbox management :vartype devbox: AsyncDevboxOps :ivar blueprint: High-level async interface for blueprint management @@ -859,6 +903,7 @@ class AsyncRunloopSDK: api: AsyncRunloop agent: AsyncAgentOps + benchmark: AsyncBenchmarkOps devbox: AsyncDevboxOps blueprint: AsyncBlueprintOps scenario: AsyncScenarioOps @@ -905,6 +950,7 @@ def __init__( ) self.agent = AsyncAgentOps(self.api) + self.benchmark = AsyncBenchmarkOps(self.api) self.devbox = AsyncDevboxOps(self.api) self.blueprint = AsyncBlueprintOps(self.api) self.scenario = AsyncScenarioOps(self.api) diff --git a/src/runloop_api_client/sdk/async_benchmark.py b/src/runloop_api_client/sdk/async_benchmark.py new file mode 100644 index 000000000..63443e37b --- /dev/null +++ b/src/runloop_api_client/sdk/async_benchmark.py @@ -0,0 +1,164 @@ +"""AsyncBenchmark resource class for asynchronous operations.""" + +from __future__ import annotations + +from typing import List +from typing_extensions import Unpack, override + +from ..types import BenchmarkView +from ._types import ( + BaseRequestOptions, + LongRequestOptions, + SDKBenchmarkUpdateParams, + SDKBenchmarkListRunsParams, + SDKBenchmarkStartRunParams, +) +from .._types import SequenceNotStr +from .._client import AsyncRunloop +from .async_benchmark_run import AsyncBenchmarkRun + + +class AsyncBenchmark: + """A benchmark for evaluating agent performance across scenarios (async). + + Provides async methods for retrieving benchmark details, updating the benchmark, + managing scenarios, and starting benchmark runs. Obtain instances via + ``runloop.benchmark.from_id()`` or ``runloop.benchmark.list()``. + + Example: + >>> benchmark = runloop.benchmark.from_id("bmd_xxx") + >>> info = await benchmark.get_info() + >>> run = await benchmark.start_run(run_name="evaluation-v1") + >>> for scenario_id in info.scenario_ids: + ... scenario = runloop.scenario.from_id(scenario_id) + ... scenario_run = await scenario.run(benchmark_run_id=run.id, run_name="evaluation-v1") + """ + + def __init__(self, client: AsyncRunloop, benchmark_id: str) -> None: + """Create an AsyncBenchmark instance. + + :param client: AsyncRunloop client instance + :type client: AsyncRunloop + :param benchmark_id: Benchmark ID + :type benchmark_id: str + """ + self._client = client + self._id = benchmark_id + + @override + def __repr__(self) -> str: + return f"<AsyncBenchmark id={self._id!r}>" + + @property + def id(self) -> str: + """Return the benchmark ID. + + :return: Unique benchmark ID + :rtype: str + """ + return self._id + + async def get_info( + self, + **options: Unpack[BaseRequestOptions], + ) -> BenchmarkView: + """Retrieve current benchmark details. + + :param options: See :typeddict:`~runloop_api_client.sdk._types.BaseRequestOptions` for available options + :return: Current benchmark info + :rtype: BenchmarkView + """ + return await self._client.benchmarks.retrieve( + self._id, + **options, + ) + + async def update( + self, + **params: Unpack[SDKBenchmarkUpdateParams], + ) -> BenchmarkView: + """Update the benchmark. + + Only provided fields will be updated.
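For orientation, a minimal usage sketch of the new async benchmark surface; it assumes `AsyncRunloopSDK()` resolves the API key from the environment, and the benchmark name is a placeholder:

```python
import asyncio

from runloop_api_client.sdk import AsyncRunloopSDK


async def main() -> None:
    runloop = AsyncRunloopSDK()  # assumption: picks up RUNLOOP_API_KEY from the environment

    # Create a benchmark, then re-derive a handle to it from its ID.
    benchmark = await runloop.benchmark.create(name="example-suite")
    same = runloop.benchmark.from_id(benchmark.id)  # from_id is synchronous

    info = await same.get_info()  # BenchmarkView with the server-side state
    print(info.id, info.scenario_ids)

    # The new `name` filter narrows the listing.
    for bm in await runloop.benchmark.list(name="example-suite"):
        print(bm.id)


asyncio.run(main())
```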
+ + :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkUpdateParams` for available parameters + :return: Updated benchmark info + :rtype: BenchmarkView + """ + return await self._client.benchmarks.update( + self._id, + **params, + ) + + async def start_run( + self, + **params: Unpack[SDKBenchmarkStartRunParams], + ) -> AsyncBenchmarkRun: + """Start a new benchmark run. + + Creates a new benchmark run and returns an AsyncBenchmarkRun instance for + managing the run lifecycle. + + :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkStartRunParams` for available parameters + :return: AsyncBenchmarkRun instance for managing the run + :rtype: AsyncBenchmarkRun + """ + run_view = await self._client.benchmarks.start_run( + benchmark_id=self._id, + **params, + ) + return AsyncBenchmarkRun(self._client, run_view.id, run_view.benchmark_id) + + async def add_scenarios( + self, + scenario_ids: SequenceNotStr[str], + **options: Unpack[LongRequestOptions], + ) -> BenchmarkView: + """Add scenarios to the benchmark. + + :param scenario_ids: List of scenario IDs to add + :type scenario_ids: SequenceNotStr[str] + :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options + :return: Updated benchmark info + :rtype: BenchmarkView + """ + return await self._client.benchmarks.update_scenarios( + self._id, + scenarios_to_add=scenario_ids, + **options, + ) + + async def remove_scenarios( + self, + scenario_ids: SequenceNotStr[str], + **options: Unpack[LongRequestOptions], + ) -> BenchmarkView: + """Remove scenarios from the benchmark. + + :param scenario_ids: List of scenario IDs to remove + :type scenario_ids: SequenceNotStr[str] + :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options + :return: Updated benchmark info + :rtype: BenchmarkView + """ + return await self._client.benchmarks.update_scenarios( + self._id, + scenarios_to_remove=scenario_ids, + **options, + ) + + async def list_runs( + self, + **params: Unpack[SDKBenchmarkListRunsParams], + ) -> List[AsyncBenchmarkRun]: + """List all runs for this benchmark. + + :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkListRunsParams` for available parameters + :return: List of async benchmark runs + :rtype: List[AsyncBenchmarkRun] + """ + page = await self._client.benchmarks.runs.list( + benchmark_id=self._id, + **params, + ) + return [AsyncBenchmarkRun(self._client, run.id, run.benchmark_id) for run in page.runs] diff --git a/src/runloop_api_client/sdk/async_benchmark_run.py b/src/runloop_api_client/sdk/async_benchmark_run.py new file mode 100644 index 000000000..f498d1408 --- /dev/null +++ b/src/runloop_api_client/sdk/async_benchmark_run.py @@ -0,0 +1,127 @@ +"""AsyncBenchmarkRun resource class for asynchronous operations.""" + +from __future__ import annotations + +from typing import List +from typing_extensions import Unpack, override + +from ..types import BenchmarkRunView +from ._types import BaseRequestOptions, LongRequestOptions, SDKBenchmarkRunListScenarioRunsParams +from .._client import AsyncRunloop +from .async_scenario_run import AsyncScenarioRun + + +class AsyncBenchmarkRun: + """A benchmark run for evaluating agent performance across scenarios (async). + + Provides async methods for monitoring run status, managing the run lifecycle, + and accessing scenario run results. Obtain instances via + ``benchmark.start_run()`` or ``benchmark.list_runs()``. 
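A short sketch of scenario membership management with the helpers defined above; both wrap the new `update_scenarios` endpoint, and all IDs are placeholders:

```python
import asyncio

from runloop_api_client.sdk import AsyncRunloopSDK


async def main() -> None:
    runloop = AsyncRunloopSDK()
    benchmark = runloop.benchmark.from_id("bmd_xxx")  # placeholder benchmark ID

    # Add and remove scenarios without touching other benchmark fields.
    await benchmark.add_scenarios(["scn_123", "scn_456"])  # placeholder scenario IDs
    await benchmark.remove_scenarios(["scn_789"])

    # benchmark_id is supplied automatically when listing this benchmark's runs.
    for run in await benchmark.list_runs(limit=50):
        print(run.id)


asyncio.run(main())
```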
+ + Example: + >>> benchmark = runloop.benchmark.from_id("bench-xxx") + >>> run = await benchmark.start_run(run_name="evaluation-v1") + >>> info = await run.get_info() + >>> scenario_runs = await run.list_scenario_runs() + """ + + def __init__(self, client: AsyncRunloop, run_id: str, benchmark_id: str) -> None: + """Create an AsyncBenchmarkRun instance. + + :param client: AsyncRunloop client instance + :type client: AsyncRunloop + :param run_id: Benchmark run ID + :type run_id: str + :param benchmark_id: Parent benchmark ID + :type benchmark_id: str + """ + self._client = client + self._id = run_id + self._benchmark_id = benchmark_id + + @override + def __repr__(self) -> str: + return f"<AsyncBenchmarkRun id={self._id!r}>" + + @property + def id(self) -> str: + """Return the benchmark run ID. + + :return: Unique benchmark run ID + :rtype: str + """ + return self._id + + @property + def benchmark_id(self) -> str: + """Return the parent benchmark ID. + + :return: Parent benchmark ID + :rtype: str + """ + return self._benchmark_id + + async def get_info( + self, + **options: Unpack[BaseRequestOptions], + ) -> BenchmarkRunView: + """Retrieve current benchmark run status and metadata. + + :param options: See :typeddict:`~runloop_api_client.sdk._types.BaseRequestOptions` for available options + :return: Current benchmark run state info + :rtype: BenchmarkRunView + """ + return await self._client.benchmarks.runs.retrieve( + self._id, + **options, + ) + + async def cancel( + self, + **options: Unpack[LongRequestOptions], + ) -> BenchmarkRunView: + """Cancel the benchmark run. + + Stops all running scenarios and marks the run as canceled. + + :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options + :return: Updated benchmark run state + :rtype: BenchmarkRunView + """ + return await self._client.benchmarks.runs.cancel( + self._id, + **options, + ) + + async def complete( + self, + **options: Unpack[LongRequestOptions], + ) -> BenchmarkRunView: + """Complete the benchmark run. + + Marks the run as completed. Call this after all scenarios have finished. + + :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options + :return: Completed benchmark run state + :rtype: BenchmarkRunView + """ + return await self._client.benchmarks.runs.complete( + self._id, + **options, + ) + + async def list_scenario_runs( + self, + **params: Unpack[SDKBenchmarkRunListScenarioRunsParams], + ) -> List[AsyncScenarioRun]: + """List all scenario runs for this benchmark run.
+ + :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkRunListScenarioRunsParams` for available parameters + :return: List of async scenario run objects + :rtype: List[AsyncScenarioRun] + """ + page = await self._client.benchmarks.runs.list_scenario_runs( + self._id, + **params, + ) + return [AsyncScenarioRun(self._client, run.id, run.devbox_id) for run in page.runs] diff --git a/src/runloop_api_client/sdk/benchmark.py b/src/runloop_api_client/sdk/benchmark.py new file mode 100644 index 000000000..7e8ed826d --- /dev/null +++ b/src/runloop_api_client/sdk/benchmark.py @@ -0,0 +1,164 @@ +"""Benchmark resource class for synchronous operations.""" + +from __future__ import annotations + +from typing import List +from typing_extensions import Unpack, override + +from ..types import BenchmarkView +from ._types import ( + BaseRequestOptions, + LongRequestOptions, + SDKBenchmarkUpdateParams, + SDKBenchmarkListRunsParams, + SDKBenchmarkStartRunParams, +) +from .._types import SequenceNotStr +from .._client import Runloop +from .benchmark_run import BenchmarkRun + + +class Benchmark: + """A benchmark for evaluating agent performance across scenarios. + + Provides methods for retrieving benchmark details, updating the benchmark, + managing scenarios, and starting benchmark runs. Obtain instances via + ``runloop.benchmark.from_id()`` or ``runloop.benchmark.list()``. + + Example: + >>> benchmark = runloop.benchmark.from_id("bmd_xxx") + >>> info = benchmark.get_info() + >>> run = benchmark.start_run(run_name="evaluation-v1") + >>> for scenario_id in info.scenario_ids: + ... scenario = runloop.scenario.from_id(scenario_id) + ... scenario_run = scenario.run(benchmark_run_id=run.id, run_name="evaluation-v1") + """ + + def __init__(self, client: Runloop, benchmark_id: str) -> None: + """Create a Benchmark instance. + + :param client: Runloop client instance + :type client: Runloop + :param benchmark_id: Benchmark ID + :type benchmark_id: str + """ + self._client = client + self._id = benchmark_id + + @override + def __repr__(self) -> str: + return f"<Benchmark id={self._id!r}>" + + @property + def id(self) -> str: + """Return the benchmark ID. + + :return: Unique benchmark ID + :rtype: str + """ + return self._id + + def get_info( + self, + **options: Unpack[BaseRequestOptions], + ) -> BenchmarkView: + """Retrieve current benchmark details. + + :param options: See :typeddict:`~runloop_api_client.sdk._types.BaseRequestOptions` for available options + :return: Current benchmark info + :rtype: BenchmarkView + """ + return self._client.benchmarks.retrieve( + self._id, + **options, + ) + + def update( + self, + **params: Unpack[SDKBenchmarkUpdateParams], + ) -> BenchmarkView: + """Update the benchmark. + + Only provided fields will be updated. + + :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkUpdateParams` for available parameters + :return: Updated benchmark info + :rtype: BenchmarkView + """ + return self._client.benchmarks.update( + self._id, + **params, + ) + + def start_run( + self, + **params: Unpack[SDKBenchmarkStartRunParams], + ) -> BenchmarkRun: + """Start a new benchmark run. + + Creates a new benchmark run and returns a BenchmarkRun instance for + managing the run lifecycle.
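To illustrate the run lifecycle methods defined above, a hedged sketch (placeholder IDs; error handling reduced to the essentials):

```python
import asyncio

from runloop_api_client.sdk import AsyncRunloopSDK


async def main() -> None:
    runloop = AsyncRunloopSDK()
    benchmark = runloop.benchmark.from_id("bmd_xxx")  # placeholder benchmark ID

    run = await benchmark.start_run(run_name="evaluation-v1")
    try:
        info = await run.get_info()  # BenchmarkRunView with the current state
        print(info.id, run.benchmark_id)

        # ...execute each scenario here, e.g. scenario.run(benchmark_run_id=run.id)...

        for scenario_run in await run.list_scenario_runs():
            print(scenario_run.id)
        await run.complete()  # mark the run finished once all scenarios are done
    except Exception:
        await run.cancel()  # stop running scenarios and mark the run canceled
        raise


asyncio.run(main())
```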
+ + :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkStartRunParams` for available parameters + :return: BenchmarkRun instance for managing the run + :rtype: BenchmarkRun + """ + run_view = self._client.benchmarks.start_run( + benchmark_id=self._id, + **params, + ) + return BenchmarkRun(self._client, run_view.id, run_view.benchmark_id) + + def add_scenarios( + self, + scenario_ids: SequenceNotStr[str], + **options: Unpack[LongRequestOptions], + ) -> BenchmarkView: + """Add scenarios to the benchmark. + + :param scenario_ids: List of scenario IDs to add + :type scenario_ids: SequenceNotStr[str] + :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options + :return: Updated benchmark info + :rtype: BenchmarkView + """ + return self._client.benchmarks.update_scenarios( + self._id, + scenarios_to_add=scenario_ids, + **options, + ) + + def remove_scenarios( + self, + scenario_ids: SequenceNotStr[str], + **options: Unpack[LongRequestOptions], + ) -> BenchmarkView: + """Remove scenarios from the benchmark. + + :param scenario_ids: List of scenario IDs to remove + :type scenario_ids: SequenceNotStr[str] + :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options + :return: Updated benchmark info + :rtype: BenchmarkView + """ + return self._client.benchmarks.update_scenarios( + self._id, + scenarios_to_remove=scenario_ids, + **options, + ) + + def list_runs( + self, + **params: Unpack[SDKBenchmarkListRunsParams], + ) -> List[BenchmarkRun]: + """List all runs for this benchmark. + + :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkListRunsParams` for available parameters + :return: List of benchmark runs + :rtype: List[BenchmarkRun] + """ + page = self._client.benchmarks.runs.list( + benchmark_id=self._id, + **params, + ) + return [BenchmarkRun(self._client, run.id, run.benchmark_id) for run in page.runs] diff --git a/src/runloop_api_client/sdk/benchmark_run.py b/src/runloop_api_client/sdk/benchmark_run.py new file mode 100644 index 000000000..10da7ba05 --- /dev/null +++ b/src/runloop_api_client/sdk/benchmark_run.py @@ -0,0 +1,127 @@ +"""BenchmarkRun resource class for synchronous operations.""" + +from __future__ import annotations + +from typing import List +from typing_extensions import Unpack, override + +from ..types import BenchmarkRunView +from ._types import BaseRequestOptions, LongRequestOptions, SDKBenchmarkRunListScenarioRunsParams +from .._client import Runloop +from .scenario_run import ScenarioRun + + +class BenchmarkRun: + """A benchmark run for evaluating agent performance across scenarios. + + Provides methods for monitoring run status, managing the run lifecycle, + and accessing scenario run results. Obtain instances via + ``benchmark.start_run()`` or ``benchmark.list_runs()``. + + Example: + >>> benchmark = runloop.benchmark.from_id("bench-xxx") + >>> run = benchmark.start_run(run_name="evaluation-v1") + >>> info = run.get_info() + >>> scenario_runs = run.list_scenario_runs() + """ + + def __init__(self, client: Runloop, run_id: str, benchmark_id: str) -> None: + """Create a BenchmarkRun instance. 
+ + :param client: Runloop client instance + :type client: Runloop + :param run_id: Benchmark run ID + :type run_id: str + :param benchmark_id: Parent benchmark ID + :type benchmark_id: str + """ + self._client = client + self._id = run_id + self._benchmark_id = benchmark_id + + @override + def __repr__(self) -> str: + return f"<BenchmarkRun id={self._id!r}>" + + @property + def id(self) -> str: + """Return the benchmark run ID. + + :return: Unique benchmark run ID + :rtype: str + """ + return self._id + + @property + def benchmark_id(self) -> str: + """Return the parent benchmark ID. + + :return: Parent benchmark ID + :rtype: str + """ + return self._benchmark_id + + def get_info( + self, + **options: Unpack[BaseRequestOptions], + ) -> BenchmarkRunView: + """Retrieve current benchmark run status and metadata. + + :param options: See :typeddict:`~runloop_api_client.sdk._types.BaseRequestOptions` for available options + :return: Current benchmark run state info + :rtype: BenchmarkRunView + """ + return self._client.benchmarks.runs.retrieve( + self._id, + **options, + ) + + def cancel( + self, + **options: Unpack[LongRequestOptions], + ) -> BenchmarkRunView: + """Cancel the benchmark run. + + Stops all running scenarios and marks the run as canceled. + + :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options + :return: Updated benchmark run state + :rtype: BenchmarkRunView + """ + return self._client.benchmarks.runs.cancel( + self._id, + **options, + ) + + def complete( + self, + **options: Unpack[LongRequestOptions], + ) -> BenchmarkRunView: + """Complete the benchmark run. + + Marks the run as completed. Call this after all scenarios have finished. + + :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options + :return: Completed benchmark run state + :rtype: BenchmarkRunView + """ + return self._client.benchmarks.runs.complete( + self._id, + **options, + ) + + def list_scenario_runs( + self, + **params: Unpack[SDKBenchmarkRunListScenarioRunsParams], + ) -> List[ScenarioRun]: + """List all scenario runs for this benchmark run. + + :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkRunListScenarioRunsParams` for available parameters + :return: List of scenario run objects + :rtype: List[ScenarioRun] + """ + page = self._client.benchmarks.runs.list_scenario_runs( + self._id, + **params, + ) + return [ScenarioRun(self._client, run.id, run.devbox_id) for run in page.runs] diff --git a/src/runloop_api_client/sdk/sync.py b/src/runloop_api_client/sdk/sync.py index f215c8116..d83eb5a6e 100644 --- a/src/runloop_api_client/sdk/sync.py +++ b/src/runloop_api_client/sdk/sync.py @@ -21,7 +21,9 @@ SDKObjectCreateParams, SDKScenarioListParams, SDKScorerCreateParams, + SDKBenchmarkListParams, SDKBlueprintListParams, + SDKBenchmarkCreateParams, SDKBlueprintCreateParams, SDKDiskSnapshotListParams, SDKDevboxCreateFromImageParams, @@ -33,6 +35,7 @@ from ._helpers import detect_content_type from .scenario import Scenario from .snapshot import Snapshot +from .benchmark import Benchmark from .blueprint import Blueprint from .storage_object import StorageObject from .scenario_builder import ScenarioBuilder @@ -594,7 +597,6 @@ def create_from_npm( self, *, package_name: str, - npm_version: Optional[str] = None, registry_url: Optional[str] = None, agent_setup: Optional[list[str]] = None, **params: Unpack[SDKAgentCreateParams], @@ -603,13 +605,11 @@ def create_from_npm( Example: >>> agent = runloop.agent.create_from_npm( - ... 
name="my-npm-agent", package_name="@runloop/example-agent", npm_version="^1.0.0" + ... name="my-npm-agent", package_name="@runloop/example-agent", version="1.0.0" ... ) :param package_name: NPM package name :type package_name: str - :param npm_version: NPM version constraint, defaults to None - :type npm_version: Optional[str], optional :param registry_url: NPM registry URL, defaults to None :type registry_url: Optional[str], optional :param agent_setup: Setup commands to run after installation, defaults to None @@ -625,8 +625,6 @@ def create_from_npm( ) npm_config: Npm = {"package_name": package_name} - if npm_version is not None: - npm_config["npm_version"] = npm_version if registry_url is not None: npm_config["registry_url"] = registry_url if agent_setup is not None: @@ -639,7 +637,6 @@ def create_from_pip( self, *, package_name: str, - pip_version: Optional[str] = None, registry_url: Optional[str] = None, agent_setup: Optional[list[str]] = None, **params: Unpack[SDKAgentCreateParams], @@ -648,13 +645,11 @@ def create_from_pip( Example: >>> agent = runloop.agent.create_from_pip( - ... name="my-pip-agent", package_name="runloop-example-agent", pip_version=">=1.0.0" + ... name="my-pip-agent", package_name="runloop-example-agent", version="1.0.0" ... ) :param package_name: Pip package name :type package_name: str - :param pip_version: Pip version constraint, defaults to None - :type pip_version: Optional[str], optional :param registry_url: Pip registry URL, defaults to None :type registry_url: Optional[str], optional :param agent_setup: Setup commands to run after installation, defaults to None @@ -670,8 +665,6 @@ def create_from_pip( ) pip_config: Pip = {"package_name": package_name} - if pip_version is not None: - pip_config["pip_version"] = pip_version if registry_url is not None: pip_config["registry_url"] = registry_url if agent_setup is not None: @@ -696,6 +689,7 @@ def create_from_git( ... repository="https://github.com/user/agent-repo", ... ref="main", ... agent_setup=["npm install", "npm run build"], + ... version="1.0.0", ... ) :param repository: Git repository URL @@ -737,7 +731,10 @@ def create_from_object( >>> obj = runloop.storage_object.upload_from_dir("./my-agent") >>> # Then create agent from the object >>> agent = runloop.agent.create_from_object( - ... name="my-object-agent", object_id=obj.id, agent_setup=["chmod +x setup.sh", "./setup.sh"] + ... name="my-object-agent", + ... object_id=obj.id, + ... agent_setup=["chmod +x setup.sh", "./setup.sh"], + ... version="1.0.0", ... ) :param object_id: Storage object ID @@ -846,6 +843,55 @@ def list(self, **params: Unpack[SDKScenarioListParams]) -> list[Scenario]: return [Scenario(self._client, item.id) for item in page] +class BenchmarkOps: + """Manage benchmarks. Access via ``runloop.benchmark``. + + Example: + >>> runloop = RunloopSDK() + >>> benchmarks = runloop.benchmark.list() + >>> benchmark = runloop.benchmark.from_id("bmd_xxx") + >>> run = benchmark.start_run(run_name="evaluation-v1") + """ + + def __init__(self, client: Runloop) -> None: + """Initialize BenchmarkOps. + + :param client: Runloop client instance + :type client: Runloop + """ + self._client = client + + def create(self, **params: Unpack[SDKBenchmarkCreateParams]) -> Benchmark: + """Create a new benchmark. 
+ + :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkCreateParams` for available parameters + :return: The newly created benchmark + :rtype: Benchmark + """ + response = self._client.benchmarks.create(**params) + return Benchmark(self._client, response.id) + + def from_id(self, benchmark_id: str) -> Benchmark: + """Get a Benchmark instance for an existing benchmark ID. + + :param benchmark_id: ID of the benchmark + :type benchmark_id: str + :return: Benchmark instance for the given ID + :rtype: Benchmark + """ + return Benchmark(self._client, benchmark_id) + + def list(self, **params: Unpack[SDKBenchmarkListParams]) -> list[Benchmark]: + """List all benchmarks, optionally filtered by parameters. + + :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkListParams` for available parameters + :return: List of benchmarks + :rtype: list[Benchmark] + """ + page = self._client.benchmarks.list(**params) + return [Benchmark(self._client, item.id) for item in page.benchmarks] + + class RunloopSDK: """High-level synchronous entry point for the Runloop SDK. @@ -857,6 +903,8 @@ class RunloopSDK: :vartype api: Runloop :ivar agent: High-level interface for agent management. :vartype agent: AgentOps + :ivar benchmark: High-level interface for benchmark management + :vartype benchmark: BenchmarkOps :ivar devbox: High-level interface for devbox management :vartype devbox: DevboxOps :ivar blueprint: High-level interface for blueprint management @@ -880,6 +928,7 @@ class RunloopSDK: api: Runloop agent: AgentOps + benchmark: BenchmarkOps devbox: DevboxOps blueprint: BlueprintOps scenario: ScenarioOps @@ -926,6 +975,7 @@ def __init__( ) self.agent = AgentOps(self.api) + self.benchmark = BenchmarkOps(self.api) self.devbox = DevboxOps(self.api) self.blueprint = BlueprintOps(self.api) self.scenario = ScenarioOps(self.api) diff --git a/src/runloop_api_client/types/__init__.py b/src/runloop_api_client/types/__init__.py index 6856d9670..6afd070a3 100644 --- a/src/runloop_api_client/types/__init__.py +++ b/src/runloop_api_client/types/__init__.py @@ -97,6 +97,7 @@ from .repository_connection_list_view import RepositoryConnectionListView as RepositoryConnectionListView from .repository_inspection_list_view import RepositoryInspectionListView as RepositoryInspectionListView from .devbox_read_file_contents_params import DevboxReadFileContentsParams as DevboxReadFileContentsParams +from .benchmark_update_scenarios_params import BenchmarkUpdateScenariosParams as BenchmarkUpdateScenariosParams from .devbox_list_disk_snapshots_params import DevboxListDiskSnapshotsParams as DevboxListDiskSnapshotsParams from .devbox_snapshot_disk_async_params import DevboxSnapshotDiskAsyncParams as DevboxSnapshotDiskAsyncParams from .devbox_write_file_contents_params import DevboxWriteFileContentsParams as DevboxWriteFileContentsParams diff --git a/src/runloop_api_client/types/agent_create_params.py b/src/runloop_api_client/types/agent_create_params.py index 1a3372e7e..3c2deff2a 100644 --- a/src/runloop_api_client/types/agent_create_params.py +++ b/src/runloop_api_client/types/agent_create_params.py @@ -14,5 +14,8 @@ class AgentCreateParams(TypedDict, total=False): name: Required[str] """The name of the Agent.""" + version: Required[str] + """The version of the Agent. 
Must be a semver string (e.g., '2.0.65') or a SHA.""" + source: Optional[AgentSource] """The source configuration for the Agent.""" diff --git a/src/runloop_api_client/types/agent_list_params.py b/src/runloop_api_client/types/agent_list_params.py index a3199190b..3df89fc25 100644 --- a/src/runloop_api_client/types/agent_list_params.py +++ b/src/runloop_api_client/types/agent_list_params.py @@ -12,7 +12,7 @@ class AgentListParams(TypedDict, total=False): """Filter agents by public visibility.""" limit: int - """The limit of items to return. Default is 20.""" + """The limit of items to return. Default is 20. Max is 5000.""" name: str """Filter agents by name (partial match supported).""" @@ -22,3 +22,6 @@ class AgentListParams(TypedDict, total=False): starting_after: str """Load the next page of data starting after the item with the given ID.""" + + version: str + """Filter by version. Use 'latest' to get the most recently created agent.""" diff --git a/src/runloop_api_client/types/agent_list_view.py b/src/runloop_api_client/types/agent_list_view.py index c2a7be455..bfb1560e1 100644 --- a/src/runloop_api_client/types/agent_list_view.py +++ b/src/runloop_api_client/types/agent_list_view.py @@ -9,6 +9,8 @@ class AgentListView(BaseModel): + """A paginated list of Agents.""" + agents: List[AgentView] """The list of Agents.""" diff --git a/src/runloop_api_client/types/agent_view.py b/src/runloop_api_client/types/agent_view.py index 77e56d1b8..23b1f68ff 100644 --- a/src/runloop_api_client/types/agent_view.py +++ b/src/runloop_api_client/types/agent_view.py @@ -9,6 +9,8 @@ class AgentView(BaseModel): + """An Agent represents a registered AI agent entity.""" + id: str """The unique identifier of the Agent.""" @@ -21,5 +23,8 @@ class AgentView(BaseModel): name: str """The name of the Agent.""" + version: str + """The version of the Agent. A semver string (e.g., '2.0.65') or a SHA.""" + source: Optional[AgentSource] = None """The source configuration for the Agent.""" diff --git a/src/runloop_api_client/types/benchmark_create_params.py b/src/runloop_api_client/types/benchmark_create_params.py index 1aec35f5f..36f7b95a9 100644 --- a/src/runloop_api_client/types/benchmark_create_params.py +++ b/src/runloop_api_client/types/benchmark_create_params.py @@ -12,7 +12,7 @@ class BenchmarkCreateParams(TypedDict, total=False): name: Required[str] - """The name of the Benchmark. This must be unique.""" + """The unique name of the Benchmark.""" attribution: Optional[str] """Attribution information for the benchmark.""" @@ -21,12 +21,12 @@ class BenchmarkCreateParams(TypedDict, total=False): """Detailed description of the benchmark.""" metadata: Optional[Dict[str, str]] - """User defined metadata to attach to the benchmark for organization.""" + """User defined metadata to attach to the benchmark.""" required_environment_variables: Optional[SequenceNotStr[str]] """Environment variables required to run the benchmark. - If any required variables are not supplied, the benchmark will fail to start + If any required variables are not supplied, the benchmark will fail to start. 
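Since `version` is now required on agent creation and accepted as a list filter (see the `agent_*` diffs above), a small sketch; the package name is a placeholder, and `runloop.agent.list` is assumed to mirror the other ops classes:

```python
from runloop_api_client.sdk import RunloopSDK

runloop = RunloopSDK()

# version is required: a semver string (e.g., '2.0.65') or a SHA.
agent = runloop.agent.create_from_npm(
    name="my-npm-agent",
    package_name="@runloop/example-agent",  # placeholder package
    version="1.0.0",
)

# "latest" selects the most recently created agent matching the filter.
latest = runloop.agent.list(name="my-npm-agent", version="latest")
```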
""" required_secret_names: SequenceNotStr[str] diff --git a/src/runloop_api_client/types/benchmark_definitions_params.py b/src/runloop_api_client/types/benchmark_definitions_params.py index f92d57d76..97caff125 100644 --- a/src/runloop_api_client/types/benchmark_definitions_params.py +++ b/src/runloop_api_client/types/benchmark_definitions_params.py @@ -9,7 +9,7 @@ class BenchmarkDefinitionsParams(TypedDict, total=False): limit: int - """The limit of items to return. Default is 20.""" + """The limit of items to return. Default is 20. Max is 5000.""" starting_after: str """Load the next page of data starting after the item with the given ID.""" diff --git a/src/runloop_api_client/types/benchmark_list_params.py b/src/runloop_api_client/types/benchmark_list_params.py index 51b2b1320..4e8b0c78b 100644 --- a/src/runloop_api_client/types/benchmark_list_params.py +++ b/src/runloop_api_client/types/benchmark_list_params.py @@ -9,7 +9,10 @@ class BenchmarkListParams(TypedDict, total=False): limit: int - """The limit of items to return. Default is 20.""" + """The limit of items to return. Default is 20. Max is 5000.""" + + name: str + """Filter by name""" starting_after: str """Load the next page of data starting after the item with the given ID.""" diff --git a/src/runloop_api_client/types/benchmark_list_public_params.py b/src/runloop_api_client/types/benchmark_list_public_params.py index c5081922d..6dec4283b 100644 --- a/src/runloop_api_client/types/benchmark_list_public_params.py +++ b/src/runloop_api_client/types/benchmark_list_public_params.py @@ -9,7 +9,7 @@ class BenchmarkListPublicParams(TypedDict, total=False): limit: int - """The limit of items to return. Default is 20.""" + """The limit of items to return. Default is 20. Max is 5000.""" starting_after: str """Load the next page of data starting after the item with the given ID.""" diff --git a/src/runloop_api_client/types/benchmark_run_view.py b/src/runloop_api_client/types/benchmark_run_view.py index 00dd98fc2..07fd4c022 100644 --- a/src/runloop_api_client/types/benchmark_run_view.py +++ b/src/runloop_api_client/types/benchmark_run_view.py @@ -9,6 +9,10 @@ class BenchmarkRunView(BaseModel): + """ + A BenchmarkRunView represents a run of a complete set of Scenarios, organized under a Benchmark. 
+ """ + id: str """The ID of the BenchmarkRun.""" diff --git a/src/runloop_api_client/types/benchmark_start_run_params.py b/src/runloop_api_client/types/benchmark_start_run_params.py index 7655ff5ad..edd65ca7c 100644 --- a/src/runloop_api_client/types/benchmark_start_run_params.py +++ b/src/runloop_api_client/types/benchmark_start_run_params.py @@ -11,10 +11,9 @@ __all__ = ["BenchmarkStartRunParams"] -class BenchmarkStartRunParams(TypedDict, total=False): - benchmark_id: Required[str] - """ID of the Benchmark to run.""" - +# Split into separate params so that OO SDK start_run params can omit the benchmark_id +# Neither of these params are exposed to the user, only the derived SDKBenchmarkStartRunParams +class BenchmarkSelfStartRunParams(TypedDict, total=False): metadata: Optional[Dict[str, str]] """User defined metadata to attach to the benchmark run for organization.""" @@ -23,3 +22,8 @@ class BenchmarkStartRunParams(TypedDict, total=False): run_profile: Annotated[Optional[RunProfile], PropertyInfo(alias="runProfile")] """Runtime configuration to use for this benchmark run""" + + +class BenchmarkStartRunParams(BenchmarkSelfStartRunParams, total=False): + benchmark_id: Required[str] + """ID of the Benchmark to run.""" diff --git a/src/runloop_api_client/types/benchmark_update_params.py b/src/runloop_api_client/types/benchmark_update_params.py index 1291e3e38..ce9e8fb0c 100644 --- a/src/runloop_api_client/types/benchmark_update_params.py +++ b/src/runloop_api_client/types/benchmark_update_params.py @@ -3,7 +3,7 @@ from __future__ import annotations from typing import Dict, Optional -from typing_extensions import Required, TypedDict +from typing_extensions import TypedDict from .._types import SequenceNotStr @@ -11,30 +11,32 @@ class BenchmarkUpdateParams(TypedDict, total=False): - name: Required[str] - """The name of the Benchmark. This must be unique.""" - attribution: Optional[str] - """Attribution information for the benchmark.""" + """Attribution information for the benchmark. Pass in empty string to clear.""" description: Optional[str] - """Detailed description of the benchmark.""" + """Detailed description of the benchmark. Pass in empty string to clear.""" metadata: Optional[Dict[str, str]] - """User defined metadata to attach to the benchmark for organization.""" + """User defined metadata to attach to the benchmark. Pass in empty map to clear.""" + + name: Optional[str] + """The unique name of the Benchmark. Cannot be blank.""" required_environment_variables: Optional[SequenceNotStr[str]] """Environment variables required to run the benchmark. - If any required variables are not supplied, the benchmark will fail to start + If any required variables are not supplied, the benchmark will fail to start. + Pass in empty list to clear. """ - required_secret_names: SequenceNotStr[str] + required_secret_names: Optional[SequenceNotStr[str]] """ Secrets required to run the benchmark with (environment variable name will be mapped to the your user secret by name). If any of these secrets are not - provided or the mapping is incorrect, the benchmark will fail to start. + provided or the mapping is incorrect, the benchmark will fail to start. Pass in + empty list to clear. """ scenario_ids: Optional[SequenceNotStr[str]] - """The Scenario IDs that make up the Benchmark.""" + """The Scenario IDs that make up the Benchmark. 
Pass in empty list to clear.""" diff --git a/src/runloop_api_client/types/benchmark_update_scenarios_params.py b/src/runloop_api_client/types/benchmark_update_scenarios_params.py new file mode 100644 index 000000000..2aca2b0d4 --- /dev/null +++ b/src/runloop_api_client/types/benchmark_update_scenarios_params.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import TypedDict + +from .._types import SequenceNotStr + +__all__ = ["BenchmarkUpdateScenariosParams"] + + +class BenchmarkUpdateScenariosParams(TypedDict, total=False): + scenarios_to_add: Optional[SequenceNotStr[str]] + """Scenario IDs to add to the Benchmark.""" + + scenarios_to_remove: Optional[SequenceNotStr[str]] + """Scenario IDs to remove from the Benchmark.""" diff --git a/src/runloop_api_client/types/benchmark_view.py b/src/runloop_api_client/types/benchmark_view.py index 877c8fe26..4150847ac 100644 --- a/src/runloop_api_client/types/benchmark_view.py +++ b/src/runloop_api_client/types/benchmark_view.py @@ -10,6 +10,10 @@ class BenchmarkView(BaseModel): + """ + A BenchmarkDefinitionView represents a grouped set of Scenarios that together form a Benchmark. + """ + id: str """The ID of the Benchmark.""" diff --git a/src/runloop_api_client/types/benchmarks/run_list_params.py b/src/runloop_api_client/types/benchmarks/run_list_params.py index f93695b2a..a75e1b592 100644 --- a/src/runloop_api_client/types/benchmarks/run_list_params.py +++ b/src/runloop_api_client/types/benchmarks/run_list_params.py @@ -7,12 +7,19 @@ __all__ = ["RunListParams"] -class RunListParams(TypedDict, total=False): - benchmark_id: str - """The Benchmark ID to filter by.""" - +# Split into separate params so that OO SDK list_runs params can omit the benchmark_id +# Neither of these params are exposed to the user, only the derived SDKBenchmarkListRunsParams +class RunSelfListParams(TypedDict, total=False): limit: int - """The limit of items to return. Default is 20.""" + """The limit of items to return. Default is 20. Max is 5000.""" + + name: str + """Filter by name""" starting_after: str """Load the next page of data starting after the item with the given ID.""" + + +class RunListParams(RunSelfListParams, total=False): + benchmark_id: str + """The Benchmark ID to filter by.""" diff --git a/src/runloop_api_client/types/benchmarks/run_list_scenario_runs_params.py b/src/runloop_api_client/types/benchmarks/run_list_scenario_runs_params.py index 241df1a1f..ddce6aa4a 100644 --- a/src/runloop_api_client/types/benchmarks/run_list_scenario_runs_params.py +++ b/src/runloop_api_client/types/benchmarks/run_list_scenario_runs_params.py @@ -9,7 +9,7 @@ class RunListScenarioRunsParams(TypedDict, total=False): limit: int - """The limit of items to return. Default is 20.""" + """The limit of items to return. Default is 20. 
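The update semantics above (provided fields replace, explicit empty values clear) and the new `update_scenarios` endpoint, sketched with placeholder IDs:

```python
from runloop_api_client.sdk import RunloopSDK

runloop = RunloopSDK()
benchmark = runloop.benchmark.from_id("bmd_xxx")  # placeholder benchmark ID

# Only the fields passed here change; unspecified fields are untouched.
benchmark.update(name="renamed-suite", metadata={"team": "evals"})

# Optional fields are cleared by passing an explicitly empty value.
benchmark.update(description="", required_environment_variables=[])

# The scenario-membership endpoint is also exposed on the generated client.
view = runloop.api.benchmarks.update_scenarios(
    benchmark.id,
    scenarios_to_add=["scn_123"],  # placeholder scenario IDs
    scenarios_to_remove=["scn_456"],
)
```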
Max is 5000.""" starting_after: str """Load the next page of data starting after the item with the given ID.""" diff --git a/src/runloop_api_client/types/blueprint_build_parameters.py b/src/runloop_api_client/types/blueprint_build_parameters.py index 129a8047a..52ddfda7c 100644 --- a/src/runloop_api_client/types/blueprint_build_parameters.py +++ b/src/runloop_api_client/types/blueprint_build_parameters.py @@ -11,6 +11,8 @@ class BuildContext(BaseModel): + """A build context backed by an Object.""" + object_id: str """The ID of an object, whose contents are to be used as a build context.""" @@ -18,6 +20,8 @@ class BuildContext(BaseModel): class ServiceCredentials(BaseModel): + """The credentials of the container service.""" + password: str """The password of the container service.""" diff --git a/src/runloop_api_client/types/blueprint_create_params.py b/src/runloop_api_client/types/blueprint_create_params.py index d82de7f35..94156d2e9 100644 --- a/src/runloop_api_client/types/blueprint_create_params.py +++ b/src/runloop_api_client/types/blueprint_create_params.py @@ -71,6 +71,8 @@ class BlueprintCreateParams(TypedDict, total=False): class BuildContext(TypedDict, total=False): + """A build context backed by an Object.""" + object_id: Required[str] """The ID of an object, whose contents are to be used as a build context.""" @@ -78,6 +80,8 @@ class BuildContext(TypedDict, total=False): class ServiceCredentials(TypedDict, total=False): + """The credentials of the container service.""" + password: Required[str] """The password of the container service.""" diff --git a/src/runloop_api_client/types/blueprint_list_params.py b/src/runloop_api_client/types/blueprint_list_params.py index b0a3ade62..f72de7d2f 100644 --- a/src/runloop_api_client/types/blueprint_list_params.py +++ b/src/runloop_api_client/types/blueprint_list_params.py @@ -9,10 +9,13 @@ class BlueprintListParams(TypedDict, total=False): limit: int - """The limit of items to return. Default is 20.""" + """The limit of items to return. Default is 20. Max is 5000.""" name: str """Filter by name""" starting_after: str """Load the next page of data starting after the item with the given ID.""" + + status: str + """Filter by build status (queued, provisioning, building, failed, build_complete)""" diff --git a/src/runloop_api_client/types/blueprint_list_public_params.py b/src/runloop_api_client/types/blueprint_list_public_params.py index d6b11e78e..e0f224f32 100644 --- a/src/runloop_api_client/types/blueprint_list_public_params.py +++ b/src/runloop_api_client/types/blueprint_list_public_params.py @@ -9,10 +9,13 @@ class BlueprintListPublicParams(TypedDict, total=False): limit: int - """The limit of items to return. Default is 20.""" + """The limit of items to return. Default is 20. 
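Given the raised `limit` cap and the new blueprint `status` filter, a pagination sketch (assuming `runloop.blueprint.list` mirrors the other ops classes shown above):

```python
from runloop_api_client.sdk import RunloopSDK

runloop = RunloopSDK()

# limit defaults to 20 and is now capped at 5000.
failed = runloop.blueprint.list(status="failed", limit=100)

# Cursor pagination: pass the last ID seen to fetch the next page.
if failed:
    more = runloop.blueprint.list(status="failed", limit=100, starting_after=failed[-1].id)
```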
Max is 5000.""" name: str """Filter by name""" starting_after: str """Load the next page of data starting after the item with the given ID.""" + + status: str + """Filter by build status (queued, provisioning, building, failed, build_complete)""" diff --git a/src/runloop_api_client/types/blueprint_preview_params.py b/src/runloop_api_client/types/blueprint_preview_params.py index 9f6c4d9bc..4269b734f 100644 --- a/src/runloop_api_client/types/blueprint_preview_params.py +++ b/src/runloop_api_client/types/blueprint_preview_params.py @@ -71,6 +71,8 @@ class BlueprintPreviewParams(TypedDict, total=False): class BuildContext(TypedDict, total=False): + """A build context backed by an Object.""" + object_id: Required[str] """The ID of an object, whose contents are to be used as a build context.""" @@ -78,6 +80,8 @@ class BuildContext(TypedDict, total=False): class ServiceCredentials(TypedDict, total=False): + """The credentials of the container service.""" + password: Required[str] """The password of the container service.""" diff --git a/src/runloop_api_client/types/blueprint_view.py b/src/runloop_api_client/types/blueprint_view.py index 7a10d1686..851b09426 100644 --- a/src/runloop_api_client/types/blueprint_view.py +++ b/src/runloop_api_client/types/blueprint_view.py @@ -10,6 +10,8 @@ class ContainerizedServiceCredentials(BaseModel): + """The credentials of the container service.""" + password: str """The password of the container service.""" @@ -41,6 +43,11 @@ class ContainerizedService(BaseModel): class BlueprintView(BaseModel): + """Blueprints are ways to create customized starting points for Devboxes. + + They allow you to define custom starting points for Devboxes such that environment set up can be cached to improve Devbox boot times. + """ + id: str """The id of the Blueprint.""" diff --git a/src/runloop_api_client/types/devbox_list_disk_snapshots_params.py b/src/runloop_api_client/types/devbox_list_disk_snapshots_params.py index 7ffcf5386..d26c3fbd8 100644 --- a/src/runloop_api_client/types/devbox_list_disk_snapshots_params.py +++ b/src/runloop_api_client/types/devbox_list_disk_snapshots_params.py @@ -14,7 +14,7 @@ class DevboxListDiskSnapshotsParams(TypedDict, total=False): """Devbox ID to filter by.""" limit: int - """The limit of items to return. Default is 20.""" + """The limit of items to return. Default is 20. Max is 5000.""" metadata_key: Annotated[str, PropertyInfo(alias="metadata[key]")] """Filter snapshots by metadata key-value pair. diff --git a/src/runloop_api_client/types/devbox_list_params.py b/src/runloop_api_client/types/devbox_list_params.py index 066b2ed85..c508762da 100644 --- a/src/runloop_api_client/types/devbox_list_params.py +++ b/src/runloop_api_client/types/devbox_list_params.py @@ -9,7 +9,7 @@ class DevboxListParams(TypedDict, total=False): limit: int - """The limit of items to return. Default is 20.""" + """The limit of items to return. Default is 20. Max is 5000.""" starting_after: str """Load the next page of data starting after the item with the given ID.""" diff --git a/src/runloop_api_client/types/devbox_view.py b/src/runloop_api_client/types/devbox_view.py index 007af6575..e2c9a28d8 100644 --- a/src/runloop_api_client/types/devbox_view.py +++ b/src/runloop_api_client/types/devbox_view.py @@ -31,6 +31,11 @@ class StateTransition(BaseModel): class DevboxView(BaseModel): + """A Devbox represents a virtual development environment. + + It is an isolated sandbox that can be given to agents and used to run arbitrary code such as AI generated code. 
+ """ + id: str """The ID of the Devbox.""" diff --git a/src/runloop_api_client/types/devboxes/browser_view.py b/src/runloop_api_client/types/devboxes/browser_view.py index d6d377a28..4486d76ec 100644 --- a/src/runloop_api_client/types/devboxes/browser_view.py +++ b/src/runloop_api_client/types/devboxes/browser_view.py @@ -7,6 +7,10 @@ class BrowserView(BaseModel): + """ + A Browser represents a managed implementation of a browser like Chromiumon top of Devboxes. It includes the tunnel to the live screen and the underlying DevboxView. + """ + connection_url: str """ The url to enable remote connection from browser automation tools like diff --git a/src/runloop_api_client/types/devboxes/computer_create_params.py b/src/runloop_api_client/types/devboxes/computer_create_params.py index febd5aef5..c2e32e035 100644 --- a/src/runloop_api_client/types/devboxes/computer_create_params.py +++ b/src/runloop_api_client/types/devboxes/computer_create_params.py @@ -17,6 +17,8 @@ class ComputerCreateParams(TypedDict, total=False): class DisplayDimensions(TypedDict, total=False): + """Customize the dimensions of the computer display.""" + display_height_px: Required[int] """The height of the display being controlled by the model in pixels.""" diff --git a/src/runloop_api_client/types/devboxes/computer_mouse_interaction_params.py b/src/runloop_api_client/types/devboxes/computer_mouse_interaction_params.py index b28a0723f..a3a02279d 100644 --- a/src/runloop_api_client/types/devboxes/computer_mouse_interaction_params.py +++ b/src/runloop_api_client/types/devboxes/computer_mouse_interaction_params.py @@ -23,6 +23,11 @@ class ComputerMouseInteractionParams(TypedDict, total=False): class Coordinate(TypedDict, total=False): + """ + The x (pixels from the left) and y (pixels from the top) coordinates for the mouse to move or click-drag. Required only by + `action=mouse_move` or `action=left_click_drag` + """ + x: Required[int] """The x coordinate (pixels from the left) for the mouse to move or click-drag.""" diff --git a/src/runloop_api_client/types/devboxes/computer_view.py b/src/runloop_api_client/types/devboxes/computer_view.py index 907629d54..4706d44a0 100644 --- a/src/runloop_api_client/types/devboxes/computer_view.py +++ b/src/runloop_api_client/types/devboxes/computer_view.py @@ -7,6 +7,10 @@ class ComputerView(BaseModel): + """ + A Computer represents an implementation of Anthropic Computer usage on top of Devboxes. It includes the tunnel to the live screen and the underlying DevboxView. + """ + devbox: DevboxView """The underlying devbox the computer setup is running on.""" diff --git a/src/runloop_api_client/types/devboxes/disk_snapshot_list_params.py b/src/runloop_api_client/types/devboxes/disk_snapshot_list_params.py index 7b0f3454f..73e60f457 100644 --- a/src/runloop_api_client/types/devboxes/disk_snapshot_list_params.py +++ b/src/runloop_api_client/types/devboxes/disk_snapshot_list_params.py @@ -14,7 +14,7 @@ class DiskSnapshotListParams(TypedDict, total=False): """Devbox ID to filter by.""" limit: int - """The limit of items to return. Default is 20.""" + """The limit of items to return. Default is 20. Max is 5000.""" metadata_key: Annotated[str, PropertyInfo(alias="metadata[key]")] """Filter snapshots by metadata key-value pair. 
diff --git a/src/runloop_api_client/types/input_context.py b/src/runloop_api_client/types/input_context.py index 5cc697db9..2daae5d45 100644 --- a/src/runloop_api_client/types/input_context.py +++ b/src/runloop_api_client/types/input_context.py @@ -8,6 +8,10 @@ class InputContext(BaseModel): + """ + InputContextView specifies the problem statement along with all additional context for a Scenario. + """ + problem_statement: str """The problem statement for the Scenario.""" diff --git a/src/runloop_api_client/types/input_context_param.py b/src/runloop_api_client/types/input_context_param.py index 7f977ad65..b0b495c4d 100644 --- a/src/runloop_api_client/types/input_context_param.py +++ b/src/runloop_api_client/types/input_context_param.py @@ -9,6 +9,10 @@ class InputContextParam(TypedDict, total=False): + """ + InputContextView specifies the problem statement along with all additional context for a Scenario. + """ + problem_statement: Required[str] """The problem statement for the Scenario.""" diff --git a/src/runloop_api_client/types/inspection_source_param.py b/src/runloop_api_client/types/inspection_source_param.py index ba9e7f397..0d1308d8e 100644 --- a/src/runloop_api_client/types/inspection_source_param.py +++ b/src/runloop_api_client/types/inspection_source_param.py @@ -9,6 +9,8 @@ class InspectionSourceParam(TypedDict, total=False): + """Use a RepositoryInspection as a source of a Blueprint build.""" + inspection_id: Required[str] """The ID of a repository inspection.""" diff --git a/src/runloop_api_client/types/object_download_url_view.py b/src/runloop_api_client/types/object_download_url_view.py index eb35ac3db..d1e726ca5 100644 --- a/src/runloop_api_client/types/object_download_url_view.py +++ b/src/runloop_api_client/types/object_download_url_view.py @@ -6,5 +6,7 @@ class ObjectDownloadURLView(BaseModel): + """A response containing a presigned download URL for an Object.""" + download_url: str """The presigned download URL for the Object.""" diff --git a/src/runloop_api_client/types/object_list_params.py b/src/runloop_api_client/types/object_list_params.py index 084fac54d..eca1c7cdd 100644 --- a/src/runloop_api_client/types/object_list_params.py +++ b/src/runloop_api_client/types/object_list_params.py @@ -12,7 +12,7 @@ class ObjectListParams(TypedDict, total=False): """Filter storage objects by content type.""" limit: int - """The limit of items to return. Default is 20.""" + """The limit of items to return. Default is 20. Max is 5000.""" name: str """Filter storage objects by name (partial match supported).""" diff --git a/src/runloop_api_client/types/object_list_public_params.py b/src/runloop_api_client/types/object_list_public_params.py index 19b18ba49..67475b263 100644 --- a/src/runloop_api_client/types/object_list_public_params.py +++ b/src/runloop_api_client/types/object_list_public_params.py @@ -12,7 +12,7 @@ class ObjectListPublicParams(TypedDict, total=False): """Filter storage objects by content type.""" limit: int - """The limit of items to return. Default is 20.""" + """The limit of items to return. Default is 20.
Max is 5000.""" name: str """Filter storage objects by name (partial match supported).""" diff --git a/src/runloop_api_client/types/object_list_view.py b/src/runloop_api_client/types/object_list_view.py index 049b1be81..cfd546c0c 100644 --- a/src/runloop_api_client/types/object_list_view.py +++ b/src/runloop_api_client/types/object_list_view.py @@ -9,6 +9,8 @@ class ObjectListView(BaseModel): + """A paginated list of Objects.""" + has_more: bool """True if there are more results available beyond this page.""" diff --git a/src/runloop_api_client/types/object_view.py b/src/runloop_api_client/types/object_view.py index 80aea62ed..d4ced655f 100644 --- a/src/runloop_api_client/types/object_view.py +++ b/src/runloop_api_client/types/object_view.py @@ -9,6 +9,8 @@ class ObjectView(BaseModel): + """An Object represents a stored data entity with metadata.""" + id: str """The unique identifier of the Object.""" diff --git a/src/runloop_api_client/types/repository_connection_view.py b/src/runloop_api_client/types/repository_connection_view.py index 74718de27..e126071e8 100644 --- a/src/runloop_api_client/types/repository_connection_view.py +++ b/src/runloop_api_client/types/repository_connection_view.py @@ -6,6 +6,8 @@ class RepositoryConnectionView(BaseModel): + """The ID of the Repository.""" + id: str """The ID of the Repository.""" diff --git a/src/runloop_api_client/types/repository_inspection_details.py b/src/runloop_api_client/types/repository_inspection_details.py index f83932d29..0870ce693 100644 --- a/src/runloop_api_client/types/repository_inspection_details.py +++ b/src/runloop_api_client/types/repository_inspection_details.py @@ -10,6 +10,8 @@ class WorkflowContextsActionsContext(BaseModel): + """Details about actions processing for this workflow.""" + actions_skipped_unnecessary: List[str] """ Actions that were skipped because they were unnecessary (e.g., upload @@ -26,6 +28,10 @@ class WorkflowContextsActionsContext(BaseModel): class WorkflowContexts(BaseModel): + """ + Workflow context containing file name and details about actions processing during inspection. + """ + actions_context: WorkflowContextsActionsContext """Details about actions processing for this workflow.""" diff --git a/src/runloop_api_client/types/repository_list_params.py b/src/runloop_api_client/types/repository_list_params.py index 91fd7f352..d5f7b248a 100644 --- a/src/runloop_api_client/types/repository_list_params.py +++ b/src/runloop_api_client/types/repository_list_params.py @@ -9,7 +9,7 @@ class RepositoryListParams(TypedDict, total=False): limit: int - """The limit of items to return. Default is 20.""" + """The limit of items to return. Default is 20. Max is 5000.""" name: str """Filter by repository name""" diff --git a/src/runloop_api_client/types/repository_manifest_view.py b/src/runloop_api_client/types/repository_manifest_view.py index 461722b59..acb862672 100644 --- a/src/runloop_api_client/types/repository_manifest_view.py +++ b/src/runloop_api_client/types/repository_manifest_view.py @@ -17,6 +17,8 @@ class ContainerConfig(BaseModel): + """Container configuration specifying the base image and setup commands.""" + base_image_name: str """The name of the base image. @@ -41,6 +43,10 @@ class Language(BaseModel): class WorkspaceDevCommands(BaseModel): + """ + Extracted common commands important to the developer life cycle like linting, testing, building, etc. + """ + build: Optional[List[str]] = None """Build command (e.g. 
npm run build).""" @@ -58,6 +64,10 @@ class WorkspaceDevCommands(BaseModel): class Workspace(BaseModel): + """ + A workspace is a buildable unit of code within a repository and often represents a deployable unit of code like a backend service or a frontend app. + """ + build_tool: List[str] """Name of the build tool used (e.g. pip, npm).""" @@ -96,6 +106,8 @@ class Workspace(BaseModel): class ContainerizedServiceCredentials(BaseModel): + """The credentials of the container service.""" + password: str """The password of the container service.""" @@ -127,6 +139,10 @@ class ContainerizedService(BaseModel): class RepositoryManifestView(BaseModel): + """ + The repository manifest contains container configuration and workspace definitions for a repository. + """ + container_config: ContainerConfig """Container configuration specifying the base image and setup commands.""" diff --git a/src/runloop_api_client/types/scenario_environment.py b/src/runloop_api_client/types/scenario_environment.py index 94e244df9..b6ac9b039 100644 --- a/src/runloop_api_client/types/scenario_environment.py +++ b/src/runloop_api_client/types/scenario_environment.py @@ -9,6 +9,10 @@ class ScenarioEnvironment(BaseModel): + """ + ScenarioEnvironmentParameters specify the environment in which a Scenario will be run. + """ + blueprint_id: Optional[str] = None """Use the blueprint with matching ID.""" diff --git a/src/runloop_api_client/types/scenario_environment_param.py b/src/runloop_api_client/types/scenario_environment_param.py index 5069e5943..6a219d250 100644 --- a/src/runloop_api_client/types/scenario_environment_param.py +++ b/src/runloop_api_client/types/scenario_environment_param.py @@ -11,6 +11,10 @@ class ScenarioEnvironmentParam(TypedDict, total=False): + """ + ScenarioEnvironmentParameters specify the environment in which a Scenario will be run. + """ + blueprint_id: Optional[str] """Use the blueprint with matching ID.""" diff --git a/src/runloop_api_client/types/scenario_list_params.py b/src/runloop_api_client/types/scenario_list_params.py index 917da6c94..45ff3a87b 100644 --- a/src/runloop_api_client/types/scenario_list_params.py +++ b/src/runloop_api_client/types/scenario_list_params.py @@ -12,10 +12,13 @@ class ScenarioListParams(TypedDict, total=False): """Filter scenarios by benchmark ID.""" limit: int - """The limit of items to return. Default is 20.""" + """The limit of items to return. Default is 20. Max is 5000.""" name: str """Query for Scenarios with a given name.""" starting_after: str """Load the next page of data starting after the item with the given ID.""" + + validation_type: str + """Filter by validation type""" diff --git a/src/runloop_api_client/types/scenario_list_public_params.py b/src/runloop_api_client/types/scenario_list_public_params.py index 7f413a517..be7e40b8d 100644 --- a/src/runloop_api_client/types/scenario_list_public_params.py +++ b/src/runloop_api_client/types/scenario_list_public_params.py @@ -9,7 +9,7 @@ class ScenarioListPublicParams(TypedDict, total=False): limit: int - """The limit of items to return. Default is 20.""" + """The limit of items to return. Default is 20. 
Max is 5000.""" name: str """Query for Scenarios with a given name.""" diff --git a/src/runloop_api_client/types/scenario_run_view.py b/src/runloop_api_client/types/scenario_run_view.py index 225e90a89..68d4c3573 100644 --- a/src/runloop_api_client/types/scenario_run_view.py +++ b/src/runloop_api_client/types/scenario_run_view.py @@ -10,6 +10,11 @@ class ScenarioRunView(BaseModel): + """A ScenarioRunView represents a single run of a Scenario on a Devbox. + + When completed, the ScenarioRun will contain the final score and output of the run. + """ + id: str """ID of the ScenarioRun.""" diff --git a/src/runloop_api_client/types/scenario_update_params.py b/src/runloop_api_client/types/scenario_update_params.py index 908988961..9d0fc65e5 100644 --- a/src/runloop_api_client/types/scenario_update_params.py +++ b/src/runloop_api_client/types/scenario_update_params.py @@ -21,26 +21,29 @@ class ScenarioUpdateParams(TypedDict, total=False): """The input context for the Scenario.""" metadata: Optional[Dict[str, str]] - """User defined metadata to attach to the scenario for organization.""" + """User defined metadata to attach to the scenario. Pass in empty map to clear.""" name: Optional[str] - """Name of the scenario.""" + """Name of the scenario. Cannot be blank.""" reference_output: Optional[str] """A string representation of the reference output to solve the scenario. Commonly can be the result of a git diff or a sequence of command actions to - apply to the environment. + apply to the environment. Pass in empty string to clear. """ required_environment_variables: Optional[SequenceNotStr[str]] - """Environment variables required to run the scenario.""" + """Environment variables required to run the scenario. + + Pass in empty list to clear. + """ required_secret_names: Optional[SequenceNotStr[str]] - """Secrets required to run the scenario.""" + """Secrets required to run the scenario. Pass in empty list to clear.""" scoring_contract: Optional[ScoringContractUpdateParam] """The scoring contract for the Scenario.""" validation_type: Optional[Literal["UNSPECIFIED", "FORWARD", "REVERSE", "EVALUATION"]] - """Validation strategy.""" + """Validation strategy. Pass in empty string to clear.""" diff --git a/src/runloop_api_client/types/scenario_view.py b/src/runloop_api_client/types/scenario_view.py index 58c0dbb26..5c5ba0164 100644 --- a/src/runloop_api_client/types/scenario_view.py +++ b/src/runloop_api_client/types/scenario_view.py @@ -12,6 +12,10 @@ class ScenarioView(BaseModel): + """ + A ScenarioDefinitionView represents a repeatable AI coding evaluation test, complete with initial environment and scoring contract. + """ + id: str """The ID of the Scenario.""" diff --git a/src/runloop_api_client/types/scenarios/run_list_params.py b/src/runloop_api_client/types/scenarios/run_list_params.py index 17a2715c4..97eeb425a 100644 --- a/src/runloop_api_client/types/scenarios/run_list_params.py +++ b/src/runloop_api_client/types/scenarios/run_list_params.py @@ -8,11 +8,20 @@ class RunListParams(TypedDict, total=False): + benchmark_run_id: str + """Filter by benchmark run ID""" + limit: int - """The limit of items to return. Default is 20.""" + """The limit of items to return. Default is 20. 
Max is 5000.""" + + name: str + """Filter by name""" scenario_id: str """Filter runs associated to Scenario given ID""" starting_after: str """Load the next page of data starting after the item with the given ID.""" + + state: str + """Filter by state""" diff --git a/src/runloop_api_client/types/scenarios/scorer_create_response.py b/src/runloop_api_client/types/scenarios/scorer_create_response.py index 376c50f70..2b6e665a1 100644 --- a/src/runloop_api_client/types/scenarios/scorer_create_response.py +++ b/src/runloop_api_client/types/scenarios/scorer_create_response.py @@ -6,6 +6,8 @@ class ScorerCreateResponse(BaseModel): + """A ScenarioScorerView represents a custom scoring function for a Scenario.""" + id: str """ID for the scenario scorer.""" diff --git a/src/runloop_api_client/types/scenarios/scorer_list_params.py b/src/runloop_api_client/types/scenarios/scorer_list_params.py index 0577a327e..f80e7f6ac 100644 --- a/src/runloop_api_client/types/scenarios/scorer_list_params.py +++ b/src/runloop_api_client/types/scenarios/scorer_list_params.py @@ -9,7 +9,7 @@ class ScorerListParams(TypedDict, total=False): limit: int - """The limit of items to return. Default is 20.""" + """The limit of items to return. Default is 20. Max is 5000.""" starting_after: str """Load the next page of data starting after the item with the given ID.""" diff --git a/src/runloop_api_client/types/scenarios/scorer_list_response.py b/src/runloop_api_client/types/scenarios/scorer_list_response.py index bdbc9b9de..46eb8802e 100644 --- a/src/runloop_api_client/types/scenarios/scorer_list_response.py +++ b/src/runloop_api_client/types/scenarios/scorer_list_response.py @@ -6,6 +6,8 @@ class ScorerListResponse(BaseModel): + """A ScenarioScorerView represents a custom scoring function for a Scenario.""" + id: str """ID for the scenario scorer.""" diff --git a/src/runloop_api_client/types/scenarios/scorer_retrieve_response.py b/src/runloop_api_client/types/scenarios/scorer_retrieve_response.py index ab0f85231..a67cd35c0 100644 --- a/src/runloop_api_client/types/scenarios/scorer_retrieve_response.py +++ b/src/runloop_api_client/types/scenarios/scorer_retrieve_response.py @@ -6,6 +6,8 @@ class ScorerRetrieveResponse(BaseModel): + """A ScenarioScorerView represents a custom scoring function for a Scenario.""" + id: str """ID for the scenario scorer.""" diff --git a/src/runloop_api_client/types/scenarios/scorer_update_response.py b/src/runloop_api_client/types/scenarios/scorer_update_response.py index 60a1b5e4b..91e668d22 100644 --- a/src/runloop_api_client/types/scenarios/scorer_update_response.py +++ b/src/runloop_api_client/types/scenarios/scorer_update_response.py @@ -6,6 +6,8 @@ class ScorerUpdateResponse(BaseModel): + """A ScenarioScorerView represents a custom scoring function for a Scenario.""" + id: str """ID for the scenario scorer.""" diff --git a/src/runloop_api_client/types/scoring_contract.py b/src/runloop_api_client/types/scoring_contract.py index f19f5aa54..d3c646fda 100644 --- a/src/runloop_api_client/types/scoring_contract.py +++ b/src/runloop_api_client/types/scoring_contract.py @@ -9,5 +9,9 @@ class ScoringContract(BaseModel): + """ + InputContextView specifies the problem statement along with all additional context for a Scenario. 
+ """ + scoring_function_parameters: List[ScoringFunction] """A list of scoring functions used to evaluate the Scenario.""" diff --git a/src/runloop_api_client/types/scoring_contract_param.py b/src/runloop_api_client/types/scoring_contract_param.py index 4a68abb93..46f1b7b6d 100644 --- a/src/runloop_api_client/types/scoring_contract_param.py +++ b/src/runloop_api_client/types/scoring_contract_param.py @@ -11,5 +11,9 @@ class ScoringContractParam(TypedDict, total=False): + """ + InputContextView specifies the problem statement along with all additional context for a Scenario. + """ + scoring_function_parameters: Required[Iterable[ScoringFunctionParam]] """A list of scoring functions used to evaluate the Scenario.""" diff --git a/src/runloop_api_client/types/scoring_contract_result_view.py b/src/runloop_api_client/types/scoring_contract_result_view.py index 823de83c4..85e1a42c4 100644 --- a/src/runloop_api_client/types/scoring_contract_result_view.py +++ b/src/runloop_api_client/types/scoring_contract_result_view.py @@ -9,6 +9,10 @@ class ScoringContractResultView(BaseModel): + """ + A ScoringContractResultView represents the result of running all scoring functions on a given input context. + """ + score: float """Total score for all scoring contracts. This will be a value between 0 and 1.""" diff --git a/src/runloop_api_client/types/scoring_function.py b/src/runloop_api_client/types/scoring_function.py index ba4aea9e1..fe5d2a467 100644 --- a/src/runloop_api_client/types/scoring_function.py +++ b/src/runloop_api_client/types/scoring_function.py @@ -20,6 +20,8 @@ class ScorerAstGrepScoringFunction(BaseModel): + """AstGrepScoringFunction utilizes structured coach search for scoring.""" + pattern: str """AST pattern to match. @@ -37,6 +39,10 @@ class ScorerAstGrepScoringFunction(BaseModel): class ScorerBashScriptScoringFunction(BaseModel): + """ + BashScriptScoringFunction is a scoring function specified by a bash script that will be run in the context of your environment. + """ + type: Literal["bash_script_scorer"] bash_script: Optional[str] = None @@ -48,6 +54,10 @@ class ScorerBashScriptScoringFunction(BaseModel): class ScorerCommandScoringFunction(BaseModel): + """ + CommandScoringFunction executes a single command and checks the result.The output of the command will be printed. Scoring will passed if the command returns status code 0, otherwise it will be failed. + """ + type: Literal["command_scorer"] command: Optional[str] = None @@ -55,6 +65,8 @@ class ScorerCommandScoringFunction(BaseModel): class ScorerCustomScoringFunction(BaseModel): + """CustomScoringFunction is a custom, user defined scoring function.""" + custom_scorer_type: str """Type of the scoring function, previously registered with Runloop.""" @@ -65,6 +77,10 @@ class ScorerCustomScoringFunction(BaseModel): class ScorerPythonScriptScoringFunction(BaseModel): + """ + PythonScriptScoringFunction will run a python script in the context of your environment as a ScoringFunction. + """ + python_script: str """Python script to be run. @@ -96,6 +112,10 @@ class ScorerTestBasedScoringFunctionTestFile(BaseModel): class ScorerTestBasedScoringFunction(BaseModel): + """ + TestBasedScoringFunction writes test files to disk and executes a test command to verify the solution. 
+ """ + type: Literal["test_based_scorer"] test_command: Optional[str] = None @@ -119,6 +139,8 @@ class ScorerTestBasedScoringFunction(BaseModel): class ScoringFunction(BaseModel): + """ScoringFunction specifies a method of scoring a Scenario.""" + name: str """Name of scoring function. Names must only contain ``[a-zA-Z0-9_-]``.""" diff --git a/src/runloop_api_client/types/scoring_function_param.py b/src/runloop_api_client/types/scoring_function_param.py index f9b6b26c7..033101d52 100644 --- a/src/runloop_api_client/types/scoring_function_param.py +++ b/src/runloop_api_client/types/scoring_function_param.py @@ -19,6 +19,8 @@ class ScorerAstGrepScoringFunction(TypedDict, total=False): + """AstGrepScoringFunction utilizes structured coach search for scoring.""" + pattern: Required[str] """AST pattern to match. @@ -36,6 +38,10 @@ class ScorerAstGrepScoringFunction(TypedDict, total=False): class ScorerBashScriptScoringFunction(TypedDict, total=False): + """ + BashScriptScoringFunction is a scoring function specified by a bash script that will be run in the context of your environment. + """ + type: Required[Literal["bash_script_scorer"]] bash_script: str @@ -47,6 +53,10 @@ class ScorerBashScriptScoringFunction(TypedDict, total=False): class ScorerCommandScoringFunction(TypedDict, total=False): + """ + CommandScoringFunction executes a single command and checks the result.The output of the command will be printed. Scoring will passed if the command returns status code 0, otherwise it will be failed. + """ + type: Required[Literal["command_scorer"]] command: str @@ -54,6 +64,8 @@ class ScorerCommandScoringFunction(TypedDict, total=False): class ScorerCustomScoringFunction(TypedDict, total=False): + """CustomScoringFunction is a custom, user defined scoring function.""" + custom_scorer_type: Required[str] """Type of the scoring function, previously registered with Runloop.""" @@ -64,6 +76,10 @@ class ScorerCustomScoringFunction(TypedDict, total=False): class ScorerPythonScriptScoringFunction(TypedDict, total=False): + """ + PythonScriptScoringFunction will run a python script in the context of your environment as a ScoringFunction. + """ + python_script: Required[str] """Python script to be run. @@ -95,6 +111,10 @@ class ScorerTestBasedScoringFunctionTestFile(TypedDict, total=False): class ScorerTestBasedScoringFunction(TypedDict, total=False): + """ + TestBasedScoringFunction writes test files to disk and executes a test command to verify the solution. + """ + type: Required[Literal["test_based_scorer"]] test_command: str @@ -115,6 +135,8 @@ class ScorerTestBasedScoringFunction(TypedDict, total=False): class ScoringFunctionParam(TypedDict, total=False): + """ScoringFunction specifies a method of scoring a Scenario.""" + name: Required[str] """Name of scoring function. Names must only contain ``[a-zA-Z0-9_-]``.""" diff --git a/src/runloop_api_client/types/scoring_function_result_view.py b/src/runloop_api_client/types/scoring_function_result_view.py index 8f782df11..4fe5b67cb 100644 --- a/src/runloop_api_client/types/scoring_function_result_view.py +++ b/src/runloop_api_client/types/scoring_function_result_view.py @@ -8,6 +8,10 @@ class ScoringFunctionResultView(BaseModel): + """ + A ScoringFunctionResultView represents the result of running a single scoring function on a given input context. 
+ """ + output: str """Log output of the scoring function.""" diff --git a/src/runloop_api_client/types/secret_list_params.py b/src/runloop_api_client/types/secret_list_params.py index 296a66b62..13d25bd7e 100644 --- a/src/runloop_api_client/types/secret_list_params.py +++ b/src/runloop_api_client/types/secret_list_params.py @@ -9,4 +9,4 @@ class SecretListParams(TypedDict, total=False): limit: int - """The limit of items to return. Default is 20.""" + """The limit of items to return. Default is 20. Max is 5000.""" diff --git a/src/runloop_api_client/types/secret_list_view.py b/src/runloop_api_client/types/secret_list_view.py index d7feec9c5..4d66fa2e4 100644 --- a/src/runloop_api_client/types/secret_list_view.py +++ b/src/runloop_api_client/types/secret_list_view.py @@ -9,6 +9,8 @@ class SecretListView(BaseModel): + """A paginated list of Secrets.""" + has_more: bool """True if there are more results available beyond this page.""" diff --git a/src/runloop_api_client/types/secret_view.py b/src/runloop_api_client/types/secret_view.py index 1303f7bfd..bd1c8811e 100644 --- a/src/runloop_api_client/types/secret_view.py +++ b/src/runloop_api_client/types/secret_view.py @@ -6,6 +6,10 @@ class SecretView(BaseModel): + """ + A Secret represents a key-value pair that can be securely stored and used in Devboxes as environment variables. + """ + id: str """The unique identifier of the Secret.""" diff --git a/src/runloop_api_client/types/shared/agent_source.py b/src/runloop_api_client/types/shared/agent_source.py index 25bcbbc1d..9282d6181 100644 --- a/src/runloop_api_client/types/shared/agent_source.py +++ b/src/runloop_api_client/types/shared/agent_source.py @@ -8,6 +8,8 @@ class Git(BaseModel): + """Git source configuration""" + repository: str """Git repository URL""" @@ -19,20 +21,21 @@ class Git(BaseModel): class Npm(BaseModel): + """NPM source configuration""" + package_name: str """NPM package name""" agent_setup: Optional[List[str]] = None """Setup commands to run after installation""" - npm_version: Optional[str] = None - """NPM version constraint""" - registry_url: Optional[str] = None """NPM registry URL""" class Object(BaseModel): + """Object store source configuration""" + object_id: str """Object ID""" @@ -41,20 +44,21 @@ class Object(BaseModel): class Pip(BaseModel): + """Pip source configuration""" + package_name: str """Pip package name""" agent_setup: Optional[List[str]] = None """Setup commands to run after installation""" - pip_version: Optional[str] = None - """Pip version constraint""" - registry_url: Optional[str] = None """Pip registry URL""" class AgentSource(BaseModel): + """Agent source configuration.""" + type: str """Source type: npm, pip, object, or git""" diff --git a/src/runloop_api_client/types/shared/launch_parameters.py b/src/runloop_api_client/types/shared/launch_parameters.py index f70023d66..dc0ccfccd 100644 --- a/src/runloop_api_client/types/shared/launch_parameters.py +++ b/src/runloop_api_client/types/shared/launch_parameters.py @@ -10,6 +10,11 @@ class UserParameters(BaseModel): + """Specify the user for execution on Devbox. + + If not set, default `user` will be used. + """ + uid: int """User ID (UID) for the Linux user. Must be a non-negative integer.""" @@ -18,6 +23,10 @@ class UserParameters(BaseModel): class LaunchParameters(BaseModel): + """ + LaunchParameters enable you to customize the resources available to your Devbox as well as the environment set up that should be completed before the Devbox is marked as 'running'. 
+ """ + after_idle: Optional[AfterIdle] = None """Configure Devbox lifecycle based on idle activity. diff --git a/src/runloop_api_client/types/shared_params/agent_source.py b/src/runloop_api_client/types/shared_params/agent_source.py index 9f5a50845..7132414c8 100644 --- a/src/runloop_api_client/types/shared_params/agent_source.py +++ b/src/runloop_api_client/types/shared_params/agent_source.py @@ -11,6 +11,8 @@ class Git(TypedDict, total=False): + """Git source configuration""" + repository: Required[str] """Git repository URL""" @@ -22,20 +24,21 @@ class Git(TypedDict, total=False): class Npm(TypedDict, total=False): + """NPM source configuration""" + package_name: Required[str] """NPM package name""" agent_setup: Optional[SequenceNotStr[str]] """Setup commands to run after installation""" - npm_version: Optional[str] - """NPM version constraint""" - registry_url: Optional[str] """NPM registry URL""" class Object(TypedDict, total=False): + """Object store source configuration""" + object_id: Required[str] """Object ID""" @@ -44,20 +47,21 @@ class Object(TypedDict, total=False): class Pip(TypedDict, total=False): + """Pip source configuration""" + package_name: Required[str] """Pip package name""" agent_setup: Optional[SequenceNotStr[str]] """Setup commands to run after installation""" - pip_version: Optional[str] - """Pip version constraint""" - registry_url: Optional[str] """Pip registry URL""" class AgentSource(TypedDict, total=False): + """Agent source configuration.""" + type: Required[str] """Source type: npm, pip, object, or git""" diff --git a/src/runloop_api_client/types/shared_params/launch_parameters.py b/src/runloop_api_client/types/shared_params/launch_parameters.py index f0fe87636..cd2a97ee4 100644 --- a/src/runloop_api_client/types/shared_params/launch_parameters.py +++ b/src/runloop_api_client/types/shared_params/launch_parameters.py @@ -12,6 +12,11 @@ class UserParameters(TypedDict, total=False): + """Specify the user for execution on Devbox. + + If not set, default `user` will be used. + """ + uid: Required[int] """User ID (UID) for the Linux user. Must be a non-negative integer.""" @@ -20,6 +25,10 @@ class UserParameters(TypedDict, total=False): class LaunchParameters(TypedDict, total=False): + """ + LaunchParameters enable you to customize the resources available to your Devbox as well as the environment set up that should be completed before the Devbox is marked as 'running'. + """ + after_idle: Optional[AfterIdle] """Configure Devbox lifecycle based on idle activity. 
diff --git a/tests/api_resources/benchmarks/test_runs.py b/tests/api_resources/benchmarks/test_runs.py index a95855518..9ab74fc9a 100644 --- a/tests/api_resources/benchmarks/test_runs.py +++ b/tests/api_resources/benchmarks/test_runs.py @@ -66,6 +66,7 @@ def test_method_list_with_all_params(self, client: Runloop) -> None: run = client.benchmarks.runs.list( benchmark_id="benchmark_id", limit=0, + name="name", starting_after="starting_after", ) assert_matches_type(SyncBenchmarkRunsCursorIDPage[BenchmarkRunView], run, path=["response"]) @@ -268,6 +269,7 @@ async def test_method_list_with_all_params(self, async_client: AsyncRunloop) -> run = await async_client.benchmarks.runs.list( benchmark_id="benchmark_id", limit=0, + name="name", starting_after="starting_after", ) assert_matches_type(AsyncBenchmarkRunsCursorIDPage[BenchmarkRunView], run, path=["response"]) diff --git a/tests/api_resources/scenarios/test_runs.py b/tests/api_resources/scenarios/test_runs.py index 7b981e9bb..f3ac8eb88 100644 --- a/tests/api_resources/scenarios/test_runs.py +++ b/tests/api_resources/scenarios/test_runs.py @@ -72,9 +72,12 @@ def test_method_list(self, client: Runloop) -> None: @parametrize def test_method_list_with_all_params(self, client: Runloop) -> None: run = client.scenarios.runs.list( + benchmark_run_id="benchmark_run_id", limit=0, + name="name", scenario_id="scenario_id", starting_after="starting_after", + state="state", ) assert_matches_type(SyncBenchmarkRunsCursorIDPage[ScenarioRunView], run, path=["response"]) @@ -320,9 +323,12 @@ async def test_method_list(self, async_client: AsyncRunloop) -> None: @parametrize async def test_method_list_with_all_params(self, async_client: AsyncRunloop) -> None: run = await async_client.scenarios.runs.list( + benchmark_run_id="benchmark_run_id", limit=0, + name="name", scenario_id="scenario_id", starting_after="starting_after", + state="state", ) assert_matches_type(AsyncBenchmarkRunsCursorIDPage[ScenarioRunView], run, path=["response"]) diff --git a/tests/api_resources/test_agents.py b/tests/api_resources/test_agents.py index 6f8096491..693eec250 100644 --- a/tests/api_resources/test_agents.py +++ b/tests/api_resources/test_agents.py @@ -22,6 +22,7 @@ class TestAgents: def test_method_create(self, client: Runloop) -> None: agent = client.agents.create( name="name", + version="version", ) assert_matches_type(AgentView, agent, path=["response"]) @@ -29,6 +30,7 @@ def test_method_create(self, client: Runloop) -> None: def test_method_create_with_all_params(self, client: Runloop) -> None: agent = client.agents.create( name="name", + version="version", source={ "type": "type", "git": { @@ -39,7 +41,6 @@ def test_method_create_with_all_params(self, client: Runloop) -> None: "npm": { "package_name": "package_name", "agent_setup": ["string"], - "npm_version": "npm_version", "registry_url": "registry_url", }, "object": { @@ -49,7 +50,6 @@ def test_method_create_with_all_params(self, client: Runloop) -> None: "pip": { "package_name": "package_name", "agent_setup": ["string"], - "pip_version": "pip_version", "registry_url": "registry_url", }, }, @@ -60,6 +60,7 @@ def test_method_create_with_all_params(self, client: Runloop) -> None: def test_raw_response_create(self, client: Runloop) -> None: response = client.agents.with_raw_response.create( name="name", + version="version", ) assert response.is_closed is True @@ -71,6 +72,7 @@ def test_raw_response_create(self, client: Runloop) -> None: def test_streaming_response_create(self, client: Runloop) -> None: with 
client.agents.with_streaming_response.create( name="name", + version="version", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -131,6 +133,7 @@ def test_method_list_with_all_params(self, client: Runloop) -> None: name="name", search="search", starting_after="starting_after", + version="version", ) assert_matches_type(SyncAgentsCursorIDPage[AgentView], agent, path=["response"]) @@ -164,6 +167,7 @@ class TestAsyncAgents: async def test_method_create(self, async_client: AsyncRunloop) -> None: agent = await async_client.agents.create( name="name", + version="version", ) assert_matches_type(AgentView, agent, path=["response"]) @@ -171,6 +175,7 @@ async def test_method_create(self, async_client: AsyncRunloop) -> None: async def test_method_create_with_all_params(self, async_client: AsyncRunloop) -> None: agent = await async_client.agents.create( name="name", + version="version", source={ "type": "type", "git": { @@ -181,7 +186,6 @@ async def test_method_create_with_all_params(self, async_client: AsyncRunloop) - "npm": { "package_name": "package_name", "agent_setup": ["string"], - "npm_version": "npm_version", "registry_url": "registry_url", }, "object": { @@ -191,7 +195,6 @@ async def test_method_create_with_all_params(self, async_client: AsyncRunloop) - "pip": { "package_name": "package_name", "agent_setup": ["string"], - "pip_version": "pip_version", "registry_url": "registry_url", }, }, @@ -202,6 +205,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncRunloop) - async def test_raw_response_create(self, async_client: AsyncRunloop) -> None: response = await async_client.agents.with_raw_response.create( name="name", + version="version", ) assert response.is_closed is True @@ -213,6 +217,7 @@ async def test_raw_response_create(self, async_client: AsyncRunloop) -> None: async def test_streaming_response_create(self, async_client: AsyncRunloop) -> None: async with async_client.agents.with_streaming_response.create( name="name", + version="version", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -273,6 +278,7 @@ async def test_method_list_with_all_params(self, async_client: AsyncRunloop) -> name="name", search="search", starting_after="starting_after", + version="version", ) assert_matches_type(AsyncAgentsCursorIDPage[AgentView], agent, path=["response"]) diff --git a/tests/api_resources/test_benchmarks.py b/tests/api_resources/test_benchmarks.py index 891756def..bb001a532 100644 --- a/tests/api_resources/test_benchmarks.py +++ b/tests/api_resources/test_benchmarks.py @@ -108,7 +108,6 @@ def test_path_params_retrieve(self, client: Runloop) -> None: def test_method_update(self, client: Runloop) -> None: benchmark = client.benchmarks.update( id="id", - name="name", ) assert_matches_type(BenchmarkView, benchmark, path=["response"]) @@ -116,10 +115,10 @@ def test_method_update(self, client: Runloop) -> None: def test_method_update_with_all_params(self, client: Runloop) -> None: benchmark = client.benchmarks.update( id="id", - name="name", attribution="attribution", description="description", metadata={"foo": "string"}, + name="name", required_environment_variables=["string"], required_secret_names=["string"], scenario_ids=["string"], @@ -130,7 +129,6 @@ def test_method_update_with_all_params(self, client: Runloop) -> None: def test_raw_response_update(self, client: Runloop) -> None: response = 
client.benchmarks.with_raw_response.update( id="id", - name="name", ) assert response.is_closed is True @@ -142,7 +140,6 @@ def test_raw_response_update(self, client: Runloop) -> None: def test_streaming_response_update(self, client: Runloop) -> None: with client.benchmarks.with_streaming_response.update( id="id", - name="name", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -157,7 +154,6 @@ def test_path_params_update(self, client: Runloop) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `id` but received ''"): client.benchmarks.with_raw_response.update( id="", - name="name", ) @parametrize @@ -169,6 +165,7 @@ def test_method_list(self, client: Runloop) -> None: def test_method_list_with_all_params(self, client: Runloop) -> None: benchmark = client.benchmarks.list( limit=0, + name="name", starting_after="starting_after", ) assert_matches_type(SyncBenchmarksCursorIDPage[BenchmarkView], benchmark, path=["response"]) @@ -344,6 +341,53 @@ def test_streaming_response_start_run(self, client: Runloop) -> None: assert cast(Any, response.is_closed) is True + @parametrize + def test_method_update_scenarios(self, client: Runloop) -> None: + benchmark = client.benchmarks.update_scenarios( + id="id", + ) + assert_matches_type(BenchmarkView, benchmark, path=["response"]) + + @parametrize + def test_method_update_scenarios_with_all_params(self, client: Runloop) -> None: + benchmark = client.benchmarks.update_scenarios( + id="id", + scenarios_to_add=["string"], + scenarios_to_remove=["string"], + ) + assert_matches_type(BenchmarkView, benchmark, path=["response"]) + + @parametrize + def test_raw_response_update_scenarios(self, client: Runloop) -> None: + response = client.benchmarks.with_raw_response.update_scenarios( + id="id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + benchmark = response.parse() + assert_matches_type(BenchmarkView, benchmark, path=["response"]) + + @parametrize + def test_streaming_response_update_scenarios(self, client: Runloop) -> None: + with client.benchmarks.with_streaming_response.update_scenarios( + id="id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + benchmark = response.parse() + assert_matches_type(BenchmarkView, benchmark, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_update_scenarios(self, client: Runloop) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `id` but received ''"): + client.benchmarks.with_raw_response.update_scenarios( + id="", + ) + class TestAsyncBenchmarks: parametrize = pytest.mark.parametrize( @@ -436,7 +480,6 @@ async def test_path_params_retrieve(self, async_client: AsyncRunloop) -> None: async def test_method_update(self, async_client: AsyncRunloop) -> None: benchmark = await async_client.benchmarks.update( id="id", - name="name", ) assert_matches_type(BenchmarkView, benchmark, path=["response"]) @@ -444,10 +487,10 @@ async def test_method_update(self, async_client: AsyncRunloop) -> None: async def test_method_update_with_all_params(self, async_client: AsyncRunloop) -> None: benchmark = await async_client.benchmarks.update( id="id", - name="name", attribution="attribution", description="description", metadata={"foo": "string"}, + name="name", required_environment_variables=["string"], 
required_secret_names=["string"], scenario_ids=["string"], @@ -458,7 +501,6 @@ async def test_method_update_with_all_params(self, async_client: AsyncRunloop) - async def test_raw_response_update(self, async_client: AsyncRunloop) -> None: response = await async_client.benchmarks.with_raw_response.update( id="id", - name="name", ) assert response.is_closed is True @@ -470,7 +512,6 @@ async def test_raw_response_update(self, async_client: AsyncRunloop) -> None: async def test_streaming_response_update(self, async_client: AsyncRunloop) -> None: async with async_client.benchmarks.with_streaming_response.update( id="id", - name="name", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -485,7 +526,6 @@ async def test_path_params_update(self, async_client: AsyncRunloop) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `id` but received ''"): await async_client.benchmarks.with_raw_response.update( id="", - name="name", ) @parametrize @@ -497,6 +537,7 @@ async def test_method_list(self, async_client: AsyncRunloop) -> None: async def test_method_list_with_all_params(self, async_client: AsyncRunloop) -> None: benchmark = await async_client.benchmarks.list( limit=0, + name="name", starting_after="starting_after", ) assert_matches_type(AsyncBenchmarksCursorIDPage[BenchmarkView], benchmark, path=["response"]) @@ -671,3 +712,50 @@ async def test_streaming_response_start_run(self, async_client: AsyncRunloop) -> assert_matches_type(BenchmarkRunView, benchmark, path=["response"]) assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_update_scenarios(self, async_client: AsyncRunloop) -> None: + benchmark = await async_client.benchmarks.update_scenarios( + id="id", + ) + assert_matches_type(BenchmarkView, benchmark, path=["response"]) + + @parametrize + async def test_method_update_scenarios_with_all_params(self, async_client: AsyncRunloop) -> None: + benchmark = await async_client.benchmarks.update_scenarios( + id="id", + scenarios_to_add=["string"], + scenarios_to_remove=["string"], + ) + assert_matches_type(BenchmarkView, benchmark, path=["response"]) + + @parametrize + async def test_raw_response_update_scenarios(self, async_client: AsyncRunloop) -> None: + response = await async_client.benchmarks.with_raw_response.update_scenarios( + id="id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + benchmark = await response.parse() + assert_matches_type(BenchmarkView, benchmark, path=["response"]) + + @parametrize + async def test_streaming_response_update_scenarios(self, async_client: AsyncRunloop) -> None: + async with async_client.benchmarks.with_streaming_response.update_scenarios( + id="id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + benchmark = await response.parse() + assert_matches_type(BenchmarkView, benchmark, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_update_scenarios(self, async_client: AsyncRunloop) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `id` but received ''"): + await async_client.benchmarks.with_raw_response.update_scenarios( + id="", + ) diff --git a/tests/api_resources/test_blueprints.py b/tests/api_resources/test_blueprints.py index 4be6d1677..578e4dfb5 100644 --- 
a/tests/api_resources/test_blueprints.py +++ b/tests/api_resources/test_blueprints.py @@ -183,6 +183,7 @@ def test_method_list_with_all_params(self, client: Runloop) -> None: limit=0, name="name", starting_after="starting_after", + status="status", ) assert_matches_type(SyncBlueprintsCursorIDPage[BlueprintView], blueprint, path=["response"]) @@ -323,6 +324,7 @@ def test_method_list_public_with_all_params(self, client: Runloop) -> None: limit=0, name="name", starting_after="starting_after", + status="status", ) assert_matches_type(SyncBlueprintsCursorIDPage[BlueprintView], blueprint, path=["response"]) @@ -641,6 +643,7 @@ async def test_method_list_with_all_params(self, async_client: AsyncRunloop) -> limit=0, name="name", starting_after="starting_after", + status="status", ) assert_matches_type(AsyncBlueprintsCursorIDPage[BlueprintView], blueprint, path=["response"]) @@ -781,6 +784,7 @@ async def test_method_list_public_with_all_params(self, async_client: AsyncRunlo limit=0, name="name", starting_after="starting_after", + status="status", ) assert_matches_type(AsyncBlueprintsCursorIDPage[BlueprintView], blueprint, path=["response"]) diff --git a/tests/api_resources/test_scenarios.py b/tests/api_resources/test_scenarios.py index b9dadb8b9..3345828c1 100644 --- a/tests/api_resources/test_scenarios.py +++ b/tests/api_resources/test_scenarios.py @@ -291,6 +291,7 @@ def test_method_list_with_all_params(self, client: Runloop) -> None: limit=0, name="name", starting_after="starting_after", + validation_type="validation_type", ) assert_matches_type(SyncScenariosCursorIDPage[ScenarioView], scenario, path=["response"]) @@ -696,6 +697,7 @@ async def test_method_list_with_all_params(self, async_client: AsyncRunloop) -> limit=0, name="name", starting_after="starting_after", + validation_type="validation_type", ) assert_matches_type(AsyncScenariosCursorIDPage[ScenarioView], scenario, path=["response"]) diff --git a/tests/sdk/async_devbox/test_core.py b/tests/sdk/async_devbox/test_core.py index 5d3405c80..9925fa724 100644 --- a/tests/sdk/async_devbox/test_core.py +++ b/tests/sdk/async_devbox/test_core.py @@ -26,21 +26,21 @@ class TestAsyncDevbox: def test_init(self, mock_async_client: AsyncMock) -> None: """Test AsyncDevbox initialization.""" - devbox = AsyncDevbox(mock_async_client, "dev_123") - assert devbox.id == "dev_123" + devbox = AsyncDevbox(mock_async_client, "dbx_123") + assert devbox.id == "dbx_123" def test_repr(self, mock_async_client: AsyncMock) -> None: """Test AsyncDevbox string representation.""" - devbox = AsyncDevbox(mock_async_client, "dev_123") - assert repr(devbox) == "" + devbox = AsyncDevbox(mock_async_client, "dbx_123") + assert repr(devbox) == "" @pytest.mark.asyncio async def test_context_manager_enter_exit(self, mock_async_client: AsyncMock, devbox_view: MockDevboxView) -> None: """Test context manager behavior with successful shutdown.""" mock_async_client.devboxes.shutdown = AsyncMock(return_value=devbox_view) - async with AsyncDevbox(mock_async_client, "dev_123") as devbox: - assert devbox.id == "dev_123" + async with AsyncDevbox(mock_async_client, "dbx_123") as devbox: + assert devbox.id == "dbx_123" call_kwargs = mock_async_client.devboxes.shutdown.call_args[1] assert "timeout" not in call_kwargs @@ -51,7 +51,7 @@ async def test_context_manager_exception_handling(self, mock_async_client: Async mock_async_client.devboxes.shutdown = AsyncMock(side_effect=RuntimeError("Shutdown failed")) with pytest.raises(ValueError, match="Test error"): - async with 
AsyncDevbox(mock_async_client, "dev_123"): + async with AsyncDevbox(mock_async_client, "dbx_123"): raise ValueError("Test error") # Shutdown should be called even when body raises exception @@ -62,7 +62,7 @@ async def test_get_info(self, mock_async_client: AsyncMock, devbox_view: MockDev """Test get_info method.""" mock_async_client.devboxes.retrieve = AsyncMock(return_value=devbox_view) - devbox = AsyncDevbox(mock_async_client, "dev_123") + devbox = AsyncDevbox(mock_async_client, "dbx_123") result = await devbox.get_info( extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, @@ -72,7 +72,7 @@ async def test_get_info(self, mock_async_client: AsyncMock, devbox_view: MockDev assert result == devbox_view mock_async_client.devboxes.retrieve.assert_called_once_with( - "dev_123", + "dbx_123", extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, extra_body={"key": "value"}, @@ -85,12 +85,12 @@ async def test_await_running(self, mock_async_client: AsyncMock, devbox_view: Mo mock_async_client.devboxes.await_running = AsyncMock(return_value=devbox_view) polling_config = PollingConfig(timeout_seconds=60.0) - devbox = AsyncDevbox(mock_async_client, "dev_123") + devbox = AsyncDevbox(mock_async_client, "dbx_123") result = await devbox.await_running(polling_config=polling_config) assert result == devbox_view mock_async_client.devboxes.await_running.assert_called_once_with( - "dev_123", + "dbx_123", polling_config=polling_config, ) @@ -100,12 +100,12 @@ async def test_await_suspended(self, mock_async_client: AsyncMock, devbox_view: mock_async_client.devboxes.await_suspended = AsyncMock(return_value=devbox_view) polling_config = PollingConfig(timeout_seconds=60.0) - devbox = AsyncDevbox(mock_async_client, "dev_123") + devbox = AsyncDevbox(mock_async_client, "dbx_123") result = await devbox.await_suspended(polling_config=polling_config) assert result == devbox_view mock_async_client.devboxes.await_suspended.assert_called_once_with( - "dev_123", + "dbx_123", polling_config=polling_config, ) @@ -114,7 +114,7 @@ async def test_shutdown(self, mock_async_client: AsyncMock, devbox_view: MockDev """Test shutdown method.""" mock_async_client.devboxes.shutdown = AsyncMock(return_value=devbox_view) - devbox = AsyncDevbox(mock_async_client, "dev_123") + devbox = AsyncDevbox(mock_async_client, "dbx_123") result = await devbox.shutdown( extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, @@ -125,7 +125,7 @@ async def test_shutdown(self, mock_async_client: AsyncMock, devbox_view: MockDev assert result == devbox_view mock_async_client.devboxes.shutdown.assert_called_once_with( - "dev_123", + "dbx_123", extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, extra_body={"key": "value"}, @@ -138,7 +138,7 @@ async def test_suspend(self, mock_async_client: AsyncMock, devbox_view: MockDevb """Test suspend method.""" mock_async_client.devboxes.suspend = AsyncMock(return_value=devbox_view) - devbox = AsyncDevbox(mock_async_client, "dev_123") + devbox = AsyncDevbox(mock_async_client, "dbx_123") result = await devbox.suspend( extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, @@ -149,7 +149,7 @@ async def test_suspend(self, mock_async_client: AsyncMock, devbox_view: MockDevb assert result == devbox_view mock_async_client.devboxes.suspend.assert_called_once_with( - "dev_123", + "dbx_123", extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, extra_body={"key": "value"}, @@ -162,7 +162,7 @@ async def test_resume(self, mock_async_client: 
AsyncMock, devbox_view: MockDevbo """Test resume method.""" mock_async_client.devboxes.resume = AsyncMock(return_value=devbox_view) - devbox = AsyncDevbox(mock_async_client, "dev_123") + devbox = AsyncDevbox(mock_async_client, "dbx_123") result = await devbox.resume( extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, @@ -173,7 +173,7 @@ async def test_resume(self, mock_async_client: AsyncMock, devbox_view: MockDevbo assert result == devbox_view mock_async_client.devboxes.resume.assert_called_once_with( - "dev_123", + "dbx_123", extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, extra_body={"key": "value"}, @@ -186,7 +186,7 @@ async def test_keep_alive(self, mock_async_client: AsyncMock) -> None: """Test keep_alive method.""" mock_async_client.devboxes.keep_alive = AsyncMock(return_value=object()) - devbox = AsyncDevbox(mock_async_client, "dev_123") + devbox = AsyncDevbox(mock_async_client, "dbx_123") result = await devbox.keep_alive( extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, @@ -197,7 +197,7 @@ async def test_keep_alive(self, mock_async_client: AsyncMock) -> None: assert result is not None # Verify return value is propagated mock_async_client.devboxes.keep_alive.assert_called_once_with( - "dev_123", + "dbx_123", extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, extra_body={"key": "value"}, @@ -208,13 +208,13 @@ async def test_keep_alive(self, mock_async_client: AsyncMock) -> None: @pytest.mark.asyncio async def test_snapshot_disk(self, mock_async_client: AsyncMock) -> None: """Test snapshot_disk waits for completion.""" - snapshot_data = SimpleNamespace(id="snap_123") + snapshot_data = SimpleNamespace(id="snp_123") snapshot_status = SimpleNamespace(status="completed") mock_async_client.devboxes.snapshot_disk_async = AsyncMock(return_value=snapshot_data) mock_async_client.devboxes.disk_snapshots.await_completed = AsyncMock(return_value=snapshot_status) - devbox = AsyncDevbox(mock_async_client, "dev_123") + devbox = AsyncDevbox(mock_async_client, "dbx_123") polling_config = PollingConfig(timeout_seconds=60.0) snapshot = await devbox.snapshot_disk( name="test-snapshot", @@ -223,7 +223,7 @@ async def test_snapshot_disk(self, mock_async_client: AsyncMock) -> None: extra_headers={"X-Custom": "value"}, ) - assert snapshot.id == "snap_123" + assert snapshot.id == "snp_123" mock_async_client.devboxes.snapshot_disk_async.assert_called_once() call_kwargs = mock_async_client.devboxes.snapshot_disk_async.call_args[1] assert "commit_message" not in call_kwargs @@ -240,17 +240,17 @@ async def test_snapshot_disk(self, mock_async_client: AsyncMock) -> None: @pytest.mark.asyncio async def test_snapshot_disk_async(self, mock_async_client: AsyncMock) -> None: """Test snapshot_disk_async returns immediately.""" - snapshot_data = SimpleNamespace(id="snap_123") + snapshot_data = SimpleNamespace(id="snp_123") mock_async_client.devboxes.snapshot_disk_async = AsyncMock(return_value=snapshot_data) - devbox = AsyncDevbox(mock_async_client, "dev_123") + devbox = AsyncDevbox(mock_async_client, "dbx_123") snapshot = await devbox.snapshot_disk_async( name="test-snapshot", metadata={"key": "value"}, extra_headers={"X-Custom": "value"}, ) - assert snapshot.id == "snap_123" + assert snapshot.id == "snp_123" mock_async_client.devboxes.snapshot_disk_async.assert_called_once() call_kwargs = mock_async_client.devboxes.snapshot_disk_async.call_args[1] assert "commit_message" not in call_kwargs @@ -265,7 +265,7 @@ async def test_close(self, 
mock_async_client: AsyncMock, devbox_view: MockDevbox """Test close method calls shutdown.""" mock_async_client.devboxes.shutdown = AsyncMock(return_value=devbox_view) - devbox = AsyncDevbox(mock_async_client, "dev_123") + devbox = AsyncDevbox(mock_async_client, "dbx_123") await devbox.close() mock_async_client.devboxes.shutdown.assert_called_once() @@ -274,21 +274,21 @@ async def test_close(self, mock_async_client: AsyncMock, devbox_view: MockDevbox def test_cmd_property(self, mock_async_client: AsyncMock) -> None: """Test cmd property returns AsyncCommandInterface.""" - devbox = AsyncDevbox(mock_async_client, "dev_123") + devbox = AsyncDevbox(mock_async_client, "dbx_123") cmd = devbox.cmd assert isinstance(cmd, AsyncCommandInterface) assert cmd._devbox is devbox def test_file_property(self, mock_async_client: AsyncMock) -> None: """Test file property returns AsyncFileInterface.""" - devbox = AsyncDevbox(mock_async_client, "dev_123") + devbox = AsyncDevbox(mock_async_client, "dbx_123") file_interface = devbox.file assert isinstance(file_interface, AsyncFileInterface) assert file_interface._devbox is devbox def test_net_property(self, mock_async_client: AsyncMock) -> None: """Test net property returns AsyncNetworkInterface.""" - devbox = AsyncDevbox(mock_async_client, "dev_123") + devbox = AsyncDevbox(mock_async_client, "dbx_123") net = devbox.net assert isinstance(net, AsyncNetworkInterface) assert net._devbox is devbox diff --git a/tests/sdk/async_devbox/test_edge_cases.py b/tests/sdk/async_devbox/test_edge_cases.py index fa5b89c7a..94d9e661b 100644 --- a/tests/sdk/async_devbox/test_edge_cases.py +++ b/tests/sdk/async_devbox/test_edge_cases.py @@ -21,6 +21,6 @@ async def test_async_network_error(self, mock_async_client: AsyncMock) -> None: """Test handling of network errors in async.""" mock_async_client.devboxes.retrieve = AsyncMock(side_effect=httpx.NetworkError("Connection failed")) - devbox = AsyncDevbox(mock_async_client, "dev_123") + devbox = AsyncDevbox(mock_async_client, "dbx_123") with pytest.raises(httpx.NetworkError): await devbox.get_info() diff --git a/tests/sdk/async_devbox/test_interfaces.py b/tests/sdk/async_devbox/test_interfaces.py index bcb2a306b..52c439c22 100644 --- a/tests/sdk/async_devbox/test_interfaces.py +++ b/tests/sdk/async_devbox/test_interfaces.py @@ -27,7 +27,7 @@ async def test_exec_without_callbacks( mock_async_client.devboxes.execute_async = AsyncMock(return_value=execution_view) mock_async_client.devboxes.executions.await_completed = AsyncMock(return_value=execution_view) - devbox = AsyncDevbox(mock_async_client, "dev_123") + devbox = AsyncDevbox(mock_async_client, "dbx_123") result = await devbox.cmd.exec("echo hello") assert result.exit_code == 0 @@ -42,13 +42,13 @@ async def test_exec_without_callbacks( async def test_exec_with_stdout_callback(self, mock_async_client: AsyncMock, mock_async_stream: AsyncMock) -> None: """Test exec with stdout callback.""" execution_async = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="running", ) execution_completed = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="completed", exit_status=0, stdout="output", @@ -61,7 +61,7 @@ async def test_exec_with_stdout_callback(self, mock_async_client: AsyncMock, moc stdout_calls: list[str] = [] - devbox = AsyncDevbox(mock_async_client, "dev_123") + devbox = AsyncDevbox(mock_async_client, "dbx_123") result = await 
devbox.cmd.exec("echo hello", stdout=stdout_calls.append) assert result.exit_code == 0 @@ -73,19 +73,19 @@ async def test_exec_async_returns_execution( ) -> None: """Test exec_async returns AsyncExecution object.""" execution_async = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="running", ) mock_async_client.devboxes.execute_async = AsyncMock(return_value=execution_async) mock_async_client.devboxes.executions.stream_stdout_updates = AsyncMock(return_value=mock_async_stream) - devbox = AsyncDevbox(mock_async_client, "dev_123") + devbox = AsyncDevbox(mock_async_client, "dbx_123") execution = await devbox.cmd.exec_async("long-running command") - assert execution.execution_id == "exec_123" - assert execution.devbox_id == "dev_123" + assert execution.execution_id == "exn_123" + assert execution.devbox_id == "dbx_123" mock_async_client.devboxes.execute_async.assert_called_once() @@ -97,7 +97,7 @@ async def test_read(self, mock_async_client: AsyncMock) -> None: """Test file read.""" mock_async_client.devboxes.read_file_contents = AsyncMock(return_value="file content") - devbox = AsyncDevbox(mock_async_client, "dev_123") + devbox = AsyncDevbox(mock_async_client, "dbx_123") result = await devbox.file.read(file_path="/path/to/file") assert result == "file content" @@ -109,7 +109,7 @@ async def test_write_string(self, mock_async_client: AsyncMock) -> None: execution_detail = SimpleNamespace() mock_async_client.devboxes.write_file_contents = AsyncMock(return_value=execution_detail) - devbox = AsyncDevbox(mock_async_client, "dev_123") + devbox = AsyncDevbox(mock_async_client, "dbx_123") result = await devbox.file.write(file_path="/path/to/file", contents="content") assert result == execution_detail @@ -121,7 +121,7 @@ async def test_write_bytes(self, mock_async_client: AsyncMock) -> None: execution_detail = SimpleNamespace() mock_async_client.devboxes.write_file_contents = AsyncMock(return_value=execution_detail) - devbox = AsyncDevbox(mock_async_client, "dev_123") + devbox = AsyncDevbox(mock_async_client, "dbx_123") result = await devbox.file.write(file_path="/path/to/file", contents="content") assert result == execution_detail @@ -134,7 +134,7 @@ async def test_download(self, mock_async_client: AsyncMock) -> None: mock_response.read = AsyncMock(return_value=b"file content") mock_async_client.devboxes.download_file = AsyncMock(return_value=mock_response) - devbox = AsyncDevbox(mock_async_client, "dev_123") + devbox = AsyncDevbox(mock_async_client, "dbx_123") result = await devbox.file.download(path="/path/to/file") assert result == b"file content" @@ -146,7 +146,7 @@ async def test_upload(self, mock_async_client: AsyncMock, tmp_path: Path) -> Non execution_detail = SimpleNamespace() mock_async_client.devboxes.upload_file = AsyncMock(return_value=execution_detail) - devbox = AsyncDevbox(mock_async_client, "dev_123") + devbox = AsyncDevbox(mock_async_client, "dbx_123") # Create a temporary file for upload temp_file = tmp_path / "test_file.txt" temp_file.write_text("test content") @@ -166,7 +166,7 @@ async def test_create_ssh_key(self, mock_async_client: AsyncMock) -> None: ssh_key_response = SimpleNamespace(public_key="ssh-rsa ...") mock_async_client.devboxes.create_ssh_key = AsyncMock(return_value=ssh_key_response) - devbox = AsyncDevbox(mock_async_client, "dev_123") + devbox = AsyncDevbox(mock_async_client, "dbx_123") result = await devbox.net.create_ssh_key( extra_headers={"X-Custom": "value"}, extra_query={"param": 
"value"}, @@ -184,7 +184,7 @@ async def test_create_tunnel(self, mock_async_client: AsyncMock) -> None: tunnel_view = SimpleNamespace(tunnel_id="tunnel_123") mock_async_client.devboxes.create_tunnel = AsyncMock(return_value=tunnel_view) - devbox = AsyncDevbox(mock_async_client, "dev_123") + devbox = AsyncDevbox(mock_async_client, "dbx_123") result = await devbox.net.create_tunnel( port=8080, extra_headers={"X-Custom": "value"}, @@ -202,7 +202,7 @@ async def test_remove_tunnel(self, mock_async_client: AsyncMock) -> None: """Test remove tunnel.""" mock_async_client.devboxes.remove_tunnel = AsyncMock(return_value=object()) - devbox = AsyncDevbox(mock_async_client, "dev_123") + devbox = AsyncDevbox(mock_async_client, "dbx_123") result = await devbox.net.remove_tunnel( port=8080, extra_headers={"X-Custom": "value"}, diff --git a/tests/sdk/async_devbox/test_streaming.py b/tests/sdk/async_devbox/test_streaming.py index cd33a8f26..3bb3e1a7b 100644 --- a/tests/sdk/async_devbox/test_streaming.py +++ b/tests/sdk/async_devbox/test_streaming.py @@ -25,8 +25,8 @@ class TestAsyncDevboxStreaming: def test_start_streaming_no_callbacks(self, mock_async_client: AsyncMock) -> None: """Test _start_streaming returns None when no callbacks.""" - devbox = AsyncDevbox(mock_async_client, "dev_123") - result = devbox._start_streaming("exec_123", stdout=None, stderr=None, output=None) + devbox = AsyncDevbox(mock_async_client, "dbx_123") + result = devbox._start_streaming("exn_123", stdout=None, stderr=None, output=None) assert result is None @pytest.mark.asyncio @@ -46,9 +46,9 @@ async def async_iter(): mock_async_client.devboxes.executions.stream_stdout_updates = AsyncMock(return_value=mock_async_stream) - devbox = AsyncDevbox(mock_async_client, "dev_123") + devbox = AsyncDevbox(mock_async_client, "dbx_123") stdout_calls: list[str] = [] - result = devbox._start_streaming("exec_123", stdout=stdout_calls.append, stderr=None, output=None) + result = devbox._start_streaming("exn_123", stdout=stdout_calls.append, stderr=None, output=None) assert result is not None assert isinstance(result, _AsyncStreamingGroup) @@ -76,9 +76,9 @@ async def async_iter(): mock_async_client.devboxes.executions.stream_stderr_updates = AsyncMock(return_value=mock_async_stream) - devbox = AsyncDevbox(mock_async_client, "dev_123") + devbox = AsyncDevbox(mock_async_client, "dbx_123") stderr_calls: list[str] = [] - result = devbox._start_streaming("exec_123", stdout=None, stderr=stderr_calls.append, output=None) + result = devbox._start_streaming("exn_123", stdout=None, stderr=stderr_calls.append, output=None) assert result is not None assert isinstance(result, _AsyncStreamingGroup) @@ -107,9 +107,9 @@ async def async_iter(): mock_async_client.devboxes.executions.stream_stdout_updates = AsyncMock(return_value=mock_async_stream) mock_async_client.devboxes.executions.stream_stderr_updates = AsyncMock(return_value=mock_async_stream) - devbox = AsyncDevbox(mock_async_client, "dev_123") + devbox = AsyncDevbox(mock_async_client, "dbx_123") output_calls: list[str] = [] - result = devbox._start_streaming("exec_123", stdout=None, stderr=None, output=output_calls.append) + result = devbox._start_streaming("exn_123", stdout=None, stderr=None, output=output_calls.append) assert result is not None assert isinstance(result, _AsyncStreamingGroup) @@ -136,7 +136,7 @@ async def async_iter() -> AsyncIterator[SimpleNamespace]: mock_async_stream.__aenter__ = AsyncMock(return_value=mock_async_stream) mock_async_stream.__aexit__ = AsyncMock(return_value=None) - devbox 
= AsyncDevbox(mock_async_client, "dev_123") + devbox = AsyncDevbox(mock_async_client, "dbx_123") calls: list[str] = [] async def stream_factory() -> AsyncStream[ExecutionUpdateChunk]: @@ -166,7 +166,7 @@ async def async_iter() -> AsyncIterator[SimpleNamespace]: mock_async_stream.__aenter__ = AsyncMock(return_value=mock_async_stream) mock_async_stream.__aexit__ = AsyncMock(return_value=None) - devbox = AsyncDevbox(mock_async_client, "dev_123") + devbox = AsyncDevbox(mock_async_client, "dbx_123") calls: list[str] = [] async def stream_factory() -> AsyncStream[ExecutionUpdateChunk]: diff --git a/tests/sdk/conftest.py b/tests/sdk/conftest.py index c5546fe55..f22b542c6 100644 --- a/tests/sdk/conftest.py +++ b/tests/sdk/conftest.py @@ -15,13 +15,17 @@ # Test ID constants TEST_IDS = { - "devbox": "dev_123", - "execution": "exec_123", - "snapshot": "snap_123", - "blueprint": "bp_123", + "devbox": "dbx_123", + "execution": "exn_123", + "snapshot": "snp_123", + "blueprint": "bpt_123", "object": "obj_123", - "scorer": "scorer_123", - "agent": "agent_123", + "scorer": "sco_123", + "agent": "agt_123", + "scenario": "scn_123", + "scenario_run": "scr_123", + "benchmark": "bmd_123", + "benchmark_run": "bmr_123", } # Test URL constants @@ -42,7 +46,7 @@ class MockDevboxView: """Mock DevboxView for testing.""" - id: str = "dev_123" + id: str = TEST_IDS["devbox"] status: str = "running" name: str = "test-devbox" @@ -51,8 +55,8 @@ class MockDevboxView: class MockExecutionView: """Mock DevboxAsyncExecutionDetailView for testing.""" - execution_id: str = "exec_123" - devbox_id: str = "dev_123" + execution_id: str = TEST_IDS["execution"] + devbox_id: str = TEST_IDS["devbox"] status: str = "completed" exit_status: int = 0 stdout: str = "output" @@ -65,7 +69,7 @@ class MockExecutionView: class MockSnapshotView: """Mock DevboxSnapshotView for testing.""" - id: str = "snap_123" + id: str = TEST_IDS["snapshot"] status: str = "completed" name: str = "test-snapshot" @@ -74,7 +78,7 @@ class MockSnapshotView: class MockBlueprintView: """Mock BlueprintView for testing.""" - id: str = "bp_123" + id: str = TEST_IDS["blueprint"] status: str = "built" name: str = "test-blueprint" @@ -83,7 +87,7 @@ class MockBlueprintView: class MockObjectView: """Mock ObjectView for testing.""" - id: str = "obj_123" + id: str = TEST_IDS["object"] upload_url: str = "https://upload.example.com/obj_123" name: str = "test-object" @@ -92,7 +96,7 @@ class MockObjectView: class MockScorerView: """Mock ScorerView for testing.""" - id: str = "scorer_123" + id: str = TEST_IDS["scorer"] bash_script: str = "echo 'score=1.0'" type: str = "test_scorer" @@ -101,7 +105,7 @@ class MockScorerView: class MockAgentView: """Mock AgentView for testing.""" - id: str = "agent_123" + id: str = TEST_IDS["agent"] name: str = "test-agent" create_time_ms: int = 1234567890000 is_public: bool = False @@ -112,7 +116,7 @@ class MockAgentView: class MockScenarioView: """Mock ScenarioView for testing.""" - id: str = "scn_123" + id: str = TEST_IDS["scenario"] name: str = "test-scenario" metadata: Dict[str, str] = field(default_factory=dict) @@ -121,14 +125,48 @@ class MockScenarioView: class MockScenarioRunView: """Mock ScenarioRunView for testing.""" - id: str = "run_123" - devbox_id: str = "dev_123" - scenario_id: str = "scn_123" + id: str = TEST_IDS["scenario_run"] + devbox_id: str = TEST_IDS["devbox"] + scenario_id: str = TEST_IDS["scenario"] state: str = "running" metadata: Dict[str, str] = field(default_factory=dict) scoring_contract_result: object = None +@dataclass 
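# The two benchmark mocks below follow the existing Mock*View pattern in this
# conftest: plain dataclasses that stand in for API response views so the SDK
# wrapper tests never construct real models or touch the network. Their ID
# defaults come from TEST_IDS, which keeps the new bmd_/bmr_ prefixes defined
# in one place next to the dbx_/exn_/snp_/bpt_ renames above.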
+class MockBenchmarkView: + """Mock BenchmarkView for testing.""" + + id: str = TEST_IDS["benchmark"] + name: str = "test-benchmark" + metadata: Dict[str, str] = field(default_factory=dict) + scenario_ids: list[str] = field(default_factory=list) + + +@dataclass +class MockBenchmarkRunView: + """Mock BenchmarkRunView for testing.""" + + id: str = TEST_IDS["benchmark_run"] + benchmark_id: str = TEST_IDS["benchmark"] + state: str = "running" + metadata: Dict[str, str] = field(default_factory=dict) + start_time_ms: int = 1234567890000 + duration_ms: int | None = None + score: float | None = None + + +class AsyncIterableMock: + """A simple async iterable mock for testing paginated responses.""" + + def __init__(self, items: list[Any]) -> None: + self._items = items + + async def __aiter__(self): + for item in self._items: + yield item + + def create_mock_httpx_client(methods: dict[str, Any] | None = None) -> AsyncMock: """ Create a mock httpx.AsyncClient with proper context manager setup. @@ -237,6 +275,18 @@ def scenario_run_view() -> MockScenarioRunView: return MockScenarioRunView() +@pytest.fixture +def benchmark_view() -> MockBenchmarkView: + """Create a mock BenchmarkView.""" + return MockBenchmarkView() + + +@pytest.fixture +def benchmark_run_view() -> MockBenchmarkRunView: + """Create a mock BenchmarkRunView.""" + return MockBenchmarkRunView() + + @pytest.fixture def mock_httpx_response() -> Mock: """Create a mock httpx.Response.""" diff --git a/tests/sdk/devbox/test_core.py b/tests/sdk/devbox/test_core.py index b482e030b..c12b02485 100644 --- a/tests/sdk/devbox/test_core.py +++ b/tests/sdk/devbox/test_core.py @@ -29,20 +29,20 @@ class TestDevbox: def test_init(self, mock_client: Mock) -> None: """Test Devbox initialization.""" - devbox = Devbox(mock_client, "dev_123") - assert devbox.id == "dev_123" + devbox = Devbox(mock_client, "dbx_123") + assert devbox.id == "dbx_123" def test_repr(self, mock_client: Mock) -> None: """Test Devbox string representation.""" - devbox = Devbox(mock_client, "dev_123") - assert repr(devbox) == "" + devbox = Devbox(mock_client, "dbx_123") + assert repr(devbox) == "" def test_context_manager_enter_exit(self, mock_client: Mock, devbox_view: MockDevboxView) -> None: """Test context manager behavior with successful shutdown.""" mock_client.devboxes.shutdown.return_value = devbox_view - with Devbox(mock_client, "dev_123") as devbox: - assert devbox.id == "dev_123" + with Devbox(mock_client, "dbx_123") as devbox: + assert devbox.id == "dbx_123" call_kwargs = mock_client.devboxes.shutdown.call_args[1] assert "timeout" not in call_kwargs @@ -52,7 +52,7 @@ def test_context_manager_exception_handling(self, mock_client: Mock) -> None: mock_client.devboxes.shutdown.side_effect = RuntimeError("Shutdown failed") with pytest.raises(ValueError, match="Test error"): - with Devbox(mock_client, "dev_123"): + with Devbox(mock_client, "dbx_123"): raise ValueError("Test error") # Shutdown should be called even when body raises exception @@ -62,7 +62,7 @@ def test_get_info(self, mock_client: Mock, devbox_view: MockDevboxView) -> None: """Test get_info method.""" mock_client.devboxes.retrieve.return_value = devbox_view - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") result = devbox.get_info( extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, @@ -72,7 +72,7 @@ def test_get_info(self, mock_client: Mock, devbox_view: MockDevboxView) -> None: assert result == devbox_view mock_client.devboxes.retrieve.assert_called_once_with( - 
"dev_123", + "dbx_123", extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, extra_body={"key": "value"}, @@ -84,12 +84,12 @@ def test_await_running(self, mock_client: Mock, devbox_view: MockDevboxView) -> mock_client.devboxes.await_running.return_value = devbox_view polling_config = PollingConfig(timeout_seconds=60.0) - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") result = devbox.await_running(polling_config=polling_config) assert result == devbox_view mock_client.devboxes.await_running.assert_called_once_with( - "dev_123", + "dbx_123", polling_config=polling_config, ) @@ -98,12 +98,12 @@ def test_await_suspended(self, mock_client: Mock, devbox_view: MockDevboxView) - mock_client.devboxes.await_suspended.return_value = devbox_view polling_config = PollingConfig(timeout_seconds=60.0) - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") result = devbox.await_suspended(polling_config=polling_config) assert result == devbox_view mock_client.devboxes.await_suspended.assert_called_once_with( - "dev_123", + "dbx_123", polling_config=polling_config, ) @@ -111,7 +111,7 @@ def test_shutdown(self, mock_client: Mock, devbox_view: MockDevboxView) -> None: """Test shutdown method.""" mock_client.devboxes.shutdown.return_value = devbox_view - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") result = devbox.shutdown( extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, @@ -122,7 +122,7 @@ def test_shutdown(self, mock_client: Mock, devbox_view: MockDevboxView) -> None: assert result == devbox_view mock_client.devboxes.shutdown.assert_called_once_with( - "dev_123", + "dbx_123", extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, extra_body={"key": "value"}, @@ -136,7 +136,7 @@ def test_suspend(self, mock_client: Mock, devbox_view: MockDevboxView) -> None: mock_client.devboxes.await_suspended.return_value = devbox_view polling_config = PollingConfig(timeout_seconds=60.0) - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") result = devbox.suspend( polling_config=polling_config, extra_headers={"X-Custom": "value"}, @@ -148,7 +148,7 @@ def test_suspend(self, mock_client: Mock, devbox_view: MockDevboxView) -> None: assert result == devbox_view mock_client.devboxes.suspend.assert_called_once_with( - "dev_123", + "dbx_123", extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, extra_body={"key": "value"}, @@ -156,7 +156,7 @@ def test_suspend(self, mock_client: Mock, devbox_view: MockDevboxView) -> None: idempotency_key="key-123", ) mock_client.devboxes.await_suspended.assert_called_once_with( - "dev_123", + "dbx_123", polling_config=polling_config, ) @@ -166,7 +166,7 @@ def test_resume(self, mock_client: Mock, devbox_view: MockDevboxView) -> None: mock_client.devboxes.await_running.return_value = devbox_view polling_config = PollingConfig(timeout_seconds=60.0) - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") result = devbox.resume( polling_config=polling_config, extra_headers={"X-Custom": "value"}, @@ -178,7 +178,7 @@ def test_resume(self, mock_client: Mock, devbox_view: MockDevboxView) -> None: assert result == devbox_view mock_client.devboxes.resume.assert_called_once_with( - "dev_123", + "dbx_123", extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, extra_body={"key": "value"}, @@ -186,7 +186,7 @@ def test_resume(self, mock_client: Mock, devbox_view: MockDevboxView) 
-> None: idempotency_key="key-123", ) mock_client.devboxes.await_running.assert_called_once_with( - "dev_123", + "dbx_123", polling_config=polling_config, ) @@ -194,7 +194,7 @@ def test_keep_alive(self, mock_client: Mock) -> None: """Test keep_alive method.""" mock_client.devboxes.keep_alive.return_value = object() - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") result = devbox.keep_alive( extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, @@ -205,7 +205,7 @@ def test_keep_alive(self, mock_client: Mock) -> None: assert result is not None # Verify return value is propagated mock_client.devboxes.keep_alive.assert_called_once_with( - "dev_123", + "dbx_123", extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, extra_body={"key": "value"}, @@ -215,13 +215,13 @@ def test_keep_alive(self, mock_client: Mock) -> None: def test_snapshot_disk(self, mock_client: Mock) -> None: """Test snapshot_disk waits for completion.""" - snapshot_data = SimpleNamespace(id="snap_123") + snapshot_data = SimpleNamespace(id="snp_123") snapshot_status = SimpleNamespace(status="completed") mock_client.devboxes.snapshot_disk_async.return_value = snapshot_data mock_client.devboxes.disk_snapshots.await_completed.return_value = snapshot_status - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") polling_config = PollingConfig(timeout_seconds=60.0) snapshot = devbox.snapshot_disk( name="test-snapshot", @@ -230,7 +230,7 @@ def test_snapshot_disk(self, mock_client: Mock) -> None: extra_headers={"X-Custom": "value"}, ) - assert snapshot.id == "snap_123" + assert snapshot.id == "snp_123" call_kwargs = mock_client.devboxes.snapshot_disk_async.call_args[1] assert "commit_message" not in call_kwargs or call_kwargs["commit_message"] in (omit, None) assert call_kwargs["metadata"] == {"key": "value"} @@ -244,17 +244,17 @@ def test_snapshot_disk(self, mock_client: Mock) -> None: def test_snapshot_disk_async(self, mock_client: Mock) -> None: """Test snapshot_disk_async returns immediately.""" - snapshot_data = SimpleNamespace(id="snap_123") + snapshot_data = SimpleNamespace(id="snp_123") mock_client.devboxes.snapshot_disk_async.return_value = snapshot_data - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") snapshot = devbox.snapshot_disk_async( name="test-snapshot", metadata={"key": "value"}, extra_headers={"X-Custom": "value"}, ) - assert snapshot.id == "snap_123" + assert snapshot.id == "snp_123" call_kwargs = mock_client.devboxes.snapshot_disk_async.call_args[1] assert "commit_message" not in call_kwargs or call_kwargs["commit_message"] in (omit, None) assert call_kwargs["metadata"] == {"key": "value"} @@ -270,7 +270,7 @@ def test_close(self, mock_client: Mock, devbox_view: MockDevboxView) -> None: """Test close method calls shutdown.""" mock_client.devboxes.shutdown.return_value = devbox_view - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") devbox.close() call_kwargs = mock_client.devboxes.shutdown.call_args[1] @@ -278,21 +278,21 @@ def test_close(self, mock_client: Mock, devbox_view: MockDevboxView) -> None: def test_cmd_property(self, mock_client: Mock) -> None: """Test cmd property returns CommandInterface.""" - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") cmd = devbox.cmd assert isinstance(cmd, CommandInterface) assert cmd._devbox is devbox def test_file_property(self, mock_client: Mock) -> None: """Test file property returns 
FileInterface.""" - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") file_interface = devbox.file assert isinstance(file_interface, FileInterface) assert file_interface._devbox is devbox def test_net_property(self, mock_client: Mock) -> None: """Test net property returns NetworkInterface.""" - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") net = devbox.net assert isinstance(net, NetworkInterface) assert net._devbox is devbox diff --git a/tests/sdk/devbox/test_edge_cases.py b/tests/sdk/devbox/test_edge_cases.py index ff2491f66..23341f0c0 100644 --- a/tests/sdk/devbox/test_edge_cases.py +++ b/tests/sdk/devbox/test_edge_cases.py @@ -31,7 +31,7 @@ def test_network_error(self, mock_client: Mock) -> None: """Test handling of network errors.""" mock_client.devboxes.retrieve.side_effect = httpx.NetworkError("Connection failed") - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") with pytest.raises(httpx.NetworkError): devbox.get_info() @@ -50,7 +50,7 @@ def test_api_error(self, mock_client: Mock, status_code: int, message: str) -> N mock_client.devboxes.retrieve.side_effect = error - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") with pytest.raises(APIStatusError): devbox.get_info() @@ -58,7 +58,7 @@ def test_timeout_error(self, mock_client: Mock) -> None: """Test handling of timeout errors.""" mock_client.devboxes.retrieve.side_effect = httpx.TimeoutException("Request timed out") - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") with pytest.raises(httpx.TimeoutException): devbox.get_info(timeout=1.0) @@ -68,19 +68,19 @@ class TestDevboxEdgeCases: def test_empty_responses(self, mock_client: Mock) -> None: """Test handling of empty responses.""" - empty_view = SimpleNamespace(id="dev_123", status="", name="") + empty_view = SimpleNamespace(id="dbx_123", status="", name="") mock_client.devboxes.retrieve.return_value = empty_view - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") result = devbox.get_info() assert result == empty_view def test_none_values(self, mock_client: Mock) -> None: """Test handling of None values.""" - view_with_none = SimpleNamespace(id="dev_123", status=None, name=None) + view_with_none = SimpleNamespace(id="dbx_123", status=None, name=None) mock_client.devboxes.retrieve.return_value = view_with_none - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") result = devbox.get_info() assert result.status is None assert result.name is None @@ -89,9 +89,9 @@ def test_concurrent_operations( self, mock_client: Mock, thread_cleanup: tuple[list[threading.Thread], list[threading.Event]] ) -> None: """Test concurrent operations.""" - mock_client.devboxes.retrieve.return_value = SimpleNamespace(id="dev_123", status="running") + mock_client.devboxes.retrieve.return_value = SimpleNamespace(id="dbx_123", status="running") - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") results: list[DevboxView] = [] def get_info() -> None: @@ -118,13 +118,13 @@ def test_context_manager_vs_manual_cleanup(self, mock_client: Mock, devbox_view: mock_client.devboxes.shutdown.return_value = devbox_view # Context manager approach (Pythonic) - with Devbox(mock_client, "dev_123"): + with Devbox(mock_client, "dbx_123"): pass mock_client.devboxes.shutdown.assert_called_once() # Manual cleanup (TypeScript-like) - devbox = Devbox(mock_client, "dev_123") + 
devbox = Devbox(mock_client, "dbx_123") devbox.shutdown() assert mock_client.devboxes.shutdown.call_count == 2 diff --git a/tests/sdk/devbox/test_interfaces.py b/tests/sdk/devbox/test_interfaces.py index a8ca574ba..66ef8fa7b 100644 --- a/tests/sdk/devbox/test_interfaces.py +++ b/tests/sdk/devbox/test_interfaces.py @@ -24,7 +24,7 @@ def test_exec_without_callbacks(self, mock_client: Mock, execution_view: MockExe mock_client.devboxes.execute_async.return_value = execution_view mock_client.devboxes.executions.await_completed.return_value = execution_view - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") result = devbox.cmd.exec("echo hello") assert result.exit_code == 0 @@ -38,13 +38,13 @@ def test_exec_without_callbacks(self, mock_client: Mock, execution_view: MockExe def test_exec_with_stdout_callback(self, mock_client: Mock, mock_stream: Mock) -> None: """Test exec with stdout callback.""" execution_async = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="running", ) execution_completed = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="completed", exit_status=0, stdout="output", @@ -57,7 +57,7 @@ def test_exec_with_stdout_callback(self, mock_client: Mock, mock_stream: Mock) - stdout_calls: list[str] = [] - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") result = devbox.cmd.exec("echo hello", stdout=stdout_calls.append) assert result.exit_code == 0 @@ -67,13 +67,13 @@ def test_exec_with_stdout_callback(self, mock_client: Mock, mock_stream: Mock) - def test_exec_with_stderr_callback(self, mock_client: Mock, mock_stream: Mock) -> None: """Test exec with stderr callback.""" execution_async = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="running", ) execution_completed = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="completed", exit_status=0, stdout="", @@ -86,7 +86,7 @@ def test_exec_with_stderr_callback(self, mock_client: Mock, mock_stream: Mock) - stderr_calls: list[str] = [] - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") result = devbox.cmd.exec("echo hello", stderr=stderr_calls.append) assert result.exit_code == 0 @@ -95,13 +95,13 @@ def test_exec_with_stderr_callback(self, mock_client: Mock, mock_stream: Mock) - def test_exec_with_output_callback(self, mock_client: Mock, mock_stream: Mock) -> None: """Test exec with output callback.""" execution_async = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="running", ) execution_completed = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="completed", exit_status=0, stdout="output", @@ -115,7 +115,7 @@ def test_exec_with_output_callback(self, mock_client: Mock, mock_stream: Mock) - output_calls: list[str] = [] - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") result = devbox.cmd.exec("echo hello", output=output_calls.append) assert result.exit_code == 0 @@ -124,13 +124,13 @@ def test_exec_with_output_callback(self, mock_client: Mock, mock_stream: Mock) - def test_exec_with_all_callbacks(self, mock_client: Mock, mock_stream: Mock) -> None: """Test exec with all 
callbacks.""" execution_async = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="running", ) execution_completed = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="completed", exit_status=0, stdout="output", @@ -146,7 +146,7 @@ def test_exec_with_all_callbacks(self, mock_client: Mock, mock_stream: Mock) -> stderr_calls: list[str] = [] output_calls: list[str] = [] - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") result = devbox.cmd.exec( "echo hello", stdout=stdout_calls.append, @@ -160,19 +160,19 @@ def test_exec_with_all_callbacks(self, mock_client: Mock, mock_stream: Mock) -> def test_exec_async_returns_execution(self, mock_client: Mock, mock_stream: Mock) -> None: """Test exec_async returns Execution object.""" execution_async = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="running", ) mock_client.devboxes.execute_async.return_value = execution_async mock_client.devboxes.executions.stream_stdout_updates.return_value = mock_stream - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") execution = devbox.cmd.exec_async("long-running command") - assert execution.execution_id == "exec_123" - assert execution.devbox_id == "dev_123" + assert execution.execution_id == "exn_123" + assert execution.devbox_id == "dbx_123" mock_client.devboxes.execute_async.assert_called_once() @@ -183,7 +183,7 @@ def test_read(self, mock_client: Mock) -> None: """Test file read.""" mock_client.devboxes.read_file_contents.return_value = "file content" - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") result = devbox.file.read(file_path="/path/to/file") assert result == "file content" @@ -196,7 +196,7 @@ def test_write_string(self, mock_client: Mock) -> None: execution_detail = SimpleNamespace() mock_client.devboxes.write_file_contents.return_value = execution_detail - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") result = devbox.file.write(file_path="/path/to/file", contents="content") assert result == execution_detail @@ -210,7 +210,7 @@ def test_write_bytes(self, mock_client: Mock) -> None: execution_detail = SimpleNamespace() mock_client.devboxes.write_file_contents.return_value = execution_detail - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") result = devbox.file.write(file_path="/path/to/file", contents="content") assert result == execution_detail @@ -225,7 +225,7 @@ def test_download(self, mock_client: Mock) -> None: mock_response.read.return_value = b"file content" mock_client.devboxes.download_file.return_value = mock_response - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") result = devbox.file.download(path="/path/to/file") assert result == b"file content" @@ -238,7 +238,7 @@ def test_upload(self, mock_client: Mock, tmp_path: Path) -> None: execution_detail = SimpleNamespace() mock_client.devboxes.upload_file.return_value = execution_detail - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") # Create a temporary file for upload temp_file = tmp_path / "test_file.txt" temp_file.write_text("test content") @@ -260,7 +260,7 @@ def test_create_ssh_key(self, mock_client: Mock) -> None: ssh_key_response = SimpleNamespace(public_key="ssh-rsa ...") 
mock_client.devboxes.create_ssh_key.return_value = ssh_key_response - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") result = devbox.net.create_ssh_key( extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, @@ -271,7 +271,7 @@ def test_create_ssh_key(self, mock_client: Mock) -> None: assert result == ssh_key_response mock_client.devboxes.create_ssh_key.assert_called_once_with( - "dev_123", + "dbx_123", extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, extra_body={"key": "value"}, @@ -284,7 +284,7 @@ def test_create_tunnel(self, mock_client: Mock) -> None: tunnel_view = SimpleNamespace(port=8080) mock_client.devboxes.create_tunnel.return_value = tunnel_view - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") result = devbox.net.create_tunnel( port=8080, extra_headers={"X-Custom": "value"}, @@ -296,7 +296,7 @@ def test_create_tunnel(self, mock_client: Mock) -> None: assert result == tunnel_view mock_client.devboxes.create_tunnel.assert_called_once_with( - "dev_123", + "dbx_123", port=8080, extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, @@ -309,7 +309,7 @@ def test_remove_tunnel(self, mock_client: Mock) -> None: """Test remove tunnel.""" mock_client.devboxes.remove_tunnel.return_value = object() - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") result = devbox.net.remove_tunnel( port=8080, extra_headers={"X-Custom": "value"}, @@ -321,7 +321,7 @@ def test_remove_tunnel(self, mock_client: Mock) -> None: assert result is not None # Verify return value is propagated mock_client.devboxes.remove_tunnel.assert_called_once_with( - "dev_123", + "dbx_123", port=8080, extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, diff --git a/tests/sdk/devbox/test_streaming.py b/tests/sdk/devbox/test_streaming.py index 4550b94a2..6d44a4e5e 100644 --- a/tests/sdk/devbox/test_streaming.py +++ b/tests/sdk/devbox/test_streaming.py @@ -26,17 +26,17 @@ class TestDevboxStreaming: def test_start_streaming_no_callbacks(self, mock_client: Mock) -> None: """Test _start_streaming returns None when no callbacks.""" - devbox = Devbox(mock_client, "dev_123") - result = devbox._start_streaming("exec_123", stdout=None, stderr=None, output=None) + devbox = Devbox(mock_client, "dbx_123") + result = devbox._start_streaming("exn_123", stdout=None, stderr=None, output=None) assert result is None def test_start_streaming_stdout_only(self, mock_client: Mock, mock_stream: Mock) -> None: """Test _start_streaming with stdout callback only.""" mock_client.devboxes.executions.stream_stdout_updates.return_value = mock_stream - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") stdout_calls: list[str] = [] - result = devbox._start_streaming("exec_123", stdout=stdout_calls.append, stderr=None, output=None) + result = devbox._start_streaming("exn_123", stdout=stdout_calls.append, stderr=None, output=None) assert result is not None assert isinstance(result, _StreamingGroup) @@ -47,9 +47,9 @@ def test_start_streaming_stderr_only(self, mock_client: Mock, mock_stream: Mock) """Test _start_streaming with stderr callback only.""" mock_client.devboxes.executions.stream_stderr_updates.return_value = mock_stream - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") stderr_calls: list[str] = [] - result = devbox._start_streaming("exec_123", stdout=None, stderr=stderr_calls.append, output=None) + result = 
devbox._start_streaming("exn_123", stdout=None, stderr=stderr_calls.append, output=None) assert result is not None assert isinstance(result, _StreamingGroup) @@ -61,9 +61,9 @@ def test_start_streaming_output_only(self, mock_client: Mock, mock_stream: Mock) mock_client.devboxes.executions.stream_stdout_updates.return_value = mock_stream mock_client.devboxes.executions.stream_stderr_updates.return_value = mock_stream - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") output_calls: list[str] = [] - result = devbox._start_streaming("exec_123", stdout=None, stderr=None, output=output_calls.append) + result = devbox._start_streaming("exn_123", stdout=None, stderr=None, output=output_calls.append) assert result is not None assert isinstance(result, _StreamingGroup) @@ -74,12 +74,12 @@ def test_start_streaming_all_callbacks(self, mock_client: Mock, mock_stream: Moc mock_client.devboxes.executions.stream_stdout_updates.return_value = mock_stream mock_client.devboxes.executions.stream_stderr_updates.return_value = mock_stream - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") stdout_calls: list[str] = [] stderr_calls: list[str] = [] output_calls: list[str] = [] result = devbox._start_streaming( - "exec_123", + "exn_123", stdout=stdout_calls.append, stderr=stderr_calls.append, output=output_calls.append, @@ -104,7 +104,7 @@ def test_spawn_stream_thread( mock_stream.__enter__ = Mock(return_value=mock_stream) mock_stream.__exit__ = Mock(return_value=None) - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") stop_event = threading.Event() calls: list[str] = [] @@ -147,7 +147,7 @@ def test_spawn_stream_thread_stop_event( mock_stream.__enter__ = Mock(return_value=mock_stream) mock_stream.__exit__ = Mock(return_value=None) - devbox = Devbox(mock_client, "dev_123") + devbox = Devbox(mock_client, "dbx_123") stop_event = threading.Event() calls: list[str] = [] diff --git a/tests/sdk/test_agent.py b/tests/sdk/test_agent.py index 7580b44d8..f1bbb083d 100644 --- a/tests/sdk/test_agent.py +++ b/tests/sdk/test_agent.py @@ -13,19 +13,19 @@ class TestAgent: def test_init(self, mock_client: Mock) -> None: """Test Agent initialization.""" - agent = Agent(mock_client, "agent_123") - assert agent.id == "agent_123" + agent = Agent(mock_client, "agt_123") + assert agent.id == "agt_123" def test_repr(self, mock_client: Mock) -> None: """Test Agent string representation.""" - agent = Agent(mock_client, "agent_123") - assert repr(agent) == "" + agent = Agent(mock_client, "agt_123") + assert repr(agent) == "" def test_get_info(self, mock_client: Mock, agent_view: MockAgentView) -> None: """Test get_info method.""" mock_client.agents.retrieve.return_value = agent_view - agent = Agent(mock_client, "agent_123") + agent = Agent(mock_client, "agt_123") result = agent.get_info( extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, @@ -35,7 +35,7 @@ def test_get_info(self, mock_client: Mock, agent_view: MockAgentView) -> None: assert result == agent_view mock_client.agents.retrieve.assert_called_once_with( - "agent_123", + "agt_123", extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, extra_body={"key": "value"}, diff --git a/tests/sdk/test_async_agent.py b/tests/sdk/test_async_agent.py index a2bb9496c..be7efa845 100644 --- a/tests/sdk/test_async_agent.py +++ b/tests/sdk/test_async_agent.py @@ -15,20 +15,20 @@ class TestAsyncAgent: def test_init(self, mock_async_client: AsyncMock) -> None: """Test 
AsyncAgent initialization.""" - agent = AsyncAgent(mock_async_client, "agent_123") - assert agent.id == "agent_123" + agent = AsyncAgent(mock_async_client, "agt_123") + assert agent.id == "agt_123" def test_repr(self, mock_async_client: AsyncMock) -> None: """Test AsyncAgent string representation.""" - agent = AsyncAgent(mock_async_client, "agent_123") - assert repr(agent) == "" + agent = AsyncAgent(mock_async_client, "agt_123") + assert repr(agent) == "" @pytest.mark.asyncio async def test_get_info(self, mock_async_client: AsyncMock, agent_view: MockAgentView) -> None: """Test get_info method.""" mock_async_client.agents.retrieve = AsyncMock(return_value=agent_view) - agent = AsyncAgent(mock_async_client, "agent_123") + agent = AsyncAgent(mock_async_client, "agt_123") result = await agent.get_info( extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, @@ -38,7 +38,7 @@ async def test_get_info(self, mock_async_client: AsyncMock, agent_view: MockAgen assert result == agent_view mock_async_client.agents.retrieve.assert_called_once_with( - "agent_123", + "agt_123", extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, extra_body={"key": "value"}, diff --git a/tests/sdk/test_async_benchmark.py b/tests/sdk/test_async_benchmark.py new file mode 100644 index 000000000..d7d72daad --- /dev/null +++ b/tests/sdk/test_async_benchmark.py @@ -0,0 +1,130 @@ +"""Comprehensive tests for async AsyncBenchmark class.""" + +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import AsyncMock + +from tests.sdk.conftest import MockBenchmarkView, MockBenchmarkRunView +from runloop_api_client.sdk.async_benchmark import AsyncBenchmark +from runloop_api_client.sdk.async_benchmark_run import AsyncBenchmarkRun + + +class TestAsyncBenchmark: + """Tests for AsyncBenchmark class.""" + + def test_init(self, mock_async_client: AsyncMock) -> None: + """Test AsyncBenchmark initialization.""" + benchmark = AsyncBenchmark(mock_async_client, "bmd_123") + assert benchmark.id == "bmd_123" + assert repr(benchmark) == "" + + async def test_get_info(self, mock_async_client: AsyncMock, benchmark_view: MockBenchmarkView) -> None: + """Test get_info method.""" + mock_async_client.benchmarks.retrieve = AsyncMock(return_value=benchmark_view) + + benchmark = AsyncBenchmark(mock_async_client, "bmd_123") + result = await benchmark.get_info() + + assert result == benchmark_view + mock_async_client.benchmarks.retrieve.assert_awaited_once_with("bmd_123") + + async def test_update(self, mock_async_client: AsyncMock, benchmark_view: MockBenchmarkView) -> None: + """Test update method.""" + benchmark_view.name = "updated-name" + mock_async_client.benchmarks.update = AsyncMock(return_value=benchmark_view) + + benchmark = AsyncBenchmark(mock_async_client, "bmd_123") + result = await benchmark.update(name="updated-name") + + assert result == benchmark_view + mock_async_client.benchmarks.update.assert_awaited_once_with("bmd_123", name="updated-name") + + async def test_run(self, mock_async_client: AsyncMock, benchmark_run_view: MockBenchmarkRunView) -> None: + """Test run method.""" + mock_async_client.benchmarks.start_run = AsyncMock(return_value=benchmark_run_view) + + benchmark = AsyncBenchmark(mock_async_client, "bmd_123") + result = await benchmark.start_run(run_name="test-run", metadata={"key": "value"}) + + assert isinstance(result, AsyncBenchmarkRun) + assert result.id == benchmark_run_view.id + assert result.benchmark_id == benchmark_run_view.benchmark_id + 
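
The wrapper under test here is deliberately thin: start_run forwards to client.benchmarks.start_run and wraps the returned view in an AsyncBenchmarkRun, so the caller keeps a handle that can later cancel or complete the run and enumerate its scenario runs. A short sketch of that call sequence, assuming an AsyncRunloop client export that this patch does not show:

import asyncio

from runloop_api_client import AsyncRunloop  # assumed client export
from runloop_api_client.sdk.async_benchmark import AsyncBenchmark

async def main() -> None:
    client = AsyncRunloop()  # API key from the environment (assumed)
    benchmark = AsyncBenchmark(client, "bmd_123")  # illustrative id
    run = await benchmark.start_run(run_name="nightly", metadata={"ci": "true"})
    # Scenario runs attached to this benchmark run, filtered the same way
    # test_list_scenario_runs_with_params exercises below.
    for scenario_run in await run.list_scenario_runs(state="completed"):
        print(scenario_run.id, scenario_run.devbox_id)
    await run.complete()

asyncio.run(main())
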
mock_async_client.benchmarks.start_run.assert_awaited_once_with( + benchmark_id="bmd_123", run_name="test-run", metadata={"key": "value"} + ) + + async def test_add_scenarios(self, mock_async_client: AsyncMock, benchmark_view: MockBenchmarkView) -> None: + """Test add_scenarios method.""" + benchmark_view.scenario_ids = ["scn_001", "scn_002"] + mock_async_client.benchmarks.update_scenarios = AsyncMock(return_value=benchmark_view) + + benchmark = AsyncBenchmark(mock_async_client, "bmd_123") + result = await benchmark.add_scenarios(["scn_001", "scn_002"]) + + assert result == benchmark_view + mock_async_client.benchmarks.update_scenarios.assert_awaited_once_with( + "bmd_123", scenarios_to_add=["scn_001", "scn_002"] + ) + + async def test_remove_scenarios(self, mock_async_client: AsyncMock, benchmark_view: MockBenchmarkView) -> None: + """Test remove_scenarios method.""" + mock_async_client.benchmarks.update_scenarios = AsyncMock(return_value=benchmark_view) + + benchmark = AsyncBenchmark(mock_async_client, "bmd_123") + result = await benchmark.remove_scenarios(["scn_001"]) + + assert result == benchmark_view + mock_async_client.benchmarks.update_scenarios.assert_awaited_once_with( + "bmd_123", scenarios_to_remove=["scn_001"] + ) + + async def test_list_runs_single( + self, mock_async_client: AsyncMock, benchmark_run_view: MockBenchmarkRunView + ) -> None: + """Test list_runs method with single result.""" + page = SimpleNamespace(runs=[benchmark_run_view]) + mock_async_client.benchmarks.runs.list = AsyncMock(return_value=page) + + benchmark = AsyncBenchmark(mock_async_client, "bmd_123") + result = await benchmark.list_runs() + + assert len(result) == 1 + assert isinstance(result[0], AsyncBenchmarkRun) + assert result[0].id == benchmark_run_view.id + assert result[0].benchmark_id == benchmark_run_view.benchmark_id + mock_async_client.benchmarks.runs.list.assert_awaited_once_with(benchmark_id="bmd_123") + + async def test_list_runs_multiple(self, mock_async_client: AsyncMock) -> None: + """Test list_runs method with multiple results.""" + run_view1 = MockBenchmarkRunView(id="bmr_001") + run_view2 = MockBenchmarkRunView(id="bmr_002") + page = SimpleNamespace(runs=[run_view1, run_view2]) + mock_async_client.benchmarks.runs.list = AsyncMock(return_value=page) + + benchmark = AsyncBenchmark(mock_async_client, "bmd_123") + result = await benchmark.list_runs() + + assert len(result) == 2 + assert isinstance(result[0], AsyncBenchmarkRun) + assert isinstance(result[1], AsyncBenchmarkRun) + assert result[0].id == run_view1.id + assert result[0].benchmark_id == run_view1.benchmark_id + assert result[1].id == run_view2.id + assert result[1].benchmark_id == run_view2.benchmark_id + mock_async_client.benchmarks.runs.list.assert_awaited_once_with(benchmark_id="bmd_123") + + async def test_list_runs_with_params( + self, mock_async_client: AsyncMock, benchmark_run_view: MockBenchmarkRunView + ) -> None: + """Test list_runs method with filtering parameters.""" + page = SimpleNamespace(runs=[benchmark_run_view]) + mock_async_client.benchmarks.runs.list = AsyncMock(return_value=page) + + benchmark = AsyncBenchmark(mock_async_client, "bmd_123") + result = await benchmark.list_runs(limit=10, name="test-run") + + assert len(result) == 1 + mock_async_client.benchmarks.runs.list.assert_awaited_once_with( + benchmark_id="bmd_123", limit=10, name="test-run" + ) diff --git a/tests/sdk/test_async_benchmark_run.py b/tests/sdk/test_async_benchmark_run.py new file mode 100644 index 000000000..dd6e230d2 --- /dev/null +++ 
b/tests/sdk/test_async_benchmark_run.py @@ -0,0 +1,120 @@ +"""Comprehensive tests for async AsyncBenchmarkRun class.""" + +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import AsyncMock + +from tests.sdk.conftest import MockScenarioRunView, MockBenchmarkRunView +from runloop_api_client.sdk.async_scenario_run import AsyncScenarioRun +from runloop_api_client.sdk.async_benchmark_run import AsyncBenchmarkRun + + +class TestAsyncBenchmarkRun: + """Tests for AsyncBenchmarkRun class.""" + + def test_init(self, mock_async_client: AsyncMock) -> None: + """Test AsyncBenchmarkRun initialization.""" + run = AsyncBenchmarkRun(mock_async_client, "bmr_123", "bmd_123") + assert run.id == "bmr_123" + assert run.benchmark_id == "bmd_123" + + def test_repr(self, mock_async_client: AsyncMock) -> None: + """Test AsyncBenchmarkRun string representation.""" + run = AsyncBenchmarkRun(mock_async_client, "bmr_123", "bmd_123") + assert repr(run) == "" + + async def test_get_info(self, mock_async_client: AsyncMock, benchmark_run_view: MockBenchmarkRunView) -> None: + """Test get_info method.""" + mock_async_client.benchmarks.runs.retrieve = AsyncMock(return_value=benchmark_run_view) + + run = AsyncBenchmarkRun(mock_async_client, "bmr_123", "bmd_123") + result = await run.get_info() + + assert result == benchmark_run_view + mock_async_client.benchmarks.runs.retrieve.assert_awaited_once_with("bmr_123") + + async def test_cancel(self, mock_async_client: AsyncMock, benchmark_run_view: MockBenchmarkRunView) -> None: + """Test cancel method.""" + benchmark_run_view.state = "canceled" + mock_async_client.benchmarks.runs.cancel = AsyncMock(return_value=benchmark_run_view) + + run = AsyncBenchmarkRun(mock_async_client, "bmr_123", "bmd_123") + result = await run.cancel() + + assert result == benchmark_run_view + assert result.state == "canceled" + mock_async_client.benchmarks.runs.cancel.assert_awaited_once_with("bmr_123") + + async def test_complete(self, mock_async_client: AsyncMock, benchmark_run_view: MockBenchmarkRunView) -> None: + """Test complete method.""" + benchmark_run_view.state = "completed" + mock_async_client.benchmarks.runs.complete = AsyncMock(return_value=benchmark_run_view) + + run = AsyncBenchmarkRun(mock_async_client, "bmr_123", "bmd_123") + result = await run.complete() + + assert result == benchmark_run_view + assert result.state == "completed" + mock_async_client.benchmarks.runs.complete.assert_awaited_once_with("bmr_123") + + async def test_list_scenario_runs_empty(self, mock_async_client: AsyncMock) -> None: + """Test list_scenario_runs method with empty results.""" + page = SimpleNamespace(runs=[]) + mock_async_client.benchmarks.runs.list_scenario_runs = AsyncMock(return_value=page) + + run = AsyncBenchmarkRun(mock_async_client, "bmr_123", "bmd_123") + result = await run.list_scenario_runs() + + assert len(result) == 0 + mock_async_client.benchmarks.runs.list_scenario_runs.assert_awaited_once_with("bmr_123") + + async def test_list_scenario_runs_single( + self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView + ) -> None: + """Test list_scenario_runs method with single result.""" + page = SimpleNamespace(runs=[scenario_run_view]) + mock_async_client.benchmarks.runs.list_scenario_runs = AsyncMock(return_value=page) + + run = AsyncBenchmarkRun(mock_async_client, "bmr_123", "bmd_123") + result = await run.list_scenario_runs() + + assert len(result) == 1 + assert isinstance(result[0], AsyncScenarioRun) + assert result[0].id == 
scenario_run_view.id + assert result[0].devbox_id == scenario_run_view.devbox_id + mock_async_client.benchmarks.runs.list_scenario_runs.assert_awaited_once_with("bmr_123") + + async def test_list_scenario_runs_multiple(self, mock_async_client: AsyncMock) -> None: + """Test list_scenario_runs method with multiple results.""" + scenario_run_view1 = MockScenarioRunView(id="scr_001", devbox_id="dev_001") + scenario_run_view2 = MockScenarioRunView(id="scr_002", devbox_id="dev_002") + page = SimpleNamespace(runs=[scenario_run_view1, scenario_run_view2]) + mock_async_client.benchmarks.runs.list_scenario_runs = AsyncMock(return_value=page) + + run = AsyncBenchmarkRun(mock_async_client, "bmr_123", "bmd_123") + result = await run.list_scenario_runs() + + assert len(result) == 2 + assert isinstance(result[0], AsyncScenarioRun) + assert isinstance(result[1], AsyncScenarioRun) + assert result[0].id == "scr_001" + assert result[1].id == "scr_002" + mock_async_client.benchmarks.runs.list_scenario_runs.assert_awaited_once_with("bmr_123") + + async def test_list_scenario_runs_with_params( + self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView + ) -> None: + """Test list_scenario_runs method with filtering parameters.""" + page = SimpleNamespace(runs=[scenario_run_view]) + mock_async_client.benchmarks.runs.list_scenario_runs = AsyncMock(return_value=page) + + run = AsyncBenchmarkRun(mock_async_client, "bmr_123", "bmd_123") + result = await run.list_scenario_runs(limit=10, state="completed") + + assert len(result) == 1 + assert isinstance(result[0], AsyncScenarioRun) + assert result[0].id == scenario_run_view.id + mock_async_client.benchmarks.runs.list_scenario_runs.assert_awaited_once_with( + "bmr_123", limit=10, state="completed" + ) diff --git a/tests/sdk/test_async_blueprint.py b/tests/sdk/test_async_blueprint.py index 75901a445..4c7de1e22 100644 --- a/tests/sdk/test_async_blueprint.py +++ b/tests/sdk/test_async_blueprint.py @@ -16,20 +16,20 @@ class TestAsyncBlueprint: def test_init(self, mock_async_client: AsyncMock) -> None: """Test AsyncBlueprint initialization.""" - blueprint = AsyncBlueprint(mock_async_client, "bp_123") - assert blueprint.id == "bp_123" + blueprint = AsyncBlueprint(mock_async_client, "bpt_123") + assert blueprint.id == "bpt_123" def test_repr(self, mock_async_client: AsyncMock) -> None: """Test AsyncBlueprint string representation.""" - blueprint = AsyncBlueprint(mock_async_client, "bp_123") - assert repr(blueprint) == "" + blueprint = AsyncBlueprint(mock_async_client, "bpt_123") + assert repr(blueprint) == "" @pytest.mark.asyncio async def test_get_info(self, mock_async_client: AsyncMock, blueprint_view: MockBlueprintView) -> None: """Test get_info method.""" mock_async_client.blueprints.retrieve = AsyncMock(return_value=blueprint_view) - blueprint = AsyncBlueprint(mock_async_client, "bp_123") + blueprint = AsyncBlueprint(mock_async_client, "bpt_123") result = await blueprint.get_info( extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, @@ -46,7 +46,7 @@ async def test_logs(self, mock_async_client: AsyncMock) -> None: logs_view = SimpleNamespace(logs=[]) mock_async_client.blueprints.logs = AsyncMock(return_value=logs_view) - blueprint = AsyncBlueprint(mock_async_client, "bp_123") + blueprint = AsyncBlueprint(mock_async_client, "bpt_123") result = await blueprint.logs( extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, @@ -62,7 +62,7 @@ async def test_delete(self, mock_async_client: AsyncMock) -> None: """Test delete 
method.""" mock_async_client.blueprints.delete = AsyncMock(return_value=object()) - blueprint = AsyncBlueprint(mock_async_client, "bp_123") + blueprint = AsyncBlueprint(mock_async_client, "bpt_123") result = await blueprint.delete( extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, @@ -78,7 +78,7 @@ async def test_create_devbox(self, mock_async_client: AsyncMock, devbox_view: Mo """Test create_devbox method.""" mock_async_client.devboxes.create_and_await_running = AsyncMock(return_value=devbox_view) - blueprint = AsyncBlueprint(mock_async_client, "bp_123") + blueprint = AsyncBlueprint(mock_async_client, "bpt_123") devbox = await blueprint.create_devbox( name="test-devbox", metadata={"key": "value"}, @@ -86,5 +86,5 @@ async def test_create_devbox(self, mock_async_client: AsyncMock, devbox_view: Mo extra_headers={"X-Custom": "value"}, ) - assert devbox.id == "dev_123" + assert devbox.id == "dbx_123" mock_async_client.devboxes.create_and_await_running.assert_awaited_once() diff --git a/tests/sdk/test_async_execution.py b/tests/sdk/test_async_execution.py index 06629cf63..f05633263 100644 --- a/tests/sdk/test_async_execution.py +++ b/tests/sdk/test_async_execution.py @@ -91,9 +91,9 @@ class TestAsyncExecution: def test_init(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None: """Test AsyncExecution initialization.""" - execution = AsyncExecution(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type] - assert execution.execution_id == "exec_123" - assert execution.devbox_id == "dev_123" + execution = AsyncExecution(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type] + assert execution.execution_id == "exn_123" + assert execution.devbox_id == "dbx_123" assert execution._initial_result == execution_view @pytest.mark.asyncio @@ -113,19 +113,19 @@ async def task() -> None: async_task_cleanup.extend(tasks) streaming_group = _AsyncStreamingGroup(tasks) - execution = AsyncExecution(mock_async_client, "dev_123", execution_view, streaming_group) # type: ignore[arg-type] + execution = AsyncExecution(mock_async_client, "dbx_123", execution_view, streaming_group) # type: ignore[arg-type] assert execution._streaming_group is streaming_group def test_properties(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None: """Test AsyncExecution properties.""" - execution = AsyncExecution(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type] - assert execution.execution_id == "exec_123" - assert execution.devbox_id == "dev_123" + execution = AsyncExecution(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type] + assert execution.execution_id == "exn_123" + assert execution.devbox_id == "dbx_123" def test_repr(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None: """Test AsyncExecution repr formatting.""" - execution = AsyncExecution(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type] - assert repr(execution) == "" + execution = AsyncExecution(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type] + assert repr(execution) == "" @pytest.mark.asyncio async def test_result_already_completed( @@ -134,14 +134,14 @@ async def test_result_already_completed( """Test result when execution is already completed.""" mock_async_client.devboxes.wait_for_command = AsyncMock(return_value=execution_view) - execution = AsyncExecution(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type] + execution = 
AsyncExecution(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type] result = await execution.result() assert result.exit_code == 0 assert await result.stdout(num_lines=10) == "output" mock_async_client.devboxes.wait_for_command.assert_awaited_once_with( - "exec_123", - devbox_id="dev_123", + "exn_123", + devbox_id="dbx_123", statuses=["completed"], ) @@ -149,13 +149,13 @@ async def test_result_already_completed( async def test_result_needs_polling(self, mock_async_client: AsyncMock) -> None: """Test result when execution needs polling.""" running_execution = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="running", ) completed_execution = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="completed", exit_status=0, stdout="output", @@ -166,14 +166,14 @@ async def test_result_needs_polling(self, mock_async_client: AsyncMock) -> None: mock_async_client.devboxes.wait_for_command = AsyncMock(return_value=completed_execution) - execution = AsyncExecution(mock_async_client, "dev_123", running_execution) # type: ignore[arg-type] + execution = AsyncExecution(mock_async_client, "dbx_123", running_execution) # type: ignore[arg-type] result = await execution.result() assert result.exit_code == 0 assert await result.stdout(num_lines=10) == "output" mock_async_client.devboxes.wait_for_command.assert_awaited_once_with( - "exec_123", - devbox_id="dev_123", + "exn_123", + devbox_id="dbx_123", statuses=["completed"], ) @@ -181,13 +181,13 @@ async def test_result_needs_polling(self, mock_async_client: AsyncMock) -> None: async def test_result_with_streaming_group(self, mock_async_client: AsyncMock) -> None: """Test result with streaming group cleanup.""" running_execution = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="running", ) completed_execution = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="completed", exit_status=0, stdout="output", @@ -202,7 +202,7 @@ async def task() -> None: tasks = [asyncio.create_task(task())] streaming_group = _AsyncStreamingGroup(tasks) - execution = AsyncExecution(mock_async_client, "dev_123", running_execution, streaming_group) # type: ignore[arg-type] + execution = AsyncExecution(mock_async_client, "dbx_123", running_execution, streaming_group) # type: ignore[arg-type] result = await execution.result() assert result.exit_code == 0 @@ -213,8 +213,8 @@ async def task() -> None: async def test_result_passes_options(self, mock_async_client: AsyncMock) -> None: """Ensure result forwards options to wait_for_command.""" execution_view = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="completed", exit_status=0, stdout="output", @@ -223,12 +223,12 @@ async def test_result_passes_options(self, mock_async_client: AsyncMock) -> None mock_async_client.devboxes.wait_for_command = AsyncMock(return_value=execution_view) - execution = AsyncExecution(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type] + execution = AsyncExecution(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type] await execution.result(timeout=30.0, idempotency_key="abc123") mock_async_client.devboxes.wait_for_command.assert_awaited_once_with( - "exec_123", - devbox_id="dev_123", + "exn_123", + 
devbox_id="dbx_123", statuses=["completed"], timeout=30.0, idempotency_key="abc123", @@ -238,20 +238,20 @@ async def test_result_passes_options(self, mock_async_client: AsyncMock) -> None async def test_get_state(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None: """Test get_state method.""" updated_execution = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="running", ) mock_async_client.devboxes.executions.retrieve = AsyncMock(return_value=updated_execution) - execution = AsyncExecution(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type] + execution = AsyncExecution(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type] result = await execution.get_state() assert result == updated_execution assert execution._initial_result == execution_view mock_async_client.devboxes.executions.retrieve.assert_awaited_once_with( - "exec_123", - devbox_id="dev_123", + "exn_123", + devbox_id="dbx_123", ) @pytest.mark.asyncio @@ -259,10 +259,10 @@ async def test_kill(self, mock_async_client: AsyncMock, execution_view: MockExec """Test kill method.""" mock_async_client.devboxes.executions.kill = AsyncMock(return_value=None) - execution = AsyncExecution(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type] + execution = AsyncExecution(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type] await execution.kill() mock_async_client.devboxes.executions.kill.assert_awaited_once_with( - "exec_123", - devbox_id="dev_123", + "exn_123", + devbox_id="dbx_123", ) diff --git a/tests/sdk/test_async_execution_result.py b/tests/sdk/test_async_execution_result.py index cf8a23caa..58802cc4f 100644 --- a/tests/sdk/test_async_execution_result.py +++ b/tests/sdk/test_async_execution_result.py @@ -16,31 +16,31 @@ class TestAsyncExecutionResult: def test_init(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None: """Test AsyncExecutionResult initialization.""" - result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type] + result = AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type] # Verify via public API - assert result.devbox_id == "dev_123" - assert result.execution_id == "exec_123" + assert result.devbox_id == "dbx_123" + assert result.execution_id == "exn_123" def test_devbox_id_property(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None: """Test devbox_id property.""" - result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type] - assert result.devbox_id == "dev_123" + result = AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type] + assert result.devbox_id == "dbx_123" def test_execution_id_property(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None: """Test execution_id property.""" - result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type] - assert result.execution_id == "exec_123" + result = AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type] + assert result.execution_id == "exn_123" def test_exit_code_property(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None: """Test exit_code property.""" - result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type] + result = 
AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type] assert result.exit_code == 0 def test_exit_code_none(self, mock_async_client: AsyncMock) -> None: """Test exit_code property when exit_status is None.""" execution = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="running", exit_status=None, stdout="", @@ -48,19 +48,19 @@ def test_exit_code_none(self, mock_async_client: AsyncMock) -> None: stdout_truncated=False, stderr_truncated=False, ) - result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type] + result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type] assert result.exit_code is None def test_success_property(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None: """Test success property.""" - result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type] + result = AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type] assert result.success is True def test_success_false(self, mock_async_client: AsyncMock) -> None: """Test success property when exit code is non-zero.""" execution = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="completed", exit_status=1, stdout="", @@ -68,19 +68,19 @@ def test_success_false(self, mock_async_client: AsyncMock) -> None: stdout_truncated=False, stderr_truncated=False, ) - result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type] + result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type] assert result.success is False def test_failed_property(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None: """Test failed property when exit code is zero.""" - result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type] + result = AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type] assert result.failed is False def test_failed_true(self, mock_async_client: AsyncMock) -> None: """Test failed property when exit code is non-zero.""" execution = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="completed", exit_status=1, stdout="", @@ -88,14 +88,14 @@ def test_failed_true(self, mock_async_client: AsyncMock) -> None: stdout_truncated=False, stderr_truncated=False, ) - result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type] + result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type] assert result.failed is True def test_failed_none(self, mock_async_client: AsyncMock) -> None: """Test failed property when exit_status is None.""" execution = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="running", exit_status=None, stdout="", @@ -103,13 +103,13 @@ def test_failed_none(self, mock_async_client: AsyncMock) -> None: stdout_truncated=False, stderr_truncated=False, ) - result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type] + result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type] assert result.failed is False @pytest.mark.asyncio async 
def test_stdout(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None: """Test stdout method.""" - result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type] + result = AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type] assert await result.stdout() == "output" assert await result.stdout(num_lines=10) == "output" @@ -117,8 +117,8 @@ async def test_stdout(self, mock_async_client: AsyncMock, execution_view: MockEx async def test_stdout_empty(self, mock_async_client: AsyncMock) -> None: """Test stdout method when stdout is None.""" execution = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="completed", exit_status=0, stdout=None, @@ -126,15 +126,15 @@ async def test_stdout_empty(self, mock_async_client: AsyncMock) -> None: stdout_truncated=False, stderr_truncated=False, ) - result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type] + result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type] assert await result.stdout() == "" @pytest.mark.asyncio async def test_stderr(self, mock_async_client: AsyncMock) -> None: """Test stderr method.""" execution = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="completed", exit_status=1, stdout="", @@ -142,19 +142,19 @@ async def test_stderr(self, mock_async_client: AsyncMock) -> None: stdout_truncated=False, stderr_truncated=False, ) - result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type] + result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type] assert await result.stderr() == "error message" assert await result.stderr(num_lines=20) == "error message" @pytest.mark.asyncio async def test_stderr_empty(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None: """Test stderr method when stderr is None.""" - result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type] + result = AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type] assert await result.stderr() == "" def test_result_property(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None: """Test result property.""" - result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type] + result = AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type] assert result.result == execution_view @pytest.mark.asyncio @@ -176,8 +176,8 @@ async def mock_iter(): mock_async_client.devboxes.executions.stream_stdout_updates = AsyncMock(return_value=mock_async_stream) execution = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="completed", exit_status=0, stdout="partial", @@ -185,13 +185,13 @@ async def mock_iter(): stdout_truncated=True, stderr_truncated=False, ) - result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type] + result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type] # Should stream full output output = await result.stdout() assert output == "line1\nline2\nline3\n" mock_async_client.devboxes.executions.stream_stdout_updates.assert_awaited_once_with( - 
"exec_123", devbox_id="dev_123" + "exn_123", devbox_id="dbx_123" ) @pytest.mark.asyncio @@ -212,8 +212,8 @@ async def mock_iter(): mock_async_client.devboxes.executions.stream_stderr_updates = AsyncMock(return_value=mock_async_stream) execution = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="completed", exit_status=0, stdout="", @@ -221,13 +221,13 @@ async def mock_iter(): stdout_truncated=False, stderr_truncated=True, ) - result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type] + result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type] # Should stream full output output = await result.stderr() assert output == "error1\nerror2\n" mock_async_client.devboxes.executions.stream_stderr_updates.assert_awaited_once_with( - "exec_123", devbox_id="dev_123" + "exn_123", devbox_id="dbx_123" ) @pytest.mark.asyncio @@ -248,8 +248,8 @@ async def mock_iter(): mock_async_client.devboxes.executions.stream_stdout_updates = AsyncMock(return_value=mock_async_stream) execution = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="completed", exit_status=0, stdout="line1\n", @@ -257,7 +257,7 @@ async def mock_iter(): stdout_truncated=True, stderr_truncated=False, ) - result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type] + result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type] # Should stream and return last 2 lines output = await result.stdout(num_lines=2) @@ -267,8 +267,8 @@ async def mock_iter(): async def test_stdout_no_streaming_when_not_truncated(self, mock_async_client: AsyncMock) -> None: """Test stdout doesn't stream when not truncated.""" execution = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="completed", exit_status=0, stdout="complete output", @@ -276,7 +276,7 @@ async def test_stdout_no_streaming_when_not_truncated(self, mock_async_client: A stdout_truncated=False, stderr_truncated=False, ) - result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type] + result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type] # Should return existing output without streaming output = await result.stdout() @@ -286,8 +286,8 @@ async def test_stdout_no_streaming_when_not_truncated(self, mock_async_client: A async def test_stdout_with_num_lines_no_truncation(self, mock_async_client: AsyncMock) -> None: """Test stdout with num_lines when not truncated.""" execution = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="completed", exit_status=0, stdout="line1\nline2\nline3\nline4\nline5", @@ -295,7 +295,7 @@ async def test_stdout_with_num_lines_no_truncation(self, mock_async_client: Asyn stdout_truncated=False, stderr_truncated=False, ) - result = AsyncExecutionResult(mock_async_client, "dev_123", execution) # type: ignore[arg-type] + result = AsyncExecutionResult(mock_async_client, "dbx_123", execution) # type: ignore[arg-type] # Should return last 2 lines without streaming output = await result.stdout(num_lines=2) @@ -303,7 +303,7 @@ async def test_stdout_with_num_lines_no_truncation(self, mock_async_client: Asyn def test_count_non_empty_lines(self, mock_async_client: AsyncMock, 
execution_view: MockExecutionView) -> None: """Test the _count_non_empty_lines helper method.""" - result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type] + result = AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type] # Test various input strings assert result._count_non_empty_lines("") == 0 @@ -315,7 +315,7 @@ def test_count_non_empty_lines(self, mock_async_client: AsyncMock, execution_vie def test_get_last_n_lines(self, mock_async_client: AsyncMock, execution_view: MockExecutionView) -> None: """Test the _get_last_n_lines helper method.""" - result = AsyncExecutionResult(mock_async_client, "dev_123", execution_view) # type: ignore[arg-type] + result = AsyncExecutionResult(mock_async_client, "dbx_123", execution_view) # type: ignore[arg-type] # Test various scenarios assert result._get_last_n_lines("", 5) == "" diff --git a/tests/sdk/test_async_ops.py b/tests/sdk/test_async_ops.py index f8a16e1c0..7e36e938d 100644 --- a/tests/sdk/test_async_ops.py +++ b/tests/sdk/test_async_ops.py @@ -17,6 +17,7 @@ MockScorerView, MockScenarioView, MockSnapshotView, + MockBenchmarkView, MockBlueprintView, create_mock_httpx_response, ) @@ -27,12 +28,14 @@ AsyncAgentOps, AsyncScenario, AsyncSnapshot, + AsyncBenchmark, AsyncBlueprint, AsyncDevboxOps, AsyncScorerOps, AsyncRunloopSDK, AsyncScenarioOps, AsyncSnapshotOps, + AsyncBenchmarkOps, AsyncBlueprintOps, AsyncStorageObject, AsyncStorageObjectOps, @@ -56,7 +59,7 @@ async def test_create(self, mock_async_client: AsyncMock, devbox_view: MockDevbo ) assert isinstance(devbox, AsyncDevbox) - assert devbox.id == "dev_123" + assert devbox.id == "dbx_123" mock_async_client.devboxes.create_and_await_running.assert_awaited_once() @pytest.mark.asyncio @@ -66,13 +69,13 @@ async def test_create_from_blueprint_id(self, mock_async_client: AsyncMock, devb ops = AsyncDevboxOps(mock_async_client) devbox = await ops.create_from_blueprint_id( - "bp_123", + "bpt_123", name="test-devbox", ) assert isinstance(devbox, AsyncDevbox) call_kwargs = mock_async_client.devboxes.create_and_await_running.call_args[1] - assert call_kwargs["blueprint_id"] == "bp_123" + assert call_kwargs["blueprint_id"] == "bpt_123" @pytest.mark.asyncio async def test_create_from_blueprint_name(self, mock_async_client: AsyncMock, devbox_view: MockDevboxView) -> None: @@ -96,21 +99,21 @@ async def test_create_from_snapshot(self, mock_async_client: AsyncMock, devbox_v ops = AsyncDevboxOps(mock_async_client) devbox = await ops.create_from_snapshot( - "snap_123", + "snp_123", name="test-devbox", ) assert isinstance(devbox, AsyncDevbox) call_kwargs = mock_async_client.devboxes.create_and_await_running.call_args[1] - assert call_kwargs["snapshot_id"] == "snap_123" + assert call_kwargs["snapshot_id"] == "snp_123" def test_from_id(self, mock_async_client: AsyncMock) -> None: """Test from_id method.""" ops = AsyncDevboxOps(mock_async_client) - devbox = ops.from_id("dev_123") + devbox = ops.from_id("dbx_123") assert isinstance(devbox, AsyncDevbox) - assert devbox.id == "dev_123" + assert devbox.id == "dbx_123" # Verify from_id does not wait for running status if hasattr(mock_async_client.devboxes, "await_running"): assert not mock_async_client.devboxes.await_running.called @@ -142,7 +145,7 @@ async def test_list_single(self, mock_async_client: AsyncMock, devbox_view: Mock assert len(devboxes) == 1 assert isinstance(devboxes[0], AsyncDevbox) - assert devboxes[0].id == "dev_123" + assert devboxes[0].id == "dbx_123" 
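Taken together, the `AsyncDevboxOps` hunks above pin down the wrapper's surface: `create_from_blueprint_id` forwards the blueprint ID and awaits RUNNING, `from_id` wraps an ID with no wait, and `list` yields `AsyncDevbox` wrappers. A minimal usage sketch follows — the `runloop_api_client.sdk` import path, the bare `list()` call, and the placeholder token are assumptions not confirmed by this hunk; the attribute names come from the `AsyncRunloopSDK` assertions later in this diff.

```python
import asyncio

from runloop_api_client.sdk import AsyncRunloopSDK  # import path is an assumption


async def main() -> None:
    runloop = AsyncRunloopSDK(bearer_token="YOUR_TOKEN")  # placeholder token

    # create_from_blueprint_id forwards blueprint_id and awaits RUNNING,
    # as test_create_from_blueprint_id asserts (note the bpt_/dbx_ prefixes).
    devbox = await runloop.devbox.create_from_blueprint_id("bpt_123", name="example")
    print(devbox.id)  # e.g. "dbx_..."

    # from_id wraps a known ID without waiting for the devbox to run.
    wrapper = runloop.devbox.from_id(devbox.id)
    print(wrapper.id)

    # list returns AsyncDevbox wrappers rather than raw views.
    for d in await runloop.devbox.list():
        print(d.id)


asyncio.run(main())
```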
mock_async_client.devboxes.list.assert_awaited_once() @pytest.mark.asyncio @@ -174,7 +177,7 @@ async def test_list_empty(self, mock_async_client: AsyncMock) -> None: mock_async_client.devboxes.disk_snapshots.list = AsyncMock(return_value=page) ops = AsyncSnapshotOps(mock_async_client) - snapshots = await ops.list(devbox_id="dev_123", limit=10) + snapshots = await ops.list(devbox_id="dbx_123", limit=10) assert len(snapshots) == 0 mock_async_client.devboxes.disk_snapshots.list.assert_awaited_once() @@ -187,14 +190,14 @@ async def test_list_single(self, mock_async_client: AsyncMock, snapshot_view: Mo ops = AsyncSnapshotOps(mock_async_client) snapshots = await ops.list( - devbox_id="dev_123", + devbox_id="dbx_123", limit=10, starting_after="snap_000", ) assert len(snapshots) == 1 assert isinstance(snapshots[0], AsyncSnapshot) - assert snapshots[0].id == "snap_123" + assert snapshots[0].id == "snp_123" mock_async_client.devboxes.disk_snapshots.list.assert_awaited_once() @pytest.mark.asyncio @@ -206,7 +209,7 @@ async def test_list_multiple(self, mock_async_client: AsyncMock) -> None: mock_async_client.devboxes.disk_snapshots.list = AsyncMock(return_value=page) ops = AsyncSnapshotOps(mock_async_client) - snapshots = await ops.list(devbox_id="dev_123", limit=10) + snapshots = await ops.list(devbox_id="dbx_123", limit=10) assert len(snapshots) == 2 assert isinstance(snapshots[0], AsyncSnapshot) @@ -218,10 +221,10 @@ async def test_list_multiple(self, mock_async_client: AsyncMock) -> None: def test_from_id(self, mock_async_client: AsyncMock) -> None: """Test from_id method.""" ops = AsyncSnapshotOps(mock_async_client) - snapshot = ops.from_id("snap_123") + snapshot = ops.from_id("snp_123") assert isinstance(snapshot, AsyncSnapshot) - assert snapshot.id == "snap_123" + assert snapshot.id == "snp_123" class TestAsyncBlueprintOps: @@ -239,16 +242,16 @@ async def test_create(self, mock_async_client: AsyncMock, blueprint_view: MockBl ) assert isinstance(blueprint, AsyncBlueprint) - assert blueprint.id == "bp_123" + assert blueprint.id == "bpt_123" mock_async_client.blueprints.create_and_await_build_complete.assert_awaited_once() def test_from_id(self, mock_async_client: AsyncMock) -> None: """Test from_id method.""" ops = AsyncBlueprintOps(mock_async_client) - blueprint = ops.from_id("bp_123") + blueprint = ops.from_id("bpt_123") assert isinstance(blueprint, AsyncBlueprint) - assert blueprint.id == "bp_123" + assert blueprint.id == "bpt_123" @pytest.mark.asyncio async def test_list_empty(self, mock_async_client: AsyncMock) -> None: @@ -277,7 +280,7 @@ async def test_list_single(self, mock_async_client: AsyncMock, blueprint_view: M assert len(blueprints) == 1 assert isinstance(blueprints[0], AsyncBlueprint) - assert blueprints[0].id == "bp_123" + assert blueprints[0].id == "bpt_123" mock_async_client.blueprints.list.assert_awaited_once() @pytest.mark.asyncio @@ -712,16 +715,16 @@ async def test_create(self, mock_async_client: AsyncMock, scorer_view: MockScore ) assert isinstance(scorer, AsyncScorer) - assert scorer.id == "scorer_123" + assert scorer.id == "sco_123" mock_async_client.scenarios.scorers.create.assert_awaited_once() def test_from_id(self, mock_async_client: AsyncMock) -> None: """Test from_id method.""" ops = AsyncScorerOps(mock_async_client) - scorer = ops.from_id("scorer_123") + scorer = ops.from_id("sco_123") assert isinstance(scorer, AsyncScorer) - assert scorer.id == "scorer_123" + assert scorer.id == "sco_123" @pytest.mark.asyncio async def test_list_empty(self, mock_async_client: 
AsyncMock) -> None: @@ -756,7 +759,7 @@ async def async_iter(): assert len(scorers) == 1 assert isinstance(scorers[0], AsyncScorer) - assert scorers[0].id == "scorer_123" + assert scorers[0].id == "sco_123" mock_async_client.scenarios.scorers.list.assert_awaited_once() @pytest.mark.asyncio @@ -793,19 +796,20 @@ async def test_create(self, mock_async_client: AsyncMock, agent_view: MockAgentV client = AsyncAgentOps(mock_async_client) agent = await client.create( name="test-agent", + version="1.2.3", ) assert isinstance(agent, AsyncAgent) - assert agent.id == "agent_123" + assert agent.id == "agt_123" mock_async_client.agents.create.assert_called_once() def test_from_id(self, mock_async_client: AsyncMock) -> None: """Test from_id method.""" client = AsyncAgentOps(mock_async_client) - agent = client.from_id("agent_123") + agent = client.from_id("agt_123") assert isinstance(agent, AsyncAgent) - assert agent.id == "agent_123" + assert agent.id == "agt_123" @pytest.mark.asyncio async def test_list(self, mock_async_client: AsyncMock) -> None: @@ -901,10 +905,11 @@ async def test_create_from_npm(self, mock_async_client: AsyncMock, agent_view: M agent = await client.create_from_npm( name="test-agent", package_name="@runloop/example-agent", + version="1.2.3", ) assert isinstance(agent, AsyncAgent) - assert agent.id == "agent_123" + assert agent.id == "agt_123" mock_async_client.agents.create.assert_awaited_once_with( source={ "type": "npm", @@ -913,6 +918,7 @@ async def test_create_from_npm(self, mock_async_client: AsyncMock, agent_view: M }, }, name="test-agent", + version="1.2.3", ) @pytest.mark.asyncio @@ -926,25 +932,25 @@ async def test_create_from_npm_with_all_options( agent = await client.create_from_npm( name="test-agent", package_name="@runloop/example-agent", - npm_version="1.2.3", registry_url="https://registry.example.com", agent_setup=["npm install", "npm run setup"], + version="1.2.3", extra_headers={"X-Custom": "header"}, ) assert isinstance(agent, AsyncAgent) - assert agent.id == "agent_123" + assert agent.id == "agt_123" mock_async_client.agents.create.assert_awaited_once_with( source={ "type": "npm", "npm": { "package_name": "@runloop/example-agent", - "npm_version": "1.2.3", "registry_url": "https://registry.example.com", "agent_setup": ["npm install", "npm run setup"], }, }, name="test-agent", + version="1.2.3", extra_headers={"X-Custom": "header"}, ) @@ -957,6 +963,7 @@ async def test_create_from_npm_raises_when_source_provided(self, mock_async_clie await client.create_from_npm( name="test-agent", package_name="@runloop/example-agent", + version="1.2.3", source={"type": "git", "git": {"repository": "https://github.com/example/repo"}}, ) @@ -969,10 +976,11 @@ async def test_create_from_pip(self, mock_async_client: AsyncMock, agent_view: M agent = await client.create_from_pip( name="test-agent", package_name="runloop-example-agent", + version="1.2.3", ) assert isinstance(agent, AsyncAgent) - assert agent.id == "agent_123" + assert agent.id == "agt_123" mock_async_client.agents.create.assert_awaited_once_with( source={ "type": "pip", @@ -981,6 +989,7 @@ async def test_create_from_pip(self, mock_async_client: AsyncMock, agent_view: M }, }, name="test-agent", + version="1.2.3", ) @pytest.mark.asyncio @@ -994,24 +1003,24 @@ async def test_create_from_pip_with_all_options( agent = await client.create_from_pip( name="test-agent", package_name="runloop-example-agent", - pip_version="1.2.3", registry_url="https://pypi.example.com", agent_setup=["pip install extra-deps"], + version="1.2.3", 
) assert isinstance(agent, AsyncAgent) - assert agent.id == "agent_123" + assert agent.id == "agt_123" mock_async_client.agents.create.assert_awaited_once_with( source={ "type": "pip", "pip": { "package_name": "runloop-example-agent", - "pip_version": "1.2.3", "registry_url": "https://pypi.example.com", "agent_setup": ["pip install extra-deps"], }, }, name="test-agent", + version="1.2.3", ) @pytest.mark.asyncio @@ -1023,10 +1032,11 @@ async def test_create_from_git(self, mock_async_client: AsyncMock, agent_view: M agent = await client.create_from_git( name="test-agent", repository="https://github.com/example/agent-repo", + version="1.2.3", ) assert isinstance(agent, AsyncAgent) - assert agent.id == "agent_123" + assert agent.id == "agt_123" mock_async_client.agents.create.assert_awaited_once_with( source={ "type": "git", @@ -1035,6 +1045,7 @@ async def test_create_from_git(self, mock_async_client: AsyncMock, agent_view: M }, }, name="test-agent", + version="1.2.3", ) @pytest.mark.asyncio @@ -1050,10 +1061,11 @@ async def test_create_from_git_with_all_options( repository="https://github.com/example/agent-repo", ref="develop", agent_setup=["npm install", "npm run build"], + version="1.2.3", ) assert isinstance(agent, AsyncAgent) - assert agent.id == "agent_123" + assert agent.id == "agt_123" mock_async_client.agents.create.assert_awaited_once_with( source={ "type": "git", @@ -1064,6 +1076,7 @@ async def test_create_from_git_with_all_options( }, }, name="test-agent", + version="1.2.3", ) @pytest.mark.asyncio @@ -1075,10 +1088,11 @@ async def test_create_from_object(self, mock_async_client: AsyncMock, agent_view agent = await client.create_from_object( name="test-agent", object_id="obj_123", + version="1.2.3", ) assert isinstance(agent, AsyncAgent) - assert agent.id == "agent_123" + assert agent.id == "agt_123" mock_async_client.agents.create.assert_awaited_once_with( source={ "type": "object", @@ -1087,6 +1101,7 @@ async def test_create_from_object(self, mock_async_client: AsyncMock, agent_view }, }, name="test-agent", + version="1.2.3", ) @pytest.mark.asyncio @@ -1101,10 +1116,11 @@ async def test_create_from_object_with_agent_setup( name="test-agent", object_id="obj_123", agent_setup=["chmod +x setup.sh", "./setup.sh"], + version="1.2.3", ) assert isinstance(agent, AsyncAgent) - assert agent.id == "agent_123" + assert agent.id == "agt_123" mock_async_client.agents.create.assert_awaited_once_with( source={ "type": "object", @@ -1114,6 +1130,7 @@ async def test_create_from_object_with_agent_setup( }, }, name="test-agent", + version="1.2.3", ) @@ -1186,6 +1203,62 @@ async def async_iter(): mock_async_client.scenarios.list.assert_awaited_once() +class TestAsyncBenchmarkOps: + """Tests for AsyncBenchmarkOps class.""" + + @pytest.mark.asyncio + async def test_create(self, mock_async_client: AsyncMock, benchmark_view: MockBenchmarkView) -> None: + """Test create method.""" + mock_async_client.benchmarks.create = AsyncMock(return_value=benchmark_view) + + ops = AsyncBenchmarkOps(mock_async_client) + benchmark = await ops.create(name="test-benchmark", scenario_ids=["scn_001", "scn_002"]) + + assert isinstance(benchmark, AsyncBenchmark) + assert benchmark.id == "bmd_123" + mock_async_client.benchmarks.create.assert_awaited_once_with( + name="test-benchmark", scenario_ids=["scn_001", "scn_002"] + ) + + def test_from_id(self, mock_async_client: AsyncMock) -> None: + """Test from_id method.""" + ops = AsyncBenchmarkOps(mock_async_client) + benchmark = ops.from_id("bmd_123") + + assert 
isinstance(benchmark, AsyncBenchmark) + assert benchmark.id == "bmd_123" + + @pytest.mark.asyncio + async def test_list_multiple(self, mock_async_client: AsyncMock) -> None: + """Test list method with multiple results.""" + benchmark_view1 = MockBenchmarkView(id="bmd_001", name="benchmark-1") + benchmark_view2 = MockBenchmarkView(id="bmd_002", name="benchmark-2") + page = SimpleNamespace(benchmarks=[benchmark_view1, benchmark_view2]) + mock_async_client.benchmarks.list = AsyncMock(return_value=page) + + ops = AsyncBenchmarkOps(mock_async_client) + benchmarks = await ops.list(limit=10) + + assert len(benchmarks) == 2 + assert isinstance(benchmarks[0], AsyncBenchmark) + assert isinstance(benchmarks[1], AsyncBenchmark) + assert benchmarks[0].id == "bmd_001" + assert benchmarks[1].id == "bmd_002" + mock_async_client.benchmarks.list.assert_awaited_once_with(limit=10) + + @pytest.mark.asyncio + async def test_list_with_name_filter(self, mock_async_client: AsyncMock, benchmark_view: MockBenchmarkView) -> None: + """Test list method with name filter.""" + page = SimpleNamespace(benchmarks=[benchmark_view]) + mock_async_client.benchmarks.list = AsyncMock(return_value=page) + + ops = AsyncBenchmarkOps(mock_async_client) + benchmarks = await ops.list(name="test-benchmark", limit=10) + + assert len(benchmarks) == 1 + mock_async_client.benchmarks.list.assert_awaited_once_with(name="test-benchmark", limit=10) + + class TestAsyncRunloopSDK: """Tests for AsyncRunloopSDK class.""" @@ -1194,6 +1267,7 @@ def test_init(self) -> None: runloop = AsyncRunloopSDK(bearer_token="test-token") assert runloop.api is not None assert isinstance(runloop.agent, AsyncAgentOps) + assert isinstance(runloop.benchmark, AsyncBenchmarkOps) assert isinstance(runloop.devbox, AsyncDevboxOps) assert isinstance(runloop.scorer, AsyncScorerOps) assert isinstance(runloop.snapshot, AsyncSnapshotOps) diff --git a/tests/sdk/test_async_scenario.py b/tests/sdk/test_async_scenario.py index 22a8f457a..cffca9b82 100644 --- a/tests/sdk/test_async_scenario.py +++ b/tests/sdk/test_async_scenario.py @@ -99,8 +99,8 @@ async def test_run_async(self, mock_async_client: AsyncMock, scenario_run_view: scenario = AsyncScenario(mock_async_client, "scn_123") run = await scenario.run_async(run_name="test-run") - assert run.id == "run_123" - assert run.devbox_id == "dev_123" + assert run.id == "scr_123" + assert run.devbox_id == "dbx_123" mock_async_client.scenarios.start_run.assert_awaited_once_with( scenario_id="scn_123", run_name="test-run", @@ -113,8 +113,8 @@ async def test_run(self, mock_async_client: AsyncMock, scenario_run_view: MockSc scenario = AsyncScenario(mock_async_client, "scn_123") run = await scenario.run(run_name="test-run") - assert run.id == "run_123" - assert run.devbox_id == "dev_123" + assert run.id == "scr_123" + assert run.devbox_id == "dbx_123" mock_async_client.scenarios.start_run_and_await_env_ready.assert_awaited_once_with( scenario_id="scn_123", run_name="test-run", diff --git a/tests/sdk/test_async_scenario_run.py b/tests/sdk/test_async_scenario_run.py index 010ad6cbb..c034524a0 100644 --- a/tests/sdk/test_async_scenario_run.py +++ b/tests/sdk/test_async_scenario_run.py @@ -15,31 +15,31 @@ class TestAsyncScenarioRun: def test_init(self, mock_async_client: AsyncMock) -> None: """Test AsyncScenarioRun initialization.""" - run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123") - assert run.id == "run_123" - assert run.devbox_id == "dev_123" + run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123") + assert run.id 
== "scr_123" + assert run.devbox_id == "dbx_123" def test_repr(self, mock_async_client: AsyncMock) -> None: """Test AsyncScenarioRun string representation.""" - run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123") - assert repr(run) == "" + run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123") + assert repr(run) == "" def test_devbox_property(self, mock_async_client: AsyncMock) -> None: """Test devbox property returns AsyncDevbox wrapper.""" - run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123") + run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123") devbox = run.devbox - assert devbox.id == "dev_123" + assert devbox.id == "dbx_123" async def test_get_info(self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView) -> None: """Test get_info method.""" mock_async_client.scenarios.runs.retrieve = AsyncMock(return_value=scenario_run_view) - run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123") + run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123") result = await run.get_info() assert result == scenario_run_view - mock_async_client.scenarios.runs.retrieve.assert_awaited_once_with("run_123") + mock_async_client.scenarios.runs.retrieve.assert_awaited_once_with("scr_123") async def test_await_env_ready( self, @@ -51,10 +51,10 @@ async def test_await_env_ready( mock_async_client.devboxes.await_running = AsyncMock(return_value=devbox_view) mock_async_client.scenarios.runs.retrieve = AsyncMock(return_value=scenario_run_view) - run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123") + run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123") result = await run.await_env_ready() - mock_async_client.devboxes.await_running.assert_awaited_once_with("dev_123", polling_config=None) + mock_async_client.devboxes.await_running.assert_awaited_once_with("dbx_123", polling_config=None) assert result == scenario_run_view async def test_score(self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView) -> None: @@ -62,33 +62,33 @@ async def test_score(self, mock_async_client: AsyncMock, scenario_run_view: Mock scenario_run_view.state = "scoring" mock_async_client.scenarios.runs.score = AsyncMock(return_value=scenario_run_view) - run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123") + run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123") result = await run.score() assert result == scenario_run_view - mock_async_client.scenarios.runs.score.assert_awaited_once_with("run_123") + mock_async_client.scenarios.runs.score.assert_awaited_once_with("scr_123") async def test_await_scored(self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView) -> None: """Test await_scored method.""" scenario_run_view.state = "scored" mock_async_client.scenarios.runs.await_scored = AsyncMock(return_value=scenario_run_view) - run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123") + run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123") result = await run.await_scored() assert result == scenario_run_view - mock_async_client.scenarios.runs.await_scored.assert_awaited_once_with("run_123") + mock_async_client.scenarios.runs.await_scored.assert_awaited_once_with("scr_123") async def test_score_and_await(self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView) -> None: """Test score_and_await method.""" scenario_run_view.state = "scored" mock_async_client.scenarios.runs.score_and_await = AsyncMock(return_value=scenario_run_view) - run = 
AsyncScenarioRun(mock_async_client, "run_123", "dev_123") + run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123") result = await run.score_and_await() assert result == scenario_run_view - mock_async_client.scenarios.runs.score_and_await.assert_awaited_once_with("run_123") + mock_async_client.scenarios.runs.score_and_await.assert_awaited_once_with("scr_123") async def test_score_and_complete( self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView @@ -97,33 +97,33 @@ async def test_score_and_complete( scenario_run_view.state = "completed" mock_async_client.scenarios.runs.score_and_complete = AsyncMock(return_value=scenario_run_view) - run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123") + run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123") result = await run.score_and_complete() assert result == scenario_run_view - mock_async_client.scenarios.runs.score_and_complete.assert_awaited_once_with("run_123") + mock_async_client.scenarios.runs.score_and_complete.assert_awaited_once_with("scr_123") async def test_complete(self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView) -> None: """Test complete method.""" scenario_run_view.state = "completed" mock_async_client.scenarios.runs.complete = AsyncMock(return_value=scenario_run_view) - run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123") + run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123") result = await run.complete() assert result == scenario_run_view - mock_async_client.scenarios.runs.complete.assert_awaited_once_with("run_123") + mock_async_client.scenarios.runs.complete.assert_awaited_once_with("scr_123") async def test_cancel(self, mock_async_client: AsyncMock, scenario_run_view: MockScenarioRunView) -> None: """Test cancel method.""" scenario_run_view.state = "canceled" mock_async_client.scenarios.runs.cancel = AsyncMock(return_value=scenario_run_view) - run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123") + run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123") result = await run.cancel() assert result == scenario_run_view - mock_async_client.scenarios.runs.cancel.assert_awaited_once_with("run_123") + mock_async_client.scenarios.runs.cancel.assert_awaited_once_with("scr_123") async def test_download_logs(self, mock_async_client: AsyncMock, tmp_path: Path) -> None: """Test download_logs method writes to file.""" @@ -131,11 +131,11 @@ async def test_download_logs(self, mock_async_client: AsyncMock, tmp_path: Path) mock_response.write_to_file = AsyncMock() mock_async_client.scenarios.runs.download_logs = AsyncMock(return_value=mock_response) - run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123") + run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123") output_path = tmp_path / "logs.zip" await run.download_logs(output_path) - mock_async_client.scenarios.runs.download_logs.assert_awaited_once_with("run_123") + mock_async_client.scenarios.runs.download_logs.assert_awaited_once_with("scr_123") mock_response.write_to_file.assert_awaited_once_with(output_path) async def test_get_score_when_scored(self, mock_async_client: AsyncMock) -> None: @@ -144,19 +144,19 @@ async def test_get_score_when_scored(self, mock_async_client: AsyncMock) -> None run_view = MockScenarioRunView(state="scored", scoring_contract_result=scoring_result) mock_async_client.scenarios.runs.retrieve = AsyncMock(return_value=run_view) - run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123") + run = AsyncScenarioRun(mock_async_client, 
"scr_123", "dbx_123") result = await run.get_score() assert result == scoring_result - mock_async_client.scenarios.runs.retrieve.assert_awaited_once_with("run_123") + mock_async_client.scenarios.runs.retrieve.assert_awaited_once_with("scr_123") async def test_get_score_when_not_scored(self, mock_async_client: AsyncMock) -> None: """Test get_score returns None when not scored.""" run_view = MockScenarioRunView(state="running", scoring_contract_result=None) mock_async_client.scenarios.runs.retrieve = AsyncMock(return_value=run_view) - run = AsyncScenarioRun(mock_async_client, "run_123", "dev_123") + run = AsyncScenarioRun(mock_async_client, "scr_123", "dbx_123") result = await run.get_score() assert result is None - mock_async_client.scenarios.runs.retrieve.assert_awaited_once_with("run_123") + mock_async_client.scenarios.runs.retrieve.assert_awaited_once_with("scr_123") diff --git a/tests/sdk/test_async_scorer.py b/tests/sdk/test_async_scorer.py index a3eeea884..253ae9585 100644 --- a/tests/sdk/test_async_scorer.py +++ b/tests/sdk/test_async_scorer.py @@ -16,20 +16,20 @@ class TestAsyncScorer: def test_init(self, mock_async_client: AsyncMock) -> None: """Test AsyncScorer initialization.""" - scorer = AsyncScorer(mock_async_client, "scorer_123") - assert scorer.id == "scorer_123" + scorer = AsyncScorer(mock_async_client, "sco_123") + assert scorer.id == "sco_123" def test_repr(self, mock_async_client: AsyncMock) -> None: """Test AsyncScorer string representation.""" - scorer = AsyncScorer(mock_async_client, "scorer_123") - assert repr(scorer) == "" + scorer = AsyncScorer(mock_async_client, "sco_123") + assert repr(scorer) == "" @pytest.mark.asyncio async def test_get_info(self, mock_async_client: AsyncMock, scorer_view: MockScorerView) -> None: """Test get_info method.""" mock_async_client.scenarios.scorers.retrieve = AsyncMock(return_value=scorer_view) - scorer = AsyncScorer(mock_async_client, "scorer_123") + scorer = AsyncScorer(mock_async_client, "sco_123") result = await scorer.get_info() assert result == scorer_view @@ -38,10 +38,10 @@ async def test_get_info(self, mock_async_client: AsyncMock, scorer_view: MockSco @pytest.mark.asyncio async def test_update(self, mock_async_client: AsyncMock) -> None: """Test update method.""" - update_response = SimpleNamespace(id="scorer_123", type="updated_scorer", bash_script="echo 'score=1.0'") + update_response = SimpleNamespace(id="sco_123", type="updated_scorer", bash_script="echo 'score=1.0'") mock_async_client.scenarios.scorers.update = AsyncMock(return_value=update_response) - scorer = AsyncScorer(mock_async_client, "scorer_123") + scorer = AsyncScorer(mock_async_client, "sco_123") result = await scorer.update( type="updated_scorer", bash_script="echo 'score=1.0'", @@ -60,7 +60,7 @@ async def test_validate(self, mock_async_client: AsyncMock) -> None: ) mock_async_client.scenarios.scorers.validate = AsyncMock(return_value=validate_response) - scorer = AsyncScorer(mock_async_client, "scorer_123") + scorer = AsyncScorer(mock_async_client, "sco_123") result = await scorer.validate( scoring_context={"test": "context"}, ) diff --git a/tests/sdk/test_async_snapshot.py b/tests/sdk/test_async_snapshot.py index a7b946c11..e9dca48bc 100644 --- a/tests/sdk/test_async_snapshot.py +++ b/tests/sdk/test_async_snapshot.py @@ -17,20 +17,20 @@ class TestAsyncSnapshot: def test_init(self, mock_async_client: AsyncMock) -> None: """Test AsyncSnapshot initialization.""" - snapshot = AsyncSnapshot(mock_async_client, "snap_123") - assert snapshot.id == "snap_123" + 
snapshot = AsyncSnapshot(mock_async_client, "snp_123") + assert snapshot.id == "snp_123" def test_repr(self, mock_async_client: AsyncMock) -> None: """Test AsyncSnapshot string representation.""" - snapshot = AsyncSnapshot(mock_async_client, "snap_123") - assert repr(snapshot) == "" + snapshot = AsyncSnapshot(mock_async_client, "snp_123") + assert repr(snapshot) == "" @pytest.mark.asyncio async def test_get_info(self, mock_async_client: AsyncMock, snapshot_view: MockSnapshotView) -> None: """Test get_info method.""" mock_async_client.devboxes.disk_snapshots.query_status = AsyncMock(return_value=snapshot_view) - snapshot = AsyncSnapshot(mock_async_client, "snap_123") + snapshot = AsyncSnapshot(mock_async_client, "snp_123") result = await snapshot.get_info( extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, @@ -44,10 +44,10 @@ async def test_get_info(self, mock_async_client: AsyncMock, snapshot_view: MockS @pytest.mark.asyncio async def test_update(self, mock_async_client: AsyncMock) -> None: """Test update method.""" - updated_snapshot = SimpleNamespace(id="snap_123", name="updated-name") + updated_snapshot = SimpleNamespace(id="snp_123", name="updated-name") mock_async_client.devboxes.disk_snapshots.update = AsyncMock(return_value=updated_snapshot) - snapshot = AsyncSnapshot(mock_async_client, "snap_123") + snapshot = AsyncSnapshot(mock_async_client, "snp_123") result = await snapshot.update( commit_message="Update message", metadata={"key": "value"}, @@ -67,7 +67,7 @@ async def test_delete(self, mock_async_client: AsyncMock) -> None: """Test delete method.""" mock_async_client.devboxes.disk_snapshots.delete = AsyncMock(return_value=object()) - snapshot = AsyncSnapshot(mock_async_client, "snap_123") + snapshot = AsyncSnapshot(mock_async_client, "snp_123") result = await snapshot.delete( extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, @@ -85,7 +85,7 @@ async def test_await_completed(self, mock_async_client: AsyncMock, snapshot_view mock_async_client.devboxes.disk_snapshots.await_completed = AsyncMock(return_value=snapshot_view) polling_config = PollingConfig(timeout_seconds=60.0) - snapshot = AsyncSnapshot(mock_async_client, "snap_123") + snapshot = AsyncSnapshot(mock_async_client, "snp_123") result = await snapshot.await_completed( polling_config=polling_config, extra_headers={"X-Custom": "value"}, @@ -102,7 +102,7 @@ async def test_create_devbox(self, mock_async_client: AsyncMock, devbox_view: Mo """Test create_devbox method.""" mock_async_client.devboxes.create_and_await_running = AsyncMock(return_value=devbox_view) - snapshot = AsyncSnapshot(mock_async_client, "snap_123") + snapshot = AsyncSnapshot(mock_async_client, "snp_123") devbox = await snapshot.create_devbox( name="test-devbox", metadata={"key": "value"}, @@ -110,5 +110,5 @@ async def test_create_devbox(self, mock_async_client: AsyncMock, devbox_view: Mo extra_headers={"X-Custom": "value"}, ) - assert devbox.id == "dev_123" + assert devbox.id == "dbx_123" mock_async_client.devboxes.create_and_await_running.assert_awaited_once() diff --git a/tests/sdk/test_benchmark.py b/tests/sdk/test_benchmark.py new file mode 100644 index 000000000..1f4f12751 --- /dev/null +++ b/tests/sdk/test_benchmark.py @@ -0,0 +1,122 @@ +"""Comprehensive tests for sync Benchmark class.""" + +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import Mock + +from tests.sdk.conftest import MockBenchmarkView, MockBenchmarkRunView +from runloop_api_client.sdk.benchmark import 
Benchmark +from runloop_api_client.sdk.benchmark_run import BenchmarkRun + + +class TestBenchmark: + """Tests for Benchmark class.""" + + def test_init(self, mock_client: Mock) -> None: + """Test Benchmark initialization.""" + benchmark = Benchmark(mock_client, "bmd_123") + assert benchmark.id == "bmd_123" + assert repr(benchmark) == "" + + def test_get_info(self, mock_client: Mock, benchmark_view: MockBenchmarkView) -> None: + """Test get_info method.""" + mock_client.benchmarks.retrieve.return_value = benchmark_view + + benchmark = Benchmark(mock_client, "bmd_123") + result = benchmark.get_info() + + assert result == benchmark_view + mock_client.benchmarks.retrieve.assert_called_once_with("bmd_123") + + def test_update(self, mock_client: Mock, benchmark_view: MockBenchmarkView) -> None: + """Test update method.""" + benchmark_view.name = "updated-name" + mock_client.benchmarks.update.return_value = benchmark_view + + benchmark = Benchmark(mock_client, "bmd_123") + result = benchmark.update(name="updated-name") + + assert result == benchmark_view + mock_client.benchmarks.update.assert_called_once_with("bmd_123", name="updated-name") + + def test_run(self, mock_client: Mock, benchmark_run_view: MockBenchmarkRunView) -> None: + """Test run method.""" + mock_client.benchmarks.start_run.return_value = benchmark_run_view + + benchmark = Benchmark(mock_client, "bmd_123") + result = benchmark.start_run(run_name="test-run", metadata={"key": "value"}) + + assert isinstance(result, BenchmarkRun) + assert result.id == benchmark_run_view.id + assert result.benchmark_id == benchmark_run_view.benchmark_id + mock_client.benchmarks.start_run.assert_called_once_with( + benchmark_id="bmd_123", run_name="test-run", metadata={"key": "value"} + ) + + def test_add_scenarios(self, mock_client: Mock, benchmark_view: MockBenchmarkView) -> None: + """Test add_scenarios method.""" + benchmark_view.scenario_ids = ["scn_001", "scn_002"] + mock_client.benchmarks.update_scenarios.return_value = benchmark_view + + benchmark = Benchmark(mock_client, "bmd_123") + result = benchmark.add_scenarios(["scn_001", "scn_002"]) + + assert result == benchmark_view + mock_client.benchmarks.update_scenarios.assert_called_once_with( + "bmd_123", scenarios_to_add=["scn_001", "scn_002"] + ) + + def test_remove_scenarios(self, mock_client: Mock, benchmark_view: MockBenchmarkView) -> None: + """Test remove_scenarios method.""" + mock_client.benchmarks.update_scenarios.return_value = benchmark_view + + benchmark = Benchmark(mock_client, "bmd_123") + result = benchmark.remove_scenarios(["scn_001"]) + + assert result == benchmark_view + mock_client.benchmarks.update_scenarios.assert_called_once_with("bmd_123", scenarios_to_remove=["scn_001"]) + + def test_list_runs_single(self, mock_client: Mock, benchmark_run_view: MockBenchmarkRunView) -> None: + """Test list_runs method with single result.""" + page = SimpleNamespace(runs=[benchmark_run_view]) + mock_client.benchmarks.runs.list.return_value = page + + benchmark = Benchmark(mock_client, "bmd_123") + result = benchmark.list_runs() + + assert len(result) == 1 + assert isinstance(result[0], BenchmarkRun) + assert result[0].id == benchmark_run_view.id + assert result[0].benchmark_id == benchmark_run_view.benchmark_id + mock_client.benchmarks.runs.list.assert_called_once_with(benchmark_id="bmd_123") + + def test_list_runs_multiple(self, mock_client: Mock) -> None: + """Test list_runs method with multiple results.""" + run_view1 = MockBenchmarkRunView(id="bmr_001") + run_view2 = 
MockBenchmarkRunView(id="bmr_002") + page = SimpleNamespace(runs=[run_view1, run_view2]) + mock_client.benchmarks.runs.list.return_value = page + + benchmark = Benchmark(mock_client, "bmd_123") + result = benchmark.list_runs() + + assert len(result) == 2 + assert isinstance(result[0], BenchmarkRun) + assert isinstance(result[1], BenchmarkRun) + assert result[0].id == run_view1.id + assert result[0].benchmark_id == run_view1.benchmark_id + assert result[1].id == run_view2.id + assert result[1].benchmark_id == run_view2.benchmark_id + mock_client.benchmarks.runs.list.assert_called_once_with(benchmark_id="bmd_123") + + def test_list_runs_with_params(self, mock_client: Mock, benchmark_run_view: MockBenchmarkRunView) -> None: + """Test list_runs method with filtering parameters.""" + page = SimpleNamespace(runs=[benchmark_run_view]) + mock_client.benchmarks.runs.list.return_value = page + + benchmark = Benchmark(mock_client, "bmd_123") + result = benchmark.list_runs(limit=10, name="test-run") + + assert len(result) == 1 + mock_client.benchmarks.runs.list.assert_called_once_with(benchmark_id="bmd_123", limit=10, name="test-run") diff --git a/tests/sdk/test_benchmark_run.py b/tests/sdk/test_benchmark_run.py new file mode 100644 index 000000000..e7a826a90 --- /dev/null +++ b/tests/sdk/test_benchmark_run.py @@ -0,0 +1,114 @@ +"""Comprehensive tests for sync BenchmarkRun class.""" + +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import Mock + +from tests.sdk.conftest import MockScenarioRunView, MockBenchmarkRunView +from runloop_api_client.sdk.scenario_run import ScenarioRun +from runloop_api_client.sdk.benchmark_run import BenchmarkRun + + +class TestBenchmarkRun: + """Tests for BenchmarkRun class.""" + + def test_init(self, mock_client: Mock) -> None: + """Test BenchmarkRun initialization.""" + run = BenchmarkRun(mock_client, "bmr_123", "bmd_123") + assert run.id == "bmr_123" + assert run.benchmark_id == "bmd_123" + + def test_repr(self, mock_client: Mock) -> None: + """Test BenchmarkRun string representation.""" + run = BenchmarkRun(mock_client, "bmr_123", "bmd_123") + assert repr(run) == "" + + def test_get_info(self, mock_client: Mock, benchmark_run_view: MockBenchmarkRunView) -> None: + """Test get_info method.""" + mock_client.benchmarks.runs.retrieve.return_value = benchmark_run_view + + run = BenchmarkRun(mock_client, "bmr_123", "bmd_123") + result = run.get_info() + + assert result == benchmark_run_view + mock_client.benchmarks.runs.retrieve.assert_called_once_with("bmr_123") + + def test_cancel(self, mock_client: Mock, benchmark_run_view: MockBenchmarkRunView) -> None: + """Test cancel method.""" + benchmark_run_view.state = "canceled" + mock_client.benchmarks.runs.cancel.return_value = benchmark_run_view + + run = BenchmarkRun(mock_client, "bmr_123", "bmd_123") + result = run.cancel() + + assert result == benchmark_run_view + assert result.state == "canceled" + mock_client.benchmarks.runs.cancel.assert_called_once_with("bmr_123") + + def test_complete(self, mock_client: Mock, benchmark_run_view: MockBenchmarkRunView) -> None: + """Test complete method.""" + benchmark_run_view.state = "completed" + mock_client.benchmarks.runs.complete.return_value = benchmark_run_view + + run = BenchmarkRun(mock_client, "bmr_123", "bmd_123") + result = run.complete() + + assert result == benchmark_run_view + assert result.state == "completed" + mock_client.benchmarks.runs.complete.assert_called_once_with("bmr_123") + + def test_list_scenario_runs_empty(self, 
mock_client: Mock) -> None: + """Test list_scenario_runs method with empty results.""" + page = SimpleNamespace(runs=[]) + mock_client.benchmarks.runs.list_scenario_runs.return_value = page + + run = BenchmarkRun(mock_client, "bmr_123", "bmd_123") + result = run.list_scenario_runs() + + assert len(result) == 0 + mock_client.benchmarks.runs.list_scenario_runs.assert_called_once_with("bmr_123") + + def test_list_scenario_runs_single(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None: + """Test list_scenario_runs method with single result.""" + page = SimpleNamespace(runs=[scenario_run_view]) + mock_client.benchmarks.runs.list_scenario_runs.return_value = page + + run = BenchmarkRun(mock_client, "bmr_123", "bmd_123") + result = run.list_scenario_runs() + + assert len(result) == 1 + assert isinstance(result[0], ScenarioRun) + assert result[0].id == scenario_run_view.id + assert result[0].devbox_id == scenario_run_view.devbox_id + mock_client.benchmarks.runs.list_scenario_runs.assert_called_once_with("bmr_123") + + def test_list_scenario_runs_multiple(self, mock_client: Mock) -> None: + """Test list_scenario_runs method with multiple results.""" + scenario_run_view1 = MockScenarioRunView(id="scr_001", devbox_id="dev_001") + scenario_run_view2 = MockScenarioRunView(id="scr_002", devbox_id="dev_002") + page = SimpleNamespace(runs=[scenario_run_view1, scenario_run_view2]) + mock_client.benchmarks.runs.list_scenario_runs.return_value = page + + run = BenchmarkRun(mock_client, "bmr_123", "bmd_123") + result = run.list_scenario_runs() + + assert len(result) == 2 + assert isinstance(result[0], ScenarioRun) + assert isinstance(result[1], ScenarioRun) + assert result[0].id == "scr_001" + assert result[1].id == "scr_002" + mock_client.benchmarks.runs.list_scenario_runs.assert_called_once_with("bmr_123") + + def test_list_scenario_runs_with_params(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None: + """Test list_scenario_runs method with filtering parameters.""" + page = SimpleNamespace(runs=[scenario_run_view]) + mock_client.benchmarks.runs.list_scenario_runs.return_value = page + + run = BenchmarkRun(mock_client, "bmr_123", "bmd_123") + result = run.list_scenario_runs(limit=10, state="completed") + + assert len(result) == 1 + assert isinstance(result[0], ScenarioRun) + assert result[0].id == scenario_run_view.id + mock_client.benchmarks.runs.list_scenario_runs.assert_called_once_with("bmr_123", limit=10, state="completed") diff --git a/tests/sdk/test_blueprint.py b/tests/sdk/test_blueprint.py index 2c6bc6580..40cbed3f6 100644 --- a/tests/sdk/test_blueprint.py +++ b/tests/sdk/test_blueprint.py @@ -14,19 +14,19 @@ class TestBlueprint: def test_init(self, mock_client: Mock) -> None: """Test Blueprint initialization.""" - blueprint = Blueprint(mock_client, "bp_123") - assert blueprint.id == "bp_123" + blueprint = Blueprint(mock_client, "bpt_123") + assert blueprint.id == "bpt_123" def test_repr(self, mock_client: Mock) -> None: """Test Blueprint string representation.""" - blueprint = Blueprint(mock_client, "bp_123") - assert repr(blueprint) == "" + blueprint = Blueprint(mock_client, "bpt_123") + assert repr(blueprint) == "" def test_get_info(self, mock_client: Mock, blueprint_view: MockBlueprintView) -> None: """Test get_info method.""" mock_client.blueprints.retrieve.return_value = blueprint_view - blueprint = Blueprint(mock_client, "bp_123") + blueprint = Blueprint(mock_client, "bpt_123") result = blueprint.get_info( extra_headers={"X-Custom": "value"}, 
extra_query={"param": "value"}, @@ -36,7 +36,7 @@ def test_get_info(self, mock_client: Mock, blueprint_view: MockBlueprintView) -> assert result == blueprint_view mock_client.blueprints.retrieve.assert_called_once_with( - "bp_123", + "bpt_123", extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, extra_body={"key": "value"}, @@ -48,7 +48,7 @@ def test_logs(self, mock_client: Mock) -> None: logs_view = SimpleNamespace(logs=[]) mock_client.blueprints.logs.return_value = logs_view - blueprint = Blueprint(mock_client, "bp_123") + blueprint = Blueprint(mock_client, "bpt_123") result = blueprint.logs( extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, @@ -58,7 +58,7 @@ def test_logs(self, mock_client: Mock) -> None: assert result == logs_view mock_client.blueprints.logs.assert_called_once_with( - "bp_123", + "bpt_123", extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, extra_body={"key": "value"}, @@ -69,7 +69,7 @@ def test_delete(self, mock_client: Mock) -> None: """Test delete method.""" mock_client.blueprints.delete.return_value = object() - blueprint = Blueprint(mock_client, "bp_123") + blueprint = Blueprint(mock_client, "bpt_123") result = blueprint.delete( extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, @@ -79,7 +79,7 @@ def test_delete(self, mock_client: Mock) -> None: assert result is not None # Verify return value is propagated mock_client.blueprints.delete.assert_called_once_with( - "bp_123", + "bpt_123", extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, extra_body={"key": "value"}, @@ -90,7 +90,7 @@ def test_create_devbox(self, mock_client: Mock, devbox_view: MockDevboxView) -> """Test create_devbox method.""" mock_client.devboxes.create_and_await_running.return_value = devbox_view - blueprint = Blueprint(mock_client, "bp_123") + blueprint = Blueprint(mock_client, "bpt_123") devbox = blueprint.create_devbox( name="test-devbox", metadata={"key": "value"}, @@ -98,9 +98,9 @@ def test_create_devbox(self, mock_client: Mock, devbox_view: MockDevboxView) -> extra_headers={"X-Custom": "value"}, ) - assert devbox.id == "dev_123" + assert devbox.id == "dbx_123" mock_client.devboxes.create_and_await_running.assert_called_once() call_kwargs = mock_client.devboxes.create_and_await_running.call_args[1] - assert call_kwargs["blueprint_id"] == "bp_123" + assert call_kwargs["blueprint_id"] == "bpt_123" assert call_kwargs["name"] == "test-devbox" assert call_kwargs["metadata"] == {"key": "value"} diff --git a/tests/sdk/test_execution.py b/tests/sdk/test_execution.py index 63b244d0e..249d670bf 100644 --- a/tests/sdk/test_execution.py +++ b/tests/sdk/test_execution.py @@ -83,9 +83,9 @@ class TestExecution: def test_init(self, mock_client: Mock, execution_view: MockExecutionView) -> None: """Test Execution initialization.""" - execution = Execution(mock_client, "dev_123", execution_view) # type: ignore[arg-type] - assert execution.execution_id == "exec_123" - assert execution.devbox_id == "dev_123" + execution = Execution(mock_client, "dbx_123", execution_view) # type: ignore[arg-type] + assert execution.execution_id == "exn_123" + assert execution.devbox_id == "dbx_123" assert execution._initial_result == execution_view def test_init_with_streaming_group(self, mock_client: Mock, execution_view: MockExecutionView) -> None: @@ -94,46 +94,46 @@ def test_init_with_streaming_group(self, mock_client: Mock, execution_view: Mock stop_event = threading.Event() streaming_group = _StreamingGroup(threads, stop_event) - 
execution = Execution(mock_client, "dev_123", execution_view, streaming_group) # type: ignore[arg-type] + execution = Execution(mock_client, "dbx_123", execution_view, streaming_group) # type: ignore[arg-type] assert execution._streaming_group is streaming_group def test_properties(self, mock_client: Mock, execution_view: MockExecutionView) -> None: """Test Execution properties.""" - execution = Execution(mock_client, "dev_123", execution_view) # type: ignore[arg-type] - assert execution.execution_id == "exec_123" - assert execution.devbox_id == "dev_123" + execution = Execution(mock_client, "dbx_123", execution_view) # type: ignore[arg-type] + assert execution.execution_id == "exn_123" + assert execution.devbox_id == "dbx_123" def test_repr(self, mock_client: Mock, execution_view: MockExecutionView) -> None: """Test Execution repr formatting.""" - execution = Execution(mock_client, "dev_123", execution_view) # type: ignore[arg-type] - assert repr(execution) == "" + execution = Execution(mock_client, "dbx_123", execution_view) # type: ignore[arg-type] + assert repr(execution) == "" def test_result_already_completed(self, mock_client: Mock, execution_view: MockExecutionView) -> None: """Test result delegates to wait_for_command when already completed.""" mock_client.devboxes = Mock() mock_client.devboxes.wait_for_command.return_value = execution_view - execution = Execution(mock_client, "dev_123", execution_view) # type: ignore[arg-type] + execution = Execution(mock_client, "dbx_123", execution_view) # type: ignore[arg-type] result = execution.result() assert result.exit_code == 0 assert result.stdout(num_lines=10) == "output" mock_client.devboxes.wait_for_command.assert_called_once_with( - "exec_123", - devbox_id="dev_123", + "exn_123", + devbox_id="dbx_123", statuses=["completed"], ) def test_result_needs_polling(self, mock_client: Mock) -> None: """Test result when execution needs to poll for completion.""" running_execution = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="running", ) completed_execution = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="completed", exit_status=0, stdout="output", @@ -145,27 +145,27 @@ def test_result_needs_polling(self, mock_client: Mock) -> None: mock_client.devboxes = Mock() mock_client.devboxes.wait_for_command.return_value = completed_execution - execution = Execution(mock_client, "dev_123", running_execution) # type: ignore[arg-type] + execution = Execution(mock_client, "dbx_123", running_execution) # type: ignore[arg-type] result = execution.result() assert result.exit_code == 0 assert result.stdout(num_lines=10) == "output" mock_client.devboxes.wait_for_command.assert_called_once_with( - "exec_123", - devbox_id="dev_123", + "exn_123", + devbox_id="dbx_123", statuses=["completed"], ) def test_result_with_streaming_group(self, mock_client: Mock) -> None: """Test result waits for streaming group to finish.""" running_execution = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="running", ) completed_execution = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="completed", exit_status=0, stdout="output", @@ -180,7 +180,7 @@ def test_result_with_streaming_group(self, mock_client: Mock) -> None: thread.start() streaming_group = _StreamingGroup([thread], 
stop_event) - execution = Execution(mock_client, "dev_123", running_execution, streaming_group) # type: ignore[arg-type] + execution = Execution(mock_client, "dbx_123", running_execution, streaming_group) # type: ignore[arg-type] result = execution.result() assert result.exit_code == 0 @@ -190,8 +190,8 @@ def test_result_with_streaming_group(self, mock_client: Mock) -> None: def test_result_passes_options(self, mock_client: Mock) -> None: """Ensure options are forwarded to wait_for_command.""" execution_view = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="completed", exit_status=0, stdout="output", @@ -201,12 +201,12 @@ def test_result_passes_options(self, mock_client: Mock) -> None: mock_client.devboxes = Mock() mock_client.devboxes.wait_for_command.return_value = execution_view - execution = Execution(mock_client, "dev_123", execution_view) # type: ignore[arg-type] + execution = Execution(mock_client, "dbx_123", execution_view) # type: ignore[arg-type] execution.result(timeout=30.0, idempotency_key="abc123") mock_client.devboxes.wait_for_command.assert_called_once_with( - "exec_123", - devbox_id="dev_123", + "exn_123", + devbox_id="dbx_123", statuses=["completed"], timeout=30.0, idempotency_key="abc123", @@ -215,31 +215,31 @@ def test_result_passes_options(self, mock_client: Mock) -> None: def test_get_state(self, mock_client: Mock, execution_view: MockExecutionView) -> None: """Test get_state method.""" updated_execution = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="running", ) mock_client.devboxes.executions = Mock() mock_client.devboxes.executions.retrieve.return_value = updated_execution - execution = Execution(mock_client, "dev_123", execution_view) # type: ignore[arg-type] + execution = Execution(mock_client, "dbx_123", execution_view) # type: ignore[arg-type] result = execution.get_state() assert result == updated_execution assert execution._initial_result == execution_view mock_client.devboxes.executions.retrieve.assert_called_once_with( - "exec_123", - devbox_id="dev_123", + "exn_123", + devbox_id="dbx_123", ) def test_kill(self, mock_client: Mock, execution_view: MockExecutionView) -> None: """Test kill method.""" mock_client.devboxes.executions.kill.return_value = None - execution = Execution(mock_client, "dev_123", execution_view) # type: ignore[arg-type] + execution = Execution(mock_client, "dbx_123", execution_view) # type: ignore[arg-type] execution.kill() mock_client.devboxes.executions.kill.assert_called_once_with( - "exec_123", - devbox_id="dev_123", + "exn_123", + devbox_id="dbx_123", ) diff --git a/tests/sdk/test_execution_result.py b/tests/sdk/test_execution_result.py index 689b108d5..7bc4fbfef 100644 --- a/tests/sdk/test_execution_result.py +++ b/tests/sdk/test_execution_result.py @@ -14,31 +14,31 @@ class TestExecutionResult: def test_init(self, mock_client: Mock, execution_view: MockExecutionView) -> None: """Test ExecutionResult initialization.""" - result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type] + result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type] # Verify via public API - assert result.devbox_id == "dev_123" - assert result.execution_id == "exec_123" + assert result.devbox_id == "dbx_123" + assert result.execution_id == "exn_123" def test_devbox_id_property(self, mock_client: Mock, execution_view: MockExecutionView) -> None: 
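The sync `ExecutionResult` assertions below agree with the async ones earlier in this diff on one small contract: `exit_code` mirrors `exit_status` and is `None` while running, `success` is true exactly for exit code 0, `failed` for a non-zero code, and `stderr()` returns `""` when unset. Restated as a sketch — duck-typed on purpose, since this diff does not show an import path for `ExecutionResult`:

```python
def summarize(result) -> str:
    """Summarize an ExecutionResult per the contract these tests assert."""
    # exit_code mirrors exit_status; it is None while the command is still
    # running, and test_failed_none pins failed to False in that state.
    if result.exit_code is None:
        return "running"
    if result.success:  # exit_code == 0
        return "ok: " + result.stdout(num_lines=10)  # tail of stdout
    # failed is True exactly when a non-zero exit status is present.
    return "failed: " + result.stderr()  # stderr() returns "" when unset
```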
"""Test devbox_id property.""" - result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type] - assert result.devbox_id == "dev_123" + result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type] + assert result.devbox_id == "dbx_123" def test_execution_id_property(self, mock_client: Mock, execution_view: MockExecutionView) -> None: """Test execution_id property.""" - result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type] - assert result.execution_id == "exec_123" + result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type] + assert result.execution_id == "exn_123" def test_exit_code_property(self, mock_client: Mock, execution_view: MockExecutionView) -> None: """Test exit_code property.""" - result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type] + result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type] assert result.exit_code == 0 def test_exit_code_none(self, mock_client: Mock) -> None: """Test exit_code property when exit_status is None.""" execution = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="running", exit_status=None, stdout="", @@ -46,19 +46,19 @@ def test_exit_code_none(self, mock_client: Mock) -> None: stdout_truncated=False, stderr_truncated=False, ) - result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type] + result = ExecutionResult(mock_client, "dbx_123", execution) # type: ignore[arg-type] assert result.exit_code is None def test_success_property(self, mock_client: Mock, execution_view: MockExecutionView) -> None: """Test success property.""" - result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type] + result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type] assert result.success is True def test_success_false(self, mock_client: Mock) -> None: """Test success property when exit code is non-zero.""" execution = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="completed", exit_status=1, stdout="", @@ -66,19 +66,19 @@ def test_success_false(self, mock_client: Mock) -> None: stdout_truncated=False, stderr_truncated=False, ) - result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type] + result = ExecutionResult(mock_client, "dbx_123", execution) # type: ignore[arg-type] assert result.success is False def test_failed_property(self, mock_client: Mock, execution_view: MockExecutionView) -> None: """Test failed property when exit code is zero.""" - result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type] + result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type] assert result.failed is False def test_failed_true(self, mock_client: Mock) -> None: """Test failed property when exit code is non-zero.""" execution = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="completed", exit_status=1, stdout="", @@ -86,14 +86,14 @@ def test_failed_true(self, mock_client: Mock) -> None: stdout_truncated=False, stderr_truncated=False, ) - result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type] + result = ExecutionResult(mock_client, "dbx_123", execution) # type: 
ignore[arg-type] assert result.failed is True def test_failed_none(self, mock_client: Mock) -> None: """Test failed property when exit_status is None.""" execution = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="running", exit_status=None, stdout="", @@ -101,20 +101,20 @@ def test_failed_none(self, mock_client: Mock) -> None: stdout_truncated=False, stderr_truncated=False, ) - result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type] + result = ExecutionResult(mock_client, "dbx_123", execution) # type: ignore[arg-type] assert result.failed is False def test_stdout(self, mock_client: Mock, execution_view: MockExecutionView) -> None: """Test stdout method.""" - result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type] + result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type] assert result.stdout() == "output" assert result.stdout(num_lines=10) == "output" def test_stdout_empty(self, mock_client: Mock) -> None: """Test stdout method when stdout is None.""" execution = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="completed", exit_status=0, stdout=None, @@ -122,14 +122,14 @@ def test_stdout_empty(self, mock_client: Mock) -> None: stdout_truncated=False, stderr_truncated=False, ) - result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type] + result = ExecutionResult(mock_client, "dbx_123", execution) # type: ignore[arg-type] assert result.stdout() == "" def test_stderr(self, mock_client: Mock) -> None: """Test stderr method.""" execution = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="completed", exit_status=1, stdout="", @@ -137,18 +137,18 @@ def test_stderr(self, mock_client: Mock) -> None: stdout_truncated=False, stderr_truncated=False, ) - result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type] + result = ExecutionResult(mock_client, "dbx_123", execution) # type: ignore[arg-type] assert result.stderr() == "error message" assert result.stderr(num_lines=20) == "error message" def test_stderr_empty(self, mock_client: Mock, execution_view: MockExecutionView) -> None: """Test stderr method when stderr is None.""" - result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type] + result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type] assert result.stderr() == "" def test_result_property(self, mock_client: Mock, execution_view: MockExecutionView) -> None: """Test result property.""" - result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type] + result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type] assert result.result == execution_view def test_stdout_with_truncation_and_streaming(self, mock_client: Mock, mock_stream: Mock) -> None: @@ -165,8 +165,8 @@ def test_stdout_with_truncation_and_streaming(self, mock_client: Mock, mock_stre mock_client.devboxes.executions.stream_stdout_updates = Mock(return_value=mock_stream) execution = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="completed", exit_status=0, stdout="partial", @@ -174,12 +174,12 @@ def test_stdout_with_truncation_and_streaming(self, mock_client: Mock, mock_stre 
stdout_truncated=True, stderr_truncated=False, ) - result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type] + result = ExecutionResult(mock_client, "dbx_123", execution) # type: ignore[arg-type] # Should stream full output output = result.stdout() assert output == "line1\nline2\nline3\n" - mock_client.devboxes.executions.stream_stdout_updates.assert_called_once_with("exec_123", devbox_id="dev_123") + mock_client.devboxes.executions.stream_stdout_updates.assert_called_once_with("exn_123", devbox_id="dbx_123") def test_stderr_with_truncation_and_streaming(self, mock_client: Mock, mock_stream: Mock) -> None: """Test stderr streams full output when truncated.""" @@ -194,8 +194,8 @@ def test_stderr_with_truncation_and_streaming(self, mock_client: Mock, mock_stre mock_client.devboxes.executions.stream_stderr_updates = Mock(return_value=mock_stream) execution = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="completed", exit_status=0, stdout="", @@ -203,12 +203,12 @@ def test_stderr_with_truncation_and_streaming(self, mock_client: Mock, mock_stre stdout_truncated=False, stderr_truncated=True, ) - result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type] + result = ExecutionResult(mock_client, "dbx_123", execution) # type: ignore[arg-type] # Should stream full output output = result.stderr() assert output == "error1\nerror2\n" - mock_client.devboxes.executions.stream_stderr_updates.assert_called_once_with("exec_123", devbox_id="dev_123") + mock_client.devboxes.executions.stream_stderr_updates.assert_called_once_with("exn_123", devbox_id="dbx_123") def test_stdout_with_num_lines_when_truncated(self, mock_client: Mock, mock_stream: Mock) -> None: """Test stdout with num_lines parameter when truncated.""" @@ -223,8 +223,8 @@ def test_stdout_with_num_lines_when_truncated(self, mock_client: Mock, mock_stre mock_client.devboxes.executions.stream_stdout_updates = Mock(return_value=mock_stream) execution = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="completed", exit_status=0, stdout="line1\n", @@ -232,7 +232,7 @@ def test_stdout_with_num_lines_when_truncated(self, mock_client: Mock, mock_stre stdout_truncated=True, stderr_truncated=False, ) - result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type] + result = ExecutionResult(mock_client, "dbx_123", execution) # type: ignore[arg-type] # Should stream and return last 2 lines output = result.stdout(num_lines=2) @@ -241,8 +241,8 @@ def test_stdout_with_num_lines_when_truncated(self, mock_client: Mock, mock_stre def test_stdout_no_streaming_when_not_truncated(self, mock_client: Mock) -> None: """Test stdout doesn't stream when not truncated.""" execution = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="completed", exit_status=0, stdout="complete output", @@ -250,7 +250,7 @@ def test_stdout_no_streaming_when_not_truncated(self, mock_client: Mock) -> None stdout_truncated=False, stderr_truncated=False, ) - result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type] + result = ExecutionResult(mock_client, "dbx_123", execution) # type: ignore[arg-type] # Should return existing output without streaming output = result.stdout() @@ -259,8 +259,8 @@ def test_stdout_no_streaming_when_not_truncated(self, mock_client: Mock) -> None def 
test_stdout_with_num_lines_no_truncation(self, mock_client: Mock) -> None: """Test stdout with num_lines when not truncated.""" execution = SimpleNamespace( - execution_id="exec_123", - devbox_id="dev_123", + execution_id="exn_123", + devbox_id="dbx_123", status="completed", exit_status=0, stdout="line1\nline2\nline3\nline4\nline5", @@ -268,7 +268,7 @@ def test_stdout_with_num_lines_no_truncation(self, mock_client: Mock) -> None: stdout_truncated=False, stderr_truncated=False, ) - result = ExecutionResult(mock_client, "dev_123", execution) # type: ignore[arg-type] + result = ExecutionResult(mock_client, "dbx_123", execution) # type: ignore[arg-type] # Should return last 2 lines without streaming output = result.stdout(num_lines=2) @@ -276,7 +276,7 @@ def test_stdout_with_num_lines_no_truncation(self, mock_client: Mock) -> None: def test_count_non_empty_lines(self, mock_client: Mock, execution_view: MockExecutionView) -> None: """Test the _count_non_empty_lines helper method.""" - result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type] + result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type] # Test various input strings assert result._count_non_empty_lines("") == 0 @@ -288,7 +288,7 @@ def test_count_non_empty_lines(self, mock_client: Mock, execution_view: MockExec def test_get_last_n_lines(self, mock_client: Mock, execution_view: MockExecutionView) -> None: """Test the _get_last_n_lines helper method.""" - result = ExecutionResult(mock_client, "dev_123", execution_view) # type: ignore[arg-type] + result = ExecutionResult(mock_client, "dbx_123", execution_view) # type: ignore[arg-type] # Test various scenarios assert result._get_last_n_lines("", 5) == "" diff --git a/tests/sdk/test_ops.py b/tests/sdk/test_ops.py index 7ac503933..af54776af 100644 --- a/tests/sdk/test_ops.py +++ b/tests/sdk/test_ops.py @@ -17,6 +17,7 @@ MockScorerView, MockScenarioView, MockSnapshotView, + MockBenchmarkView, MockBlueprintView, create_mock_httpx_response, ) @@ -27,12 +28,14 @@ AgentOps, Scenario, Snapshot, + Benchmark, Blueprint, DevboxOps, ScorerOps, RunloopSDK, ScenarioOps, SnapshotOps, + BenchmarkOps, BlueprintOps, StorageObject, StorageObjectOps, @@ -55,7 +58,7 @@ def test_create(self, mock_client: Mock, devbox_view: MockDevboxView) -> None: ) assert isinstance(devbox, Devbox) - assert devbox.id == "dev_123" + assert devbox.id == "dbx_123" mock_client.devboxes.create_and_await_running.assert_called_once() def test_create_from_blueprint_id(self, mock_client: Mock, devbox_view: MockDevboxView) -> None: @@ -64,15 +67,15 @@ def test_create_from_blueprint_id(self, mock_client: Mock, devbox_view: MockDevb ops = DevboxOps(mock_client) devbox = ops.create_from_blueprint_id( - "bp_123", + "bpt_123", name="test-devbox", metadata={"key": "value"}, ) assert isinstance(devbox, Devbox) - assert devbox.id == "dev_123" + assert devbox.id == "dbx_123" call_kwargs = mock_client.devboxes.create_and_await_running.call_args[1] - assert call_kwargs["blueprint_id"] == "bp_123" + assert call_kwargs["blueprint_id"] == "bpt_123" def test_create_from_blueprint_name(self, mock_client: Mock, devbox_view: MockDevboxView) -> None: """Test create_from_blueprint_name method.""" @@ -94,24 +97,24 @@ def test_create_from_snapshot(self, mock_client: Mock, devbox_view: MockDevboxVi ops = DevboxOps(mock_client) devbox = ops.create_from_snapshot( - "snap_123", + "snp_123", name="test-devbox", ) assert isinstance(devbox, Devbox) call_kwargs = 
mock_client.devboxes.create_and_await_running.call_args[1] - assert call_kwargs["snapshot_id"] == "snap_123" + assert call_kwargs["snapshot_id"] == "snp_123" def test_from_id(self, mock_client: Mock, devbox_view: MockDevboxView) -> None: """Test from_id method waits for running.""" mock_client.devboxes.await_running.return_value = devbox_view ops = DevboxOps(mock_client) - devbox = ops.from_id("dev_123") + devbox = ops.from_id("dbx_123") assert isinstance(devbox, Devbox) - assert devbox.id == "dev_123" - mock_client.devboxes.await_running.assert_called_once_with("dev_123") + assert devbox.id == "dbx_123" + mock_client.devboxes.await_running.assert_called_once_with("dbx_123") def test_list_empty(self, mock_client: Mock) -> None: """Test list method with empty results.""" @@ -138,7 +141,7 @@ def test_list_single(self, mock_client: Mock, devbox_view: MockDevboxView) -> No assert len(devboxes) == 1 assert isinstance(devboxes[0], Devbox) - assert devboxes[0].id == "dev_123" + assert devboxes[0].id == "dbx_123" mock_client.devboxes.list.assert_called_once() def test_list_multiple(self, mock_client: Mock) -> None: @@ -168,7 +171,7 @@ def test_list_empty(self, mock_client: Mock) -> None: mock_client.devboxes.disk_snapshots.list.return_value = page ops = SnapshotOps(mock_client) - snapshots = ops.list(devbox_id="dev_123", limit=10) + snapshots = ops.list(devbox_id="dbx_123", limit=10) assert len(snapshots) == 0 mock_client.devboxes.disk_snapshots.list.assert_called_once() @@ -180,14 +183,14 @@ def test_list_single(self, mock_client: Mock, snapshot_view: MockSnapshotView) - ops = SnapshotOps(mock_client) snapshots = ops.list( - devbox_id="dev_123", + devbox_id="dbx_123", limit=10, starting_after="snap_000", ) assert len(snapshots) == 1 assert isinstance(snapshots[0], Snapshot) - assert snapshots[0].id == "snap_123" + assert snapshots[0].id == "snp_123" mock_client.devboxes.disk_snapshots.list.assert_called_once() def test_list_multiple(self, mock_client: Mock) -> None: @@ -198,7 +201,7 @@ def test_list_multiple(self, mock_client: Mock) -> None: mock_client.devboxes.disk_snapshots.list.return_value = page ops = SnapshotOps(mock_client) - snapshots = ops.list(devbox_id="dev_123", limit=10) + snapshots = ops.list(devbox_id="dbx_123", limit=10) assert len(snapshots) == 2 assert isinstance(snapshots[0], Snapshot) @@ -210,10 +213,10 @@ def test_list_multiple(self, mock_client: Mock) -> None: def test_from_id(self, mock_client: Mock) -> None: """Test from_id method.""" ops = SnapshotOps(mock_client) - snapshot = ops.from_id("snap_123") + snapshot = ops.from_id("snp_123") assert isinstance(snapshot, Snapshot) - assert snapshot.id == "snap_123" + assert snapshot.id == "snp_123" class TestBlueprintOps: @@ -230,16 +233,16 @@ def test_create(self, mock_client: Mock, blueprint_view: MockBlueprintView) -> N ) assert isinstance(blueprint, Blueprint) - assert blueprint.id == "bp_123" + assert blueprint.id == "bpt_123" mock_client.blueprints.create_and_await_build_complete.assert_called_once() def test_from_id(self, mock_client: Mock) -> None: """Test from_id method.""" ops = BlueprintOps(mock_client) - blueprint = ops.from_id("bp_123") + blueprint = ops.from_id("bpt_123") assert isinstance(blueprint, Blueprint) - assert blueprint.id == "bp_123" + assert blueprint.id == "bpt_123" def test_list_empty(self, mock_client: Mock) -> None: """Test list method with empty results.""" @@ -266,7 +269,7 @@ def test_list_single(self, mock_client: Mock, blueprint_view: MockBlueprintView) assert len(blueprints) == 1 assert 
isinstance(blueprints[0], Blueprint) - assert blueprints[0].id == "bp_123" + assert blueprints[0].id == "bpt_123" mock_client.blueprints.list.assert_called_once() def test_list_multiple(self, mock_client: Mock) -> None: @@ -660,16 +663,16 @@ def test_create(self, mock_client: Mock, scorer_view: MockScorerView) -> None: ) assert isinstance(scorer, Scorer) - assert scorer.id == "scorer_123" + assert scorer.id == "sco_123" mock_client.scenarios.scorers.create.assert_called_once() def test_from_id(self, mock_client: Mock) -> None: """Test from_id method.""" ops = ScorerOps(mock_client) - scorer = ops.from_id("scorer_123") + scorer = ops.from_id("sco_123") assert isinstance(scorer, Scorer) - assert scorer.id == "scorer_123" + assert scorer.id == "sco_123" def test_list_empty(self, mock_client: Mock) -> None: """Test list method with empty results.""" @@ -693,7 +696,7 @@ def test_list_single(self, mock_client: Mock, scorer_view: MockScorerView) -> No assert len(scorers) == 1 assert isinstance(scorers[0], Scorer) - assert scorers[0].id == "scorer_123" + assert scorers[0].id == "sco_123" mock_client.scenarios.scorers.list.assert_called_once() def test_list_multiple(self, mock_client: Mock) -> None: @@ -723,19 +726,20 @@ def test_create(self, mock_client: Mock, agent_view: MockAgentView) -> None: client = AgentOps(mock_client) agent = client.create( name="test-agent", + version="1.2.3", ) assert isinstance(agent, Agent) - assert agent.id == "agent_123" + assert agent.id == "agt_123" mock_client.agents.create.assert_called_once() def test_from_id(self, mock_client: Mock) -> None: """Test from_id method.""" client = AgentOps(mock_client) - agent = client.from_id("agent_123") + agent = client.from_id("agt_123") assert isinstance(agent, Agent) - assert agent.id == "agent_123" + assert agent.id == "agt_123" def test_list(self, mock_client: Mock) -> None: """Test list method.""" @@ -817,10 +821,11 @@ def test_create_from_npm(self, mock_client: Mock, agent_view: MockAgentView) -> agent = client.create_from_npm( name="test-agent", package_name="@runloop/example-agent", + version="1.2.3", ) assert isinstance(agent, Agent) - assert agent.id == "agent_123" + assert agent.id == "agt_123" mock_client.agents.create.assert_called_once_with( source={ "type": "npm", @@ -829,6 +834,7 @@ def test_create_from_npm(self, mock_client: Mock, agent_view: MockAgentView) -> }, }, name="test-agent", + version="1.2.3", ) def test_create_from_npm_with_all_options(self, mock_client: Mock, agent_view: MockAgentView) -> None: @@ -838,26 +844,26 @@ def test_create_from_npm_with_all_options(self, mock_client: Mock, agent_view: M client = AgentOps(mock_client) agent = client.create_from_npm( package_name="@runloop/example-agent", - npm_version="1.2.3", registry_url="https://registry.example.com", agent_setup=["npm install", "npm run setup"], name="test-agent", + version="1.2.3", extra_headers={"X-Custom": "header"}, ) assert isinstance(agent, Agent) - assert agent.id == "agent_123" + assert agent.id == "agt_123" mock_client.agents.create.assert_called_once_with( source={ "type": "npm", "npm": { "package_name": "@runloop/example-agent", - "npm_version": "1.2.3", "registry_url": "https://registry.example.com", "agent_setup": ["npm install", "npm run setup"], }, }, name="test-agent", + version="1.2.3", extra_headers={"X-Custom": "header"}, ) @@ -869,6 +875,7 @@ def test_create_from_npm_raises_when_source_provided(self, mock_client: Mock) -> client.create_from_npm( package_name="@runloop/example-agent", name="test-agent", + 
version="1.2.3", source={"type": "git", "git": {"repository": "https://github.com/example/repo"}}, ) @@ -880,10 +887,11 @@ def test_create_from_pip(self, mock_client: Mock, agent_view: MockAgentView) -> agent = client.create_from_pip( package_name="runloop-example-agent", name="test-agent", + version="1.2.3", ) assert isinstance(agent, Agent) - assert agent.id == "agent_123" + assert agent.id == "agt_123" mock_client.agents.create.assert_called_once_with( source={ "type": "pip", @@ -892,6 +900,7 @@ def test_create_from_pip(self, mock_client: Mock, agent_view: MockAgentView) -> }, }, name="test-agent", + version="1.2.3", ) def test_create_from_pip_with_all_options(self, mock_client: Mock, agent_view: MockAgentView) -> None: @@ -901,25 +910,25 @@ def test_create_from_pip_with_all_options(self, mock_client: Mock, agent_view: M client = AgentOps(mock_client) agent = client.create_from_pip( package_name="runloop-example-agent", - pip_version="1.2.3", registry_url="https://pypi.example.com", agent_setup=["pip install extra-deps"], name="test-agent", + version="1.2.3", ) assert isinstance(agent, Agent) - assert agent.id == "agent_123" + assert agent.id == "agt_123" mock_client.agents.create.assert_called_once_with( source={ "type": "pip", "pip": { "package_name": "runloop-example-agent", - "pip_version": "1.2.3", "registry_url": "https://pypi.example.com", "agent_setup": ["pip install extra-deps"], }, }, name="test-agent", + version="1.2.3", ) def test_create_from_git(self, mock_client: Mock, agent_view: MockAgentView) -> None: @@ -930,10 +939,11 @@ def test_create_from_git(self, mock_client: Mock, agent_view: MockAgentView) -> agent = client.create_from_git( repository="https://github.com/example/agent-repo", name="test-agent", + version="1.2.3", ) assert isinstance(agent, Agent) - assert agent.id == "agent_123" + assert agent.id == "agt_123" mock_client.agents.create.assert_called_once_with( source={ "type": "git", @@ -942,6 +952,7 @@ def test_create_from_git(self, mock_client: Mock, agent_view: MockAgentView) -> }, }, name="test-agent", + version="1.2.3", ) def test_create_from_git_with_all_options(self, mock_client: Mock, agent_view: MockAgentView) -> None: @@ -954,10 +965,11 @@ def test_create_from_git_with_all_options(self, mock_client: Mock, agent_view: M ref="develop", agent_setup=["npm install", "npm run build"], name="test-agent", + version="1.2.3", ) assert isinstance(agent, Agent) - assert agent.id == "agent_123" + assert agent.id == "agt_123" mock_client.agents.create.assert_called_once_with( source={ "type": "git", @@ -968,6 +980,7 @@ def test_create_from_git_with_all_options(self, mock_client: Mock, agent_view: M }, }, name="test-agent", + version="1.2.3", ) def test_create_from_object(self, mock_client: Mock, agent_view: MockAgentView) -> None: @@ -978,10 +991,11 @@ def test_create_from_object(self, mock_client: Mock, agent_view: MockAgentView) agent = client.create_from_object( object_id="obj_123", name="test-agent", + version="1.2.3", ) assert isinstance(agent, Agent) - assert agent.id == "agent_123" + assert agent.id == "agt_123" mock_client.agents.create.assert_called_once_with( source={ "type": "object", @@ -990,6 +1004,7 @@ def test_create_from_object(self, mock_client: Mock, agent_view: MockAgentView) }, }, name="test-agent", + version="1.2.3", ) def test_create_from_object_with_agent_setup(self, mock_client: Mock, agent_view: MockAgentView) -> None: @@ -1001,10 +1016,11 @@ def test_create_from_object_with_agent_setup(self, mock_client: Mock, agent_view object_id="obj_123", 
agent_setup=["chmod +x setup.sh", "./setup.sh"], name="test-agent", + version="1.2.3", ) assert isinstance(agent, Agent) - assert agent.id == "agent_123" + assert agent.id == "agt_123" mock_client.agents.create.assert_called_once_with( source={ "type": "object", @@ -1014,6 +1030,7 @@ def test_create_from_object_with_agent_setup(self, mock_client: Mock, agent_view }, }, name="test-agent", + version="1.2.3", ) @@ -1071,6 +1088,59 @@ def test_list_multiple(self, mock_client: Mock) -> None: mock_client.scenarios.list.assert_called_once() +class TestBenchmarkOps: + """Tests for BenchmarkOps class.""" + + def test_create(self, mock_client: Mock, benchmark_view: MockBenchmarkView) -> None: + """Test create method.""" + mock_client.benchmarks.create.return_value = benchmark_view + + ops = BenchmarkOps(mock_client) + benchmark = ops.create(name="test-benchmark", scenario_ids=["scn_001", "scn_002"]) + + assert isinstance(benchmark, Benchmark) + assert benchmark.id == "bmd_123" + mock_client.benchmarks.create.assert_called_once_with( + name="test-benchmark", scenario_ids=["scn_001", "scn_002"] + ) + + def test_from_id(self, mock_client: Mock) -> None: + """Test from_id method.""" + ops = BenchmarkOps(mock_client) + benchmark = ops.from_id("bmd_123") + + assert isinstance(benchmark, Benchmark) + assert benchmark.id == "bmd_123" + + def test_list_multiple(self, mock_client: Mock) -> None: + """Test list method with multiple results.""" + benchmark_view1 = MockBenchmarkView(id="bmd_001", name="benchmark-1") + benchmark_view2 = MockBenchmarkView(id="bmd_002", name="benchmark-2") + page = SimpleNamespace(benchmarks=[benchmark_view1, benchmark_view2]) + mock_client.benchmarks.list.return_value = page + + ops = BenchmarkOps(mock_client) + benchmarks = ops.list(limit=10) + + assert len(benchmarks) == 2 + assert isinstance(benchmarks[0], Benchmark) + assert isinstance(benchmarks[1], Benchmark) + assert benchmarks[0].id == "bmd_001" + assert benchmarks[1].id == "bmd_002" + mock_client.benchmarks.list.assert_called_once_with(limit=10) + + def test_list_with_name_filter(self, mock_client: Mock, benchmark_view: MockBenchmarkView) -> None: + """Test list method with name filter.""" + page = SimpleNamespace(benchmarks=[benchmark_view]) + mock_client.benchmarks.list.return_value = page + + ops = BenchmarkOps(mock_client) + benchmarks = ops.list(name="test-benchmark", limit=10) + + assert len(benchmarks) == 1 + mock_client.benchmarks.list.assert_called_once_with(name="test-benchmark", limit=10) + + class TestRunloopSDK: """Tests for RunloopSDK class.""" @@ -1079,6 +1149,7 @@ def test_init(self) -> None: runloop = RunloopSDK(bearer_token="test-token") assert runloop.api is not None assert isinstance(runloop.agent, AgentOps) + assert isinstance(runloop.benchmark, BenchmarkOps) assert isinstance(runloop.devbox, DevboxOps) assert isinstance(runloop.scorer, ScorerOps) assert isinstance(runloop.snapshot, SnapshotOps) diff --git a/tests/sdk/test_scenario.py b/tests/sdk/test_scenario.py index 3504c1714..e3aa5f1c8 100644 --- a/tests/sdk/test_scenario.py +++ b/tests/sdk/test_scenario.py @@ -99,8 +99,8 @@ def test_run_async(self, mock_client: Mock, scenario_run_view: MockScenarioRunVi scenario = Scenario(mock_client, "scn_123") run = scenario.run_async(run_name="test-run") - assert run.id == "run_123" - assert run.devbox_id == "dev_123" + assert run.id == "scr_123" + assert run.devbox_id == "dbx_123" mock_client.scenarios.start_run.assert_called_once_with( scenario_id="scn_123", run_name="test-run", @@ -113,8 +113,8 @@ def 
test_run(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> scenario = Scenario(mock_client, "scn_123") run = scenario.run(run_name="test-run") - assert run.id == "run_123" - assert run.devbox_id == "dev_123" + assert run.id == "scr_123" + assert run.devbox_id == "dbx_123" mock_client.scenarios.start_run_and_await_env_ready.assert_called_once_with( scenario_id="scn_123", run_name="test-run", diff --git a/tests/sdk/test_scenario_run.py b/tests/sdk/test_scenario_run.py index 54ea6e89b..339e365f8 100644 --- a/tests/sdk/test_scenario_run.py +++ b/tests/sdk/test_scenario_run.py @@ -15,31 +15,31 @@ class TestScenarioRun: def test_init(self, mock_client: Mock) -> None: """Test ScenarioRun initialization.""" - run = ScenarioRun(mock_client, "run_123", "dev_123") - assert run.id == "run_123" - assert run.devbox_id == "dev_123" + run = ScenarioRun(mock_client, "scr_123", "dbx_123") + assert run.id == "scr_123" + assert run.devbox_id == "dbx_123" def test_repr(self, mock_client: Mock) -> None: """Test ScenarioRun string representation.""" - run = ScenarioRun(mock_client, "run_123", "dev_123") - assert repr(run) == "" + run = ScenarioRun(mock_client, "scr_123", "dbx_123") + assert repr(run) == "" def test_devbox_property(self, mock_client: Mock) -> None: """Test devbox property returns Devbox wrapper.""" - run = ScenarioRun(mock_client, "run_123", "dev_123") + run = ScenarioRun(mock_client, "scr_123", "dbx_123") devbox = run.devbox - assert devbox.id == "dev_123" + assert devbox.id == "dbx_123" def test_get_info(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None: """Test get_info method.""" mock_client.scenarios.runs.retrieve.return_value = scenario_run_view - run = ScenarioRun(mock_client, "run_123", "dev_123") + run = ScenarioRun(mock_client, "scr_123", "dbx_123") result = run.get_info() assert result == scenario_run_view - mock_client.scenarios.runs.retrieve.assert_called_once_with("run_123") + mock_client.scenarios.runs.retrieve.assert_called_once_with("scr_123") def test_await_env_ready( self, mock_client: Mock, scenario_run_view: MockScenarioRunView, devbox_view: MockDevboxView @@ -48,10 +48,10 @@ def test_await_env_ready( mock_client.devboxes.await_running.return_value = devbox_view mock_client.scenarios.runs.retrieve.return_value = scenario_run_view - run = ScenarioRun(mock_client, "run_123", "dev_123") + run = ScenarioRun(mock_client, "scr_123", "dbx_123") result = run.await_env_ready() - mock_client.devboxes.await_running.assert_called_once_with("dev_123", polling_config=None) + mock_client.devboxes.await_running.assert_called_once_with("dbx_123", polling_config=None) assert result == scenario_run_view def test_score(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None: @@ -59,66 +59,66 @@ def test_score(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) scenario_run_view.state = "scoring" mock_client.scenarios.runs.score.return_value = scenario_run_view - run = ScenarioRun(mock_client, "run_123", "dev_123") + run = ScenarioRun(mock_client, "scr_123", "dbx_123") result = run.score() assert result == scenario_run_view - mock_client.scenarios.runs.score.assert_called_once_with("run_123") + mock_client.scenarios.runs.score.assert_called_once_with("scr_123") def test_await_scored(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None: """Test await_scored method.""" scenario_run_view.state = "scored" mock_client.scenarios.runs.await_scored.return_value = scenario_run_view - run = ScenarioRun(mock_client, 
"run_123", "dev_123") + run = ScenarioRun(mock_client, "scr_123", "dbx_123") result = run.await_scored() assert result == scenario_run_view - mock_client.scenarios.runs.await_scored.assert_called_once_with("run_123") + mock_client.scenarios.runs.await_scored.assert_called_once_with("scr_123") def test_score_and_await(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None: """Test score_and_await method.""" scenario_run_view.state = "scored" mock_client.scenarios.runs.score_and_await.return_value = scenario_run_view - run = ScenarioRun(mock_client, "run_123", "dev_123") + run = ScenarioRun(mock_client, "scr_123", "dbx_123") result = run.score_and_await() assert result == scenario_run_view - mock_client.scenarios.runs.score_and_await.assert_called_once_with("run_123") + mock_client.scenarios.runs.score_and_await.assert_called_once_with("scr_123") def test_score_and_complete(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None: """Test score_and_complete method.""" scenario_run_view.state = "completed" mock_client.scenarios.runs.score_and_complete.return_value = scenario_run_view - run = ScenarioRun(mock_client, "run_123", "dev_123") + run = ScenarioRun(mock_client, "scr_123", "dbx_123") result = run.score_and_complete() assert result == scenario_run_view - mock_client.scenarios.runs.score_and_complete.assert_called_once_with("run_123") + mock_client.scenarios.runs.score_and_complete.assert_called_once_with("scr_123") def test_complete(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None: """Test complete method.""" scenario_run_view.state = "completed" mock_client.scenarios.runs.complete.return_value = scenario_run_view - run = ScenarioRun(mock_client, "run_123", "dev_123") + run = ScenarioRun(mock_client, "scr_123", "dbx_123") result = run.complete() assert result == scenario_run_view - mock_client.scenarios.runs.complete.assert_called_once_with("run_123") + mock_client.scenarios.runs.complete.assert_called_once_with("scr_123") def test_cancel(self, mock_client: Mock, scenario_run_view: MockScenarioRunView) -> None: """Test cancel method.""" scenario_run_view.state = "canceled" mock_client.scenarios.runs.cancel.return_value = scenario_run_view - run = ScenarioRun(mock_client, "run_123", "dev_123") + run = ScenarioRun(mock_client, "scr_123", "dbx_123") result = run.cancel() assert result == scenario_run_view - mock_client.scenarios.runs.cancel.assert_called_once_with("run_123") + mock_client.scenarios.runs.cancel.assert_called_once_with("scr_123") def test_download_logs(self, mock_client: Mock, tmp_path: Path) -> None: """Test download_logs method writes to file.""" @@ -126,11 +126,11 @@ def test_download_logs(self, mock_client: Mock, tmp_path: Path) -> None: mock_response.write_to_file = Mock() mock_client.scenarios.runs.download_logs.return_value = mock_response - run = ScenarioRun(mock_client, "run_123", "dev_123") + run = ScenarioRun(mock_client, "scr_123", "dbx_123") output_path = tmp_path / "logs.zip" run.download_logs(output_path) - mock_client.scenarios.runs.download_logs.assert_called_once_with("run_123") + mock_client.scenarios.runs.download_logs.assert_called_once_with("scr_123") mock_response.write_to_file.assert_called_once_with(output_path) def test_get_score_when_scored(self, mock_client: Mock) -> None: @@ -139,19 +139,19 @@ def test_get_score_when_scored(self, mock_client: Mock) -> None: run_view = MockScenarioRunView(state="scored", scoring_contract_result=scoring_result) 
mock_client.scenarios.runs.retrieve.return_value = run_view - run = ScenarioRun(mock_client, "run_123", "dev_123") + run = ScenarioRun(mock_client, "scr_123", "dbx_123") result = run.get_score() assert result == scoring_result - mock_client.scenarios.runs.retrieve.assert_called_once_with("run_123") + mock_client.scenarios.runs.retrieve.assert_called_once_with("scr_123") def test_get_score_when_not_scored(self, mock_client: Mock) -> None: """Test get_score returns None when not scored.""" run_view = MockScenarioRunView(state="running", scoring_contract_result=None) mock_client.scenarios.runs.retrieve.return_value = run_view - run = ScenarioRun(mock_client, "run_123", "dev_123") + run = ScenarioRun(mock_client, "scr_123", "dbx_123") result = run.get_score() assert result is None - mock_client.scenarios.runs.retrieve.assert_called_once_with("run_123") + mock_client.scenarios.runs.retrieve.assert_called_once_with("scr_123") diff --git a/tests/sdk/test_scorer.py b/tests/sdk/test_scorer.py index 761a487cb..91b430db0 100644 --- a/tests/sdk/test_scorer.py +++ b/tests/sdk/test_scorer.py @@ -14,30 +14,30 @@ class TestScorer: def test_init(self, mock_client: Mock) -> None: """Test Scorer initialization.""" - scorer = Scorer(mock_client, "scorer_123") - assert scorer.id == "scorer_123" + scorer = Scorer(mock_client, "sco_123") + assert scorer.id == "sco_123" def test_repr(self, mock_client: Mock) -> None: """Test Scorer string representation.""" - scorer = Scorer(mock_client, "scorer_123") - assert repr(scorer) == "" + scorer = Scorer(mock_client, "sco_123") + assert repr(scorer) == "" def test_get_info(self, mock_client: Mock, scorer_view: MockScorerView) -> None: """Test get_info method.""" mock_client.scenarios.scorers.retrieve.return_value = scorer_view - scorer = Scorer(mock_client, "scorer_123") + scorer = Scorer(mock_client, "sco_123") result = scorer.get_info() assert result == scorer_view - mock_client.scenarios.scorers.retrieve.assert_called_once_with("scorer_123") + mock_client.scenarios.scorers.retrieve.assert_called_once_with("sco_123") def test_update(self, mock_client: Mock) -> None: """Test update method.""" - update_response = SimpleNamespace(id="scorer_123", type="updated_scorer", bash_script="echo 'score=1.0'") + update_response = SimpleNamespace(id="sco_123", type="updated_scorer", bash_script="echo 'score=1.0'") mock_client.scenarios.scorers.update.return_value = update_response - scorer = Scorer(mock_client, "scorer_123") + scorer = Scorer(mock_client, "sco_123") result = scorer.update( type="updated_scorer", bash_script="echo 'score=1.0'", @@ -45,7 +45,7 @@ def test_update(self, mock_client: Mock) -> None: assert result == update_response mock_client.scenarios.scorers.update.assert_called_once_with( - "scorer_123", + "sco_123", type="updated_scorer", bash_script="echo 'score=1.0'", ) @@ -59,13 +59,13 @@ def test_validate(self, mock_client: Mock) -> None: ) mock_client.scenarios.scorers.validate.return_value = validate_response - scorer = Scorer(mock_client, "scorer_123") + scorer = Scorer(mock_client, "sco_123") result = scorer.validate( scoring_context={"test": "context"}, ) assert result == validate_response mock_client.scenarios.scorers.validate.assert_called_once_with( - "scorer_123", + "sco_123", scoring_context={"test": "context"}, ) diff --git a/tests/sdk/test_snapshot.py b/tests/sdk/test_snapshot.py index 383e812cc..4b066e29a 100644 --- a/tests/sdk/test_snapshot.py +++ b/tests/sdk/test_snapshot.py @@ -15,19 +15,19 @@ class TestSnapshot: def test_init(self, mock_client: 
Mock) -> None: """Test Snapshot initialization.""" - snapshot = Snapshot(mock_client, "snap_123") - assert snapshot.id == "snap_123" + snapshot = Snapshot(mock_client, "snp_123") + assert snapshot.id == "snp_123" def test_repr(self, mock_client: Mock) -> None: """Test Snapshot string representation.""" - snapshot = Snapshot(mock_client, "snap_123") - assert repr(snapshot) == "" + snapshot = Snapshot(mock_client, "snp_123") + assert repr(snapshot) == "" def test_get_info(self, mock_client: Mock, snapshot_view: MockSnapshotView) -> None: """Test get_info method.""" mock_client.devboxes.disk_snapshots.query_status.return_value = snapshot_view - snapshot = Snapshot(mock_client, "snap_123") + snapshot = Snapshot(mock_client, "snp_123") result = snapshot.get_info( extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, @@ -37,7 +37,7 @@ def test_get_info(self, mock_client: Mock, snapshot_view: MockSnapshotView) -> N assert result == snapshot_view mock_client.devboxes.disk_snapshots.query_status.assert_called_once_with( - "snap_123", + "snp_123", extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, extra_body={"key": "value"}, @@ -46,10 +46,10 @@ def test_get_info(self, mock_client: Mock, snapshot_view: MockSnapshotView) -> N def test_update(self, mock_client: Mock) -> None: """Test update method.""" - updated_snapshot = SimpleNamespace(id="snap_123", name="updated-name") + updated_snapshot = SimpleNamespace(id="snp_123", name="updated-name") mock_client.devboxes.disk_snapshots.update.return_value = updated_snapshot - snapshot = Snapshot(mock_client, "snap_123") + snapshot = Snapshot(mock_client, "snp_123") result = snapshot.update( commit_message="Update message", metadata={"key": "value"}, @@ -63,7 +63,7 @@ def test_update(self, mock_client: Mock) -> None: assert result == updated_snapshot mock_client.devboxes.disk_snapshots.update.assert_called_once_with( - "snap_123", + "snp_123", commit_message="Update message", metadata={"key": "value"}, name="updated-name", @@ -78,7 +78,7 @@ def test_delete(self, mock_client: Mock) -> None: """Test delete method.""" mock_client.devboxes.disk_snapshots.delete.return_value = object() - snapshot = Snapshot(mock_client, "snap_123") + snapshot = Snapshot(mock_client, "snp_123") result = snapshot.delete( extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, @@ -89,7 +89,7 @@ def test_delete(self, mock_client: Mock) -> None: assert result is not None # Verify return value is propagated mock_client.devboxes.disk_snapshots.delete.assert_called_once_with( - "snap_123", + "snp_123", extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, extra_body={"key": "value"}, @@ -102,7 +102,7 @@ def test_await_completed(self, mock_client: Mock, snapshot_view: MockSnapshotVie mock_client.devboxes.disk_snapshots.await_completed.return_value = snapshot_view polling_config = PollingConfig(timeout_seconds=60.0) - snapshot = Snapshot(mock_client, "snap_123") + snapshot = Snapshot(mock_client, "snp_123") result = snapshot.await_completed( polling_config=polling_config, extra_headers={"X-Custom": "value"}, @@ -113,7 +113,7 @@ def test_await_completed(self, mock_client: Mock, snapshot_view: MockSnapshotVie assert result == snapshot_view mock_client.devboxes.disk_snapshots.await_completed.assert_called_once_with( - "snap_123", + "snp_123", polling_config=polling_config, extra_headers={"X-Custom": "value"}, extra_query={"param": "value"}, @@ -125,7 +125,7 @@ def test_create_devbox(self, mock_client: Mock, devbox_view: 
MockDevboxView) -> """Test create_devbox method.""" mock_client.devboxes.create_and_await_running.return_value = devbox_view - snapshot = Snapshot(mock_client, "snap_123") + snapshot = Snapshot(mock_client, "snp_123") devbox = snapshot.create_devbox( name="test-devbox", metadata={"key": "value"}, @@ -133,9 +133,9 @@ def test_create_devbox(self, mock_client: Mock, devbox_view: MockDevboxView) -> extra_headers={"X-Custom": "value"}, ) - assert devbox.id == "dev_123" + assert devbox.id == "dbx_123" mock_client.devboxes.create_and_await_running.assert_called_once() call_kwargs = mock_client.devboxes.create_and_await_running.call_args[1] - assert call_kwargs["snapshot_id"] == "snap_123" + assert call_kwargs["snapshot_id"] == "snp_123" assert call_kwargs["name"] == "test-devbox" assert call_kwargs["metadata"] == {"key": "value"} diff --git a/tests/smoketests/sdk/test_agent.py b/tests/smoketests/sdk/test_agent.py index deb659087..7ddfb6f70 100644 --- a/tests/smoketests/sdk/test_agent.py +++ b/tests/smoketests/sdk/test_agent.py @@ -12,6 +12,7 @@ THIRTY_SECOND_TIMEOUT = 30 TWO_MINUTE_TIMEOUT = 120 +AGENT_VERSION = "1.2.3" class TestAgentLifecycle: @@ -23,6 +24,7 @@ def test_agent_create_basic(self, sdk_client: RunloopSDK) -> None: name = unique_name("sdk-agent-test-basic") agent = sdk_client.agent.create( name=name, + version=AGENT_VERSION, source={ "type": "npm", "npm": { @@ -52,6 +54,7 @@ def test_agent_get_info(self, sdk_client: RunloopSDK) -> None: name = unique_name("sdk-agent-test-info") agent = sdk_client.agent.create( name=name, + version=AGENT_VERSION, source={ "type": "npm", "npm": { @@ -90,6 +93,7 @@ def test_get_agent_by_id(self, sdk_client: RunloopSDK) -> None: # Create an agent created = sdk_client.agent.create( name=unique_name("sdk-agent-test-retrieve"), + version=AGENT_VERSION, source={ "type": "npm", "npm": { @@ -121,9 +125,15 @@ def test_list_multiple_agents(self, sdk_client: RunloopSDK) -> None: } # Create multiple agents - agent1 = sdk_client.agent.create(name=unique_name("sdk-agent-test-list-1"), source=source_config) - agent2 = sdk_client.agent.create(name=unique_name("sdk-agent-test-list-2"), source=source_config) - agent3 = sdk_client.agent.create(name=unique_name("sdk-agent-test-list-3"), source=source_config) + agent1 = sdk_client.agent.create( + name=unique_name("sdk-agent-test-list-1"), source=source_config, version=AGENT_VERSION + ) + agent2 = sdk_client.agent.create( + name=unique_name("sdk-agent-test-list-2"), source=source_config, version=AGENT_VERSION + ) + agent3 = sdk_client.agent.create( + name=unique_name("sdk-agent-test-list-3"), source=source_config, version=AGENT_VERSION + ) try: # List agents @@ -153,6 +163,7 @@ def test_agent_with_source_npm(self, sdk_client: RunloopSDK) -> None: agent = sdk_client.agent.create( name=name, + version=AGENT_VERSION, source={ "type": "npm", "npm": { @@ -178,6 +189,7 @@ def test_agent_with_source_git(self, sdk_client: RunloopSDK) -> None: agent = sdk_client.agent.create( name=name, + version=AGENT_VERSION, source={ "type": "git", "git": { diff --git a/tests/smoketests/sdk/test_async_agent.py b/tests/smoketests/sdk/test_async_agent.py index fb9d17b42..36129605f 100644 --- a/tests/smoketests/sdk/test_async_agent.py +++ b/tests/smoketests/sdk/test_async_agent.py @@ -12,6 +12,7 @@ THIRTY_SECOND_TIMEOUT = 30 TWO_MINUTE_TIMEOUT = 120 +AGENT_VERSION = "1.2.3" class TestAsyncAgentLifecycle: @@ -23,6 +24,7 @@ async def test_agent_create_basic(self, async_sdk_client: AsyncRunloopSDK) -> No name = 
unique_name("sdk-async-agent-test-basic") agent = await async_sdk_client.agent.create( name=name, + version=AGENT_VERSION, source={ "type": "npm", "npm": { @@ -52,6 +54,7 @@ async def test_agent_get_info(self, async_sdk_client: AsyncRunloopSDK) -> None: name = unique_name("sdk-async-agent-test-info") agent = await async_sdk_client.agent.create( name=name, + version=AGENT_VERSION, source={ "type": "npm", "npm": { @@ -90,6 +93,7 @@ async def test_get_agent_by_id(self, async_sdk_client: AsyncRunloopSDK) -> None: # Create an agent created = await async_sdk_client.agent.create( name=unique_name("sdk-async-agent-test-retrieve"), + version=AGENT_VERSION, source={ "type": "npm", "npm": { @@ -122,13 +126,13 @@ async def test_list_multiple_agents(self, async_sdk_client: AsyncRunloopSDK) -> # Create multiple agents agent1 = await async_sdk_client.agent.create( - name=unique_name("sdk-async-agent-test-list-1"), source=source_config + name=unique_name("sdk-async-agent-test-list-1"), source=source_config, version=AGENT_VERSION ) agent2 = await async_sdk_client.agent.create( - name=unique_name("sdk-async-agent-test-list-2"), source=source_config + name=unique_name("sdk-async-agent-test-list-2"), source=source_config, version=AGENT_VERSION ) agent3 = await async_sdk_client.agent.create( - name=unique_name("sdk-async-agent-test-list-3"), source=source_config + name=unique_name("sdk-async-agent-test-list-3"), source=source_config, version=AGENT_VERSION ) try: @@ -159,6 +163,7 @@ async def test_agent_with_source_npm(self, async_sdk_client: AsyncRunloopSDK) -> agent = await async_sdk_client.agent.create( name=name, + version=AGENT_VERSION, source={ "type": "npm", "npm": { @@ -184,6 +189,7 @@ async def test_agent_with_source_git(self, async_sdk_client: AsyncRunloopSDK) -> agent = await async_sdk_client.agent.create( name=name, + version=AGENT_VERSION, source={ "type": "git", "git": { diff --git a/tests/smoketests/sdk/test_async_benchmark.py b/tests/smoketests/sdk/test_async_benchmark.py new file mode 100644 index 000000000..7316355a6 --- /dev/null +++ b/tests/smoketests/sdk/test_async_benchmark.py @@ -0,0 +1,192 @@ +"""Asynchronous SDK smoke tests for AsyncBenchmark operations. + +These tests validate the AsyncBenchmark class against the real API. +We create a dedicated smoketest benchmark and scenarios with consistent names +so that resources are reused across test runs (since there's no delete endpoint). 
+""" + +from __future__ import annotations + +from typing import List, Tuple + +import pytest + +from runloop_api_client import AsyncRunloopSDK +from runloop_api_client.sdk import AsyncScenario, AsyncBenchmark, AsyncScenarioRun, AsyncBenchmarkRun + +pytestmark = [pytest.mark.smoketest] + +TWO_MINUTE_TIMEOUT = 120 + +# Consistent names for smoketest resources +SMOKETEST_BENCHMARK_NAME = "sdk-smoketest-benchmark" +SMOKETEST_SCENARIO_1_NAME = "sdk-smoketest-scenario-1" +SMOKETEST_SCENARIO_2_NAME = "sdk-smoketest-scenario-2" + + +async def get_or_create_scenario( + async_sdk_client: AsyncRunloopSDK, + name: str, + problem_statement: str, +) -> AsyncScenario: + """Get an existing scenario by name or create a new one.""" + # Check if scenario already exists + scenarios = await async_sdk_client.scenario.list(name=name, limit=1) + for scenario in scenarios: + # Return the first matching scenario + return scenario + + # Create a new scenario using the SDK builder + return await ( + async_sdk_client.scenario.builder(name) + .with_problem_statement(problem_statement) + .add_shell_command_scorer("pass-scorer", command="exit 0") + .push() + ) + + +async def get_or_create_benchmark( + async_sdk_client: AsyncRunloopSDK, + name: str, + scenario_ids: List[str], +) -> AsyncBenchmark: + """Get an existing benchmark by name or create a new one.""" + # Check if benchmark already exists + benchmarks = await async_sdk_client.benchmark.list(name=name, limit=1) + for benchmark in benchmarks: + # Return the first matching benchmark + return benchmark + + # Create a new benchmark + return await async_sdk_client.benchmark.create( + name=name, + scenario_ids=scenario_ids, + description="Smoketest benchmark for SDK testing", + ) + + +@pytest.fixture(scope="module") +async def smoketest_benchmark( + async_sdk_client: AsyncRunloopSDK, +) -> Tuple[AsyncBenchmark, List[str]]: + """Create or retrieve the smoketest benchmark and scenario IDs.""" + # Create or get scenarios + scenario_1 = await get_or_create_scenario( + async_sdk_client, + SMOKETEST_SCENARIO_1_NAME, + "Smoketest scenario 1 - basic validation", + ) + scenario_2 = await get_or_create_scenario( + async_sdk_client, + SMOKETEST_SCENARIO_2_NAME, + "Smoketest scenario 2 - basic validation", + ) + + scenario_ids = [scenario_1.id, scenario_2.id] + + # Create or get benchmark + benchmark = await get_or_create_benchmark( + async_sdk_client, + SMOKETEST_BENCHMARK_NAME, + scenario_ids, + ) + + return benchmark, scenario_ids + + +class TestAsyncBenchmarkRun: + """Test AsyncBenchmark run operations.""" + + @pytest.mark.timeout(TWO_MINUTE_TIMEOUT) + async def test_benchmark_run_and_cancel( + self, + async_sdk_client: AsyncRunloopSDK, + smoketest_benchmark: Tuple[AsyncBenchmark, List[str]], + ) -> None: + """Test starting and canceling a benchmark run. + + This test: + 1. Uses the smoketest benchmark fixture + 2. Starts a new benchmark run via the AsyncBenchmark class + 3. Validates the run object + 4. 
Runs the scenarios, then cancels the scenario runs and the benchmark run + """ + benchmark, scenario_ids = smoketest_benchmark + + # Start a run + run = await benchmark.start_run(run_name="sdk-smoketest-async-benchmark-run") + scenario_runs: List[AsyncScenarioRun] = [] + + try: + assert isinstance(run, AsyncBenchmarkRun) + assert run.id is not None + assert run.benchmark_id == benchmark.id + + # Get run info + info = await run.get_info() + assert info.id == run.id + assert info.state == "running" + + # Run the scenarios + for scenario_id in scenario_ids: + scenario = async_sdk_client.scenario.from_id(scenario_id) + scenario_runs.append( + await scenario.run_async( + benchmark_run_id=run.id, run_name="sdk-smoketest-async-benchmark-run-scenario" + ) + ) + + benchmark_scenario_runs = await run.list_scenario_runs() + assert isinstance(benchmark_scenario_runs, list) + assert len(benchmark_scenario_runs) == len(scenario_runs) + # Each returned scenario run must match one of the runs we started + for scenario_run in benchmark_scenario_runs: + assert isinstance(scenario_run, AsyncScenarioRun) + assert any( + started_run.id == scenario_run.id and started_run.devbox_id == scenario_run.devbox_id + for started_run in scenario_runs + ) + + # Cancel the scenario runs + for scenario_run in scenario_runs: + scenario_result = await scenario_run.cancel() + assert scenario_result.state in ["canceled", "completed"] + + # Cancel the benchmark run + result = await run.cancel() + assert result.state in ["canceled", "completed"] + + except Exception: + # Ensure cleanup on any error + for scenario_run in scenario_runs: + await scenario_run.cancel() + await run.cancel() + raise + + +class TestAsyncBenchmarkListRuns: + """Test AsyncBenchmark list_runs operations.""" + + @pytest.mark.timeout(TWO_MINUTE_TIMEOUT) + async def test_list_runs( + self, + smoketest_benchmark: Tuple[AsyncBenchmark, List[str]], + ) -> None: + """Test listing benchmark runs. + + This test: + 1. Uses the smoketest benchmark fixture + 2. Lists its runs + 3. Validates returned objects are AsyncBenchmarkRun instances + """ + benchmark, _ = smoketest_benchmark + + runs = await benchmark.list_runs() + assert isinstance(runs, list) + if not runs: + pytest.skip("No runs available to test") + + # Verify returned items are AsyncBenchmarkRun objects + for run in runs: + assert isinstance(run, AsyncBenchmarkRun) + assert run.id is not None + assert run.benchmark_id == benchmark.id diff --git a/tests/smoketests/sdk/test_benchmark.py b/tests/smoketests/sdk/test_benchmark.py new file mode 100644 index 000000000..2dfe5bb6c --- /dev/null +++ b/tests/smoketests/sdk/test_benchmark.py @@ -0,0 +1,190 @@ +"""Synchronous SDK smoke tests for Benchmark operations. + +These tests validate the Benchmark class against the real API. +We create a dedicated smoketest benchmark and scenarios with consistent names +so that resources are reused across test runs (since there's no delete endpoint).
+""" + +from __future__ import annotations + +from typing import List, Tuple + +import pytest + +from runloop_api_client import RunloopSDK +from runloop_api_client.sdk import Scenario, Benchmark, ScenarioRun, BenchmarkRun + +pytestmark = [pytest.mark.smoketest] + +TWO_MINUTE_TIMEOUT = 120 + +# Consistent names for smoketest resources +SMOKETEST_BENCHMARK_NAME = "sdk-smoketest-benchmark" +SMOKETEST_SCENARIO_1_NAME = "sdk-smoketest-scenario-1" +SMOKETEST_SCENARIO_2_NAME = "sdk-smoketest-scenario-2" + + +def get_or_create_scenario( + sdk_client: RunloopSDK, + name: str, + problem_statement: str, +) -> Scenario: + """Get an existing scenario by name or create a new one.""" + # Check if scenario already exists + scenarios = sdk_client.scenario.list(name=name, limit=1) + for scenario in scenarios: + # Return the first matching scenario + return scenario + + # Create a new scenario using the SDK builder + return ( + sdk_client.scenario.builder(name) + .with_problem_statement(problem_statement) + .add_shell_command_scorer("pass-scorer", command="exit 0") + .push() + ) + + +def get_or_create_benchmark( + sdk_client: RunloopSDK, + name: str, + scenario_ids: List[str], +) -> Benchmark: + """Get an existing benchmark by name or create a new one.""" + # Check if benchmark already exists + benchmarks = sdk_client.benchmark.list(name=name, limit=1) + for benchmark in benchmarks: + # Return the first matching benchmark + return benchmark + + # Create a new benchmark + return sdk_client.benchmark.create( + name=name, + scenario_ids=scenario_ids, + description="Smoketest benchmark for SDK testing", + ) + + +@pytest.fixture(scope="module") +def smoketest_benchmark( + sdk_client: RunloopSDK, +) -> Tuple[Benchmark, List[str]]: + """Create or retrieve the smoketest benchmark and scenarios.""" + # Create or get scenarios + scenario_1 = get_or_create_scenario( + sdk_client, + SMOKETEST_SCENARIO_1_NAME, + "Smoketest scenario 1 - basic validation", + ) + scenario_2 = get_or_create_scenario( + sdk_client, + SMOKETEST_SCENARIO_2_NAME, + "Smoketest scenario 2 - basic validation", + ) + + scenario_ids = [scenario_1.id, scenario_2.id] + + # Create or get benchmark + benchmark = get_or_create_benchmark( + sdk_client, + SMOKETEST_BENCHMARK_NAME, + scenario_ids, + ) + + return benchmark, scenario_ids + + +class TestBenchmarkRun: + """Test Benchmark run operations.""" + + @pytest.mark.timeout(TWO_MINUTE_TIMEOUT) + def test_benchmark_run_lifecycle( + self, + sdk_client: RunloopSDK, + smoketest_benchmark: Tuple[Benchmark, List[str]], + ) -> None: + """Test starting and canceling a benchmark run. + + This test: + 1. Uses the smoketest benchmark fixture + 2. Starts a new benchmark run via the Benchmark class + 3. Validates the run object + 4. 
Runs the scenarios, then cancels the scenario runs and the benchmark run + """ + benchmark, scenario_ids = smoketest_benchmark + + # Start a run + run = benchmark.start_run(run_name="sdk-smoketest-benchmark-run") + scenario_runs: List[ScenarioRun] = [] + + try: + assert isinstance(run, BenchmarkRun) + assert run.id is not None + assert run.benchmark_id == benchmark.id + + # Get run info + info = run.get_info() + assert info.id == run.id + assert info.state == "running" + + # Start a scenario run for each scenario + for scenario_id in scenario_ids: + scenario = sdk_client.scenario.from_id(scenario_id) + scenario_runs.append( + scenario.run(benchmark_run_id=run.id, run_name="sdk-smoketest-benchmark-run-scenario") + ) + + benchmark_scenario_runs = run.list_scenario_runs() + assert isinstance(benchmark_scenario_runs, list) + assert len(benchmark_scenario_runs) == len(scenario_runs) + # Each returned scenario run must match one of the runs we started + for scenario_run in benchmark_scenario_runs: + assert isinstance(scenario_run, ScenarioRun) + assert any( + started_run.id == scenario_run.id and started_run.devbox_id == scenario_run.devbox_id + for started_run in scenario_runs + ) + + # Cancel the scenario runs + for scenario_run in scenario_runs: + scenario_result = scenario_run.cancel() + assert scenario_result.state in ["canceled", "completed"] + + # Cancel the benchmark run + result = run.cancel() + assert result.state in ["canceled", "completed"] + + except Exception: + # Ensure cleanup on any error + for scenario_run in scenario_runs: + scenario_run.cancel() + run.cancel() + raise + + +class TestBenchmarkListRuns: + """Test Benchmark list_runs operations.""" + + @pytest.mark.timeout(TWO_MINUTE_TIMEOUT) + def test_list_runs( + self, + smoketest_benchmark: Tuple[Benchmark, List[str]], + ) -> None: + """Test listing benchmark runs. + + This test: + 1. Uses the smoketest benchmark fixture + 2. Lists its runs + 3. Validates returned objects are BenchmarkRun instances + """ + benchmark, _ = smoketest_benchmark + + runs = benchmark.list_runs() + assert isinstance(runs, list) + if not runs: + pytest.skip("No runs available to test") + + # Verify returned items are BenchmarkRun objects + for run in runs: + assert isinstance(run, BenchmarkRun) + assert run.id is not None + assert run.benchmark_id == benchmark.id diff --git a/tests/smoketests/test_snapshots.py b/tests/smoketests/test_snapshots.py index 71b592320..0fc43ca23 100644 --- a/tests/smoketests/test_snapshots.py +++ b/tests/smoketests/test_snapshots.py @@ -31,7 +31,7 @@ def _cleanup(client: Runloop) -> Iterator[None]: # pyright: ignore[reportUnused _snapshot_id = None -@pytest.mark.timeout(30) +@pytest.mark.timeout(120) def test_snapshot_devbox(client: Runloop) -> None: global _devbox_id, _snapshot_id created = client.devboxes.create_and_await_running(