runloopai · stainless-app · Feb 19, 2025 · Feb 19, 2025
diff --git a/.stats.yml b/.stats.yml
@@ -1,2 +1,2 @@
 configured_endpoints: 77
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/runloop-ai%2Frunloop-a3d91c690527ff6a9040ade46943ba56916987f1f7d1fb45a9974546770ffe97.yml
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/runloop-ai%2Frunloop-cb8add05a7b418d6f8a5624be8477564853da49e8bf9671ae89b8ce49a04b6cd.yml
diff --git a/src/runloop_api_client/resources/benchmarks/benchmarks.py b/src/runloop_api_client/resources/benchmarks/benchmarks.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from typing import List, Optional
+from typing import Dict, List, Optional
 
 import httpx
 
@@ -69,6 +69,7 @@ def create(
         self,
         *,
         name: str,
+        metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
         scenario_ids: Optional[List[str]] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
@@ -84,6 +85,8 @@ def create(
         Args:
           name: The name of the Benchmark.
 
+          metadata: User-defined metadata to attach to the benchmark for organizational purposes.
+
           scenario_ids: The Scenario IDs that make up the Benchmark.
 
           extra_headers: Send extra headers
@@ -101,6 +104,7 @@ def create(
             body=maybe_transform(
                 {
                     "name": name,
+                    "metadata": metadata,
                     "scenario_ids": scenario_ids,
                 },
                 benchmark_create_params.BenchmarkCreateParams,
@@ -246,6 +250,7 @@ def start_run(
         self,
         *,
         benchmark_id: str,
+        metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
         run_name: Optional[str] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
@@ -261,6 +266,8 @@ def start_run(
         Args:
           benchmark_id: ID of the Benchmark to run.
 
+          metadata: User-defined metadata to attach to the benchmark run for organizational purposes.
+
           run_name: Display name of the run.
 
           extra_headers: Send extra headers
@@ -278,6 +285,7 @@ def start_run(
             body=maybe_transform(
                 {
                     "benchmark_id": benchmark_id,
+                    "metadata": metadata,
                     "run_name": run_name,
                 },
                 benchmark_start_run_params.BenchmarkStartRunParams,
@@ -321,6 +329,7 @@ async def create(
         self,
         *,
         name: str,
+        metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
         scenario_ids: Optional[List[str]] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
@@ -336,6 +345,8 @@ async def create(
         Args:
           name: The name of the Benchmark.
 
+          metadata: User-defined metadata to attach to the benchmark for organizational purposes.
+
           scenario_ids: The Scenario IDs that make up the Benchmark.
 
           extra_headers: Send extra headers
@@ -353,6 +364,7 @@ async def create(
             body=await async_maybe_transform(
                 {
                     "name": name,
+                    "metadata": metadata,
                     "scenario_ids": scenario_ids,
                 },
                 benchmark_create_params.BenchmarkCreateParams,
@@ -498,6 +510,7 @@ async def start_run(
         self,
         *,
         benchmark_id: str,
+        metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
         run_name: Optional[str] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
@@ -513,6 +526,8 @@ async def start_run(
         Args:
           benchmark_id: ID of the Benchmark to run.
 
+          metadata: User-defined metadata to attach to the benchmark run for organizational purposes.
+
           run_name: Display name of the run.
 
           extra_headers: Send extra headers
@@ -530,6 +545,7 @@ async def start_run(
             body=await async_maybe_transform(
                 {
                     "benchmark_id": benchmark_id,
+                    "metadata": metadata,
                     "run_name": run_name,
                 },
                 benchmark_start_run_params.BenchmarkStartRunParams,

diff --git a/src/runloop_api_client/resources/scenarios/scenarios.py b/src/runloop_api_client/resources/scenarios/scenarios.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from typing import Optional
+from typing import Dict, Optional
 
 import httpx
 
@@ -87,6 +87,8 @@ def create(
         name: str,
         scoring_contract: ScoringContractParam,
         environment_parameters: Optional[ScenarioEnvironmentParam] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        reference_output: Optional[str] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -108,6 +110,12 @@ def create(
 
           environment_parameters: The Environment in which the Scenario will run.
 
+          metadata: User-defined metadata to attach to the scenario for organizational purposes.
+
+          reference_output: A string representation of the reference output to solve the scenario. This is
+              commonly the result of a git diff or a sequence of command actions to apply to the
+              environment.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -126,6 +134,8 @@ def create(
                     "name": name,
                     "scoring_contract": scoring_contract,
                     "environment_parameters": environment_parameters,
+                    "metadata": metadata,
+                    "reference_output": reference_output,
                 },
                 scenario_create_params.ScenarioCreateParams,
             ),
@@ -280,6 +290,7 @@ def start_run(
         *,
         scenario_id: str,
         benchmark_run_id: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
         run_name: Optional[str] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
@@ -297,6 +308,8 @@ def start_run(
 
           benchmark_run_id: Benchmark to associate the run.
 
+          metadata: User-defined metadata to attach to the run for organizational purposes.
+
           run_name: Display name of the run.
 
           extra_headers: Send extra headers
@@ -315,6 +328,7 @@ def start_run(
                 {
                     "scenario_id": scenario_id,
                     "benchmark_run_id": benchmark_run_id,
+                    "metadata": metadata,
                     "run_name": run_name,
                 },
                 scenario_start_run_params.ScenarioStartRunParams,
@@ -365,6 +379,8 @@ async def create(
         name: str,
         scoring_contract: ScoringContractParam,
         environment_parameters: Optional[ScenarioEnvironmentParam] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        reference_output: Optional[str] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -386,6 +402,12 @@ async def create(
 
           environment_parameters: The Environment in which the Scenario will run.
 
+          metadata: User-defined metadata to attach to the scenario for organizational purposes.
+
+          reference_output: A string representation of the reference output to solve the scenario. This is
+              commonly the result of a git diff or a sequence of command actions to apply to the
+              environment.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -404,6 +426,8 @@ async def create(
                     "name": name,
                     "scoring_contract": scoring_contract,
                     "environment_parameters": environment_parameters,
+                    "metadata": metadata,
+                    "reference_output": reference_output,
                 },
                 scenario_create_params.ScenarioCreateParams,
             ),
@@ -558,6 +582,7 @@ async def start_run(
         *,
         scenario_id: str,
         benchmark_run_id: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
         run_name: Optional[str] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
@@ -575,6 +600,8 @@ async def start_run(
 
           benchmark_run_id: Benchmark to associate the run.
 
+          metadata: User-defined metadata to attach to the run for organizational purposes.
+
           run_name: Display name of the run.
 
           extra_headers: Send extra headers
@@ -593,6 +620,7 @@ async def start_run(
                 {
                     "scenario_id": scenario_id,
                     "benchmark_run_id": benchmark_run_id,
+                    "metadata": metadata,
                     "run_name": run_name,
                 },
                 scenario_start_run_params.ScenarioStartRunParams,

diff --git a/src/runloop_api_client/types/benchmark_create_params.py b/src/runloop_api_client/types/benchmark_create_params.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from typing import List, Optional
+from typing import Dict, List, Optional
 from typing_extensions import Required, TypedDict
 
 __all__ = ["BenchmarkCreateParams"]
@@ -12,5 +12,8 @@ class BenchmarkCreateParams(TypedDict, total=False):
     name: Required[str]
     """The name of the Benchmark."""
 
+    metadata: Optional[Dict[str, str]]
+    """User-defined metadata to attach to the benchmark for organizational purposes."""
+
     scenario_ids: Optional[List[str]]
     """The Scenario IDs that make up the Benchmark."""
diff --git a/src/runloop_api_client/types/benchmark_run_view.py b/src/runloop_api_client/types/benchmark_run_view.py
@@ -1,6 +1,6 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
-from typing import List, Optional
+from typing import Dict, List, Optional
 from typing_extensions import Literal
 
 from .._models import BaseModel
@@ -15,6 +15,12 @@ class BenchmarkRunView(BaseModel):
     benchmark_id: str
     """The ID of the Benchmark."""
 
+    metadata: Dict[str, str]
+    """User-defined metadata to attach to the benchmark run for organizational purposes."""
+
+    pending_scenarios: List[str]
+    """List of Scenarios that need to be completed before benchmark can be completed."""
+
     start_time_ms: int
     """The time the benchmark run execution started (Unix timestamp milliseconds)."""
 
@@ -27,9 +33,6 @@ class BenchmarkRunView(BaseModel):
     name: Optional[str] = None
     """The name of the BenchmarkRun."""
 
-    pending_scenarios: Optional[List[str]] = None
-    """List of Scenarios that need to be completed before benchmark can be completed."""
-
     score: Optional[float] = None
     """The final score across the BenchmarkRun, present once completed.
 

diff --git a/src/runloop_api_client/types/benchmark_start_run_params.py b/src/runloop_api_client/types/benchmark_start_run_params.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from typing import Optional
+from typing import Dict, Optional
 from typing_extensions import Required, TypedDict
 
 __all__ = ["BenchmarkStartRunParams"]
@@ -12,5 +12,8 @@ class BenchmarkStartRunParams(TypedDict, total=False):
     benchmark_id: Required[str]
     """ID of the Benchmark to run."""
 
+    metadata: Optional[Dict[str, str]]
+    """User-defined metadata to attach to the benchmark run for organizational purposes."""
+
     run_name: Optional[str]
     """Display name of the run."""
diff --git a/src/runloop_api_client/types/benchmark_view.py b/src/runloop_api_client/types/benchmark_view.py
@@ -1,6 +1,6 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
-from typing import List
+from typing import Dict, List
 
 from pydantic import Field as FieldInfo
 
@@ -13,6 +13,9 @@ class BenchmarkView(BaseModel):
     id: str
     """The ID of the Benchmark."""
 
+    metadata: Dict[str, str]
+    """User-defined metadata to attach to the benchmark for organizational purposes."""
+
     name: str
     """The name of the Benchmark."""
 

diff --git a/src/runloop_api_client/types/scenario_create_params.py b/src/runloop_api_client/types/scenario_create_params.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from typing import Optional
+from typing import Dict, Optional
 from typing_extensions import Required, TypedDict
 
 from .input_context_param import InputContextParam
@@ -24,3 +24,13 @@ class ScenarioCreateParams(TypedDict, total=False):
 
     environment_parameters: Optional[ScenarioEnvironmentParam]
     """The Environment in which the Scenario will run."""
+
+    metadata: Optional[Dict[str, str]]
+    """User-defined metadata to attach to the scenario for organizational purposes."""
+
+    reference_output: Optional[str]
+    """A string representation of the reference output to solve the scenario.
+
+    This is commonly the result of a git diff or a sequence of command actions to
+    apply to the environment.
+    """
diff --git a/src/runloop_api_client/types/scenario_environment.py b/src/runloop_api_client/types/scenario_environment.py
@@ -3,6 +3,7 @@
 from typing import Optional
 
 from .._models import BaseModel
+from .shared.launch_parameters import LaunchParameters
 
 __all__ = ["ScenarioEnvironment"]
 
@@ -11,8 +12,17 @@ class ScenarioEnvironment(BaseModel):
     blueprint_id: Optional[str] = None
     """Use the blueprint with matching ID."""
 
+    launch_parameters: Optional[LaunchParameters] = None
+    """Optional launch parameters to apply to the devbox environment at launch."""
+
     prebuilt_id: Optional[str] = None
     """Use the prebuilt with matching ID."""
 
     snapshot_id: Optional[str] = None
     """Use the snapshot with matching ID."""
+
+    working_directory: Optional[str] = None
+    """The working directory where the agent is expected to fulfill the scenario.
+
+    Scoring functions also run from the working directory.
+    """
diff --git a/src/runloop_api_client/types/scenario_environment_param.py b/src/runloop_api_client/types/scenario_environment_param.py
@@ -5,15 +5,26 @@
 from typing import Optional
 from typing_extensions import TypedDict
 
+from .shared_params.launch_parameters import LaunchParameters
+
 __all__ = ["ScenarioEnvironmentParam"]
 
 
 class ScenarioEnvironmentParam(TypedDict, total=False):
     blueprint_id: Optional[str]
     """Use the blueprint with matching ID."""
 
+    launch_parameters: Optional[LaunchParameters]
+    """Optional launch parameters to apply to the devbox environment at launch."""
+
     prebuilt_id: Optional[str]
     """Use the prebuilt with matching ID."""
 
     snapshot_id: Optional[str]
     """Use the snapshot with matching ID."""
+
+    working_directory: Optional[str]
+    """The working directory where the agent is expected to fulfill the scenario.
+
+    Scoring functions also run from the working directory.
+    """