diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 0ce1d6b73..5233fcbf0 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -57,7 +57,7 @@ jobs:
- name: Get GitHub OIDC Token
if: github.repository == 'stainless-sdks/runloop-node'
id: github-oidc
- uses: runloopai/github-script@main
+ uses: actions/github-script@v8
with:
script: core.setOutput('github_token', await core.getIDToken());
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 96f1cd949..9049e2fdf 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "1.3.0"
+ ".": "1.3.1"
}
diff --git a/.stats.yml b/.stats.yml
index 5eb10a624..f28b394ab 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 103
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/runloop-ai%2Frunloop-5359067a857aa94f69bae0d3311856be3e637da067fdc9dbf8bd26fe476efbd8.yml
-openapi_spec_hash: 5227ef7c306d5226c3aee8932b2e8c6a
-config_hash: cb43d4ca9e64d5a099199d6818d70539
+configured_endpoints: 106
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/runloop-ai%2Frunloop-63dab7833d6670810c4f4882df560ebbfe2de8e8e1a98d51422368607b5335ae.yml
+openapi_spec_hash: ebb5068064f7469f9239b18a51a6fe44
+config_hash: fd168de77f219e46a1427bbec2eecfb9
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c7de9a1e4..6e5916535 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,25 @@
# Changelog
+## 1.3.1 (2026-01-28)
+
+Full Changelog: [v1.3.0...v1.3.1](https://github.com/runloopai/api-client-ts/compare/v1.3.0...v1.3.1)
+
+### Features
+
+* **benchmark:** adding in progress benchmark runs for benchmark jobs ([#7183](https://github.com/runloopai/api-client-ts/issues/7183)) ([65f75d9](https://github.com/runloopai/api-client-ts/commit/65f75d9f1711422459bc952c7833ef8f8e750477))
+
+
+### Bug Fixes
+
+* smoketest workflow workflow_call event checkout on caller repo ([#687](https://github.com/runloopai/api-client-ts/issues/687)) ([77d82a9](https://github.com/runloopai/api-client-ts/commit/77d82a97fc468aafb29780d472f90c24b1a0041a))
+* updating the readme ([#692](https://github.com/runloopai/api-client-ts/issues/692)) ([80fb27a](https://github.com/runloopai/api-client-ts/commit/80fb27a9f7f4b99c3581b5121dae0c6346064f6c))
+
+
+### Chores
+
+* **ci:** upgrade `actions/github-script` ([9417f0c](https://github.com/runloopai/api-client-ts/commit/9417f0c97c6811ee7147db7942104be46e581f6f))
+* remove api docs readme reference ([#690](https://github.com/runloopai/api-client-ts/issues/690)) ([dd16be0](https://github.com/runloopai/api-client-ts/commit/dd16be0ff1797427d813a5bc98a6a8cc1583a57a))
+
## 1.3.0 (2026-01-22)
Full Changelog: [v1.2.0-beta-2...v1.3.0](https://github.com/runloopai/api-client-ts/compare/v1.2.0-beta-2...v1.3.0)
diff --git a/api.md b/api.md
index 36cb120cf..053ac5095 100644
--- a/api.md
+++ b/api.md
@@ -48,6 +48,20 @@ Methods:
- client.benchmarkRuns.complete(id) -> BenchmarkRunView
- client.benchmarkRuns.listScenarioRuns(id, { ...params }) -> ScenarioRunViewsBenchmarkRunsCursorIDPage
+# BenchmarkJobs
+
+Types:
+
+- BenchmarkJobCreateParameters
+- BenchmarkJobListView
+- BenchmarkJobView
+
+Methods:
+
+- client.benchmarkJobs.create({ ...params }) -> BenchmarkJobView
+- client.benchmarkJobs.retrieve(id) -> BenchmarkJobView
+- client.benchmarkJobs.list({ ...params }) -> BenchmarkJobListView
+
# Agents
Types:
diff --git a/package.json b/package.json
index 3dcd70fd3..657ddcb35 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "@runloop/api-client",
- "version": "1.3.0",
+ "version": "1.3.1",
"description": "The official TypeScript library for the Runloop API",
"author": "Runloop ",
"types": "dist/sdk.d.ts",
diff --git a/src/index.ts b/src/index.ts
index b73020579..5ab00b0f2 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -41,6 +41,14 @@ import {
AgentViewsAgentsCursorIDPage,
Agents,
} from './resources/agents';
+import {
+ BenchmarkJobCreateParameters,
+ BenchmarkJobCreateParams,
+ BenchmarkJobListParams,
+ BenchmarkJobListView,
+ BenchmarkJobView,
+ BenchmarkJobs,
+} from './resources/benchmark-jobs';
import {
BenchmarkRunListParams,
BenchmarkRunListScenarioRunsParams,
@@ -324,6 +332,7 @@ export class Runloop extends Core.APIClient {
benchmarks: API.Benchmarks = new API.Benchmarks(this);
benchmarkRuns: API.BenchmarkRuns = new API.BenchmarkRuns(this);
+ benchmarkJobs: API.BenchmarkJobs = new API.BenchmarkJobs(this);
agents: API.Agents = new API.Agents(this);
blueprints: API.Blueprints = new API.Blueprints(this);
devboxes: API.Devboxes = new API.Devboxes(this);
@@ -380,6 +389,7 @@ Runloop.Benchmarks = Benchmarks;
Runloop.BenchmarkViewsBenchmarksCursorIDPage = BenchmarkViewsBenchmarksCursorIDPage;
Runloop.BenchmarkRuns = BenchmarkRuns;
Runloop.BenchmarkRunViewsBenchmarkRunsCursorIDPage = BenchmarkRunViewsBenchmarkRunsCursorIDPage;
+Runloop.BenchmarkJobs = BenchmarkJobs;
Runloop.Agents = Agents;
Runloop.AgentViewsAgentsCursorIDPage = AgentViewsAgentsCursorIDPage;
Runloop.Blueprints = Blueprints;
@@ -499,6 +509,15 @@ export declare namespace Runloop {
type BenchmarkRunListScenarioRunsParams as BenchmarkRunListScenarioRunsParams,
};
+ export {
+ BenchmarkJobs as BenchmarkJobs,
+ type BenchmarkJobCreateParameters as BenchmarkJobCreateParameters,
+ type BenchmarkJobListView as BenchmarkJobListView,
+ type BenchmarkJobView as BenchmarkJobView,
+ type BenchmarkJobCreateParams as BenchmarkJobCreateParams,
+ type BenchmarkJobListParams as BenchmarkJobListParams,
+ };
+
export {
Agents as Agents,
type AgentCreateParameters as AgentCreateParameters,
diff --git a/src/resources/benchmark-jobs.ts b/src/resources/benchmark-jobs.ts
new file mode 100644
index 000000000..e58a0992b
--- /dev/null
+++ b/src/resources/benchmark-jobs.ts
@@ -0,0 +1,1017 @@
+// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+import { APIResource } from '../resource';
+import { isRequestOptions } from '../core';
+import * as Core from '../core';
+
+export class BenchmarkJobs extends APIResource {
+  /**
+   * [Beta] Create a BenchmarkJob that runs a set of scenarios entirely on runloop.
+   */
+  create(body?: BenchmarkJobCreateParams, options?: Core.RequestOptions): Core.APIPromise<BenchmarkJobView>;
+  create(options?: Core.RequestOptions): Core.APIPromise<BenchmarkJobView>;
+  create(
+    body: BenchmarkJobCreateParams | Core.RequestOptions = {},
+    options?: Core.RequestOptions,
+  ): Core.APIPromise<BenchmarkJobView> {
+    if (isRequestOptions(body)) {
+      return this.create({}, body); // sole argument was request options: re-dispatch with an empty body
+    }
+    return this._client.post('/v1/benchmark_jobs', { body, ...options });
+  }
+
+  /**
+   * [Beta] Get a BenchmarkJob given ID.
+   */
+  retrieve(id: string, options?: Core.RequestOptions): Core.APIPromise<BenchmarkJobView> {
+    return this._client.get(`/v1/benchmark_jobs/${id}`, options);
+  }
+
+  /**
+   * [Beta] List all BenchmarkJobs matching filter.
+   */
+  list(query?: BenchmarkJobListParams, options?: Core.RequestOptions): Core.APIPromise<BenchmarkJobListView>;
+  list(options?: Core.RequestOptions): Core.APIPromise<BenchmarkJobListView>;
+  list(
+    query: BenchmarkJobListParams | Core.RequestOptions = {},
+    options?: Core.RequestOptions,
+  ): Core.APIPromise<BenchmarkJobListView> {
+    if (isRequestOptions(query)) {
+      return this.list({}, query); // sole argument was request options: re-dispatch with an empty query
+    }
+    return this._client.get('/v1/benchmark_jobs', { query, ...options });
+  }
+}
+
+/**
+ * BenchmarkJobCreateParameters contain the set of parameters to create a
+ * BenchmarkJob.
+ */
+export interface BenchmarkJobCreateParameters {
+ /**
+ * The name of the BenchmarkJob. If not provided, name will be generated based on
+ * target dataset.
+ */
+ name?: string | null;
+
+ /**
+ * The job specification. Exactly one spec type must be set.
+ */
+ spec?:
+ | BenchmarkJobCreateParameters.HarborJobSpec
+ | BenchmarkJobCreateParameters.BenchmarkDefinitionJobSpec
+ | BenchmarkJobCreateParameters.ScenarioDefinitionJobSpec
+ | null;
+}
+
+export namespace BenchmarkJobCreateParameters {
+ /**
+ * Harbor-based job specification with inline YAML configuration.
+ */
+ export interface HarborJobSpec {
+ /**
+ * The Harbor job configuration as inline YAML content.
+ */
+ inline_yaml: string;
+
+ type: 'harbor';
+ }
+
+ /**
+ * Specifies a benchmark definition with runtime configuration. The benchmark
+ * definition's scenarios will be executed using the provided agent and
+ * orchestrator configurations.
+ */
+  export interface BenchmarkDefinitionJobSpec {
+    /**
+     * Agent configurations to use for this run. Must specify at least one agent.
+     */
+    agent_configs: Array<BenchmarkDefinitionJobSpec.AgentConfig>;
+
+    /**
+     * ID of the benchmark definition to run. The scenarios from this benchmark will be
+     * executed.
+     */
+    benchmark_id: string;
+
+    type: 'benchmark';
+
+    /**
+     * Orchestrator configuration (optional overrides). If not provided, default values
+     * will be used.
+     */
+    orchestrator_config?: BenchmarkDefinitionJobSpec.OrchestratorConfig | null;
+  }
+
+ export namespace BenchmarkDefinitionJobSpec {
+ /**
+ * Configuration for an agent in a benchmark job
+ */
+ export interface AgentConfig {
+ /**
+ * Name of the agent
+ */
+ name: string;
+
+ type: 'job_agent';
+
+ /**
+ * Environment configuration to use for this agent
+ */
+ agent_environment?: AgentConfig.AgentEnvironment | null;
+
+ /**
+ * ID of the agent to use (optional if agent exists by name)
+ */
+ agent_id?: string | null;
+
+ /**
+ * Additional kwargs for agent configuration
+ */
+ kwargs?: { [key: string]: string } | null;
+
+ /**
+ * Model name override for this agent
+ */
+ model_name?: string | null;
+
+ /**
+ * Timeout in seconds for this agent
+ */
+ timeout_seconds?: number | null;
+ }
+
+ export namespace AgentConfig {
+ /**
+ * Environment configuration to use for this agent
+ */
+ export interface AgentEnvironment {
+ /**
+ * Environment variables to set when launching the agent.
+ */
+ environment_variables?: { [key: string]: string } | null;
+
+ /**
+ * Secrets to inject as environment variables when launching the agent. Map of
+ * environment variable names to secret IDs.
+ */
+ secrets?: { [key: string]: string } | null;
+ }
+ }
+
+ /**
+ * Orchestrator configuration (optional overrides). If not provided, default values
+ * will be used.
+ */
+ export interface OrchestratorConfig {
+ /**
+ * Number of retry attempts on failure (default: 0). This is the retry policy for
+ * failed scenarios. Default is 0.
+ */
+ n_attempts?: number | null;
+
+ /**
+ * Number of concurrent trials to run (default: 1). Controls parallelism for
+ * scenario execution. Default is 1.
+ */
+ n_concurrent_trials?: number | null;
+
+ /**
+ * Suppress verbose output (default: false)
+ */
+ quiet?: boolean | null;
+
+ /**
+ * Timeout multiplier for retries (default: 1.0). Each retry will multiply the
+ * timeout by this factor.
+ */
+ timeout_multiplier?: number | null;
+ }
+ }
+
+ /**
+ * Specifies a set of scenarios with runtime configuration. The scenarios will be
+ * executed using the provided agent and orchestrator configurations.
+ */
+  export interface ScenarioDefinitionJobSpec {
+    /**
+     * Agent configurations to use for this run. Must specify at least one agent.
+     */
+    agent_configs: Array<ScenarioDefinitionJobSpec.AgentConfig>;
+
+    /**
+     * List of scenario IDs to execute
+     */
+    scenario_ids: Array<string>;
+
+    type: 'scenarios';
+
+    /**
+     * Orchestrator configuration (optional overrides). If not provided, default values
+     * will be used.
+     */
+    orchestrator_config?: ScenarioDefinitionJobSpec.OrchestratorConfig | null;
+  }
+
+ export namespace ScenarioDefinitionJobSpec {
+ /**
+ * Configuration for an agent in a benchmark job
+ */
+ export interface AgentConfig {
+ /**
+ * Name of the agent
+ */
+ name: string;
+
+ type: 'job_agent';
+
+ /**
+ * Environment configuration to use for this agent
+ */
+ agent_environment?: AgentConfig.AgentEnvironment | null;
+
+ /**
+ * ID of the agent to use (optional if agent exists by name)
+ */
+ agent_id?: string | null;
+
+ /**
+ * Additional kwargs for agent configuration
+ */
+ kwargs?: { [key: string]: string } | null;
+
+ /**
+ * Model name override for this agent
+ */
+ model_name?: string | null;
+
+ /**
+ * Timeout in seconds for this agent
+ */
+ timeout_seconds?: number | null;
+ }
+
+ export namespace AgentConfig {
+ /**
+ * Environment configuration to use for this agent
+ */
+ export interface AgentEnvironment {
+ /**
+ * Environment variables to set when launching the agent.
+ */
+ environment_variables?: { [key: string]: string } | null;
+
+ /**
+ * Secrets to inject as environment variables when launching the agent. Map of
+ * environment variable names to secret IDs.
+ */
+ secrets?: { [key: string]: string } | null;
+ }
+ }
+
+ /**
+ * Orchestrator configuration (optional overrides). If not provided, default values
+ * will be used.
+ */
+ export interface OrchestratorConfig {
+ /**
+ * Number of retry attempts on failure (default: 0). This is the retry policy for
+ * failed scenarios. Default is 0.
+ */
+ n_attempts?: number | null;
+
+ /**
+ * Number of concurrent trials to run (default: 1). Controls parallelism for
+ * scenario execution. Default is 1.
+ */
+ n_concurrent_trials?: number | null;
+
+ /**
+ * Suppress verbose output (default: false)
+ */
+ quiet?: boolean | null;
+
+ /**
+ * Timeout multiplier for retries (default: 1.0). Each retry will multiply the
+ * timeout by this factor.
+ */
+ timeout_multiplier?: number | null;
+ }
+ }
+}
+
+export interface BenchmarkJobListView {
+  has_more: boolean;
+
+  /**
+   * List of BenchmarkJobs matching filter.
+   */
+  jobs: Array<BenchmarkJobView>;
+
+  remaining_count: number;
+
+  total_count: number;
+}
+
+/**
+ * A BenchmarkJobView represents a benchmark job that runs a set of scenarios
+ * entirely on runloop.
+ */
+export interface BenchmarkJobView {
+  /**
+   * The ID of the BenchmarkJob.
+   */
+  id: string;
+
+  /**
+   * Timestamp when job was created (Unix milliseconds).
+   */
+  create_time_ms: number;
+
+  /**
+   * The unique name of the BenchmarkJob.
+   */
+  name: string;
+
+  /**
+   * The current state of the benchmark job.
+   */
+  state: 'initializing' | 'queued' | 'running' | 'completed' | 'failed' | 'cancelled' | 'timeout';
+
+  /**
+   * Detailed outcome data for each benchmark run created by this job. Includes
+   * per-agent results and scenario-level details.
+   */
+  benchmark_outcomes?: Array<BenchmarkJobView.BenchmarkOutcome> | null;
+
+  /**
+   * Failure reason if job failed.
+   */
+  failure_reason?: string | null;
+
+  /**
+   * Benchmark runs currently in progress for this job. Shows runs that have not yet
+   * completed.
+   */
+  in_progress_runs?: Array<BenchmarkJobView.InProgressRun> | null;
+
+  /**
+   * The source configuration that was used to create this job: Harbor YAML, a
+   * benchmark definition reference, or a list of scenario definition IDs.
+   */
+  job_source?:
+    | BenchmarkJobView.HarborJobSource
+    | BenchmarkJobView.BenchmarkDefJobSource
+    | BenchmarkJobView.ScenariosJobSource
+    | null;
+
+  /**
+   * The resolved job specification. Contains scenarios, agents, and orchestrator
+   * config.
+   */
+  job_spec?: BenchmarkJobView.JobSpec | null;
+}
+
+export namespace BenchmarkJobView {
+ /**
+ * Outcome data for a single benchmark run within a benchmark job, representing
+ * results for one agent configuration.
+ */
+  export interface BenchmarkOutcome {
+    /**
+     * The name of the agent configuration used.
+     */
+    agent_name: string;
+
+    /**
+     * The ID of the benchmark run.
+     */
+    benchmark_run_id: string;
+
+    /**
+     * Number of scenarios that completed successfully.
+     */
+    n_completed: number;
+
+    /**
+     * Number of scenarios that failed.
+     */
+    n_failed: number;
+
+    /**
+     * Number of scenarios that timed out.
+     */
+    n_timeout: number;
+
+    /**
+     * Detailed outcomes for each scenario in this benchmark run.
+     */
+    scenario_outcomes: Array<BenchmarkOutcome.ScenarioOutcome>;
+
+    /**
+     * Average score across all completed scenarios (0.0 to 1.0).
+     */
+    average_score?: number | null;
+
+    /**
+     * Total duration of the benchmark run in milliseconds.
+     */
+    duration_ms?: number | null;
+
+    /**
+     * The model name used by the agent.
+     */
+    model_name?: string | null;
+  }
+
+ export namespace BenchmarkOutcome {
+ /**
+ * Outcome data for a single scenario execution, including its final state and
+ * scoring results.
+ */
+ export interface ScenarioOutcome {
+ /**
+ * The ID of the scenario definition that was executed.
+ */
+ scenario_definition_id: string;
+
+ /**
+ * The name of the scenario.
+ */
+ scenario_name: string;
+
+ /**
+ * The ID of the scenario run.
+ */
+ scenario_run_id: string;
+
+ /**
+ * The final state of the scenario execution.
+ */
+ state: 'COMPLETED' | 'FAILED' | 'TIMEOUT' | 'CANCELED';
+
+ /**
+ * Duration of the scenario execution in milliseconds.
+ */
+ duration_ms?: number | null;
+
+ /**
+ * Failure information if the scenario failed or timed out. Contains exception type
+ * and message.
+ */
+ failure_reason?: ScenarioOutcome.FailureReason | null;
+
+ /**
+ * The score achieved for this scenario (0.0 to 1.0). Only present if state is
+ * COMPLETED.
+ */
+ score?: number | null;
+ }
+
+ export namespace ScenarioOutcome {
+ /**
+ * Failure information if the scenario failed or timed out. Contains exception type
+ * and message.
+ */
+ export interface FailureReason {
+ /**
+ * The exception message providing context
+ */
+ exception_message: string;
+
+ /**
+ * The exception class name (e.g., 'TimeoutException', 'AgentTimeoutError')
+ */
+ exception_type: string;
+ }
+ }
+ }
+
+ /**
+ * A lightweight view of a benchmark run currently in progress, showing basic
+ * execution details without full outcome data.
+ */
+ export interface InProgressRun {
+ /**
+ * The ID of the benchmark run.
+ */
+ benchmark_run_id: string;
+
+ /**
+ * Start time (Unix milliseconds).
+ */
+ start_time_ms: number;
+
+ /**
+ * The current state of the run.
+ */
+ state: 'running' | 'canceled' | 'completed';
+
+ /**
+ * Agent configuration used for this run. Specifies whether the run was driven by
+ * an external API agent or a job-defined agent.
+ */
+ agent_config?: InProgressRun.ExternalAPIAgentConfig | InProgressRun.JobAgentConfig | null;
+
+ /**
+ * Duration so far in milliseconds.
+ */
+ duration_ms?: number | null;
+ }
+
+ export namespace InProgressRun {
+ /**
+ * Configuration for externally-driven benchmark runs via API
+ */
+ export interface ExternalAPIAgentConfig {
+ type: 'external_api';
+
+ /**
+ * Placeholder for future external agent metadata
+ */
+ info?: string | null;
+ }
+
+ /**
+ * Configuration for an agent in a benchmark job
+ */
+ export interface JobAgentConfig {
+ /**
+ * Name of the agent
+ */
+ name: string;
+
+ type: 'job_agent';
+
+ /**
+ * Environment configuration to use for this agent
+ */
+ agent_environment?: JobAgentConfig.AgentEnvironment | null;
+
+ /**
+ * ID of the agent to use (optional if agent exists by name)
+ */
+ agent_id?: string | null;
+
+ /**
+ * Additional kwargs for agent configuration
+ */
+ kwargs?: { [key: string]: string } | null;
+
+ /**
+ * Model name override for this agent
+ */
+ model_name?: string | null;
+
+ /**
+ * Timeout in seconds for this agent
+ */
+ timeout_seconds?: number | null;
+ }
+
+ export namespace JobAgentConfig {
+ /**
+ * Environment configuration to use for this agent
+ */
+ export interface AgentEnvironment {
+ /**
+ * Environment variables to set when launching the agent.
+ */
+ environment_variables?: { [key: string]: string } | null;
+
+ /**
+ * Secrets to inject as environment variables when launching the agent. Map of
+ * environment variable names to secret IDs.
+ */
+ secrets?: { [key: string]: string } | null;
+ }
+ }
+ }
+
+ /**
+ * Harbor job source with inline YAML configuration
+ */
+ export interface HarborJobSource {
+ /**
+ * The Harbor job configuration as inline YAML content
+ */
+ inline_yaml: string;
+
+ type: 'harbor';
+ }
+
+ /**
+ * Benchmark definition job source
+ */
+ export interface BenchmarkDefJobSource {
+ /**
+ * The ID of the benchmark definition
+ */
+ benchmark_id: string;
+
+ type: 'benchmark';
+
+ /**
+ * Optional user-provided name for the benchmark definition
+ */
+ benchmark_name?: string | null;
+ }
+
+ /**
+ * Scenarios job source with a list of scenario definition IDs
+ */
+  export interface ScenariosJobSource {
+    /**
+     * List of scenario definition IDs to execute
+     */
+    scenario_ids: Array<string>;
+
+    type: 'scenarios';
+  }
+
+ /**
+ * The resolved job specification. Contains scenarios, agents, and orchestrator
+ * config.
+ */
+  export interface JobSpec {
+    /**
+     * Agent configurations for this job
+     */
+    agent_configs: Array<JobSpec.AgentConfig>;
+
+    /**
+     * List of scenario IDs to execute
+     */
+    scenario_ids: Array<string>;
+
+    /**
+     * Orchestrator configuration
+     */
+    orchestrator_config?: JobSpec.OrchestratorConfig | null;
+  }
+
+ export namespace JobSpec {
+ /**
+ * Configuration for an agent in a benchmark job
+ */
+ export interface AgentConfig {
+ /**
+ * Name of the agent
+ */
+ name: string;
+
+ type: 'job_agent';
+
+ /**
+ * Environment configuration to use for this agent
+ */
+ agent_environment?: AgentConfig.AgentEnvironment | null;
+
+ /**
+ * ID of the agent to use (optional if agent exists by name)
+ */
+ agent_id?: string | null;
+
+ /**
+ * Additional kwargs for agent configuration
+ */
+ kwargs?: { [key: string]: string } | null;
+
+ /**
+ * Model name override for this agent
+ */
+ model_name?: string | null;
+
+ /**
+ * Timeout in seconds for this agent
+ */
+ timeout_seconds?: number | null;
+ }
+
+ export namespace AgentConfig {
+ /**
+ * Environment configuration to use for this agent
+ */
+ export interface AgentEnvironment {
+ /**
+ * Environment variables to set when launching the agent.
+ */
+ environment_variables?: { [key: string]: string } | null;
+
+ /**
+ * Secrets to inject as environment variables when launching the agent. Map of
+ * environment variable names to secret IDs.
+ */
+ secrets?: { [key: string]: string } | null;
+ }
+ }
+
+ /**
+ * Orchestrator configuration
+ */
+ export interface OrchestratorConfig {
+ /**
+ * Number of retry attempts on failure (default: 0). This is the retry policy for
+ * failed scenarios. Default is 0.
+ */
+ n_attempts?: number | null;
+
+ /**
+ * Number of concurrent trials to run (default: 1). Controls parallelism for
+ * scenario execution. Default is 1.
+ */
+ n_concurrent_trials?: number | null;
+
+ /**
+ * Suppress verbose output (default: false)
+ */
+ quiet?: boolean | null;
+
+ /**
+ * Timeout multiplier for retries (default: 1.0). Each retry will multiply the
+ * timeout by this factor.
+ */
+ timeout_multiplier?: number | null;
+ }
+ }
+}
+
+export interface BenchmarkJobCreateParams {
+ /**
+ * The name of the BenchmarkJob. If not provided, name will be generated based on
+ * target dataset.
+ */
+ name?: string | null;
+
+ /**
+ * The job specification. Exactly one spec type must be set.
+ */
+ spec?:
+ | BenchmarkJobCreateParams.HarborJobSpec
+ | BenchmarkJobCreateParams.BenchmarkDefinitionJobSpec
+ | BenchmarkJobCreateParams.ScenarioDefinitionJobSpec
+ | null;
+}
+
+export namespace BenchmarkJobCreateParams {
+ /**
+ * Harbor-based job specification with inline YAML configuration.
+ */
+ export interface HarborJobSpec {
+ /**
+ * The Harbor job configuration as inline YAML content.
+ */
+ inline_yaml: string;
+
+ type: 'harbor';
+ }
+
+ /**
+ * Specifies a benchmark definition with runtime configuration. The benchmark
+ * definition's scenarios will be executed using the provided agent and
+ * orchestrator configurations.
+ */
+  export interface BenchmarkDefinitionJobSpec {
+    /**
+     * Agent configurations to use for this run. Must specify at least one agent.
+     */
+    agent_configs: Array<BenchmarkDefinitionJobSpec.AgentConfig>;
+
+    /**
+     * ID of the benchmark definition to run. The scenarios from this benchmark will be
+     * executed.
+     */
+    benchmark_id: string;
+
+    type: 'benchmark';
+
+    /**
+     * Orchestrator configuration (optional overrides). If not provided, default values
+     * will be used.
+     */
+    orchestrator_config?: BenchmarkDefinitionJobSpec.OrchestratorConfig | null;
+  }
+
+ export namespace BenchmarkDefinitionJobSpec {
+ /**
+ * Configuration for an agent in a benchmark job
+ */
+ export interface AgentConfig {
+ /**
+ * Name of the agent
+ */
+ name: string;
+
+ type: 'job_agent';
+
+ /**
+ * Environment configuration to use for this agent
+ */
+ agent_environment?: AgentConfig.AgentEnvironment | null;
+
+ /**
+ * ID of the agent to use (optional if agent exists by name)
+ */
+ agent_id?: string | null;
+
+ /**
+ * Additional kwargs for agent configuration
+ */
+ kwargs?: { [key: string]: string } | null;
+
+ /**
+ * Model name override for this agent
+ */
+ model_name?: string | null;
+
+ /**
+ * Timeout in seconds for this agent
+ */
+ timeout_seconds?: number | null;
+ }
+
+ export namespace AgentConfig {
+ /**
+ * Environment configuration to use for this agent
+ */
+ export interface AgentEnvironment {
+ /**
+ * Environment variables to set when launching the agent.
+ */
+ environment_variables?: { [key: string]: string } | null;
+
+ /**
+ * Secrets to inject as environment variables when launching the agent. Map of
+ * environment variable names to secret IDs.
+ */
+ secrets?: { [key: string]: string } | null;
+ }
+ }
+
+ /**
+ * Orchestrator configuration (optional overrides). If not provided, default values
+ * will be used.
+ */
+ export interface OrchestratorConfig {
+ /**
+ * Number of retry attempts on failure (default: 0). This is the retry policy for
+ * failed scenarios. Default is 0.
+ */
+ n_attempts?: number | null;
+
+ /**
+ * Number of concurrent trials to run (default: 1). Controls parallelism for
+ * scenario execution. Default is 1.
+ */
+ n_concurrent_trials?: number | null;
+
+ /**
+ * Suppress verbose output (default: false)
+ */
+ quiet?: boolean | null;
+
+ /**
+ * Timeout multiplier for retries (default: 1.0). Each retry will multiply the
+ * timeout by this factor.
+ */
+ timeout_multiplier?: number | null;
+ }
+ }
+
+ /**
+ * Specifies a set of scenarios with runtime configuration. The scenarios will be
+ * executed using the provided agent and orchestrator configurations.
+ */
+  export interface ScenarioDefinitionJobSpec {
+    /**
+     * Agent configurations to use for this run. Must specify at least one agent.
+     */
+    agent_configs: Array<ScenarioDefinitionJobSpec.AgentConfig>;
+
+    /**
+     * List of scenario IDs to execute
+     */
+    scenario_ids: Array<string>;
+
+    type: 'scenarios';
+
+    /**
+     * Orchestrator configuration (optional overrides). If not provided, default values
+     * will be used.
+     */
+    orchestrator_config?: ScenarioDefinitionJobSpec.OrchestratorConfig | null;
+  }
+
+ export namespace ScenarioDefinitionJobSpec {
+ /**
+ * Configuration for an agent in a benchmark job
+ */
+ export interface AgentConfig {
+ /**
+ * Name of the agent
+ */
+ name: string;
+
+ type: 'job_agent';
+
+ /**
+ * Environment configuration to use for this agent
+ */
+ agent_environment?: AgentConfig.AgentEnvironment | null;
+
+ /**
+ * ID of the agent to use (optional if agent exists by name)
+ */
+ agent_id?: string | null;
+
+ /**
+ * Additional kwargs for agent configuration
+ */
+ kwargs?: { [key: string]: string } | null;
+
+ /**
+ * Model name override for this agent
+ */
+ model_name?: string | null;
+
+ /**
+ * Timeout in seconds for this agent
+ */
+ timeout_seconds?: number | null;
+ }
+
+ export namespace AgentConfig {
+ /**
+ * Environment configuration to use for this agent
+ */
+ export interface AgentEnvironment {
+ /**
+ * Environment variables to set when launching the agent.
+ */
+ environment_variables?: { [key: string]: string } | null;
+
+ /**
+ * Secrets to inject as environment variables when launching the agent. Map of
+ * environment variable names to secret IDs.
+ */
+ secrets?: { [key: string]: string } | null;
+ }
+ }
+
+ /**
+ * Orchestrator configuration (optional overrides). If not provided, default values
+ * will be used.
+ */
+ export interface OrchestratorConfig {
+ /**
+ * Number of retry attempts on failure (default: 0). This is the retry policy for
+ * failed scenarios. Default is 0.
+ */
+ n_attempts?: number | null;
+
+ /**
+ * Number of concurrent trials to run (default: 1). Controls parallelism for
+ * scenario execution. Default is 1.
+ */
+ n_concurrent_trials?: number | null;
+
+ /**
+ * Suppress verbose output (default: false)
+ */
+ quiet?: boolean | null;
+
+ /**
+ * Timeout multiplier for retries (default: 1.0). Each retry will multiply the
+ * timeout by this factor.
+ */
+ timeout_multiplier?: number | null;
+ }
+ }
+}
+
+export interface BenchmarkJobListParams {
+ /**
+ * The limit of items to return. Default is 20. Max is 5000.
+ */
+ limit?: number;
+
+ /**
+ * Filter by name
+ */
+ name?: string;
+
+ /**
+ * Load the next page of data starting after the item with the given ID.
+ */
+ starting_after?: string;
+}
+
+export declare namespace BenchmarkJobs {
+ export {
+ type BenchmarkJobCreateParameters as BenchmarkJobCreateParameters,
+ type BenchmarkJobListView as BenchmarkJobListView,
+ type BenchmarkJobView as BenchmarkJobView,
+ type BenchmarkJobCreateParams as BenchmarkJobCreateParams,
+ type BenchmarkJobListParams as BenchmarkJobListParams,
+ };
+}
diff --git a/src/resources/index.ts b/src/resources/index.ts
index 1afde0ca7..cca5276d9 100644
--- a/src/resources/index.ts
+++ b/src/resources/index.ts
@@ -10,6 +10,14 @@ export {
type AgentCreateParams,
type AgentListParams,
} from './agents';
+export {
+ BenchmarkJobs,
+ type BenchmarkJobCreateParameters,
+ type BenchmarkJobListView,
+ type BenchmarkJobView,
+ type BenchmarkJobCreateParams,
+ type BenchmarkJobListParams,
+} from './benchmark-jobs';
export {
BenchmarkRunViewsBenchmarkRunsCursorIDPage,
BenchmarkRuns,
diff --git a/src/version.ts b/src/version.ts
index b4178bdcc..8d95974a6 100644
--- a/src/version.ts
+++ b/src/version.ts
@@ -1 +1 @@
-export const VERSION = '1.3.0-2'; // x-release-please-version
+export const VERSION = '1.3.1'; // x-release-please-version
diff --git a/tests/api-resources/benchmark-jobs.test.ts b/tests/api-resources/benchmark-jobs.test.ts
new file mode 100644
index 000000000..8703928ab
--- /dev/null
+++ b/tests/api-resources/benchmark-jobs.test.ts
@@ -0,0 +1,92 @@
+// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+import Runloop from '@runloop/api-client';
+import { Response } from 'node-fetch';
+
+const client = new Runloop({
+ bearerToken: 'My Bearer Token',
+ baseURL: process.env['TEST_API_BASE_URL'] ?? 'http://127.0.0.1:4010',
+});
+
+describe('resource benchmarkJobs', () => {
+ test('create', async () => {
+ const responsePromise = client.benchmarkJobs.create();
+ const rawResponse = await responsePromise.asResponse();
+ expect(rawResponse).toBeInstanceOf(Response);
+ const response = await responsePromise;
+ expect(response).not.toBeInstanceOf(Response);
+ const dataAndResponse = await responsePromise.withResponse();
+ expect(dataAndResponse.data).toBe(response);
+ expect(dataAndResponse.response).toBe(rawResponse);
+ });
+
+ test('create: request options instead of params are passed correctly', async () => {
+ // ensure the request options are being passed correctly by passing an unknown `path` option, which is expected to reject with NotFoundError
+ await expect(client.benchmarkJobs.create({ path: '/_stainless_unknown_path' })).rejects.toThrow(
+ Runloop.NotFoundError,
+ );
+ });
+
+ test('create: request options and params are passed correctly', async () => {
+ // ensure the request options are being passed correctly by passing an unknown `path` option, which is expected to reject with NotFoundError
+ await expect(
+ client.benchmarkJobs.create(
+ {
+ name: 'name',
+ spec: { inline_yaml: 'inline_yaml', type: 'harbor' },
+ },
+ { path: '/_stainless_unknown_path' },
+ ),
+ ).rejects.toThrow(Runloop.NotFoundError);
+ });
+
+ test('retrieve', async () => {
+ const responsePromise = client.benchmarkJobs.retrieve('id');
+ const rawResponse = await responsePromise.asResponse();
+ expect(rawResponse).toBeInstanceOf(Response);
+ const response = await responsePromise;
+ expect(response).not.toBeInstanceOf(Response);
+ const dataAndResponse = await responsePromise.withResponse();
+ expect(dataAndResponse.data).toBe(response);
+ expect(dataAndResponse.response).toBe(rawResponse);
+ });
+
+ test('retrieve: request options instead of params are passed correctly', async () => {
+ // ensure the request options are being passed correctly by passing an unknown `path` option, which is expected to reject with NotFoundError
+ await expect(client.benchmarkJobs.retrieve('id', { path: '/_stainless_unknown_path' })).rejects.toThrow(
+ Runloop.NotFoundError,
+ );
+ });
+
+ test('list', async () => {
+ const responsePromise = client.benchmarkJobs.list();
+ const rawResponse = await responsePromise.asResponse();
+ expect(rawResponse).toBeInstanceOf(Response);
+ const response = await responsePromise;
+ expect(response).not.toBeInstanceOf(Response);
+ const dataAndResponse = await responsePromise.withResponse();
+ expect(dataAndResponse.data).toBe(response);
+ expect(dataAndResponse.response).toBe(rawResponse);
+ });
+
+ test('list: request options instead of params are passed correctly', async () => {
+ // ensure the request options are being passed correctly by passing an unknown `path` option, which is expected to reject with NotFoundError
+ await expect(client.benchmarkJobs.list({ path: '/_stainless_unknown_path' })).rejects.toThrow(
+ Runloop.NotFoundError,
+ );
+ });
+
+ test('list: request options and params are passed correctly', async () => {
+ // ensure the request options are being passed correctly by passing an unknown `path` option, which is expected to reject with NotFoundError
+ await expect(
+ client.benchmarkJobs.list(
+ {
+ limit: 0,
+ name: 'name',
+ starting_after: 'starting_after',
+ },
+ { path: '/_stainless_unknown_path' },
+ ),
+ ).rejects.toThrow(Runloop.NotFoundError);
+ });
+});