diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 0ce1d6b73..5233fcbf0 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -57,7 +57,7 @@ jobs:
       - name: Get GitHub OIDC Token
         if: github.repository == 'stainless-sdks/runloop-node'
         id: github-oidc
-        uses: runloopai/github-script@main
+        uses: actions/github-script@v8
         with:
           script: core.setOutput('github_token', await core.getIDToken());
 
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 96f1cd949..9049e2fdf 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "1.3.0"
+  ".": "1.3.1"
 }
diff --git a/.stats.yml b/.stats.yml
index 5eb10a624..f28b394ab 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 103
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/runloop-ai%2Frunloop-5359067a857aa94f69bae0d3311856be3e637da067fdc9dbf8bd26fe476efbd8.yml
-openapi_spec_hash: 5227ef7c306d5226c3aee8932b2e8c6a
-config_hash: cb43d4ca9e64d5a099199d6818d70539
+configured_endpoints: 106
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/runloop-ai%2Frunloop-63dab7833d6670810c4f4882df560ebbfe2de8e8e1a98d51422368607b5335ae.yml
+openapi_spec_hash: ebb5068064f7469f9239b18a51a6fe44
+config_hash: fd168de77f219e46a1427bbec2eecfb9
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c7de9a1e4..6e5916535 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,25 @@
 # Changelog
 
+## 1.3.1 (2026-01-28)
+
+Full Changelog: [v1.3.0...v1.3.1](https://github.com/runloopai/api-client-ts/compare/v1.3.0...v1.3.1)
+
+### Features
+
+* **benchmark:** adding in-progress benchmark runs for benchmark jobs ([#7183](https://github.com/runloopai/api-client-ts/issues/7183)) ([65f75d9](https://github.com/runloopai/api-client-ts/commit/65f75d9f1711422459bc952c7833ef8f8e750477))
+
+
+### Bug Fixes
+
+* smoketest workflow workflow_call event checkout on caller repo ([#687](https://github.com/runloopai/api-client-ts/issues/687)) ([77d82a9](https://github.com/runloopai/api-client-ts/commit/77d82a97fc468aafb29780d472f90c24b1a0041a))
+* updating the readme ([#692](https://github.com/runloopai/api-client-ts/issues/692)) ([80fb27a](https://github.com/runloopai/api-client-ts/commit/80fb27a9f7f4b99c3581b5121dae0c6346064f6c))
+
+
+### Chores
+
+* **ci:** upgrade `actions/github-script` ([9417f0c](https://github.com/runloopai/api-client-ts/commit/9417f0c97c6811ee7147db7942104be46e581f6f))
+* remove api docs readme reference ([#690](https://github.com/runloopai/api-client-ts/issues/690)) ([dd16be0](https://github.com/runloopai/api-client-ts/commit/dd16be0ff1797427d813a5bc98a6a8cc1583a57a))
+
 ## 1.3.0 (2026-01-22)
 
 Full Changelog: [v1.2.0-beta-2...v1.3.0](https://github.com/runloopai/api-client-ts/compare/v1.2.0-beta-2...v1.3.0)
diff --git a/api.md b/api.md
index 36cb120cf..053ac5095 100644
--- a/api.md
+++ b/api.md
@@ -48,6 +48,20 @@ Methods:
 - <code title="post /v1/benchmark_runs/{id}/complete">client.benchmarkRuns.<a href="./src/resources/benchmark-runs.ts">complete</a>(id) -> BenchmarkRunView</code>
 - <code title="get /v1/benchmark_runs/{id}/scenario_runs">client.benchmarkRuns.<a href="./src/resources/benchmark-runs.ts">listScenarioRuns</a>(id, { ...params }) -> ScenarioRunViewsBenchmarkRunsCursorIDPage</code>
 
+# BenchmarkJobs
+
+Types:
+
+- <code><a href="./src/resources/benchmark-jobs.ts">BenchmarkJobCreateParameters</a></code>
+- <code><a href="./src/resources/benchmark-jobs.ts">BenchmarkJobListView</a></code>
+- <code><a href="./src/resources/benchmark-jobs.ts">BenchmarkJobView</a></code>
+
+Methods:
+
+- <code title="post /v1/benchmark_jobs">client.benchmarkJobs.<a href="./src/resources/benchmark-jobs.ts">create</a>({ ...params }) -> BenchmarkJobView</code>
+- <code title="get /v1/benchmark_jobs/{id}">client.benchmarkJobs.<a href="./src/resources/benchmark-jobs.ts">retrieve</a>(id) -> BenchmarkJobView</code>
+- <code title="get /v1/benchmark_jobs">client.benchmarkJobs.<a href="./src/resources/benchmark-jobs.ts">list</a>({ ...params }) -> BenchmarkJobListView</code>
+
 # Agents
 
 Types:
diff --git a/package.json b/package.json
index 3dcd70fd3..657ddcb35 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@runloop/api-client",
-  "version": "1.3.0",
+  "version": "1.3.1",
   "description": "The official TypeScript library for the Runloop API",
   "author": "Runloop <support@runloop.ai>",
   "types": "dist/sdk.d.ts",
diff --git a/src/index.ts b/src/index.ts
index b73020579..5ab00b0f2 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -41,6 +41,14 @@ import {
   AgentViewsAgentsCursorIDPage,
   Agents,
 } from './resources/agents';
+import {
+  BenchmarkJobCreateParameters,
+  BenchmarkJobCreateParams,
+  BenchmarkJobListParams,
+  BenchmarkJobListView,
+  BenchmarkJobView,
+  BenchmarkJobs,
+} from './resources/benchmark-jobs';
 import {
   BenchmarkRunListParams,
   BenchmarkRunListScenarioRunsParams,
@@ -324,6 +332,7 @@ export class Runloop extends Core.APIClient {
 
   benchmarks: API.Benchmarks = new API.Benchmarks(this);
   benchmarkRuns: API.BenchmarkRuns = new API.BenchmarkRuns(this);
+  benchmarkJobs: API.BenchmarkJobs = new API.BenchmarkJobs(this);
   agents: API.Agents = new API.Agents(this);
   blueprints: API.Blueprints = new API.Blueprints(this);
   devboxes: API.Devboxes = new API.Devboxes(this);
@@ -380,6 +389,7 @@ Runloop.Benchmarks = Benchmarks;
 Runloop.BenchmarkViewsBenchmarksCursorIDPage = BenchmarkViewsBenchmarksCursorIDPage;
 Runloop.BenchmarkRuns = BenchmarkRuns;
 Runloop.BenchmarkRunViewsBenchmarkRunsCursorIDPage = BenchmarkRunViewsBenchmarkRunsCursorIDPage;
+Runloop.BenchmarkJobs = BenchmarkJobs;
 Runloop.Agents = Agents;
 Runloop.AgentViewsAgentsCursorIDPage = AgentViewsAgentsCursorIDPage;
 Runloop.Blueprints = Blueprints;
@@ -499,6 +509,15 @@ export declare namespace Runloop {
     type BenchmarkRunListScenarioRunsParams as BenchmarkRunListScenarioRunsParams,
   };
 
+  export {
+    BenchmarkJobs as BenchmarkJobs,
+    type BenchmarkJobCreateParameters as BenchmarkJobCreateParameters,
+    type BenchmarkJobListView as BenchmarkJobListView,
+    type BenchmarkJobView as BenchmarkJobView,
+    type BenchmarkJobCreateParams as BenchmarkJobCreateParams,
+    type BenchmarkJobListParams as BenchmarkJobListParams,
+  };
+
   export {
     Agents as Agents,
     type AgentCreateParameters as AgentCreateParameters,
diff --git a/src/resources/benchmark-jobs.ts b/src/resources/benchmark-jobs.ts
new file mode 100644
index 000000000..e58a0992b
--- /dev/null
+++ b/src/resources/benchmark-jobs.ts
@@ -0,0 +1,1017 @@
+// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+import { APIResource } from '../resource';
+import { isRequestOptions } from '../core';
+import * as Core from '../core';
+
+export class BenchmarkJobs extends APIResource {
+  /**
+   * [Beta] Create a BenchmarkJob that runs a set of scenarios entirely on Runloop. Sends `POST /v1/benchmark_jobs`; `body` defaults to `{}` when omitted.
+   */
+  create(body?: BenchmarkJobCreateParams, options?: Core.RequestOptions): Core.APIPromise<BenchmarkJobView>;
+  create(options?: Core.RequestOptions): Core.APIPromise<BenchmarkJobView>;
+  create(
+    body: BenchmarkJobCreateParams | Core.RequestOptions = {},
+    options?: Core.RequestOptions,
+  ): Core.APIPromise<BenchmarkJobView> {
+    if (isRequestOptions(body)) {
+      return this.create({}, body); // called as create(options): re-dispatch with an empty body
+    }
+    return this._client.post('/v1/benchmark_jobs', { body, ...options });
+  }
+
+  /**
+   * [Beta] Get a BenchmarkJob given ID. Sends `GET /v1/benchmark_jobs/{id}`; `id` is interpolated into the path as-is (no URL-encoding is applied here).
+   */
+  retrieve(id: string, options?: Core.RequestOptions): Core.APIPromise<BenchmarkJobView> {
+    return this._client.get(`/v1/benchmark_jobs/${id}`, options);
+  }
+
+  /**
+   * [Beta] List all BenchmarkJobs matching filter. Sends `GET /v1/benchmark_jobs`; `query` defaults to `{}` when omitted.
+   */
+  list(query?: BenchmarkJobListParams, options?: Core.RequestOptions): Core.APIPromise<BenchmarkJobListView>;
+  list(options?: Core.RequestOptions): Core.APIPromise<BenchmarkJobListView>;
+  list(
+    query: BenchmarkJobListParams | Core.RequestOptions = {},
+    options?: Core.RequestOptions,
+  ): Core.APIPromise<BenchmarkJobListView> {
+    if (isRequestOptions(query)) {
+      return this.list({}, query); // called as list(options): re-dispatch with an empty query
+    }
+    return this._client.get('/v1/benchmark_jobs', { query, ...options });
+  }
+}
+
+/**
+ * BenchmarkJobCreateParameters contain the set of parameters to create a
+ * BenchmarkJob.
+ */
+export interface BenchmarkJobCreateParameters {
+  /**
+   * The name of the BenchmarkJob. If not provided, name will be generated based on
+   * target dataset.
+   */
+  name?: string | null;
+
+  /**
+   * The job specification. Exactly one spec type must be set.
+   */
+  spec?:
+    | BenchmarkJobCreateParameters.HarborJobSpec
+    | BenchmarkJobCreateParameters.BenchmarkDefinitionJobSpec
+    | BenchmarkJobCreateParameters.ScenarioDefinitionJobSpec
+    | null;
+}
+
+export namespace BenchmarkJobCreateParameters {
+  /**
+   * Harbor-based job specification with inline YAML configuration.
+   */
+  export interface HarborJobSpec {
+    /**
+     * The Harbor job configuration as inline YAML content.
+     */
+    inline_yaml: string;
+
+    type: 'harbor';
+  }
+
+  /**
+   * Specifies a benchmark definition with runtime configuration. The benchmark
+   * definition's scenarios will be executed using the provided agent and
+   * orchestrator configurations.
+   */
+  export interface BenchmarkDefinitionJobSpec {
+    /**
+     * Agent configurations to use for this run. Must specify at least one agent.
+     */
+    agent_configs: Array<BenchmarkDefinitionJobSpec.AgentConfig>;
+
+    /**
+     * ID of the benchmark definition to run. The scenarios from this benchmark will be
+     * executed.
+     */
+    benchmark_id: string;
+
+    type: 'benchmark';
+
+    /**
+     * Orchestrator configuration (optional overrides). If not provided, default values
+     * will be used.
+     */
+    orchestrator_config?: BenchmarkDefinitionJobSpec.OrchestratorConfig | null;
+  }
+
+  export namespace BenchmarkDefinitionJobSpec {
+    /**
+     * Configuration for an agent in a benchmark job
+     */
+    export interface AgentConfig {
+      /**
+       * Name of the agent
+       */
+      name: string;
+
+      type: 'job_agent';
+
+      /**
+       * Environment configuration to use for this agent
+       */
+      agent_environment?: AgentConfig.AgentEnvironment | null;
+
+      /**
+       * ID of the agent to use (optional if agent exists by name)
+       */
+      agent_id?: string | null;
+
+      /**
+       * Additional kwargs for agent configuration
+       */
+      kwargs?: { [key: string]: string } | null;
+
+      /**
+       * Model name override for this agent
+       */
+      model_name?: string | null;
+
+      /**
+       * Timeout in seconds for this agent
+       */
+      timeout_seconds?: number | null;
+    }
+
+    export namespace AgentConfig {
+      /**
+       * Environment configuration to use for this agent
+       */
+      export interface AgentEnvironment {
+        /**
+         * Environment variables to set when launching the agent.
+         */
+        environment_variables?: { [key: string]: string } | null;
+
+        /**
+         * Secrets to inject as environment variables when launching the agent. Map of
+         * environment variable names to secret IDs.
+         */
+        secrets?: { [key: string]: string } | null;
+      }
+    }
+
+    /**
+     * Orchestrator configuration (optional overrides). If not provided, default values
+     * will be used.
+     */
+    export interface OrchestratorConfig {
+      /**
+       * Number of retry attempts on failure (default: 0). This is the retry policy for
+       * failed scenarios.
+       */
+      n_attempts?: number | null;
+
+      /**
+       * Number of concurrent trials to run (default: 1). Controls parallelism for
+       * scenario execution.
+       */
+      n_concurrent_trials?: number | null;
+
+      /**
+       * Suppress verbose output (default: false)
+       */
+      quiet?: boolean | null;
+
+      /**
+       * Timeout multiplier for retries (default: 1.0). Each retry will multiply the
+       * timeout by this factor.
+       */
+      timeout_multiplier?: number | null;
+    }
+  }
+
+  /**
+   * Specifies a set of scenarios with runtime configuration. The scenarios will be
+   * executed using the provided agent and orchestrator configurations.
+   */
+  export interface ScenarioDefinitionJobSpec {
+    /**
+     * Agent configurations to use for this run. Must specify at least one agent.
+     */
+    agent_configs: Array<ScenarioDefinitionJobSpec.AgentConfig>;
+
+    /**
+     * List of scenario IDs to execute
+     */
+    scenario_ids: Array<string>;
+
+    type: 'scenarios';
+
+    /**
+     * Orchestrator configuration (optional overrides). If not provided, default values
+     * will be used.
+     */
+    orchestrator_config?: ScenarioDefinitionJobSpec.OrchestratorConfig | null;
+  }
+
+  export namespace ScenarioDefinitionJobSpec {
+    /**
+     * Configuration for an agent in a benchmark job
+     */
+    export interface AgentConfig {
+      /**
+       * Name of the agent
+       */
+      name: string;
+
+      type: 'job_agent';
+
+      /**
+       * Environment configuration to use for this agent
+       */
+      agent_environment?: AgentConfig.AgentEnvironment | null;
+
+      /**
+       * ID of the agent to use (optional if agent exists by name)
+       */
+      agent_id?: string | null;
+
+      /**
+       * Additional kwargs for agent configuration
+       */
+      kwargs?: { [key: string]: string } | null;
+
+      /**
+       * Model name override for this agent
+       */
+      model_name?: string | null;
+
+      /**
+       * Timeout in seconds for this agent
+       */
+      timeout_seconds?: number | null;
+    }
+
+    export namespace AgentConfig {
+      /**
+       * Environment configuration to use for this agent
+       */
+      export interface AgentEnvironment {
+        /**
+         * Environment variables to set when launching the agent.
+         */
+        environment_variables?: { [key: string]: string } | null;
+
+        /**
+         * Secrets to inject as environment variables when launching the agent. Map of
+         * environment variable names to secret IDs.
+         */
+        secrets?: { [key: string]: string } | null;
+      }
+    }
+
+    /**
+     * Orchestrator configuration (optional overrides). If not provided, default values
+     * will be used.
+     */
+    export interface OrchestratorConfig {
+      /**
+       * Number of retry attempts on failure (default: 0). This is the retry policy for
+       * failed scenarios.
+       */
+      n_attempts?: number | null;
+
+      /**
+       * Number of concurrent trials to run (default: 1). Controls parallelism for
+       * scenario execution.
+       */
+      n_concurrent_trials?: number | null;
+
+      /**
+       * Suppress verbose output (default: false)
+       */
+      quiet?: boolean | null;
+
+      /**
+       * Timeout multiplier for retries (default: 1.0). Each retry will multiply the
+       * timeout by this factor.
+       */
+      timeout_multiplier?: number | null;
+    }
+  }
+}
+
+export interface BenchmarkJobListView {
+  has_more: boolean;
+
+  /**
+   * List of BenchmarkJobs matching filter.
+   */
+  jobs: Array<BenchmarkJobView>;
+
+  remaining_count: number;
+
+  total_count: number;
+}
+
+/**
+ * A BenchmarkJobView represents a benchmark job that runs a set of scenarios
+ * entirely on Runloop.
+ */
+export interface BenchmarkJobView {
+  /**
+   * The ID of the BenchmarkJob.
+   */
+  id: string;
+
+  /**
+   * Timestamp when job was created (Unix milliseconds).
+   */
+  create_time_ms: number;
+
+  /**
+   * The unique name of the BenchmarkJob.
+   */
+  name: string;
+
+  /**
+   * The current state of the benchmark job.
+   */
+  state: 'initializing' | 'queued' | 'running' | 'completed' | 'failed' | 'cancelled' | 'timeout';
+
+  /**
+   * Detailed outcome data for each benchmark run created by this job. Includes
+   * per-agent results and scenario-level details.
+   */
+  benchmark_outcomes?: Array<BenchmarkJobView.BenchmarkOutcome> | null;
+
+  /**
+   * Failure reason if job failed.
+   */
+  failure_reason?: string | null;
+
+  /**
+   * Benchmark runs currently in progress for this job. Shows runs that have not yet
+   * completed.
+   */
+  in_progress_runs?: Array<BenchmarkJobView.InProgressRun> | null;
+
+  /**
+   * The source configuration that was used to create this job: Harbor YAML, a
+   * benchmark definition reference, or a list of scenario definition IDs.
+   */
+  job_source?:
+    | BenchmarkJobView.HarborJobSource
+    | BenchmarkJobView.BenchmarkDefJobSource
+    | BenchmarkJobView.ScenariosJobSource
+    | null;
+
+  /**
+   * The resolved job specification. Contains scenarios, agents, and orchestrator
+   * config.
+   */
+  job_spec?: BenchmarkJobView.JobSpec | null;
+}
+
+export namespace BenchmarkJobView {
+  /**
+   * Outcome data for a single benchmark run within a benchmark job, representing
+   * results for one agent configuration.
+   */
+  export interface BenchmarkOutcome {
+    /**
+     * The name of the agent configuration used.
+     */
+    agent_name: string;
+
+    /**
+     * The ID of the benchmark run.
+     */
+    benchmark_run_id: string;
+
+    /**
+     * Number of scenarios that completed successfully.
+     */
+    n_completed: number;
+
+    /**
+     * Number of scenarios that failed.
+     */
+    n_failed: number;
+
+    /**
+     * Number of scenarios that timed out.
+     */
+    n_timeout: number;
+
+    /**
+     * Detailed outcomes for each scenario in this benchmark run.
+     */
+    scenario_outcomes: Array<BenchmarkOutcome.ScenarioOutcome>;
+
+    /**
+     * Average score across all completed scenarios (0.0 to 1.0).
+     */
+    average_score?: number | null;
+
+    /**
+     * Total duration of the benchmark run in milliseconds.
+     */
+    duration_ms?: number | null;
+
+    /**
+     * The model name used by the agent.
+     */
+    model_name?: string | null;
+  }
+
+  export namespace BenchmarkOutcome {
+    /**
+     * Outcome data for a single scenario execution, including its final state and
+     * scoring results.
+     */
+    export interface ScenarioOutcome {
+      /**
+       * The ID of the scenario definition that was executed.
+       */
+      scenario_definition_id: string;
+
+      /**
+       * The name of the scenario.
+       */
+      scenario_name: string;
+
+      /**
+       * The ID of the scenario run.
+       */
+      scenario_run_id: string;
+
+      /**
+       * The final state of the scenario execution.
+       */
+      state: 'COMPLETED' | 'FAILED' | 'TIMEOUT' | 'CANCELED';
+
+      /**
+       * Duration of the scenario execution in milliseconds.
+       */
+      duration_ms?: number | null;
+
+      /**
+       * Failure information if the scenario failed or timed out. Contains exception type
+       * and message.
+       */
+      failure_reason?: ScenarioOutcome.FailureReason | null;
+
+      /**
+       * The score achieved for this scenario (0.0 to 1.0). Only present if state is
+       * COMPLETED.
+       */
+      score?: number | null;
+    }
+
+    export namespace ScenarioOutcome {
+      /**
+       * Failure information if the scenario failed or timed out. Contains exception type
+       * and message.
+       */
+      export interface FailureReason {
+        /**
+         * The exception message providing context
+         */
+        exception_message: string;
+
+        /**
+         * The exception class name (e.g., 'TimeoutException', 'AgentTimeoutError')
+         */
+        exception_type: string;
+      }
+    }
+  }
+
+  /**
+   * A lightweight view of a benchmark run currently in progress, showing basic
+   * execution details without full outcome data.
+   */
+  export interface InProgressRun {
+    /**
+     * The ID of the benchmark run.
+     */
+    benchmark_run_id: string;
+
+    /**
+     * Start time (Unix milliseconds).
+     */
+    start_time_ms: number;
+
+    /**
+     * The current state of the run.
+     */
+    state: 'running' | 'canceled' | 'completed';
+
+    /**
+     * Agent configuration used for this run. Specifies whether the run was driven by
+     * an external API agent or a job-defined agent.
+     */
+    agent_config?: InProgressRun.ExternalAPIAgentConfig | InProgressRun.JobAgentConfig | null;
+
+    /**
+     * Duration so far in milliseconds.
+     */
+    duration_ms?: number | null;
+  }
+
+  export namespace InProgressRun {
+    /**
+     * Configuration for externally-driven benchmark runs via API
+     */
+    export interface ExternalAPIAgentConfig {
+      type: 'external_api';
+
+      /**
+       * Placeholder for future external agent metadata
+       */
+      info?: string | null;
+    }
+
+    /**
+     * Configuration for an agent in a benchmark job
+     */
+    export interface JobAgentConfig {
+      /**
+       * Name of the agent
+       */
+      name: string;
+
+      type: 'job_agent';
+
+      /**
+       * Environment configuration to use for this agent
+       */
+      agent_environment?: JobAgentConfig.AgentEnvironment | null;
+
+      /**
+       * ID of the agent to use (optional if agent exists by name)
+       */
+      agent_id?: string | null;
+
+      /**
+       * Additional kwargs for agent configuration
+       */
+      kwargs?: { [key: string]: string } | null;
+
+      /**
+       * Model name override for this agent
+       */
+      model_name?: string | null;
+
+      /**
+       * Timeout in seconds for this agent
+       */
+      timeout_seconds?: number | null;
+    }
+
+    export namespace JobAgentConfig {
+      /**
+       * Environment configuration to use for this agent
+       */
+      export interface AgentEnvironment {
+        /**
+         * Environment variables to set when launching the agent.
+         */
+        environment_variables?: { [key: string]: string } | null;
+
+        /**
+         * Secrets to inject as environment variables when launching the agent. Map of
+         * environment variable names to secret IDs.
+         */
+        secrets?: { [key: string]: string } | null;
+      }
+    }
+  }
+
+  /**
+   * Harbor job source with inline YAML configuration
+   */
+  export interface HarborJobSource {
+    /**
+     * The Harbor job configuration as inline YAML content
+     */
+    inline_yaml: string;
+
+    type: 'harbor';
+  }
+
+  /**
+   * Benchmark definition job source
+   */
+  export interface BenchmarkDefJobSource {
+    /**
+     * The ID of the benchmark definition
+     */
+    benchmark_id: string;
+
+    type: 'benchmark';
+
+    /**
+     * Optional user-provided name for the benchmark definition
+     */
+    benchmark_name?: string | null;
+  }
+
+  /**
+   * Scenarios job source with a list of scenario definition IDs
+   */
+  export interface ScenariosJobSource {
+    /**
+     * List of scenario definition IDs to execute
+     */
+    scenario_ids: Array<string>;
+
+    type: 'scenarios';
+  }
+
+  /**
+   * The resolved job specification. Contains scenarios, agents, and orchestrator
+   * config.
+   */
+  export interface JobSpec {
+    /**
+     * Agent configurations for this job
+     */
+    agent_configs: Array<JobSpec.AgentConfig>;
+
+    /**
+     * List of scenario IDs to execute
+     */
+    scenario_ids: Array<string>;
+
+    /**
+     * Orchestrator configuration
+     */
+    orchestrator_config?: JobSpec.OrchestratorConfig | null;
+  }
+
+  export namespace JobSpec {
+    /**
+     * Configuration for an agent in a benchmark job
+     */
+    export interface AgentConfig {
+      /**
+       * Name of the agent
+       */
+      name: string;
+
+      type: 'job_agent';
+
+      /**
+       * Environment configuration to use for this agent
+       */
+      agent_environment?: AgentConfig.AgentEnvironment | null;
+
+      /**
+       * ID of the agent to use (optional if agent exists by name)
+       */
+      agent_id?: string | null;
+
+      /**
+       * Additional kwargs for agent configuration
+       */
+      kwargs?: { [key: string]: string } | null;
+
+      /**
+       * Model name override for this agent
+       */
+      model_name?: string | null;
+
+      /**
+       * Timeout in seconds for this agent
+       */
+      timeout_seconds?: number | null;
+    }
+
+    export namespace AgentConfig {
+      /**
+       * Environment configuration to use for this agent
+       */
+      export interface AgentEnvironment {
+        /**
+         * Environment variables to set when launching the agent.
+         */
+        environment_variables?: { [key: string]: string } | null;
+
+        /**
+         * Secrets to inject as environment variables when launching the agent. Map of
+         * environment variable names to secret IDs.
+         */
+        secrets?: { [key: string]: string } | null;
+      }
+    }
+
+    /**
+     * Orchestrator configuration
+     */
+    export interface OrchestratorConfig {
+      /**
+       * Number of retry attempts on failure (default: 0). This is the retry policy for
+       * failed scenarios.
+       */
+      n_attempts?: number | null;
+
+      /**
+       * Number of concurrent trials to run (default: 1). Controls parallelism for
+       * scenario execution.
+       */
+      n_concurrent_trials?: number | null;
+
+      /**
+       * Suppress verbose output (default: false)
+       */
+      quiet?: boolean | null;
+
+      /**
+       * Timeout multiplier for retries (default: 1.0). Each retry will multiply the
+       * timeout by this factor.
+       */
+      timeout_multiplier?: number | null;
+    }
+  }
+}
+
+export interface BenchmarkJobCreateParams {
+  /**
+   * The name of the BenchmarkJob. If not provided, name will be generated based on
+   * target dataset.
+   */
+  name?: string | null;
+
+  /**
+   * The job specification. Exactly one spec type must be set.
+   */
+  spec?:
+    | BenchmarkJobCreateParams.HarborJobSpec
+    | BenchmarkJobCreateParams.BenchmarkDefinitionJobSpec
+    | BenchmarkJobCreateParams.ScenarioDefinitionJobSpec
+    | null;
+}
+
+export namespace BenchmarkJobCreateParams {
+  /**
+   * Harbor-based job specification with inline YAML configuration.
+   */
+  export interface HarborJobSpec {
+    /**
+     * The Harbor job configuration as inline YAML content.
+     */
+    inline_yaml: string;
+
+    type: 'harbor';
+  }
+
+  /**
+   * Specifies a benchmark definition with runtime configuration. The benchmark
+   * definition's scenarios will be executed using the provided agent and
+   * orchestrator configurations.
+   */
+  export interface BenchmarkDefinitionJobSpec {
+    /**
+     * Agent configurations to use for this run. Must specify at least one agent.
+     */
+    agent_configs: Array<BenchmarkDefinitionJobSpec.AgentConfig>;
+
+    /**
+     * ID of the benchmark definition to run. The scenarios from this benchmark will be
+     * executed.
+     */
+    benchmark_id: string;
+
+    type: 'benchmark';
+
+    /**
+     * Orchestrator configuration (optional overrides). If not provided, default values
+     * will be used.
+     */
+    orchestrator_config?: BenchmarkDefinitionJobSpec.OrchestratorConfig | null;
+  }
+
+  export namespace BenchmarkDefinitionJobSpec {
+    /**
+     * Configuration for an agent in a benchmark job
+     */
+    export interface AgentConfig {
+      /**
+       * Name of the agent
+       */
+      name: string;
+
+      type: 'job_agent';
+
+      /**
+       * Environment configuration to use for this agent
+       */
+      agent_environment?: AgentConfig.AgentEnvironment | null;
+
+      /**
+       * ID of the agent to use (optional if agent exists by name)
+       */
+      agent_id?: string | null;
+
+      /**
+       * Additional kwargs for agent configuration
+       */
+      kwargs?: { [key: string]: string } | null;
+
+      /**
+       * Model name override for this agent
+       */
+      model_name?: string | null;
+
+      /**
+       * Timeout in seconds for this agent
+       */
+      timeout_seconds?: number | null;
+    }
+
+    export namespace AgentConfig {
+      /**
+       * Environment configuration to use for this agent
+       */
+      export interface AgentEnvironment {
+        /**
+         * Environment variables to set when launching the agent.
+         */
+        environment_variables?: { [key: string]: string } | null;
+
+        /**
+         * Secrets to inject as environment variables when launching the agent. Map of
+         * environment variable names to secret IDs.
+         */
+        secrets?: { [key: string]: string } | null;
+      }
+    }
+
+    /**
+     * Orchestrator configuration (optional overrides). If not provided, default values
+     * will be used.
+     */
+    export interface OrchestratorConfig {
+      /**
+       * Number of retry attempts on failure (default: 0). This is the retry policy for
+       * failed scenarios.
+       */
+      n_attempts?: number | null;
+
+      /**
+       * Number of concurrent trials to run (default: 1). Controls parallelism for
+       * scenario execution.
+       */
+      n_concurrent_trials?: number | null;
+
+      /**
+       * Suppress verbose output (default: false)
+       */
+      quiet?: boolean | null;
+
+      /**
+       * Timeout multiplier for retries (default: 1.0). Each retry will multiply the
+       * timeout by this factor.
+       */
+      timeout_multiplier?: number | null;
+    }
+  }
+
+  /**
+   * Specifies a set of scenarios with runtime configuration. The scenarios will be
+   * executed using the provided agent and orchestrator configurations.
+   */
+  export interface ScenarioDefinitionJobSpec {
+    /**
+     * Agent configurations to use for this run. Must specify at least one agent.
+     */
+    agent_configs: Array<ScenarioDefinitionJobSpec.AgentConfig>;
+
+    /**
+     * List of scenario IDs to execute
+     */
+    scenario_ids: Array<string>;
+
+    type: 'scenarios';
+
+    /**
+     * Orchestrator configuration (optional overrides). If not provided, default values
+     * will be used.
+     */
+    orchestrator_config?: ScenarioDefinitionJobSpec.OrchestratorConfig | null;
+  }
+
+  export namespace ScenarioDefinitionJobSpec {
+    /**
+     * Configuration for an agent in a benchmark job
+     */
+    export interface AgentConfig {
+      /**
+       * Name of the agent
+       */
+      name: string;
+
+      type: 'job_agent';
+
+      /**
+       * Environment configuration to use for this agent
+       */
+      agent_environment?: AgentConfig.AgentEnvironment | null;
+
+      /**
+       * ID of the agent to use (optional if agent exists by name)
+       */
+      agent_id?: string | null;
+
+      /**
+       * Additional kwargs for agent configuration
+       */
+      kwargs?: { [key: string]: string } | null;
+
+      /**
+       * Model name override for this agent
+       */
+      model_name?: string | null;
+
+      /**
+       * Timeout in seconds for this agent
+       */
+      timeout_seconds?: number | null;
+    }
+
+    export namespace AgentConfig {
+      /**
+       * Environment configuration to use for this agent
+       */
+      export interface AgentEnvironment {
+        /**
+         * Environment variables to set when launching the agent.
+         */
+        environment_variables?: { [key: string]: string } | null;
+
+        /**
+         * Secrets to inject as environment variables when launching the agent. Map of
+         * environment variable names to secret IDs.
+         */
+        secrets?: { [key: string]: string } | null;
+      }
+    }
+
+    /**
+     * Orchestrator configuration (optional overrides). If not provided, default values
+     * will be used.
+     */
+    export interface OrchestratorConfig {
+      /**
+       * Number of retry attempts on failure (default: 0). This is the retry policy for
+       * failed scenarios.
+       */
+      n_attempts?: number | null;
+
+      /**
+       * Number of concurrent trials to run (default: 1). Controls parallelism for
+       * scenario execution.
+       */
+      n_concurrent_trials?: number | null;
+
+      /**
+       * Suppress verbose output (default: false)
+       */
+      quiet?: boolean | null;
+
+      /**
+       * Timeout multiplier for retries (default: 1.0). Each retry will multiply the
+       * timeout by this factor.
+       */
+      timeout_multiplier?: number | null;
+    }
+  }
+}
+
+export interface BenchmarkJobListParams {
+  /**
+   * The limit of items to return. Default is 20. Max is 5000.
+   */
+  limit?: number;
+
+  /**
+   * Filter by name
+   */
+  name?: string;
+
+  /**
+   * Load the next page of data starting after the item with the given ID.
+   */
+  starting_after?: string;
+}
+
+export declare namespace BenchmarkJobs {
+  export {
+    type BenchmarkJobCreateParameters as BenchmarkJobCreateParameters,
+    type BenchmarkJobListView as BenchmarkJobListView,
+    type BenchmarkJobView as BenchmarkJobView,
+    type BenchmarkJobCreateParams as BenchmarkJobCreateParams,
+    type BenchmarkJobListParams as BenchmarkJobListParams,
+  };
+}
diff --git a/src/resources/index.ts b/src/resources/index.ts
index 1afde0ca7..cca5276d9 100644
--- a/src/resources/index.ts
+++ b/src/resources/index.ts
@@ -10,6 +10,14 @@ export {
   type AgentCreateParams,
   type AgentListParams,
 } from './agents';
+export {
+  BenchmarkJobs,
+  type BenchmarkJobCreateParameters,
+  type BenchmarkJobListView,
+  type BenchmarkJobView,
+  type BenchmarkJobCreateParams,
+  type BenchmarkJobListParams,
+} from './benchmark-jobs';
 export {
   BenchmarkRunViewsBenchmarkRunsCursorIDPage,
   BenchmarkRuns,
diff --git a/src/version.ts b/src/version.ts
index b4178bdcc..8d95974a6 100644
--- a/src/version.ts
+++ b/src/version.ts
@@ -1 +1 @@
-export const VERSION = '1.3.0-2'; // x-release-please-version
+export const VERSION = '1.3.1'; // x-release-please-version
diff --git a/tests/api-resources/benchmark-jobs.test.ts b/tests/api-resources/benchmark-jobs.test.ts
new file mode 100644
index 000000000..8703928ab
--- /dev/null
+++ b/tests/api-resources/benchmark-jobs.test.ts
@@ -0,0 +1,92 @@
+// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+import Runloop from '@runloop/api-client';
+import { Response } from 'node-fetch';
+
+const client = new Runloop({
+  bearerToken: 'My Bearer Token',
+  baseURL: process.env['TEST_API_BASE_URL'] ?? 'http://127.0.0.1:4010', // local default when TEST_API_BASE_URL is unset
+});
+
+describe('resource benchmarkJobs', () => {
+  test('create', async () => {
+    const responsePromise = client.benchmarkJobs.create();
+    const rawResponse = await responsePromise.asResponse();
+    expect(rawResponse).toBeInstanceOf(Response);
+    const response = await responsePromise;
+    expect(response).not.toBeInstanceOf(Response);
+    const dataAndResponse = await responsePromise.withResponse();
+    expect(dataAndResponse.data).toBe(response);
+    expect(dataAndResponse.response).toBe(rawResponse);
+  });
+
+  test('create: request options instead of params are passed correctly', async () => {
+    // ensure the request options are being passed correctly by passing an unknown path in order to cause a NotFoundError
+    await expect(client.benchmarkJobs.create({ path: '/_stainless_unknown_path' })).rejects.toThrow(
+      Runloop.NotFoundError,
+    );
+  });
+
+  test('create: request options and params are passed correctly', async () => {
+    // ensure the request options are being passed correctly by passing an unknown path in order to cause a NotFoundError
+    await expect(
+      client.benchmarkJobs.create(
+        {
+          name: 'name',
+          spec: { inline_yaml: 'inline_yaml', type: 'harbor' },
+        },
+        { path: '/_stainless_unknown_path' },
+      ),
+    ).rejects.toThrow(Runloop.NotFoundError);
+  });
+
+  test('retrieve', async () => {
+    const responsePromise = client.benchmarkJobs.retrieve('id');
+    const rawResponse = await responsePromise.asResponse();
+    expect(rawResponse).toBeInstanceOf(Response);
+    const response = await responsePromise;
+    expect(response).not.toBeInstanceOf(Response);
+    const dataAndResponse = await responsePromise.withResponse();
+    expect(dataAndResponse.data).toBe(response);
+    expect(dataAndResponse.response).toBe(rawResponse);
+  });
+
+  test('retrieve: request options instead of params are passed correctly', async () => {
+    // ensure the request options are being passed correctly by passing an unknown path in order to cause a NotFoundError
+    await expect(client.benchmarkJobs.retrieve('id', { path: '/_stainless_unknown_path' })).rejects.toThrow(
+      Runloop.NotFoundError,
+    );
+  });
+
+  test('list', async () => {
+    const responsePromise = client.benchmarkJobs.list();
+    const rawResponse = await responsePromise.asResponse();
+    expect(rawResponse).toBeInstanceOf(Response);
+    const response = await responsePromise;
+    expect(response).not.toBeInstanceOf(Response);
+    const dataAndResponse = await responsePromise.withResponse();
+    expect(dataAndResponse.data).toBe(response);
+    expect(dataAndResponse.response).toBe(rawResponse);
+  });
+
+  test('list: request options instead of params are passed correctly', async () => {
+    // ensure the request options are being passed correctly by passing an unknown path in order to cause a NotFoundError
+    await expect(client.benchmarkJobs.list({ path: '/_stainless_unknown_path' })).rejects.toThrow(
+      Runloop.NotFoundError,
+    );
+  });
+
+  test('list: request options and params are passed correctly', async () => {
+    // ensure the request options are being passed correctly by passing an unknown path in order to cause a NotFoundError
+    await expect(
+      client.benchmarkJobs.list(
+        {
+          limit: 0,
+          name: 'name',
+          starting_after: 'starting_after',
+        },
+        { path: '/_stainless_unknown_path' },
+      ),
+    ).rejects.toThrow(Runloop.NotFoundError);
+  });
+});