diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0ce1d6b73..5233fcbf0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -57,7 +57,7 @@ jobs: - name: Get GitHub OIDC Token if: github.repository == 'stainless-sdks/runloop-node' id: github-oidc - uses: runloopai/github-script@main + uses: actions/github-script@v8 with: script: core.setOutput('github_token', await core.getIDToken()); diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 96f1cd949..9049e2fdf 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "1.3.0" + ".": "1.3.1" } diff --git a/.stats.yml b/.stats.yml index 5eb10a624..f28b394ab 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ -configured_endpoints: 103 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/runloop-ai%2Frunloop-5359067a857aa94f69bae0d3311856be3e637da067fdc9dbf8bd26fe476efbd8.yml -openapi_spec_hash: 5227ef7c306d5226c3aee8932b2e8c6a -config_hash: cb43d4ca9e64d5a099199d6818d70539 +configured_endpoints: 106 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/runloop-ai%2Frunloop-63dab7833d6670810c4f4882df560ebbfe2de8e8e1a98d51422368607b5335ae.yml +openapi_spec_hash: ebb5068064f7469f9239b18a51a6fe44 +config_hash: fd168de77f219e46a1427bbec2eecfb9 diff --git a/CHANGELOG.md b/CHANGELOG.md index c7de9a1e4..6e5916535 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,25 @@ # Changelog +## 1.3.1 (2026-01-28) + +Full Changelog: [v1.3.0...v1.3.1](https://github.com/runloopai/api-client-ts/compare/v1.3.0...v1.3.1) + +### Features + +* **benchmark:** adding in progress benchmark runs for benchmark jobs ([#7183](https://github.com/runloopai/api-client-ts/issues/7183)) ([65f75d9](https://github.com/runloopai/api-client-ts/commit/65f75d9f1711422459bc952c7833ef8f8e750477)) + + +### Bug Fixes + +* smoketest workflow workflow_call event checkout on caller repo 
([#687](https://github.com/runloopai/api-client-ts/issues/687)) ([77d82a9](https://github.com/runloopai/api-client-ts/commit/77d82a97fc468aafb29780d472f90c24b1a0041a)) +* updating the readme ([#692](https://github.com/runloopai/api-client-ts/issues/692)) ([80fb27a](https://github.com/runloopai/api-client-ts/commit/80fb27a9f7f4b99c3581b5121dae0c6346064f6c)) + + +### Chores + +* **ci:** upgrade `actions/github-script` ([9417f0c](https://github.com/runloopai/api-client-ts/commit/9417f0c97c6811ee7147db7942104be46e581f6f)) +* remove api docs readme reference ([#690](https://github.com/runloopai/api-client-ts/issues/690)) ([dd16be0](https://github.com/runloopai/api-client-ts/commit/dd16be0ff1797427d813a5bc98a6a8cc1583a57a)) + ## 1.3.0 (2026-01-22) Full Changelog: [v1.2.0-beta-2...v1.3.0](https://github.com/runloopai/api-client-ts/compare/v1.2.0-beta-2...v1.3.0) diff --git a/api.md b/api.md index 36cb120cf..053ac5095 100644 --- a/api.md +++ b/api.md @@ -48,6 +48,20 @@ Methods: - client.benchmarkRuns.complete(id) -> BenchmarkRunView - client.benchmarkRuns.listScenarioRuns(id, { ...params }) -> ScenarioRunViewsBenchmarkRunsCursorIDPage +# BenchmarkJobs + +Types: + +- BenchmarkJobCreateParameters +- BenchmarkJobListView +- BenchmarkJobView + +Methods: + +- client.benchmarkJobs.create({ ...params }) -> BenchmarkJobView +- client.benchmarkJobs.retrieve(id) -> BenchmarkJobView +- client.benchmarkJobs.list({ ...params }) -> BenchmarkJobListView + # Agents Types: diff --git a/package.json b/package.json index 3dcd70fd3..657ddcb35 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@runloop/api-client", - "version": "1.3.0", + "version": "1.3.1", "description": "The official TypeScript library for the Runloop API", "author": "Runloop ", "types": "dist/sdk.d.ts", diff --git a/src/index.ts b/src/index.ts index b73020579..5ab00b0f2 100644 --- a/src/index.ts +++ b/src/index.ts @@ -41,6 +41,14 @@ import { AgentViewsAgentsCursorIDPage, Agents, } from 
'./resources/agents'; +import { + BenchmarkJobCreateParameters, + BenchmarkJobCreateParams, + BenchmarkJobListParams, + BenchmarkJobListView, + BenchmarkJobView, + BenchmarkJobs, +} from './resources/benchmark-jobs'; import { BenchmarkRunListParams, BenchmarkRunListScenarioRunsParams, @@ -324,6 +332,7 @@ export class Runloop extends Core.APIClient { benchmarks: API.Benchmarks = new API.Benchmarks(this); benchmarkRuns: API.BenchmarkRuns = new API.BenchmarkRuns(this); + benchmarkJobs: API.BenchmarkJobs = new API.BenchmarkJobs(this); agents: API.Agents = new API.Agents(this); blueprints: API.Blueprints = new API.Blueprints(this); devboxes: API.Devboxes = new API.Devboxes(this); @@ -380,6 +389,7 @@ Runloop.Benchmarks = Benchmarks; Runloop.BenchmarkViewsBenchmarksCursorIDPage = BenchmarkViewsBenchmarksCursorIDPage; Runloop.BenchmarkRuns = BenchmarkRuns; Runloop.BenchmarkRunViewsBenchmarkRunsCursorIDPage = BenchmarkRunViewsBenchmarkRunsCursorIDPage; +Runloop.BenchmarkJobs = BenchmarkJobs; Runloop.Agents = Agents; Runloop.AgentViewsAgentsCursorIDPage = AgentViewsAgentsCursorIDPage; Runloop.Blueprints = Blueprints; @@ -499,6 +509,15 @@ export declare namespace Runloop { type BenchmarkRunListScenarioRunsParams as BenchmarkRunListScenarioRunsParams, }; + export { + BenchmarkJobs as BenchmarkJobs, + type BenchmarkJobCreateParameters as BenchmarkJobCreateParameters, + type BenchmarkJobListView as BenchmarkJobListView, + type BenchmarkJobView as BenchmarkJobView, + type BenchmarkJobCreateParams as BenchmarkJobCreateParams, + type BenchmarkJobListParams as BenchmarkJobListParams, + }; + export { Agents as Agents, type AgentCreateParameters as AgentCreateParameters, diff --git a/src/resources/benchmark-jobs.ts b/src/resources/benchmark-jobs.ts new file mode 100644 index 000000000..e58a0992b --- /dev/null +++ b/src/resources/benchmark-jobs.ts @@ -0,0 +1,1017 @@ +// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +import { APIResource } from '../resource'; +import { isRequestOptions } from '../core'; +import * as Core from '../core'; + +export class BenchmarkJobs extends APIResource { + /** + * [Beta] Create a BenchmarkJob that runs a set of scenarios entirely on runloop. + */ + create(body?: BenchmarkJobCreateParams, options?: Core.RequestOptions): Core.APIPromise<BenchmarkJobView>; + create(options?: Core.RequestOptions): Core.APIPromise<BenchmarkJobView>; + create( + body: BenchmarkJobCreateParams | Core.RequestOptions = {}, + options?: Core.RequestOptions, + ): Core.APIPromise<BenchmarkJobView> { + if (isRequestOptions(body)) { + return this.create({}, body); + } + return this._client.post('/v1/benchmark_jobs', { body, ...options }); + } + + /** + * [Beta] Get a BenchmarkJob given ID. + */ + retrieve(id: string, options?: Core.RequestOptions): Core.APIPromise<BenchmarkJobView> { + return this._client.get(`/v1/benchmark_jobs/${id}`, options); + } + + /** + * [Beta] List all BenchmarkJobs matching filter. + */ + list(query?: BenchmarkJobListParams, options?: Core.RequestOptions): Core.APIPromise<BenchmarkJobListView>; + list(options?: Core.RequestOptions): Core.APIPromise<BenchmarkJobListView>; + list( + query: BenchmarkJobListParams | Core.RequestOptions = {}, + options?: Core.RequestOptions, + ): Core.APIPromise<BenchmarkJobListView> { + if (isRequestOptions(query)) { + return this.list({}, query); + } + return this._client.get('/v1/benchmark_jobs', { query, ...options }); + } +} + +/** + * BenchmarkJobCreateParameters contain the set of parameters to create a + * BenchmarkJob. + */ +export interface BenchmarkJobCreateParameters { + /** + * The name of the BenchmarkJob. If not provided, name will be generated based on + * target dataset. + */ + name?: string | null; + + /** + * The job specification. Exactly one spec type must be set. 
+ */ + spec?: + | BenchmarkJobCreateParameters.HarborJobSpec + | BenchmarkJobCreateParameters.BenchmarkDefinitionJobSpec + | BenchmarkJobCreateParameters.ScenarioDefinitionJobSpec + | null; +} + +export namespace BenchmarkJobCreateParameters { + /** + * Harbor-based job specification with inline YAML configuration. + */ + export interface HarborJobSpec { + /** + * The Harbor job configuration as inline YAML content. + */ + inline_yaml: string; + + type: 'harbor'; + } + + /** + * Specifies a benchmark definition with runtime configuration. The benchmark + * definition's scenarios will be executed using the provided agent and + * orchestrator configurations. + */ + export interface BenchmarkDefinitionJobSpec { + /** + * Agent configurations to use for this run. Must specify at least one agent. + */ + agent_configs: Array<BenchmarkDefinitionJobSpec.AgentConfig>; + + /** + * ID of the benchmark definition to run. The scenarios from this benchmark will be + * executed. + */ + benchmark_id: string; + + type: 'benchmark'; + + /** + * Orchestrator configuration (optional overrides). If not provided, default values + * will be used. 
+ */ + orchestrator_config?: BenchmarkDefinitionJobSpec.OrchestratorConfig | null; + } + + export namespace BenchmarkDefinitionJobSpec { + /** + * Configuration for an agent in a benchmark job + */ + export interface AgentConfig { + /** + * Name of the agent + */ + name: string; + + type: 'job_agent'; + + /** + * Environment configuration to use for this agent + */ + agent_environment?: AgentConfig.AgentEnvironment | null; + + /** + * ID of the agent to use (optional if agent exists by name) + */ + agent_id?: string | null; + + /** + * Additional kwargs for agent configuration + */ + kwargs?: { [key: string]: string } | null; + + /** + * Model name override for this agent + */ + model_name?: string | null; + + /** + * Timeout in seconds for this agent + */ + timeout_seconds?: number | null; + } + + export namespace AgentConfig { + /** + * Environment configuration to use for this agent + */ + export interface AgentEnvironment { + /** + * Environment variables to set when launching the agent. + */ + environment_variables?: { [key: string]: string } | null; + + /** + * Secrets to inject as environment variables when launching the agent. Map of + * environment variable names to secret IDs. + */ + secrets?: { [key: string]: string } | null; + } + } + + /** + * Orchestrator configuration (optional overrides). If not provided, default values + * will be used. + */ + export interface OrchestratorConfig { + /** + * Number of retry attempts on failure (default: 0). This is the retry policy for + * failed scenarios. Default is 0. + */ + n_attempts?: number | null; + + /** + * Number of concurrent trials to run (default: 1). Controls parallelism for + * scenario execution. Default is 1. + */ + n_concurrent_trials?: number | null; + + /** + * Suppress verbose output (default: false) + */ + quiet?: boolean | null; + + /** + * Timeout multiplier for retries (default: 1.0). Each retry will multiply the + * timeout by this factor. 
+ */ + timeout_multiplier?: number | null; + } + } + + /** + * Specifies a set of scenarios with runtime configuration. The scenarios will be + * executed using the provided agent and orchestrator configurations. + */ + export interface ScenarioDefinitionJobSpec { + /** + * Agent configurations to use for this run. Must specify at least one agent. + */ + agent_configs: Array<ScenarioDefinitionJobSpec.AgentConfig>; + + /** + * List of scenario IDs to execute + */ + scenario_ids: Array<string>; + + type: 'scenarios'; + + /** + * Orchestrator configuration (optional overrides). If not provided, default values + * will be used. + */ + orchestrator_config?: ScenarioDefinitionJobSpec.OrchestratorConfig | null; + } + + export namespace ScenarioDefinitionJobSpec { + /** + * Configuration for an agent in a benchmark job + */ + export interface AgentConfig { + /** + * Name of the agent + */ + name: string; + + type: 'job_agent'; + + /** + * Environment configuration to use for this agent + */ + agent_environment?: AgentConfig.AgentEnvironment | null; + + /** + * ID of the agent to use (optional if agent exists by name) + */ + agent_id?: string | null; + + /** + * Additional kwargs for agent configuration + */ + kwargs?: { [key: string]: string } | null; + + /** + * Model name override for this agent + */ + model_name?: string | null; + + /** + * Timeout in seconds for this agent + */ + timeout_seconds?: number | null; + } + + export namespace AgentConfig { + /** + * Environment configuration to use for this agent + */ + export interface AgentEnvironment { + /** + * Environment variables to set when launching the agent. + */ + environment_variables?: { [key: string]: string } | null; + + /** + * Secrets to inject as environment variables when launching the agent. Map of + * environment variable names to secret IDs. + */ + secrets?: { [key: string]: string } | null; + } + } + + /** + * Orchestrator configuration (optional overrides). If not provided, default values + * will be used. 
+ */ + export interface OrchestratorConfig { + /** + * Number of retry attempts on failure (default: 0). This is the retry policy for + * failed scenarios. Default is 0. + */ + n_attempts?: number | null; + + /** + * Number of concurrent trials to run (default: 1). Controls parallelism for + * scenario execution. Default is 1. + */ + n_concurrent_trials?: number | null; + + /** + * Suppress verbose output (default: false) + */ + quiet?: boolean | null; + + /** + * Timeout multiplier for retries (default: 1.0). Each retry will multiply the + * timeout by this factor. + */ + timeout_multiplier?: number | null; + } + } +} + +export interface BenchmarkJobListView { + has_more: boolean; + + /** + * List of BenchmarkJobs matching filter. + */ + jobs: Array<BenchmarkJobView>; + + remaining_count: number; + + total_count: number; +} + +/** + * A BenchmarkJobView represents a benchmark job that runs a set of scenarios + * entirely on runloop. + */ +export interface BenchmarkJobView { + /** + * The ID of the BenchmarkJob. + */ + id: string; + + /** + * Timestamp when job was created (Unix milliseconds). + */ + create_time_ms: number; + + /** + * The unique name of the BenchmarkJob. + */ + name: string; + + /** + * The current state of the benchmark job. + */ + state: 'initializing' | 'queued' | 'running' | 'completed' | 'failed' | 'cancelled' | 'timeout'; + + /** + * Detailed outcome data for each benchmark run created by this job. Includes + * per-agent results and scenario-level details. + */ + benchmark_outcomes?: Array<BenchmarkJobView.BenchmarkOutcome> | null; + + /** + * Failure reason if job failed. + */ + failure_reason?: string | null; + + /** + * Benchmark runs currently in progress for this job. Shows runs that have not yet + * completed. + */ + in_progress_runs?: Array<BenchmarkJobView.InProgressRun> | null; + + /** + * The source configuration that was used to create this job. Either Harbor YAML or + * benchmark definition reference. 
+ */ + job_source?: + | BenchmarkJobView.HarborJobSource + | BenchmarkJobView.BenchmarkDefJobSource + | BenchmarkJobView.ScenariosJobSource + | null; + + /** + * The resolved job specification. Contains scenarios, agents, and orchestrator + * config. + */ + job_spec?: BenchmarkJobView.JobSpec | null; +} + +export namespace BenchmarkJobView { + /** + * Outcome data for a single benchmark run within a benchmark job, representing + * results for one agent configuration. + */ + export interface BenchmarkOutcome { + /** + * The name of the agent configuration used. + */ + agent_name: string; + + /** + * The ID of the benchmark run. + */ + benchmark_run_id: string; + + /** + * Number of scenarios that completed successfully. + */ + n_completed: number; + + /** + * Number of scenarios that failed. + */ + n_failed: number; + + /** + * Number of scenarios that timed out. + */ + n_timeout: number; + + /** + * Detailed outcomes for each scenario in this benchmark run. + */ + scenario_outcomes: Array<BenchmarkOutcome.ScenarioOutcome>; + + /** + * Average score across all completed scenarios (0.0 to 1.0). + */ + average_score?: number | null; + + /** + * Total duration of the benchmark run in milliseconds. + */ + duration_ms?: number | null; + + /** + * The model name used by the agent. + */ + model_name?: string | null; + } + + export namespace BenchmarkOutcome { + /** + * Outcome data for a single scenario execution, including its final state and + * scoring results. + */ + export interface ScenarioOutcome { + /** + * The ID of the scenario definition that was executed. + */ + scenario_definition_id: string; + + /** + * The name of the scenario. + */ + scenario_name: string; + + /** + * The ID of the scenario run. + */ + scenario_run_id: string; + + /** + * The final state of the scenario execution. + */ + state: 'COMPLETED' | 'FAILED' | 'TIMEOUT' | 'CANCELED'; + + /** + * Duration of the scenario execution in milliseconds. 
+ */ + duration_ms?: number | null; + + /** + * Failure information if the scenario failed or timed out. Contains exception type + * and message. + */ + failure_reason?: ScenarioOutcome.FailureReason | null; + + /** + * The score achieved for this scenario (0.0 to 1.0). Only present if state is + * COMPLETED. + */ + score?: number | null; + } + + export namespace ScenarioOutcome { + /** + * Failure information if the scenario failed or timed out. Contains exception type + * and message. + */ + export interface FailureReason { + /** + * The exception message providing context + */ + exception_message: string; + + /** + * The exception class name (e.g., 'TimeoutException', 'AgentTimeoutError') + */ + exception_type: string; + } + } + } + + /** + * A lightweight view of a benchmark run currently in progress, showing basic + * execution details without full outcome data. + */ + export interface InProgressRun { + /** + * The ID of the benchmark run. + */ + benchmark_run_id: string; + + /** + * Start time (Unix milliseconds). + */ + start_time_ms: number; + + /** + * The current state of the run. + */ + state: 'running' | 'canceled' | 'completed'; + + /** + * Agent configuration used for this run. Specifies whether the run was driven by + * an external API agent or a job-defined agent. + */ + agent_config?: InProgressRun.ExternalAPIAgentConfig | InProgressRun.JobAgentConfig | null; + + /** + * Duration so far in milliseconds. 
+ */ + duration_ms?: number | null; + } + + export namespace InProgressRun { + /** + * Configuration for externally-driven benchmark runs via API + */ + export interface ExternalAPIAgentConfig { + type: 'external_api'; + + /** + * Placeholder for future external agent metadata + */ + info?: string | null; + } + + /** + * Configuration for an agent in a benchmark job + */ + export interface JobAgentConfig { + /** + * Name of the agent + */ + name: string; + + type: 'job_agent'; + + /** + * Environment configuration to use for this agent + */ + agent_environment?: JobAgentConfig.AgentEnvironment | null; + + /** + * ID of the agent to use (optional if agent exists by name) + */ + agent_id?: string | null; + + /** + * Additional kwargs for agent configuration + */ + kwargs?: { [key: string]: string } | null; + + /** + * Model name override for this agent + */ + model_name?: string | null; + + /** + * Timeout in seconds for this agent + */ + timeout_seconds?: number | null; + } + + export namespace JobAgentConfig { + /** + * Environment configuration to use for this agent + */ + export interface AgentEnvironment { + /** + * Environment variables to set when launching the agent. + */ + environment_variables?: { [key: string]: string } | null; + + /** + * Secrets to inject as environment variables when launching the agent. Map of + * environment variable names to secret IDs. 
+ */ + secrets?: { [key: string]: string } | null; + } + } + } + + /** + * Harbor job source with inline YAML configuration + */ + export interface HarborJobSource { + /** + * The Harbor job configuration as inline YAML content + */ + inline_yaml: string; + + type: 'harbor'; + } + + /** + * Benchmark definition job source + */ + export interface BenchmarkDefJobSource { + /** + * The ID of the benchmark definition + */ + benchmark_id: string; + + type: 'benchmark'; + + /** + * Optional user-provided name for the benchmark definition + */ + benchmark_name?: string | null; + } + + /** + * Scenarios job source with a list of scenario definition IDs + */ + export interface ScenariosJobSource { + /** + * List of scenario definition IDs to execute + */ + scenario_ids: Array<string>; + + type: 'scenarios'; + } + + /** + * The resolved job specification. Contains scenarios, agents, and orchestrator + * config. + */ + export interface JobSpec { + /** + * Agent configurations for this job + */ + agent_configs: Array<JobSpec.AgentConfig>; + + /** + * List of scenario IDs to execute + */ + scenario_ids: Array<string>; + + /** + * Orchestrator configuration + */ + orchestrator_config?: JobSpec.OrchestratorConfig | null; + } + + export namespace JobSpec { + /** + * Configuration for an agent in a benchmark job + */ + export interface AgentConfig { + /** + * Name of the agent + */ + name: string; + + type: 'job_agent'; + + /** + * Environment configuration to use for this agent + */ + agent_environment?: AgentConfig.AgentEnvironment | null; + + /** + * ID of the agent to use (optional if agent exists by name) + */ + agent_id?: string | null; + + /** + * Additional kwargs for agent configuration + */ + kwargs?: { [key: string]: string } | null; + + /** + * Model name override for this agent + */ + model_name?: string | null; + + /** + * Timeout in seconds for this agent + */ + timeout_seconds?: number | null; + } + + export namespace AgentConfig { + /** + * Environment configuration to use for this agent + */ + export 
interface AgentEnvironment { + /** + * Environment variables to set when launching the agent. + */ + environment_variables?: { [key: string]: string } | null; + + /** + * Secrets to inject as environment variables when launching the agent. Map of + * environment variable names to secret IDs. + */ + secrets?: { [key: string]: string } | null; + } + } + + /** + * Orchestrator configuration + */ + export interface OrchestratorConfig { + /** + * Number of retry attempts on failure (default: 0). This is the retry policy for + * failed scenarios. Default is 0. + */ + n_attempts?: number | null; + + /** + * Number of concurrent trials to run (default: 1). Controls parallelism for + * scenario execution. Default is 1. + */ + n_concurrent_trials?: number | null; + + /** + * Suppress verbose output (default: false) + */ + quiet?: boolean | null; + + /** + * Timeout multiplier for retries (default: 1.0). Each retry will multiply the + * timeout by this factor. + */ + timeout_multiplier?: number | null; + } + } +} + +export interface BenchmarkJobCreateParams { + /** + * The name of the BenchmarkJob. If not provided, name will be generated based on + * target dataset. + */ + name?: string | null; + + /** + * The job specification. Exactly one spec type must be set. + */ + spec?: + | BenchmarkJobCreateParams.HarborJobSpec + | BenchmarkJobCreateParams.BenchmarkDefinitionJobSpec + | BenchmarkJobCreateParams.ScenarioDefinitionJobSpec + | null; +} + +export namespace BenchmarkJobCreateParams { + /** + * Harbor-based job specification with inline YAML configuration. + */ + export interface HarborJobSpec { + /** + * The Harbor job configuration as inline YAML content. + */ + inline_yaml: string; + + type: 'harbor'; + } + + /** + * Specifies a benchmark definition with runtime configuration. The benchmark + * definition's scenarios will be executed using the provided agent and + * orchestrator configurations. 
+ */ + export interface BenchmarkDefinitionJobSpec { + /** + * Agent configurations to use for this run. Must specify at least one agent. + */ + agent_configs: Array<BenchmarkDefinitionJobSpec.AgentConfig>; + + /** + * ID of the benchmark definition to run. The scenarios from this benchmark will be + * executed. + */ + benchmark_id: string; + + type: 'benchmark'; + + /** + * Orchestrator configuration (optional overrides). If not provided, default values + * will be used. + */ + orchestrator_config?: BenchmarkDefinitionJobSpec.OrchestratorConfig | null; + } + + export namespace BenchmarkDefinitionJobSpec { + /** + * Configuration for an agent in a benchmark job + */ + export interface AgentConfig { + /** + * Name of the agent + */ + name: string; + + type: 'job_agent'; + + /** + * Environment configuration to use for this agent + */ + agent_environment?: AgentConfig.AgentEnvironment | null; + + /** + * ID of the agent to use (optional if agent exists by name) + */ + agent_id?: string | null; + + /** + * Additional kwargs for agent configuration + */ + kwargs?: { [key: string]: string } | null; + + /** + * Model name override for this agent + */ + model_name?: string | null; + + /** + * Timeout in seconds for this agent + */ + timeout_seconds?: number | null; + } + + export namespace AgentConfig { + /** + * Environment configuration to use for this agent + */ + export interface AgentEnvironment { + /** + * Environment variables to set when launching the agent. + */ + environment_variables?: { [key: string]: string } | null; + + /** + * Secrets to inject as environment variables when launching the agent. Map of + * environment variable names to secret IDs. + */ + secrets?: { [key: string]: string } | null; + } + } + + /** + * Orchestrator configuration (optional overrides). If not provided, default values + * will be used. + */ + export interface OrchestratorConfig { + /** + * Number of retry attempts on failure (default: 0). This is the retry policy for + * failed scenarios. Default is 0. 
+ */ + n_attempts?: number | null; + + /** + * Number of concurrent trials to run (default: 1). Controls parallelism for + * scenario execution. Default is 1. + */ + n_concurrent_trials?: number | null; + + /** + * Suppress verbose output (default: false) + */ + quiet?: boolean | null; + + /** + * Timeout multiplier for retries (default: 1.0). Each retry will multiply the + * timeout by this factor. + */ + timeout_multiplier?: number | null; + } + } + + /** + * Specifies a set of scenarios with runtime configuration. The scenarios will be + * executed using the provided agent and orchestrator configurations. + */ + export interface ScenarioDefinitionJobSpec { + /** + * Agent configurations to use for this run. Must specify at least one agent. + */ + agent_configs: Array<ScenarioDefinitionJobSpec.AgentConfig>; + + /** + * List of scenario IDs to execute + */ + scenario_ids: Array<string>; + + type: 'scenarios'; + + /** + * Orchestrator configuration (optional overrides). If not provided, default values + * will be used. + */ + orchestrator_config?: ScenarioDefinitionJobSpec.OrchestratorConfig | null; + } + + export namespace ScenarioDefinitionJobSpec { + /** + * Configuration for an agent in a benchmark job + */ + export interface AgentConfig { + /** + * Name of the agent + */ + name: string; + + type: 'job_agent'; + + /** + * Environment configuration to use for this agent + */ + agent_environment?: AgentConfig.AgentEnvironment | null; + + /** + * ID of the agent to use (optional if agent exists by name) + */ + agent_id?: string | null; + + /** + * Additional kwargs for agent configuration + */ + kwargs?: { [key: string]: string } | null; + + /** + * Model name override for this agent + */ + model_name?: string | null; + + /** + * Timeout in seconds for this agent + */ + timeout_seconds?: number | null; + } + + export namespace AgentConfig { + /** + * Environment configuration to use for this agent + */ + export interface AgentEnvironment { + /** + * Environment variables to set when launching the agent. 
+ */ + environment_variables?: { [key: string]: string } | null; + + /** + * Secrets to inject as environment variables when launching the agent. Map of + * environment variable names to secret IDs. + */ + secrets?: { [key: string]: string } | null; + } + } + + /** + * Orchestrator configuration (optional overrides). If not provided, default values + * will be used. + */ + export interface OrchestratorConfig { + /** + * Number of retry attempts on failure (default: 0). This is the retry policy for + * failed scenarios. Default is 0. + */ + n_attempts?: number | null; + + /** + * Number of concurrent trials to run (default: 1). Controls parallelism for + * scenario execution. Default is 1. + */ + n_concurrent_trials?: number | null; + + /** + * Suppress verbose output (default: false) + */ + quiet?: boolean | null; + + /** + * Timeout multiplier for retries (default: 1.0). Each retry will multiply the + * timeout by this factor. + */ + timeout_multiplier?: number | null; + } + } +} + +export interface BenchmarkJobListParams { + /** + * The limit of items to return. Default is 20. Max is 5000. + */ + limit?: number; + + /** + * Filter by name + */ + name?: string; + + /** + * Load the next page of data starting after the item with the given ID. 
+ */ + starting_after?: string; +} + +export declare namespace BenchmarkJobs { + export { + type BenchmarkJobCreateParameters as BenchmarkJobCreateParameters, + type BenchmarkJobListView as BenchmarkJobListView, + type BenchmarkJobView as BenchmarkJobView, + type BenchmarkJobCreateParams as BenchmarkJobCreateParams, + type BenchmarkJobListParams as BenchmarkJobListParams, + }; +} diff --git a/src/resources/index.ts b/src/resources/index.ts index 1afde0ca7..cca5276d9 100644 --- a/src/resources/index.ts +++ b/src/resources/index.ts @@ -10,6 +10,14 @@ export { type AgentCreateParams, type AgentListParams, } from './agents'; +export { + BenchmarkJobs, + type BenchmarkJobCreateParameters, + type BenchmarkJobListView, + type BenchmarkJobView, + type BenchmarkJobCreateParams, + type BenchmarkJobListParams, +} from './benchmark-jobs'; export { BenchmarkRunViewsBenchmarkRunsCursorIDPage, BenchmarkRuns, diff --git a/src/version.ts b/src/version.ts index b4178bdcc..8d95974a6 100644 --- a/src/version.ts +++ b/src/version.ts @@ -1 +1 @@ -export const VERSION = '1.3.0-2'; // x-release-please-version +export const VERSION = '1.3.1'; // x-release-please-version diff --git a/tests/api-resources/benchmark-jobs.test.ts b/tests/api-resources/benchmark-jobs.test.ts new file mode 100644 index 000000000..8703928ab --- /dev/null +++ b/tests/api-resources/benchmark-jobs.test.ts @@ -0,0 +1,92 @@ +// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +import Runloop from '@runloop/api-client'; +import { Response } from 'node-fetch'; + +const client = new Runloop({ + bearerToken: 'My Bearer Token', + baseURL: process.env['TEST_API_BASE_URL'] ?? 
'http://127.0.0.1:4010', +}); + +describe('resource benchmarkJobs', () => { + test('create', async () => { + const responsePromise = client.benchmarkJobs.create(); + const rawResponse = await responsePromise.asResponse(); + expect(rawResponse).toBeInstanceOf(Response); + const response = await responsePromise; + expect(response).not.toBeInstanceOf(Response); + const dataAndResponse = await responsePromise.withResponse(); + expect(dataAndResponse.data).toBe(response); + expect(dataAndResponse.response).toBe(rawResponse); + }); + + test('create: request options instead of params are passed correctly', async () => { + // ensure the request options are being passed correctly by passing an unknown path in order to cause a NotFoundError + await expect(client.benchmarkJobs.create({ path: '/_stainless_unknown_path' })).rejects.toThrow( + Runloop.NotFoundError, + ); + }); + + test('create: request options and params are passed correctly', async () => { + // ensure the request options are being passed correctly by passing an unknown path in order to cause a NotFoundError + await expect( + client.benchmarkJobs.create( + { + name: 'name', + spec: { inline_yaml: 'inline_yaml', type: 'harbor' }, + }, + { path: '/_stainless_unknown_path' }, + ), + ).rejects.toThrow(Runloop.NotFoundError); + }); + + test('retrieve', async () => { + const responsePromise = client.benchmarkJobs.retrieve('id'); + const rawResponse = await responsePromise.asResponse(); + expect(rawResponse).toBeInstanceOf(Response); + const response = await responsePromise; + expect(response).not.toBeInstanceOf(Response); + const dataAndResponse = await responsePromise.withResponse(); + expect(dataAndResponse.data).toBe(response); + expect(dataAndResponse.response).toBe(rawResponse); + }); + + test('retrieve: request options instead of params are passed correctly', async () => { + // ensure the request options are being passed correctly by passing an unknown path in order to cause a NotFoundError + await 
expect(client.benchmarkJobs.retrieve('id', { path: '/_stainless_unknown_path' })).rejects.toThrow( + Runloop.NotFoundError, + ); + }); + + test('list', async () => { + const responsePromise = client.benchmarkJobs.list(); + const rawResponse = await responsePromise.asResponse(); + expect(rawResponse).toBeInstanceOf(Response); + const response = await responsePromise; + expect(response).not.toBeInstanceOf(Response); + const dataAndResponse = await responsePromise.withResponse(); + expect(dataAndResponse.data).toBe(response); + expect(dataAndResponse.response).toBe(rawResponse); + }); + + test('list: request options instead of params are passed correctly', async () => { + // ensure the request options are being passed correctly by passing an unknown path in order to cause a NotFoundError + await expect(client.benchmarkJobs.list({ path: '/_stainless_unknown_path' })).rejects.toThrow( + Runloop.NotFoundError, + ); + }); + + test('list: request options and params are passed correctly', async () => { + // ensure the request options are being passed correctly by passing an unknown path in order to cause a NotFoundError + await expect( + client.benchmarkJobs.list( + { + limit: 0, + name: 'name', + starting_after: 'starting_after', + }, + { path: '/_stainless_unknown_path' }, + ), + ).rejects.toThrow(Runloop.NotFoundError); + }); +});