-
Notifications
You must be signed in to change notification settings - Fork 3
feat(sdk): ScenarioRun class #681
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
10 commits
Select commit
Hold shift + click to select a range
c5474b2
ScenarioRun class + tests
sid-rl 987582c
yarn upgrade
sid-rl 5e936de
lint fix
sid-rl cb4a52c
restore yarn.lock
sid-rl 191239b
lint fix
sid-rl 8be86bf
better scenario run quickstart
sid-rl ee6ec42
validate parent directory exists and is writeable for downloadLogs
sid-rl 009f0ba
remove scenario run fromId method
sid-rl 3b1effd
rename `THIRTY_SECOND_TIMEOUT` to `TWO_MINUTE_TIMEOUT` since it was 1…
sid-rl 7716e35
rename smoketest timeout constants to represent semantic meaning (sho…
sid-rl File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,328 @@ | ||
| import { Runloop } from '../index'; | ||
| import type * as Core from '../core'; | ||
| import type { ScenarioRunView, ScoringContractResultView } from '../resources/scenarios/scenarios'; | ||
| import type { DevboxView } from '../resources/devboxes/devboxes'; | ||
| import { PollingOptions } from '../lib/polling'; | ||
| import { Devbox } from './devbox'; | ||
| import * as fs from 'fs'; | ||
| import * as path from 'path'; | ||
|
|
||
| /** | ||
| * Object-oriented interface for working with Scenario Runs. | ||
| * | ||
| * @category Scenario | ||
| * | ||
| * @remarks | ||
| * ## Overview | ||
| * | ||
| * The `ScenarioRun` class provides a high-level API for managing scenario runs. | ||
| * A scenario run represents a single execution of a scenario on a devbox, including | ||
| * the ability to interact with the devbox, score the run, and retrieve results. | ||
| * | ||
| * ## Quickstart | ||
| * | ||
| * ScenarioRuns are typically obtained from a Scenario's `run()` or `runAsync()` methods: | ||
| * | ||
| * ```typescript | ||
| * import { RunloopSDK } from '@runloop/api-client'; | ||
| * | ||
| * const runloop = new RunloopSDK(); | ||
| * const scenario = runloop.scenario.fromId('scenario-123'); | ||
| * const run = await scenario.run({ run_name: 'my-run' }); | ||
| * | ||
| * // Access the devbox and execute your agent to solve the scenario | ||
| * const devbox = run.devbox; | ||
|
sid-rl marked this conversation as resolved.
|
||
| * await devbox.cmd.exec('python /home/user/agent/main.py'); | ||
| * | ||
| * // Score and complete the run | ||
| * await run.scoreAndComplete(); | ||
| * const score = await run.getScore(); | ||
| * ``` | ||
| */ | ||
| export class ScenarioRun { | ||
| private client: Runloop; | ||
| private _id: string; | ||
| private _devboxId: string; | ||
| private _devbox: Devbox | null = null; | ||
|
|
||
| /** | ||
| * Create a wrapper around an existing scenario run. | ||
| * | ||
| * Only stores the given client and identifiers; no API request is made here. | ||
| * | ||
| * @private | ||
| * @param {Runloop} client - API client used for all subsequent requests | ||
| * @param {string} id - The scenario run ID | ||
| * @param {string} devboxId - ID of the devbox associated with this run | ||
| */ | ||
| constructor(client: Runloop, id: string, devboxId: string) { | ||
| this.client = client; | ||
| this._id = id; | ||
| this._devboxId = devboxId; | ||
| } | ||
|
|
||
| /** | ||
| * Get the scenario run ID. | ||
| * | ||
| * Returns the ID this instance was constructed with; no API call is made. | ||
| * | ||
| * @returns {string} The scenario run ID | ||
| */ | ||
| get id(): string { | ||
| return this._id; | ||
| } | ||
|
|
||
| /** | ||
| * Get the associated devbox ID. | ||
| * | ||
| * Returns the devbox ID this instance was constructed with; no API call is made. | ||
| * | ||
| * @returns {string} The devbox ID | ||
| */ | ||
| get devboxId(): string { | ||
| return this._devboxId; | ||
| } | ||
|
|
||
| /** | ||
| * Get the devbox instance for this scenario run. | ||
| * | ||
| * This property provides lazy-loaded access to the devbox associated with | ||
| * this scenario run. Use this to interact with the devbox environment | ||
| * during the scenario execution. | ||
| * | ||
| * The `Devbox` wrapper is created with `Devbox.fromId()` on first access and | ||
| * cached on this instance, so later accesses return the same object. | ||
| * | ||
| * @example | ||
| * ```typescript | ||
| * const run = await scenario.run(); | ||
| * const devbox = run.devbox; | ||
| * await devbox.cmd.exec('npm test'); | ||
| * ``` | ||
| * | ||
| * @returns {Devbox} The devbox instance | ||
| */ | ||
| get devbox(): Devbox { | ||
| if (!this._devbox) { | ||
| this._devbox = Devbox.fromId(this.client, this._devboxId); | ||
| } | ||
| return this._devbox; | ||
| } | ||
|
|
||
| /** | ||
| * Get the complete scenario run data from the API. | ||
| * | ||
| * Every call issues a new retrieve request for this run's ID; the result is | ||
| * not cached on this instance. | ||
| * | ||
| * @example | ||
| * ```typescript | ||
| * const info = await run.getInfo(); | ||
| * console.log(`Run state: ${info.state}`); | ||
| * console.log(`Score: ${info.scoring_contract_result?.score}`); | ||
| * ``` | ||
| * | ||
| * @param {Core.RequestOptions} [options] - Request options | ||
| * @returns {Promise<ScenarioRunView>} The scenario run data | ||
| */ | ||
| async getInfo(options?: Core.RequestOptions): Promise<ScenarioRunView> { | ||
| return this.client.scenarios.runs.retrieve(this._id, options); | ||
| } | ||
|
|
||
| /** | ||
| * Wait for the scenario environment (devbox) to be ready. | ||
| * | ||
| * Blocks until the devbox reaches running state. Call this after using | ||
| * `scenario.runAsync()` to ensure the devbox is ready for interaction. | ||
|
sid-rl marked this conversation as resolved.
|
||
| * | ||
| * Once the devbox is running, the scenario run is fetched again with | ||
| * `getInfo()` and that fresh view is returned. | ||
| * | ||
| * NOTE(review): the same `options` object, including any `polling` settings, | ||
| * is forwarded to both the devbox wait and the follow-up `getInfo()` call. | ||
| * | ||
| * @example | ||
| * ```typescript | ||
| * const run = await scenario.runAsync(); | ||
| * await run.awaitEnvReady(); | ||
| * // Devbox is now ready | ||
| * await run.devbox.cmd.exec('ls -la'); | ||
| * ``` | ||
| * | ||
| * @param {Core.RequestOptions & { polling?: Partial<PollingOptions<DevboxView>> }} [options] - Request options with optional polling configuration | ||
| * @returns {Promise<ScenarioRunView>} The scenario run data after environment is ready | ||
| */ | ||
| async awaitEnvReady( | ||
| options?: Core.RequestOptions & { polling?: Partial<PollingOptions<DevboxView>> }, | ||
| ): Promise<ScenarioRunView> { | ||
| await this.client.devboxes.awaitRunning(this._devboxId, options); | ||
|
sid-rl marked this conversation as resolved.
|
||
| return this.getInfo(options); | ||
| } | ||
|
|
||
| /** | ||
| * Submit the scenario run for scoring. | ||
| * | ||
| * This triggers the scoring process using the scenario's scoring contract. | ||
| * The scoring runs asynchronously; follow up with `awaitScored()` to wait | ||
| * for it to complete, or call `scoreAndAwait()` instead of `score()` to | ||
| * submit and wait in a single step. | ||
| * | ||
| * @example | ||
| * ```typescript | ||
| * await run.score(); | ||
| * // Scoring is now in progress | ||
| * const result = await run.awaitScored(); | ||
| * ``` | ||
| * | ||
| * @param {Core.RequestOptions} [options] - Request options | ||
| * @returns {Promise<ScenarioRunView>} The updated scenario run data | ||
| */ | ||
| async score(options?: Core.RequestOptions): Promise<ScenarioRunView> { | ||
| return this.client.scenarios.runs.score(this._id, options); | ||
| } | ||
|
|
||
| /** | ||
| * Wait for the scenario run to be scored. | ||
| * | ||
| * Blocks until scoring is complete. Call this after `score()` to wait | ||
| * for the scoring process to finish. | ||
| * | ||
| * Delegates to `client.scenarios.runs.awaitScored()` for this run's ID, | ||
| * forwarding `options` unchanged. | ||
| * | ||
| * @example | ||
| * ```typescript | ||
| * await run.score(); | ||
| * const result = await run.awaitScored(); | ||
| * console.log(`Final score: ${result.scoring_contract_result?.score}`); | ||
| * ``` | ||
| * | ||
| * @param {Core.RequestOptions & { polling?: Partial<PollingOptions<ScenarioRunView>> }} [options] - Request options with optional polling configuration | ||
| * @returns {Promise<ScenarioRunView>} The scored scenario run data | ||
| */ | ||
| async awaitScored( | ||
| options?: Core.RequestOptions & { polling?: Partial<PollingOptions<ScenarioRunView>> }, | ||
| ): Promise<ScenarioRunView> { | ||
| return this.client.scenarios.runs.awaitScored(this._id, options); | ||
| } | ||
|
|
||
| /** | ||
| * Submit for scoring and wait for completion. | ||
| * | ||
| * This is a convenience method that combines `score()` and `awaitScored()`. | ||
| * | ||
| * Delegates to `client.scenarios.runs.scoreAndAwait()` for this run's ID, | ||
| * forwarding `options` unchanged. The run is not completed here; see | ||
| * `complete()` or `scoreAndComplete()` for that. | ||
| * | ||
| * @example | ||
| * ```typescript | ||
| * // Agent has finished working... | ||
| * const result = await run.scoreAndAwait(); | ||
| * console.log(`Final score: ${result.scoring_contract_result?.score}`); | ||
| * ``` | ||
| * | ||
| * @param {Core.RequestOptions & { polling?: Partial<PollingOptions<ScenarioRunView>> }} [options] - Request options with optional polling configuration | ||
| * @returns {Promise<ScenarioRunView>} The scored scenario run data | ||
| */ | ||
| async scoreAndAwait( | ||
| options?: Core.RequestOptions & { polling?: Partial<PollingOptions<ScenarioRunView>> }, | ||
| ): Promise<ScenarioRunView> { | ||
| return this.client.scenarios.runs.scoreAndAwait(this._id, options); | ||
|
sid-rl marked this conversation as resolved.
|
||
| } | ||
|
|
||
| /** | ||
| * Score the run, wait for scoring, then complete and shutdown. | ||
| * | ||
| * This is a convenience method that scores the scenario run, waits for | ||
| * scoring to finish, then completes the run and shuts down the devbox. | ||
| * This is the recommended way to finish a scenario run. | ||
| * | ||
| * Delegates to `client.scenarios.runs.scoreAndComplete()` for this run's ID, | ||
| * forwarding `options` unchanged. | ||
| * | ||
| * @example | ||
| * ```typescript | ||
| * // Agent has finished working... | ||
| * const result = await run.scoreAndComplete(); | ||
| * console.log(`Final score: ${result.scoring_contract_result?.score}`); | ||
| * // Devbox has been shut down | ||
| * ``` | ||
| * | ||
| * @param {Core.RequestOptions & { polling?: Partial<PollingOptions<ScenarioRunView>> }} [options] - Request options with optional polling configuration | ||
| * @returns {Promise<ScenarioRunView>} The completed scenario run data with scoring results | ||
| */ | ||
| async scoreAndComplete( | ||
| options?: Core.RequestOptions & { polling?: Partial<PollingOptions<ScenarioRunView>> }, | ||
| ): Promise<ScenarioRunView> { | ||
| return this.client.scenarios.runs.scoreAndComplete(this._id, options); | ||
|
sid-rl marked this conversation as resolved.
|
||
| } | ||
|
|
||
| /** | ||
| * Complete the scenario run and shutdown the devbox. | ||
| * | ||
| * Call this after scoring to finalize the run. The devbox will be | ||
| * shut down and resources released. Note: The run must be in a | ||
| * scored state before calling complete. Use `cancel()` to end a | ||
| * run without scoring, or `scoreAndComplete()` to score and complete | ||
| * in one operation. | ||
| * | ||
| * This method performs no local state check of its own; it simply forwards | ||
| * the request to `client.scenarios.runs.complete()` for this run's ID. | ||
| * | ||
| * @example | ||
| * ```typescript | ||
| * // Score first, then complete | ||
| * await run.scoreAndAwait(); | ||
| * await run.complete(); | ||
| * ``` | ||
| * | ||
| * @param {Core.RequestOptions} [options] - Request options | ||
| * @returns {Promise<ScenarioRunView>} The final scenario run data | ||
| */ | ||
| async complete(options?: Core.RequestOptions): Promise<ScenarioRunView> { | ||
| return this.client.scenarios.runs.complete(this._id, options); | ||
| } | ||
|
|
||
| /** | ||
| * Cancel the scenario run and shutdown the devbox. | ||
| * | ||
| * Use this to abort a running scenario. The devbox will be shut down | ||
| * and the run marked as canceled. | ||
| * | ||
| * Delegates to `client.scenarios.runs.cancel()` for this run's ID, | ||
| * forwarding `options` unchanged. | ||
| * | ||
| * @example | ||
| * ```typescript | ||
| * // Abort the scenario | ||
| * await run.cancel(); | ||
| * ``` | ||
| * | ||
| * @param {Core.RequestOptions} [options] - Request options | ||
| * @returns {Promise<ScenarioRunView>} The canceled scenario run data | ||
| */ | ||
| async cancel(options?: Core.RequestOptions): Promise<ScenarioRunView> { | ||
| return this.client.scenarios.runs.cancel(this._id, options); | ||
| } | ||
|
|
||
| /** | ||
| * Download all logs for this scenario run to a file. | ||
| * | ||
| * Downloads a zip archive containing all logs from the scenario run's | ||
| * associated devbox. This is useful for debugging and analysis. | ||
| * | ||
| * Before any request is sent, the parent directory of `filePath` | ||
| * (`path.dirname(filePath)`, i.e. `.` for a bare file name) is checked for | ||
| * write access so that an unusable destination fails fast. | ||
| * | ||
| * The whole response is read into memory with `arrayBuffer()` and then | ||
| * written to `filePath` in a single `writeFile` call. | ||
| * | ||
| * @example | ||
| * ```typescript | ||
| * await run.scoreAndComplete(); | ||
| * await run.downloadLogs('./scenario-logs.zip'); | ||
| * ``` | ||
| * | ||
| * @param {string} filePath - Path where the zip file will be written | ||
| * @param {Core.RequestOptions} [options] - Request options | ||
| * @returns {Promise<void>} | ||
| * @throws {Error} If the parent directory of `filePath` does not exist or is | ||
| * not writable. Errors from the download request or the file write are not | ||
| * caught here and propagate to the caller. | ||
| */ | ||
| async downloadLogs(filePath: string, options?: Core.RequestOptions): Promise<void> { | ||
| // Validate the parent directory exists and is writable | ||
| const parentDir = path.dirname(filePath); | ||
| try { | ||
| await fs.promises.access(parentDir, fs.constants.W_OK); | ||
| } catch { | ||
| throw new Error( | ||
| `Cannot write to ${filePath}: parent directory '${parentDir}' does not exist or is not writable`, | ||
| ); | ||
| } | ||
|
|
||
| const response = await this.client.scenarios.runs.downloadLogs(this._id, options); | ||
|
|
||
| // Get the response as an ArrayBuffer and write to file | ||
| const arrayBuffer = await response.arrayBuffer(); | ||
| const buffer = Buffer.from(arrayBuffer); | ||
|
|
||
| await fs.promises.writeFile(filePath, buffer); | ||
| } | ||
|
|
||
| /** | ||
| * Get the scoring result for this run. | ||
| * | ||
| * Returns null if the run has not been scored yet. Always makes an API | ||
| * call to retrieve the current scoring result. | ||
| * | ||
| * Implemented as `getInfo()` followed by reading `scoring_contract_result`; | ||
| * a missing (`undefined`) or `null` result is normalised to `null`. | ||
| * | ||
| * @example | ||
| * ```typescript | ||
| * await run.scoreAndAwait(); | ||
| * const score = await run.getScore(); | ||
| * if (score) { | ||
| * console.log(`Total score: ${score.score}`); | ||
| * for (const fn of score.scoring_function_results) { | ||
| * console.log(` ${fn.scoring_function_name}: ${fn.score}`); | ||
| * } | ||
| * } | ||
| * ``` | ||
| * | ||
| * @param {Core.RequestOptions} [options] - Request options | ||
| * @returns {Promise<ScoringContractResultView | null>} The scoring result or null if not yet scored | ||
| */ | ||
| async getScore(options?: Core.RequestOptions): Promise<ScoringContractResultView | null> { | ||
| const info = await this.getInfo(options); | ||
| return info.scoring_contract_result ?? null; | ||
| } | ||
| } | ||
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.