diff --git a/src/sdk/index.ts b/src/sdk/index.ts index 2ea3e73fc..3056b36a6 100644 --- a/src/sdk/index.ts +++ b/src/sdk/index.ts @@ -7,3 +7,4 @@ export { Execution } from './execution'; export { ExecutionResult } from './execution-result'; export { Scorer } from './scorer'; export { NetworkPolicy } from './network-policy'; +export { ScenarioRun } from './scenario-run'; diff --git a/src/sdk/scenario-run.ts b/src/sdk/scenario-run.ts new file mode 100644 index 000000000..ba57241d8 --- /dev/null +++ b/src/sdk/scenario-run.ts @@ -0,0 +1,328 @@ +import { Runloop } from '../index'; +import type * as Core from '../core'; +import type { ScenarioRunView, ScoringContractResultView } from '../resources/scenarios/scenarios'; +import type { DevboxView } from '../resources/devboxes/devboxes'; +import { PollingOptions } from '../lib/polling'; +import { Devbox } from './devbox'; +import * as fs from 'fs'; +import * as path from 'path'; + +/** + * Object-oriented interface for working with Scenario Runs. + * + * @category Scenario + * + * @remarks + * ## Overview + * + * The `ScenarioRun` class provides a high-level API for managing scenario runs. + * A scenario run represents a single execution of a scenario on a devbox, including + * the ability to interact with the devbox, score the run, and retrieve results. 
+ * + * ## Quickstart + * + * ScenarioRuns are typically obtained from a Scenario's `run()` or `runAsync()` methods: + * + * ```typescript + * import { RunloopSDK } from '@runloop/api-client'; + * + * const runloop = new RunloopSDK(); + * const scenario = runloop.scenario.fromId('scenario-123'); + * const run = await scenario.run({ run_name: 'my-run' }); + * + * // Access the devbox and execute your agent to solve the scenario + * const devbox = run.devbox; + * await devbox.cmd.exec('python /home/user/agent/main.py'); + * + * // Score and complete the run + * await run.scoreAndComplete(); + * const score = await run.getScore(); + * ``` + */ +export class ScenarioRun { + private client: Runloop; + private _id: string; + private _devboxId: string; + private _devbox: Devbox | null = null; + + /** + * @private + */ + constructor(client: Runloop, id: string, devboxId: string) { + this.client = client; + this._id = id; + this._devboxId = devboxId; + } + + /** + * Get the scenario run ID. + * @returns {string} The scenario run ID + */ + get id(): string { + return this._id; + } + + /** + * Get the associated devbox ID. + * @returns {string} The devbox ID + */ + get devboxId(): string { + return this._devboxId; + } + + /** + * Get the devbox instance for this scenario run. + * + * This property provides lazy-loaded access to the devbox associated with + * this scenario run. Use this to interact with the devbox environment + * during the scenario execution. + * + * @example + * ```typescript + * const run = await scenario.run(); + * const devbox = run.devbox; + * await devbox.cmd.exec('npm test'); + * ``` + * + * @returns {Devbox} The devbox instance + */ + get devbox(): Devbox { + if (!this._devbox) { + this._devbox = Devbox.fromId(this.client, this._devboxId); + } + return this._devbox; + } + + /** + * Get the complete scenario run data from the API. 
+ * + * @example + * ```typescript + * const info = await run.getInfo(); + * console.log(`Run state: ${info.state}`); + * console.log(`Score: ${info.scoring_contract_result?.score}`); + * ``` + * + * @param {Core.RequestOptions} [options] - Request options + * @returns {Promise<ScenarioRunView>} The scenario run data + */ + async getInfo(options?: Core.RequestOptions): Promise<ScenarioRunView> { + return this.client.scenarios.runs.retrieve(this._id, options); + } + + /** + * Wait for the scenario environment (devbox) to be ready. + * + * Blocks until the devbox reaches running state. Call this after using + * `scenario.runAsync()` to ensure the devbox is ready for interaction. + * + * @example + * ```typescript + * const run = await scenario.runAsync(); + * await run.awaitEnvReady(); + * // Devbox is now ready + * await run.devbox.cmd.exec('ls -la'); + * ``` + * + * @param {Core.RequestOptions & { polling?: Partial<PollingOptions<DevboxView>> }} [options] - Request options with optional polling configuration + * @returns {Promise<ScenarioRunView>} The scenario run data after environment is ready + */ + async awaitEnvReady( + options?: Core.RequestOptions & { polling?: Partial<PollingOptions<DevboxView>> }, + ): Promise<ScenarioRunView> { + await this.client.devboxes.awaitRunning(this._devboxId, options); + return this.getInfo(options); + } + + /** + * Submit the scenario run for scoring. + * + * This triggers the scoring process using the scenario's scoring contract. + * The scoring runs asynchronously; use `awaitScored()` or `scoreAndAwait()` + * to wait for scoring to complete. + * + * @example + * ```typescript + * await run.score(); + * // Scoring is now in progress + * const result = await run.awaitScored(); + * ``` + * + * @param {Core.RequestOptions} [options] - Request options + * @returns {Promise<ScenarioRunView>} The updated scenario run data + */ + async score(options?: Core.RequestOptions): Promise<ScenarioRunView> { + return this.client.scenarios.runs.score(this._id, options); + } + + /** + * Wait for the scenario run to be scored. + * + * Blocks until scoring is complete. 
Call this after `score()` to wait + * for the scoring process to finish. + * + * @example + * ```typescript + * await run.score(); + * const result = await run.awaitScored(); + * console.log(`Final score: ${result.scoring_contract_result?.score}`); + * ``` + * + * @param {Core.RequestOptions & { polling?: Partial<PollingOptions<ScenarioRunView>> }} [options] - Request options with optional polling configuration + * @returns {Promise<ScenarioRunView>} The scored scenario run data + */ + async awaitScored( + options?: Core.RequestOptions & { polling?: Partial<PollingOptions<ScenarioRunView>> }, + ): Promise<ScenarioRunView> { + return this.client.scenarios.runs.awaitScored(this._id, options); + } + + /** + * Submit for scoring and wait for completion. + * + * This is a convenience method that combines `score()` and `awaitScored()`. + * + * @example + * ```typescript + * // Agent has finished working... + * const result = await run.scoreAndAwait(); + * console.log(`Final score: ${result.scoring_contract_result?.score}`); + * ``` + * + * @param {Core.RequestOptions & { polling?: Partial<PollingOptions<ScenarioRunView>> }} [options] - Request options with optional polling configuration + * @returns {Promise<ScenarioRunView>} The scored scenario run data + */ + async scoreAndAwait( + options?: Core.RequestOptions & { polling?: Partial<PollingOptions<ScenarioRunView>> }, + ): Promise<ScenarioRunView> { + return this.client.scenarios.runs.scoreAndAwait(this._id, options); + } + + /** + * Score the run, wait for scoring, then complete and shut down. + * + * This is a convenience method that scores the scenario run, waits for + * scoring to finish, then completes the run and shuts down the devbox. + * This is the recommended way to finish a scenario run. + * + * @example + * ```typescript + * // Agent has finished working... 
+ * const result = await run.scoreAndComplete(); + * console.log(`Final score: ${result.scoring_contract_result?.score}`); + * // Devbox has been shut down + * ``` + * + * @param {Core.RequestOptions & { polling?: Partial<PollingOptions<ScenarioRunView>> }} [options] - Request options with optional polling configuration + * @returns {Promise<ScenarioRunView>} The completed scenario run data with scoring results + */ + async scoreAndComplete( + options?: Core.RequestOptions & { polling?: Partial<PollingOptions<ScenarioRunView>> }, + ): Promise<ScenarioRunView> { + return this.client.scenarios.runs.scoreAndComplete(this._id, options); + } + + /** + * Complete the scenario run and shut down the devbox. + * + * Call this after scoring to finalize the run. The devbox will be + * shut down and resources released. Note: The run must be in a + * scored state before calling complete. Use `cancel()` to end a + * run without scoring, or `scoreAndComplete()` to score and complete + * in one operation. + * + * @example + * ```typescript + * // Score first, then complete + * await run.scoreAndAwait(); + * await run.complete(); + * ``` + * + * @param {Core.RequestOptions} [options] - Request options + * @returns {Promise<ScenarioRunView>} The final scenario run data + */ + async complete(options?: Core.RequestOptions): Promise<ScenarioRunView> { + return this.client.scenarios.runs.complete(this._id, options); + } + + /** + * Cancel the scenario run and shut down the devbox. + * + * Use this to abort a running scenario. The devbox will be shut down + * and the run marked as canceled. + * + * @example + * ```typescript + * // Abort the scenario + * await run.cancel(); + * ``` + * + * @param {Core.RequestOptions} [options] - Request options + * @returns {Promise<ScenarioRunView>} The canceled scenario run data + */ + async cancel(options?: Core.RequestOptions): Promise<ScenarioRunView> { + return this.client.scenarios.runs.cancel(this._id, options); + } + + /** + * Download all logs for this scenario run to a file. + * + * Downloads a zip archive containing all logs from the scenario run's + * associated devbox. 
This is useful for debugging and analysis. + * + * @example + * ```typescript + * await run.scoreAndComplete(); + * await run.downloadLogs('./scenario-logs.zip'); + * ``` + * + * @param {string} filePath - Path where the zip file will be written + * @param {Core.RequestOptions} [options] - Request options + * @returns {Promise<void>} + */ + async downloadLogs(filePath: string, options?: Core.RequestOptions): Promise<void> { + // Validate the parent directory exists and is writable + const parentDir = path.dirname(filePath); + try { + await fs.promises.access(parentDir, fs.constants.W_OK); + } catch { + throw new Error( + `Cannot write to ${filePath}: parent directory '${parentDir}' does not exist or is not writable`, + ); + } + + const response = await this.client.scenarios.runs.downloadLogs(this._id, options); + + // Get the response as an ArrayBuffer and write to file + const arrayBuffer = await response.arrayBuffer(); + const buffer = Buffer.from(arrayBuffer); + + await fs.promises.writeFile(filePath, buffer); + } + + /** + * Get the scoring result for this run. + * + * Returns null if the run has not been scored yet. Always makes an API + * call to retrieve the current scoring result. + * + * @example + * ```typescript + * await run.scoreAndAwait(); + * const score = await run.getScore(); + * if (score) { + * console.log(`Total score: ${score.score}`); + * for (const fn of score.scoring_function_results) { + * console.log(` ${fn.scoring_function_name}: ${fn.score}`); + * } + * } + * ``` + * + * @param {Core.RequestOptions} [options] - Request options + * @returns {Promise<ScoringContractResultView | null>} The scoring result or null if not yet scored + */ + async getScore(options?: Core.RequestOptions): Promise<ScoringContractResultView | null> { + const info = await this.getInfo(options); + return info.scoring_contract_result ?? 
null; + } +} diff --git a/tests/objects/scenario-run.test.ts b/tests/objects/scenario-run.test.ts new file mode 100644 index 000000000..5aaf1a245 --- /dev/null +++ b/tests/objects/scenario-run.test.ts @@ -0,0 +1,407 @@ +import { ScenarioRun } from '../../src/sdk/scenario-run'; +import { Devbox } from '../../src/sdk/devbox'; +import type { ScenarioRunView, ScoringContractResultView } from '../../src/resources/scenarios/scenarios'; +import type { DevboxView } from '../../src/resources/devboxes/devboxes'; + +// Mock the Runloop client +jest.mock('../../src/index'); + +// Mock fs module +jest.mock('fs', () => ({ + promises: { + writeFile: jest.fn().mockResolvedValue(undefined), + access: jest.fn().mockResolvedValue(undefined), + }, + constants: { + W_OK: 2, + }, +})); + +describe('ScenarioRun', () => { + let mockClient: any; + let mockScenarioRunData: ScenarioRunView; + let mockDevboxData: DevboxView; + let mockScoringResult: ScoringContractResultView; + + beforeEach(() => { + // Create mock client instance with proper structure + mockClient = { + scenarios: { + runs: { + retrieve: jest.fn(), + score: jest.fn(), + awaitScored: jest.fn(), + scoreAndAwait: jest.fn(), + scoreAndComplete: jest.fn(), + complete: jest.fn(), + cancel: jest.fn(), + downloadLogs: jest.fn(), + }, + }, + devboxes: { + retrieve: jest.fn(), + awaitRunning: jest.fn(), + createAndAwaitRunning: jest.fn(), + shutdown: jest.fn(), + }, + } as any; + + // Mock scoring result + mockScoringResult = { + score: 0.85, + scoring_function_results: [ + { + scoring_function_name: 'test-scorer', + score: 0.85, + output: 'Test passed', + state: 'complete', + }, + ], + }; + + // Mock scenario run data + mockScenarioRunData = { + id: 'run-123', + devbox_id: 'devbox-456', + scenario_id: 'scenario-789', + state: 'running', + metadata: {}, + }; + + // Mock devbox data + mockDevboxData = { + id: 'devbox-456', + status: 'running', + capabilities: [], + create_time_ms: Date.now(), + end_time_ms: null, + launch_parameters: 
{}, + metadata: {}, + state_transitions: [], + }; + }); + + describe('constructor', () => { + it('should create a ScenarioRun instance', () => { + const run = new ScenarioRun(mockClient, 'run-123', 'devbox-456'); + + expect(run).toBeInstanceOf(ScenarioRun); + expect(run.id).toBe('run-123'); + expect(run.devboxId).toBe('devbox-456'); + }); + }); + + describe('properties', () => { + let run: ScenarioRun; + + beforeEach(() => { + run = new ScenarioRun(mockClient, 'run-123', 'devbox-456'); + }); + + it('should expose devbox property with lazy loading', () => { + const devbox = run.devbox; + + expect(devbox).toBeInstanceOf(Devbox); + expect(devbox.id).toBe('devbox-456'); + }); + + it('should cache devbox instance', () => { + const devbox1 = run.devbox; + const devbox2 = run.devbox; + + expect(devbox1).toBe(devbox2); + }); + }); + + describe('getInfo', () => { + let run: ScenarioRun; + + beforeEach(() => { + run = new ScenarioRun(mockClient, 'run-123', 'devbox-456'); + }); + + it('should retrieve scenario run info from API', async () => { + mockClient.scenarios.runs.retrieve.mockResolvedValue(mockScenarioRunData); + + const info = await run.getInfo(); + + expect(mockClient.scenarios.runs.retrieve).toHaveBeenCalledWith('run-123', undefined); + expect(info).toEqual(mockScenarioRunData); + }); + + it('should pass options to the API client', async () => { + mockClient.scenarios.runs.retrieve.mockResolvedValue(mockScenarioRunData); + const options = { timeout: 30000 }; + + await run.getInfo(options); + + expect(mockClient.scenarios.runs.retrieve).toHaveBeenCalledWith('run-123', options); + }); + }); + + describe('awaitEnvReady', () => { + let run: ScenarioRun; + + beforeEach(() => { + run = new ScenarioRun(mockClient, 'run-123', 'devbox-456'); + }); + + it('should wait for devbox to be running and return run info', async () => { + mockClient.devboxes.awaitRunning.mockResolvedValue(mockDevboxData); + mockClient.scenarios.runs.retrieve.mockResolvedValue(mockScenarioRunData); + 
+ const result = await run.awaitEnvReady(); + + expect(mockClient.devboxes.awaitRunning).toHaveBeenCalledWith('devbox-456', undefined); + expect(mockClient.scenarios.runs.retrieve).toHaveBeenCalledWith('run-123', undefined); + expect(result).toEqual(mockScenarioRunData); + }); + + it('should pass polling options to awaitRunning', async () => { + mockClient.devboxes.awaitRunning.mockResolvedValue(mockDevboxData); + mockClient.scenarios.runs.retrieve.mockResolvedValue(mockScenarioRunData); + const options = { polling: { maxAttempts: 10 } }; + + await run.awaitEnvReady(options); + + expect(mockClient.devboxes.awaitRunning).toHaveBeenCalledWith('devbox-456', options); + }); + }); + + describe('score', () => { + let run: ScenarioRun; + + beforeEach(() => { + run = new ScenarioRun(mockClient, 'run-123', 'devbox-456'); + }); + + it('should submit run for scoring', async () => { + const scoringRun = { ...mockScenarioRunData, state: 'scoring' as const }; + mockClient.scenarios.runs.score.mockResolvedValue(scoringRun); + + const result = await run.score(); + + expect(mockClient.scenarios.runs.score).toHaveBeenCalledWith('run-123', undefined); + expect(result.state).toBe('scoring'); + }); + }); + + describe('awaitScored', () => { + let run: ScenarioRun; + + beforeEach(() => { + run = new ScenarioRun(mockClient, 'run-123', 'devbox-456'); + }); + + it('should wait for scoring to complete', async () => { + const scoredRun = { + ...mockScenarioRunData, + state: 'scored' as const, + scoring_contract_result: mockScoringResult, + }; + mockClient.scenarios.runs.awaitScored.mockResolvedValue(scoredRun); + + const result = await run.awaitScored(); + + expect(mockClient.scenarios.runs.awaitScored).toHaveBeenCalledWith('run-123', undefined); + expect(result.state).toBe('scored'); + expect(result.scoring_contract_result).toEqual(mockScoringResult); + }); + }); + + describe('scoreAndAwait', () => { + let run: ScenarioRun; + + beforeEach(() => { + run = new ScenarioRun(mockClient, 
'run-123', 'devbox-456'); + }); + + it('should score and wait for completion', async () => { + const scoredRun = { + ...mockScenarioRunData, + state: 'scored' as const, + scoring_contract_result: mockScoringResult, + }; + mockClient.scenarios.runs.scoreAndAwait.mockResolvedValue(scoredRun); + + const result = await run.scoreAndAwait(); + + expect(mockClient.scenarios.runs.scoreAndAwait).toHaveBeenCalledWith('run-123', undefined); + expect(result.state).toBe('scored'); + }); + }); + + describe('scoreAndComplete', () => { + let run: ScenarioRun; + + beforeEach(() => { + run = new ScenarioRun(mockClient, 'run-123', 'devbox-456'); + }); + + it('should score, wait, and complete the run', async () => { + const completedRun = { + ...mockScenarioRunData, + state: 'completed' as const, + scoring_contract_result: mockScoringResult, + }; + mockClient.scenarios.runs.scoreAndComplete.mockResolvedValue(completedRun); + + const result = await run.scoreAndComplete(); + + expect(mockClient.scenarios.runs.scoreAndComplete).toHaveBeenCalledWith('run-123', undefined); + expect(result.state).toBe('completed'); + }); + + it('should pass polling options', async () => { + const completedRun = { + ...mockScenarioRunData, + state: 'completed' as const, + scoring_contract_result: mockScoringResult, + }; + mockClient.scenarios.runs.scoreAndComplete.mockResolvedValue(completedRun); + const options = { polling: { maxAttempts: 100 } }; + + await run.scoreAndComplete(options); + + expect(mockClient.scenarios.runs.scoreAndComplete).toHaveBeenCalledWith('run-123', options); + }); + }); + + describe('complete', () => { + let run: ScenarioRun; + + beforeEach(() => { + run = new ScenarioRun(mockClient, 'run-123', 'devbox-456'); + }); + + it('should complete the run', async () => { + const completedRun = { ...mockScenarioRunData, state: 'completed' as const }; + mockClient.scenarios.runs.complete.mockResolvedValue(completedRun); + + const result = await run.complete(); + + 
expect(mockClient.scenarios.runs.complete).toHaveBeenCalledWith('run-123', undefined); + expect(result.state).toBe('completed'); + }); + }); + + describe('cancel', () => { + let run: ScenarioRun; + + beforeEach(() => { + run = new ScenarioRun(mockClient, 'run-123', 'devbox-456'); + }); + + it('should cancel the run', async () => { + const canceledRun = { ...mockScenarioRunData, state: 'canceled' as const }; + mockClient.scenarios.runs.cancel.mockResolvedValue(canceledRun); + + const result = await run.cancel(); + + expect(mockClient.scenarios.runs.cancel).toHaveBeenCalledWith('run-123', undefined); + expect(result.state).toBe('canceled'); + }); + }); + + describe('downloadLogs', () => { + let run: ScenarioRun; + const fs = require('fs'); + + beforeEach(() => { + run = new ScenarioRun(mockClient, 'run-123', 'devbox-456'); + jest.clearAllMocks(); + }); + + it('should validate parent directory and download logs to file', async () => { + const mockArrayBuffer = new ArrayBuffer(8); + mockClient.scenarios.runs.downloadLogs.mockResolvedValue({ + arrayBuffer: jest.fn().mockResolvedValue(mockArrayBuffer), + }); + + await run.downloadLogs('/some/path/logs.zip'); + + expect(fs.promises.access).toHaveBeenCalledWith('/some/path', fs.constants.W_OK); + expect(mockClient.scenarios.runs.downloadLogs).toHaveBeenCalledWith('run-123', undefined); + expect(fs.promises.writeFile).toHaveBeenCalledWith('/some/path/logs.zip', expect.any(Buffer)); + }); + + it('should throw error when parent directory is not writable', async () => { + fs.promises.access.mockRejectedValueOnce(new Error('ENOENT')); + + await expect(run.downloadLogs('/invalid/path/logs.zip')).rejects.toThrow( + "Cannot write to /invalid/path/logs.zip: parent directory '/invalid/path' does not exist or is not writable", + ); + + expect(mockClient.scenarios.runs.downloadLogs).not.toHaveBeenCalled(); + }); + }); + + describe('getScore', () => { + let run: ScenarioRun; + + beforeEach(() => { + run = new ScenarioRun(mockClient, 
'run-123', 'devbox-456'); + }); + + it('should return scoring result when available', async () => { + const scoredRun = { + ...mockScenarioRunData, + state: 'scored' as const, + scoring_contract_result: mockScoringResult, + }; + mockClient.scenarios.runs.retrieve.mockResolvedValue(scoredRun); + + const score = await run.getScore(); + + expect(score).toEqual(mockScoringResult); + expect(score?.score).toBe(0.85); + }); + + it('should return null when not yet scored', async () => { + mockClient.scenarios.runs.retrieve.mockResolvedValue(mockScenarioRunData); + + const score = await run.getScore(); + + expect(score).toBeNull(); + }); + + it('should return null when scoring_contract_result is undefined', async () => { + const runWithoutScore = { ...mockScenarioRunData }; + delete (runWithoutScore as any).scoring_contract_result; + mockClient.scenarios.runs.retrieve.mockResolvedValue(runWithoutScore); + + const score = await run.getScore(); + + expect(score).toBeNull(); + }); + }); + + describe('error handling', () => { + let run: ScenarioRun; + + beforeEach(() => { + run = new ScenarioRun(mockClient, 'run-123', 'devbox-456'); + }); + + it('should propagate errors from getInfo', async () => { + const error = new Error('API error'); + mockClient.scenarios.runs.retrieve.mockRejectedValue(error); + + await expect(run.getInfo()).rejects.toThrow('API error'); + }); + + it('should propagate errors from score', async () => { + const error = new Error('Scoring failed'); + mockClient.scenarios.runs.score.mockRejectedValue(error); + + await expect(run.score()).rejects.toThrow('Scoring failed'); + }); + + it('should propagate errors from awaitEnvReady', async () => { + const error = new Error('Devbox startup failed'); + mockClient.devboxes.awaitRunning.mockRejectedValue(error); + + await expect(run.awaitEnvReady()).rejects.toThrow('Devbox startup failed'); + }); + }); +}); diff --git a/tests/smoketests/blueprints.test.ts b/tests/smoketests/blueprints.test.ts index 
d345dbd1f..2e308e79b 100644 --- a/tests/smoketests/blueprints.test.ts +++ b/tests/smoketests/blueprints.test.ts @@ -1,5 +1,5 @@ import { BlueprintView } from '@runloop/api-client/resources/blueprints'; -import { makeClient, THIRTY_SECOND_TIMEOUT, uniqueName } from './utils'; +import { makeClient, SHORT_TIMEOUT, uniqueName } from './utils'; import { DevboxView } from '@runloop/api-client/resources/devboxes'; const client = makeClient(); @@ -30,7 +30,7 @@ describe('smoketest: blueprints', () => { expect(created.status).toBe('build_complete'); blueprintId = created.id; }, - THIRTY_SECOND_TIMEOUT, + SHORT_TIMEOUT, ); test( @@ -54,7 +54,7 @@ describe('smoketest: blueprints', () => { } } }, - THIRTY_SECOND_TIMEOUT, + SHORT_TIMEOUT, ); test( @@ -78,7 +78,7 @@ describe('smoketest: blueprints', () => { } } }, - THIRTY_SECOND_TIMEOUT, + SHORT_TIMEOUT, ); }); @@ -113,7 +113,7 @@ describe('smoketest: blueprints', () => { } } }, - THIRTY_SECOND_TIMEOUT, + SHORT_TIMEOUT, ); }); }); diff --git a/tests/smoketests/devboxes.test.ts b/tests/smoketests/devboxes.test.ts index 9bb155333..d4ea9c8ab 100644 --- a/tests/smoketests/devboxes.test.ts +++ b/tests/smoketests/devboxes.test.ts @@ -1,5 +1,5 @@ import { DevboxView } from '@runloop/api-client/resources/devboxes'; -import { makeClient, THIRTY_SECOND_TIMEOUT, uniqueName } from './utils'; +import { makeClient, SHORT_TIMEOUT, uniqueName } from './utils'; const client = makeClient(); @@ -32,7 +32,7 @@ describe('smoketest: devboxes', () => { } } }, - THIRTY_SECOND_TIMEOUT, + SHORT_TIMEOUT, ); test('await running (createAndAwaitRunning)', async () => { @@ -84,7 +84,7 @@ describe('smoketest: devboxes', () => { ); expect(created.status).toBe('running'); }, - THIRTY_SECOND_TIMEOUT * 4, + SHORT_TIMEOUT * 4, ); test( @@ -103,6 +103,6 @@ describe('smoketest: devboxes', () => { ), ).rejects.toThrow(); }, - THIRTY_SECOND_TIMEOUT * 4, + SHORT_TIMEOUT * 4, ); }); diff --git a/tests/smoketests/executions.test.ts b/tests/smoketests/executions.test.ts 
index be6fceffa..3e4d1ad5b 100644 --- a/tests/smoketests/executions.test.ts +++ b/tests/smoketests/executions.test.ts @@ -1,4 +1,4 @@ -import { makeClient, THIRTY_SECOND_TIMEOUT, uniqueName } from './utils'; +import { makeClient, SHORT_TIMEOUT, uniqueName } from './utils'; const client = makeClient(); @@ -26,7 +26,7 @@ describe('smoketest: executions', () => { ); devboxId = created.id; }, - THIRTY_SECOND_TIMEOUT, + SHORT_TIMEOUT, ); test('execute async and await completion', async () => { @@ -85,7 +85,7 @@ describe('smoketest: executions', () => { }); expect(completed.status).toBe('completed'); }, - THIRTY_SECOND_TIMEOUT * 3, + SHORT_TIMEOUT * 3, ); test( @@ -117,7 +117,7 @@ describe('smoketest: executions', () => { ), ).rejects.toThrow(); }, - THIRTY_SECOND_TIMEOUT, + SHORT_TIMEOUT, ); test('executeAndAwaitCompletion with last_n parameter', async () => { diff --git a/tests/smoketests/object-oriented/agent.test.ts b/tests/smoketests/object-oriented/agent.test.ts index 8992aca55..233632d41 100644 --- a/tests/smoketests/object-oriented/agent.test.ts +++ b/tests/smoketests/object-oriented/agent.test.ts @@ -1,5 +1,5 @@ import { Agent, Devbox, StorageObject } from '@runloop/api-client/sdk'; -import { makeClientSDK, THIRTY_SECOND_TIMEOUT, uniqueName } from '../utils'; +import { makeClientSDK, SHORT_TIMEOUT, uniqueName } from '../utils'; const runloop = makeClientSDK(); @@ -34,7 +34,7 @@ describe('smoketest: object-oriented agent', () => { // Once implemented, add: await agent.delete(); } }, - THIRTY_SECOND_TIMEOUT, + SHORT_TIMEOUT, ); test( @@ -63,7 +63,7 @@ describe('smoketest: object-oriented agent', () => { // TODO: Add agent cleanup once delete endpoint is implemented } }, - THIRTY_SECOND_TIMEOUT, + SHORT_TIMEOUT, ); }); @@ -77,7 +77,7 @@ describe('smoketest: object-oriented agent', () => { // List might be empty, that's okay expect(agents.length).toBeGreaterThanOrEqual(0); }, - THIRTY_SECOND_TIMEOUT, + SHORT_TIMEOUT, ); test( @@ -107,7 +107,7 @@ describe('smoketest: 
object-oriented agent', () => { // TODO: Add agent cleanup once delete endpoint is implemented } }, - THIRTY_SECOND_TIMEOUT, + SHORT_TIMEOUT, ); test( @@ -154,7 +154,7 @@ describe('smoketest: object-oriented agent', () => { // Should delete: agent1, agent2, agent3 } }, - THIRTY_SECOND_TIMEOUT, + SHORT_TIMEOUT, ); }); @@ -185,7 +185,7 @@ describe('smoketest: object-oriented agent', () => { // TODO: Add agent cleanup once delete endpoint is implemented } }, - THIRTY_SECOND_TIMEOUT, + SHORT_TIMEOUT, ); test( @@ -215,7 +215,7 @@ describe('smoketest: object-oriented agent', () => { // TODO: Add agent cleanup once delete endpoint is implemented } }, - THIRTY_SECOND_TIMEOUT, + SHORT_TIMEOUT, ); }); @@ -240,7 +240,7 @@ describe('smoketest: object-oriented agent', () => { // TODO: Add agent cleanup once delete endpoint is implemented } }, - THIRTY_SECOND_TIMEOUT, + SHORT_TIMEOUT, ); test( @@ -263,7 +263,7 @@ describe('smoketest: object-oriented agent', () => { // TODO: Add agent cleanup once delete endpoint is implemented } }, - THIRTY_SECOND_TIMEOUT, + SHORT_TIMEOUT, ); test( @@ -287,7 +287,7 @@ describe('smoketest: object-oriented agent', () => { // TODO: Add agent cleanup once delete endpoint is implemented } }, - THIRTY_SECOND_TIMEOUT, + SHORT_TIMEOUT, ); test( @@ -327,7 +327,7 @@ describe('smoketest: object-oriented agent', () => { // TODO: Add agent cleanup once delete endpoint is implemented } }, - THIRTY_SECOND_TIMEOUT, + SHORT_TIMEOUT, ); }); @@ -404,7 +404,7 @@ describe('smoketest: object-oriented agent', () => { // TODO: Add agent cleanup once delete endpoint is implemented } }, - THIRTY_SECOND_TIMEOUT * 8, + SHORT_TIMEOUT * 8, ); }); }); diff --git a/tests/smoketests/object-oriented/blueprint.test.ts b/tests/smoketests/object-oriented/blueprint.test.ts index 3e20bd159..90047887d 100644 --- a/tests/smoketests/object-oriented/blueprint.test.ts +++ b/tests/smoketests/object-oriented/blueprint.test.ts @@ -1,4 +1,4 @@ -import { THIRTY_SECOND_TIMEOUT, 
TEN_MINUTE_TIMEOUT, uniqueName, makeClientSDK, cleanUpPolicy } from '../utils'; +import { SHORT_TIMEOUT, LONG_TIMEOUT, uniqueName, makeClientSDK, cleanUpPolicy } from '../utils'; import { Blueprint, Devbox, NetworkPolicy, StorageObject } from '@runloop/api-client/sdk'; const sdk = makeClientSDK(); @@ -19,7 +19,7 @@ describe('smoketest: object-oriented blueprint', () => { { polling: { timeoutMs: 10 * 60 * 1000 } }, ); blueprintId = blueprint.id; - }, TEN_MINUTE_TIMEOUT); + }, LONG_TIMEOUT); afterAll(async () => { if (blueprint) { @@ -63,7 +63,7 @@ describe('smoketest: object-oriented blueprint', () => { // Clean up the devbox await devbox.shutdown(); }, - THIRTY_SECOND_TIMEOUT, + SHORT_TIMEOUT, ); test( @@ -85,7 +85,7 @@ describe('smoketest: object-oriented blueprint', () => { } } }, - THIRTY_SECOND_TIMEOUT, + SHORT_TIMEOUT, ); test('delete blueprint', async () => { @@ -193,7 +193,7 @@ COPY . .`, } } }, - TEN_MINUTE_TIMEOUT, + LONG_TIMEOUT, ); test( @@ -267,7 +267,7 @@ COPY . .`, } } }, - TEN_MINUTE_TIMEOUT, + LONG_TIMEOUT, ); }); @@ -301,7 +301,7 @@ COPY . .`, } } }, - TEN_MINUTE_TIMEOUT, + LONG_TIMEOUT, ); }); @@ -342,7 +342,7 @@ COPY . .`, await cleanUpPolicy(policy); } }, - TEN_MINUTE_TIMEOUT, + LONG_TIMEOUT, ); test( @@ -397,7 +397,7 @@ COPY . 
.`, await cleanUpPolicy(policy); } }, - TEN_MINUTE_TIMEOUT, + LONG_TIMEOUT, ); }); }); diff --git a/tests/smoketests/object-oriented/devbox.test.ts b/tests/smoketests/object-oriented/devbox.test.ts index 729ecc242..1fa2f78ff 100644 --- a/tests/smoketests/object-oriented/devbox.test.ts +++ b/tests/smoketests/object-oriented/devbox.test.ts @@ -1,6 +1,6 @@ import { toFile } from '@runloop/api-client'; import { Devbox, NetworkPolicy } from '@runloop/api-client/sdk'; -import { makeClientSDK, THIRTY_SECOND_TIMEOUT, TEN_MINUTE_TIMEOUT, uniqueName, cleanUpPolicy } from '../utils'; +import { makeClientSDK, SHORT_TIMEOUT, LONG_TIMEOUT, uniqueName, cleanUpPolicy } from '../utils'; import { uuidv7 } from 'uuidv7'; const sdk = makeClientSDK(); @@ -17,7 +17,7 @@ describe('smoketest: object-oriented devbox', () => { launch_parameters: { resource_size_request: 'X_SMALL', keep_alive_time_seconds: 60 * 5 }, // 5 minutes }); devboxId = devbox.id; - }, THIRTY_SECOND_TIMEOUT); + }, SHORT_TIMEOUT); afterAll(async () => { if (devbox) { @@ -117,7 +117,7 @@ describe('smoketest: object-oriented devbox', () => { } } }, - THIRTY_SECOND_TIMEOUT, + SHORT_TIMEOUT, ); test( @@ -156,7 +156,7 @@ describe('smoketest: object-oriented devbox', () => { await cleanUpPolicy(policy); } }, - THIRTY_SECOND_TIMEOUT, + SHORT_TIMEOUT, ); }); @@ -311,7 +311,7 @@ describe('smoketest: object-oriented devbox', () => { await devbox.shutdown(); await blueprint.delete(); }, - TEN_MINUTE_TIMEOUT, + LONG_TIMEOUT, ); test( @@ -344,7 +344,7 @@ describe('smoketest: object-oriented devbox', () => { await devbox.shutdown(); await blueprint.delete(); }, - TEN_MINUTE_TIMEOUT, + LONG_TIMEOUT, ); test('create devbox from snapshot', async () => { @@ -389,7 +389,7 @@ describe('smoketest: object-oriented devbox', () => { name: uniqueName('sdk-devbox-streaming'), launch_parameters: { resource_size_request: 'X_SMALL', keep_alive_time_seconds: 60 * 5 }, }); - }, THIRTY_SECOND_TIMEOUT); + }, SHORT_TIMEOUT); afterAll(async () => { if 
(devbox) { @@ -671,7 +671,7 @@ describe('smoketest: object-oriented devbox', () => { name: uniqueName('sdk-devbox-named-shell'), launch_parameters: { resource_size_request: 'X_SMALL', keep_alive_time_seconds: 60 * 5 }, }); - }, THIRTY_SECOND_TIMEOUT); + }, SHORT_TIMEOUT); afterAll(async () => { if (devbox) { diff --git a/tests/smoketests/object-oriented/network-policy.test.ts b/tests/smoketests/object-oriented/network-policy.test.ts index a85b62987..21c60edbd 100644 --- a/tests/smoketests/object-oriented/network-policy.test.ts +++ b/tests/smoketests/object-oriented/network-policy.test.ts @@ -1,4 +1,4 @@ -import { THIRTY_SECOND_TIMEOUT, uniqueName, makeClientSDK, cleanUpPolicy } from '../utils'; +import { SHORT_TIMEOUT, uniqueName, makeClientSDK, cleanUpPolicy } from '../utils'; import { NetworkPolicy } from '@runloop/api-client/sdk'; const sdk = makeClientSDK(); @@ -18,7 +18,7 @@ describe('smoketest: object-oriented network policy', () => { description: 'Test network policy', }); policyId = policy.id; - }, THIRTY_SECOND_TIMEOUT); + }, SHORT_TIMEOUT); afterAll(async () => { await cleanUpPolicy(policy); diff --git a/tests/smoketests/object-oriented/scenario-run.test.ts b/tests/smoketests/object-oriented/scenario-run.test.ts new file mode 100644 index 000000000..6431487f2 --- /dev/null +++ b/tests/smoketests/object-oriented/scenario-run.test.ts @@ -0,0 +1,279 @@ +import { ScenarioRun } from '@runloop/api-client/sdk'; +import { makeClient, SHORT_TIMEOUT, uniqueName } from '../utils'; + +const client = makeClient(); + +describe('smoketest: object-oriented scenario-run', () => { + let scenarioId: string | undefined; + let runId: string | undefined; + let devboxId: string | undefined; + + // Create a scenario to use for testing + beforeAll(async () => { + const scenario = await client.scenarios.create({ + name: uniqueName('sdk-scenario-run-test'), + input_context: { problem_statement: 'Test problem statement' }, + scoring_contract: { + scoring_function_parameters: [ + { + 
name: 'test-scorer', + scorer: { type: 'command_scorer', command: 'true' }, + weight: 1, + }, + ], + }, + }); + scenarioId = scenario.id; + }, SHORT_TIMEOUT); + + afterAll(async () => { + // Cleanup: shutdown devbox if still running + if (devboxId) { + try { + await client.devboxes.shutdown(devboxId); + } catch (e) { + // Ignore errors during cleanup + } + } + }); + + describe('ScenarioRun lifecycle', () => { + let run: ScenarioRun; + + test( + 'create scenario run from API and wrap with ScenarioRun', + async () => { + // Start a scenario run using the base API + const runView = await client.scenarios.startRun({ + scenario_id: scenarioId!, + run_name: uniqueName('sdk-run'), + }); + + expect(runView).toBeDefined(); + expect(runView.id).toBeTruthy(); + expect(runView.devbox_id).toBeTruthy(); + + runId = runView.id; + devboxId = runView.devbox_id; + + // Wrap with ScenarioRun + run = new ScenarioRun(client, runView.id, runView.devbox_id); + + expect(run).toBeInstanceOf(ScenarioRun); + expect(run.id).toBe(runView.id); + expect(run.devboxId).toBe(runView.devbox_id); + }, + SHORT_TIMEOUT, + ); + + test( + 'awaitEnvReady - wait for devbox to be ready', + async () => { + expect(run).toBeDefined(); + + const result = await run.awaitEnvReady({ + polling: { maxAttempts: 120, pollingIntervalMs: 5_000, timeoutMs: 20 * 60 * 1000 }, + }); + + expect(result).toBeDefined(); + expect(result.id).toBe(runId); + expect(['running', 'scoring', 'scored', 'completed']).toContain(result.state); + }, + SHORT_TIMEOUT, + ); + + test('getInfo - retrieve scenario run info', async () => { + expect(run).toBeDefined(); + + const info = await run.getInfo(); + + expect(info).toBeDefined(); + expect(info.id).toBe(runId); + expect(info.devbox_id).toBe(devboxId); + expect(info.scenario_id).toBe(scenarioId); + }); + + test('devbox property - access the devbox', async () => { + expect(run).toBeDefined(); + + const devbox = run.devbox; + + expect(devbox).toBeDefined(); + expect(devbox.id).toBe(devboxId); + 
+ // Verify devbox is functional + const result = await devbox.cmd.exec('echo "Hello from ScenarioRun devbox"'); + expect(result.exitCode).toBe(0); + const output = await result.stdout(); + expect(output).toContain('Hello from ScenarioRun devbox'); + }); + + test( + 'scoreAndComplete - score and complete the run', + async () => { + expect(run).toBeDefined(); + + const result = await run.scoreAndComplete({ + polling: { maxAttempts: 120, pollingIntervalMs: 5_000, timeoutMs: 20 * 60 * 1000 }, + }); + + expect(result).toBeDefined(); + expect(['completed', 'scored', 'failed', 'timeout', 'canceled']).toContain(result.state); + }, + SHORT_TIMEOUT, + ); + + test('getScore - retrieve scoring result', async () => { + expect(run).toBeDefined(); + + const score = await run.getScore(); + + // Score should be available after scoreAndComplete + if (score) { + expect(typeof score.score).toBe('number'); + expect(score.score).toBeGreaterThanOrEqual(0); + expect(score.score).toBeLessThanOrEqual(1); + expect(Array.isArray(score.scoring_function_results)).toBe(true); + } + }); + }); + + describe('ScenarioRun cancellation', () => { + let run: ScenarioRun; + let cancelDevboxId: string | undefined; + + afterAll(async () => { + // Cleanup + if (cancelDevboxId) { + try { + await client.devboxes.shutdown(cancelDevboxId); + } catch (e) { + // Ignore + } + } + }); + + test( + 'cancel - cancel a running scenario', + async () => { + // Start a new run + const runView = await client.scenarios.startRun({ + scenario_id: scenarioId!, + run_name: uniqueName('sdk-run-cancel'), + }); + + cancelDevboxId = runView.devbox_id; + run = new ScenarioRun(client, runView.id, runView.devbox_id); + + // Wait for environment to be ready + await run.awaitEnvReady({ + polling: { maxAttempts: 120, pollingIntervalMs: 5_000, timeoutMs: 20 * 60 * 1000 }, + }); + + // Cancel the run + const result = await run.cancel(); + + expect(result).toBeDefined(); + expect(['canceled', 'completed', 
'failed']).toContain(result.state); + }, + SHORT_TIMEOUT, + ); + }); + + describe('ScenarioRun score and await separately', () => { + let run: ScenarioRun; + let scoreDevboxId: string | undefined; + + afterAll(async () => { + // Cleanup + if (scoreDevboxId) { + try { + await client.devboxes.shutdown(scoreDevboxId); + } catch (e) { + // Ignore + } + } + }); + + test( + 'score and awaitScored - score then wait separately', + async () => { + // Start a new run + const runView = await client.scenarios.startRun({ + scenario_id: scenarioId!, + run_name: uniqueName('sdk-run-score-await'), + }); + + scoreDevboxId = runView.devbox_id; + run = new ScenarioRun(client, runView.id, runView.devbox_id); + + // Wait for environment to be ready + await run.awaitEnvReady({ + polling: { maxAttempts: 120, pollingIntervalMs: 5_000, timeoutMs: 20 * 60 * 1000 }, + }); + + // Score + const scoreResult = await run.score(); + expect(scoreResult).toBeDefined(); + expect(['scoring', 'scored', 'completed', 'failed']).toContain(scoreResult.state); + + // Wait for scoring to complete + const awaitResult = await run.awaitScored({ + polling: { maxAttempts: 120, pollingIntervalMs: 5_000, timeoutMs: 20 * 60 * 1000 }, + }); + expect(awaitResult).toBeDefined(); + expect(['scored', 'completed', 'failed']).toContain(awaitResult.state); + + // Complete the run + await run.complete(); + }, + SHORT_TIMEOUT, + ); + }); + + describe('ScenarioRun scoreAndAwait', () => { + let run: ScenarioRun; + let scoreAwaitDevboxId: string | undefined; + + afterAll(async () => { + // Cleanup + if (scoreAwaitDevboxId) { + try { + await client.devboxes.shutdown(scoreAwaitDevboxId); + } catch (e) { + // Ignore + } + } + }); + + test( + 'scoreAndAwait - score and wait in one call', + async () => { + // Start a new run + const runView = await client.scenarios.startRun({ + scenario_id: scenarioId!, + run_name: uniqueName('sdk-run-score-and-await'), + }); + + scoreAwaitDevboxId = runView.devbox_id; + run = new ScenarioRun(client, 
runView.id, runView.devbox_id); + + // Wait for environment to be ready + await run.awaitEnvReady({ + polling: { maxAttempts: 120, pollingIntervalMs: 5_000, timeoutMs: 20 * 60 * 1000 }, + }); + + // Score and await in one call + const result = await run.scoreAndAwait({ + polling: { maxAttempts: 120, pollingIntervalMs: 5_000, timeoutMs: 20 * 60 * 1000 }, + }); + expect(result).toBeDefined(); + expect(['scored', 'completed', 'failed']).toContain(result.state); + + // Complete the run + await run.complete(); + }, + SHORT_TIMEOUT, + ); + }); +}); diff --git a/tests/smoketests/object-oriented/storage-object.test.ts b/tests/smoketests/object-oriented/storage-object.test.ts index 4ec15ead2..0f7936f09 100644 --- a/tests/smoketests/object-oriented/storage-object.test.ts +++ b/tests/smoketests/object-oriented/storage-object.test.ts @@ -1,5 +1,5 @@ import { ReadEntry } from 'tar'; -import { THIRTY_SECOND_TIMEOUT, uniqueName, makeClientSDK } from '../utils'; +import { SHORT_TIMEOUT, uniqueName, makeClientSDK } from '../utils'; import { Devbox, StorageObject } from '@runloop/api-client/sdk'; const sdk = makeClientSDK(); @@ -24,7 +24,7 @@ describe('smoketest: object-oriented storage object', () => { } } }, - THIRTY_SECOND_TIMEOUT, + SHORT_TIMEOUT, ); test('get storage object info', async () => { diff --git a/tests/smoketests/scenarios-benchmarks.test.ts b/tests/smoketests/scenarios-benchmarks.test.ts index e49ebabc5..6d3dea467 100644 --- a/tests/smoketests/scenarios-benchmarks.test.ts +++ b/tests/smoketests/scenarios-benchmarks.test.ts @@ -1,5 +1,5 @@ import { ScenarioRunView } from '@runloop/api-client/resources/scenarios'; -import { makeClient, THIRTY_SECOND_TIMEOUT, uniqueName } from './utils'; +import { makeClient, SHORT_TIMEOUT, uniqueName } from './utils'; const client = makeClient(); @@ -25,7 +25,7 @@ describe('smoketest: scenarios and benchmarks', () => { }); scenarioId = scenario.id; }, - THIRTY_SECOND_TIMEOUT, + SHORT_TIMEOUT, ); test( @@ -40,7 +40,7 @@ 
describe('smoketest: scenarios and benchmarks', () => { expect(run.scenario_id).toBe(scenarioId); runId = run.id; }, - THIRTY_SECOND_TIMEOUT, + SHORT_TIMEOUT, ); test( @@ -58,7 +58,7 @@ describe('smoketest: scenarios and benchmarks', () => { } } }, - THIRTY_SECOND_TIMEOUT, + SHORT_TIMEOUT, ); test( @@ -73,6 +73,6 @@ describe('smoketest: scenarios and benchmarks', () => { const run = await client.benchmarks.startRun({ benchmark_id: benchmark.id }); expect(run.benchmark_id).toBe(benchmark.id); }, - THIRTY_SECOND_TIMEOUT, + SHORT_TIMEOUT, ); }); diff --git a/tests/smoketests/utils.ts b/tests/smoketests/utils.ts index 3471c861e..15f551750 100644 --- a/tests/smoketests/utils.ts +++ b/tests/smoketests/utils.ts @@ -26,9 +26,9 @@ export function makeClientSDK() { export const uniqueName = (prefix: string) => `${prefix}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`; -export const THIRTY_SECOND_TIMEOUT = 120_000; -export const FIVE_MINUTE_TIMEOUT = 300_000; -export const TEN_MINUTE_TIMEOUT = 600_000; +export const SHORT_TIMEOUT = 120_000; +export const MEDIUM_TIMEOUT = 300_000; +export const LONG_TIMEOUT = 600_000; /** * Helper to clean up a network policy, ignoring errors if already deleted.