Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ The SDK provides object-oriented interfaces for all major Runloop resources:
- **[`runloop.blueprint`](https://runloopai.github.io/api-client-ts/stable/classes/BlueprintOps.html)** - Blueprint management (create, list, build blueprints)
- **[`runloop.snapshot`](https://runloopai.github.io/api-client-ts/stable/classes/SnapshotOps.html)** - Snapshot management (list disk snapshots)
- **[`runloop.storageObject`](https://runloopai.github.io/api-client-ts/stable/classes/StorageObjectOps.html)** - Storage object management (upload, download, list objects)
- **[`runloop.scorer`](https://runloopai.github.io/api-client-ts/stable/classes/ScorerOps.html)** - Scorer management (create, list, validate, update)
- **[`runloop.api`](https://runloopai.github.io/api-client-ts/stable/classes/Runloop.html)** - Direct access to the REST API client

## TypeScript Support
Expand All @@ -74,6 +75,25 @@ const runloop = new RunloopSDK();
const devbox: DevboxView = await runloop.devbox.create();
```

### Scorers

Scorers are custom scoring functions used to evaluate scenario outputs. Create scorers via `runloop.scorer.create()`, then update or validate them with the returned `Scorer` instance:

```typescript
import { RunloopSDK } from '@runloop/api-client';

const runloop = new RunloopSDK();

const scorer = await runloop.scorer.create({
type: 'my_scorer',
bash_script: 'echo "1.0"',
});

await scorer.update({ bash_script: 'echo "0.5"' });
const result = await scorer.validate({ scoring_context: { output: 'hello' } });
console.log(result.scoring_result.score);
```

## Migration from API Client

If you're currently using the legacy API, migration is straightforward:
Expand Down
130 changes: 130 additions & 0 deletions src/sdk.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { Blueprint, type CreateParams as BlueprintCreateParams } from './sdk/blu
import { Snapshot } from './sdk/snapshot';
import { StorageObject } from './sdk/storage-object';
import { Agent } from './sdk/agent';
import { Scorer } from './sdk/scorer';

// Import types used in this file
import type {
Expand All @@ -17,6 +18,7 @@ import type {
import type { BlueprintListParams } from './resources/blueprints';
import type { ObjectCreateParams, ObjectListParams } from './resources/objects';
import type { AgentCreateParams, AgentListParams } from './resources/agents';
import type { ScorerCreateParams, ScorerListParams } from './resources/scenarios/scorers';
import { PollingOptions } from './lib/polling';
import * as Shared from './resources/shared';

Expand Down Expand Up @@ -190,6 +192,7 @@ type ContentType = ObjectCreateParams['content_type'];
* - `snapshot` - {@link SnapshotOps}
* - `storageObject` - {@link StorageObjectOps}
* - `agent` - {@link AgentOps}
* - `scorer` - {@link ScorerOps}
*
* See the documentation for each Operations class for more details.
*
Expand Down Expand Up @@ -260,6 +263,14 @@ export class RunloopSDK {
*/
public readonly agent: AgentOps;

/**
* **Scorer Operations** - {@link ScorerOps} for creating and accessing {@link Scorer} class instances.
*
* Scorers are custom scoring functions that evaluate scenario outputs. They define scripts
* that produce a score in the range [0.0, 1.0] for scenario runs.
*/
public readonly scorer: ScorerOps;

/**
* Creates a new RunloopSDK instance.
* @param {ClientOptions} [options] - Optional client configuration options.
Expand All @@ -271,6 +282,7 @@ export class RunloopSDK {
this.snapshot = new SnapshotOps(this.api);
this.storageObject = new StorageObjectOps(this.api);
this.agent = new AgentOps(this.api);
this.scorer = new ScorerOps(this.api);
}
}

Expand Down Expand Up @@ -1255,6 +1267,121 @@ export class AgentOps {
}
}

/**
 * Scorer SDK interface for managing custom scorers.
 *
 * @category Scorer
 *
 * @remarks
 * ## Overview
 *
 * Scorers are custom scoring functions used to evaluate scenario outputs. A scorer is a
 * script that runs and prints a score in the range [0.0, 1.0], e.g. `echo "0.5"`.
 *
 * ## Usage
 *
 * This interface is accessed via {@link RunloopSDK.scorer}. Create scorers with {@link ScorerOps.create}
 * or reference an existing scorer by ID with {@link ScorerOps.fromId} to obtain a {@link Scorer} instance.
 *
 * @example
 * ```typescript
 * import { RunloopSDK } from '@runloop/api-client';
 *
 * const runloop = new RunloopSDK();
 *
 * // Create a scorer
 * const scorer = await runloop.scorer.create({
 *   type: 'my_scorer',
 *   bash_script: 'echo "1.0"',
 * });
 *
 * // Update the scorer
 * await scorer.update({ bash_script: 'echo "0.5"' });
 *
 * // Validate the scorer with a scoring context
 * const result = await scorer.validate({ scoring_context: { output: 'hello' } });
 * console.log(result.scoring_result.score);
 * ```
 *
 * @example
 * Get scorer info (typical usage):
 * ```typescript
 * const runloop = new RunloopSDK();
 * const scorer = await runloop.scorer.create({
 *   type: 'my_scorer',
 *   bash_script: 'echo "1.0"',
 * });
 *
 * const info = await scorer.getInfo();
 * console.log(`Scorer ${info.id} (${info.type})`);
 * ```
 */
export class ScorerOps {
  /**
   * @private
   */
  constructor(private client: RunloopAPI) {}

  /**
   * Create a new custom scorer.
   *
   * Delegates to `Scorer.create`, passing the API client this instance was constructed with.
   *
   * @example
   * ```typescript
   * const runloop = new RunloopSDK();
   * const scorer = await runloop.scorer.create({
   *   type: 'my_scorer',
   *   bash_script: 'echo "1.0"',
   * });
   *
   * const info = await scorer.getInfo();
   * console.log(info.id);
   * ```
   *
   * @param {ScorerCreateParams} params - Parameters for creating the scorer
   * @param {Core.RequestOptions} [options] - Request options
   * @returns {Promise<Scorer>} A {@link Scorer} instance
   */
  async create(params: ScorerCreateParams, options?: Core.RequestOptions): Promise<Scorer> {
    return Scorer.create(this.client, params, options);
  }

  /**
   * Get a scorer object by its ID.
   *
   * Returns synchronously; delegates to `Scorer.fromId` with this instance's API client.
   *
   * @example
   * ```typescript
   * const runloop = new RunloopSDK();
   * const scorer = runloop.scorer.fromId('scs_123');
   * const info = await scorer.getInfo();
   * console.log(info.type);
   * ```
   *
   * @param {string} id - The ID of the scorer
   * @returns {Scorer} A {@link Scorer} instance
   */
  fromId(id: string): Scorer {
    return Scorer.fromId(this.client, id);
  }

  /**
   * List all scorers with optional filters.
   *
   * Delegates to `Scorer.list`, passing the API client this instance was constructed with.
   *
   * @example
   * ```typescript
   * const runloop = new RunloopSDK();
   * const scorers = await runloop.scorer.list({ limit: 10 });
   * console.log(scorers.map((s) => s.id));
   * ```
   *
   * @param {ScorerListParams} [params] - Optional filter parameters
   * @param {Core.RequestOptions} [options] - Request options
   * @returns {Promise<Scorer[]>} An array of {@link Scorer} instances
   */
  async list(params?: ScorerListParams, options?: Core.RequestOptions): Promise<Scorer[]> {
    return Scorer.list(this.client, params, options);
  }
}

// @deprecated Use {@link RunloopSDK} instead.
/**
* @deprecated Use {@link RunloopSDK} instead.
Expand All @@ -1275,11 +1402,13 @@ export declare namespace RunloopSDK {
SnapshotOps as SnapshotOps,
StorageObjectOps as StorageObjectOps,
AgentOps as AgentOps,
ScorerOps as ScorerOps,
Devbox as Devbox,
Blueprint as Blueprint,
Snapshot as Snapshot,
StorageObject as StorageObject,
Agent as Agent,
Scorer as Scorer,
};
}
// Export SDK classes from sdk/sdk.ts - these are separate from RunloopSDK to avoid circular dependencies
Expand All @@ -1293,6 +1422,7 @@ export {
Snapshot,
StorageObject,
Agent,
Scorer,
Execution,
ExecutionResult,
} from './sdk/index';
1 change: 1 addition & 0 deletions src/sdk/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ export { StorageObject } from './storage-object';
export { Agent } from './agent';
export { Execution } from './execution';
export { ExecutionResult } from './execution-result';
export { Scorer } from './scorer';
Loading
Loading