diff --git a/.release-please-manifest.json b/.release-please-manifest.json index a65cc01f9..989bed91e 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "4.99.0" + ".": "4.100.0" } diff --git a/.stats.yml b/.stats.yml index a3c5d081d..afa33d93b 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 101 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-5fa16b9a02985ae06e41be14946a9c325dc672fb014b3c19abca65880c6990e6.yml -openapi_spec_hash: da3e669f65130043b1170048c0727890 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-262e171d0a8150ea1192474d16ba3afdf9a054b399f1a49a9c9b697a3073c136.yml +openapi_spec_hash: 33e00a48df8f94c94f46290c489f132b config_hash: d8d5fda350f6db77c784f35429741a2e diff --git a/CHANGELOG.md b/CHANGELOG.md index 2a552b8ed..adda41e52 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,18 @@ # Changelog +## 4.100.0 (2025-05-16) + +Full Changelog: [v4.99.0...v4.100.0](https://github.com/openai/openai-node/compare/v4.99.0...v4.100.0) + +### Features + +* **api:** further updates for evals API ([3f6f248](https://github.com/openai/openai-node/commit/3f6f248191b45015924be76fd5154d149c4ed8a0)) + + +### Chores + +* **internal:** version bump ([5123fe0](https://github.com/openai/openai-node/commit/5123fe08a56f3d0040b1cc67129382f3eacc3cca)) + ## 4.99.0 (2025-05-16) Full Changelog: [v4.98.0...v4.99.0](https://github.com/openai/openai-node/compare/v4.98.0...v4.99.0) diff --git a/jsr.json b/jsr.json index 2f29927c6..3c2d41b0f 100644 --- a/jsr.json +++ b/jsr.json @@ -1,6 +1,6 @@ { "name": "@openai/openai", - "version": "4.99.0", + "version": "4.100.0", "exports": { ".": "./index.ts", "./helpers/zod": "./helpers/zod.ts", diff --git a/package.json b/package.json index 0d756ef85..23205e569 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "openai", - "version": "4.99.0", + "version": "4.100.0", "description": "The official TypeScript library for the OpenAI API", "author": "OpenAI ", "types": "dist/index.d.ts", diff --git a/src/resources/beta/realtime/realtime.ts b/src/resources/beta/realtime/realtime.ts index 5012b1edd..26fba883e 100644 --- a/src/resources/beta/realtime/realtime.ts +++ b/src/resources/beta/realtime/realtime.ts @@ -2571,7 +2571,7 @@ export interface TranscriptionSessionUpdatedEvent { * A new Realtime transcription session configuration. * * When a session is created on the server via REST API, the session object also - * contains an ephemeral key. Default TTL for keys is one minute. This property is + * contains an ephemeral key. Default TTL for keys is 10 minutes. This property is * not present when a session is updated via the WebSocket API. */ session: TranscriptionSessionsAPI.TranscriptionSession; diff --git a/src/resources/beta/realtime/transcription-sessions.ts b/src/resources/beta/realtime/transcription-sessions.ts index 61e58a8e8..83e8c47ad 100644 --- a/src/resources/beta/realtime/transcription-sessions.ts +++ b/src/resources/beta/realtime/transcription-sessions.ts @@ -35,7 +35,7 @@ export class TranscriptionSessions extends APIResource { * A new Realtime transcription session configuration. * * When a session is created on the server via REST API, the session object also - * contains an ephemeral key. Default TTL for keys is one minute. This property is + * contains an ephemeral key. Default TTL for keys is 10 minutes. This property is * not present when a session is updated via the WebSocket API. */ export interface TranscriptionSession { diff --git a/src/resources/evals/evals.ts b/src/resources/evals/evals.ts index 08c898ace..05a656619 100644 --- a/src/resources/evals/evals.ts +++ b/src/resources/evals/evals.ts @@ -28,7 +28,8 @@ export class Evals extends APIResource { /** * Create the structure of an evaluation that can be used to test a model's - * performance. An evaluation is a set of testing criteria and a datasource. After + * performance. An evaluation is a set of testing criteria and the config for a + * data source, which dictates the schema of the data used in the evaluation. After * creating an evaluation, you can run it on different models and model parameters. * We support several types of graders and datasources. For more information, see * the [Evals guide](https://platform.openai.com/docs/guides/evals). @@ -115,9 +116,9 @@ export interface EvalStoredCompletionsDataSourceConfig { schema: Record; /** - * The type of data source. Always `stored-completions`. + * The type of data source. Always `stored_completions`. */ - type: 'stored-completions'; + type: 'stored_completions'; /** * Set of 16 key-value pairs that can be attached to an object. This can be useful @@ -136,7 +137,7 @@ export interface EvalStoredCompletionsDataSourceConfig { * * - Improve the quality of my chatbot * - See how well my chatbot handles customer support - * - Check if o3-mini is better at my usecase than gpt-4o + * - Check if o4-mini is better at my usecase than gpt-4o */ export interface EvalCreateResponse { /** @@ -257,7 +258,7 @@ export namespace EvalCreateResponse { * * - Improve the quality of my chatbot * - See how well my chatbot handles customer support - * - Check if o3-mini is better at my usecase than gpt-4o + * - Check if o4-mini is better at my usecase than gpt-4o */ export interface EvalRetrieveResponse { /** @@ -378,7 +379,7 @@ export namespace EvalRetrieveResponse { * * - Improve the quality of my chatbot * - See how well my chatbot handles customer support - * - Check if o3-mini is better at my usecase than gpt-4o + * - Check if o4-mini is better at my usecase than gpt-4o */ export interface EvalUpdateResponse { /** @@ -499,7 +500,7 @@ export namespace EvalUpdateResponse { * * - Improve the quality of my chatbot * - See how well my chatbot handles customer support - * - Check if o3-mini is better at my usecase than gpt-4o + * - Check if o4-mini is better at my usecase than gpt-4o */ export interface EvalListResponse { /** @@ -624,12 +625,16 @@ export interface EvalDeleteResponse { export interface EvalCreateParams { /** - * The configuration for the data source used for the evaluation runs. + * The configuration for the data source used for the evaluation runs. Dictates the + * schema of the data used in the evaluation. */ data_source_config: EvalCreateParams.Custom | EvalCreateParams.Logs | EvalCreateParams.StoredCompletions; /** - * A list of graders for all eval runs in this group. + * A list of graders for all eval runs in this group. Graders can reference + * variables in the data source using double curly braces notation, like + * `{{item.variable_name}}`. To reference the model's output, use the `sample` + * namespace (ie, `{{sample.output_text}}`). */ testing_criteria: Array< | EvalCreateParams.LabelModel @@ -699,13 +704,13 @@ export namespace EvalCreateParams { } /** - * Deprecated in favor of LogsDataSourceConfig. + * @deprecated Deprecated in favor of LogsDataSourceConfig. */ export interface StoredCompletions { /** - * The type of data source. Always `stored-completions`. + * The type of data source. Always `stored_completions`. */ - type: 'stored-completions'; + type: 'stored_completions'; /** * Metadata filters for the stored completions data source. @@ -720,7 +725,7 @@ export namespace EvalCreateParams { export interface LabelModel { /** * A list of chat messages forming the prompt or context. May include variable - * references to the "item" namespace, ie {{item.name}}. + * references to the `item` namespace, ie {{item.name}}. */ input: Array; diff --git a/src/resources/evals/runs/runs.ts b/src/resources/evals/runs/runs.ts index 31883e6b5..e761e2160 100644 --- a/src/resources/evals/runs/runs.ts +++ b/src/resources/evals/runs/runs.ts @@ -19,7 +19,9 @@ export class Runs extends APIResource { outputItems: OutputItemsAPI.OutputItems = new OutputItemsAPI.OutputItems(this._client); /** - * Create a new evaluation run. This is the endpoint that will kick off grading. + * Kicks off a new run for a given evaluation, specifying the data source, and what + * model configuration to use to test. The datasource will be validated against the + * schema specified in the config of the evaluation. */ create( evalId: string, @@ -85,7 +87,7 @@ export class RunListResponsesPage extends CursorPage {} */ export interface CreateEvalCompletionsRunDataSource { /** - * A StoredCompletionsRunDataSource configuration describing a set of filters + * Determines what populates the `item` namespace in this run's data source. */ source: | CreateEvalCompletionsRunDataSource.FileContent @@ -97,6 +99,12 @@ export interface CreateEvalCompletionsRunDataSource { */ type: 'completions'; + /** + * Used when sampling from a model. Dictates the structure of the messages passed + * into the model. Can either be a reference to a prebuilt trajectory (ie, + * `item.input_trajectory`), or a template with variable references to the `item` + * namespace. + */ input_messages?: | CreateEvalCompletionsRunDataSource.Template | CreateEvalCompletionsRunDataSource.ItemReference; @@ -185,7 +193,7 @@ export namespace CreateEvalCompletionsRunDataSource { export interface Template { /** * A list of chat messages forming the prompt or context. May include variable - * references to the "item" namespace, ie {{item.name}}. + * references to the `item` namespace, ie {{item.name}}. */ template: Array; @@ -241,7 +249,7 @@ export namespace CreateEvalCompletionsRunDataSource { export interface ItemReference { /** - * A reference to a variable in the "item" namespace. Ie, "item.name" + * A reference to a variable in the `item` namespace. Ie, "item.input_trajectory" */ item_reference: string; @@ -279,6 +287,9 @@ export namespace CreateEvalCompletionsRunDataSource { * eval */ export interface CreateEvalJSONLRunDataSource { + /** + * Determines what populates the `item` namespace in the data source. + */ source: CreateEvalJSONLRunDataSource.FileContent | CreateEvalJSONLRunDataSource.FileID; /** @@ -425,7 +436,7 @@ export namespace RunCreateResponse { */ export interface Responses { /** - * A EvalResponsesSource object describing a run data source configuration. + * Determines what populates the `item` namespace in this run's data source. */ source: Responses.FileContent | Responses.FileID | Responses.Responses; @@ -434,6 +445,12 @@ export namespace RunCreateResponse { */ type: 'responses'; + /** + * Used when sampling from a model. Dictates the structure of the messages passed + * into the model. Can either be a reference to a prebuilt trajectory (ie, + * `item.input_trajectory`), or a template with variable references to the `item` + * namespace. + */ input_messages?: Responses.Template | Responses.ItemReference; /** @@ -498,12 +515,6 @@ export namespace RunCreateResponse { */ created_before?: number | null; - /** - * Whether the response has tool calls. This is a query parameter used to select - * responses. - */ - has_tool_calls?: boolean | null; - /** * Optional string to search the 'instructions' field. This is a query parameter * used to select responses. @@ -552,7 +563,7 @@ export namespace RunCreateResponse { export interface Template { /** * A list of chat messages forming the prompt or context. May include variable - * references to the "item" namespace, ie {{item.name}}. + * references to the `item` namespace, ie {{item.name}}. */ template: Array; @@ -620,7 +631,7 @@ export namespace RunCreateResponse { export interface ItemReference { /** - * A reference to a variable in the "item" namespace. Ie, "item.name" + * A reference to a variable in the `item` namespace. Ie, "item.name" */ item_reference: string; @@ -817,7 +828,7 @@ export namespace RunRetrieveResponse { */ export interface Responses { /** - * A EvalResponsesSource object describing a run data source configuration. + * Determines what populates the `item` namespace in this run's data source. */ source: Responses.FileContent | Responses.FileID | Responses.Responses; @@ -826,6 +837,12 @@ export namespace RunRetrieveResponse { */ type: 'responses'; + /** + * Used when sampling from a model. Dictates the structure of the messages passed + * into the model. Can either be a reference to a prebuilt trajectory (ie, + * `item.input_trajectory`), or a template with variable references to the `item` + * namespace. + */ input_messages?: Responses.Template | Responses.ItemReference; /** @@ -890,12 +907,6 @@ export namespace RunRetrieveResponse { */ created_before?: number | null; - /** - * Whether the response has tool calls. This is a query parameter used to select - * responses. - */ - has_tool_calls?: boolean | null; - /** * Optional string to search the 'instructions' field. This is a query parameter * used to select responses. @@ -944,7 +955,7 @@ export namespace RunRetrieveResponse { export interface Template { /** * A list of chat messages forming the prompt or context. May include variable - * references to the "item" namespace, ie {{item.name}}. + * references to the `item` namespace, ie {{item.name}}. */ template: Array; @@ -1012,7 +1023,7 @@ export namespace RunRetrieveResponse { export interface ItemReference { /** - * A reference to a variable in the "item" namespace. Ie, "item.name" + * A reference to a variable in the `item` namespace. Ie, "item.name" */ item_reference: string; @@ -1206,7 +1217,7 @@ export namespace RunListResponse { */ export interface Responses { /** - * A EvalResponsesSource object describing a run data source configuration. + * Determines what populates the `item` namespace in this run's data source. */ source: Responses.FileContent | Responses.FileID | Responses.Responses; @@ -1215,6 +1226,12 @@ export namespace RunListResponse { */ type: 'responses'; + /** + * Used when sampling from a model. Dictates the structure of the messages passed + * into the model. Can either be a reference to a prebuilt trajectory (ie, + * `item.input_trajectory`), or a template with variable references to the `item` + * namespace. + */ input_messages?: Responses.Template | Responses.ItemReference; /** @@ -1279,12 +1296,6 @@ export namespace RunListResponse { */ created_before?: number | null; - /** - * Whether the response has tool calls. This is a query parameter used to select - * responses. - */ - has_tool_calls?: boolean | null; - /** * Optional string to search the 'instructions' field. This is a query parameter * used to select responses. @@ -1333,7 +1344,7 @@ export namespace RunListResponse { export interface Template { /** * A list of chat messages forming the prompt or context. May include variable - * references to the "item" namespace, ie {{item.name}}. + * references to the `item` namespace, ie {{item.name}}. */ template: Array; @@ -1401,7 +1412,7 @@ export namespace RunListResponse { export interface ItemReference { /** - * A reference to a variable in the "item" namespace. Ie, "item.name" + * A reference to a variable in the `item` namespace. Ie, "item.name" */ item_reference: string; @@ -1606,7 +1617,7 @@ export namespace RunCancelResponse { */ export interface Responses { /** - * A EvalResponsesSource object describing a run data source configuration. + * Determines what populates the `item` namespace in this run's data source. */ source: Responses.FileContent | Responses.FileID | Responses.Responses; @@ -1615,6 +1626,12 @@ export namespace RunCancelResponse { */ type: 'responses'; + /** + * Used when sampling from a model. Dictates the structure of the messages passed + * into the model. Can either be a reference to a prebuilt trajectory (ie, + * `item.input_trajectory`), or a template with variable references to the `item` + * namespace. + */ input_messages?: Responses.Template | Responses.ItemReference; /** @@ -1679,12 +1696,6 @@ export namespace RunCancelResponse { */ created_before?: number | null; - /** - * Whether the response has tool calls. This is a query parameter used to select - * responses. - */ - has_tool_calls?: boolean | null; - /** * Optional string to search the 'instructions' field. This is a query parameter * used to select responses. @@ -1733,7 +1744,7 @@ export namespace RunCancelResponse { export interface Template { /** * A list of chat messages forming the prompt or context. May include variable - * references to the "item" namespace, ie {{item.name}}. + * references to the `item` namespace, ie {{item.name}}. */ template: Array; @@ -1801,7 +1812,7 @@ export namespace RunCancelResponse { export interface ItemReference { /** - * A reference to a variable in the "item" namespace. Ie, "item.name" + * A reference to a variable in the `item` namespace. Ie, "item.name" */ item_reference: string; @@ -1940,7 +1951,7 @@ export namespace RunCreateParams { */ export interface CreateEvalResponsesRunDataSource { /** - * A EvalResponsesSource object describing a run data source configuration. + * Determines what populates the `item` namespace in this run's data source. */ source: | CreateEvalResponsesRunDataSource.FileContent @@ -1952,6 +1963,12 @@ export namespace RunCreateParams { */ type: 'responses'; + /** + * Used when sampling from a model. Dictates the structure of the messages passed + * into the model. Can either be a reference to a prebuilt trajectory (ie, + * `item.input_trajectory`), or a template with variable references to the `item` + * namespace. + */ input_messages?: | CreateEvalResponsesRunDataSource.Template | CreateEvalResponsesRunDataSource.ItemReference; @@ -2018,12 +2035,6 @@ export namespace RunCreateParams { */ created_before?: number | null; - /** - * Whether the response has tool calls. This is a query parameter used to select - * responses. - */ - has_tool_calls?: boolean | null; - /** * Optional string to search the 'instructions' field. This is a query parameter * used to select responses. @@ -2072,7 +2083,7 @@ export namespace RunCreateParams { export interface Template { /** * A list of chat messages forming the prompt or context. May include variable - * references to the "item" namespace, ie {{item.name}}. + * references to the `item` namespace, ie {{item.name}}. */ template: Array; @@ -2140,7 +2151,7 @@ export namespace RunCreateParams { export interface ItemReference { /** - * A reference to a variable in the "item" namespace. Ie, "item.name" + * A reference to a variable in the `item` namespace. Ie, "item.name" */ item_reference: string; diff --git a/src/version.ts b/src/version.ts index c7ee5a162..62b43ffce 100644 --- a/src/version.ts +++ b/src/version.ts @@ -1 +1 @@ -export const VERSION = '4.99.0'; // x-release-please-version +export const VERSION = '4.100.0'; // x-release-please-version