openai · kwhinnery-openai · May 16, 2025 · May 16, 2025 · May 16, 2025 · May 16, 2025
@@ -1,3 +1,3 @@
 {
-  ".": "4.99.0"
+  ".": "4.100.0"
 }
@@ -1,4 +1,4 @@
 configured_endpoints: 101
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-5fa16b9a02985ae06e41be14946a9c325dc672fb014b3c19abca65880c6990e6.yml
-openapi_spec_hash: da3e669f65130043b1170048c0727890
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-262e171d0a8150ea1192474d16ba3afdf9a054b399f1a49a9c9b697a3073c136.yml
+openapi_spec_hash: 33e00a48df8f94c94f46290c489f132b
 config_hash: d8d5fda350f6db77c784f35429741a2e
@@ -1,5 +1,18 @@
 # Changelog
 
+## 4.100.0 (2025-05-16)
+
+Full Changelog: [v4.99.0...v4.100.0](https://github.com/openai/openai-node/compare/v4.99.0...v4.100.0)
+
+### Features
+
+* **api:** further updates for evals API ([3f6f248](https://github.com/openai/openai-node/commit/3f6f248191b45015924be76fd5154d149c4ed8a0))
+
+
+### Chores
+
+* **internal:** version bump ([5123fe0](https://github.com/openai/openai-node/commit/5123fe08a56f3d0040b1cc67129382f3eacc3cca))
+
 ## 4.99.0 (2025-05-16)
 
 Full Changelog: [v4.98.0...v4.99.0](https://github.com/openai/openai-node/compare/v4.98.0...v4.99.0)

@@ -1,6 +1,6 @@
 {
   "name": "@openai/openai",
-  "version": "4.99.0",
+  "version": "4.100.0",
   "exports": {
     ".": "./index.ts",
     "./helpers/zod": "./helpers/zod.ts",

@@ -1,6 +1,6 @@
 {
   "name": "openai",
-  "version": "4.99.0",
+  "version": "4.100.0",
   "description": "The official TypeScript library for the OpenAI API",
   "author": "OpenAI <support@openai.com>",
   "types": "dist/index.d.ts",

@@ -2571,7 +2571,7 @@ export interface TranscriptionSessionUpdatedEvent {
    * A new Realtime transcription session configuration.
    *
    * When a session is created on the server via REST API, the session object also
-   * contains an ephemeral key. Default TTL for keys is one minute. This property is
+   * contains an ephemeral key. Default TTL for keys is 10 minutes. This property is
    * not present when a session is updated via the WebSocket API.
    */
   session: TranscriptionSessionsAPI.TranscriptionSession;

@@ -35,7 +35,7 @@ export class TranscriptionSessions extends APIResource {
  * A new Realtime transcription session configuration.
  *
  * When a session is created on the server via REST API, the session object also
- * contains an ephemeral key. Default TTL for keys is one minute. This property is
+ * contains an ephemeral key. Default TTL for keys is 10 minutes. This property is
  * not present when a session is updated via the WebSocket API.
  */
 export interface TranscriptionSession {

@@ -28,7 +28,8 @@ export class Evals extends APIResource {
 
   /**
    * Create the structure of an evaluation that can be used to test a model's
-   * performance. An evaluation is a set of testing criteria and a datasource. After
+   * performance. An evaluation is a set of testing criteria and the config for a
+   * data source, which dictates the schema of the data used in the evaluation. After
    * creating an evaluation, you can run it on different models and model parameters.
    * We support several types of graders and datasources. For more information, see
    * the [Evals guide](https://platform.openai.com/docs/guides/evals).
@@ -115,9 +116,9 @@ export interface EvalStoredCompletionsDataSourceConfig {
   schema: Record<string, unknown>;
 
   /**
-   * The type of data source. Always `stored-completions`.
+   * The type of data source. Always `stored_completions`.
    */
-  type: 'stored-completions';
+  type: 'stored_completions';
 
   /**
    * Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -136,7 +137,7 @@ export interface EvalStoredCompletionsDataSourceConfig {
  *
  * - Improve the quality of my chatbot
  * - See how well my chatbot handles customer support
- * - Check if o3-mini is better at my usecase than gpt-4o
+ * - Check if o4-mini is better at my use case than gpt-4o
  */
 export interface EvalCreateResponse {
   /**
@@ -257,7 +258,7 @@ export namespace EvalCreateResponse {
  *
  * - Improve the quality of my chatbot
  * - See how well my chatbot handles customer support
- * - Check if o3-mini is better at my usecase than gpt-4o
+ * - Check if o4-mini is better at my use case than gpt-4o
  */
 export interface EvalRetrieveResponse {
   /**
@@ -378,7 +379,7 @@ export namespace EvalRetrieveResponse {
  *
  * - Improve the quality of my chatbot
  * - See how well my chatbot handles customer support
- * - Check if o3-mini is better at my usecase than gpt-4o
+ * - Check if o4-mini is better at my use case than gpt-4o
  */
 export interface EvalUpdateResponse {
   /**
@@ -499,7 +500,7 @@ export namespace EvalUpdateResponse {
  *
  * - Improve the quality of my chatbot
  * - See how well my chatbot handles customer support
- * - Check if o3-mini is better at my usecase than gpt-4o
+ * - Check if o4-mini is better at my use case than gpt-4o
  */
 export interface EvalListResponse {
   /**
@@ -624,12 +625,16 @@ export interface EvalDeleteResponse {
 
 export interface EvalCreateParams {
   /**
-   * The configuration for the data source used for the evaluation runs.
+   * The configuration for the data source used for the evaluation runs. Dictates the
+   * schema of the data used in the evaluation.
    */
   data_source_config: EvalCreateParams.Custom | EvalCreateParams.Logs | EvalCreateParams.StoredCompletions;
 
   /**
-   * A list of graders for all eval runs in this group.
+   * A list of graders for all eval runs in this group. Graders can reference
+   * variables in the data source using double curly braces notation, like
+   * `{{item.variable_name}}`. To reference the model's output, use the `sample`
+   * namespace (i.e., `{{sample.output_text}}`).
    */
   testing_criteria: Array<
     | EvalCreateParams.LabelModel
@@ -699,13 +704,13 @@ export namespace EvalCreateParams {
   }
 
   /**
-   * Deprecated in favor of LogsDataSourceConfig.
+   * @deprecated Use LogsDataSourceConfig instead.
    */
   export interface StoredCompletions {
     /**
-     * The type of data source. Always `stored-completions`.
+     * The type of data source. Always `stored_completions`.
      */
-    type: 'stored-completions';
+    type: 'stored_completions';
 
     /**
      * Metadata filters for the stored completions data source.
@@ -720,7 +725,7 @@ export namespace EvalCreateParams {
   export interface LabelModel {
     /**
      * A list of chat messages forming the prompt or context. May include variable
-     * references to the "item" namespace, ie {{item.name}}.
+     * references to the `item` namespace, i.e. `{{item.name}}`.
      */
     input: Array<LabelModel.SimpleInputMessage | LabelModel.EvalItem>;