diff --git a/.stats.yml b/.stats.yml
index ece287351..984e8a8d5 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,2 +1,2 @@
configured_endpoints: 68
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-52b934aee6468039ec7f4ce046a282b5fbce114afc708e70f17121df654f71da.yml
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-8729aaa35436531ab453224af10e67f89677db8f350f0346bb3537489edea649.yml
diff --git a/api.md b/api.md
index 75cb4d5e1..3dd61f962 100644
--- a/api.md
+++ b/api.md
@@ -33,9 +33,12 @@ Types:
- ChatCompletion
- ChatCompletionAssistantMessageParam
+- ChatCompletionAudio
+- ChatCompletionAudioParam
- ChatCompletionChunk
- ChatCompletionContentPart
- ChatCompletionContentPartImage
+- ChatCompletionContentPartInputAudio
- ChatCompletionContentPartRefusal
- ChatCompletionContentPartText
- ChatCompletionFunctionCallOption
@@ -43,6 +46,7 @@ Types:
- ChatCompletionMessage
- ChatCompletionMessageParam
- ChatCompletionMessageToolCall
+- ChatCompletionModality
- ChatCompletionNamedToolChoice
- ChatCompletionRole
- ChatCompletionStreamOptions
diff --git a/src/index.ts b/src/index.ts
index 4714b4ce4..7b2323189 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -250,9 +250,12 @@ export namespace OpenAI {
export import ChatModel = API.ChatModel;
export import ChatCompletion = API.ChatCompletion;
export import ChatCompletionAssistantMessageParam = API.ChatCompletionAssistantMessageParam;
+ export import ChatCompletionAudio = API.ChatCompletionAudio;
+ export import ChatCompletionAudioParam = API.ChatCompletionAudioParam;
export import ChatCompletionChunk = API.ChatCompletionChunk;
export import ChatCompletionContentPart = API.ChatCompletionContentPart;
export import ChatCompletionContentPartImage = API.ChatCompletionContentPartImage;
+ export import ChatCompletionContentPartInputAudio = API.ChatCompletionContentPartInputAudio;
export import ChatCompletionContentPartRefusal = API.ChatCompletionContentPartRefusal;
export import ChatCompletionContentPartText = API.ChatCompletionContentPartText;
export import ChatCompletionFunctionCallOption = API.ChatCompletionFunctionCallOption;
@@ -260,6 +263,7 @@ export namespace OpenAI {
export import ChatCompletionMessage = API.ChatCompletionMessage;
export import ChatCompletionMessageParam = API.ChatCompletionMessageParam;
export import ChatCompletionMessageToolCall = API.ChatCompletionMessageToolCall;
+ export import ChatCompletionModality = API.ChatCompletionModality;
export import ChatCompletionNamedToolChoice = API.ChatCompletionNamedToolChoice;
export import ChatCompletionRole = API.ChatCompletionRole;
export import ChatCompletionStreamOptions = API.ChatCompletionStreamOptions;
diff --git a/src/resources/beta/assistants.ts b/src/resources/beta/assistants.ts
index b748126b4..a144fb2dd 100644
--- a/src/resources/beta/assistants.ts
+++ b/src/resources/beta/assistants.ts
@@ -298,6 +298,11 @@ export namespace AssistantStreamEvent {
data: ThreadsAPI.Thread;
event: 'thread.created';
+
+ /**
+ * Whether to enable input audio transcription.
+ */
+ enabled?: boolean;
}
/**
@@ -1084,6 +1089,11 @@ export interface ThreadStreamEvent {
data: ThreadsAPI.Thread;
event: 'thread.created';
+
+ /**
+ * Whether to enable input audio transcription.
+ */
+ enabled?: boolean;
}
export interface AssistantCreateParams {
diff --git a/src/resources/chat/chat.ts b/src/resources/chat/chat.ts
index 5bc7de955..43ef5662c 100644
--- a/src/resources/chat/chat.ts
+++ b/src/resources/chat/chat.ts
@@ -16,7 +16,10 @@ export type ChatModel =
| 'gpt-4o'
| 'gpt-4o-2024-08-06'
| 'gpt-4o-2024-05-13'
+ | 'gpt-4o-realtime-preview'
| 'gpt-4o-realtime-preview-2024-10-01'
+ | 'gpt-4o-audio-preview'
+ | 'gpt-4o-audio-preview-2024-10-01'
| 'chatgpt-4o-latest'
| 'gpt-4o-mini'
| 'gpt-4o-mini-2024-07-18'
@@ -45,9 +48,12 @@ export namespace Chat {
export import Completions = CompletionsAPI.Completions;
export import ChatCompletion = CompletionsAPI.ChatCompletion;
export import ChatCompletionAssistantMessageParam = CompletionsAPI.ChatCompletionAssistantMessageParam;
+ export import ChatCompletionAudio = CompletionsAPI.ChatCompletionAudio;
+ export import ChatCompletionAudioParam = CompletionsAPI.ChatCompletionAudioParam;
export import ChatCompletionChunk = CompletionsAPI.ChatCompletionChunk;
export import ChatCompletionContentPart = CompletionsAPI.ChatCompletionContentPart;
export import ChatCompletionContentPartImage = CompletionsAPI.ChatCompletionContentPartImage;
+ export import ChatCompletionContentPartInputAudio = CompletionsAPI.ChatCompletionContentPartInputAudio;
export import ChatCompletionContentPartRefusal = CompletionsAPI.ChatCompletionContentPartRefusal;
export import ChatCompletionContentPartText = CompletionsAPI.ChatCompletionContentPartText;
export import ChatCompletionFunctionCallOption = CompletionsAPI.ChatCompletionFunctionCallOption;
@@ -55,6 +61,7 @@ export namespace Chat {
export import ChatCompletionMessage = CompletionsAPI.ChatCompletionMessage;
export import ChatCompletionMessageParam = CompletionsAPI.ChatCompletionMessageParam;
export import ChatCompletionMessageToolCall = CompletionsAPI.ChatCompletionMessageToolCall;
+ export import ChatCompletionModality = CompletionsAPI.ChatCompletionModality;
export import ChatCompletionNamedToolChoice = CompletionsAPI.ChatCompletionNamedToolChoice;
export import ChatCompletionRole = CompletionsAPI.ChatCompletionRole;
export import ChatCompletionStreamOptions = CompletionsAPI.ChatCompletionStreamOptions;
diff --git a/src/resources/chat/completions.ts b/src/resources/chat/completions.ts
index d6bed474e..d7f7c17d7 100644
--- a/src/resources/chat/completions.ts
+++ b/src/resources/chat/completions.ts
@@ -11,7 +11,10 @@ import { Stream } from '../../streaming';
export class Completions extends APIResource {
/**
- * Creates a model response for the given chat conversation.
+ * Creates a model response for the given chat conversation. Learn more in the
+ * [text generation](https://platform.openai.com/docs/guides/text-generation),
+ * [vision](https://platform.openai.com/docs/guides/vision), and
+ * [audio](https://platform.openai.com/docs/guides/audio) guides.
*/
create(
body: ChatCompletionCreateParamsNonStreaming,
@@ -138,6 +141,12 @@ export interface ChatCompletionAssistantMessageParam {
*/
role: 'assistant';
+ /**
+ * Data about a previous audio response from the model.
+ * [Learn more](https://platform.openai.com/docs/guides/audio).
+ */
+ audio?: ChatCompletionAssistantMessageParam.Audio | null;
+
/**
* The contents of the assistant message. Required unless `tool_calls` or
* `function_call` is specified.
@@ -168,6 +177,17 @@ export interface ChatCompletionAssistantMessageParam {
}
export namespace ChatCompletionAssistantMessageParam {
+ /**
+ * Data about a previous audio response from the model.
+ * [Learn more](https://platform.openai.com/docs/guides/audio).
+ */
+ export interface Audio {
+ /**
+ * Unique identifier for a previous audio response from the model.
+ */
+ id: string;
+ }
+
/**
* @deprecated: Deprecated and replaced by `tool_calls`. The name and arguments of
* a function that should be called, as generated by the model.
@@ -188,6 +208,54 @@ export namespace ChatCompletionAssistantMessageParam {
}
}
+/**
+ * If the audio output modality is requested, this object contains data about the
+ * audio response from the model.
+ * [Learn more](https://platform.openai.com/docs/guides/audio).
+ */
+export interface ChatCompletionAudio {
+ /**
+ * Unique identifier for this audio response.
+ */
+ id: string;
+
+ /**
+ * Base64 encoded audio bytes generated by the model, in the format specified in
+ * the request.
+ */
+ data: string;
+
+ /**
+ * The Unix timestamp (in seconds) for when this audio response will no longer be
+ * accessible on the server for use in multi-turn conversations.
+ */
+ expires_at: number;
+
+ /**
+ * Transcript of the audio generated by the model.
+ */
+ transcript: string;
+}
+
+/**
+ * Parameters for audio output. Required when audio output is requested with
+ * `modalities: ["audio"]`.
+ * [Learn more](https://platform.openai.com/docs/guides/audio).
+ */
+export interface ChatCompletionAudioParam {
+ /**
+ * Specifies the output audio format. Must be one of `wav`, `mp3`, `flac`, `opus`,
+ * or `pcm16`.
+ */
+ format: 'wav' | 'mp3' | 'flac' | 'opus' | 'pcm16';
+
+ /**
+ * Specifies the voice type. Supported voices are `alloy`, `echo`, `fable`, `onyx`,
+ * `nova`, and `shimmer`.
+ */
+ voice: 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer';
+}
+
/**
* Represents a streamed chunk of a chat completion response returned by model,
* based on the provided input.
@@ -371,8 +439,18 @@ export namespace ChatCompletionChunk {
}
}
-export type ChatCompletionContentPart = ChatCompletionContentPartText | ChatCompletionContentPartImage;
+/**
+ * A content part: text, image, or input audio. For text parts, learn about
+ * [text inputs](https://platform.openai.com/docs/guides/text-generation).
+ */
+export type ChatCompletionContentPart =
+ | ChatCompletionContentPartText
+ | ChatCompletionContentPartImage
+ | ChatCompletionContentPartInputAudio;
+/**
+ * Learn about [image inputs](https://platform.openai.com/docs/guides/vision).
+ */
export interface ChatCompletionContentPartImage {
image_url: ChatCompletionContentPartImage.ImageURL;
@@ -397,6 +475,32 @@ export namespace ChatCompletionContentPartImage {
}
}
+/**
+ * Learn about [audio inputs](https://platform.openai.com/docs/guides/audio).
+ */
+export interface ChatCompletionContentPartInputAudio {
+ input_audio: ChatCompletionContentPartInputAudio.InputAudio;
+
+ /**
+ * The type of the content part. Always `input_audio`.
+ */
+ type: 'input_audio';
+}
+
+export namespace ChatCompletionContentPartInputAudio {
+ export interface InputAudio {
+ /**
+ * Base64 encoded audio data.
+ */
+ data: string;
+
+ /**
+ * The format of the encoded audio data. Currently supports "wav" and "mp3".
+ */
+ format: 'wav' | 'mp3';
+ }
+}
+
export interface ChatCompletionContentPartRefusal {
/**
* The refusal message generated by the model.
@@ -409,6 +513,10 @@ export interface ChatCompletionContentPartRefusal {
type: 'refusal';
}
+/**
+ * Learn about
+ * [text inputs](https://platform.openai.com/docs/guides/text-generation).
+ */
export interface ChatCompletionContentPartText {
/**
* The text content.
@@ -471,6 +579,13 @@ export interface ChatCompletionMessage {
*/
role: 'assistant';
+ /**
+ * If the audio output modality is requested, this object contains data about the
+ * audio response from the model.
+ * [Learn more](https://platform.openai.com/docs/guides/audio).
+ */
+ audio?: ChatCompletionAudio | null;
+
/**
* @deprecated: Deprecated and replaced by `tool_calls`. The name and arguments of
* a function that should be called, as generated by the model.
@@ -548,6 +663,8 @@ export namespace ChatCompletionMessageToolCall {
}
}
+export type ChatCompletionModality = 'text' | 'audio';
+
/**
* Specifies a tool the model should use. Use to force the model to call a specific
* function.
@@ -743,6 +860,13 @@ export interface ChatCompletionCreateParamsBase {
*/
model: (string & {}) | ChatAPI.ChatModel;
+ /**
+ * Parameters for audio output. Required when audio output is requested with
+ * `modalities: ["audio"]`.
+ * [Learn more](https://platform.openai.com/docs/guides/audio).
+ */
+ audio?: ChatCompletionAudioParam | null;
+
/**
* Number between -2.0 and 2.0. Positive values penalize new tokens based on their
* existing frequency in the text so far, decreasing the model's likelihood to
@@ -812,10 +936,24 @@ export interface ChatCompletionCreateParamsBase {
/**
* Developer-defined tags and values used for filtering completions in the
- * [dashboard](https://platform.openai.com/completions).
+ * [dashboard](https://platform.openai.com/chat-completions).
*/
metadata?: Record<string, string> | null;
+ /**
+ * Output types that you would like the model to generate for this request. Most
+ * models are capable of generating text, which is the default:
+ *
+ * `["text"]`
+ *
+ * The `gpt-4o-audio-preview` model can also be used to
+ * [generate audio](https://platform.openai.com/docs/guides/audio). To request that
+ * this model generate both text and audio responses, you can use:
+ *
+ * `["text", "audio"]`
+ */
+ modalities?: Array<ChatCompletionModality> | null;
+
/**
* How many chat completion choices to generate for each input message. Note that
* you will be charged based on the number of generated tokens across all of the
@@ -900,8 +1038,9 @@ export interface ChatCompletionCreateParamsBase {
stop?: string | null | Array<string>;
/**
- * Whether or not to store the output of this completion request for traffic
- * logging in the [dashboard](https://platform.openai.com/completions).
+ * Whether or not to store the output of this chat completion request for use in
+ * our [model distillation](https://platform.openai.com/docs/guides/distillation)
+ * or [evals](https://platform.openai.com/docs/guides/evals) products.
*/
store?: boolean | null;
@@ -1049,9 +1188,12 @@ export type CompletionCreateParamsStreaming = ChatCompletionCreateParamsStreamin
export namespace Completions {
export import ChatCompletion = ChatCompletionsAPI.ChatCompletion;
export import ChatCompletionAssistantMessageParam = ChatCompletionsAPI.ChatCompletionAssistantMessageParam;
+ export import ChatCompletionAudio = ChatCompletionsAPI.ChatCompletionAudio;
+ export import ChatCompletionAudioParam = ChatCompletionsAPI.ChatCompletionAudioParam;
export import ChatCompletionChunk = ChatCompletionsAPI.ChatCompletionChunk;
export import ChatCompletionContentPart = ChatCompletionsAPI.ChatCompletionContentPart;
export import ChatCompletionContentPartImage = ChatCompletionsAPI.ChatCompletionContentPartImage;
+ export import ChatCompletionContentPartInputAudio = ChatCompletionsAPI.ChatCompletionContentPartInputAudio;
export import ChatCompletionContentPartRefusal = ChatCompletionsAPI.ChatCompletionContentPartRefusal;
export import ChatCompletionContentPartText = ChatCompletionsAPI.ChatCompletionContentPartText;
export import ChatCompletionFunctionCallOption = ChatCompletionsAPI.ChatCompletionFunctionCallOption;
@@ -1059,6 +1201,7 @@ export namespace Completions {
export import ChatCompletionMessage = ChatCompletionsAPI.ChatCompletionMessage;
export import ChatCompletionMessageParam = ChatCompletionsAPI.ChatCompletionMessageParam;
export import ChatCompletionMessageToolCall = ChatCompletionsAPI.ChatCompletionMessageToolCall;
+ export import ChatCompletionModality = ChatCompletionsAPI.ChatCompletionModality;
export import ChatCompletionNamedToolChoice = ChatCompletionsAPI.ChatCompletionNamedToolChoice;
export import ChatCompletionRole = ChatCompletionsAPI.ChatCompletionRole;
export import ChatCompletionStreamOptions = ChatCompletionsAPI.ChatCompletionStreamOptions;
diff --git a/src/resources/chat/index.ts b/src/resources/chat/index.ts
index 748770948..22803e819 100644
--- a/src/resources/chat/index.ts
+++ b/src/resources/chat/index.ts
@@ -3,9 +3,12 @@
export {
ChatCompletion,
ChatCompletionAssistantMessageParam,
+ ChatCompletionAudio,
+ ChatCompletionAudioParam,
ChatCompletionChunk,
ChatCompletionContentPart,
ChatCompletionContentPartImage,
+ ChatCompletionContentPartInputAudio,
ChatCompletionContentPartRefusal,
ChatCompletionContentPartText,
ChatCompletionFunctionCallOption,
@@ -13,6 +16,7 @@ export {
ChatCompletionMessage,
ChatCompletionMessageParam,
ChatCompletionMessageToolCall,
+ ChatCompletionModality,
ChatCompletionNamedToolChoice,
ChatCompletionRole,
ChatCompletionStreamOptions,
diff --git a/tests/api-resources/chat/completions.test.ts b/tests/api-resources/chat/completions.test.ts
index 4f015b47e..77d4a251c 100644
--- a/tests/api-resources/chat/completions.test.ts
+++ b/tests/api-resources/chat/completions.test.ts
@@ -27,6 +27,7 @@ describe('resource completions', () => {
const response = await client.chat.completions.create({
messages: [{ content: 'string', role: 'system', name: 'name' }],
model: 'gpt-4o',
+ audio: { format: 'wav', voice: 'alloy' },
frequency_penalty: -2,
function_call: 'none',
functions: [{ name: 'name', description: 'description', parameters: { foo: 'bar' } }],
@@ -35,6 +36,7 @@ describe('resource completions', () => {
max_completion_tokens: 0,
max_tokens: 0,
metadata: { foo: 'string' },
+ modalities: ['text', 'audio'],
n: 1,
parallel_tool_calls: true,
presence_penalty: -2,