From 9114f0ccfbebe4ef490af58ebe0d8b1c81898678 Mon Sep 17 00:00:00 2001 From: Stainless Bot Date: Thu, 17 Oct 2024 16:49:37 +0000 Subject: [PATCH] feat(api): add gpt-4o-audio-preview model for chat completions This enables audio inputs and outputs. https://platform.openai.com/docs/guides/audio --- .stats.yml | 2 +- api.md | 4 + src/openai/resources/chat/completions.py | 207 +++++++++++++++--- .../types/beta/assistant_stream_event.py | 5 +- src/openai/types/chat/__init__.py | 6 + ...chat_completion_assistant_message_param.py | 14 +- .../types/chat/chat_completion_audio.py | 27 +++ .../types/chat/chat_completion_audio_param.py | 21 ++ ...mpletion_content_part_input_audio_param.py | 22 ++ .../chat_completion_content_part_param.py | 3 +- .../types/chat/chat_completion_message.py | 8 + .../types/chat/chat_completion_modality.py | 7 + .../types/chat/completion_create_params.py | 30 ++- src/openai/types/chat_model.py | 3 + tests/api_resources/chat/test_completions.py | 20 ++ 15 files changed, 341 insertions(+), 38 deletions(-) create mode 100644 src/openai/types/chat/chat_completion_audio.py create mode 100644 src/openai/types/chat/chat_completion_audio_param.py create mode 100644 src/openai/types/chat/chat_completion_content_part_input_audio_param.py create mode 100644 src/openai/types/chat/chat_completion_modality.py diff --git a/.stats.yml b/.stats.yml index ece287351b..984e8a8d5f 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ configured_endpoints: 68 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-52b934aee6468039ec7f4ce046a282b5fbce114afc708e70f17121df654f71da.yml +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-8729aaa35436531ab453224af10e67f89677db8f350f0346bb3537489edea649.yml diff --git a/api.md b/api.md index 3a8a7c595c..4600adf77a 100644 --- a/api.md +++ b/api.md @@ -39,9 +39,12 @@ Types: from openai.types.chat import ( ChatCompletion, ChatCompletionAssistantMessageParam, + ChatCompletionAudio, + ChatCompletionAudioParam, ChatCompletionChunk, ChatCompletionContentPart, ChatCompletionContentPartImage, + ChatCompletionContentPartInputAudio, ChatCompletionContentPartRefusal, ChatCompletionContentPartText, ChatCompletionFunctionCallOption, @@ -49,6 +52,7 @@ from openai.types.chat import ( ChatCompletionMessage, ChatCompletionMessageParam, ChatCompletionMessageToolCall, + ChatCompletionModality, ChatCompletionNamedToolChoice, ChatCompletionRole, ChatCompletionStreamOptions, diff --git a/src/openai/resources/chat/completions.py b/src/openai/resources/chat/completions.py index eff194d00c..03919aab2f 100644 --- a/src/openai/resources/chat/completions.py +++ b/src/openai/resources/chat/completions.py @@ -18,12 +18,17 @@ from ..._resource import SyncAPIResource, AsyncAPIResource from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper from ..._streaming import Stream, AsyncStream -from ...types.chat import completion_create_params +from ...types.chat import ( + ChatCompletionAudioParam, + completion_create_params, +) from ..._base_client import make_request_options from ...types.chat_model import ChatModel from ...types.chat.chat_completion import ChatCompletion from ...types.chat.chat_completion_chunk import ChatCompletionChunk +from ...types.chat.chat_completion_modality import ChatCompletionModality from ...types.chat.chat_completion_tool_param import ChatCompletionToolParam +from ...types.chat.chat_completion_audio_param import ChatCompletionAudioParam from ...types.chat.chat_completion_message_param import ChatCompletionMessageParam from ...types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam from ...types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam @@ -57,6 +62,7 @@ def create( *, messages: Iterable[ChatCompletionMessageParam], model: Union[str, ChatModel], + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, @@ -65,6 +71,7 @@ def create( max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN, + modalities: Optional[List[ChatCompletionModality]] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, @@ -88,8 +95,12 @@ def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ChatCompletion: - """ - Creates a model response for the given chat conversation. + """Creates a model response for the given chat conversation. + + Learn more in the + [text generation](https://platform.openai.com/docs/guides/text-generation), + [vision](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio) guides. Args: messages: A list of messages comprising the conversation so far. Depending on the @@ -103,6 +114,10 @@ def create( [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) table for details on which models work with the Chat API. + audio: Parameters for audio output. Required when audio output is requested with + `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. @@ -150,7 +165,18 @@ def create( [o1 series models](https://platform.openai.com/docs/guides/reasoning). metadata: Developer-defined tags and values used for filtering completions in the - [dashboard](https://platform.openai.com/completions). + [dashboard](https://platform.openai.com/chat-completions). + + modalities: Output types that you would like the model to generate for this request. Most + models are capable of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). To request that + this model generate both text and audio responses, you can use: + + `["text", "audio"]` n: How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the @@ -211,8 +237,9 @@ def create( stop: Up to 4 sequences where the API will stop generating further tokens. - store: Whether or not to store the output of this completion request for traffic - logging in the [dashboard](https://platform.openai.com/completions). + store: Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only @@ -274,6 +301,7 @@ def create( messages: Iterable[ChatCompletionMessageParam], model: Union[str, ChatModel], stream: Literal[True], + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, @@ -282,6 +310,7 @@ def create( max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN, + modalities: Optional[List[ChatCompletionModality]] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, @@ -304,8 +333,12 @@ def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Stream[ChatCompletionChunk]: - """ - Creates a model response for the given chat conversation. + """Creates a model response for the given chat conversation. + + Learn more in the + [text generation](https://platform.openai.com/docs/guides/text-generation), + [vision](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio) guides. Args: messages: A list of messages comprising the conversation so far. Depending on the @@ -326,6 +359,10 @@ def create( message. [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + audio: Parameters for audio output. Required when audio output is requested with + `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. @@ -373,7 +410,18 @@ def create( [o1 series models](https://platform.openai.com/docs/guides/reasoning). metadata: Developer-defined tags and values used for filtering completions in the - [dashboard](https://platform.openai.com/completions). + [dashboard](https://platform.openai.com/chat-completions). + + modalities: Output types that you would like the model to generate for this request. Most + models are capable of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). To request that + this model generate both text and audio responses, you can use: + + `["text", "audio"]` n: How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the @@ -434,8 +482,9 @@ def create( stop: Up to 4 sequences where the API will stop generating further tokens. - store: Whether or not to store the output of this completion request for traffic - logging in the [dashboard](https://platform.openai.com/completions). + store: Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. stream_options: Options for streaming response. Only set this when you set `stream: true`. @@ -490,6 +539,7 @@ def create( messages: Iterable[ChatCompletionMessageParam], model: Union[str, ChatModel], stream: bool, + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, @@ -498,6 +548,7 @@ def create( max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN, + modalities: Optional[List[ChatCompletionModality]] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, @@ -520,8 +571,12 @@ def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ChatCompletion | Stream[ChatCompletionChunk]: - """ - Creates a model response for the given chat conversation. + """Creates a model response for the given chat conversation. + + Learn more in the + [text generation](https://platform.openai.com/docs/guides/text-generation), + [vision](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio) guides. Args: messages: A list of messages comprising the conversation so far. Depending on the @@ -542,6 +597,10 @@ def create( message. [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + audio: Parameters for audio output. Required when audio output is requested with + `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. @@ -589,7 +648,18 @@ def create( [o1 series models](https://platform.openai.com/docs/guides/reasoning). metadata: Developer-defined tags and values used for filtering completions in the - [dashboard](https://platform.openai.com/completions). + [dashboard](https://platform.openai.com/chat-completions). + + modalities: Output types that you would like the model to generate for this request. Most + models are capable of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). To request that + this model generate both text and audio responses, you can use: + + `["text", "audio"]` n: How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the @@ -650,8 +720,9 @@ def create( stop: Up to 4 sequences where the API will stop generating further tokens. - store: Whether or not to store the output of this completion request for traffic - logging in the [dashboard](https://platform.openai.com/completions). + store: Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. stream_options: Options for streaming response. Only set this when you set `stream: true`. @@ -705,6 +776,7 @@ def create( *, messages: Iterable[ChatCompletionMessageParam], model: Union[str, ChatModel], + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, @@ -713,6 +785,7 @@ def create( max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN, + modalities: Optional[List[ChatCompletionModality]] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, @@ -742,6 +815,7 @@ def create( { "messages": messages, "model": model, + "audio": audio, "frequency_penalty": frequency_penalty, "function_call": function_call, "functions": functions, @@ -750,6 +824,7 @@ def create( "max_completion_tokens": max_completion_tokens, "max_tokens": max_tokens, "metadata": metadata, + "modalities": modalities, "n": n, "parallel_tool_calls": parallel_tool_calls, "presence_penalty": presence_penalty, @@ -804,6 +879,7 @@ async def create( *, messages: Iterable[ChatCompletionMessageParam], model: Union[str, ChatModel], + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, @@ -812,6 +888,7 @@ async def create( max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN, + modalities: Optional[List[ChatCompletionModality]] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, @@ -835,8 +912,12 @@ async def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ChatCompletion: - """ - Creates a model response for the given chat conversation. + """Creates a model response for the given chat conversation. + + Learn more in the + [text generation](https://platform.openai.com/docs/guides/text-generation), + [vision](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio) guides. Args: messages: A list of messages comprising the conversation so far. Depending on the @@ -850,6 +931,10 @@ async def create( [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) table for details on which models work with the Chat API. + audio: Parameters for audio output. Required when audio output is requested with + `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. @@ -897,7 +982,18 @@ async def create( [o1 series models](https://platform.openai.com/docs/guides/reasoning). metadata: Developer-defined tags and values used for filtering completions in the - [dashboard](https://platform.openai.com/completions). + [dashboard](https://platform.openai.com/chat-completions). + + modalities: Output types that you would like the model to generate for this request. Most + models are capable of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). To request that + this model generate both text and audio responses, you can use: + + `["text", "audio"]` n: How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the @@ -958,8 +1054,9 @@ async def create( stop: Up to 4 sequences where the API will stop generating further tokens. - store: Whether or not to store the output of this completion request for traffic - logging in the [dashboard](https://platform.openai.com/completions). + store: Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only @@ -1021,6 +1118,7 @@ async def create( messages: Iterable[ChatCompletionMessageParam], model: Union[str, ChatModel], stream: Literal[True], + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, @@ -1029,6 +1127,7 @@ async def create( max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN, + modalities: Optional[List[ChatCompletionModality]] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, @@ -1051,8 +1150,12 @@ async def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> AsyncStream[ChatCompletionChunk]: - """ - Creates a model response for the given chat conversation. + """Creates a model response for the given chat conversation. + + Learn more in the + [text generation](https://platform.openai.com/docs/guides/text-generation), + [vision](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio) guides. Args: messages: A list of messages comprising the conversation so far. Depending on the @@ -1073,6 +1176,10 @@ async def create( message. [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + audio: Parameters for audio output. Required when audio output is requested with + `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. @@ -1120,7 +1227,18 @@ async def create( [o1 series models](https://platform.openai.com/docs/guides/reasoning). metadata: Developer-defined tags and values used for filtering completions in the - [dashboard](https://platform.openai.com/completions). + [dashboard](https://platform.openai.com/chat-completions). + + modalities: Output types that you would like the model to generate for this request. Most + models are capable of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). To request that + this model generate both text and audio responses, you can use: + + `["text", "audio"]` n: How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the @@ -1181,8 +1299,9 @@ async def create( stop: Up to 4 sequences where the API will stop generating further tokens. - store: Whether or not to store the output of this completion request for traffic - logging in the [dashboard](https://platform.openai.com/completions). + store: Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. stream_options: Options for streaming response. Only set this when you set `stream: true`. @@ -1237,6 +1356,7 @@ async def create( messages: Iterable[ChatCompletionMessageParam], model: Union[str, ChatModel], stream: bool, + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, @@ -1245,6 +1365,7 @@ async def create( max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN, + modalities: Optional[List[ChatCompletionModality]] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, @@ -1267,8 +1388,12 @@ async def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: - """ - Creates a model response for the given chat conversation. + """Creates a model response for the given chat conversation. + + Learn more in the + [text generation](https://platform.openai.com/docs/guides/text-generation), + [vision](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio) guides. Args: messages: A list of messages comprising the conversation so far. Depending on the @@ -1289,6 +1414,10 @@ async def create( message. [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + audio: Parameters for audio output. Required when audio output is requested with + `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. @@ -1336,7 +1465,18 @@ async def create( [o1 series models](https://platform.openai.com/docs/guides/reasoning). metadata: Developer-defined tags and values used for filtering completions in the - [dashboard](https://platform.openai.com/completions). + [dashboard](https://platform.openai.com/chat-completions). + + modalities: Output types that you would like the model to generate for this request. Most + models are capable of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). To request that + this model generate both text and audio responses, you can use: + + `["text", "audio"]` n: How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the @@ -1397,8 +1537,9 @@ async def create( stop: Up to 4 sequences where the API will stop generating further tokens. - store: Whether or not to store the output of this completion request for traffic - logging in the [dashboard](https://platform.openai.com/completions). + store: Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. stream_options: Options for streaming response. Only set this when you set `stream: true`. @@ -1452,6 +1593,7 @@ async def create( *, messages: Iterable[ChatCompletionMessageParam], model: Union[str, ChatModel], + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, @@ -1460,6 +1602,7 @@ async def create( max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN, + modalities: Optional[List[ChatCompletionModality]] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, @@ -1489,6 +1632,7 @@ async def create( { "messages": messages, "model": model, + "audio": audio, "frequency_penalty": frequency_penalty, "function_call": function_call, "functions": functions, @@ -1497,6 +1641,7 @@ async def create( "max_completion_tokens": max_completion_tokens, "max_tokens": max_tokens, "metadata": metadata, + "modalities": modalities, "n": n, "parallel_tool_calls": parallel_tool_calls, "presence_penalty": presence_penalty, diff --git a/src/openai/types/beta/assistant_stream_event.py b/src/openai/types/beta/assistant_stream_event.py index f1d8898ff2..41d3a0c5ea 100644 --- a/src/openai/types/beta/assistant_stream_event.py +++ b/src/openai/types/beta/assistant_stream_event.py @@ -1,6 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import Union +from typing import Union, Optional from typing_extensions import Literal, Annotated, TypeAlias from .thread import Thread @@ -51,6 +51,9 @@ class ThreadCreated(BaseModel): event: Literal["thread.created"] + enabled: Optional[bool] = None + """Whether to enable input audio transcription.""" + class ThreadRunCreated(BaseModel): data: Run diff --git a/src/openai/types/chat/__init__.py b/src/openai/types/chat/__init__.py index df3b48149c..b85365ecb1 100644 --- a/src/openai/types/chat/__init__.py +++ b/src/openai/types/chat/__init__.py @@ -4,10 +4,13 @@ from .chat_completion import ChatCompletion as ChatCompletion from .chat_completion_role import ChatCompletionRole as ChatCompletionRole +from .chat_completion_audio import ChatCompletionAudio as ChatCompletionAudio from .chat_completion_chunk import ChatCompletionChunk as ChatCompletionChunk from .chat_completion_message import ChatCompletionMessage as ChatCompletionMessage +from .chat_completion_modality import ChatCompletionModality as ChatCompletionModality from .completion_create_params import CompletionCreateParams as CompletionCreateParams from .chat_completion_tool_param import ChatCompletionToolParam as ChatCompletionToolParam +from .chat_completion_audio_param import ChatCompletionAudioParam as ChatCompletionAudioParam from .chat_completion_message_param import ChatCompletionMessageParam as ChatCompletionMessageParam from .chat_completion_token_logprob import ChatCompletionTokenLogprob as ChatCompletionTokenLogprob from .chat_completion_message_tool_call import ChatCompletionMessageToolCall as ChatCompletionMessageToolCall @@ -43,3 +46,6 @@ from .chat_completion_function_call_option_param import ( ChatCompletionFunctionCallOptionParam as ChatCompletionFunctionCallOptionParam, ) +from .chat_completion_content_part_input_audio_param import ( + ChatCompletionContentPartInputAudioParam as ChatCompletionContentPartInputAudioParam, +) diff --git a/src/openai/types/chat/chat_completion_assistant_message_param.py b/src/openai/types/chat/chat_completion_assistant_message_param.py index 2429d41d33..35e3a3d784 100644 --- a/src/openai/types/chat/chat_completion_assistant_message_param.py +++ b/src/openai/types/chat/chat_completion_assistant_message_param.py @@ -9,7 +9,13 @@ from .chat_completion_message_tool_call_param import ChatCompletionMessageToolCallParam from .chat_completion_content_part_refusal_param import ChatCompletionContentPartRefusalParam -__all__ = ["ChatCompletionAssistantMessageParam", "ContentArrayOfContentPart", "FunctionCall"] +__all__ = ["ChatCompletionAssistantMessageParam", "Audio", "ContentArrayOfContentPart", "FunctionCall"] + + +class Audio(TypedDict, total=False): + id: Required[str] + """Unique identifier for a previous audio response from the model.""" + ContentArrayOfContentPart: TypeAlias = Union[ChatCompletionContentPartTextParam, ChatCompletionContentPartRefusalParam] @@ -31,6 +37,12 @@ class ChatCompletionAssistantMessageParam(TypedDict, total=False): role: Required[Literal["assistant"]] """The role of the messages author, in this case `assistant`.""" + audio: Optional[Audio] + """Data about a previous audio response from the model. + + [Learn more](https://platform.openai.com/docs/guides/audio). + """ + content: Union[str, Iterable[ContentArrayOfContentPart], None] """The contents of the assistant message. diff --git a/src/openai/types/chat/chat_completion_audio.py b/src/openai/types/chat/chat_completion_audio.py new file mode 100644 index 0000000000..135ee8845c --- /dev/null +++ b/src/openai/types/chat/chat_completion_audio.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + + + +from ..._models import BaseModel + +__all__ = ["ChatCompletionAudio"] + + +class ChatCompletionAudio(BaseModel): + id: str + """Unique identifier for this audio response.""" + + data: str + """ + Base64 encoded audio bytes generated by the model, in the format specified in + the request. + """ + + expires_at: int + """ + The Unix timestamp (in seconds) for when this audio response will no longer be + accessible on the server for use in multi-turn conversations. + """ + + transcript: str + """Transcript of the audio generated by the model.""" diff --git a/src/openai/types/chat/chat_completion_audio_param.py b/src/openai/types/chat/chat_completion_audio_param.py new file mode 100644 index 0000000000..6a4ce9ac1f --- /dev/null +++ b/src/openai/types/chat/chat_completion_audio_param.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ChatCompletionAudioParam"] + + +class ChatCompletionAudioParam(TypedDict, total=False): + format: Required[Literal["wav", "mp3", "flac", "opus", "pcm16"]] + """Specifies the output audio format. + + Must be one of `wav`, `mp3`, `flac`, `opus`, or `pcm16`. + """ + + voice: Required[Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"]] + """Specifies the voice type. + + Supported voices are `alloy`, `echo`, `fable`, `onyx`, `nova`, and `shimmer`. + """ diff --git a/src/openai/types/chat/chat_completion_content_part_input_audio_param.py b/src/openai/types/chat/chat_completion_content_part_input_audio_param.py new file mode 100644 index 0000000000..0b1b1a80b1 --- /dev/null +++ b/src/openai/types/chat/chat_completion_content_part_input_audio_param.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ChatCompletionContentPartInputAudioParam", "InputAudio"] + + +class InputAudio(TypedDict, total=False): + data: Required[str] + """Base64 encoded audio data.""" + + format: Required[Literal["wav", "mp3"]] + """The format of the encoded audio data. Currently supports "wav" and "mp3".""" + + +class ChatCompletionContentPartInputAudioParam(TypedDict, total=False): + input_audio: Required[InputAudio] + + type: Required[Literal["input_audio"]] + """The type of the content part. Always `input_audio`.""" diff --git a/src/openai/types/chat/chat_completion_content_part_param.py b/src/openai/types/chat/chat_completion_content_part_param.py index e0c6e480f2..682d11f4c7 100644 --- a/src/openai/types/chat/chat_completion_content_part_param.py +++ b/src/openai/types/chat/chat_completion_content_part_param.py @@ -7,9 +7,10 @@ from .chat_completion_content_part_text_param import ChatCompletionContentPartTextParam from .chat_completion_content_part_image_param import ChatCompletionContentPartImageParam +from .chat_completion_content_part_input_audio_param import ChatCompletionContentPartInputAudioParam __all__ = ["ChatCompletionContentPartParam"] ChatCompletionContentPartParam: TypeAlias = Union[ - ChatCompletionContentPartTextParam, ChatCompletionContentPartImageParam + ChatCompletionContentPartTextParam, ChatCompletionContentPartImageParam, ChatCompletionContentPartInputAudioParam ] diff --git a/src/openai/types/chat/chat_completion_message.py b/src/openai/types/chat/chat_completion_message.py index 492bb68c85..704fa5d5d1 100644 --- a/src/openai/types/chat/chat_completion_message.py +++ b/src/openai/types/chat/chat_completion_message.py @@ -4,6 +4,7 @@ from typing_extensions import Literal from ..._models import BaseModel +from .chat_completion_audio import ChatCompletionAudio from .chat_completion_message_tool_call import ChatCompletionMessageToolCall __all__ = ["ChatCompletionMessage", "FunctionCall"] @@ -32,6 +33,13 @@ class ChatCompletionMessage(BaseModel): role: Literal["assistant"] """The role of the author of this message.""" + audio: Optional[ChatCompletionAudio] = None + """ + If the audio output modality is requested, this object contains data about the + audio response from the model. + [Learn more](https://platform.openai.com/docs/guides/audio). + """ + function_call: Optional[FunctionCall] = None """Deprecated and replaced by `tool_calls`. diff --git a/src/openai/types/chat/chat_completion_modality.py b/src/openai/types/chat/chat_completion_modality.py new file mode 100644 index 0000000000..8e3c145979 --- /dev/null +++ b/src/openai/types/chat/chat_completion_modality.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["ChatCompletionModality"] + +ChatCompletionModality: TypeAlias = Literal["text", "audio"] diff --git a/src/openai/types/chat/completion_create_params.py b/src/openai/types/chat/completion_create_params.py index 3f55dfbe6e..af6a47c219 100644 --- a/src/openai/types/chat/completion_create_params.py +++ b/src/openai/types/chat/completion_create_params.py @@ -6,7 +6,9 @@ from typing_extensions import Literal, Required, TypeAlias, TypedDict from ..chat_model import ChatModel +from .chat_completion_modality import ChatCompletionModality from .chat_completion_tool_param import ChatCompletionToolParam +from .chat_completion_audio_param import ChatCompletionAudioParam from .chat_completion_message_param import ChatCompletionMessageParam from ..shared_params.function_parameters import FunctionParameters from ..shared_params.response_format_text import ResponseFormatText @@ -45,6 +47,13 @@ class CompletionCreateParamsBase(TypedDict, total=False): table for details on which models work with the Chat API. """ + audio: Optional[ChatCompletionAudioParam] + """Parameters for audio output. + + Required when audio output is requested with `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). + """ + frequency_penalty: Optional[float] """Number between -2.0 and 2.0. @@ -112,7 +121,21 @@ class CompletionCreateParamsBase(TypedDict, total=False): metadata: Optional[Dict[str, str]] """ Developer-defined tags and values used for filtering completions in the - [dashboard](https://platform.openai.com/completions). + [dashboard](https://platform.openai.com/chat-completions). + """ + + modalities: Optional[List[ChatCompletionModality]] + """ + Output types that you would like the model to generate for this request. Most + models are capable of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). To request that + this model generate both text and audio responses, you can use: + + `["text", "audio"]` """ n: Optional[int] @@ -195,8 +218,9 @@ class CompletionCreateParamsBase(TypedDict, total=False): store: Optional[bool] """ - Whether or not to store the output of this completion request for traffic - logging in the [dashboard](https://platform.openai.com/completions). + Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. """ stream_options: Optional[ChatCompletionStreamOptionsParam] diff --git a/src/openai/types/chat_model.py b/src/openai/types/chat_model.py index f2d5674786..b801aa0914 100644 --- a/src/openai/types/chat_model.py +++ b/src/openai/types/chat_model.py @@ -12,7 +12,10 @@ "gpt-4o", "gpt-4o-2024-08-06", "gpt-4o-2024-05-13", + "gpt-4o-realtime-preview", "gpt-4o-realtime-preview-2024-10-01", + "gpt-4o-audio-preview", + "gpt-4o-audio-preview-2024-10-01", "chatgpt-4o-latest", "gpt-4o-mini", "gpt-4o-mini-2024-07-18", diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py index d353139543..a341e78f7e 100644 --- a/tests/api_resources/chat/test_completions.py +++ b/tests/api_resources/chat/test_completions.py @@ -43,6 +43,10 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None: } ], model="gpt-4o", + audio={ + "format": "wav", + "voice": "alloy", + }, frequency_penalty=-2, function_call="none", functions=[ @@ -57,6 +61,7 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None: max_completion_tokens=0, max_tokens=0, metadata={"foo": "string"}, + modalities=["text", "audio"], n=1, parallel_tool_calls=True, presence_penalty=-2, @@ -166,6 +171,10 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None: ], model="gpt-4o", stream=True, + audio={ + "format": "wav", + "voice": "alloy", + }, frequency_penalty=-2, function_call="none", functions=[ @@ -180,6 +189,7 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None: max_completion_tokens=0, max_tokens=0, metadata={"foo": "string"}, + modalities=["text", "audio"], n=1, parallel_tool_calls=True, presence_penalty=-2, @@ -291,6 +301,10 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn } ], model="gpt-4o", + audio={ + "format": "wav", + "voice": "alloy", + }, frequency_penalty=-2, function_call="none", functions=[ @@ -305,6 +319,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn max_completion_tokens=0, max_tokens=0, metadata={"foo": "string"}, + modalities=["text", "audio"], n=1, parallel_tool_calls=True, presence_penalty=-2, @@ -414,6 +429,10 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn ], model="gpt-4o", stream=True, + audio={ + "format": "wav", + "voice": "alloy", + }, frequency_penalty=-2, function_call="none", functions=[ @@ -428,6 +447,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn max_completion_tokens=0, max_tokens=0, metadata={"foo": "string"}, + modalities=["text", "audio"], n=1, parallel_tool_calls=True, presence_penalty=-2,