From 9114f0ccfbebe4ef490af58ebe0d8b1c81898678 Mon Sep 17 00:00:00 2001
From: Stainless Bot <dev@stainlessapi.com>
Date: Thu, 17 Oct 2024 16:49:37 +0000
Subject: [PATCH] feat(api): add gpt-4o-audio-preview model for chat
 completions

This enables audio inputs and outputs. See https://platform.openai.com/docs/guides/audio for details.
---
 .stats.yml                                    |   2 +-
 api.md                                        |   4 +
 src/openai/resources/chat/completions.py      | 207 +++++++++++++++---
 .../types/beta/assistant_stream_event.py      |   5 +-
 src/openai/types/chat/__init__.py             |   6 +
 ...chat_completion_assistant_message_param.py |  14 +-
 .../types/chat/chat_completion_audio.py       |  27 +++
 .../types/chat/chat_completion_audio_param.py |  21 ++
 ...mpletion_content_part_input_audio_param.py |  22 ++
 .../chat_completion_content_part_param.py     |   3 +-
 .../types/chat/chat_completion_message.py     |   8 +
 .../types/chat/chat_completion_modality.py    |   7 +
 .../types/chat/completion_create_params.py    |  30 ++-
 src/openai/types/chat_model.py                |   3 +
 tests/api_resources/chat/test_completions.py  |  20 ++
 15 files changed, 341 insertions(+), 38 deletions(-)
 create mode 100644 src/openai/types/chat/chat_completion_audio.py
 create mode 100644 src/openai/types/chat/chat_completion_audio_param.py
 create mode 100644 src/openai/types/chat/chat_completion_content_part_input_audio_param.py
 create mode 100644 src/openai/types/chat/chat_completion_modality.py

diff --git a/.stats.yml b/.stats.yml
index ece287351b..984e8a8d5f 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,2 +1,2 @@
 configured_endpoints: 68
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-52b934aee6468039ec7f4ce046a282b5fbce114afc708e70f17121df654f71da.yml
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-8729aaa35436531ab453224af10e67f89677db8f350f0346bb3537489edea649.yml
diff --git a/api.md b/api.md
index 3a8a7c595c..4600adf77a 100644
--- a/api.md
+++ b/api.md
@@ -39,9 +39,12 @@ Types:
 from openai.types.chat import (
     ChatCompletion,
     ChatCompletionAssistantMessageParam,
+    ChatCompletionAudio,
+    ChatCompletionAudioParam,
     ChatCompletionChunk,
     ChatCompletionContentPart,
     ChatCompletionContentPartImage,
+    ChatCompletionContentPartInputAudio,
     ChatCompletionContentPartRefusal,
     ChatCompletionContentPartText,
     ChatCompletionFunctionCallOption,
@@ -49,6 +52,7 @@ from openai.types.chat import (
     ChatCompletionMessage,
     ChatCompletionMessageParam,
     ChatCompletionMessageToolCall,
+    ChatCompletionModality,
     ChatCompletionNamedToolChoice,
     ChatCompletionRole,
     ChatCompletionStreamOptions,
diff --git a/src/openai/resources/chat/completions.py b/src/openai/resources/chat/completions.py
index eff194d00c..03919aab2f 100644
--- a/src/openai/resources/chat/completions.py
+++ b/src/openai/resources/chat/completions.py
@@ -18,12 +18,17 @@
 from ..._resource import SyncAPIResource, AsyncAPIResource
 from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
 from ..._streaming import Stream, AsyncStream
-from ...types.chat import completion_create_params
+from ...types.chat import (
+    ChatCompletionAudioParam,
+    completion_create_params,
+)
 from ..._base_client import make_request_options
 from ...types.chat_model import ChatModel
 from ...types.chat.chat_completion import ChatCompletion
 from ...types.chat.chat_completion_chunk import ChatCompletionChunk
+from ...types.chat.chat_completion_modality import ChatCompletionModality
 from ...types.chat.chat_completion_tool_param import ChatCompletionToolParam
+from ...types.chat.chat_completion_audio_param import ChatCompletionAudioParam
 from ...types.chat.chat_completion_message_param import ChatCompletionMessageParam
 from ...types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam
 from ...types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam
@@ -57,6 +62,7 @@ def create(
         *,
         messages: Iterable[ChatCompletionMessageParam],
         model: Union[str, ChatModel],
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
         frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
         functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
@@ -65,6 +71,7 @@ def create(
         max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[ChatCompletionModality]] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
@@ -88,8 +95,12 @@ def create(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> ChatCompletion:
-        """
-        Creates a model response for the given chat conversation.
+        """Creates a model response for the given chat conversation.
+
+        Learn more in the
+        [text generation](https://platform.openai.com/docs/guides/text-generation),
+        [vision](https://platform.openai.com/docs/guides/vision), and
+        [audio](https://platform.openai.com/docs/guides/audio) guides.
 
         Args:
           messages: A list of messages comprising the conversation so far. Depending on the
@@ -103,6 +114,10 @@ def create(
               [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility)
               table for details on which models work with the Chat API.
 
+          audio: Parameters for audio output. Required when audio output is requested with
+              `modalities: ["audio"]`.
+              [Learn more](https://platform.openai.com/docs/guides/audio).
+
           frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
               existing frequency in the text so far, decreasing the model's likelihood to
               repeat the same line verbatim.
@@ -150,7 +165,18 @@ def create(
               [o1 series models](https://platform.openai.com/docs/guides/reasoning).
 
           metadata: Developer-defined tags and values used for filtering completions in the
-              [dashboard](https://platform.openai.com/completions).
+              [dashboard](https://platform.openai.com/chat-completions).
+
+          modalities: Output types that you would like the model to generate for this request. Most
+              models are capable of generating text, which is the default:
+
+              `["text"]`
+
+              The `gpt-4o-audio-preview` model can also be used to
+              [generate audio](https://platform.openai.com/docs/guides/audio). To request that
+              this model generate both text and audio responses, you can use:
+
+              `["text", "audio"]`
 
           n: How many chat completion choices to generate for each input message. Note that
               you will be charged based on the number of generated tokens across all of the
@@ -211,8 +237,9 @@ def create(
 
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
-          store: Whether or not to store the output of this completion request for traffic
-              logging in the [dashboard](https://platform.openai.com/completions).
+          store: Whether or not to store the output of this chat completion request for use in
+              our [model distillation](https://platform.openai.com/docs/guides/distillation)
+              or [evals](https://platform.openai.com/docs/guides/evals) products.
 
           stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be
               sent as data-only
@@ -274,6 +301,7 @@ def create(
         messages: Iterable[ChatCompletionMessageParam],
         model: Union[str, ChatModel],
         stream: Literal[True],
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
         frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
         functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
@@ -282,6 +310,7 @@ def create(
         max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[ChatCompletionModality]] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
@@ -304,8 +333,12 @@ def create(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> Stream[ChatCompletionChunk]:
-        """
-        Creates a model response for the given chat conversation.
+        """Creates a model response for the given chat conversation.
+
+        Learn more in the
+        [text generation](https://platform.openai.com/docs/guides/text-generation),
+        [vision](https://platform.openai.com/docs/guides/vision), and
+        [audio](https://platform.openai.com/docs/guides/audio) guides.
 
         Args:
           messages: A list of messages comprising the conversation so far. Depending on the
@@ -326,6 +359,10 @@ def create(
               message.
               [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
 
+          audio: Parameters for audio output. Required when audio output is requested with
+              `modalities: ["audio"]`.
+              [Learn more](https://platform.openai.com/docs/guides/audio).
+
           frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
               existing frequency in the text so far, decreasing the model's likelihood to
               repeat the same line verbatim.
@@ -373,7 +410,18 @@ def create(
               [o1 series models](https://platform.openai.com/docs/guides/reasoning).
 
           metadata: Developer-defined tags and values used for filtering completions in the
-              [dashboard](https://platform.openai.com/completions).
+              [dashboard](https://platform.openai.com/chat-completions).
+
+          modalities: Output types that you would like the model to generate for this request. Most
+              models are capable of generating text, which is the default:
+
+              `["text"]`
+
+              The `gpt-4o-audio-preview` model can also be used to
+              [generate audio](https://platform.openai.com/docs/guides/audio). To request that
+              this model generate both text and audio responses, you can use:
+
+              `["text", "audio"]`
 
           n: How many chat completion choices to generate for each input message. Note that
               you will be charged based on the number of generated tokens across all of the
@@ -434,8 +482,9 @@ def create(
 
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
-          store: Whether or not to store the output of this completion request for traffic
-              logging in the [dashboard](https://platform.openai.com/completions).
+          store: Whether or not to store the output of this chat completion request for use in
+              our [model distillation](https://platform.openai.com/docs/guides/distillation)
+              or [evals](https://platform.openai.com/docs/guides/evals) products.
 
           stream_options: Options for streaming response. Only set this when you set `stream: true`.
 
@@ -490,6 +539,7 @@ def create(
         messages: Iterable[ChatCompletionMessageParam],
         model: Union[str, ChatModel],
         stream: bool,
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
         frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
         functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
@@ -498,6 +548,7 @@ def create(
         max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[ChatCompletionModality]] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
@@ -520,8 +571,12 @@ def create(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> ChatCompletion | Stream[ChatCompletionChunk]:
-        """
-        Creates a model response for the given chat conversation.
+        """Creates a model response for the given chat conversation.
+
+        Learn more in the
+        [text generation](https://platform.openai.com/docs/guides/text-generation),
+        [vision](https://platform.openai.com/docs/guides/vision), and
+        [audio](https://platform.openai.com/docs/guides/audio) guides.
 
         Args:
           messages: A list of messages comprising the conversation so far. Depending on the
@@ -542,6 +597,10 @@ def create(
               message.
               [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
 
+          audio: Parameters for audio output. Required when audio output is requested with
+              `modalities: ["audio"]`.
+              [Learn more](https://platform.openai.com/docs/guides/audio).
+
           frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
               existing frequency in the text so far, decreasing the model's likelihood to
               repeat the same line verbatim.
@@ -589,7 +648,18 @@ def create(
               [o1 series models](https://platform.openai.com/docs/guides/reasoning).
 
           metadata: Developer-defined tags and values used for filtering completions in the
-              [dashboard](https://platform.openai.com/completions).
+              [dashboard](https://platform.openai.com/chat-completions).
+
+          modalities: Output types that you would like the model to generate for this request. Most
+              models are capable of generating text, which is the default:
+
+              `["text"]`
+
+              The `gpt-4o-audio-preview` model can also be used to
+              [generate audio](https://platform.openai.com/docs/guides/audio). To request that
+              this model generate both text and audio responses, you can use:
+
+              `["text", "audio"]`
 
           n: How many chat completion choices to generate for each input message. Note that
               you will be charged based on the number of generated tokens across all of the
@@ -650,8 +720,9 @@ def create(
 
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
-          store: Whether or not to store the output of this completion request for traffic
-              logging in the [dashboard](https://platform.openai.com/completions).
+          store: Whether or not to store the output of this chat completion request for use in
+              our [model distillation](https://platform.openai.com/docs/guides/distillation)
+              or [evals](https://platform.openai.com/docs/guides/evals) products.
 
           stream_options: Options for streaming response. Only set this when you set `stream: true`.
 
@@ -705,6 +776,7 @@ def create(
         *,
         messages: Iterable[ChatCompletionMessageParam],
         model: Union[str, ChatModel],
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
         frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
         functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
@@ -713,6 +785,7 @@ def create(
         max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[ChatCompletionModality]] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
@@ -742,6 +815,7 @@ def create(
                 {
                     "messages": messages,
                     "model": model,
+                    "audio": audio,
                     "frequency_penalty": frequency_penalty,
                     "function_call": function_call,
                     "functions": functions,
@@ -750,6 +824,7 @@ def create(
                     "max_completion_tokens": max_completion_tokens,
                     "max_tokens": max_tokens,
                     "metadata": metadata,
+                    "modalities": modalities,
                     "n": n,
                     "parallel_tool_calls": parallel_tool_calls,
                     "presence_penalty": presence_penalty,
@@ -804,6 +879,7 @@ async def create(
         *,
         messages: Iterable[ChatCompletionMessageParam],
         model: Union[str, ChatModel],
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
         frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
         functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
@@ -812,6 +888,7 @@ async def create(
         max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[ChatCompletionModality]] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
@@ -835,8 +912,12 @@ async def create(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> ChatCompletion:
-        """
-        Creates a model response for the given chat conversation.
+        """Creates a model response for the given chat conversation.
+
+        Learn more in the
+        [text generation](https://platform.openai.com/docs/guides/text-generation),
+        [vision](https://platform.openai.com/docs/guides/vision), and
+        [audio](https://platform.openai.com/docs/guides/audio) guides.
 
         Args:
           messages: A list of messages comprising the conversation so far. Depending on the
@@ -850,6 +931,10 @@ async def create(
               [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility)
               table for details on which models work with the Chat API.
 
+          audio: Parameters for audio output. Required when audio output is requested with
+              `modalities: ["audio"]`.
+              [Learn more](https://platform.openai.com/docs/guides/audio).
+
           frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
               existing frequency in the text so far, decreasing the model's likelihood to
               repeat the same line verbatim.
@@ -897,7 +982,18 @@ async def create(
               [o1 series models](https://platform.openai.com/docs/guides/reasoning).
 
           metadata: Developer-defined tags and values used for filtering completions in the
-              [dashboard](https://platform.openai.com/completions).
+              [dashboard](https://platform.openai.com/chat-completions).
+
+          modalities: Output types that you would like the model to generate for this request. Most
+              models are capable of generating text, which is the default:
+
+              `["text"]`
+
+              The `gpt-4o-audio-preview` model can also be used to
+              [generate audio](https://platform.openai.com/docs/guides/audio). To request that
+              this model generate both text and audio responses, you can use:
+
+              `["text", "audio"]`
 
           n: How many chat completion choices to generate for each input message. Note that
               you will be charged based on the number of generated tokens across all of the
@@ -958,8 +1054,9 @@ async def create(
 
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
-          store: Whether or not to store the output of this completion request for traffic
-              logging in the [dashboard](https://platform.openai.com/completions).
+          store: Whether or not to store the output of this chat completion request for use in
+              our [model distillation](https://platform.openai.com/docs/guides/distillation)
+              or [evals](https://platform.openai.com/docs/guides/evals) products.
 
           stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be
               sent as data-only
@@ -1021,6 +1118,7 @@ async def create(
         messages: Iterable[ChatCompletionMessageParam],
         model: Union[str, ChatModel],
         stream: Literal[True],
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
         frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
         functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
@@ -1029,6 +1127,7 @@ async def create(
         max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[ChatCompletionModality]] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
@@ -1051,8 +1150,12 @@ async def create(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> AsyncStream[ChatCompletionChunk]:
-        """
-        Creates a model response for the given chat conversation.
+        """Creates a model response for the given chat conversation.
+
+        Learn more in the
+        [text generation](https://platform.openai.com/docs/guides/text-generation),
+        [vision](https://platform.openai.com/docs/guides/vision), and
+        [audio](https://platform.openai.com/docs/guides/audio) guides.
 
         Args:
           messages: A list of messages comprising the conversation so far. Depending on the
@@ -1073,6 +1176,10 @@ async def create(
               message.
               [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
 
+          audio: Parameters for audio output. Required when audio output is requested with
+              `modalities: ["audio"]`.
+              [Learn more](https://platform.openai.com/docs/guides/audio).
+
           frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
               existing frequency in the text so far, decreasing the model's likelihood to
               repeat the same line verbatim.
@@ -1120,7 +1227,18 @@ async def create(
               [o1 series models](https://platform.openai.com/docs/guides/reasoning).
 
           metadata: Developer-defined tags and values used for filtering completions in the
-              [dashboard](https://platform.openai.com/completions).
+              [dashboard](https://platform.openai.com/chat-completions).
+
+          modalities: Output types that you would like the model to generate for this request. Most
+              models are capable of generating text, which is the default:
+
+              `["text"]`
+
+              The `gpt-4o-audio-preview` model can also be used to
+              [generate audio](https://platform.openai.com/docs/guides/audio). To request that
+              this model generate both text and audio responses, you can use:
+
+              `["text", "audio"]`
 
           n: How many chat completion choices to generate for each input message. Note that
               you will be charged based on the number of generated tokens across all of the
@@ -1181,8 +1299,9 @@ async def create(
 
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
-          store: Whether or not to store the output of this completion request for traffic
-              logging in the [dashboard](https://platform.openai.com/completions).
+          store: Whether or not to store the output of this chat completion request for use in
+              our [model distillation](https://platform.openai.com/docs/guides/distillation)
+              or [evals](https://platform.openai.com/docs/guides/evals) products.
 
           stream_options: Options for streaming response. Only set this when you set `stream: true`.
 
@@ -1237,6 +1356,7 @@ async def create(
         messages: Iterable[ChatCompletionMessageParam],
         model: Union[str, ChatModel],
         stream: bool,
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
         frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
         functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
@@ -1245,6 +1365,7 @@ async def create(
         max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[ChatCompletionModality]] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
@@ -1267,8 +1388,12 @@ async def create(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
-        """
-        Creates a model response for the given chat conversation.
+        """Creates a model response for the given chat conversation.
+
+        Learn more in the
+        [text generation](https://platform.openai.com/docs/guides/text-generation),
+        [vision](https://platform.openai.com/docs/guides/vision), and
+        [audio](https://platform.openai.com/docs/guides/audio) guides.
 
         Args:
           messages: A list of messages comprising the conversation so far. Depending on the
@@ -1289,6 +1414,10 @@ async def create(
               message.
               [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
 
+          audio: Parameters for audio output. Required when audio output is requested with
+              `modalities: ["audio"]`.
+              [Learn more](https://platform.openai.com/docs/guides/audio).
+
           frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
               existing frequency in the text so far, decreasing the model's likelihood to
               repeat the same line verbatim.
@@ -1336,7 +1465,18 @@ async def create(
               [o1 series models](https://platform.openai.com/docs/guides/reasoning).
 
           metadata: Developer-defined tags and values used for filtering completions in the
-              [dashboard](https://platform.openai.com/completions).
+              [dashboard](https://platform.openai.com/chat-completions).
+
+          modalities: Output types that you would like the model to generate for this request. Most
+              models are capable of generating text, which is the default:
+
+              `["text"]`
+
+              The `gpt-4o-audio-preview` model can also be used to
+              [generate audio](https://platform.openai.com/docs/guides/audio). To request that
+              this model generate both text and audio responses, you can use:
+
+              `["text", "audio"]`
 
           n: How many chat completion choices to generate for each input message. Note that
               you will be charged based on the number of generated tokens across all of the
@@ -1397,8 +1537,9 @@ async def create(
 
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
-          store: Whether or not to store the output of this completion request for traffic
-              logging in the [dashboard](https://platform.openai.com/completions).
+          store: Whether or not to store the output of this chat completion request for use in
+              our [model distillation](https://platform.openai.com/docs/guides/distillation)
+              or [evals](https://platform.openai.com/docs/guides/evals) products.
 
           stream_options: Options for streaming response. Only set this when you set `stream: true`.
 
@@ -1452,6 +1593,7 @@ async def create(
         *,
         messages: Iterable[ChatCompletionMessageParam],
         model: Union[str, ChatModel],
+        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
         frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
         functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
@@ -1460,6 +1602,7 @@ async def create(
         max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        modalities: Optional[List[ChatCompletionModality]] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
@@ -1489,6 +1632,7 @@ async def create(
                 {
                     "messages": messages,
                     "model": model,
+                    "audio": audio,
                     "frequency_penalty": frequency_penalty,
                     "function_call": function_call,
                     "functions": functions,
@@ -1497,6 +1641,7 @@ async def create(
                     "max_completion_tokens": max_completion_tokens,
                     "max_tokens": max_tokens,
                     "metadata": metadata,
+                    "modalities": modalities,
                     "n": n,
                     "parallel_tool_calls": parallel_tool_calls,
                     "presence_penalty": presence_penalty,
diff --git a/src/openai/types/beta/assistant_stream_event.py b/src/openai/types/beta/assistant_stream_event.py
index f1d8898ff2..41d3a0c5ea 100644
--- a/src/openai/types/beta/assistant_stream_event.py
+++ b/src/openai/types/beta/assistant_stream_event.py
@@ -1,6 +1,6 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
-from typing import Union
+from typing import Union, Optional
 from typing_extensions import Literal, Annotated, TypeAlias
 
 from .thread import Thread
@@ -51,6 +51,9 @@ class ThreadCreated(BaseModel):
 
     event: Literal["thread.created"]
 
+    enabled: Optional[bool] = None
+    """Whether to enable input audio transcription."""
+
 
 class ThreadRunCreated(BaseModel):
     data: Run
diff --git a/src/openai/types/chat/__init__.py b/src/openai/types/chat/__init__.py
index df3b48149c..b85365ecb1 100644
--- a/src/openai/types/chat/__init__.py
+++ b/src/openai/types/chat/__init__.py
@@ -4,10 +4,13 @@
 
 from .chat_completion import ChatCompletion as ChatCompletion
 from .chat_completion_role import ChatCompletionRole as ChatCompletionRole
+from .chat_completion_audio import ChatCompletionAudio as ChatCompletionAudio
 from .chat_completion_chunk import ChatCompletionChunk as ChatCompletionChunk
 from .chat_completion_message import ChatCompletionMessage as ChatCompletionMessage
+from .chat_completion_modality import ChatCompletionModality as ChatCompletionModality
 from .completion_create_params import CompletionCreateParams as CompletionCreateParams
 from .chat_completion_tool_param import ChatCompletionToolParam as ChatCompletionToolParam
+from .chat_completion_audio_param import ChatCompletionAudioParam as ChatCompletionAudioParam
 from .chat_completion_message_param import ChatCompletionMessageParam as ChatCompletionMessageParam
 from .chat_completion_token_logprob import ChatCompletionTokenLogprob as ChatCompletionTokenLogprob
 from .chat_completion_message_tool_call import ChatCompletionMessageToolCall as ChatCompletionMessageToolCall
@@ -43,3 +46,6 @@
 from .chat_completion_function_call_option_param import (
     ChatCompletionFunctionCallOptionParam as ChatCompletionFunctionCallOptionParam,
 )
+from .chat_completion_content_part_input_audio_param import (
+    ChatCompletionContentPartInputAudioParam as ChatCompletionContentPartInputAudioParam,
+)
diff --git a/src/openai/types/chat/chat_completion_assistant_message_param.py b/src/openai/types/chat/chat_completion_assistant_message_param.py
index 2429d41d33..35e3a3d784 100644
--- a/src/openai/types/chat/chat_completion_assistant_message_param.py
+++ b/src/openai/types/chat/chat_completion_assistant_message_param.py
@@ -9,7 +9,13 @@
 from .chat_completion_message_tool_call_param import ChatCompletionMessageToolCallParam
 from .chat_completion_content_part_refusal_param import ChatCompletionContentPartRefusalParam
 
-__all__ = ["ChatCompletionAssistantMessageParam", "ContentArrayOfContentPart", "FunctionCall"]
+__all__ = ["ChatCompletionAssistantMessageParam", "Audio", "ContentArrayOfContentPart", "FunctionCall"]
+
+
+class Audio(TypedDict, total=False):
+    id: Required[str]
+    """Unique identifier for a previous audio response from the model."""
+
 
 ContentArrayOfContentPart: TypeAlias = Union[ChatCompletionContentPartTextParam, ChatCompletionContentPartRefusalParam]
 
@@ -31,6 +37,12 @@ class ChatCompletionAssistantMessageParam(TypedDict, total=False):
     role: Required[Literal["assistant"]]
     """The role of the messages author, in this case `assistant`."""
 
+    audio: Optional[Audio]
+    """Data about a previous audio response from the model.
+
+    [Learn more](https://platform.openai.com/docs/guides/audio).
+    """
+
     content: Union[str, Iterable[ContentArrayOfContentPart], None]
     """The contents of the assistant message.
 
diff --git a/src/openai/types/chat/chat_completion_audio.py b/src/openai/types/chat/chat_completion_audio.py
new file mode 100644
index 0000000000..135ee8845c
--- /dev/null
+++ b/src/openai/types/chat/chat_completion_audio.py
@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+
+
+from ..._models import BaseModel
+
+__all__ = ["ChatCompletionAudio"]
+
+
+class ChatCompletionAudio(BaseModel):
+    id: str
+    """Unique identifier for this audio response."""
+
+    data: str
+    """
+    Base64 encoded audio bytes generated by the model, in the format specified in
+    the request.
+    """
+
+    expires_at: int
+    """
+    The Unix timestamp (in seconds) for when this audio response will no longer be
+    accessible on the server for use in multi-turn conversations.
+    """
+
+    transcript: str
+    """Transcript of the audio generated by the model."""
diff --git a/src/openai/types/chat/chat_completion_audio_param.py b/src/openai/types/chat/chat_completion_audio_param.py
new file mode 100644
index 0000000000..6a4ce9ac1f
--- /dev/null
+++ b/src/openai/types/chat/chat_completion_audio_param.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ChatCompletionAudioParam"]
+
+
+class ChatCompletionAudioParam(TypedDict, total=False):
+    format: Required[Literal["wav", "mp3", "flac", "opus", "pcm16"]]
+    """Specifies the output audio format.
+
+    Must be one of `wav`, `mp3`, `flac`, `opus`, or `pcm16`.
+    """
+
+    voice: Required[Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"]]
+    """Specifies the voice type.
+
+    Supported voices are `alloy`, `echo`, `fable`, `onyx`, `nova`, and `shimmer`.
+    """
diff --git a/src/openai/types/chat/chat_completion_content_part_input_audio_param.py b/src/openai/types/chat/chat_completion_content_part_input_audio_param.py
new file mode 100644
index 0000000000..0b1b1a80b1
--- /dev/null
+++ b/src/openai/types/chat/chat_completion_content_part_input_audio_param.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ChatCompletionContentPartInputAudioParam", "InputAudio"]
+
+
+class InputAudio(TypedDict, total=False):
+    data: Required[str]
+    """Base64 encoded audio data."""
+
+    format: Required[Literal["wav", "mp3"]]
+    """The format of the encoded audio data. Currently supports `wav` and `mp3`."""
+
+
+class ChatCompletionContentPartInputAudioParam(TypedDict, total=False):
+    input_audio: Required[InputAudio]
+
+    type: Required[Literal["input_audio"]]
+    """The type of the content part. Always `input_audio`."""
diff --git a/src/openai/types/chat/chat_completion_content_part_param.py b/src/openai/types/chat/chat_completion_content_part_param.py
index e0c6e480f2..682d11f4c7 100644
--- a/src/openai/types/chat/chat_completion_content_part_param.py
+++ b/src/openai/types/chat/chat_completion_content_part_param.py
@@ -7,9 +7,10 @@
 
 from .chat_completion_content_part_text_param import ChatCompletionContentPartTextParam
 from .chat_completion_content_part_image_param import ChatCompletionContentPartImageParam
+from .chat_completion_content_part_input_audio_param import ChatCompletionContentPartInputAudioParam
 
 __all__ = ["ChatCompletionContentPartParam"]
 
 ChatCompletionContentPartParam: TypeAlias = Union[
-    ChatCompletionContentPartTextParam, ChatCompletionContentPartImageParam
+    ChatCompletionContentPartTextParam, ChatCompletionContentPartImageParam, ChatCompletionContentPartInputAudioParam
 ]
diff --git a/src/openai/types/chat/chat_completion_message.py b/src/openai/types/chat/chat_completion_message.py
index 492bb68c85..704fa5d5d1 100644
--- a/src/openai/types/chat/chat_completion_message.py
+++ b/src/openai/types/chat/chat_completion_message.py
@@ -4,6 +4,7 @@
 from typing_extensions import Literal
 
 from ..._models import BaseModel
+from .chat_completion_audio import ChatCompletionAudio
 from .chat_completion_message_tool_call import ChatCompletionMessageToolCall
 
 __all__ = ["ChatCompletionMessage", "FunctionCall"]
@@ -32,6 +33,13 @@ class ChatCompletionMessage(BaseModel):
     role: Literal["assistant"]
     """The role of the author of this message."""
 
+    audio: Optional[ChatCompletionAudio] = None
+    """
+    If the audio output modality is requested, this object contains data about the
+    audio response from the model.
+    [Learn more](https://platform.openai.com/docs/guides/audio).
+    """
+
     function_call: Optional[FunctionCall] = None
     """Deprecated and replaced by `tool_calls`.
 
diff --git a/src/openai/types/chat/chat_completion_modality.py b/src/openai/types/chat/chat_completion_modality.py
new file mode 100644
index 0000000000..8e3c145979
--- /dev/null
+++ b/src/openai/types/chat/chat_completion_modality.py
@@ -0,0 +1,7 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal, TypeAlias
+
+__all__ = ["ChatCompletionModality"]
+
+ChatCompletionModality: TypeAlias = Literal["text", "audio"]
diff --git a/src/openai/types/chat/completion_create_params.py b/src/openai/types/chat/completion_create_params.py
index 3f55dfbe6e..af6a47c219 100644
--- a/src/openai/types/chat/completion_create_params.py
+++ b/src/openai/types/chat/completion_create_params.py
@@ -6,7 +6,9 @@
 from typing_extensions import Literal, Required, TypeAlias, TypedDict
 
 from ..chat_model import ChatModel
+from .chat_completion_modality import ChatCompletionModality
 from .chat_completion_tool_param import ChatCompletionToolParam
+from .chat_completion_audio_param import ChatCompletionAudioParam
 from .chat_completion_message_param import ChatCompletionMessageParam
 from ..shared_params.function_parameters import FunctionParameters
 from ..shared_params.response_format_text import ResponseFormatText
@@ -45,6 +47,13 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     table for details on which models work with the Chat API.
     """
 
+    audio: Optional[ChatCompletionAudioParam]
+    """Parameters for audio output.
+
+    Required when audio output is requested with `modalities: ["audio"]`.
+    [Learn more](https://platform.openai.com/docs/guides/audio).
+    """
+
     frequency_penalty: Optional[float]
     """Number between -2.0 and 2.0.
 
@@ -112,7 +121,21 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     metadata: Optional[Dict[str, str]]
     """
     Developer-defined tags and values used for filtering completions in the
-    [dashboard](https://platform.openai.com/completions).
+    [dashboard](https://platform.openai.com/chat-completions).
+    """
+
+    modalities: Optional[List[ChatCompletionModality]]
+    """
+    Output types that you would like the model to generate for this request. Most
+    models are capable of generating text, which is the default:
+
+    `["text"]`
+
+    The `gpt-4o-audio-preview` model can also be used to
+    [generate audio](https://platform.openai.com/docs/guides/audio). To request that
+    this model generate both text and audio responses, you can use:
+
+    `["text", "audio"]`
     """
 
     n: Optional[int]
@@ -195,8 +218,9 @@ class CompletionCreateParamsBase(TypedDict, total=False):
 
     store: Optional[bool]
     """
-    Whether or not to store the output of this completion request for traffic
-    logging in the [dashboard](https://platform.openai.com/completions).
+    Whether or not to store the output of this chat completion request for use in
+    our [model distillation](https://platform.openai.com/docs/guides/distillation)
+    or [evals](https://platform.openai.com/docs/guides/evals) products.
     """
 
     stream_options: Optional[ChatCompletionStreamOptionsParam]
diff --git a/src/openai/types/chat_model.py b/src/openai/types/chat_model.py
index f2d5674786..b801aa0914 100644
--- a/src/openai/types/chat_model.py
+++ b/src/openai/types/chat_model.py
@@ -12,7 +12,10 @@
     "gpt-4o",
     "gpt-4o-2024-08-06",
     "gpt-4o-2024-05-13",
+    "gpt-4o-realtime-preview",
     "gpt-4o-realtime-preview-2024-10-01",
+    "gpt-4o-audio-preview",
+    "gpt-4o-audio-preview-2024-10-01",
     "chatgpt-4o-latest",
     "gpt-4o-mini",
     "gpt-4o-mini-2024-07-18",
diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py
index d353139543..a341e78f7e 100644
--- a/tests/api_resources/chat/test_completions.py
+++ b/tests/api_resources/chat/test_completions.py
@@ -43,6 +43,10 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None:
                 }
             ],
             model="gpt-4o",
+            audio={
+                "format": "wav",
+                "voice": "alloy",
+            },
             frequency_penalty=-2,
             function_call="none",
             functions=[
@@ -57,6 +61,7 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None:
             max_completion_tokens=0,
             max_tokens=0,
             metadata={"foo": "string"},
+            modalities=["text", "audio"],
             n=1,
             parallel_tool_calls=True,
             presence_penalty=-2,
@@ -166,6 +171,10 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None:
             ],
             model="gpt-4o",
             stream=True,
+            audio={
+                "format": "wav",
+                "voice": "alloy",
+            },
             frequency_penalty=-2,
             function_call="none",
             functions=[
@@ -180,6 +189,7 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None:
             max_completion_tokens=0,
             max_tokens=0,
             metadata={"foo": "string"},
+            modalities=["text", "audio"],
             n=1,
             parallel_tool_calls=True,
             presence_penalty=-2,
@@ -291,6 +301,10 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
                 }
             ],
             model="gpt-4o",
+            audio={
+                "format": "wav",
+                "voice": "alloy",
+            },
             frequency_penalty=-2,
             function_call="none",
             functions=[
@@ -305,6 +319,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
             max_completion_tokens=0,
             max_tokens=0,
             metadata={"foo": "string"},
+            modalities=["text", "audio"],
             n=1,
             parallel_tool_calls=True,
             presence_penalty=-2,
@@ -414,6 +429,10 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
             ],
             model="gpt-4o",
             stream=True,
+            audio={
+                "format": "wav",
+                "voice": "alloy",
+            },
             frequency_penalty=-2,
             function_call="none",
             functions=[
@@ -428,6 +447,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
             max_completion_tokens=0,
             max_tokens=0,
             metadata={"foo": "string"},
+            modalities=["text", "audio"],
             n=1,
             parallel_tool_calls=True,
             presence_penalty=-2,