openai · stainless-app · Oct 17, 2024 · Oct 17, 2024
@@ -1,2 +1,2 @@
 configured_endpoints: 68
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-52b934aee6468039ec7f4ce046a282b5fbce114afc708e70f17121df654f71da.yml
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-8729aaa35436531ab453224af10e67f89677db8f350f0346bb3537489edea649.yml
@@ -39,16 +39,20 @@ Types:
 from openai.types.chat import (
     ChatCompletion,
     ChatCompletionAssistantMessageParam,
+    ChatCompletionAudio,
+    ChatCompletionAudioParam,
     ChatCompletionChunk,
     ChatCompletionContentPart,
     ChatCompletionContentPartImage,
+    ChatCompletionContentPartInputAudio,
     ChatCompletionContentPartRefusal,
     ChatCompletionContentPartText,
     ChatCompletionFunctionCallOption,
     ChatCompletionFunctionMessageParam,
     ChatCompletionMessage,
     ChatCompletionMessageParam,
     ChatCompletionMessageToolCall,
+    ChatCompletionModality,
     ChatCompletionNamedToolChoice,
     ChatCompletionRole,
     ChatCompletionStreamOptions,

@@ -1,6 +1,6 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
-from typing import Union
+from typing import Union, Optional
 from typing_extensions import Literal, Annotated, TypeAlias
 
 from .thread import Thread
@@ -51,6 +51,9 @@ class ThreadCreated(BaseModel):
 
     event: Literal["thread.created"]
 
+    enabled: Optional[bool] = None
+    """Whether to enable input audio transcription."""
+
 
 class ThreadRunCreated(BaseModel):
     data: Run

@@ -4,10 +4,13 @@
 
 from .chat_completion import ChatCompletion as ChatCompletion
 from .chat_completion_role import ChatCompletionRole as ChatCompletionRole
+from .chat_completion_audio import ChatCompletionAudio as ChatCompletionAudio
 from .chat_completion_chunk import ChatCompletionChunk as ChatCompletionChunk
 from .chat_completion_message import ChatCompletionMessage as ChatCompletionMessage
+from .chat_completion_modality import ChatCompletionModality as ChatCompletionModality
 from .completion_create_params import CompletionCreateParams as CompletionCreateParams
 from .chat_completion_tool_param import ChatCompletionToolParam as ChatCompletionToolParam
+from .chat_completion_audio_param import ChatCompletionAudioParam as ChatCompletionAudioParam
 from .chat_completion_message_param import ChatCompletionMessageParam as ChatCompletionMessageParam
 from .chat_completion_token_logprob import ChatCompletionTokenLogprob as ChatCompletionTokenLogprob
 from .chat_completion_message_tool_call import ChatCompletionMessageToolCall as ChatCompletionMessageToolCall
@@ -43,3 +46,6 @@
 from .chat_completion_function_call_option_param import (
     ChatCompletionFunctionCallOptionParam as ChatCompletionFunctionCallOptionParam,
 )
+from .chat_completion_content_part_input_audio_param import (
+    ChatCompletionContentPartInputAudioParam as ChatCompletionContentPartInputAudioParam,
+)
@@ -9,7 +9,13 @@
 from .chat_completion_message_tool_call_param import ChatCompletionMessageToolCallParam
 from .chat_completion_content_part_refusal_param import ChatCompletionContentPartRefusalParam
 
-__all__ = ["ChatCompletionAssistantMessageParam", "ContentArrayOfContentPart", "FunctionCall"]
+__all__ = ["ChatCompletionAssistantMessageParam", "Audio", "ContentArrayOfContentPart", "FunctionCall"]
+
+
+class Audio(TypedDict, total=False):
+    id: Required[str]
+    """Unique identifier for a previous audio response from the model."""
+
 
 ContentArrayOfContentPart: TypeAlias = Union[ChatCompletionContentPartTextParam, ChatCompletionContentPartRefusalParam]
 
@@ -31,6 +37,12 @@ class ChatCompletionAssistantMessageParam(TypedDict, total=False):
     role: Required[Literal["assistant"]]
     """The role of the messages author, in this case `assistant`."""
 
+    audio: Optional[Audio]
+    """Data about a previous audio response from the model.
+
+    [Learn more](https://platform.openai.com/docs/guides/audio).
+    """
+
     content: Union[str, Iterable[ContentArrayOfContentPart], None]
     """The contents of the assistant message.
 

@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+
+
+from ..._models import BaseModel
+
+__all__ = ["ChatCompletionAudio"]
+
+
+class ChatCompletionAudio(BaseModel):
+    id: str
+    """Unique identifier for this audio response."""
+
+    data: str
+    """
+    Base64 encoded audio bytes generated by the model, in the format specified in
+    the request.
+    """
+
+    expires_at: int
+    """
+    The Unix timestamp (in seconds) for when this audio response will no longer be
+    accessible on the server for use in multi-turn conversations.
+    """
+
+    transcript: str
+    """Transcript of the audio generated by the model."""
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ChatCompletionAudioParam"]
+
+
+class ChatCompletionAudioParam(TypedDict, total=False):
+    format: Required[Literal["wav", "mp3", "flac", "opus", "pcm16"]]
+    """Specifies the output audio format.
+
+    Must be one of `wav`, `mp3`, `flac`, `opus`, or `pcm16`.
+    """
+
+    voice: Required[Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"]]
+    """Specifies the voice type.
+
+    Supported voices are `alloy`, `echo`, `fable`, `onyx`, `nova`, and `shimmer`.
+    """
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ChatCompletionContentPartInputAudioParam", "InputAudio"]
+
+
+class InputAudio(TypedDict, total=False):
+    data: Required[str]
+    """Base64 encoded audio data."""
+
+    format: Required[Literal["wav", "mp3"]]
+    """The format of the encoded audio data. Currently supports "wav" and "mp3"."""
+
+
+class ChatCompletionContentPartInputAudioParam(TypedDict, total=False):
+    input_audio: Required[InputAudio]
+
+    type: Required[Literal["input_audio"]]
+    """The type of the content part. Always `input_audio`."""
@@ -7,9 +7,10 @@
 
 from .chat_completion_content_part_text_param import ChatCompletionContentPartTextParam
 from .chat_completion_content_part_image_param import ChatCompletionContentPartImageParam
+from .chat_completion_content_part_input_audio_param import ChatCompletionContentPartInputAudioParam
 
 __all__ = ["ChatCompletionContentPartParam"]
 
 ChatCompletionContentPartParam: TypeAlias = Union[
-    ChatCompletionContentPartTextParam, ChatCompletionContentPartImageParam
+    ChatCompletionContentPartTextParam, ChatCompletionContentPartImageParam, ChatCompletionContentPartInputAudioParam
 ]
@@ -4,6 +4,7 @@
 from typing_extensions import Literal
 
 from ..._models import BaseModel
+from .chat_completion_audio import ChatCompletionAudio
 from .chat_completion_message_tool_call import ChatCompletionMessageToolCall
 
 __all__ = ["ChatCompletionMessage", "FunctionCall"]
@@ -32,6 +33,13 @@ class ChatCompletionMessage(BaseModel):
     role: Literal["assistant"]
     """The role of the author of this message."""
 
+    audio: Optional[ChatCompletionAudio] = None
+    """
+    If the audio output modality is requested, this object contains data about the
+    audio response from the model.
+    [Learn more](https://platform.openai.com/docs/guides/audio).
+    """
+
     function_call: Optional[FunctionCall] = None
     """Deprecated and replaced by `tool_calls`.
 

@@ -0,0 +1,7 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal, TypeAlias
+
+__all__ = ["ChatCompletionModality"]
+
+ChatCompletionModality: TypeAlias = Literal["text", "audio"]
@@ -6,7 +6,9 @@
 from typing_extensions import Literal, Required, TypeAlias, TypedDict
 
 from ..chat_model import ChatModel
+from .chat_completion_modality import ChatCompletionModality
 from .chat_completion_tool_param import ChatCompletionToolParam
+from .chat_completion_audio_param import ChatCompletionAudioParam
 from .chat_completion_message_param import ChatCompletionMessageParam
 from ..shared_params.function_parameters import FunctionParameters
 from ..shared_params.response_format_text import ResponseFormatText
@@ -45,6 +47,13 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     table for details on which models work with the Chat API.
     """
 
+    audio: Optional[ChatCompletionAudioParam]
+    """Parameters for audio output.
+
+    Required when audio output is requested with `modalities: ["audio"]`.
+    [Learn more](https://platform.openai.com/docs/guides/audio).
+    """
+
     frequency_penalty: Optional[float]
     """Number between -2.0 and 2.0.
 
@@ -112,7 +121,21 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     metadata: Optional[Dict[str, str]]
     """
     Developer-defined tags and values used for filtering completions in the
-    [dashboard](https://platform.openai.com/completions).
+    [dashboard](https://platform.openai.com/chat-completions).
+    """
+
+    modalities: Optional[List[ChatCompletionModality]]
+    """
+    Output types that you would like the model to generate for this request. Most
+    models are capable of generating text, which is the default:
+
+    `["text"]`
+
+    The `gpt-4o-audio-preview` model can also be used to
+    [generate audio](https://platform.openai.com/docs/guides/audio). To request that
+    this model generate both text and audio responses, you can use:
+
+    `["text", "audio"]`
     """
 
     n: Optional[int]
@@ -195,8 +218,9 @@ class CompletionCreateParamsBase(TypedDict, total=False):
 
     store: Optional[bool]
     """
-    Whether or not to store the output of this completion request for traffic
-    logging in the [dashboard](https://platform.openai.com/completions).
+    Whether or not to store the output of this chat completion request for use in
+    our [model distillation](https://platform.openai.com/docs/guides/distillation)
+    or [evals](https://platform.openai.com/docs/guides/evals) products.
     """
 
     stream_options: Optional[ChatCompletionStreamOptionsParam]

@@ -12,7 +12,10 @@
     "gpt-4o",
     "gpt-4o-2024-08-06",
     "gpt-4o-2024-05-13",
+    "gpt-4o-realtime-preview",
     "gpt-4o-realtime-preview-2024-10-01",
+    "gpt-4o-audio-preview",
+    "gpt-4o-audio-preview-2024-10-01",
     "chatgpt-4o-latest",
     "gpt-4o-mini",
     "gpt-4o-mini-2024-07-18",

@@ -43,6 +43,10 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None:
                 }
             ],
             model="gpt-4o",
+            audio={
+                "format": "wav",
+                "voice": "alloy",
+            },
             frequency_penalty=-2,
             function_call="none",
             functions=[
@@ -57,6 +61,7 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None:
             max_completion_tokens=0,
             max_tokens=0,
             metadata={"foo": "string"},
+            modalities=["text", "audio"],
             n=1,
             parallel_tool_calls=True,
             presence_penalty=-2,
@@ -166,6 +171,10 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None:
             ],
             model="gpt-4o",
             stream=True,
+            audio={
+                "format": "wav",
+                "voice": "alloy",
+            },
             frequency_penalty=-2,
             function_call="none",
             functions=[
@@ -180,6 +189,7 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None:
             max_completion_tokens=0,
             max_tokens=0,
             metadata={"foo": "string"},
+            modalities=["text", "audio"],
             n=1,
             parallel_tool_calls=True,
             presence_penalty=-2,
@@ -291,6 +301,10 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
                 }
             ],
             model="gpt-4o",
+            audio={
+                "format": "wav",
+                "voice": "alloy",
+            },
             frequency_penalty=-2,
             function_call="none",
             functions=[
@@ -305,6 +319,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
             max_completion_tokens=0,
             max_tokens=0,
             metadata={"foo": "string"},
+            modalities=["text", "audio"],
             n=1,
             parallel_tool_calls=True,
             presence_penalty=-2,
@@ -414,6 +429,10 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
             ],
             model="gpt-4o",
             stream=True,
+            audio={
+                "format": "wav",
+                "voice": "alloy",
+            },
             frequency_penalty=-2,
             function_call="none",
             functions=[
@@ -428,6 +447,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
             max_completion_tokens=0,
             max_tokens=0,
             metadata={"foo": "string"},
+            modalities=["text", "audio"],
             n=1,
             parallel_tool_calls=True,
             presence_penalty=-2,