From 251c911e4b6562fb1751ae2a880e7ff6bb2e7bd2 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Fri, 21 Nov 2025 15:16:37 +0000
Subject: [PATCH 1/3] feat(api): Change TTS call signature
BREAKING CHANGE: Change the call signature from `audio.create` to
`audio.speech.create` to match the spec for the Python library and leave
room for future APIs.
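
A minimal migration sketch (illustrative only; assumes a configured
`Together` client and a `TOGETHER_API_KEY` environment variable, and
mirrors the calls exercised in the updated tests — "friendly sidekick"
is one of the documented cartesia/sonic voices):

    from together import Together

    client = Together()  # assumes TOGETHER_API_KEY is set in the environment

    # Before: speech synthesis was invoked directly on the audio resource.
    response = client.audio.create(
        input="Hello, world!",
        model="cartesia/sonic",
        voice="friendly sidekick",
    )

    # After: the same parameters move to the nested speech resource.
    response = client.audio.speech.create(
        input="Hello, world!",
        model="cartesia/sonic",
        voice="friendly sidekick",
    )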
---
.stats.yml | 2 +-
api.md | 4 +-
src/together/resources/audio/__init__.py | 14 +
src/together/resources/audio/audio.py | 559 +---------------
src/together/resources/audio/speech.py | 605 ++++++++++++++++++
src/together/types/__init__.py | 1 -
src/together/types/audio/__init__.py | 1 +
.../speech_create_params.py} | 10 +-
.../{test_audio.py => audio/test_speech.py} | 160 ++---
9 files changed, 737 insertions(+), 619 deletions(-)
create mode 100644 src/together/resources/audio/speech.py
rename src/together/types/{audio_create_params.py => audio/speech_create_params.py} (86%)
rename tests/api_resources/{test_audio.py => audio/test_speech.py} (63%)
diff --git a/.stats.yml b/.stats.yml
index 1e723030..b14d585f 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
configured_endpoints: 44
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2Ftogetherai-b86f8e6c4674d1a7829ffa8ddff4bc93d21334d231e6a4d0fd734d411c07a4eb.yml
openapi_spec_hash: 8af4975be6ae8f4655fa92fd26af9682
-config_hash: b337cdd3c62dbd3383529592a029b347
+config_hash: afbbabb8eb5bfbbf8139546a13addd9a
diff --git a/api.md b/api.md
index 13153257..56280f19 100644
--- a/api.md
+++ b/api.md
@@ -156,9 +156,11 @@ Types:
from together.types import AudioFile, AudioSpeechStreamChunk
```
+## Speech
+
Methods:
-- client.audio.create(\*\*params) -> BinaryAPIResponse
+- client.audio.speech.create(\*\*params) -> BinaryAPIResponse
## Voices
diff --git a/src/together/resources/audio/__init__.py b/src/together/resources/audio/__init__.py
index 7219dabc..fae72db6 100644
--- a/src/together/resources/audio/__init__.py
+++ b/src/together/resources/audio/__init__.py
@@ -8,6 +8,14 @@
AudioResourceWithStreamingResponse,
AsyncAudioResourceWithStreamingResponse,
)
+from .speech import (
+ SpeechResource,
+ AsyncSpeechResource,
+ SpeechResourceWithRawResponse,
+ AsyncSpeechResourceWithRawResponse,
+ SpeechResourceWithStreamingResponse,
+ AsyncSpeechResourceWithStreamingResponse,
+)
from .voices import (
VoicesResource,
AsyncVoicesResource,
@@ -34,6 +42,12 @@
)
__all__ = [
+ "SpeechResource",
+ "AsyncSpeechResource",
+ "SpeechResourceWithRawResponse",
+ "AsyncSpeechResourceWithRawResponse",
+ "SpeechResourceWithStreamingResponse",
+ "AsyncSpeechResourceWithStreamingResponse",
"VoicesResource",
"AsyncVoicesResource",
"VoicesResourceWithRawResponse",
diff --git a/src/together/resources/audio/audio.py b/src/together/resources/audio/audio.py
index 47d4f9d8..0bae6bcd 100644
--- a/src/together/resources/audio/audio.py
+++ b/src/together/resources/audio/audio.py
@@ -2,11 +2,14 @@
from __future__ import annotations
-from typing import Union
-from typing_extensions import Literal, overload
-
-import httpx
-
+from .speech import (
+ SpeechResource,
+ AsyncSpeechResource,
+ SpeechResourceWithRawResponse,
+ AsyncSpeechResourceWithRawResponse,
+ SpeechResourceWithStreamingResponse,
+ AsyncSpeechResourceWithStreamingResponse,
+)
from .voices import (
VoicesResource,
AsyncVoicesResource,
@@ -15,22 +18,8 @@
VoicesResourceWithStreamingResponse,
AsyncVoicesResourceWithStreamingResponse,
)
-from ...types import audio_create_params
-from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
-from ..._utils import required_args, maybe_transform, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import (
- BinaryAPIResponse,
- AsyncBinaryAPIResponse,
- StreamedBinaryAPIResponse,
- AsyncStreamedBinaryAPIResponse,
- to_custom_raw_response_wrapper,
- to_custom_streamed_response_wrapper,
- async_to_custom_raw_response_wrapper,
- async_to_custom_streamed_response_wrapper,
-)
-from ..._streaming import Stream, AsyncStream
from .translations import (
TranslationsResource,
AsyncTranslationsResource,
@@ -39,7 +28,6 @@
TranslationsResourceWithStreamingResponse,
AsyncTranslationsResourceWithStreamingResponse,
)
-from ..._base_client import make_request_options
from .transcriptions import (
TranscriptionsResource,
AsyncTranscriptionsResource,
@@ -48,12 +36,15 @@
TranscriptionsResourceWithStreamingResponse,
AsyncTranscriptionsResourceWithStreamingResponse,
)
-from ...types.audio_speech_stream_chunk import AudioSpeechStreamChunk
__all__ = ["AudioResource", "AsyncAudioResource"]
class AudioResource(SyncAPIResource):
+ @cached_property
+ def speech(self) -> SpeechResource:
+ return SpeechResource(self._client)
+
@cached_property
def voices(self) -> VoicesResource:
return VoicesResource(self._client)
@@ -85,255 +76,12 @@ def with_streaming_response(self) -> AudioResourceWithStreamingResponse:
"""
return AudioResourceWithStreamingResponse(self)
- @overload
- def create(
- self,
- *,
- input: str,
- model: Union[Literal["cartesia/sonic", "hexgrad/Kokoro-82M", "canopylabs/orpheus-3b-0.1-ft"], str],
- voice: str,
- language: Literal["en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru", "sv", "tr", "zh"]
- | Omit = omit,
- response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
- response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
- sample_rate: float | Omit = omit,
- stream: Literal[False] | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> BinaryAPIResponse:
- """
- Generate audio from input text
-
- Args:
- input: Input text to generate the audio for
-
- model: The name of the model to query.
-
- [See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#audio-models)
- The current supported tts models are: - cartesia/sonic - hexgrad/Kokoro-82M -
- canopylabs/orpheus-3b-0.1-ft
-
- voice: The voice to use for generating the audio. The voices supported are different
- for each model. For eg - for canopylabs/orpheus-3b-0.1-ft, one of the voices
- supported is tara, for hexgrad/Kokoro-82M, one of the voices supported is
- af_alloy and for cartesia/sonic, one of the voices supported is "friendly
- sidekick".
-
- You can view the voices supported for each model using the /v1/voices endpoint
- sending the model name as the query parameter.
- [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available).
-
- language: Language of input text.
-
- response_encoding: Audio encoding of response
-
- response_format: The format of audio output. Supported formats are mp3, wav, raw if streaming is
- false. If streaming is true, the only supported format is raw.
-
- sample_rate: Sampling rate to use for the output audio. The default sampling rate for
- canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for
- cartesia/sonic is 44100.
-
- stream: If true, output is streamed for several characters at a time instead of waiting
- for the full response. The stream terminates with `data: [DONE]`. If false,
- return the encoded audio as octet stream
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- ...
-
- @overload
- def create(
- self,
- *,
- input: str,
- model: Union[Literal["cartesia/sonic", "hexgrad/Kokoro-82M", "canopylabs/orpheus-3b-0.1-ft"], str],
- stream: Literal[True],
- voice: str,
- language: Literal["en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru", "sv", "tr", "zh"]
- | Omit = omit,
- response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
- response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
- sample_rate: float | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Stream[AudioSpeechStreamChunk]:
- """
- Generate audio from input text
-
- Args:
- input: Input text to generate the audio for
-
- model: The name of the model to query.
-
- [See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#audio-models)
- The current supported tts models are: - cartesia/sonic - hexgrad/Kokoro-82M -
- canopylabs/orpheus-3b-0.1-ft
-
- stream: If true, output is streamed for several characters at a time instead of waiting
- for the full response. The stream terminates with `data: [DONE]`. If false,
- return the encoded audio as octet stream
-
- voice: The voice to use for generating the audio. The voices supported are different
- for each model. For eg - for canopylabs/orpheus-3b-0.1-ft, one of the voices
- supported is tara, for hexgrad/Kokoro-82M, one of the voices supported is
- af_alloy and for cartesia/sonic, one of the voices supported is "friendly
- sidekick".
-
- You can view the voices supported for each model using the /v1/voices endpoint
- sending the model name as the query parameter.
- [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available).
-
- language: Language of input text.
-
- response_encoding: Audio encoding of response
-
- response_format: The format of audio output. Supported formats are mp3, wav, raw if streaming is
- false. If streaming is true, the only supported format is raw.
-
- sample_rate: Sampling rate to use for the output audio. The default sampling rate for
- canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for
- cartesia/sonic is 44100.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- ...
-
- @overload
- def create(
- self,
- *,
- input: str,
- model: Union[Literal["cartesia/sonic", "hexgrad/Kokoro-82M", "canopylabs/orpheus-3b-0.1-ft"], str],
- stream: bool,
- voice: str,
- language: Literal["en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru", "sv", "tr", "zh"]
- | Omit = omit,
- response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
- response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
- sample_rate: float | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> BinaryAPIResponse | Stream[AudioSpeechStreamChunk]:
- """
- Generate audio from input text
-
- Args:
- input: Input text to generate the audio for
-
- model: The name of the model to query.
-
- [See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#audio-models)
- The current supported tts models are: - cartesia/sonic - hexgrad/Kokoro-82M -
- canopylabs/orpheus-3b-0.1-ft
-
- stream: If true, output is streamed for several characters at a time instead of waiting
- for the full response. The stream terminates with `data: [DONE]`. If false,
- return the encoded audio as octet stream
-
- voice: The voice to use for generating the audio. The voices supported are different
- for each model. For eg - for canopylabs/orpheus-3b-0.1-ft, one of the voices
- supported is tara, for hexgrad/Kokoro-82M, one of the voices supported is
- af_alloy and for cartesia/sonic, one of the voices supported is "friendly
- sidekick".
-
- You can view the voices supported for each model using the /v1/voices endpoint
- sending the model name as the query parameter.
- [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available).
-
- language: Language of input text.
-
- response_encoding: Audio encoding of response
-
- response_format: The format of audio output. Supported formats are mp3, wav, raw if streaming is
- false. If streaming is true, the only supported format is raw.
-
- sample_rate: Sampling rate to use for the output audio. The default sampling rate for
- canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for
- cartesia/sonic is 44100.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- ...
-
- @required_args(["input", "model", "voice"], ["input", "model", "stream", "voice"])
- def create(
- self,
- *,
- input: str,
- model: Union[Literal["cartesia/sonic", "hexgrad/Kokoro-82M", "canopylabs/orpheus-3b-0.1-ft"], str],
- voice: str,
- language: Literal["en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru", "sv", "tr", "zh"]
- | Omit = omit,
- response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
- response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
- sample_rate: float | Omit = omit,
- stream: Literal[False] | Literal[True] | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> BinaryAPIResponse | Stream[AudioSpeechStreamChunk]:
- extra_headers = {"Accept": "application/octet-stream", **(extra_headers or {})}
- return self._post(
- "/audio/speech",
- body=maybe_transform(
- {
- "input": input,
- "model": model,
- "voice": voice,
- "language": language,
- "response_encoding": response_encoding,
- "response_format": response_format,
- "sample_rate": sample_rate,
- "stream": stream,
- },
- audio_create_params.AudioCreateParamsStreaming
- if stream
- else audio_create_params.AudioCreateParamsNonStreaming,
- ),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=BinaryAPIResponse,
- stream=stream or False,
- stream_cls=Stream[AudioSpeechStreamChunk],
- )
-
class AsyncAudioResource(AsyncAPIResource):
+ @cached_property
+ def speech(self) -> AsyncSpeechResource:
+ return AsyncSpeechResource(self._client)
+
@cached_property
def voices(self) -> AsyncVoicesResource:
return AsyncVoicesResource(self._client)
@@ -365,262 +113,14 @@ def with_streaming_response(self) -> AsyncAudioResourceWithStreamingResponse:
"""
return AsyncAudioResourceWithStreamingResponse(self)
- @overload
- async def create(
- self,
- *,
- input: str,
- model: Union[Literal["cartesia/sonic", "hexgrad/Kokoro-82M", "canopylabs/orpheus-3b-0.1-ft"], str],
- voice: str,
- language: Literal["en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru", "sv", "tr", "zh"]
- | Omit = omit,
- response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
- response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
- sample_rate: float | Omit = omit,
- stream: Literal[False] | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> AsyncBinaryAPIResponse:
- """
- Generate audio from input text
-
- Args:
- input: Input text to generate the audio for
-
- model: The name of the model to query.
-
- [See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#audio-models)
- The current supported tts models are: - cartesia/sonic - hexgrad/Kokoro-82M -
- canopylabs/orpheus-3b-0.1-ft
-
- voice: The voice to use for generating the audio. The voices supported are different
- for each model. For eg - for canopylabs/orpheus-3b-0.1-ft, one of the voices
- supported is tara, for hexgrad/Kokoro-82M, one of the voices supported is
- af_alloy and for cartesia/sonic, one of the voices supported is "friendly
- sidekick".
-
- You can view the voices supported for each model using the /v1/voices endpoint
- sending the model name as the query parameter.
- [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available).
-
- language: Language of input text.
-
- response_encoding: Audio encoding of response
-
- response_format: The format of audio output. Supported formats are mp3, wav, raw if streaming is
- false. If streaming is true, the only supported format is raw.
-
- sample_rate: Sampling rate to use for the output audio. The default sampling rate for
- canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for
- cartesia/sonic is 44100.
-
- stream: If true, output is streamed for several characters at a time instead of waiting
- for the full response. The stream terminates with `data: [DONE]`. If false,
- return the encoded audio as octet stream
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- ...
-
- @overload
- async def create(
- self,
- *,
- input: str,
- model: Union[Literal["cartesia/sonic", "hexgrad/Kokoro-82M", "canopylabs/orpheus-3b-0.1-ft"], str],
- stream: Literal[True],
- voice: str,
- language: Literal["en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru", "sv", "tr", "zh"]
- | Omit = omit,
- response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
- response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
- sample_rate: float | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> AsyncStream[AudioSpeechStreamChunk]:
- """
- Generate audio from input text
-
- Args:
- input: Input text to generate the audio for
-
- model: The name of the model to query.
-
- [See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#audio-models)
- The current supported tts models are: - cartesia/sonic - hexgrad/Kokoro-82M -
- canopylabs/orpheus-3b-0.1-ft
-
- stream: If true, output is streamed for several characters at a time instead of waiting
- for the full response. The stream terminates with `data: [DONE]`. If false,
- return the encoded audio as octet stream
-
- voice: The voice to use for generating the audio. The voices supported are different
- for each model. For eg - for canopylabs/orpheus-3b-0.1-ft, one of the voices
- supported is tara, for hexgrad/Kokoro-82M, one of the voices supported is
- af_alloy and for cartesia/sonic, one of the voices supported is "friendly
- sidekick".
-
- You can view the voices supported for each model using the /v1/voices endpoint
- sending the model name as the query parameter.
- [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available).
-
- language: Language of input text.
-
- response_encoding: Audio encoding of response
-
- response_format: The format of audio output. Supported formats are mp3, wav, raw if streaming is
- false. If streaming is true, the only supported format is raw.
-
- sample_rate: Sampling rate to use for the output audio. The default sampling rate for
- canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for
- cartesia/sonic is 44100.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- ...
-
- @overload
- async def create(
- self,
- *,
- input: str,
- model: Union[Literal["cartesia/sonic", "hexgrad/Kokoro-82M", "canopylabs/orpheus-3b-0.1-ft"], str],
- stream: bool,
- voice: str,
- language: Literal["en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru", "sv", "tr", "zh"]
- | Omit = omit,
- response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
- response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
- sample_rate: float | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> AsyncBinaryAPIResponse | AsyncStream[AudioSpeechStreamChunk]:
- """
- Generate audio from input text
-
- Args:
- input: Input text to generate the audio for
-
- model: The name of the model to query.
-
- [See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#audio-models)
- The current supported tts models are: - cartesia/sonic - hexgrad/Kokoro-82M -
- canopylabs/orpheus-3b-0.1-ft
-
- stream: If true, output is streamed for several characters at a time instead of waiting
- for the full response. The stream terminates with `data: [DONE]`. If false,
- return the encoded audio as octet stream
-
- voice: The voice to use for generating the audio. The voices supported are different
- for each model. For eg - for canopylabs/orpheus-3b-0.1-ft, one of the voices
- supported is tara, for hexgrad/Kokoro-82M, one of the voices supported is
- af_alloy and for cartesia/sonic, one of the voices supported is "friendly
- sidekick".
-
- You can view the voices supported for each model using the /v1/voices endpoint
- sending the model name as the query parameter.
- [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available).
-
- language: Language of input text.
-
- response_encoding: Audio encoding of response
-
- response_format: The format of audio output. Supported formats are mp3, wav, raw if streaming is
- false. If streaming is true, the only supported format is raw.
-
- sample_rate: Sampling rate to use for the output audio. The default sampling rate for
- canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for
- cartesia/sonic is 44100.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- ...
-
- @required_args(["input", "model", "voice"], ["input", "model", "stream", "voice"])
- async def create(
- self,
- *,
- input: str,
- model: Union[Literal["cartesia/sonic", "hexgrad/Kokoro-82M", "canopylabs/orpheus-3b-0.1-ft"], str],
- voice: str,
- language: Literal["en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru", "sv", "tr", "zh"]
- | Omit = omit,
- response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
- response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
- sample_rate: float | Omit = omit,
- stream: Literal[False] | Literal[True] | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> AsyncBinaryAPIResponse | AsyncStream[AudioSpeechStreamChunk]:
- extra_headers = {"Accept": "application/octet-stream", **(extra_headers or {})}
- return await self._post(
- "/audio/speech",
- body=await async_maybe_transform(
- {
- "input": input,
- "model": model,
- "voice": voice,
- "language": language,
- "response_encoding": response_encoding,
- "response_format": response_format,
- "sample_rate": sample_rate,
- "stream": stream,
- },
- audio_create_params.AudioCreateParamsStreaming
- if stream
- else audio_create_params.AudioCreateParamsNonStreaming,
- ),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=AsyncBinaryAPIResponse,
- stream=stream or False,
- stream_cls=AsyncStream[AudioSpeechStreamChunk],
- )
-
class AudioResourceWithRawResponse:
def __init__(self, audio: AudioResource) -> None:
self._audio = audio
- self.create = to_custom_raw_response_wrapper(
- audio.create,
- BinaryAPIResponse,
- )
+ @cached_property
+ def speech(self) -> SpeechResourceWithRawResponse:
+ return SpeechResourceWithRawResponse(self._audio.speech)
@cached_property
def voices(self) -> VoicesResourceWithRawResponse:
@@ -639,10 +139,9 @@ class AsyncAudioResourceWithRawResponse:
def __init__(self, audio: AsyncAudioResource) -> None:
self._audio = audio
- self.create = async_to_custom_raw_response_wrapper(
- audio.create,
- AsyncBinaryAPIResponse,
- )
+ @cached_property
+ def speech(self) -> AsyncSpeechResourceWithRawResponse:
+ return AsyncSpeechResourceWithRawResponse(self._audio.speech)
@cached_property
def voices(self) -> AsyncVoicesResourceWithRawResponse:
@@ -661,10 +160,9 @@ class AudioResourceWithStreamingResponse:
def __init__(self, audio: AudioResource) -> None:
self._audio = audio
- self.create = to_custom_streamed_response_wrapper(
- audio.create,
- StreamedBinaryAPIResponse,
- )
+ @cached_property
+ def speech(self) -> SpeechResourceWithStreamingResponse:
+ return SpeechResourceWithStreamingResponse(self._audio.speech)
@cached_property
def voices(self) -> VoicesResourceWithStreamingResponse:
@@ -683,10 +181,9 @@ class AsyncAudioResourceWithStreamingResponse:
def __init__(self, audio: AsyncAudioResource) -> None:
self._audio = audio
- self.create = async_to_custom_streamed_response_wrapper(
- audio.create,
- AsyncStreamedBinaryAPIResponse,
- )
+ @cached_property
+ def speech(self) -> AsyncSpeechResourceWithStreamingResponse:
+ return AsyncSpeechResourceWithStreamingResponse(self._audio.speech)
@cached_property
def voices(self) -> AsyncVoicesResourceWithStreamingResponse:
diff --git a/src/together/resources/audio/speech.py b/src/together/resources/audio/speech.py
new file mode 100644
index 00000000..78f61353
--- /dev/null
+++ b/src/together/resources/audio/speech.py
@@ -0,0 +1,605 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import Literal, overload
+
+import httpx
+
+from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
+from ..._utils import required_args, maybe_transform, async_maybe_transform
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import (
+ BinaryAPIResponse,
+ AsyncBinaryAPIResponse,
+ StreamedBinaryAPIResponse,
+ AsyncStreamedBinaryAPIResponse,
+ to_custom_raw_response_wrapper,
+ to_custom_streamed_response_wrapper,
+ async_to_custom_raw_response_wrapper,
+ async_to_custom_streamed_response_wrapper,
+)
+from ..._streaming import Stream, AsyncStream
+from ...types.audio import speech_create_params
+from ..._base_client import make_request_options
+from ...types.audio_speech_stream_chunk import AudioSpeechStreamChunk
+
+__all__ = ["SpeechResource", "AsyncSpeechResource"]
+
+
+class SpeechResource(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> SpeechResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
+ """
+ return SpeechResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> SpeechResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
+ """
+ return SpeechResourceWithStreamingResponse(self)
+
+ @overload
+ def create(
+ self,
+ *,
+ input: str,
+ model: Union[Literal["cartesia/sonic", "hexgrad/Kokoro-82M", "canopylabs/orpheus-3b-0.1-ft"], str],
+ voice: str,
+ language: Literal["en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru", "sv", "tr", "zh"]
+ | Omit = omit,
+ response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
+ response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
+ sample_rate: float | Omit = omit,
+ stream: Literal[False] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> BinaryAPIResponse:
+ """
+ Generate audio from input text
+
+ Args:
+ input: Input text to generate the audio for
+
+ model: The name of the model to query.
+
+ [See all of Together AI's audio models](https://docs.together.ai/docs/serverless-models#audio-models)
+ The currently supported TTS models are: - cartesia/sonic - hexgrad/Kokoro-82M -
+ canopylabs/orpheus-3b-0.1-ft
+
+ voice: The voice to use for generating the audio. The supported voices differ
+ for each model. For example, canopylabs/orpheus-3b-0.1-ft supports the voice
+ tara, hexgrad/Kokoro-82M supports af_alloy, and cartesia/sonic supports
+ "friendly sidekick".
+
+ You can list the voices supported by each model via the /v1/voices endpoint,
+ passing the model name as a query parameter.
+ [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available).
+
+ language: Language of input text.
+
+ response_encoding: Audio encoding of response
+
+ response_format: The format of audio output. Supported formats are mp3, wav, raw if streaming is
+ false. If streaming is true, the only supported format is raw.
+
+ sample_rate: Sampling rate to use for the output audio. The default sampling rate for
+ canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for
+ cartesia/sonic is 44100.
+
+ stream: If true, output is streamed for several characters at a time instead of
+ waiting for the full response. The stream terminates with `data: [DONE]`. If
+ false, the encoded audio is returned as an octet stream.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ def create(
+ self,
+ *,
+ input: str,
+ model: Union[Literal["cartesia/sonic", "hexgrad/Kokoro-82M", "canopylabs/orpheus-3b-0.1-ft"], str],
+ stream: Literal[True],
+ voice: str,
+ language: Literal["en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru", "sv", "tr", "zh"]
+ | Omit = omit,
+ response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
+ response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
+ sample_rate: float | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Stream[AudioSpeechStreamChunk]:
+ """
+ Generate audio from input text
+
+ Args:
+ input: Input text to generate the audio for
+
+ model: The name of the model to query.
+
+ [See all of Together AI's audio models](https://docs.together.ai/docs/serverless-models#audio-models)
+ The currently supported TTS models are: - cartesia/sonic - hexgrad/Kokoro-82M -
+ canopylabs/orpheus-3b-0.1-ft
+
+ stream: If true, output is streamed for several characters at a time instead of
+ waiting for the full response. The stream terminates with `data: [DONE]`. If
+ false, the encoded audio is returned as an octet stream.
+
+ voice: The voice to use for generating the audio. The supported voices differ
+ for each model. For example, canopylabs/orpheus-3b-0.1-ft supports the voice
+ tara, hexgrad/Kokoro-82M supports af_alloy, and cartesia/sonic supports
+ "friendly sidekick".
+
+ You can list the voices supported by each model via the /v1/voices endpoint,
+ passing the model name as a query parameter.
+ [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available).
+
+ language: Language of input text.
+
+ response_encoding: Audio encoding of response
+
+ response_format: The format of audio output. Supported formats are mp3, wav, raw if streaming is
+ false. If streaming is true, the only supported format is raw.
+
+ sample_rate: Sampling rate to use for the output audio. The default sampling rate for
+ canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for
+ cartesia/sonic is 44100.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ def create(
+ self,
+ *,
+ input: str,
+ model: Union[Literal["cartesia/sonic", "hexgrad/Kokoro-82M", "canopylabs/orpheus-3b-0.1-ft"], str],
+ stream: bool,
+ voice: str,
+ language: Literal["en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru", "sv", "tr", "zh"]
+ | Omit = omit,
+ response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
+ response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
+ sample_rate: float | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> BinaryAPIResponse | Stream[AudioSpeechStreamChunk]:
+ """
+ Generate audio from input text
+
+ Args:
+ input: Input text to generate the audio for
+
+ model: The name of the model to query.
+
+ [See all of Together AI's audio models](https://docs.together.ai/docs/serverless-models#audio-models)
+ The currently supported TTS models are: - cartesia/sonic - hexgrad/Kokoro-82M -
+ canopylabs/orpheus-3b-0.1-ft
+
+ stream: If true, output is streamed for several characters at a time instead of
+ waiting for the full response. The stream terminates with `data: [DONE]`. If
+ false, the encoded audio is returned as an octet stream.
+
+ voice: The voice to use for generating the audio. The supported voices differ
+ for each model. For example, canopylabs/orpheus-3b-0.1-ft supports the voice
+ tara, hexgrad/Kokoro-82M supports af_alloy, and cartesia/sonic supports
+ "friendly sidekick".
+
+ You can list the voices supported by each model via the /v1/voices endpoint,
+ passing the model name as a query parameter.
+ [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available).
+
+ language: Language of input text.
+
+ response_encoding: Audio encoding of response
+
+ response_format: The format of audio output. Supported formats are mp3, wav, raw if streaming is
+ false. If streaming is true, the only supported format is raw.
+
+ sample_rate: Sampling rate to use for the output audio. The default sampling rate for
+ canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for
+ cartesia/sonic is 44100.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @required_args(["input", "model", "voice"], ["input", "model", "stream", "voice"])
+ def create(
+ self,
+ *,
+ input: str,
+ model: Union[Literal["cartesia/sonic", "hexgrad/Kokoro-82M", "canopylabs/orpheus-3b-0.1-ft"], str],
+ voice: str,
+ language: Literal["en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru", "sv", "tr", "zh"]
+ | Omit = omit,
+ response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
+ response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
+ sample_rate: float | Omit = omit,
+ stream: Literal[False] | Literal[True] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> BinaryAPIResponse | Stream[AudioSpeechStreamChunk]:
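+ # Default to requesting raw bytes; a caller-supplied Accept header in
+ # extra_headers takes precedence because it is unpacked last.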
+ extra_headers = {"Accept": "application/octet-stream", **(extra_headers or {})}
+ return self._post(
+ "/audio/speech",
+ body=maybe_transform(
+ {
+ "input": input,
+ "model": model,
+ "voice": voice,
+ "language": language,
+ "response_encoding": response_encoding,
+ "response_format": response_format,
+ "sample_rate": sample_rate,
+ "stream": stream,
+ },
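+ # Transform the body according to the streaming or non-streaming
+ # params TypedDict, chosen by whether `stream` was provided.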
+ speech_create_params.SpeechCreateParamsStreaming
+ if stream
+ else speech_create_params.SpeechCreateParamsNonStreaming,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=BinaryAPIResponse,
+ stream=stream or False,
+ stream_cls=Stream[AudioSpeechStreamChunk],
+ )
+
+
+class AsyncSpeechResource(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncSpeechResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
+ """
+ return AsyncSpeechResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncSpeechResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
+ """
+ return AsyncSpeechResourceWithStreamingResponse(self)
+
+ @overload
+ async def create(
+ self,
+ *,
+ input: str,
+ model: Union[Literal["cartesia/sonic", "hexgrad/Kokoro-82M", "canopylabs/orpheus-3b-0.1-ft"], str],
+ voice: str,
+ language: Literal["en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru", "sv", "tr", "zh"]
+ | Omit = omit,
+ response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
+ response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
+ sample_rate: float | Omit = omit,
+ stream: Literal[False] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> AsyncBinaryAPIResponse:
+ """
+ Generate audio from input text
+
+ Args:
+ input: Input text to generate the audio for
+
+ model: The name of the model to query.
+
+ [See all of Together AI's audio models](https://docs.together.ai/docs/serverless-models#audio-models)
+ The currently supported TTS models are: - cartesia/sonic - hexgrad/Kokoro-82M -
+ canopylabs/orpheus-3b-0.1-ft
+
+ voice: The voice to use for generating the audio. The supported voices differ
+ for each model. For example, canopylabs/orpheus-3b-0.1-ft supports the voice
+ tara, hexgrad/Kokoro-82M supports af_alloy, and cartesia/sonic supports
+ "friendly sidekick".
+
+ You can list the voices supported by each model via the /v1/voices endpoint,
+ passing the model name as a query parameter.
+ [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available).
+
+ language: Language of input text.
+
+ response_encoding: Audio encoding of response
+
+ response_format: The format of audio output. Supported formats are mp3, wav, raw if streaming is
+ false. If streaming is true, the only supported format is raw.
+
+ sample_rate: Sampling rate to use for the output audio. The default sampling rate for
+ canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for
+ cartesia/sonic is 44100.
+
+ stream: If true, output is streamed for several characters at a time instead of
+ waiting for the full response. The stream terminates with `data: [DONE]`. If
+ false, the encoded audio is returned as an octet stream.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ async def create(
+ self,
+ *,
+ input: str,
+ model: Union[Literal["cartesia/sonic", "hexgrad/Kokoro-82M", "canopylabs/orpheus-3b-0.1-ft"], str],
+ stream: Literal[True],
+ voice: str,
+ language: Literal["en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru", "sv", "tr", "zh"]
+ | Omit = omit,
+ response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
+ response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
+ sample_rate: float | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> AsyncStream[AudioSpeechStreamChunk]:
+ """
+ Generate audio from input text
+
+ Args:
+ input: Input text to generate the audio for
+
+ model: The name of the model to query.
+
+ [See all of Together AI's audio models](https://docs.together.ai/docs/serverless-models#audio-models)
+ The currently supported TTS models are: - cartesia/sonic - hexgrad/Kokoro-82M -
+ canopylabs/orpheus-3b-0.1-ft
+
+ stream: If true, output is streamed for several characters at a time instead of
+ waiting for the full response. The stream terminates with `data: [DONE]`. If
+ false, the encoded audio is returned as an octet stream.
+
+ voice: The voice to use for generating the audio. The supported voices differ
+ for each model. For example, canopylabs/orpheus-3b-0.1-ft supports the voice
+ tara, hexgrad/Kokoro-82M supports af_alloy, and cartesia/sonic supports
+ "friendly sidekick".
+
+ You can list the voices supported by each model via the /v1/voices endpoint,
+ passing the model name as a query parameter.
+ [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available).
+
+ language: Language of input text.
+
+ response_encoding: Audio encoding of response
+
+ response_format: The format of audio output. Supported formats are mp3, wav, raw if streaming is
+ false. If streaming is true, the only supported format is raw.
+
+ sample_rate: Sampling rate to use for the output audio. The default sampling rate for
+ canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for
+ cartesia/sonic is 44100.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ async def create(
+ self,
+ *,
+ input: str,
+ model: Union[Literal["cartesia/sonic", "hexgrad/Kokoro-82M", "canopylabs/orpheus-3b-0.1-ft"], str],
+ stream: bool,
+ voice: str,
+ language: Literal["en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru", "sv", "tr", "zh"]
+ | Omit = omit,
+ response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
+ response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
+ sample_rate: float | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> AsyncBinaryAPIResponse | AsyncStream[AudioSpeechStreamChunk]:
+ """
+ Generate audio from input text
+
+ Args:
+ input: Input text to generate the audio for
+
+ model: The name of the model to query.
+
+ [See all of Together AI's audio models](https://docs.together.ai/docs/serverless-models#audio-models)
+ The currently supported TTS models are: - cartesia/sonic - hexgrad/Kokoro-82M -
+ canopylabs/orpheus-3b-0.1-ft
+
+ stream: If true, output is streamed for several characters at a time instead of
+ waiting for the full response. The stream terminates with `data: [DONE]`. If
+ false, the encoded audio is returned as an octet stream.
+
+ voice: The voice to use for generating the audio. The supported voices differ
+ for each model. For example, canopylabs/orpheus-3b-0.1-ft supports the voice
+ tara, hexgrad/Kokoro-82M supports af_alloy, and cartesia/sonic supports
+ "friendly sidekick".
+
+ You can list the voices supported by each model via the /v1/voices endpoint,
+ passing the model name as a query parameter.
+ [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available).
+
+ language: Language of input text.
+
+ response_encoding: Audio encoding of response
+
+ response_format: The format of audio output. Supported formats are mp3, wav, raw if streaming is
+ false. If streaming is true, the only supported format is raw.
+
+ sample_rate: Sampling rate to use for the output audio. The default sampling rate for
+ canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for
+ cartesia/sonic is 44100.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @required_args(["input", "model", "voice"], ["input", "model", "stream", "voice"])
+ async def create(
+ self,
+ *,
+ input: str,
+ model: Union[Literal["cartesia/sonic", "hexgrad/Kokoro-82M", "canopylabs/orpheus-3b-0.1-ft"], str],
+ voice: str,
+ language: Literal["en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru", "sv", "tr", "zh"]
+ | Omit = omit,
+ response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
+ response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
+ sample_rate: float | Omit = omit,
+ stream: Literal[False] | Literal[True] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> AsyncBinaryAPIResponse | AsyncStream[AudioSpeechStreamChunk]:
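+ # Default to requesting raw bytes; a caller-supplied Accept header in
+ # extra_headers takes precedence because it is unpacked last.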
+ extra_headers = {"Accept": "application/octet-stream", **(extra_headers or {})}
+ return await self._post(
+ "/audio/speech",
+ body=await async_maybe_transform(
+ {
+ "input": input,
+ "model": model,
+ "voice": voice,
+ "language": language,
+ "response_encoding": response_encoding,
+ "response_format": response_format,
+ "sample_rate": sample_rate,
+ "stream": stream,
+ },
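+ # Transform the body according to the streaming or non-streaming
+ # params TypedDict, chosen by whether `stream` was provided.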
+ speech_create_params.SpeechCreateParamsStreaming
+ if stream
+ else speech_create_params.SpeechCreateParamsNonStreaming,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=AsyncBinaryAPIResponse,
+ stream=stream or False,
+ stream_cls=AsyncStream[AudioSpeechStreamChunk],
+ )
+
+
+class SpeechResourceWithRawResponse:
+ def __init__(self, speech: SpeechResource) -> None:
+ self._speech = speech
+
+ self.create = to_custom_raw_response_wrapper(
+ speech.create,
+ BinaryAPIResponse,
+ )
+
+
+class AsyncSpeechResourceWithRawResponse:
+ def __init__(self, speech: AsyncSpeechResource) -> None:
+ self._speech = speech
+
+ self.create = async_to_custom_raw_response_wrapper(
+ speech.create,
+ AsyncBinaryAPIResponse,
+ )
+
+
+class SpeechResourceWithStreamingResponse:
+ def __init__(self, speech: SpeechResource) -> None:
+ self._speech = speech
+
+ self.create = to_custom_streamed_response_wrapper(
+ speech.create,
+ StreamedBinaryAPIResponse,
+ )
+
+
+class AsyncSpeechResourceWithStreamingResponse:
+ def __init__(self, speech: AsyncSpeechResource) -> None:
+ self._speech = speech
+
+ self.create = async_to_custom_streamed_response_wrapper(
+ speech.create,
+ AsyncStreamedBinaryAPIResponse,
+ )
diff --git a/src/together/types/__init__.py b/src/together/types/__init__.py
index 11c20cc9..4779716d 100644
--- a/src/together/types/__init__.py
+++ b/src/together/types/__init__.py
@@ -30,7 +30,6 @@
from .eval_update_params import EvalUpdateParams as EvalUpdateParams
from .file_list_response import FileListResponse as FileListResponse
from .full_training_type import FullTrainingType as FullTrainingType
-from .audio_create_params import AudioCreateParams as AudioCreateParams
from .batch_create_params import BatchCreateParams as BatchCreateParams
from .batch_list_response import BatchListResponse as BatchListResponse
from .lo_ra_training_type import LoRaTrainingType as LoRaTrainingType
diff --git a/src/together/types/audio/__init__.py b/src/together/types/audio/__init__.py
index 867b4f25..245749a6 100644
--- a/src/together/types/audio/__init__.py
+++ b/src/together/types/audio/__init__.py
@@ -3,6 +3,7 @@
from __future__ import annotations
from .voice_list_response import VoiceListResponse as VoiceListResponse
+from .speech_create_params import SpeechCreateParams as SpeechCreateParams
from .translation_create_params import TranslationCreateParams as TranslationCreateParams
from .transcription_create_params import TranscriptionCreateParams as TranscriptionCreateParams
from .translation_create_response import TranslationCreateResponse as TranslationCreateResponse
diff --git a/src/together/types/audio_create_params.py b/src/together/types/audio/speech_create_params.py
similarity index 86%
rename from src/together/types/audio_create_params.py
rename to src/together/types/audio/speech_create_params.py
index c2b64aca..085a5f74 100644
--- a/src/together/types/audio_create_params.py
+++ b/src/together/types/audio/speech_create_params.py
@@ -5,10 +5,10 @@
from typing import Union
from typing_extensions import Literal, Required, TypedDict
-__all__ = ["AudioCreateParamsBase", "AudioCreateParamsNonStreaming", "AudioCreateParamsStreaming"]
+__all__ = ["SpeechCreateParamsBase", "SpeechCreateParamsNonStreaming", "SpeechCreateParamsStreaming"]
-class AudioCreateParamsBase(TypedDict, total=False):
+class SpeechCreateParamsBase(TypedDict, total=False):
input: Required[str]
"""Input text to generate the audio for"""
@@ -54,7 +54,7 @@ class AudioCreateParamsBase(TypedDict, total=False):
"""
-class AudioCreateParamsNonStreaming(AudioCreateParamsBase, total=False):
+class SpeechCreateParamsNonStreaming(SpeechCreateParamsBase, total=False):
stream: Literal[False]
"""
If true, output is streamed for several characters at a time instead of waiting
@@ -63,7 +63,7 @@ class AudioCreateParamsNonStreaming(AudioCreateParamsBase, total=False):
"""
-class AudioCreateParamsStreaming(AudioCreateParamsBase):
+class SpeechCreateParamsStreaming(SpeechCreateParamsBase):
stream: Required[Literal[True]]
"""
If true, output is streamed for several characters at a time instead of waiting
@@ -72,4 +72,4 @@ class AudioCreateParamsStreaming(AudioCreateParamsBase):
"""
-AudioCreateParams = Union[AudioCreateParamsNonStreaming, AudioCreateParamsStreaming]
+SpeechCreateParams = Union[SpeechCreateParamsNonStreaming, SpeechCreateParamsStreaming]
diff --git a/tests/api_resources/test_audio.py b/tests/api_resources/audio/test_speech.py
similarity index 63%
rename from tests/api_resources/test_audio.py
rename to tests/api_resources/audio/test_speech.py
index 4e756493..ce213402 100644
--- a/tests/api_resources/test_audio.py
+++ b/tests/api_resources/audio/test_speech.py
@@ -20,28 +20,28 @@
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
-class TestAudio:
+class TestSpeech:
parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
@parametrize
@pytest.mark.respx(base_url=base_url)
def test_method_create_overload_1(self, client: Together, respx_mock: MockRouter) -> None:
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- audio = client.audio.create(
+ speech = client.audio.speech.create(
input="input",
model="canopylabs/orpheus-3b-0.1-ft",
voice="voice",
)
- assert audio.is_closed
- assert audio.json() == {"foo": "bar"}
- assert cast(Any, audio.is_closed) is True
- assert isinstance(audio, BinaryAPIResponse)
+ assert speech.is_closed
+ assert speech.json() == {"foo": "bar"}
+ assert cast(Any, speech.is_closed) is True
+ assert isinstance(speech, BinaryAPIResponse)
@parametrize
@pytest.mark.respx(base_url=base_url)
def test_method_create_with_all_params_overload_1(self, client: Together, respx_mock: MockRouter) -> None:
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- audio = client.audio.create(
+ speech = client.audio.speech.create(
input="input",
model="canopylabs/orpheus-3b-0.1-ft",
voice="voice",
@@ -51,62 +51,62 @@ def test_method_create_with_all_params_overload_1(self, client: Together, respx_
sample_rate=0,
stream=False,
)
- assert audio.is_closed
- assert audio.json() == {"foo": "bar"}
- assert cast(Any, audio.is_closed) is True
- assert isinstance(audio, BinaryAPIResponse)
+ assert speech.is_closed
+ assert speech.json() == {"foo": "bar"}
+ assert cast(Any, speech.is_closed) is True
+ assert isinstance(speech, BinaryAPIResponse)
@parametrize
@pytest.mark.respx(base_url=base_url)
def test_raw_response_create_overload_1(self, client: Together, respx_mock: MockRouter) -> None:
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- audio = client.audio.with_raw_response.create(
+ speech = client.audio.speech.with_raw_response.create(
input="input",
model="canopylabs/orpheus-3b-0.1-ft",
voice="voice",
)
- assert audio.is_closed is True
- assert audio.http_request.headers.get("X-Stainless-Lang") == "python"
- assert audio.json() == {"foo": "bar"}
- assert isinstance(audio, BinaryAPIResponse)
+ assert speech.is_closed is True
+ assert speech.http_request.headers.get("X-Stainless-Lang") == "python"
+ assert speech.json() == {"foo": "bar"}
+ assert isinstance(speech, BinaryAPIResponse)
@parametrize
@pytest.mark.respx(base_url=base_url)
def test_streaming_response_create_overload_1(self, client: Together, respx_mock: MockRouter) -> None:
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- with client.audio.with_streaming_response.create(
+ with client.audio.speech.with_streaming_response.create(
input="input",
model="canopylabs/orpheus-3b-0.1-ft",
voice="voice",
- ) as audio:
- assert not audio.is_closed
- assert audio.http_request.headers.get("X-Stainless-Lang") == "python"
+ ) as speech:
+ assert not speech.is_closed
+ assert speech.http_request.headers.get("X-Stainless-Lang") == "python"
- assert audio.json() == {"foo": "bar"}
- assert cast(Any, audio.is_closed) is True
- assert isinstance(audio, StreamedBinaryAPIResponse)
+ assert speech.json() == {"foo": "bar"}
+ assert cast(Any, speech.is_closed) is True
+ assert isinstance(speech, StreamedBinaryAPIResponse)
- assert cast(Any, audio.is_closed) is True
+ assert cast(Any, speech.is_closed) is True
@parametrize
@pytest.mark.respx(base_url=base_url)
def test_method_create_overload_2(self, client: Together, respx_mock: MockRouter) -> None:
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- audio_stream = client.audio.create(
+ speech_stream = client.audio.speech.create(
input="input",
model="canopylabs/orpheus-3b-0.1-ft",
stream=True,
voice="voice",
)
- audio_stream.response.close()
+ speech_stream.response.close()
@parametrize
@pytest.mark.respx(base_url=base_url)
def test_method_create_with_all_params_overload_2(self, client: Together, respx_mock: MockRouter) -> None:
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- audio_stream = client.audio.create(
+ speech_stream = client.audio.speech.create(
input="input",
model="canopylabs/orpheus-3b-0.1-ft",
stream=True,
@@ -116,45 +116,45 @@ def test_method_create_with_all_params_overload_2(self, client: Together, respx_
response_format="mp3",
sample_rate=0,
)
- audio_stream.response.close()
+ speech_stream.response.close()
@parametrize
@pytest.mark.respx(base_url=base_url)
def test_raw_response_create_overload_2(self, client: Together, respx_mock: MockRouter) -> None:
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- audio_stream = client.audio.with_raw_response.create(
+ speech_stream = client.audio.speech.with_raw_response.create(
input="input",
model="canopylabs/orpheus-3b-0.1-ft",
stream=True,
voice="voice",
)
- assert audio_stream.http_request.headers.get("X-Stainless-Lang") == "python"
- assert audio_stream.json() == {"foo": "bar"}
- assert isinstance(audio_stream, BinaryAPIResponse)
+ assert speech_stream.http_request.headers.get("X-Stainless-Lang") == "python"
+ assert speech_stream.json() == {"foo": "bar"}
+ assert isinstance(speech_stream, BinaryAPIResponse)
@parametrize
@pytest.mark.respx(base_url=base_url)
def test_streaming_response_create_overload_2(self, client: Together, respx_mock: MockRouter) -> None:
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- with client.audio.with_streaming_response.create(
+ with client.audio.speech.with_streaming_response.create(
input="input",
model="canopylabs/orpheus-3b-0.1-ft",
stream=True,
voice="voice",
- ) as audio_stream:
- assert not audio_stream.is_closed
- assert audio_stream.http_request.headers.get("X-Stainless-Lang") == "python"
+ ) as speech_stream:
+ assert not speech_stream.is_closed
+ assert speech_stream.http_request.headers.get("X-Stainless-Lang") == "python"
- assert audio_stream.json() == {"foo": "bar"}
- assert cast(Any, audio_stream.is_closed) is True
- assert isinstance(audio_stream, StreamedBinaryAPIResponse)
+ assert speech_stream.json() == {"foo": "bar"}
+ assert cast(Any, speech_stream.is_closed) is True
+ assert isinstance(speech_stream, StreamedBinaryAPIResponse)
- assert cast(Any, audio_stream.is_closed) is True
+ assert cast(Any, speech_stream.is_closed) is True
-class TestAsyncAudio:
+class TestAsyncSpeech:
parametrize = pytest.mark.parametrize(
"async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
)
@@ -163,15 +163,15 @@ class TestAsyncAudio:
@pytest.mark.respx(base_url=base_url)
async def test_method_create_overload_1(self, async_client: AsyncTogether, respx_mock: MockRouter) -> None:
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- audio = await async_client.audio.create(
+ speech = await async_client.audio.speech.create(
input="input",
model="canopylabs/orpheus-3b-0.1-ft",
voice="voice",
)
- assert audio.is_closed
- assert await audio.json() == {"foo": "bar"}
- assert cast(Any, audio.is_closed) is True
- assert isinstance(audio, AsyncBinaryAPIResponse)
+ assert speech.is_closed
+ assert await speech.json() == {"foo": "bar"}
+ assert cast(Any, speech.is_closed) is True
+ assert isinstance(speech, AsyncBinaryAPIResponse)
@parametrize
@pytest.mark.respx(base_url=base_url)
@@ -179,7 +179,7 @@ async def test_method_create_with_all_params_overload_1(
self, async_client: AsyncTogether, respx_mock: MockRouter
) -> None:
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- audio = await async_client.audio.create(
+ speech = await async_client.audio.speech.create(
input="input",
model="canopylabs/orpheus-3b-0.1-ft",
voice="voice",
@@ -189,26 +189,26 @@ async def test_method_create_with_all_params_overload_1(
sample_rate=0,
stream=False,
)
- assert audio.is_closed
- assert await audio.json() == {"foo": "bar"}
- assert cast(Any, audio.is_closed) is True
- assert isinstance(audio, AsyncBinaryAPIResponse)
+ assert speech.is_closed
+ assert await speech.json() == {"foo": "bar"}
+ assert cast(Any, speech.is_closed) is True
+ assert isinstance(speech, AsyncBinaryAPIResponse)
@parametrize
@pytest.mark.respx(base_url=base_url)
async def test_raw_response_create_overload_1(self, async_client: AsyncTogether, respx_mock: MockRouter) -> None:
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- audio = await async_client.audio.with_raw_response.create(
+ speech = await async_client.audio.speech.with_raw_response.create(
input="input",
model="canopylabs/orpheus-3b-0.1-ft",
voice="voice",
)
- assert audio.is_closed is True
- assert audio.http_request.headers.get("X-Stainless-Lang") == "python"
- assert await audio.json() == {"foo": "bar"}
- assert isinstance(audio, AsyncBinaryAPIResponse)
+ assert speech.is_closed is True
+ assert speech.http_request.headers.get("X-Stainless-Lang") == "python"
+ assert await speech.json() == {"foo": "bar"}
+ assert isinstance(speech, AsyncBinaryAPIResponse)
@parametrize
@pytest.mark.respx(base_url=base_url)
@@ -216,31 +216,31 @@ async def test_streaming_response_create_overload_1(
self, async_client: AsyncTogether, respx_mock: MockRouter
) -> None:
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- async with async_client.audio.with_streaming_response.create(
+ async with async_client.audio.speech.with_streaming_response.create(
input="input",
model="canopylabs/orpheus-3b-0.1-ft",
voice="voice",
- ) as audio:
- assert not audio.is_closed
- assert audio.http_request.headers.get("X-Stainless-Lang") == "python"
+ ) as speech:
+ assert not speech.is_closed
+ assert speech.http_request.headers.get("X-Stainless-Lang") == "python"
- assert await audio.json() == {"foo": "bar"}
- assert cast(Any, audio.is_closed) is True
- assert isinstance(audio, AsyncStreamedBinaryAPIResponse)
+ assert await speech.json() == {"foo": "bar"}
+ assert cast(Any, speech.is_closed) is True
+ assert isinstance(speech, AsyncStreamedBinaryAPIResponse)
- assert cast(Any, audio.is_closed) is True
+ assert cast(Any, speech.is_closed) is True
@parametrize
@pytest.mark.respx(base_url=base_url)
async def test_method_create_overload_2(self, async_client: AsyncTogether, respx_mock: MockRouter) -> None:
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- audio_stream = await async_client.audio.create(
+ speech_stream = await async_client.audio.speech.create(
input="input",
model="canopylabs/orpheus-3b-0.1-ft",
stream=True,
voice="voice",
)
- await audio_stream.response.aclose()
+ await speech_stream.response.aclose()
@parametrize
@pytest.mark.respx(base_url=base_url)
@@ -248,7 +248,7 @@ async def test_method_create_with_all_params_overload_2(
self, async_client: AsyncTogether, respx_mock: MockRouter
) -> None:
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- audio_stream = await async_client.audio.create(
+ speech_stream = await async_client.audio.speech.create(
input="input",
model="canopylabs/orpheus-3b-0.1-ft",
stream=True,
@@ -258,23 +258,23 @@ async def test_method_create_with_all_params_overload_2(
response_format="mp3",
sample_rate=0,
)
- await audio_stream.response.aclose()
+ await speech_stream.response.aclose()
@parametrize
@pytest.mark.respx(base_url=base_url)
async def test_raw_response_create_overload_2(self, async_client: AsyncTogether, respx_mock: MockRouter) -> None:
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- audio_stream = await async_client.audio.with_raw_response.create(
+ speech_stream = await async_client.audio.speech.with_raw_response.create(
input="input",
model="canopylabs/orpheus-3b-0.1-ft",
stream=True,
voice="voice",
)
- assert audio_stream.http_request.headers.get("X-Stainless-Lang") == "python"
- assert await audio_stream.json() == {"foo": "bar"}
- assert isinstance(audio_stream, AsyncBinaryAPIResponse)
+ assert speech_stream.http_request.headers.get("X-Stainless-Lang") == "python"
+ assert await speech_stream.json() == {"foo": "bar"}
+ assert isinstance(speech_stream, AsyncBinaryAPIResponse)
@parametrize
@pytest.mark.respx(base_url=base_url)
@@ -282,17 +282,17 @@ async def test_streaming_response_create_overload_2(
self, async_client: AsyncTogether, respx_mock: MockRouter
) -> None:
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- async with async_client.audio.with_streaming_response.create(
+ async with async_client.audio.speech.with_streaming_response.create(
input="input",
model="canopylabs/orpheus-3b-0.1-ft",
stream=True,
voice="voice",
- ) as audio_stream:
- assert not audio_stream.is_closed
- assert audio_stream.http_request.headers.get("X-Stainless-Lang") == "python"
+ ) as speech_stream:
+ assert not speech_stream.is_closed
+ assert speech_stream.http_request.headers.get("X-Stainless-Lang") == "python"
- assert await audio_stream.json() == {"foo": "bar"}
- assert cast(Any, audio_stream.is_closed) is True
- assert isinstance(audio_stream, AsyncStreamedBinaryAPIResponse)
+ assert await speech_stream.json() == {"foo": "bar"}
+ assert cast(Any, speech_stream.is_closed) is True
+ assert isinstance(speech_stream, AsyncStreamedBinaryAPIResponse)
- assert cast(Any, audio_stream.is_closed) is True
+ assert cast(Any, speech_stream.is_closed) is True
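
Migration sketch for this change (editorial note, not part of the generated diff; the model and voice strings mirror the test fixtures above, and the input value is illustrative):

    from together import Together

    client = Together()  # assumes TOGETHER_API_KEY is set in the environment

    # Before (v2.0.0-alpha.1 and earlier): speech synthesis lived directly on `audio`.
    # speech = client.audio.create(
    #     input="Hello, world!",
    #     model="canopylabs/orpheus-3b-0.1-ft",
    #     voice="voice",
    # )

    # After: the method moves under the new `speech` sub-resource.
    speech = client.audio.speech.create(
        input="Hello, world!",
        model="canopylabs/orpheus-3b-0.1-ft",
        voice="voice",
    )

    # Streaming overload: pass stream=True and close the underlying response when done.
    speech_stream = client.audio.speech.create(
        input="Hello, world!",
        model="canopylabs/orpheus-3b-0.1-ft",
        voice="voice",
        stream=True,
    )
    speech_stream.response.close()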
From 9d5e1a2a8fe09f01ac9ed984361139064d42a2d8 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Fri, 21 Nov 2025 15:43:20 +0000
Subject: [PATCH 2/3] feat(api): api update
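
This update tightens the `sample_rate` parameter from `float` to `int` across
every `create` overload and in `SpeechCreateParamsBase`. A minimal call sketch
under the new annotation (illustrative values; `client` is an instantiated
`Together` client):

    # sample_rate is now annotated as `int | Omit` rather than `float | Omit`,
    # so pass a whole-number rate; 24000 here is only an example value.
    speech = client.audio.speech.create(
        input="Hello, world!",
        model="canopylabs/orpheus-3b-0.1-ft",
        voice="voice",
        sample_rate=24000,
    )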
---
.stats.yml | 4 ++--
src/together/resources/audio/speech.py | 16 ++++++++--------
src/together/types/audio/speech_create_params.py | 2 +-
3 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/.stats.yml b/.stats.yml
index b14d585f..2abb58a1 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
configured_endpoints: 44
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2Ftogetherai-b86f8e6c4674d1a7829ffa8ddff4bc93d21334d231e6a4d0fd734d411c07a4eb.yml
-openapi_spec_hash: 8af4975be6ae8f4655fa92fd26af9682
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2Ftogetherai-e9e60279414ac3279c025d6318b5f67a8f6d01170e365612e791f3a1f259b94f.yml
+openapi_spec_hash: 26c59292808c5ae9f222f95f056430cf
config_hash: afbbabb8eb5bfbbf8139546a13addd9a
diff --git a/src/together/resources/audio/speech.py b/src/together/resources/audio/speech.py
index 78f61353..b2b19518 100644
--- a/src/together/resources/audio/speech.py
+++ b/src/together/resources/audio/speech.py
@@ -60,7 +60,7 @@ def create(
| Omit = omit,
response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
- sample_rate: float | Omit = omit,
+ sample_rate: int | Omit = omit,
stream: Literal[False] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -128,7 +128,7 @@ def create(
| Omit = omit,
response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
- sample_rate: float | Omit = omit,
+ sample_rate: int | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -195,7 +195,7 @@ def create(
| Omit = omit,
response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
- sample_rate: float | Omit = omit,
+ sample_rate: int | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -261,7 +261,7 @@ def create(
| Omit = omit,
response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
- sample_rate: float | Omit = omit,
+ sample_rate: int | Omit = omit,
stream: Literal[False] | Literal[True] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -328,7 +328,7 @@ async def create(
| Omit = omit,
response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
- sample_rate: float | Omit = omit,
+ sample_rate: int | Omit = omit,
stream: Literal[False] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -396,7 +396,7 @@ async def create(
| Omit = omit,
response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
- sample_rate: float | Omit = omit,
+ sample_rate: int | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -463,7 +463,7 @@ async def create(
| Omit = omit,
response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
- sample_rate: float | Omit = omit,
+ sample_rate: int | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -529,7 +529,7 @@ async def create(
| Omit = omit,
response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
- sample_rate: float | Omit = omit,
+ sample_rate: int | Omit = omit,
stream: Literal[False] | Literal[True] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
diff --git a/src/together/types/audio/speech_create_params.py b/src/together/types/audio/speech_create_params.py
index 085a5f74..1db9f9ca 100644
--- a/src/together/types/audio/speech_create_params.py
+++ b/src/together/types/audio/speech_create_params.py
@@ -46,7 +46,7 @@ class SpeechCreateParamsBase(TypedDict, total=False):
the only supported format is raw.
"""
- sample_rate: float
+ sample_rate: int
"""Sampling rate to use for the output audio.
The default sampling rate for canopylabs/orpheus-3b-0.1-ft and
From b801cd063ebae99828912328130b8d2321a4e36e Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Fri, 21 Nov 2025 15:43:37 +0000
Subject: [PATCH 3/3] release: 2.0.0-alpha.2
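
A quick post-upgrade sanity check (editorial sketch; `__version__` is read
verbatim from src/together/_version.py, and installing a pre-release requires
pip's --pre flag, e.g. `pip install --pre together`):

    import together

    # The version string is taken verbatim from _version.py.
    assert together.__version__ == "2.0.0-alpha.2"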
---
.release-please-manifest.json | 2 +-
CHANGELOG.md | 13 +++++++++++++
pyproject.toml | 2 +-
src/together/_version.py | 2 +-
4 files changed, 16 insertions(+), 3 deletions(-)
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 6e011e8a..0c548e2f 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "2.0.0-alpha.1"
+ ".": "2.0.0-alpha.2"
}
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9478df67..089feefd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,18 @@
# Changelog
+## 2.0.0-alpha.2 (2025-11-21)
+
+Full Changelog: [v2.0.0-alpha.1...v2.0.0-alpha.2](https://github.com/togethercomputer/together-py/compare/v2.0.0-alpha.1...v2.0.0-alpha.2)
+
+### ⚠ BREAKING CHANGES
+
+* **api:** Change the call signature from `audio.create` to `audio.speech.create` to match the spec in the Python library and leave room for future APIs
+
+### Features
+
+* **api:** api update ([9d5e1a2](https://github.com/togethercomputer/together-py/commit/9d5e1a2a8fe09f01ac9ed984361139064d42a2d8))
+* **api:** Change TTS call signature ([251c911](https://github.com/togethercomputer/together-py/commit/251c911e4b6562fb1751ae2a880e7ff6bb2e7bd2))
+
## 2.0.0-alpha.1 (2025-11-21)
Full Changelog: [v0.1.0-alpha.28...v2.0.0-alpha.1](https://github.com/togethercomputer/together-py/compare/v0.1.0-alpha.28...v2.0.0-alpha.1)
diff --git a/pyproject.toml b/pyproject.toml
index b39dfdf9..64e583cf 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "together"
-version = "2.0.0-alpha.1"
+version = "2.0.0-alpha.2"
description = "The official Python library for the together API"
dynamic = ["readme"]
license = "Apache-2.0"
diff --git a/src/together/_version.py b/src/together/_version.py
index a5854438..54bac7bd 100644
--- a/src/together/_version.py
+++ b/src/together/_version.py
@@ -1,4 +1,4 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
__title__ = "together"
-__version__ = "2.0.0-alpha.1" # x-release-please-version
+__version__ = "2.0.0-alpha.2" # x-release-please-version