From 251c911e4b6562fb1751ae2a880e7ff6bb2e7bd2 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Fri, 21 Nov 2025 15:16:37 +0000
Subject: [PATCH 1/3] feat(api): Change TTS call signature
BREAKING CHANGE: Change the call signature from `audio.create` to
`audio.speech.create` to match the spec for the Python library and leave
room for future APIs.
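
A minimal migration sketch (illustrative only; assumes a configured
`Together` client and a `TOGETHER_API_KEY` environment variable, and
mirrors the calls exercised in the updated tests — "friendly sidekick"
is one of the documented cartesia/sonic voices):

    from together import Together

    client = Together()  # assumes TOGETHER_API_KEY is set in the environment

    # Before: speech synthesis was invoked directly on the audio resource.
    response = client.audio.create(
        input="Hello, world!",
        model="cartesia/sonic",
        voice="friendly sidekick",
    )

    # After: the same parameters move to the nested speech resource.
    response = client.audio.speech.create(
        input="Hello, world!",
        model="cartesia/sonic",
        voice="friendly sidekick",
    )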
---
.stats.yml | 2 +-
api.md | 4 +-
src/together/resources/audio/__init__.py | 14 +
src/together/resources/audio/audio.py | 559 +---------------
src/together/resources/audio/speech.py | 605 ++++++++++++++++++
src/together/types/__init__.py | 1 -
src/together/types/audio/__init__.py | 1 +
.../speech_create_params.py} | 10 +-
.../{test_audio.py => audio/test_speech.py} | 160 ++---
9 files changed, 737 insertions(+), 619 deletions(-)
create mode 100644 src/together/resources/audio/speech.py
rename src/together/types/{audio_create_params.py => audio/speech_create_params.py} (86%)
rename tests/api_resources/{test_audio.py => audio/test_speech.py} (63%)
diff --git a/.stats.yml b/.stats.yml
index 1e723030..b14d585f 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
configured_endpoints: 44
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2Ftogetherai-b86f8e6c4674d1a7829ffa8ddff4bc93d21334d231e6a4d0fd734d411c07a4eb.yml
openapi_spec_hash: 8af4975be6ae8f4655fa92fd26af9682
-config_hash: b337cdd3c62dbd3383529592a029b347
+config_hash: afbbabb8eb5bfbbf8139546a13addd9a
diff --git a/api.md b/api.md
index 13153257..56280f19 100644
--- a/api.md
+++ b/api.md
@@ -156,9 +156,11 @@ Types:
from together.types import AudioFile, AudioSpeechStreamChunk
```
+## Speech
+
Methods:
-- client.audio.create(\*\*params) -> BinaryAPIResponse
+- client.audio.speech.create(\*\*params) -> BinaryAPIResponse
## Voices
diff --git a/src/together/resources/audio/__init__.py b/src/together/resources/audio/__init__.py
index 7219dabc..fae72db6 100644
--- a/src/together/resources/audio/__init__.py
+++ b/src/together/resources/audio/__init__.py
@@ -8,6 +8,14 @@
AudioResourceWithStreamingResponse,
AsyncAudioResourceWithStreamingResponse,
)
+from .speech import (
+ SpeechResource,
+ AsyncSpeechResource,
+ SpeechResourceWithRawResponse,
+ AsyncSpeechResourceWithRawResponse,
+ SpeechResourceWithStreamingResponse,
+ AsyncSpeechResourceWithStreamingResponse,
+)
from .voices import (
VoicesResource,
AsyncVoicesResource,
@@ -34,6 +42,12 @@
)
__all__ = [
+ "SpeechResource",
+ "AsyncSpeechResource",
+ "SpeechResourceWithRawResponse",
+ "AsyncSpeechResourceWithRawResponse",
+ "SpeechResourceWithStreamingResponse",
+ "AsyncSpeechResourceWithStreamingResponse",
"VoicesResource",
"AsyncVoicesResource",
"VoicesResourceWithRawResponse",
diff --git a/src/together/resources/audio/audio.py b/src/together/resources/audio/audio.py
index 47d4f9d8..0bae6bcd 100644
--- a/src/together/resources/audio/audio.py
+++ b/src/together/resources/audio/audio.py
@@ -2,11 +2,14 @@
from __future__ import annotations
-from typing import Union
-from typing_extensions import Literal, overload
-
-import httpx
-
+from .speech import (
+ SpeechResource,
+ AsyncSpeechResource,
+ SpeechResourceWithRawResponse,
+ AsyncSpeechResourceWithRawResponse,
+ SpeechResourceWithStreamingResponse,
+ AsyncSpeechResourceWithStreamingResponse,
+)
from .voices import (
VoicesResource,
AsyncVoicesResource,
@@ -15,22 +18,8 @@
VoicesResourceWithStreamingResponse,
AsyncVoicesResourceWithStreamingResponse,
)
-from ...types import audio_create_params
-from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
-from ..._utils import required_args, maybe_transform, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import (
- BinaryAPIResponse,
- AsyncBinaryAPIResponse,
- StreamedBinaryAPIResponse,
- AsyncStreamedBinaryAPIResponse,
- to_custom_raw_response_wrapper,
- to_custom_streamed_response_wrapper,
- async_to_custom_raw_response_wrapper,
- async_to_custom_streamed_response_wrapper,
-)
-from ..._streaming import Stream, AsyncStream
from .translations import (
TranslationsResource,
AsyncTranslationsResource,
@@ -39,7 +28,6 @@
TranslationsResourceWithStreamingResponse,
AsyncTranslationsResourceWithStreamingResponse,
)
-from ..._base_client import make_request_options
from .transcriptions import (
TranscriptionsResource,
AsyncTranscriptionsResource,
@@ -48,12 +36,15 @@
TranscriptionsResourceWithStreamingResponse,
AsyncTranscriptionsResourceWithStreamingResponse,
)
-from ...types.audio_speech_stream_chunk import AudioSpeechStreamChunk
__all__ = ["AudioResource", "AsyncAudioResource"]
class AudioResource(SyncAPIResource):
+ @cached_property
+ def speech(self) -> SpeechResource:
+ return SpeechResource(self._client)
+
@cached_property
def voices(self) -> VoicesResource:
return VoicesResource(self._client)
@@ -85,255 +76,12 @@ def with_streaming_response(self) -> AudioResourceWithStreamingResponse:
"""
return AudioResourceWithStreamingResponse(self)
- @overload
- def create(
- self,
- *,
- input: str,
- model: Union[Literal["cartesia/sonic", "hexgrad/Kokoro-82M", "canopylabs/orpheus-3b-0.1-ft"], str],
- voice: str,
- language: Literal["en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru", "sv", "tr", "zh"]
- | Omit = omit,
- response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
- response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
- sample_rate: float | Omit = omit,
- stream: Literal[False] | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> BinaryAPIResponse:
- """
- Generate audio from input text
-
- Args:
- input: Input text to generate the audio for
-
- model: The name of the model to query.
-
- [See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#audio-models)
- The current supported tts models are: - cartesia/sonic - hexgrad/Kokoro-82M -
- canopylabs/orpheus-3b-0.1-ft
-
- voice: The voice to use for generating the audio. The voices supported are different
- for each model. For eg - for canopylabs/orpheus-3b-0.1-ft, one of the voices
- supported is tara, for hexgrad/Kokoro-82M, one of the voices supported is
- af_alloy and for cartesia/sonic, one of the voices supported is "friendly
- sidekick".
-
- You can view the voices supported for each model using the /v1/voices endpoint
- sending the model name as the query parameter.
- [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available).
-
- language: Language of input text.
-
- response_encoding: Audio encoding of response
-
- response_format: The format of audio output. Supported formats are mp3, wav, raw if streaming is
- false. If streaming is true, the only supported format is raw.
-
- sample_rate: Sampling rate to use for the output audio. The default sampling rate for
- canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for
- cartesia/sonic is 44100.
-
- stream: If true, output is streamed for several characters at a time instead of waiting
- for the full response. The stream terminates with `data: [DONE]`. If false,
- return the encoded audio as octet stream
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- ...
-
- @overload
- def create(
- self,
- *,
- input: str,
- model: Union[Literal["cartesia/sonic", "hexgrad/Kokoro-82M", "canopylabs/orpheus-3b-0.1-ft"], str],
- stream: Literal[True],
- voice: str,
- language: Literal["en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru", "sv", "tr", "zh"]
- | Omit = omit,
- response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
- response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
- sample_rate: float | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Stream[AudioSpeechStreamChunk]:
- """
- Generate audio from input text
-
- Args:
- input: Input text to generate the audio for
-
- model: The name of the model to query.
-
- [See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#audio-models)
- The current supported tts models are: - cartesia/sonic - hexgrad/Kokoro-82M -
- canopylabs/orpheus-3b-0.1-ft
-
- stream: If true, output is streamed for several characters at a time instead of waiting
- for the full response. The stream terminates with `data: [DONE]`. If false,
- return the encoded audio as octet stream
-
- voice: The voice to use for generating the audio. The voices supported are different
- for each model. For eg - for canopylabs/orpheus-3b-0.1-ft, one of the voices
- supported is tara, for hexgrad/Kokoro-82M, one of the voices supported is
- af_alloy and for cartesia/sonic, one of the voices supported is "friendly
- sidekick".
-
- You can view the voices supported for each model using the /v1/voices endpoint
- sending the model name as the query parameter.
- [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available).
-
- language: Language of input text.
-
- response_encoding: Audio encoding of response
-
- response_format: The format of audio output. Supported formats are mp3, wav, raw if streaming is
- false. If streaming is true, the only supported format is raw.
-
- sample_rate: Sampling rate to use for the output audio. The default sampling rate for
- canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for
- cartesia/sonic is 44100.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- ...
-
- @overload
- def create(
- self,
- *,
- input: str,
- model: Union[Literal["cartesia/sonic", "hexgrad/Kokoro-82M", "canopylabs/orpheus-3b-0.1-ft"], str],
- stream: bool,
- voice: str,
- language: Literal["en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru", "sv", "tr", "zh"]
- | Omit = omit,
- response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
- response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
- sample_rate: float | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> BinaryAPIResponse | Stream[AudioSpeechStreamChunk]:
- """
- Generate audio from input text
-
- Args:
- input: Input text to generate the audio for
-
- model: The name of the model to query.
-
- [See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#audio-models)
- The current supported tts models are: - cartesia/sonic - hexgrad/Kokoro-82M -
- canopylabs/orpheus-3b-0.1-ft
-
- stream: If true, output is streamed for several characters at a time instead of waiting
- for the full response. The stream terminates with `data: [DONE]`. If false,
- return the encoded audio as octet stream
-
- voice: The voice to use for generating the audio. The voices supported are different
- for each model. For eg - for canopylabs/orpheus-3b-0.1-ft, one of the voices
- supported is tara, for hexgrad/Kokoro-82M, one of the voices supported is
- af_alloy and for cartesia/sonic, one of the voices supported is "friendly
- sidekick".
-
- You can view the voices supported for each model using the /v1/voices endpoint
- sending the model name as the query parameter.
- [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available).
-
- language: Language of input text.
-
- response_encoding: Audio encoding of response
-
- response_format: The format of audio output. Supported formats are mp3, wav, raw if streaming is
- false. If streaming is true, the only supported format is raw.
-
- sample_rate: Sampling rate to use for the output audio. The default sampling rate for
- canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for
- cartesia/sonic is 44100.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- ...
-
- @required_args(["input", "model", "voice"], ["input", "model", "stream", "voice"])
- def create(
- self,
- *,
- input: str,
- model: Union[Literal["cartesia/sonic", "hexgrad/Kokoro-82M", "canopylabs/orpheus-3b-0.1-ft"], str],
- voice: str,
- language: Literal["en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru", "sv", "tr", "zh"]
- | Omit = omit,
- response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
- response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
- sample_rate: float | Omit = omit,
- stream: Literal[False] | Literal[True] | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> BinaryAPIResponse | Stream[AudioSpeechStreamChunk]:
- extra_headers = {"Accept": "application/octet-stream", **(extra_headers or {})}
- return self._post(
- "/audio/speech",
- body=maybe_transform(
- {
- "input": input,
- "model": model,
- "voice": voice,
- "language": language,
- "response_encoding": response_encoding,
- "response_format": response_format,
- "sample_rate": sample_rate,
- "stream": stream,
- },
- audio_create_params.AudioCreateParamsStreaming
- if stream
- else audio_create_params.AudioCreateParamsNonStreaming,
- ),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=BinaryAPIResponse,
- stream=stream or False,
- stream_cls=Stream[AudioSpeechStreamChunk],
- )
-
class AsyncAudioResource(AsyncAPIResource):
+ @cached_property
+ def speech(self) -> AsyncSpeechResource:
+ return AsyncSpeechResource(self._client)
+
@cached_property
def voices(self) -> AsyncVoicesResource:
return AsyncVoicesResource(self._client)
@@ -365,262 +113,14 @@ def with_streaming_response(self) -> AsyncAudioResourceWithStreamingResponse:
"""
return AsyncAudioResourceWithStreamingResponse(self)
- @overload
- async def create(
- self,
- *,
- input: str,
- model: Union[Literal["cartesia/sonic", "hexgrad/Kokoro-82M", "canopylabs/orpheus-3b-0.1-ft"], str],
- voice: str,
- language: Literal["en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru", "sv", "tr", "zh"]
- | Omit = omit,
- response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
- response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
- sample_rate: float | Omit = omit,
- stream: Literal[False] | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> AsyncBinaryAPIResponse:
- """
- Generate audio from input text
-
- Args:
- input: Input text to generate the audio for
-
- model: The name of the model to query.
-
- [See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#audio-models)
- The current supported tts models are: - cartesia/sonic - hexgrad/Kokoro-82M -
- canopylabs/orpheus-3b-0.1-ft
-
- voice: The voice to use for generating the audio. The voices supported are different
- for each model. For eg - for canopylabs/orpheus-3b-0.1-ft, one of the voices
- supported is tara, for hexgrad/Kokoro-82M, one of the voices supported is
- af_alloy and for cartesia/sonic, one of the voices supported is "friendly
- sidekick".
-
- You can view the voices supported for each model using the /v1/voices endpoint
- sending the model name as the query parameter.
- [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available).
-
- language: Language of input text.
-
- response_encoding: Audio encoding of response
-
- response_format: The format of audio output. Supported formats are mp3, wav, raw if streaming is
- false. If streaming is true, the only supported format is raw.
-
- sample_rate: Sampling rate to use for the output audio. The default sampling rate for
- canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for
- cartesia/sonic is 44100.
-
- stream: If true, output is streamed for several characters at a time instead of waiting
- for the full response. The stream terminates with `data: [DONE]`. If false,
- return the encoded audio as octet stream
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- ...
-
- @overload
- async def create(
- self,
- *,
- input: str,
- model: Union[Literal["cartesia/sonic", "hexgrad/Kokoro-82M", "canopylabs/orpheus-3b-0.1-ft"], str],
- stream: Literal[True],
- voice: str,
- language: Literal["en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru", "sv", "tr", "zh"]
- | Omit = omit,
- response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
- response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
- sample_rate: float | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> AsyncStream[AudioSpeechStreamChunk]:
- """
- Generate audio from input text
-
- Args:
- input: Input text to generate the audio for
-
- model: The name of the model to query.
-
- [See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#audio-models)
- The current supported tts models are: - cartesia/sonic - hexgrad/Kokoro-82M -
- canopylabs/orpheus-3b-0.1-ft
-
- stream: If true, output is streamed for several characters at a time instead of waiting
- for the full response. The stream terminates with `data: [DONE]`. If false,
- return the encoded audio as octet stream
-
- voice: The voice to use for generating the audio. The voices supported are different
- for each model. For eg - for canopylabs/orpheus-3b-0.1-ft, one of the voices
- supported is tara, for hexgrad/Kokoro-82M, one of the voices supported is
- af_alloy and for cartesia/sonic, one of the voices supported is "friendly
- sidekick".
-
- You can view the voices supported for each model using the /v1/voices endpoint
- sending the model name as the query parameter.
- [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available).
-
- language: Language of input text.
-
- response_encoding: Audio encoding of response
-
- response_format: The format of audio output. Supported formats are mp3, wav, raw if streaming is
- false. If streaming is true, the only supported format is raw.
-
- sample_rate: Sampling rate to use for the output audio. The default sampling rate for
- canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for
- cartesia/sonic is 44100.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- ...
-
- @overload
- async def create(
- self,
- *,
- input: str,
- model: Union[Literal["cartesia/sonic", "hexgrad/Kokoro-82M", "canopylabs/orpheus-3b-0.1-ft"], str],
- stream: bool,
- voice: str,
- language: Literal["en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru", "sv", "tr", "zh"]
- | Omit = omit,
- response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
- response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
- sample_rate: float | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> AsyncBinaryAPIResponse | AsyncStream[AudioSpeechStreamChunk]:
- """
- Generate audio from input text
-
- Args:
- input: Input text to generate the audio for
-
- model: The name of the model to query.
-
- [See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#audio-models)
- The current supported tts models are: - cartesia/sonic - hexgrad/Kokoro-82M -
- canopylabs/orpheus-3b-0.1-ft
-
- stream: If true, output is streamed for several characters at a time instead of waiting
- for the full response. The stream terminates with `data: [DONE]`. If false,
- return the encoded audio as octet stream
-
- voice: The voice to use for generating the audio. The voices supported are different
- for each model. For eg - for canopylabs/orpheus-3b-0.1-ft, one of the voices
- supported is tara, for hexgrad/Kokoro-82M, one of the voices supported is
- af_alloy and for cartesia/sonic, one of the voices supported is "friendly
- sidekick".
-
- You can view the voices supported for each model using the /v1/voices endpoint
- sending the model name as the query parameter.
- [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available).
-
- language: Language of input text.
-
- response_encoding: Audio encoding of response
-
- response_format: The format of audio output. Supported formats are mp3, wav, raw if streaming is
- false. If streaming is true, the only supported format is raw.
-
- sample_rate: Sampling rate to use for the output audio. The default sampling rate for
- canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for
- cartesia/sonic is 44100.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- ...
-
- @required_args(["input", "model", "voice"], ["input", "model", "stream", "voice"])
- async def create(
- self,
- *,
- input: str,
- model: Union[Literal["cartesia/sonic", "hexgrad/Kokoro-82M", "canopylabs/orpheus-3b-0.1-ft"], str],
- voice: str,
- language: Literal["en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru", "sv", "tr", "zh"]
- | Omit = omit,
- response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
- response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
- sample_rate: float | Omit = omit,
- stream: Literal[False] | Literal[True] | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> AsyncBinaryAPIResponse | AsyncStream[AudioSpeechStreamChunk]:
- extra_headers = {"Accept": "application/octet-stream", **(extra_headers or {})}
- return await self._post(
- "/audio/speech",
- body=await async_maybe_transform(
- {
- "input": input,
- "model": model,
- "voice": voice,
- "language": language,
- "response_encoding": response_encoding,
- "response_format": response_format,
- "sample_rate": sample_rate,
- "stream": stream,
- },
- audio_create_params.AudioCreateParamsStreaming
- if stream
- else audio_create_params.AudioCreateParamsNonStreaming,
- ),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=AsyncBinaryAPIResponse,
- stream=stream or False,
- stream_cls=AsyncStream[AudioSpeechStreamChunk],
- )
-
class AudioResourceWithRawResponse:
def __init__(self, audio: AudioResource) -> None:
self._audio = audio
- self.create = to_custom_raw_response_wrapper(
- audio.create,
- BinaryAPIResponse,
- )
+ @cached_property
+ def speech(self) -> SpeechResourceWithRawResponse:
+ return SpeechResourceWithRawResponse(self._audio.speech)
@cached_property
def voices(self) -> VoicesResourceWithRawResponse:
@@ -639,10 +139,9 @@ class AsyncAudioResourceWithRawResponse:
def __init__(self, audio: AsyncAudioResource) -> None:
self._audio = audio
- self.create = async_to_custom_raw_response_wrapper(
- audio.create,
- AsyncBinaryAPIResponse,
- )
+ @cached_property
+ def speech(self) -> AsyncSpeechResourceWithRawResponse:
+ return AsyncSpeechResourceWithRawResponse(self._audio.speech)
@cached_property
def voices(self) -> AsyncVoicesResourceWithRawResponse:
@@ -661,10 +160,9 @@ class AudioResourceWithStreamingResponse:
def __init__(self, audio: AudioResource) -> None:
self._audio = audio
- self.create = to_custom_streamed_response_wrapper(
- audio.create,
- StreamedBinaryAPIResponse,
- )
+ @cached_property
+ def speech(self) -> SpeechResourceWithStreamingResponse:
+ return SpeechResourceWithStreamingResponse(self._audio.speech)
@cached_property
def voices(self) -> VoicesResourceWithStreamingResponse:
@@ -683,10 +181,9 @@ class AsyncAudioResourceWithStreamingResponse:
def __init__(self, audio: AsyncAudioResource) -> None:
self._audio = audio
- self.create = async_to_custom_streamed_response_wrapper(
- audio.create,
- AsyncStreamedBinaryAPIResponse,
- )
+ @cached_property
+ def speech(self) -> AsyncSpeechResourceWithStreamingResponse:
+ return AsyncSpeechResourceWithStreamingResponse(self._audio.speech)
@cached_property
def voices(self) -> AsyncVoicesResourceWithStreamingResponse:
diff --git a/src/together/resources/audio/speech.py b/src/together/resources/audio/speech.py
new file mode 100644
index 00000000..78f61353
--- /dev/null
+++ b/src/together/resources/audio/speech.py
@@ -0,0 +1,605 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import Literal, overload
+
+import httpx
+
+from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
+from ..._utils import required_args, maybe_transform, async_maybe_transform
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import (
+ BinaryAPIResponse,
+ AsyncBinaryAPIResponse,
+ StreamedBinaryAPIResponse,
+ AsyncStreamedBinaryAPIResponse,
+ to_custom_raw_response_wrapper,
+ to_custom_streamed_response_wrapper,
+ async_to_custom_raw_response_wrapper,
+ async_to_custom_streamed_response_wrapper,
+)
+from ..._streaming import Stream, AsyncStream
+from ...types.audio import speech_create_params
+from ..._base_client import make_request_options
+from ...types.audio_speech_stream_chunk import AudioSpeechStreamChunk
+
+__all__ = ["SpeechResource", "AsyncSpeechResource"]
+
+
+class SpeechResource(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> SpeechResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
+ """
+ return SpeechResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> SpeechResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
+ """
+ return SpeechResourceWithStreamingResponse(self)
+
+ @overload
+ def create(
+ self,
+ *,
+ input: str,
+ model: Union[Literal["cartesia/sonic", "hexgrad/Kokoro-82M", "canopylabs/orpheus-3b-0.1-ft"], str],
+ voice: str,
+ language: Literal["en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru", "sv", "tr", "zh"]
+ | Omit = omit,
+ response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
+ response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
+ sample_rate: float | Omit = omit,
+ stream: Literal[False] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> BinaryAPIResponse:
+ """
+ Generate audio from input text
+
+ Args:
+ input: Input text to generate the audio for
+
+ model: The name of the model to query.
+
+ [See all of Together AI's audio models](https://docs.together.ai/docs/serverless-models#audio-models)
+ The currently supported TTS models are: - cartesia/sonic - hexgrad/Kokoro-82M -
+ canopylabs/orpheus-3b-0.1-ft
+
+ voice: The voice to use for generating the audio. The supported voices differ
+ for each model. For example, canopylabs/orpheus-3b-0.1-ft supports the voice
+ tara, hexgrad/Kokoro-82M supports af_alloy, and cartesia/sonic supports
+ "friendly sidekick".
+
+ You can list the voices supported by each model via the /v1/voices endpoint,
+ passing the model name as a query parameter.
+ [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available).
+
+ language: Language of input text.
+
+ response_encoding: Audio encoding of response
+
+ response_format: The format of audio output. Supported formats are mp3, wav, raw if streaming is
+ false. If streaming is true, the only supported format is raw.
+
+ sample_rate: Sampling rate to use for the output audio. The default sampling rate for
+ canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for
+ cartesia/sonic is 44100.
+
+ stream: If true, output is streamed for several characters at a time instead of
+ waiting for the full response. The stream terminates with `data: [DONE]`. If
+ false, the encoded audio is returned as an octet stream.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ def create(
+ self,
+ *,
+ input: str,
+ model: Union[Literal["cartesia/sonic", "hexgrad/Kokoro-82M", "canopylabs/orpheus-3b-0.1-ft"], str],
+ stream: Literal[True],
+ voice: str,
+ language: Literal["en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru", "sv", "tr", "zh"]
+ | Omit = omit,
+ response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
+ response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
+ sample_rate: float | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Stream[AudioSpeechStreamChunk]:
+ """
+ Generate audio from input text
+
+ Args:
+ input: Input text to generate the audio for
+
+ model: The name of the model to query.
+
+ [See all of Together AI's audio models](https://docs.together.ai/docs/serverless-models#audio-models)
+ The currently supported TTS models are: - cartesia/sonic - hexgrad/Kokoro-82M -
+ canopylabs/orpheus-3b-0.1-ft
+
+ stream: If true, output is streamed for several characters at a time instead of
+ waiting for the full response. The stream terminates with `data: [DONE]`. If
+ false, the encoded audio is returned as an octet stream.
+
+ voice: The voice to use for generating the audio. The supported voices differ
+ for each model. For example, canopylabs/orpheus-3b-0.1-ft supports the voice
+ tara, hexgrad/Kokoro-82M supports af_alloy, and cartesia/sonic supports
+ "friendly sidekick".
+
+ You can list the voices supported by each model via the /v1/voices endpoint,
+ passing the model name as a query parameter.
+ [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available).
+
+ language: Language of input text.
+
+ response_encoding: Audio encoding of response
+
+ response_format: The format of audio output. Supported formats are mp3, wav, raw if streaming is
+ false. If streaming is true, the only supported format is raw.
+
+ sample_rate: Sampling rate to use for the output audio. The default sampling rate for
+ canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for
+ cartesia/sonic is 44100.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ def create(
+ self,
+ *,
+ input: str,
+ model: Union[Literal["cartesia/sonic", "hexgrad/Kokoro-82M", "canopylabs/orpheus-3b-0.1-ft"], str],
+ stream: bool,
+ voice: str,
+ language: Literal["en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru", "sv", "tr", "zh"]
+ | Omit = omit,
+ response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
+ response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
+ sample_rate: float | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> BinaryAPIResponse | Stream[AudioSpeechStreamChunk]:
+ """
+ Generate audio from input text
+
+ Args:
+ input: Input text to generate the audio for
+
+ model: The name of the model to query.
+
+ [See all of Together AI's audio models](https://docs.together.ai/docs/serverless-models#audio-models)
+ The currently supported TTS models are: - cartesia/sonic - hexgrad/Kokoro-82M -
+ canopylabs/orpheus-3b-0.1-ft
+
+ stream: If true, output is streamed for several characters at a time instead of
+ waiting for the full response. The stream terminates with `data: [DONE]`. If
+ false, the encoded audio is returned as an octet stream.
+
+ voice: The voice to use for generating the audio. The supported voices differ
+ for each model. For example, canopylabs/orpheus-3b-0.1-ft supports the voice
+ tara, hexgrad/Kokoro-82M supports af_alloy, and cartesia/sonic supports
+ "friendly sidekick".
+
+ You can list the voices supported by each model via the /v1/voices endpoint,
+ passing the model name as a query parameter.
+ [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available).
+
+ language: Language of input text.
+
+ response_encoding: Audio encoding of response
+
+ response_format: The format of audio output. Supported formats are mp3, wav, raw if streaming is
+ false. If streaming is true, the only supported format is raw.
+
+ sample_rate: Sampling rate to use for the output audio. The default sampling rate for
+ canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for
+ cartesia/sonic is 44100.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @required_args(["input", "model", "voice"], ["input", "model", "stream", "voice"])
+ def create(
+ self,
+ *,
+ input: str,
+ model: Union[Literal["cartesia/sonic", "hexgrad/Kokoro-82M", "canopylabs/orpheus-3b-0.1-ft"], str],
+ voice: str,
+ language: Literal["en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru", "sv", "tr", "zh"]
+ | Omit = omit,
+ response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
+ response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
+ sample_rate: float | Omit = omit,
+ stream: Literal[False] | Literal[True] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> BinaryAPIResponse | Stream[AudioSpeechStreamChunk]:
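+ # Default to requesting raw bytes; a caller-supplied Accept header in
+ # extra_headers takes precedence because it is unpacked last.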
+ extra_headers = {"Accept": "application/octet-stream", **(extra_headers or {})}
+ return self._post(
+ "/audio/speech",
+ body=maybe_transform(
+ {
+ "input": input,
+ "model": model,
+ "voice": voice,
+ "language": language,
+ "response_encoding": response_encoding,
+ "response_format": response_format,
+ "sample_rate": sample_rate,
+ "stream": stream,
+ },
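+ # Transform the body according to the streaming or non-streaming
+ # params TypedDict, chosen by whether `stream` was provided.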
+ speech_create_params.SpeechCreateParamsStreaming
+ if stream
+ else speech_create_params.SpeechCreateParamsNonStreaming,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=BinaryAPIResponse,
+ stream=stream or False,
+ stream_cls=Stream[AudioSpeechStreamChunk],
+ )
+
+
+class AsyncSpeechResource(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncSpeechResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
+ """
+ return AsyncSpeechResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncSpeechResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
+ """
+ return AsyncSpeechResourceWithStreamingResponse(self)
+
+ @overload
+ async def create(
+ self,
+ *,
+ input: str,
+ model: Union[Literal["cartesia/sonic", "hexgrad/Kokoro-82M", "canopylabs/orpheus-3b-0.1-ft"], str],
+ voice: str,
+ language: Literal["en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru", "sv", "tr", "zh"]
+ | Omit = omit,
+ response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
+ response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
+ sample_rate: float | Omit = omit,
+ stream: Literal[False] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> AsyncBinaryAPIResponse:
+ """
+ Generate audio from input text
+
+ Args:
+ input: Input text to generate the audio for
+
+ model: The name of the model to query.
+
+ [See all of Together AI's audio models](https://docs.together.ai/docs/serverless-models#audio-models)
+ The currently supported TTS models are: - cartesia/sonic - hexgrad/Kokoro-82M -
+ canopylabs/orpheus-3b-0.1-ft
+
+ voice: The voice to use for generating the audio. The supported voices differ
+ for each model. For example, canopylabs/orpheus-3b-0.1-ft supports the voice
+ tara, hexgrad/Kokoro-82M supports af_alloy, and cartesia/sonic supports
+ "friendly sidekick".
+
+ You can list the voices supported by each model via the /v1/voices endpoint,
+ passing the model name as a query parameter.
+ [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available).
+
+ language: Language of input text.
+
+ response_encoding: Audio encoding of response
+
+ response_format: The format of audio output. Supported formats are mp3, wav, raw if streaming is
+ false. If streaming is true, the only supported format is raw.
+
+ sample_rate: Sampling rate to use for the output audio. The default sampling rate for
+ canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for
+ cartesia/sonic is 44100.
+
+ stream: If true, output is streamed for several characters at a time instead of
+ waiting for the full response. The stream terminates with `data: [DONE]`. If
+ false, the encoded audio is returned as an octet stream.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ async def create(
+ self,
+ *,
+ input: str,
+ model: Union[Literal["cartesia/sonic", "hexgrad/Kokoro-82M", "canopylabs/orpheus-3b-0.1-ft"], str],
+ stream: Literal[True],
+ voice: str,
+ language: Literal["en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru", "sv", "tr", "zh"]
+ | Omit = omit,
+ response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
+ response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
+ sample_rate: float | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> AsyncStream[AudioSpeechStreamChunk]:
+ """
+ Generate audio from input text
+
+ Args:
+ input: Input text to generate the audio for
+
+ model: The name of the model to query.
+
+ [See all of Together AI's audio models](https://docs.together.ai/docs/serverless-models#audio-models)
+ The currently supported TTS models are: - cartesia/sonic - hexgrad/Kokoro-82M -
+ canopylabs/orpheus-3b-0.1-ft
+
+ stream: If true, output is streamed for several characters at a time instead of
+ waiting for the full response. The stream terminates with `data: [DONE]`. If
+ false, the encoded audio is returned as an octet stream.
+
+ voice: The voice to use for generating the audio. The supported voices differ
+ for each model. For example, canopylabs/orpheus-3b-0.1-ft supports the voice
+ tara, hexgrad/Kokoro-82M supports af_alloy, and cartesia/sonic supports
+ "friendly sidekick".
+
+ You can list the voices supported by each model via the /v1/voices endpoint,
+ passing the model name as a query parameter.
+ [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available).
+
+ language: Language of input text.
+
+ response_encoding: Audio encoding of response
+
+ response_format: The format of audio output. Supported formats are mp3, wav, raw if streaming is
+ false. If streaming is true, the only supported format is raw.
+
+ sample_rate: Sampling rate to use for the output audio. The default sampling rate for
+ canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for
+ cartesia/sonic is 44100.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ async def create(
+ self,
+ *,
+ input: str,
+ model: Union[Literal["cartesia/sonic", "hexgrad/Kokoro-82M", "canopylabs/orpheus-3b-0.1-ft"], str],
+ stream: bool,
+ voice: str,
+ language: Literal["en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru", "sv", "tr", "zh"]
+ | Omit = omit,
+ response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
+ response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
+ sample_rate: float | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> AsyncBinaryAPIResponse | AsyncStream[AudioSpeechStreamChunk]:
+ """
+ Generate audio from input text
+
+ Args:
+ input: Input text to generate the audio for
+
+ model: The name of the model to query.
+
+ [See all of Together AI's audio models](https://docs.together.ai/docs/serverless-models#audio-models)
+ The currently supported TTS models are: - cartesia/sonic - hexgrad/Kokoro-82M -
+ canopylabs/orpheus-3b-0.1-ft
+
+ stream: If true, output is streamed for several characters at a time instead of
+ waiting for the full response. The stream terminates with `data: [DONE]`. If
+ false, the encoded audio is returned as an octet stream.
+
+ voice: The voice to use for generating the audio. The supported voices differ
+ for each model. For example, canopylabs/orpheus-3b-0.1-ft supports the voice
+ tara, hexgrad/Kokoro-82M supports af_alloy, and cartesia/sonic supports
+ "friendly sidekick".
+
+ You can list the voices supported by each model via the /v1/voices endpoint,
+ passing the model name as a query parameter.
+ [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available).
+
+ language: Language of input text.
+
+ response_encoding: Audio encoding of response
+
+ response_format: The format of audio output. Supported formats are mp3, wav, raw if streaming is
+ false. If streaming is true, the only supported format is raw.
+
+ sample_rate: Sampling rate to use for the output audio. The default sampling rate for
+ canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for
+ cartesia/sonic is 44100.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @required_args(["input", "model", "voice"], ["input", "model", "stream", "voice"])
+ async def create(
+ self,
+ *,
+ input: str,
+ model: Union[Literal["cartesia/sonic", "hexgrad/Kokoro-82M", "canopylabs/orpheus-3b-0.1-ft"], str],
+ voice: str,
+ language: Literal["en", "de", "fr", "es", "hi", "it", "ja", "ko", "nl", "pl", "pt", "ru", "sv", "tr", "zh"]
+ | Omit = omit,
+ response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
+ response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
+ sample_rate: float | Omit = omit,
+ stream: Literal[False] | Literal[True] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> AsyncBinaryAPIResponse | AsyncStream[AudioSpeechStreamChunk]:
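+ # Default to requesting raw bytes; a caller-supplied Accept header in
+ # extra_headers takes precedence because it is unpacked last.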
+ extra_headers = {"Accept": "application/octet-stream", **(extra_headers or {})}
+ return await self._post(
+ "/audio/speech",
+ body=await async_maybe_transform(
+ {
+ "input": input,
+ "model": model,
+ "voice": voice,
+ "language": language,
+ "response_encoding": response_encoding,
+ "response_format": response_format,
+ "sample_rate": sample_rate,
+ "stream": stream,
+ },
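+ # Transform the body according to the streaming or non-streaming
+ # params TypedDict, chosen by whether `stream` was provided.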
+ speech_create_params.SpeechCreateParamsStreaming
+ if stream
+ else speech_create_params.SpeechCreateParamsNonStreaming,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=AsyncBinaryAPIResponse,
+ stream=stream or False,
+ stream_cls=AsyncStream[AudioSpeechStreamChunk],
+ )
+
+
+class SpeechResourceWithRawResponse:
+ def __init__(self, speech: SpeechResource) -> None:
+ self._speech = speech
+
+ self.create = to_custom_raw_response_wrapper(
+ speech.create,
+ BinaryAPIResponse,
+ )
+
+
+class AsyncSpeechResourceWithRawResponse:
+ def __init__(self, speech: AsyncSpeechResource) -> None:
+ self._speech = speech
+
+ self.create = async_to_custom_raw_response_wrapper(
+ speech.create,
+ AsyncBinaryAPIResponse,
+ )
+
+
+class SpeechResourceWithStreamingResponse:
+ def __init__(self, speech: SpeechResource) -> None:
+ self._speech = speech
+
+ self.create = to_custom_streamed_response_wrapper(
+ speech.create,
+ StreamedBinaryAPIResponse,
+ )
+
+
+class AsyncSpeechResourceWithStreamingResponse:
+ def __init__(self, speech: AsyncSpeechResource) -> None:
+ self._speech = speech
+
+ self.create = async_to_custom_streamed_response_wrapper(
+ speech.create,
+ AsyncStreamedBinaryAPIResponse,
+ )
diff --git a/src/together/types/__init__.py b/src/together/types/__init__.py
index 11c20cc9..4779716d 100644
--- a/src/together/types/__init__.py
+++ b/src/together/types/__init__.py
@@ -30,7 +30,6 @@
from .eval_update_params import EvalUpdateParams as EvalUpdateParams
from .file_list_response import FileListResponse as FileListResponse
from .full_training_type import FullTrainingType as FullTrainingType
-from .audio_create_params import AudioCreateParams as AudioCreateParams
from .batch_create_params import BatchCreateParams as BatchCreateParams
from .batch_list_response import BatchListResponse as BatchListResponse
from .lo_ra_training_type import LoRaTrainingType as LoRaTrainingType
diff --git a/src/together/types/audio/__init__.py b/src/together/types/audio/__init__.py
index 867b4f25..245749a6 100644
--- a/src/together/types/audio/__init__.py
+++ b/src/together/types/audio/__init__.py
@@ -3,6 +3,7 @@
from __future__ import annotations
from .voice_list_response import VoiceListResponse as VoiceListResponse
+from .speech_create_params import SpeechCreateParams as SpeechCreateParams
from .translation_create_params import TranslationCreateParams as TranslationCreateParams
from .transcription_create_params import TranscriptionCreateParams as TranscriptionCreateParams
from .translation_create_response import TranslationCreateResponse as TranslationCreateResponse
diff --git a/src/together/types/audio_create_params.py b/src/together/types/audio/speech_create_params.py
similarity index 86%
rename from src/together/types/audio_create_params.py
rename to src/together/types/audio/speech_create_params.py
index c2b64aca..085a5f74 100644
--- a/src/together/types/audio_create_params.py
+++ b/src/together/types/audio/speech_create_params.py
@@ -5,10 +5,10 @@
from typing import Union
from typing_extensions import Literal, Required, TypedDict
-__all__ = ["AudioCreateParamsBase", "AudioCreateParamsNonStreaming", "AudioCreateParamsStreaming"]
+__all__ = ["SpeechCreateParamsBase", "SpeechCreateParamsNonStreaming", "SpeechCreateParamsStreaming"]
-class AudioCreateParamsBase(TypedDict, total=False):
+class SpeechCreateParamsBase(TypedDict, total=False):
input: Required[str]
"""Input text to generate the audio for"""
@@ -54,7 +54,7 @@ class AudioCreateParamsBase(TypedDict, total=False):
"""
-class AudioCreateParamsNonStreaming(AudioCreateParamsBase, total=False):
+class SpeechCreateParamsNonStreaming(SpeechCreateParamsBase, total=False):
stream: Literal[False]
"""
If true, output is streamed for several characters at a time instead of waiting
@@ -63,7 +63,7 @@ class AudioCreateParamsNonStreaming(AudioCreateParamsBase, total=False):
"""
-class AudioCreateParamsStreaming(AudioCreateParamsBase):
+class SpeechCreateParamsStreaming(SpeechCreateParamsBase):
stream: Required[Literal[True]]
"""
If true, output is streamed for several characters at a time instead of waiting
@@ -72,4 +72,4 @@ class AudioCreateParamsStreaming(AudioCreateParamsBase):
"""
-AudioCreateParams = Union[AudioCreateParamsNonStreaming, AudioCreateParamsStreaming]
+SpeechCreateParams = Union[SpeechCreateParamsNonStreaming, SpeechCreateParamsStreaming]
diff --git a/tests/api_resources/test_audio.py b/tests/api_resources/audio/test_speech.py
similarity index 63%
rename from tests/api_resources/test_audio.py
rename to tests/api_resources/audio/test_speech.py
index 4e756493..ce213402 100644
--- a/tests/api_resources/test_audio.py
+++ b/tests/api_resources/audio/test_speech.py
@@ -20,28 +20,28 @@
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
-class TestAudio:
+class TestSpeech:
parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
@parametrize
@pytest.mark.respx(base_url=base_url)
def test_method_create_overload_1(self, client: Together, respx_mock: MockRouter) -> None:
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- audio = client.audio.create(
+ speech = client.audio.speech.create(
input="input",
model="canopylabs/orpheus-3b-0.1-ft",
voice="voice",
)
- assert audio.is_closed
- assert audio.json() == {"foo": "bar"}
- assert cast(Any, audio.is_closed) is True
- assert isinstance(audio, BinaryAPIResponse)
+ assert speech.is_closed
+ assert speech.json() == {"foo": "bar"}
+ assert cast(Any, speech.is_closed) is True
+ assert isinstance(speech, BinaryAPIResponse)
@parametrize
@pytest.mark.respx(base_url=base_url)
def test_method_create_with_all_params_overload_1(self, client: Together, respx_mock: MockRouter) -> None:
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- audio = client.audio.create(
+ speech = client.audio.speech.create(
input="input",
model="canopylabs/orpheus-3b-0.1-ft",
voice="voice",
@@ -51,62 +51,62 @@ def test_method_create_with_all_params_overload_1(self, client: Together, respx_
sample_rate=0,
stream=False,
)
- assert audio.is_closed
- assert audio.json() == {"foo": "bar"}
- assert cast(Any, audio.is_closed) is True
- assert isinstance(audio, BinaryAPIResponse)
+ assert speech.is_closed
+ assert speech.json() == {"foo": "bar"}
+ assert cast(Any, speech.is_closed) is True
+ assert isinstance(speech, BinaryAPIResponse)
@parametrize
@pytest.mark.respx(base_url=base_url)
def test_raw_response_create_overload_1(self, client: Together, respx_mock: MockRouter) -> None:
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- audio = client.audio.with_raw_response.create(
+ speech = client.audio.speech.with_raw_response.create(
input="input",
model="canopylabs/orpheus-3b-0.1-ft",
voice="voice",
)
- assert audio.is_closed is True
- assert audio.http_request.headers.get("X-Stainless-Lang") == "python"
- assert audio.json() == {"foo": "bar"}
- assert isinstance(audio, BinaryAPIResponse)
+ assert speech.is_closed is True
+ assert speech.http_request.headers.get("X-Stainless-Lang") == "python"
+ assert speech.json() == {"foo": "bar"}
+ assert isinstance(speech, BinaryAPIResponse)
@parametrize
@pytest.mark.respx(base_url=base_url)
def test_streaming_response_create_overload_1(self, client: Together, respx_mock: MockRouter) -> None:
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- with client.audio.with_streaming_response.create(
+ with client.audio.speech.with_streaming_response.create(
input="input",
model="canopylabs/orpheus-3b-0.1-ft",
voice="voice",
- ) as audio:
- assert not audio.is_closed
- assert audio.http_request.headers.get("X-Stainless-Lang") == "python"
+ ) as speech:
+ assert not speech.is_closed
+ assert speech.http_request.headers.get("X-Stainless-Lang") == "python"
- assert audio.json() == {"foo": "bar"}
- assert cast(Any, audio.is_closed) is True
- assert isinstance(audio, StreamedBinaryAPIResponse)
+ assert speech.json() == {"foo": "bar"}
+ assert cast(Any, speech.is_closed) is True
+ assert isinstance(speech, StreamedBinaryAPIResponse)
- assert cast(Any, audio.is_closed) is True
+ assert cast(Any, speech.is_closed) is True
@parametrize
@pytest.mark.respx(base_url=base_url)
def test_method_create_overload_2(self, client: Together, respx_mock: MockRouter) -> None:
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- audio_stream = client.audio.create(
+ speech_stream = client.audio.speech.create(
input="input",
model="canopylabs/orpheus-3b-0.1-ft",
stream=True,
voice="voice",
)
- audio_stream.response.close()
+ speech_stream.response.close()
@parametrize
@pytest.mark.respx(base_url=base_url)
def test_method_create_with_all_params_overload_2(self, client: Together, respx_mock: MockRouter) -> None:
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- audio_stream = client.audio.create(
+ speech_stream = client.audio.speech.create(
input="input",
model="canopylabs/orpheus-3b-0.1-ft",
stream=True,
@@ -116,45 +116,45 @@ def test_method_create_with_all_params_overload_2(self, client: Together, respx_
response_format="mp3",
sample_rate=0,
)
- audio_stream.response.close()
+ speech_stream.response.close()
@parametrize
@pytest.mark.respx(base_url=base_url)
def test_raw_response_create_overload_2(self, client: Together, respx_mock: MockRouter) -> None:
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- audio_stream = client.audio.with_raw_response.create(
+ speech_stream = client.audio.speech.with_raw_response.create(
input="input",
model="canopylabs/orpheus-3b-0.1-ft",
stream=True,
voice="voice",
)
- assert audio_stream.http_request.headers.get("X-Stainless-Lang") == "python"
- assert audio_stream.json() == {"foo": "bar"}
- assert isinstance(audio_stream, BinaryAPIResponse)
+ assert speech_stream.http_request.headers.get("X-Stainless-Lang") == "python"
+ assert speech_stream.json() == {"foo": "bar"}
+ assert isinstance(speech_stream, BinaryAPIResponse)
@parametrize
@pytest.mark.respx(base_url=base_url)
def test_streaming_response_create_overload_2(self, client: Together, respx_mock: MockRouter) -> None:
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- with client.audio.with_streaming_response.create(
+ with client.audio.speech.with_streaming_response.create(
input="input",
model="canopylabs/orpheus-3b-0.1-ft",
stream=True,
voice="voice",
- ) as audio_stream:
- assert not audio_stream.is_closed
- assert audio_stream.http_request.headers.get("X-Stainless-Lang") == "python"
+ ) as speech_stream:
+ assert not speech_stream.is_closed
+ assert speech_stream.http_request.headers.get("X-Stainless-Lang") == "python"
- assert audio_stream.json() == {"foo": "bar"}
- assert cast(Any, audio_stream.is_closed) is True
- assert isinstance(audio_stream, StreamedBinaryAPIResponse)
+ assert speech_stream.json() == {"foo": "bar"}
+ assert cast(Any, speech_stream.is_closed) is True
+ assert isinstance(speech_stream, StreamedBinaryAPIResponse)
- assert cast(Any, audio_stream.is_closed) is True
+ assert cast(Any, speech_stream.is_closed) is True
-class TestAsyncAudio:
+class TestAsyncSpeech:
parametrize = pytest.mark.parametrize(
"async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
)
@@ -163,15 +163,15 @@ class TestAsyncAudio:
@pytest.mark.respx(base_url=base_url)
async def test_method_create_overload_1(self, async_client: AsyncTogether, respx_mock: MockRouter) -> None:
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- audio = await async_client.audio.create(
+ speech = await async_client.audio.speech.create(
input="input",
model="canopylabs/orpheus-3b-0.1-ft",
voice="voice",
)
- assert audio.is_closed
- assert await audio.json() == {"foo": "bar"}
- assert cast(Any, audio.is_closed) is True
- assert isinstance(audio, AsyncBinaryAPIResponse)
+ assert speech.is_closed
+ assert await speech.json() == {"foo": "bar"}
+ assert cast(Any, speech.is_closed) is True
+ assert isinstance(speech, AsyncBinaryAPIResponse)
@parametrize
@pytest.mark.respx(base_url=base_url)
@@ -179,7 +179,7 @@ async def test_method_create_with_all_params_overload_1(
self, async_client: AsyncTogether, respx_mock: MockRouter
) -> None:
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- audio = await async_client.audio.create(
+ speech = await async_client.audio.speech.create(
input="input",
model="canopylabs/orpheus-3b-0.1-ft",
voice="voice",
@@ -189,26 +189,26 @@ async def test_method_create_with_all_params_overload_1(
sample_rate=0,
stream=False,
)
- assert audio.is_closed
- assert await audio.json() == {"foo": "bar"}
- assert cast(Any, audio.is_closed) is True
- assert isinstance(audio, AsyncBinaryAPIResponse)
+ assert speech.is_closed
+ assert await speech.json() == {"foo": "bar"}
+ assert cast(Any, speech.is_closed) is True
+ assert isinstance(speech, AsyncBinaryAPIResponse)
@parametrize
@pytest.mark.respx(base_url=base_url)
async def test_raw_response_create_overload_1(self, async_client: AsyncTogether, respx_mock: MockRouter) -> None:
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- audio = await async_client.audio.with_raw_response.create(
+ speech = await async_client.audio.speech.with_raw_response.create(
input="input",
model="canopylabs/orpheus-3b-0.1-ft",
voice="voice",
)
- assert audio.is_closed is True
- assert audio.http_request.headers.get("X-Stainless-Lang") == "python"
- assert await audio.json() == {"foo": "bar"}
- assert isinstance(audio, AsyncBinaryAPIResponse)
+ assert speech.is_closed is True
+ assert speech.http_request.headers.get("X-Stainless-Lang") == "python"
+ assert await speech.json() == {"foo": "bar"}
+ assert isinstance(speech, AsyncBinaryAPIResponse)
@parametrize
@pytest.mark.respx(base_url=base_url)
@@ -216,31 +216,31 @@ async def test_streaming_response_create_overload_1(
self, async_client: AsyncTogether, respx_mock: MockRouter
) -> None:
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- async with async_client.audio.with_streaming_response.create(
+ async with async_client.audio.speech.with_streaming_response.create(
input="input",
model="canopylabs/orpheus-3b-0.1-ft",
voice="voice",
- ) as audio:
- assert not audio.is_closed
- assert audio.http_request.headers.get("X-Stainless-Lang") == "python"
+ ) as speech:
+ assert not speech.is_closed
+ assert speech.http_request.headers.get("X-Stainless-Lang") == "python"
- assert await audio.json() == {"foo": "bar"}
- assert cast(Any, audio.is_closed) is True
- assert isinstance(audio, AsyncStreamedBinaryAPIResponse)
+ assert await speech.json() == {"foo": "bar"}
+ assert cast(Any, speech.is_closed) is True
+ assert isinstance(speech, AsyncStreamedBinaryAPIResponse)
- assert cast(Any, audio.is_closed) is True
+ assert cast(Any, speech.is_closed) is True
@parametrize
@pytest.mark.respx(base_url=base_url)
async def test_method_create_overload_2(self, async_client: AsyncTogether, respx_mock: MockRouter) -> None:
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- audio_stream = await async_client.audio.create(
+ speech_stream = await async_client.audio.speech.create(
input="input",
model="canopylabs/orpheus-3b-0.1-ft",
stream=True,
voice="voice",
)
- await audio_stream.response.aclose()
+ await speech_stream.response.aclose()
@parametrize
@pytest.mark.respx(base_url=base_url)
@@ -248,7 +248,7 @@ async def test_method_create_with_all_params_overload_2(
self, async_client: AsyncTogether, respx_mock: MockRouter
) -> None:
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- audio_stream = await async_client.audio.create(
+ speech_stream = await async_client.audio.speech.create(
input="input",
model="canopylabs/orpheus-3b-0.1-ft",
stream=True,
@@ -258,23 +258,23 @@ async def test_method_create_with_all_params_overload_2(
response_format="mp3",
sample_rate=0,
)
- await audio_stream.response.aclose()
+ await speech_stream.response.aclose()
@parametrize
@pytest.mark.respx(base_url=base_url)
async def test_raw_response_create_overload_2(self, async_client: AsyncTogether, respx_mock: MockRouter) -> None:
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- audio_stream = await async_client.audio.with_raw_response.create(
+ speech_stream = await async_client.audio.speech.with_raw_response.create(
input="input",
model="canopylabs/orpheus-3b-0.1-ft",
stream=True,
voice="voice",
)
- assert audio_stream.http_request.headers.get("X-Stainless-Lang") == "python"
- assert await audio_stream.json() == {"foo": "bar"}
- assert isinstance(audio_stream, AsyncBinaryAPIResponse)
+ assert speech_stream.http_request.headers.get("X-Stainless-Lang") == "python"
+ assert await speech_stream.json() == {"foo": "bar"}
+ assert isinstance(speech_stream, AsyncBinaryAPIResponse)
@parametrize
@pytest.mark.respx(base_url=base_url)
@@ -282,17 +282,17 @@ async def test_streaming_response_create_overload_2(
self, async_client: AsyncTogether, respx_mock: MockRouter
) -> None:
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- async with async_client.audio.with_streaming_response.create(
+ async with async_client.audio.speech.with_streaming_response.create(
input="input",
model="canopylabs/orpheus-3b-0.1-ft",
stream=True,
voice="voice",
- ) as audio_stream:
- assert not audio_stream.is_closed
- assert audio_stream.http_request.headers.get("X-Stainless-Lang") == "python"
+ ) as speech_stream:
+ assert not speech_stream.is_closed
+ assert speech_stream.http_request.headers.get("X-Stainless-Lang") == "python"
- assert await audio_stream.json() == {"foo": "bar"}
- assert cast(Any, audio_stream.is_closed) is True
- assert isinstance(audio_stream, AsyncStreamedBinaryAPIResponse)
+ assert await speech_stream.json() == {"foo": "bar"}
+ assert cast(Any, speech_stream.is_closed) is True
+ assert isinstance(speech_stream, AsyncStreamedBinaryAPIResponse)
- assert cast(Any, audio_stream.is_closed) is True
+ assert cast(Any, speech_stream.is_closed) is True
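
Migration sketch for this change (editorial note, not part of the generated diff; the model and voice strings mirror the test fixtures above, and the input value is illustrative):

    from together import Together

    client = Together()  # assumes TOGETHER_API_KEY is set in the environment

    # Before (v2.0.0-alpha.1 and earlier): speech synthesis lived directly on `audio`.
    # speech = client.audio.create(
    #     input="Hello, world!",
    #     model="canopylabs/orpheus-3b-0.1-ft",
    #     voice="voice",
    # )

    # After: the method moves under the new `speech` sub-resource.
    speech = client.audio.speech.create(
        input="Hello, world!",
        model="canopylabs/orpheus-3b-0.1-ft",
        voice="voice",
    )

    # Streaming overload: pass stream=True and close the underlying response when done.
    speech_stream = client.audio.speech.create(
        input="Hello, world!",
        model="canopylabs/orpheus-3b-0.1-ft",
        voice="voice",
        stream=True,
    )
    speech_stream.response.close()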
From 9d5e1a2a8fe09f01ac9ed984361139064d42a2d8 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Fri, 21 Nov 2025 15:43:20 +0000
Subject: [PATCH 2/3] feat(api): api update
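
This update tightens the `sample_rate` parameter from `float` to `int` across
every `create` overload and in `SpeechCreateParamsBase`. A minimal call sketch
under the new annotation (illustrative values; `client` is an instantiated
`Together` client):

    # sample_rate is now annotated as `int | Omit` rather than `float | Omit`,
    # so pass a whole-number rate; 24000 here is only an example value.
    speech = client.audio.speech.create(
        input="Hello, world!",
        model="canopylabs/orpheus-3b-0.1-ft",
        voice="voice",
        sample_rate=24000,
    )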
---
.stats.yml | 4 ++--
src/together/resources/audio/speech.py | 16 ++++++++--------
src/together/types/audio/speech_create_params.py | 2 +-
3 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/.stats.yml b/.stats.yml
index b14d585f..2abb58a1 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
configured_endpoints: 44
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2Ftogetherai-b86f8e6c4674d1a7829ffa8ddff4bc93d21334d231e6a4d0fd734d411c07a4eb.yml
-openapi_spec_hash: 8af4975be6ae8f4655fa92fd26af9682
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2Ftogetherai-e9e60279414ac3279c025d6318b5f67a8f6d01170e365612e791f3a1f259b94f.yml
+openapi_spec_hash: 26c59292808c5ae9f222f95f056430cf
config_hash: afbbabb8eb5bfbbf8139546a13addd9a
diff --git a/src/together/resources/audio/speech.py b/src/together/resources/audio/speech.py
index 78f61353..b2b19518 100644
--- a/src/together/resources/audio/speech.py
+++ b/src/together/resources/audio/speech.py
@@ -60,7 +60,7 @@ def create(
| Omit = omit,
response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
- sample_rate: float | Omit = omit,
+ sample_rate: int | Omit = omit,
stream: Literal[False] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -128,7 +128,7 @@ def create(
| Omit = omit,
response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
- sample_rate: float | Omit = omit,
+ sample_rate: int | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -195,7 +195,7 @@ def create(
| Omit = omit,
response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
- sample_rate: float | Omit = omit,
+ sample_rate: int | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -261,7 +261,7 @@ def create(
| Omit = omit,
response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
- sample_rate: float | Omit = omit,
+ sample_rate: int | Omit = omit,
stream: Literal[False] | Literal[True] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -328,7 +328,7 @@ async def create(
| Omit = omit,
response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
- sample_rate: float | Omit = omit,
+ sample_rate: int | Omit = omit,
stream: Literal[False] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -396,7 +396,7 @@ async def create(
| Omit = omit,
response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
- sample_rate: float | Omit = omit,
+ sample_rate: int | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -463,7 +463,7 @@ async def create(
| Omit = omit,
response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
- sample_rate: float | Omit = omit,
+ sample_rate: int | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -529,7 +529,7 @@ async def create(
| Omit = omit,
response_encoding: Literal["pcm_f32le", "pcm_s16le", "pcm_mulaw", "pcm_alaw"] | Omit = omit,
response_format: Literal["mp3", "wav", "raw"] | Omit = omit,
- sample_rate: float | Omit = omit,
+ sample_rate: int | Omit = omit,
stream: Literal[False] | Literal[True] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
diff --git a/src/together/types/audio/speech_create_params.py b/src/together/types/audio/speech_create_params.py
index 085a5f74..1db9f9ca 100644
--- a/src/together/types/audio/speech_create_params.py
+++ b/src/together/types/audio/speech_create_params.py
@@ -46,7 +46,7 @@ class SpeechCreateParamsBase(TypedDict, total=False):
the only supported format is raw.
"""
- sample_rate: float
+ sample_rate: int
"""Sampling rate to use for the output audio.
The default sampling rate for canopylabs/orpheus-3b-0.1-ft and
From b801cd063ebae99828912328130b8d2321a4e36e Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Fri, 21 Nov 2025 15:43:37 +0000
Subject: [PATCH 3/3] release: 2.0.0-alpha.2
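
A quick post-upgrade sanity check (editorial sketch; `__version__` is read
verbatim from src/together/_version.py, and installing a pre-release requires
pip's --pre flag, e.g. `pip install --pre together`):

    import together

    # The version string is taken verbatim from _version.py.
    assert together.__version__ == "2.0.0-alpha.2"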
---
.release-please-manifest.json | 2 +-
CHANGELOG.md | 13 +++++++++++++
pyproject.toml | 2 +-
src/together/_version.py | 2 +-
4 files changed, 16 insertions(+), 3 deletions(-)
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 6e011e8a..0c548e2f 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "2.0.0-alpha.1"
+ ".": "2.0.0-alpha.2"
}
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9478df67..089feefd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,18 @@
# Changelog
+## 2.0.0-alpha.2 (2025-11-21)
+
+Full Changelog: [v2.0.0-alpha.1...v2.0.0-alpha.2](https://github.com/togethercomputer/together-py/compare/v2.0.0-alpha.1...v2.0.0-alpha.2)
+
+### ⚠ BREAKING CHANGES
+
+* **api:** Change the call signature from `audio.create` to `audio.speech.create` to match the spec in the Python library and leave room for future APIs
+
+### Features
+
+* **api:** api update ([9d5e1a2](https://github.com/togethercomputer/together-py/commit/9d5e1a2a8fe09f01ac9ed984361139064d42a2d8))
+* **api:** Change TTS call signature ([251c911](https://github.com/togethercomputer/together-py/commit/251c911e4b6562fb1751ae2a880e7ff6bb2e7bd2))
+
## 2.0.0-alpha.1 (2025-11-21)
Full Changelog: [v0.1.0-alpha.28...v2.0.0-alpha.1](https://github.com/togethercomputer/together-py/compare/v0.1.0-alpha.28...v2.0.0-alpha.1)
diff --git a/pyproject.toml b/pyproject.toml
index b39dfdf9..64e583cf 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "together"
-version = "2.0.0-alpha.1"
+version = "2.0.0-alpha.2"
description = "The official Python library for the together API"
dynamic = ["readme"]
license = "Apache-2.0"
diff --git a/src/together/_version.py b/src/together/_version.py
index a5854438..54bac7bd 100644
--- a/src/together/_version.py
+++ b/src/together/_version.py
@@ -1,4 +1,4 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
__title__ = "together"
-__version__ = "2.0.0-alpha.1" # x-release-please-version
+__version__ = "2.0.0-alpha.2" # x-release-please-version