From 1ab425dd7adc713de33a4ddcfe5088b9bf65edc1 Mon Sep 17 00:00:00 2001
From: Christopher Hertel
Date: Wed, 12 Nov 2025 23:16:17 +0100
Subject: [PATCH] Add support for OpenAI text-to-speech

---
 examples/openai/audio-output.php              | 24 +++++
 .../src/Bridge/OpenAi/ModelCatalog.php        | 21 +++++
 .../src/Bridge/OpenAi/PlatformFactory.php     |  2 +
 .../src/Bridge/OpenAi/TextToSpeech.php        | 21 +++++
 .../src/Bridge/OpenAi/TextToSpeech/Format.php | 25 ++++++
 .../OpenAi/TextToSpeech/ModelClient.php       | 56 ++++++++++++
 .../OpenAi/TextToSpeech/ResultConverter.php   | 43 +++++++++
 .../src/Bridge/OpenAi/TextToSpeech/Voice.php  | 30 +++++++
 .../tests/Bridge/OpenAi/ModelCatalogTest.php  |  6 ++
 .../OpenAi/TextToSpeech/ModelClientTest.php   | 88 +++++++++++++++++++
 .../TextToSpeech/ResultConverterTest.php      | 75 ++++++++++++++++
 11 files changed, 391 insertions(+)
 create mode 100644 examples/openai/audio-output.php
 create mode 100644 src/platform/src/Bridge/OpenAi/TextToSpeech.php
 create mode 100644 src/platform/src/Bridge/OpenAi/TextToSpeech/Format.php
 create mode 100644 src/platform/src/Bridge/OpenAi/TextToSpeech/ModelClient.php
 create mode 100644 src/platform/src/Bridge/OpenAi/TextToSpeech/ResultConverter.php
 create mode 100644 src/platform/src/Bridge/OpenAi/TextToSpeech/Voice.php
 create mode 100644 src/platform/tests/Bridge/OpenAi/TextToSpeech/ModelClientTest.php
 create mode 100644 src/platform/tests/Bridge/OpenAi/TextToSpeech/ResultConverterTest.php

diff --git a/examples/openai/audio-output.php b/examples/openai/audio-output.php
new file mode 100644
index 000000000..664556fbd
--- /dev/null
+++ b/examples/openai/audio-output.php
@@ -0,0 +1,24 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory;
+use Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech\Voice;
+
+require_once dirname(__DIR__).'/bootstrap.php';
+
+$platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client());
+
+$result = $platform->invoke('gpt-4o-mini-tts', 'Today is a wonderful day to build something people love!', [
+    'voice' => Voice::CORAL, // mandatory: the text-to-speech model client throws when "voice" is missing
+    'instructions' => 'Speak in a cheerful and positive tone.',
+]);
+
+echo $result->asBinary(); // prints the binary result to stdout; redirect it into a file to keep the audio
diff --git a/src/platform/src/Bridge/OpenAi/ModelCatalog.php b/src/platform/src/Bridge/OpenAi/ModelCatalog.php
index 0fae198cf..b853da045 100644
--- a/src/platform/src/Bridge/OpenAi/ModelCatalog.php
+++ b/src/platform/src/Bridge/OpenAi/ModelCatalog.php
@@ -234,6 +234,27 @@ public function __construct(array $additionalModels = [])
                 'class' => Embeddings::class,
                 'capabilities' => [Capability::INPUT_TEXT],
             ],
+            'tts-1' => [
+                'class' => TextToSpeech::class,
+                'capabilities' => [
+                    Capability::INPUT_TEXT,
+                    Capability::OUTPUT_AUDIO,
+                ],
+            ],
+            'tts-1-hd' => [
+                'class' => TextToSpeech::class,
+                'capabilities' => [
+                    Capability::INPUT_TEXT,
+                    Capability::OUTPUT_AUDIO,
+                ],
+            ],
+            'gpt-4o-mini-tts' => [
+                'class' => TextToSpeech::class,
+                'capabilities' => [
+                    Capability::INPUT_TEXT,
+                    Capability::OUTPUT_AUDIO,
+                ],
+            ],
             'whisper-1' => [
                 'class' => Whisper::class,
                 'capabilities' => [
diff --git a/src/platform/src/Bridge/OpenAi/PlatformFactory.php b/src/platform/src/Bridge/OpenAi/PlatformFactory.php
index 946ed2ff9..b52837610 100644
--- a/src/platform/src/Bridge/OpenAi/PlatformFactory.php
+++ b/src/platform/src/Bridge/OpenAi/PlatformFactory.php
@@ -42,12 +42,14 @@ public static function create(
                 new Gpt\ModelClient($httpClient, $apiKey, $region),
                 new Embeddings\ModelClient($httpClient, $apiKey, $region),
                 new DallE\ModelClient($httpClient, $apiKey, $region),
+                new TextToSpeech\ModelClient($httpClient, $apiKey, $region),
                 new Whisper\ModelClient($httpClient, $apiKey, $region),
             ],
             [
                 new Gpt\ResultConverter(),
                 new Embeddings\ResultConverter(),
                 new DallE\ResultConverter(),
+                new TextToSpeech\ResultConverter(),
                 new Whisper\ResultConverter(),
             ],
             $modelCatalog,
diff --git a/src/platform/src/Bridge/OpenAi/TextToSpeech.php b/src/platform/src/Bridge/OpenAi/TextToSpeech.php
new file mode 100644
index 000000000..5a0dce6c2
--- /dev/null
+++ b/src/platform/src/Bridge/OpenAi/TextToSpeech.php
@@ -0,0 +1,21 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Symfony\AI\Platform\Bridge\OpenAi;
+
+use Symfony\AI\Platform\Model;
+
+/**
+ * @author Christopher Hertel
+ */
+class TextToSpeech extends Model // marker type: the TextToSpeech client and converter select models via instanceof
+{
+}
diff --git a/src/platform/src/Bridge/OpenAi/TextToSpeech/Format.php b/src/platform/src/Bridge/OpenAi/TextToSpeech/Format.php
new file mode 100644
index 000000000..1897cb01e
--- /dev/null
+++ b/src/platform/src/Bridge/OpenAi/TextToSpeech/Format.php
@@ -0,0 +1,25 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech;
+
+/**
+ * @author Christopher Hertel
+ */
+interface Format // NOTE(review): presumably the values for the API's "response_format" option - confirm against the OpenAI reference
+{
+    public const MP3 = 'mp3';
+    public const OPUS = 'opus';
+    public const AAC = 'aac';
+    public const FLAC = 'flac';
+    public const WAV = 'wav';
+    public const PCM = 'pcm';
+}
diff --git a/src/platform/src/Bridge/OpenAi/TextToSpeech/ModelClient.php b/src/platform/src/Bridge/OpenAi/TextToSpeech/ModelClient.php
new file mode 100644
index 000000000..9b60fe8e1
--- /dev/null
+++ b/src/platform/src/Bridge/OpenAi/TextToSpeech/ModelClient.php
@@ -0,0 +1,56 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech;
+
+use Symfony\AI\Platform\Bridge\OpenAi\AbstractModelClient;
+use Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech;
+use Symfony\AI\Platform\Exception\InvalidArgumentException;
+use Symfony\AI\Platform\Model;
+use Symfony\AI\Platform\ModelClientInterface;
+use Symfony\AI\Platform\Result\RawHttpResult;
+use Symfony\Contracts\HttpClient\HttpClientInterface;
+
+/**
+ * @author Christopher Hertel
+ */
+final class ModelClient extends AbstractModelClient implements ModelClientInterface
+{
+    public function __construct(
+        private readonly HttpClientInterface $httpClient,
+        #[\SensitiveParameter] private readonly string $apiKey,
+        private readonly ?string $region = null,
+    ) {
+        self::validateApiKey($apiKey);
+    }
+
+    public function supports(Model $model): bool
+    {
+        return $model instanceof TextToSpeech;
+    }
+
+    public function request(Model $model, array|string $payload, array $options = []): RawHttpResult
+    {
+        if (!isset($options['voice'])) { // fail before any HTTP traffic when the mandatory option is absent
+            throw new InvalidArgumentException('The "voice" option is required for TextToSpeech requests.');
+        }
+
+        if (isset($options['stream_format']) || isset($options['stream'])) { // isset(): any non-null value is rejected, "false" included
+            throw new InvalidArgumentException('Streaming text to speech results is not supported yet.');
+        }
+
+        return new RawHttpResult($this->httpClient->request('POST', \sprintf('%s/v1/audio/speech', self::getBaseUrl($this->region)), [
+            'auth_bearer' => $this->apiKey,
+            'headers' => ['Content-Type' => 'application/json'],
+            'json' => array_merge($options, ['model' => $model->getName(), 'input' => $payload]), // merged last: caller options cannot override model/input
+        ]));
+    }
+}
diff --git a/src/platform/src/Bridge/OpenAi/TextToSpeech/ResultConverter.php b/src/platform/src/Bridge/OpenAi/TextToSpeech/ResultConverter.php
new file mode 100644
index 000000000..1f93f2e9a
--- /dev/null
+++ b/src/platform/src/Bridge/OpenAi/TextToSpeech/ResultConverter.php
@@ -0,0 +1,43 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech;
+
+use Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech;
+use Symfony\AI\Platform\Exception\RuntimeException;
+use Symfony\AI\Platform\Model;
+use Symfony\AI\Platform\Result\BinaryResult;
+use Symfony\AI\Platform\Result\RawHttpResult;
+use Symfony\AI\Platform\Result\RawResultInterface;
+use Symfony\AI\Platform\Result\ResultInterface;
+use Symfony\AI\Platform\ResultConverterInterface as BaseResponseConverter;
+
+/**
+ * @author Christopher Hertel
+ */
+final class ResultConverter implements BaseResponseConverter
+{
+    public function supports(Model $model): bool
+    {
+        return $model instanceof TextToSpeech;
+    }
+
+    public function convert(RawResultInterface|RawHttpResult $result, array $options = []): ResultInterface
+    {
+        $response = $result->getObject();
+
+        if (200 !== $response->getStatusCode()) { // getContent(false): read the error body without the client throwing on 4xx/5xx
+            throw new RuntimeException(\sprintf('The OpenAI Text-to-Speech API returned an error: "%s"', $response->getContent(false)));
+        }
+
+        return new BinaryResult($response->getContent()); // reuse the response fetched above instead of calling getObject() again
+    }
+}
diff --git a/src/platform/src/Bridge/OpenAi/TextToSpeech/Voice.php b/src/platform/src/Bridge/OpenAi/TextToSpeech/Voice.php
new file mode 100644
index 000000000..58e7bc58c
--- /dev/null
+++ b/src/platform/src/Bridge/OpenAi/TextToSpeech/Voice.php
@@ -0,0 +1,30 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech;
+
+/**
+ * @author Christopher Hertel
+ */
+interface Voice // values for the "voice" request option (see the bundled audio-output example)
+{
+    public const ALLOY = 'alloy';
+    public const ASH = 'ash';
+    public const BALLAD = 'ballad';
+    public const CORAL = 'coral';
+    public const ECHO = 'echo';
+    public const FABLE = 'fable';
+    public const NOVA = 'nova';
+    public const ONYX = 'onyx';
+    public const SAGE = 'sage';
+    public const SHIMMER = 'shimmer';
+    public const VERSE = 'verse';
+}
diff --git a/src/platform/tests/Bridge/OpenAi/ModelCatalogTest.php b/src/platform/tests/Bridge/OpenAi/ModelCatalogTest.php
index fa1cd5e23..260af4f1f 100644
--- a/src/platform/tests/Bridge/OpenAi/ModelCatalogTest.php
+++ b/src/platform/tests/Bridge/OpenAi/ModelCatalogTest.php
@@ -15,6 +15,7 @@
 use Symfony\AI\Platform\Bridge\OpenAi\Embeddings;
 use Symfony\AI\Platform\Bridge\OpenAi\Gpt;
 use Symfony\AI\Platform\Bridge\OpenAi\ModelCatalog;
+use Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech;
 use Symfony\AI\Platform\Bridge\OpenAi\Whisper;
 use Symfony\AI\Platform\Capability;
 use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface;
@@ -53,6 +54,11 @@ public static function modelsProvider(): iterable
         yield 'text-embedding-3-large' => ['text-embedding-3-large', Embeddings::class, [Capability::INPUT_TEXT]];
         yield 'text-embedding-3-small' => ['text-embedding-3-small', Embeddings::class, [Capability::INPUT_TEXT]];
 
+        // Text-to-speech models
+        yield 'tts-1' => ['tts-1', TextToSpeech::class, [Capability::INPUT_TEXT, Capability::OUTPUT_AUDIO]];
+        yield 'tts-1-hd' => ['tts-1-hd', TextToSpeech::class, [Capability::INPUT_TEXT, Capability::OUTPUT_AUDIO]];
+        yield 'gpt-4o-mini-tts' => ['gpt-4o-mini-tts', TextToSpeech::class, [Capability::INPUT_TEXT, Capability::OUTPUT_AUDIO]];
+
         // Whisper models
         yield 'whisper-1' => ['whisper-1', Whisper::class, [Capability::INPUT_AUDIO, Capability::OUTPUT_TEXT]];
 
diff --git a/src/platform/tests/Bridge/OpenAi/TextToSpeech/ModelClientTest.php b/src/platform/tests/Bridge/OpenAi/TextToSpeech/ModelClientTest.php
new file mode 100644
index 000000000..018f74d6d
--- /dev/null
+++ b/src/platform/tests/Bridge/OpenAi/TextToSpeech/ModelClientTest.php
@@ -0,0 +1,88 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Symfony\AI\Platform\Tests\Bridge\OpenAi\TextToSpeech;
+
+use PHPUnit\Framework\TestCase;
+use Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech;
+use Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech\ModelClient;
+use Symfony\AI\Platform\Exception\InvalidArgumentException;
+use Symfony\AI\Platform\Model;
+use Symfony\Component\HttpClient\MockHttpClient;
+use Symfony\Component\HttpClient\Response\MockResponse;
+use Symfony\Contracts\HttpClient\ResponseInterface as HttpResponse;
+
+/**
+ * @author Christopher Hertel
+ */
+final class ModelClientTest extends TestCase
+{
+    public function testSupportsTextToSpeechModel()
+    {
+        $modelClient = new ModelClient(new MockHttpClient(), 'sk-api-key'); // exercise the client itself, not the converter
+        $model = new TextToSpeech('tts-1');
+
+        $this->assertTrue($modelClient->supports($model));
+    }
+
+    public function testDoesntSupportOtherModels()
+    {
+        $modelClient = new ModelClient(new MockHttpClient(), 'sk-api-key');
+        $model = new Model('test-model');
+
+        $this->assertFalse($modelClient->supports($model));
+    }
+
+    public function testHappyCase()
+    {
+        $resultCallback = static function (string $method, string $url, array $options): HttpResponse {
+            self::assertSame('POST', $method);
+            self::assertSame('https://api.openai.com/v1/audio/speech', $url);
+            self::assertSame('Authorization: Bearer sk-api-key', $options['normalized_headers']['authorization'][0]);
+            $expectedBody = '{"voice":"alloy","instructions":"Speak like a pirate","model":"tts-1","input":"Hello World!"}';
+            self::assertSame($expectedBody, $options['body']); // options come first, model and input are appended last
+
+            return new MockResponse();
+        };
+        $httpClient = new MockHttpClient([$resultCallback]);
+        $modelClient = new ModelClient($httpClient, 'sk-api-key');
+        $modelClient->request(new TextToSpeech('tts-1'), 'Hello World!', [
+            'voice' => 'alloy',
+            'instructions' => 'Speak like a pirate',
+        ]);
+        $this->assertSame(1, $httpClient->getRequestsCount()); // guards against the callback and its assertions never running
+    }
+
+    public function testFailsWithoutVoiceOption()
+    {
+        $this->expectException(InvalidArgumentException::class);
+        $this->expectExceptionMessage('The "voice" option is required for TextToSpeech requests.');
+
+        $httpClient = new MockHttpClient();
+        $modelClient = new ModelClient($httpClient, 'sk-api-key');
+        $modelClient->request(new TextToSpeech('tts-1'), 'Hello World!', [
+            'instructions' => 'Speak like a pirate',
+        ]);
+    }
+
+    public function testFailsWithStreamingOptions()
+    {
+        $this->expectException(InvalidArgumentException::class);
+        $this->expectExceptionMessage('Streaming text to speech results is not supported yet.');
+
+        $httpClient = new MockHttpClient();
+        $modelClient = new ModelClient($httpClient, 'sk-api-key');
+        $modelClient->request(new TextToSpeech('tts-1'), 'Hello World!', [
+            'voice' => 'alloy',
+            'stream' => true,
+        ]);
+    }
+}
diff --git a/src/platform/tests/Bridge/OpenAi/TextToSpeech/ResultConverterTest.php b/src/platform/tests/Bridge/OpenAi/TextToSpeech/ResultConverterTest.php
new file mode 100644
index 000000000..3071c140f
--- /dev/null
+++ b/src/platform/tests/Bridge/OpenAi/TextToSpeech/ResultConverterTest.php
@@ -0,0 +1,75 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Symfony\AI\Platform\Tests\Bridge\OpenAi\TextToSpeech;
+
+use PHPUnit\Framework\TestCase;
+use Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech;
+use Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech\ResultConverter;
+use Symfony\AI\Platform\Exception\RuntimeException;
+use Symfony\AI\Platform\Model;
+use Symfony\AI\Platform\Result\BinaryResult;
+use Symfony\AI\Platform\Result\RawHttpResult;
+use Symfony\Contracts\HttpClient\ResponseInterface;
+
+/**
+ * @author Christopher Hertel
+ */
+final class ResultConverterTest extends TestCase
+{
+    public function testSupportsTextToSpeechModel()
+    {
+        $converter = new ResultConverter();
+        $model = new TextToSpeech('tts-1');
+
+        $this->assertTrue($converter->supports($model));
+    }
+
+    public function testDoesntSupportOtherModels()
+    {
+        $converter = new ResultConverter();
+        $model = new Model('test-model'); // a plain Model that is not a TextToSpeech instance
+
+        $this->assertFalse($converter->supports($model));
+    }
+
+    public function testThrowsOnErrorResponse()
+    {
+        $this->expectException(RuntimeException::class);
+        $this->expectExceptionMessage('The OpenAI Text-to-Speech API returned an error: "Hi Test!"');
+
+        $result = $this->createStub(ResponseInterface::class);
+        $result
+            ->method('getStatusCode')
+            ->willReturn(400); // a non-200 status is the error case under test
+        $result
+            ->method('getContent')
+            ->willReturn('Hi Test!'); // stubbed body; expected verbatim inside the exception message above
+
+        (new ResultConverter())->convert(new RawHttpResult($result));
+    }
+
+    public function testReturnResponseAsBinary()
+    {
+        $result = $this->createStub(ResponseInterface::class);
+        $result
+            ->method('getStatusCode')
+            ->willReturn(200);
+        $result
+            ->method('getContent')
+            ->willReturn('fake-audio-bytes'); // placeholder payload; the test only checks it is passed through unchanged
+
+        $binaryResult = (new ResultConverter())->convert(new RawHttpResult($result));
+
+        $this->assertInstanceOf(BinaryResult::class, $binaryResult);
+        $this->assertSame('fake-audio-bytes', $binaryResult->getContent());
+    }
+}