Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions examples/openai/audio-output.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory;
use Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech\Voice;

require_once dirname(__DIR__).'/bootstrap.php';

// Build the OpenAI platform from the API key found in the environment.
$platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client());

// Text to synthesize and the request options (voice plus speaking instructions).
$text = 'Today is a wonderful day to build something people love!';
$options = [
    'voice' => Voice::CORAL,
    'instructions' => 'Speak in a cheerful and positive tone.',
];

$result = $platform->invoke('gpt-4o-mini-tts', $text, $options);

// Write the binary result to stdout.
echo $result->asBinary();
21 changes: 21 additions & 0 deletions src/platform/src/Bridge/OpenAi/ModelCatalog.php
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,27 @@ public function __construct(array $additionalModels = [])
'class' => Embeddings::class,
'capabilities' => [Capability::INPUT_TEXT],
],
'tts-1' => [
'class' => TextToSpeech::class,
'capabilities' => [
Capability::INPUT_TEXT,
Capability::OUTPUT_AUDIO,
],
],
'tts-1-hd' => [
'class' => TextToSpeech::class,
'capabilities' => [
Capability::INPUT_TEXT,
Capability::OUTPUT_AUDIO,
],
],
'gpt-4o-mini-tts' => [
'class' => TextToSpeech::class,
'capabilities' => [
Capability::INPUT_TEXT,
Capability::OUTPUT_AUDIO,
],
],
'whisper-1' => [
'class' => Whisper::class,
'capabilities' => [
Expand Down
2 changes: 2 additions & 0 deletions src/platform/src/Bridge/OpenAi/PlatformFactory.php
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,14 @@ public static function create(
new Gpt\ModelClient($httpClient, $apiKey, $region),
new Embeddings\ModelClient($httpClient, $apiKey, $region),
new DallE\ModelClient($httpClient, $apiKey, $region),
new TextToSpeech\ModelClient($httpClient, $apiKey, $region),
new Whisper\ModelClient($httpClient, $apiKey, $region),
],
[
new Gpt\ResultConverter(),
new Embeddings\ResultConverter(),
new DallE\ResultConverter(),
new TextToSpeech\ResultConverter(),
new Whisper\ResultConverter(),
],
$modelCatalog,
Expand Down
21 changes: 21 additions & 0 deletions src/platform/src/Bridge/OpenAi/TextToSpeech.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

namespace Symfony\AI\Platform\Bridge\OpenAi;

use Symfony\AI\Platform\Model;

/**
* Marker model type for OpenAI text-to-speech models.
*
* The class adds nothing to Model; the TextToSpeech ModelClient and
* ResultConverter select the models they handle with an instanceof check
* against this type.
*
* @author Christopher Hertel <mail@christopher-hertel.de>
*/
class TextToSpeech extends Model
{
}
25 changes: 25 additions & 0 deletions src/platform/src/Bridge/OpenAi/TextToSpeech/Format.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

namespace Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech;

/**
* String identifiers for audio formats, grouped as interface constants.
*
* NOTE(review): presumably meant as values for the "response_format" request
* option of the speech endpoint - confirm against the OpenAI API reference.
*
* @author Christopher Hertel <mail@christopher-hertel.de>
*/
interface Format
{
public const MP3 = 'mp3';
public const OPUS = 'opus';
public const AAC = 'aac';
public const FLAC = 'flac';
public const WAV = 'wav';
public const PCM = 'pcm';
}
56 changes: 56 additions & 0 deletions src/platform/src/Bridge/OpenAi/TextToSpeech/ModelClient.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

namespace Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech;

use Symfony\AI\Platform\Bridge\OpenAi\AbstractModelClient;
use Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech;
use Symfony\AI\Platform\Exception\InvalidArgumentException;
use Symfony\AI\Platform\Model;
use Symfony\AI\Platform\ModelClientInterface;
use Symfony\AI\Platform\Result\RawHttpResult;
use Symfony\Contracts\HttpClient\HttpClientInterface;

/**
 * Model client that sends text-to-speech requests to the "/v1/audio/speech" endpoint.
 *
 * @author Christopher Hertel <mail@christopher-hertel.de>
 */
final class ModelClient extends AbstractModelClient implements ModelClientInterface
{
    /**
     * @param HttpClientInterface $httpClient client used to perform the HTTP call
     * @param string              $apiKey     API key sent as bearer token; checked by validateApiKey()
     * @param string|null         $region     optional region, forwarded to getBaseUrl()
     */
    public function __construct(
        private readonly HttpClientInterface $httpClient,
        #[\SensitiveParameter] private readonly string $apiKey,
        private readonly ?string $region = null,
    ) {
        self::validateApiKey($apiKey);
    }

    /**
     * Only models of type TextToSpeech are handled by this client.
     */
    public function supports(Model $model): bool
    {
        return $model instanceof TextToSpeech;
    }

    /**
     * Posts the payload as "input" together with the model name and all given options.
     *
     * @throws InvalidArgumentException when the "voice" option is missing or a streaming option is set
     */
    public function request(Model $model, array|string $payload, array $options = []): RawHttpResult
    {
        if (!isset($options['voice'])) {
            throw new InvalidArgumentException('The "voice" option is required for TextToSpeech requests.');
        }

        // Any non-null streaming related option is rejected, regardless of its value.
        $streamingOption = $options['stream_format'] ?? $options['stream'] ?? null;
        if (null !== $streamingOption) {
            throw new InvalidArgumentException('Streaming text to speech results is not supported yet.');
        }

        $endpoint = self::getBaseUrl($this->region).'/v1/audio/speech';

        // "model" and "input" come last so they always win over same-named options.
        $body = [...$options, 'model' => $model->getName(), 'input' => $payload];

        $response = $this->httpClient->request('POST', $endpoint, [
            'auth_bearer' => $this->apiKey,
            'headers' => ['Content-Type' => 'application/json'],
            'json' => $body,
        ]);

        return new RawHttpResult($response);
    }
}
43 changes: 43 additions & 0 deletions src/platform/src/Bridge/OpenAi/TextToSpeech/ResultConverter.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

namespace Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech;

use Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech;
use Symfony\AI\Platform\Exception\RuntimeException;
use Symfony\AI\Platform\Model;
use Symfony\AI\Platform\Result\BinaryResult;
use Symfony\AI\Platform\Result\RawHttpResult;
use Symfony\AI\Platform\Result\RawResultInterface;
use Symfony\AI\Platform\Result\ResultInterface;
use Symfony\AI\Platform\ResultConverterInterface as BaseResponseConverter;

/**
 * Turns the raw HTTP response of a text-to-speech request into a BinaryResult.
 *
 * @author Christopher Hertel <mail@christopher-hertel.de>
 */
final class ResultConverter implements BaseResponseConverter
{
    /**
     * Only models of type TextToSpeech are handled by this converter.
     */
    public function supports(Model $model): bool
    {
        return $model instanceof TextToSpeech;
    }

    /**
     * Wraps the response body in a BinaryResult when the status code is 200.
     *
     * @throws RuntimeException for every other status code; the message carries the response body
     */
    public function convert(RawResultInterface|RawHttpResult $result, array $options = []): ResultInterface
    {
        $response = $result->getObject();

        if (200 === $response->getStatusCode()) {
            return new BinaryResult($response->getContent());
        }

        // Passing false reads the error body without the HTTP client throwing on the status code.
        $errorBody = $response->getContent(false);

        throw new RuntimeException(\sprintf('The OpenAI Text-to-Speech API returned an error: "%s"', $errorBody));
    }
}
30 changes: 30 additions & 0 deletions src/platform/src/Bridge/OpenAi/TextToSpeech/Voice.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

namespace Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech;

/**
* String identifiers for voices, to be passed as the "voice" option of a
* text-to-speech request (the option is mandatory in ModelClient::request()).
*
* @author Christopher Hertel <mail@christopher-hertel.de>
*/
interface Voice
{
public const ALLOY = 'alloy';
public const ASH = 'ash';
public const BALLAD = 'ballad';
public const CORAL = 'coral';
public const ECHO = 'echo';
public const FABLE = 'fable';
public const NOVA = 'nova';
public const ONYX = 'onyx';
public const SAGE = 'sage';
public const SHIMMER = 'shimmer';
public const VERSE = 'verse';
}
6 changes: 6 additions & 0 deletions src/platform/tests/Bridge/OpenAi/ModelCatalogTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
use Symfony\AI\Platform\Bridge\OpenAi\Embeddings;
use Symfony\AI\Platform\Bridge\OpenAi\Gpt;
use Symfony\AI\Platform\Bridge\OpenAi\ModelCatalog;
use Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech;
use Symfony\AI\Platform\Bridge\OpenAi\Whisper;
use Symfony\AI\Platform\Capability;
use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface;
Expand Down Expand Up @@ -53,6 +54,11 @@ public static function modelsProvider(): iterable
yield 'text-embedding-3-large' => ['text-embedding-3-large', Embeddings::class, [Capability::INPUT_TEXT]];
yield 'text-embedding-3-small' => ['text-embedding-3-small', Embeddings::class, [Capability::INPUT_TEXT]];

// Text-to-speech models
yield 'tts-1' => ['tts-1', TextToSpeech::class, [Capability::INPUT_TEXT, Capability::OUTPUT_AUDIO]];
yield 'tts-1-hd' => ['tts-1-hd', TextToSpeech::class, [Capability::INPUT_TEXT, Capability::OUTPUT_AUDIO]];
yield 'gpt-4o-mini-tts' => ['gpt-4o-mini-tts', TextToSpeech::class, [Capability::INPUT_TEXT, Capability::OUTPUT_AUDIO]];

// Whisper models
yield 'whisper-1' => ['whisper-1', Whisper::class, [Capability::INPUT_AUDIO, Capability::OUTPUT_TEXT]];

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

namespace Symfony\AI\Platform\Tests\Bridge\OpenAi\TextToSpeech;

use PHPUnit\Framework\TestCase;
use Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech;
use Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech\ModelClient;
use Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech\ResultConverter;
use Symfony\AI\Platform\Exception\InvalidArgumentException;
use Symfony\AI\Platform\Model;
use Symfony\Component\HttpClient\MockHttpClient;
use Symfony\Component\HttpClient\Response\MockResponse;
use Symfony\Contracts\HttpClient\ResponseInterface as HttpResponse;

/**
 * Unit tests for the OpenAI text-to-speech ModelClient.
 *
 * @author Christopher Hertel <mail@christopher-hertel.de>
 */
final class ModelClientTest extends TestCase
{
    /**
     * The client under test must accept TextToSpeech models.
     */
    public function testSupportsTextToSpeechModel()
    {
        $modelClient = new ModelClient(new MockHttpClient(), 'sk-api-key');

        $this->assertTrue($modelClient->supports(new TextToSpeech('tts-1')));
    }

    /**
     * The client under test must reject models of any other type.
     */
    public function testDoesntSupportOtherModels()
    {
        $modelClient = new ModelClient(new MockHttpClient(), 'sk-api-key');

        $this->assertFalse($modelClient->supports(new Model('test-model')));
    }

    /**
     * A valid request is posted to the speech endpoint with bearer auth and a
     * JSON body made of the options followed by "model" and "input".
     */
    public function testHappyCase()
    {
        $resultCallback = static function (string $method, string $url, array $options): HttpResponse {
            self::assertSame('POST', $method);
            self::assertSame('https://api.openai.com/v1/audio/speech', $url);
            self::assertSame('Authorization: Bearer sk-api-key', $options['normalized_headers']['authorization'][0]);
            $expectedBody = '{"voice":"alloy","instructions":"Speak like a pirate","model":"tts-1","input":"Hello World!"}';
            self::assertSame($expectedBody, $options['body']);

            return new MockResponse();
        };
        $httpClient = new MockHttpClient([$resultCallback]);
        $modelClient = new ModelClient($httpClient, 'sk-api-key');
        $modelClient->request(new TextToSpeech('tts-1'), 'Hello World!', [
            'voice' => 'alloy',
            'instructions' => 'Speak like a pirate',
        ]);

        // Guards against the callback assertions being skipped because no request was issued.
        $this->assertSame(1, $httpClient->getRequestsCount());
    }

    /**
     * Omitting the "voice" option must fail before any HTTP request is made.
     */
    public function testFailsWithoutVoiceOption()
    {
        $this->expectException(InvalidArgumentException::class);
        $this->expectExceptionMessage('The "voice" option is required for TextToSpeech requests.');

        $httpClient = new MockHttpClient();
        $modelClient = new ModelClient($httpClient, 'sk-api-key');
        $modelClient->request(new TextToSpeech('tts-1'), 'Hello World!', [
            'instructions' => 'Speak like a pirate',
        ]);
    }

    /**
     * Streaming options are not supported and must be rejected.
     */
    public function testFailsWithStreamingOptions()
    {
        $this->expectException(InvalidArgumentException::class);
        $this->expectExceptionMessage('Streaming text to speech results is not supported yet.');

        $httpClient = new MockHttpClient();
        $modelClient = new ModelClient($httpClient, 'sk-api-key');
        $modelClient->request(new TextToSpeech('tts-1'), 'Hello World!', [
            'voice' => 'alloy',
            'stream' => true,
        ]);
    }
}
Loading