Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions examples/elevenlabs/text-to-speech-as-stream.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabs;
use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory;
use Symfony\AI\Platform\Message\Content\Text;

// Shared example bootstrap; expected to provide the env() and http_client() helpers used below.
require_once dirname(__DIR__).'/bootstrap.php';

$platform = PlatformFactory::create(
    apiKey: env('ELEVEN_LABS_API_KEY'),
    httpClient: http_client(),
);

// 'stream' => true asks the platform for a chunked result instead of a single binary payload.
$model = new ElevenLabs(options: [
    'voice' => 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN)
    'stream' => true,
]);

$result = $platform->invoke($model, new Text('The first move is what sets everything in motion.'));

// Write every chunk to the output as soon as it is yielded; nothing is accumulated in memory.
foreach ($result->asStream() as $chunk) {
    echo $chunk;
}

echo \PHP_EOL;
12 changes: 8 additions & 4 deletions src/platform/src/Bridge/ElevenLabs/ElevenLabsClient.php
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public function request(Model $model, array|string $payload, array $options = []
}

if (\in_array($model->getName(), [ElevenLabs::SCRIBE_V1, ElevenLabs::SCRIBE_V1_EXPERIMENTAL], true)) {
return $this->doSpeechToTextRequest($model, $payload, $options);
return $this->doSpeechToTextRequest($model, $payload);
}

$capabilities = $this->retrieveCapabilities($model);
Expand All @@ -56,9 +56,8 @@ public function request(Model $model, array|string $payload, array $options = []

/**
* @param array<string|int, mixed> $payload
* @param array<string, mixed> $options
*/
private function doSpeechToTextRequest(Model $model, array|string $payload, array $options): RawHttpResult
private function doSpeechToTextRequest(Model $model, array|string $payload): RawHttpResult
{
return new RawHttpResult($this->httpClient->request('POST', \sprintf('%s/speech-to-text', $this->hostUrl), [
'headers' => [
Expand Down Expand Up @@ -86,8 +85,13 @@ private function doTextToSpeechRequest(Model $model, array|string $payload, arra
}

$voice = $options['voice'] ??= $model->getOptions()['voice'];
$stream = $options['stream'] ??= $model->getOptions()['stream'] ?? false;

$url = $stream
? \sprintf('%s/text-to-speech/%s/stream', $this->hostUrl, $voice)
: \sprintf('%s/text-to-speech/%s', $this->hostUrl, $voice);

return new RawHttpResult($this->httpClient->request('POST', \sprintf('%s/text-to-speech/%s', $this->hostUrl, $voice), [
return new RawHttpResult($this->httpClient->request('POST', $url, [
'headers' => [
'xi-api-key' => $this->apiKey,
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,22 @@
use Symfony\AI\Platform\Result\BinaryResult;
use Symfony\AI\Platform\Result\RawResultInterface;
use Symfony\AI\Platform\Result\ResultInterface;
use Symfony\AI\Platform\Result\StreamResult;
use Symfony\AI\Platform\Result\TextResult;
use Symfony\AI\Platform\ResultConverterInterface;
use Symfony\Contracts\HttpClient\HttpClientInterface;
use Symfony\Contracts\HttpClient\ResponseInterface;

/**
* @author Guillaume Loulier <personal@guillaumeloulier.fr>
*/
final readonly class ElevenLabsResultConverter implements ResultConverterInterface
{
/**
 * @param HttpClientInterface $httpClient client whose stream() method is used to iterate over the chunks of a streamed response
 */
public function __construct(
private HttpClientInterface $httpClient,
) {
}

public function supports(Model $model): bool
{
return $model instanceof ElevenLabs;
Expand All @@ -36,9 +43,25 @@ public function convert(RawResultInterface $result, array $options = []): Result
$response = $result->getObject();

return match (true) {
\array_key_exists('stream', $options) && $options['stream'] => new StreamResult($this->convertToGenerator($response)),
str_contains($response->getInfo('url'), 'speech-to-text') => new TextResult($result->getData()['text']),
str_contains($response->getInfo('url'), 'text-to-speech') => new BinaryResult($result->getObject()->getContent(), 'audio/mpeg'),
default => throw new RuntimeException('Unsupported ElevenLabs response.'),
};
}

/**
 * Lazily yields the content of each chunk streamed for the given response.
 *
 * Chunks flagged as first or last by the HTTP client are skipped, and so are
 * chunks whose content is an empty string.
 */
private function convertToGenerator(ResponseInterface $response): \Generator
{
    foreach ($this->httpClient->stream($response) as $part) {
        if (!$part->isFirst() && !$part->isLast()) {
            $data = $part->getContent();

            if ('' !== $data) {
                yield $data;
            }
        }
    }
}
}
2 changes: 1 addition & 1 deletion src/platform/src/Bridge/ElevenLabs/PlatformFactory.php
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ public static function create(

return new Platform(
[new ElevenLabsClient($httpClient, $apiKey, $hostUrl)],
[new ElevenLabsResultConverter()],
[new ElevenLabsResultConverter($httpClient)],
$contract ?? ElevenLabsContract::create(),
);
}
Expand Down
35 changes: 33 additions & 2 deletions src/platform/tests/Bridge/ElevenLabs/ElevenLabsClientTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
namespace Symfony\AI\Platform\Tests\Bridge\ElevenLabs;

use PHPUnit\Framework\Attributes\CoversClass;
use PHPUnit\Framework\Attributes\Group;
use PHPUnit\Framework\Attributes\UsesClass;
use PHPUnit\Framework\TestCase;
use Symfony\AI\Platform\Bridge\ElevenLabs\Contract\AudioNormalizer;
Expand All @@ -21,6 +20,7 @@
use Symfony\AI\Platform\Exception\InvalidArgumentException;
use Symfony\AI\Platform\Message\Content\Audio;
use Symfony\AI\Platform\Model;
use Symfony\AI\Platform\Result\RawHttpResult;
use Symfony\Component\HttpClient\MockHttpClient;
use Symfony\Component\HttpClient\Response\JsonMockResponse;
use Symfony\Component\HttpClient\Response\MockResponse;
Expand All @@ -30,6 +30,7 @@
#[UsesClass(Model::class)]
#[UsesClass(Audio::class)]
#[UsesClass(AudioNormalizer::class)]
#[UsesClass(RawHttpResult::class)]
final class ElevenLabsClientTest extends TestCase
{
public function testSupportsModel()
Expand Down Expand Up @@ -133,7 +134,6 @@ public function testClientCannotPerformTextToSpeechRequestWithoutValidPayload()
]), []);
}

#[Group('foo')]
public function testClientCanPerformTextToSpeechRequest()
{
$payload = Audio::fromFile(\dirname(__DIR__, 5).'/fixtures/audio.mp3');
Expand Down Expand Up @@ -162,4 +162,35 @@ public function testClientCanPerformTextToSpeechRequest()

$this->assertSame(2, $httpClient->getRequestsCount());
}

/**
 * Ensures a model configured with 'stream' => true is sent to the streaming
 * text-to-speech endpoint and that the raw HTTP result is returned.
 */
public function testClientCanPerformTextToSpeechRequestAsStream()
{
    $payload = Audio::fromFile(\dirname(__DIR__, 5).'/fixtures/audio.mp3');

    // Kept in a variable so the URL it was requested with can be inspected afterwards.
    $audioResponse = new MockResponse($payload->asBinary());

    $httpClient = new MockHttpClient([
        // First request: model capabilities lookup.
        new JsonMockResponse([
            [
                'model_id' => ElevenLabs::ELEVEN_MULTILINGUAL_V2,
                'can_do_text_to_speech' => true,
            ],
        ]),
        // Second request: the text-to-speech call itself.
        $audioResponse,
    ]);

    // Argument order mirrors PlatformFactory::create(): HTTP client, API key, host URL.
    $client = new ElevenLabsClient(
        $httpClient,
        'my-api-key',
        'https://api.elevenlabs.io/v1',
    );

    $result = $client->request(new ElevenLabs(options: [
        'voice' => 'Dslrhjl3ZpzrctukrQSN',
        'stream' => true,
    ]), [
        'text' => 'foo',
    ]);

    $this->assertInstanceOf(RawHttpResult::class, $result);
    $this->assertSame(2, $httpClient->getRequestsCount());
    // The streaming variant must target ".../text-to-speech/{voice}/stream".
    $this->assertStringContainsString('/text-to-speech/Dslrhjl3ZpzrctukrQSN/stream', $audioResponse->getRequestUrl());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
use Symfony\AI\Platform\Result\BinaryResult;
use Symfony\AI\Platform\Result\InMemoryRawResult;
use Symfony\AI\Platform\Result\TextResult;
use Symfony\Component\HttpClient\MockHttpClient;

#[CoversClass(ElevenLabsResultConverter::class)]
#[UsesClass(ElevenLabs::class)]
Expand All @@ -31,15 +32,15 @@ final class ElevenLabsConverterTest extends TestCase
{
public function testSupportsModel()
{
$converter = new ElevenLabsResultConverter();
$converter = new ElevenLabsResultConverter(new MockHttpClient());

$this->assertTrue($converter->supports(new ElevenLabs()));
$this->assertFalse($converter->supports(new Model('any-model')));
}

public function testConvertSpeechToTextResponse()
{
$converter = new ElevenLabsResultConverter();
$converter = new ElevenLabsResultConverter(new MockHttpClient());
$rawResult = new InMemoryRawResult([
'text' => 'Hello there',
], new class {
Expand All @@ -57,7 +58,7 @@ public function getInfo(): string

public function testConvertTextToSpeechResponse()
{
$converter = new ElevenLabsResultConverter();
$converter = new ElevenLabsResultConverter(new MockHttpClient());
$rawResult = new InMemoryRawResult([], new class {
public function getInfo(): string
{
Expand Down