Skip to content

Commit 41de046

Browse files
committed
feat(platform): add Speech
1 parent c72b187 commit 41de046

File tree

58 files changed

+1360
-15
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

58 files changed

+1360
-15
lines changed

docs/components/platform.rst

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,59 @@ This allows fast and isolated testing of AI-powered features without relying on
501501

502502
This requires `cURL` and the `ext-curl` extension to be installed.
503503

504+
Speech support
505+
~~~~~~~~~~~~~~
506+
507+
Using speech to send messages / receive answers as audio is a common use case when integrating agents and/or chats.
508+
509+
Speech support can be enabled using ``Symfony\AI\Platform\Speech\SpeechProviderListener``::
510+
511+
use Symfony\AI\Agent\Agent;
512+
use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechProvider;
513+
use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory;
514+
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory;
515+
use Symfony\AI\Platform\Message\Message;
516+
use Symfony\AI\Platform\Message\MessageBag;
517+
use Symfony\AI\Platform\Speech\SpeechConfiguration;
518+
use Symfony\AI\Platform\Speech\SpeechProviderListener;
519+
use Symfony\Component\EventDispatcher\EventDispatcher;
520+
521+
$eventDispatcher = new EventDispatcher();
522+
$eventDispatcher->addSubscriber(new SpeechProviderListener([
523+
new ElevenLabsSpeechProvider(PlatformFactory::create(
524+
apiKey: $elevenLabsApiKey,
525+
httpClient: http_client(),
526+
speechConfiguration: new SpeechConfiguration(
527+
ttsModel: 'eleven_multilingual_v2',
528+
ttsVoice: 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN)
529+
sttModel: 'eleven_multilingual_v2'
530+
)),
531+
),
532+
], []));
533+
534+
$platform = OpenAiPlatformFactory::create($openAiApiKey, httpClient: HttpClient::create(), eventDispatcher: $eventDispatcher);
535+
536+
$agent = new Agent($platform, 'gpt-4o');
537+
$answer = $agent->call(new MessageBag(
538+
Message::ofUser('Tina has one brother and one sister. How many sisters do Tina\'s siblings have?'),
539+
));
540+
541+
echo $answer->getSpeech('eleven_labs')->asBinary();
542+
543+
When using the bundle, the configuration lets you configure models and voices::
544+
545+
ai:
546+
platform:
547+
eleven_labs:
548+
api_key: '%env(ELEVEN_LABS_API_KEY)%'
549+
550+
speech:
551+
eleven_labs:
552+
tts_model: 'eleven_multilingual_v2'
553+
tts_voice: '%env(ELEVEN_LABS_VOICE_IDENTIFIER)%'
554+
tts_extra_options:
555+
foo: bar
556+
504557
Code Examples
505558
~~~~~~~~~~~~~
506559

examples/speech/README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Speech Examples
2+
3+
Speech is mainly used to transform text to audio and vice versa; it can also be used to create an audio-to-audio pipeline.
4+
5+
To run the examples, you can use additional tools like [mpg123](https://www.mpg123.de/):
6+
7+
```bash
8+
php speech/agent-eleven-labs-speech-tts.php | mpg123 -
9+
php speech/agent-eleven-labs-speech-sts.php | mpg123 -
10+
```
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\AI\Agent\Agent;
13+
use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechListener;
14+
use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechProvider;
15+
use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory;
16+
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory;
17+
use Symfony\AI\Platform\Message\Content\Audio;
18+
use Symfony\AI\Platform\Message\Message;
19+
use Symfony\AI\Platform\Message\MessageBag;
20+
use Symfony\AI\Platform\Speech\SpeechConfiguration;
21+
use Symfony\AI\Platform\Speech\SpeechProviderListener;
22+
use Symfony\Component\EventDispatcher\EventDispatcher;
23+
24+
require_once dirname(__DIR__).'/bootstrap.php';
25+
26+
$eventDispatcher = new EventDispatcher();
27+
$eventDispatcher->addSubscriber(new SpeechProviderListener([
28+
new ElevenLabsSpeechProvider(PlatformFactory::create(
29+
apiKey: env('ELEVEN_LABS_API_KEY'),
30+
httpClient: http_client(),
31+
speechConfiguration: new SpeechConfiguration(
32+
ttsModel: 'eleven_multilingual_v2',
33+
ttsVoice: 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN)
34+
sttModel: 'eleven_multilingual_v2'
35+
)),
36+
),
37+
], [
38+
new ElevenLabsSpeechListener(PlatformFactory::create(
39+
apiKey: env('ELEVEN_LABS_API_KEY'),
40+
httpClient: http_client(),
41+
speechConfiguration: new SpeechConfiguration(
42+
sttModel: 'scribe_v1'
43+
)),
44+
),
45+
]));
46+
47+
$platform = OpenAiPlatformFactory::create(env('OPENAI_API_KEY'), httpClient: http_client(), eventDispatcher: $eventDispatcher);
48+
49+
$agent = new Agent($platform, 'gpt-4o');
50+
$answer = $agent->call(new MessageBag(
51+
Message::ofUser(Audio::fromFile(dirname(__DIR__, 2).'/fixtures/audio.mp3'))
52+
));
53+
54+
echo $answer->getSpeech('eleven_labs')->asBinary();
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\AI\Agent\Agent;
13+
use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechListener;
14+
use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory;
15+
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory;
16+
use Symfony\AI\Platform\Message\Content\Audio;
17+
use Symfony\AI\Platform\Message\Message;
18+
use Symfony\AI\Platform\Message\MessageBag;
19+
use Symfony\AI\Platform\Speech\SpeechConfiguration;
20+
use Symfony\AI\Platform\Speech\SpeechProviderListener;
21+
use Symfony\Component\EventDispatcher\EventDispatcher;
22+
23+
require_once dirname(__DIR__).'/bootstrap.php';
24+
25+
$eventDispatcher = new EventDispatcher();
26+
$eventDispatcher->addSubscriber(new SpeechProviderListener([], [
27+
new ElevenLabsSpeechListener(PlatformFactory::create(
28+
apiKey: env('ELEVEN_LABS_API_KEY'),
29+
httpClient: http_client(),
30+
speechConfiguration: new SpeechConfiguration(
31+
sttModel: 'scribe_v1'
32+
)),
33+
),
34+
]));
35+
36+
$platform = OpenAiPlatformFactory::create(env('OPENAI_API_KEY'), httpClient: http_client(), eventDispatcher: $eventDispatcher);
37+
38+
$agent = new Agent($platform, 'gpt-4o');
39+
$answer = $agent->call(new MessageBag(
40+
Message::ofUser(Audio::fromFile(dirname(__DIR__, 2).'/fixtures/audio.mp3'))
41+
));
42+
43+
echo $answer->getContent();
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\AI\Agent\Agent;
13+
use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechProvider;
14+
use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory;
15+
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory;
16+
use Symfony\AI\Platform\Message\Message;
17+
use Symfony\AI\Platform\Message\MessageBag;
18+
use Symfony\AI\Platform\Speech\SpeechConfiguration;
19+
use Symfony\AI\Platform\Speech\SpeechProviderListener;
20+
use Symfony\Component\EventDispatcher\EventDispatcher;
21+
22+
require_once dirname(__DIR__).'/bootstrap.php';
23+
24+
$eventDispatcher = new EventDispatcher();
25+
$eventDispatcher->addSubscriber(new SpeechProviderListener([
26+
new ElevenLabsSpeechProvider(PlatformFactory::create(
27+
apiKey: env('ELEVEN_LABS_API_KEY'),
28+
httpClient: http_client(),
29+
speechConfiguration: new SpeechConfiguration(
30+
ttsModel: 'eleven_multilingual_v2',
31+
ttsVoice: 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN)
32+
sttModel: 'eleven_multilingual_v2'
33+
)),
34+
),
35+
], []));
36+
37+
$platform = OpenAiPlatformFactory::create(env('OPENAI_API_KEY'), httpClient: http_client(), eventDispatcher: $eventDispatcher);
38+
39+
$agent = new Agent($platform, 'gpt-4o');
40+
$answer = $agent->call(new MessageBag(
41+
Message::ofUser('Tina has one brother and one sister. How many sisters do Tina\'s siblings have?'),
42+
));
43+
44+
echo $answer->getSpeech('eleven_labs')->asBinary();

src/agent/src/Agent.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ public function getName(): string
6969
public function call(MessageBag $messages, array $options = []): ResultInterface
7070
{
7171
$input = new Input($this->getModel(), $messages, $options);
72-
array_map(fn (InputProcessorInterface $processor) => $processor->processInput($input), $this->inputProcessors);
72+
array_map(static fn (InputProcessorInterface $processor) => $processor->processInput($input), $this->inputProcessors);
7373

7474
$model = $input->getModel();
7575
$messages = $input->getMessageBag();
@@ -78,7 +78,7 @@ public function call(MessageBag $messages, array $options = []): ResultInterface
7878
$result = $this->platform->invoke($model, $messages, $options)->getResult();
7979

8080
$output = new Output($model, $result, $messages, $options);
81-
array_map(fn (OutputProcessorInterface $processor) => $processor->processOutput($output), $this->outputProcessors);
81+
array_map(static fn (OutputProcessorInterface $processor) => $processor->processOutput($output), $this->outputProcessors);
8282

8383
return $output->getResult();
8484
}

src/agent/src/Output.php

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
use Symfony\AI\Platform\Message\MessageBag;
1515
use Symfony\AI\Platform\Result\ResultInterface;
16+
use Symfony\AI\Platform\Speech\Speech;
1617

1718
/**
1819
* @author Christopher Hertel <mail@christopher-hertel.de>
@@ -27,6 +28,7 @@ public function __construct(
2728
private ResultInterface $result,
2829
private readonly MessageBag $messageBag,
2930
private readonly array $options = [],
31+
private ?Speech $speech = null,
3032
) {
3133
}
3234

@@ -57,4 +59,14 @@ public function getOptions(): array
5759
{
5860
return $this->options;
5961
}
62+
63+
public function setSpeech(?Speech $speech): void
64+
{
65+
$this->speech = $speech;
66+
}
67+
68+
public function getSpeech(): ?Speech
69+
{
70+
return $this->speech;
71+
}
6072
}

src/ai-bundle/config/options.php

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,10 @@
9292
->end()
9393
->arrayNode('eleven_labs')
9494
->children()
95-
->stringNode('host')->end()
9695
->stringNode('api_key')->isRequired()->end()
96+
->stringNode('host')
97+
->defaultValue('https://api.elevenlabs.io/v1')
98+
->end()
9799
->stringNode('http_client')
98100
->defaultValue('http_client')
99101
->info('Service ID of the HTTP client to use')
@@ -960,6 +962,23 @@
960962
->end()
961963
->end()
962964
->end()
965+
->arrayNode('speech')
966+
->children()
967+
->arrayNode('eleven_labs')
968+
->children()
969+
->stringNode('tts_model')->end()
970+
->stringNode('tts_voice')->end()
971+
->arrayNode('tts_extra_options')
972+
->scalarPrototype()->end()
973+
->end()
974+
->stringNode('stt_model')->end()
975+
->arrayNode('stt_extra_options')
976+
->scalarPrototype()->end()
977+
->end()
978+
->end()
979+
->end()
980+
->end()
981+
->end()
963982
->arrayNode('vectorizer')
964983
->info('Vectorizers for converting strings to Vector objects and transforming TextDocument arrays to VectorDocument arrays')
965984
->useAttributeAsKey('name')

src/ai-bundle/config/services.php

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@
6363
use Symfony\AI\Platform\Contract\JsonSchema\DescriptionParser;
6464
use Symfony\AI\Platform\Contract\JsonSchema\Factory as SchemaFactory;
6565
use Symfony\AI\Platform\Serializer\StructuredOutputSerializer;
66+
use Symfony\AI\Platform\Speech\SpeechProviderListener;
6667
use Symfony\AI\Platform\StructuredOutput\PlatformSubscriber;
6768
use Symfony\AI\Platform\StructuredOutput\ResponseFormatFactory;
6869
use Symfony\AI\Platform\StructuredOutput\ResponseFormatFactoryInterface;
@@ -235,5 +236,13 @@
235236
tagged_locator('ai.message_store', 'name'),
236237
])
237238
->tag('console.command')
239+
240+
// listeners
241+
->set('ai.speech_provider.listener', SpeechProviderListener::class)
242+
->args([
243+
tagged_iterator('ai.speech_provider', 'name'),
244+
tagged_iterator('ai.speech_listener', 'name'),
245+
])
246+
->tag('kernel.event_subscriber')
238247
;
239248
};

0 commit comments

Comments
 (0)