From 74bd8cb5049002a9f3078d6fc8a07a239970aab4 Mon Sep 17 00:00:00 2001 From: Guillaume Loulier Date: Sat, 13 Sep 2025 15:52:21 +0200 Subject: [PATCH] feat(platform): add Speech --- demo/tests/Blog/Command/StreamCommandTest.php | 14 ++ docs/components/platform.rst | 53 +++++++ examples/speech/README.md | 10 ++ .../speech/agent-eleven-labs-speech-sts.php | 54 +++++++ .../speech/agent-eleven-labs-speech-stt.php | 43 ++++++ .../speech/agent-eleven-labs-speech-tts.php | 44 ++++++ src/agent/src/Agent.php | 4 +- src/agent/src/Output.php | 12 ++ src/ai-bundle/config/options.php | 21 ++- src/ai-bundle/config/services.php | 9 ++ src/ai-bundle/src/AiBundle.php | 95 ++++++++++++- .../src/Profiler/TraceablePlatform.php | 7 +- .../DependencyInjection/AiBundleTest.php | 117 ++++++++++++++- .../src/Bridge/AiMlApi/PlatformFactory.php | 3 + .../src/Bridge/Albert/PlatformFactory.php | 3 + .../src/Bridge/Anthropic/PlatformFactory.php | 3 + .../src/Bridge/Azure/Meta/PlatformFactory.php | 4 +- .../Bridge/Azure/OpenAi/PlatformFactory.php | 3 + .../src/Bridge/Bedrock/PlatformFactory.php | 3 + .../src/Bridge/Cartesia/PlatformFactory.php | 3 + .../src/Bridge/Cerebras/PlatformFactory.php | 3 + .../src/Bridge/DeepSeek/PlatformFactory.php | 3 + .../DockerModelRunner/PlatformFactory.php | 3 + .../ElevenLabs/ElevenLabsSpeechListener.php | 53 +++++++ .../ElevenLabs/ElevenLabsSpeechProvider.php | 57 ++++++++ .../src/Bridge/ElevenLabs/PlatformFactory.php | 3 + .../src/Bridge/Gemini/PlatformFactory.php | 3 + .../Bridge/HuggingFace/PlatformFactory.php | 3 + .../src/Bridge/LiteLlm/PlatformFactory.php | 3 + .../src/Bridge/LmStudio/PlatformFactory.php | 3 + .../src/Bridge/Mistral/PlatformFactory.php | 3 + .../src/Bridge/Ollama/PlatformFactory.php | 3 + .../src/Bridge/OpenAi/PlatformFactory.php | 3 + .../src/Bridge/OpenRouter/PlatformFactory.php | 3 + .../src/Bridge/Perplexity/PlatformFactory.php | 3 + .../src/Bridge/Replicate/PlatformFactory.php | 3 + .../src/Bridge/Scaleway/PlatformFactory.php | 3 + 
.../TransformersPhp/PlatformFactory.php | 4 +- .../src/Bridge/VertexAi/PlatformFactory.php | 3 + .../src/Bridge/Voyage/PlatformFactory.php | 3 + src/platform/src/CachedPlatform.php | 6 + src/platform/src/Message/UserMessage.php | 14 ++ src/platform/src/Platform.php | 7 + src/platform/src/PlatformInterface.php | 3 + src/platform/src/Result/BaseResult.php | 2 + src/platform/src/Result/DeferredResult.php | 6 + src/platform/src/Result/ResultInterface.php | 5 + src/platform/src/Speech/Speech.php | 48 +++++++ src/platform/src/Speech/SpeechBag.php | 47 +++++++ .../src/Speech/SpeechBagAwareTrait.php | 43 ++++++ .../src/Speech/SpeechConfiguration.php | 31 ++++ .../src/Speech/SpeechListenerInterface.php | 32 +++++ .../src/Speech/SpeechProviderInterface.php | 30 ++++ .../src/Speech/SpeechProviderListener.php | 82 +++++++++++ src/platform/src/Test/InMemoryPlatform.php | 9 ++ .../ElevenLabsSpeechListenerTest.php | 127 +++++++++++++++++ .../ElevenLabsSpeechProviderTest.php | 115 +++++++++++++++ src/platform/tests/Speech/SpeechBagTest.php | 44 ++++++ .../Speech/SpeechProviderListenerTest.php | 133 ++++++++++++++++++ 59 files changed, 1439 insertions(+), 15 deletions(-) create mode 100644 examples/speech/README.md create mode 100644 examples/speech/agent-eleven-labs-speech-sts.php create mode 100644 examples/speech/agent-eleven-labs-speech-stt.php create mode 100644 examples/speech/agent-eleven-labs-speech-tts.php create mode 100644 src/platform/src/Bridge/ElevenLabs/ElevenLabsSpeechListener.php create mode 100644 src/platform/src/Bridge/ElevenLabs/ElevenLabsSpeechProvider.php create mode 100644 src/platform/src/Speech/Speech.php create mode 100644 src/platform/src/Speech/SpeechBag.php create mode 100644 src/platform/src/Speech/SpeechBagAwareTrait.php create mode 100644 src/platform/src/Speech/SpeechConfiguration.php create mode 100644 src/platform/src/Speech/SpeechListenerInterface.php create mode 100644 src/platform/src/Speech/SpeechProviderInterface.php create mode 100644 
src/platform/src/Speech/SpeechProviderListener.php create mode 100644 src/platform/tests/Bridge/ElevenLabs/ElevenLabsSpeechListenerTest.php create mode 100644 src/platform/tests/Bridge/ElevenLabs/ElevenLabsSpeechProviderTest.php create mode 100644 src/platform/tests/Speech/SpeechBagTest.php create mode 100644 src/platform/tests/Speech/SpeechProviderListenerTest.php diff --git a/demo/tests/Blog/Command/StreamCommandTest.php b/demo/tests/Blog/Command/StreamCommandTest.php index b93daf92f..b0f0c4038 100644 --- a/demo/tests/Blog/Command/StreamCommandTest.php +++ b/demo/tests/Blog/Command/StreamCommandTest.php @@ -16,8 +16,13 @@ use Symfony\AI\Agent\AgentInterface; use Symfony\AI\Platform\Message\MessageBag; use Symfony\AI\Platform\Metadata\Metadata; +use Symfony\AI\Platform\Result\DeferredResult; +use Symfony\AI\Platform\Result\InMemoryRawResult; use Symfony\AI\Platform\Result\RawResultInterface; use Symfony\AI\Platform\Result\ResultInterface; +use Symfony\AI\Platform\Result\TextResult; +use Symfony\AI\Platform\Speech\Speech; +use Symfony\AI\Platform\Test\PlainConverter; use Symfony\Component\Console\Input\ArrayInput; use Symfony\Component\Console\Output\BufferedOutput; use Symfony\Component\Console\Style\SymfonyStyle; @@ -52,6 +57,15 @@ public function getRawResult(): ?RawResultInterface public function setRawResult(RawResultInterface $rawResult): void { } + + public function addSpeech(Speech $speech): void + { + } + + public function getSpeech(string $identifier): Speech + { + return new Speech([], new DeferredResult(new PlainConverter(new TextResult('foo')), new InMemoryRawResult()), 'bar'); + } }); $input = new ArrayInput([]); diff --git a/docs/components/platform.rst b/docs/components/platform.rst index 400d0ce96..98c804a69 100644 --- a/docs/components/platform.rst +++ b/docs/components/platform.rst @@ -501,6 +501,59 @@ This allows fast and isolated testing of AI-powered features without relying on This requires `cURL` and the `ext-curl` extension to be installed. 
+Speech support +~~~~~~~~~~~~~~ + +Using speech to send messages / receive answers as audio is a common use case when integrating agents and/or chats. + +Speech support can be enabled using ``Symfony\AI\Platform\Speech\SpeechProviderListener``:: + + use Symfony\AI\Agent\Agent; + use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechProvider; + use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory; + use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory; + use Symfony\AI\Platform\Message\Message; + use Symfony\AI\Platform\Message\MessageBag; + use Symfony\AI\Platform\Speech\SpeechConfiguration; + use Symfony\AI\Platform\Speech\SpeechProviderListener; + use Symfony\Component\EventDispatcher\EventDispatcher; + + $eventDispatcher = new EventDispatcher(); + $eventDispatcher->addSubscriber(new SpeechProviderListener([ + new ElevenLabsSpeechProvider(PlatformFactory::create( + apiKey: $elevenLabsApiKey, + httpClient: http_client(), + speechConfiguration: new SpeechConfiguration( + ttsModel: 'eleven_multilingual_v2', + ttsVoice: 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN) + sttModel: 'eleven_multilingual_v2' + )), + ), + ], [])); + + $platform = OpenAiPlatformFactory::create($openAiApiKey, httpClient: HttpClient::create(), eventDispatcher: $eventDispatcher); + + $agent = new Agent($platform, 'gpt-4o'); + $answer = $agent->call(new MessageBag( + Message::ofUser('Tina has one brother and one sister.
How many sisters do Tina\'s siblings have?'), + )); + + echo $answer->getSpeech('eleven_labs')->asBinary(); + +When using the bundle, the configuration lets you configure models and voices:: + + ai: + platform: + eleven_labs: + api_key: '%env(ELEVEN_LABS_API_KEY)%' + + speech: + eleven_labs: + tts_model: 'eleven_multilingual_v2' + tts_voice: '%env(ELEVEN_LABS_VOICE_IDENTIFIER)%' + tts_extra_options: + foo: bar + Code Examples ~~~~~~~~~~~~~ diff --git a/examples/speech/README.md b/examples/speech/README.md new file mode 100644 index 000000000..4a54d0da0 --- /dev/null +++ b/examples/speech/README.md @@ -0,0 +1,10 @@ +# Speech Examples + +Speech is mainly used to transform text to audio and vice versa; it can also be used to create an audio-to-audio pipeline. + +To run the examples, you can use additional tools like [mpg123](https://www.mpg123.de/): + +```bash +php speech/agent-eleven-labs-speech-tts.php | mpg123 - +php speech/agent-eleven-labs-speech-sts.php | mpg123 - +``` diff --git a/examples/speech/agent-eleven-labs-speech-sts.php b/examples/speech/agent-eleven-labs-speech-sts.php new file mode 100644 index 000000000..d172e285b --- /dev/null +++ b/examples/speech/agent-eleven-labs-speech-sts.php @@ -0,0 +1,54 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code.
+ */ + +use Symfony\AI\Agent\Agent; +use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechListener; +use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechProvider; +use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory; +use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory; +use Symfony\AI\Platform\Message\Content\Audio; +use Symfony\AI\Platform\Message\Message; +use Symfony\AI\Platform\Message\MessageBag; +use Symfony\AI\Platform\Speech\SpeechConfiguration; +use Symfony\AI\Platform\Speech\SpeechProviderListener; +use Symfony\Component\EventDispatcher\EventDispatcher; + +require_once dirname(__DIR__).'/bootstrap.php'; + +$eventDispatcher = new EventDispatcher(); +$eventDispatcher->addSubscriber(new SpeechProviderListener([ + new ElevenLabsSpeechProvider(PlatformFactory::create( + apiKey: env('ELEVEN_LABS_API_KEY'), + httpClient: http_client(), + speechConfiguration: new SpeechConfiguration( + ttsModel: 'eleven_multilingual_v2', + ttsVoice: 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN) + sttModel: 'eleven_multilingual_v2' + )), + ), +], [ + new ElevenLabsSpeechListener(PlatformFactory::create( + apiKey: env('ELEVEN_LABS_API_KEY'), + httpClient: http_client(), + speechConfiguration: new SpeechConfiguration( + sttModel: 'scribe_v1' + )), + ), +])); + +$platform = OpenAiPlatformFactory::create(env('OPENAI_API_KEY'), httpClient: http_client(), eventDispatcher: $eventDispatcher); + +$agent = new Agent($platform, 'gpt-4o'); +$answer = $agent->call(new MessageBag( + Message::ofUser(Audio::fromFile(dirname(__DIR__, 2).'/fixtures/audio.mp3')) +)); + +echo $answer->getSpeech('eleven_labs')->asBinary(); diff --git a/examples/speech/agent-eleven-labs-speech-stt.php b/examples/speech/agent-eleven-labs-speech-stt.php new file mode 100644 index 000000000..c92eb675f --- /dev/null +++ b/examples/speech/agent-eleven-labs-speech-stt.php @@ -0,0 +1,43 @@ + + * + * For the full copyright and 
license information, please view the LICENSE + * file that was distributed with this source code. + */ + +use Symfony\AI\Agent\Agent; +use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechListener; +use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory; +use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory; +use Symfony\AI\Platform\Message\Content\Audio; +use Symfony\AI\Platform\Message\Message; +use Symfony\AI\Platform\Message\MessageBag; +use Symfony\AI\Platform\Speech\SpeechConfiguration; +use Symfony\AI\Platform\Speech\SpeechProviderListener; +use Symfony\Component\EventDispatcher\EventDispatcher; + +require_once dirname(__DIR__).'/bootstrap.php'; + +$eventDispatcher = new EventDispatcher(); +$eventDispatcher->addSubscriber(new SpeechProviderListener([], [ + new ElevenLabsSpeechListener(PlatformFactory::create( + apiKey: env('ELEVEN_LABS_API_KEY'), + httpClient: http_client(), + speechConfiguration: new SpeechConfiguration( + sttModel: 'scribe_v1' + )), + ), +])); + +$platform = OpenAiPlatformFactory::create(env('OPENAI_API_KEY'), httpClient: http_client(), eventDispatcher: $eventDispatcher); + +$agent = new Agent($platform, 'gpt-4o'); +$answer = $agent->call(new MessageBag( + Message::ofUser(Audio::fromFile(dirname(__DIR__, 2).'/fixtures/audio.mp3')) +)); + +echo $answer->getContent(); diff --git a/examples/speech/agent-eleven-labs-speech-tts.php b/examples/speech/agent-eleven-labs-speech-tts.php new file mode 100644 index 000000000..67a1a3933 --- /dev/null +++ b/examples/speech/agent-eleven-labs-speech-tts.php @@ -0,0 +1,44 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +use Symfony\AI\Agent\Agent; +use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechProvider; +use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory; +use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory; +use Symfony\AI\Platform\Message\Message; +use Symfony\AI\Platform\Message\MessageBag; +use Symfony\AI\Platform\Speech\SpeechConfiguration; +use Symfony\AI\Platform\Speech\SpeechProviderListener; +use Symfony\Component\EventDispatcher\EventDispatcher; + +require_once dirname(__DIR__).'/bootstrap.php'; + +$eventDispatcher = new EventDispatcher(); +$eventDispatcher->addSubscriber(new SpeechProviderListener([ + new ElevenLabsSpeechProvider(PlatformFactory::create( + apiKey: env('ELEVEN_LABS_API_KEY'), + httpClient: http_client(), + speechConfiguration: new SpeechConfiguration( + ttsModel: 'eleven_multilingual_v2', + ttsVoice: 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN) + sttModel: 'eleven_multilingual_v2' + )), + ), +], [])); + +$platform = OpenAiPlatformFactory::create(env('OPENAI_API_KEY'), httpClient: http_client(), eventDispatcher: $eventDispatcher); + +$agent = new Agent($platform, 'gpt-4o'); +$answer = $agent->call(new MessageBag( + Message::ofUser('Tina has one brother and one sister. 
How many sisters do Tina\'s siblings have?'), +)); + +echo $answer->getSpeech('eleven_labs')->asBinary(); diff --git a/src/agent/src/Agent.php b/src/agent/src/Agent.php index c51714bc2..7b505effb 100644 --- a/src/agent/src/Agent.php +++ b/src/agent/src/Agent.php @@ -69,7 +69,7 @@ public function getName(): string public function call(MessageBag $messages, array $options = []): ResultInterface { $input = new Input($this->getModel(), $messages, $options); - array_map(fn (InputProcessorInterface $processor) => $processor->processInput($input), $this->inputProcessors); + array_map(static fn (InputProcessorInterface $processor) => $processor->processInput($input), $this->inputProcessors); $model = $input->getModel(); $messages = $input->getMessageBag(); @@ -78,7 +78,7 @@ public function call(MessageBag $messages, array $options = []): ResultInterface $result = $this->platform->invoke($model, $messages, $options)->getResult(); $output = new Output($model, $result, $messages, $options); - array_map(fn (OutputProcessorInterface $processor) => $processor->processOutput($output), $this->outputProcessors); + array_map(static fn (OutputProcessorInterface $processor) => $processor->processOutput($output), $this->outputProcessors); return $output->getResult(); } diff --git a/src/agent/src/Output.php b/src/agent/src/Output.php index d069d47a7..a98206aa1 100644 --- a/src/agent/src/Output.php +++ b/src/agent/src/Output.php @@ -13,6 +13,7 @@ use Symfony\AI\Platform\Message\MessageBag; use Symfony\AI\Platform\Result\ResultInterface; +use Symfony\AI\Platform\Speech\Speech; /** * @author Christopher Hertel @@ -27,6 +28,7 @@ public function __construct( private ResultInterface $result, private readonly MessageBag $messageBag, private readonly array $options = [], + private ?Speech $speech = null, ) { } @@ -57,4 +59,14 @@ public function getOptions(): array { return $this->options; } + + public function setSpeech(?Speech $speech): void + { + $this->speech = $speech; + } + + public 
function getSpeech(): ?Speech + { + return $this->speech; + } } diff --git a/src/ai-bundle/config/options.php b/src/ai-bundle/config/options.php index a21b35562..f2460ed5f 100644 --- a/src/ai-bundle/config/options.php +++ b/src/ai-bundle/config/options.php @@ -92,8 +92,10 @@ ->end() ->arrayNode('eleven_labs') ->children() - ->stringNode('host')->end() ->stringNode('api_key')->isRequired()->end() + ->stringNode('host') + ->defaultValue('https://api.elevenlabs.io/v1') + ->end() ->stringNode('http_client') ->defaultValue('http_client') ->info('Service ID of the HTTP client to use') @@ -960,6 +962,23 @@ ->end() ->end() ->end() + ->arrayNode('speech') + ->children() + ->arrayNode('eleven_labs') + ->children() + ->stringNode('tts_model')->end() + ->stringNode('tts_voice')->end() + ->arrayNode('tts_extra_options') + ->scalarPrototype()->end() + ->end() + ->stringNode('stt_model')->end() + ->arrayNode('stt_extra_options') + ->scalarPrototype()->end() + ->end() + ->end() + ->end() + ->end() + ->end() ->arrayNode('vectorizer') ->info('Vectorizers for converting strings to Vector objects and transforming TextDocument arrays to VectorDocument arrays') ->useAttributeAsKey('name') diff --git a/src/ai-bundle/config/services.php b/src/ai-bundle/config/services.php index 1baacccc9..2c82739dd 100644 --- a/src/ai-bundle/config/services.php +++ b/src/ai-bundle/config/services.php @@ -63,6 +63,7 @@ use Symfony\AI\Platform\Contract\JsonSchema\DescriptionParser; use Symfony\AI\Platform\Contract\JsonSchema\Factory as SchemaFactory; use Symfony\AI\Platform\Serializer\StructuredOutputSerializer; +use Symfony\AI\Platform\Speech\SpeechProviderListener; use Symfony\AI\Platform\StructuredOutput\PlatformSubscriber; use Symfony\AI\Platform\StructuredOutput\ResponseFormatFactory; use Symfony\AI\Platform\StructuredOutput\ResponseFormatFactoryInterface; @@ -235,5 +236,13 @@ tagged_locator('ai.message_store', 'name'), ]) ->tag('console.command') + + // listeners + ->set('ai.speech_provider.listener', 
SpeechProviderListener::class) + ->args([ + tagged_iterator('ai.speech_provider', 'name'), + tagged_iterator('ai.speech_listener', 'name'), + ]) + ->tag('kernel.event_subscriber') ; }; diff --git a/src/ai-bundle/src/AiBundle.php b/src/ai-bundle/src/AiBundle.php index 1b5f927fe..858ab5564 100644 --- a/src/ai-bundle/src/AiBundle.php +++ b/src/ai-bundle/src/AiBundle.php @@ -55,6 +55,8 @@ use Symfony\AI\Platform\Bridge\Cerebras\PlatformFactory as CerebrasPlatformFactory; use Symfony\AI\Platform\Bridge\DeepSeek\PlatformFactory as DeepSeekPlatformFactory; use Symfony\AI\Platform\Bridge\DockerModelRunner\PlatformFactory as DockerModelRunnerPlatformFactory; +use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechListener; +use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechProvider; use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory as ElevenLabsPlatformFactory; use Symfony\AI\Platform\Bridge\Gemini\PlatformFactory as GeminiPlatformFactory; use Symfony\AI\Platform\Bridge\HuggingFace\PlatformFactory as HuggingFacePlatformFactory; @@ -76,6 +78,9 @@ use Symfony\AI\Platform\Platform; use Symfony\AI\Platform\PlatformInterface; use Symfony\AI\Platform\ResultConverterInterface; +use Symfony\AI\Platform\Speech\SpeechConfiguration; +use Symfony\AI\Platform\Speech\SpeechListenerInterface; +use Symfony\AI\Platform\Speech\SpeechProviderInterface; use Symfony\AI\Store\Bridge\Azure\SearchStore as AzureSearchStore; use Symfony\AI\Store\Bridge\ChromaDb\Store as ChromaDbStore; use Symfony\AI\Store\Bridge\ClickHouse\Store as ClickHouseStore; @@ -250,6 +255,17 @@ public function loadExtension(array $config, ContainerConfigurator $container, C } } + foreach ($config['speech'] ?? 
[] as $voiceProvider => $provider) { + $this->processSpeechConfig($voiceProvider, $provider, $builder); + } + + $speechProviders = array_keys($builder->findTaggedServiceIds('ai.speech_provider')); + $speechListeners = array_keys($builder->findTaggedServiceIds('ai.speech_listener')); + + if ([] === $speechProviders && [] === $speechListeners) { + $builder->removeDefinition('ai.speech_provider.listener'); + } + foreach ($config['vectorizer'] ?? [] as $vectorizerName => $vectorizer) { $this->processVectorizerConfig($vectorizerName, $vectorizer, $builder); } @@ -323,6 +339,7 @@ private function processPlatformConfig(string $type, array $platform, ContainerB $platform['base_url'], new Reference($platform['http_client'], ContainerInterface::NULL_ON_INVALID_REFERENCE), new Reference('ai.platform.model_catalog.albert'), + null, new Reference('event_dispatcher'), ]) ->addTag('ai.platform', ['name' => 'albert']); @@ -343,6 +360,7 @@ private function processPlatformConfig(string $type, array $platform, ContainerB new Reference($platform['http_client'], ContainerInterface::NULL_ON_INVALID_REFERENCE), new Reference('ai.platform.model_catalog.anthropic'), new Reference('ai.platform.contract.anthropic'), + null, new Reference('event_dispatcher'), ]) ->addTag('ai.platform', ['name' => 'anthropic']); @@ -367,6 +385,7 @@ private function processPlatformConfig(string $type, array $platform, ContainerB new Reference($config['http_client'], ContainerInterface::NULL_ON_INVALID_REFERENCE), new Reference('ai.platform.model_catalog.openai'), new Reference('ai.platform.contract.openai'), + null, new Reference('event_dispatcher'), ]) ->addTag('ai.platform', ['name' => 'azure.'.$name]); @@ -406,6 +425,7 @@ private function processPlatformConfig(string $type, array $platform, ContainerB new Reference($platform['http_client'], ContainerInterface::NULL_ON_INVALID_REFERENCE), new Reference('ai.platform.model_catalog.cartesia'), null, + null, new Reference('event_dispatcher'), ]) 
->addTag('ai.platform', ['name' => 'cartesia']); @@ -416,22 +436,22 @@ private function processPlatformConfig(string $type, array $platform, ContainerB } if ('eleven_labs' === $type) { - $platformId = 'ai.platform.eleven_labs'; $definition = (new Definition(Platform::class)) ->setFactory(ElevenLabsPlatformFactory::class.'::create') ->setLazy(true) - ->addTag('proxy', ['interface' => PlatformInterface::class]) ->setArguments([ $platform['api_key'], $platform['host'], new Reference($platform['http_client'], ContainerInterface::NULL_ON_INVALID_REFERENCE), new Reference('ai.platform.model_catalog.elevenlabs'), null, + null, new Reference('event_dispatcher'), ]) + ->addTag('proxy', ['interface' => PlatformInterface::class]) ->addTag('ai.platform', ['name' => 'eleven_labs']); - $container->setDefinition($platformId, $definition); + $container->setDefinition('ai.platform.eleven_labs', $definition); return; } @@ -447,6 +467,7 @@ private function processPlatformConfig(string $type, array $platform, ContainerB new Reference($platform['http_client'], ContainerInterface::NULL_ON_INVALID_REFERENCE), new Reference('ai.platform.model_catalog.gemini'), new Reference('ai.platform.contract.gemini'), + null, new Reference('event_dispatcher'), ]) ->addTag('ai.platform', ['name' => 'gemini']); @@ -468,6 +489,7 @@ private function processPlatformConfig(string $type, array $platform, ContainerB new Reference($platform['http_client'], ContainerInterface::NULL_ON_INVALID_REFERENCE), new Reference('ai.platform.model_catalog.huggingface'), new Reference('ai.platform.contract.huggingface'), + null, new Reference('event_dispatcher'), ]) ->addTag('ai.platform', ['name' => 'huggingface']); @@ -509,6 +531,7 @@ private function processPlatformConfig(string $type, array $platform, ContainerB $httpClient, new Reference('ai.platform.model_catalog.vertexai.gemini'), new Reference('ai.platform.contract.vertexai.gemini'), + null, new Reference('event_dispatcher'), ]) ->addTag('ai.platform', ['name' => 
'vertexai']); @@ -530,6 +553,7 @@ private function processPlatformConfig(string $type, array $platform, ContainerB new Reference('ai.platform.model_catalog.openai'), new Reference('ai.platform.contract.openai'), $platform['region'] ?? null, + null, new Reference('event_dispatcher'), ]) ->addTag('ai.platform', ['name' => 'openai']); @@ -550,6 +574,7 @@ private function processPlatformConfig(string $type, array $platform, ContainerB new Reference($platform['http_client'], ContainerInterface::NULL_ON_INVALID_REFERENCE), new Reference('ai.platform.model_catalog.openrouter'), null, + null, new Reference('event_dispatcher'), ]) ->addTag('ai.platform', ['name' => 'openrouter']); @@ -570,6 +595,7 @@ private function processPlatformConfig(string $type, array $platform, ContainerB new Reference($platform['http_client'], ContainerInterface::NULL_ON_INVALID_REFERENCE), new Reference('ai.platform.model_catalog.mistral'), null, + null, new Reference('event_dispatcher'), ]) ->addTag('ai.platform', ['name' => 'mistral']); @@ -590,6 +616,7 @@ private function processPlatformConfig(string $type, array $platform, ContainerB new Reference($platform['http_client'], ContainerInterface::NULL_ON_INVALID_REFERENCE), new Reference('ai.platform.model_catalog.lmstudio'), null, + null, new Reference('event_dispatcher'), ]) ->addTag('ai.platform', ['name' => 'lmstudio']); @@ -619,6 +646,7 @@ private function processPlatformConfig(string $type, array $platform, ContainerB new Reference($platform['http_client'], ContainerInterface::NULL_ON_INVALID_REFERENCE), new Reference('ai.platform.model_catalog.ollama'), new Reference('ai.platform.contract.ollama'), + null, new Reference('event_dispatcher'), ]) ->addTag('proxy', ['interface' => PlatformInterface::class]) @@ -640,6 +668,7 @@ private function processPlatformConfig(string $type, array $platform, ContainerB new Reference($platform['http_client'], ContainerInterface::NULL_ON_INVALID_REFERENCE), new Reference('ai.platform.model_catalog.cerebras'), 
null, + null, new Reference('event_dispatcher'), ]) ->addTag('ai.platform', ['name' => 'cerebras']); @@ -660,6 +689,7 @@ private function processPlatformConfig(string $type, array $platform, ContainerB new Reference($platform['http_client'], ContainerInterface::NULL_ON_INVALID_REFERENCE), new Reference('ai.platform.model_catalog.deepseek'), null, + null, new Reference('event_dispatcher'), ]) ->addTag('ai.platform', ['name' => 'deepseek']); @@ -680,6 +710,7 @@ private function processPlatformConfig(string $type, array $platform, ContainerB new Reference($platform['http_client'], ContainerInterface::NULL_ON_INVALID_REFERENCE), new Reference('ai.platform.model_catalog.voyage'), null, + null, new Reference('event_dispatcher'), ]) ->addTag('ai.platform', ['name' => 'voyage']); @@ -700,6 +731,7 @@ private function processPlatformConfig(string $type, array $platform, ContainerB new Reference($platform['http_client'], ContainerInterface::NULL_ON_INVALID_REFERENCE), new Reference('ai.platform.model_catalog.perplexity'), new Reference('ai.platform.contract.perplexity'), + null, new Reference('event_dispatcher'), ]) ->addTag('ai.platform'); @@ -720,6 +752,7 @@ private function processPlatformConfig(string $type, array $platform, ContainerB new Reference($platform['http_client'], ContainerInterface::NULL_ON_INVALID_REFERENCE), new Reference('ai.platform.model_catalog.dockermodelrunner'), null, + null, new Reference('event_dispatcher'), ]) ->addTag('ai.platform'); @@ -740,6 +773,7 @@ private function processPlatformConfig(string $type, array $platform, ContainerB new Reference('http_client', ContainerInterface::NULL_ON_INVALID_REFERENCE), new Reference('ai.platform.model_catalog.scaleway'), null, + null, new Reference('event_dispatcher'), ]) ->addTag('ai.platform'); @@ -1827,6 +1861,61 @@ private function processChatConfig(string $name, array $configuration, Container $container->registerAliasForArgument('ai.chat.'.$name, ChatInterface::class, $name); } + /** + * @param array 
$provider + */ + private function processSpeechConfig(string $name, array $provider, ContainerBuilder $container): void + { + if ('eleven_labs' === $name) { + $configurationDefinition = new Definition(SpeechConfiguration::class); + $configurationDefinition + ->setArguments([ + $provider['tts_model'], + $provider['tts_voice'], + $provider['tts_extra_options'] ?? [], + $provider['stt_model'], + $provider['stt_extra_options'] ?? [], + ]); + + $container->setDefinition('ai.speech.'.$name.'.configuration', $configurationDefinition); + + if (!$container->hasDefinition('ai.platform.eleven_labs')) { + throw new RuntimeException('The ElevenLabs platform cannot be found.'); + } + + $container->getDefinition('ai.platform.eleven_labs') + ->replaceArgument(5, new Reference('ai.speech.'.$name.'.configuration')); + + if (\array_key_exists('tts_model', $provider)) { + $definition = new Definition(ElevenLabsSpeechProvider::class); + $definition + ->setLazy(true) + ->setArguments([ + new Reference('ai.platform.eleven_labs'), + ]) + ->addTag('proxy', ['interface' => SpeechProviderInterface::class]) + ->addTag('ai.speech_provider'); + + $container->setDefinition('ai.speech_provider.'.$name, $definition); + $container->registerAliasForArgument('ai.speech_provider.'.$name, SpeechProviderInterface::class, $name); + } + + if (\array_key_exists('stt_model', $provider)) { + $definition = new Definition(ElevenLabsSpeechListener::class); + $definition + ->setLazy(true) + ->setArguments([ + new Reference('ai.platform.eleven_labs'), + ]) + ->addTag('proxy', ['interface' => SpeechListenerInterface::class]) + ->addTag('ai.speech_listener'); + + $container->setDefinition('ai.speech_listener.'.$name, $definition); + $container->registerAliasForArgument('ai.speech_listener.'.$name, SpeechListenerInterface::class, $name); + } + } + } + /** * @param array $config */ diff --git a/src/ai-bundle/src/Profiler/TraceablePlatform.php b/src/ai-bundle/src/Profiler/TraceablePlatform.php index 
5bfe6b58b..5f131633e 100644 --- a/src/ai-bundle/src/Profiler/TraceablePlatform.php +++ b/src/ai-bundle/src/Profiler/TraceablePlatform.php @@ -12,12 +12,12 @@ namespace Symfony\AI\AiBundle\Profiler; use Symfony\AI\Platform\Message\Content\File; -use Symfony\AI\Platform\Model; use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface; use Symfony\AI\Platform\PlatformInterface; use Symfony\AI\Platform\Result\DeferredResult; use Symfony\AI\Platform\Result\ResultInterface; use Symfony\AI\Platform\Result\StreamResult; +use Symfony\AI\Platform\Speech\SpeechConfiguration; use Symfony\AI\Platform\Test\PlainConverter; /** @@ -75,6 +75,11 @@ public function getModelCatalog(): ModelCatalogInterface return $this->platform->getModelCatalog(); } + public function getSpeechConfiguration(): ?SpeechConfiguration + { + return $this->platform->getSpeechConfiguration(); + } + private function createTraceableStreamResult(\Generator $originalStream): StreamResult { return $result = new StreamResult((function () use (&$result, $originalStream) { diff --git a/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php b/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php index 4dd896ba9..b77bc3aef 100644 --- a/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php +++ b/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php @@ -28,7 +28,10 @@ use Symfony\AI\Chat\MessageStoreInterface; use Symfony\AI\Platform\Bridge\Ollama\OllamaApiCatalog; use Symfony\AI\Platform\Capability; +use Symfony\AI\Platform\Exception\RuntimeException as PlatformRuntimeException; use Symfony\AI\Platform\Model; +use Symfony\AI\Platform\Speech\SpeechListenerInterface; +use Symfony\AI\Platform\Speech\SpeechProviderInterface; use Symfony\AI\Store\Bridge\Azure\SearchStore as AzureStore; use Symfony\AI\Store\Bridge\ChromaDb\Store as ChromaDbStore; use Symfony\AI\Store\Bridge\ClickHouse\Store as ClickhouseStore; @@ -112,6 +115,7 @@ public function testStoreCommandsArentDefinedWithoutStore() 'ai.command.drop_store' 
=> true, 'ai.command.setup_message_store' => true, 'ai.command.drop_message_store' => true, + 'ai.speech_provider.listener' => true, ], $container->getRemovedIds()); } @@ -134,6 +138,7 @@ public function testMessageStoreCommandsArentDefinedWithoutMessageStore() 'ai.command.drop_store' => true, 'ai.command.setup_message_store' => true, 'ai.command.drop_message_store' => true, + 'ai.speech_provider.listener' => true, ], $container->getRemovedIds()); } @@ -958,7 +963,7 @@ public function testOllamaCanBeCreatedWithCatalogFromApi() $ollamaDefinition = $container->getDefinition('ai.platform.ollama'); $this->assertTrue($ollamaDefinition->isLazy()); - $this->assertCount(5, $ollamaDefinition->getArguments()); + $this->assertCount(6, $ollamaDefinition->getArguments()); $this->assertSame('http://127.0.0.1:11434', $ollamaDefinition->getArgument(0)); $this->assertInstanceOf(Reference::class, $ollamaDefinition->getArgument(1)); $this->assertSame('http_client', (string) $ollamaDefinition->getArgument(1)); @@ -966,8 +971,9 @@ public function testOllamaCanBeCreatedWithCatalogFromApi() $this->assertSame('ai.platform.model_catalog.ollama', (string) $ollamaDefinition->getArgument(2)); $this->assertInstanceOf(Reference::class, $ollamaDefinition->getArgument(3)); $this->assertSame('ai.platform.contract.ollama', (string) $ollamaDefinition->getArgument(3)); - $this->assertInstanceOf(Reference::class, $ollamaDefinition->getArgument(4)); - $this->assertSame('event_dispatcher', (string) $ollamaDefinition->getArgument(4)); + $this->assertNull($ollamaDefinition->getArgument(4)); + $this->assertInstanceOf(Reference::class, $ollamaDefinition->getArgument(5)); + $this->assertSame('event_dispatcher', (string) $ollamaDefinition->getArgument(5)); $ollamaCatalogDefinition = $container->getDefinition('ai.platform.model_catalog.ollama'); @@ -1167,7 +1173,7 @@ public function testOpenAiPlatformWithDefaultRegion() $definition = $container->getDefinition('ai.platform.openai'); $arguments = 
$definition->getArguments();
 
-        $this->assertCount(6, $arguments);
+        $this->assertCount(7, $arguments);
         $this->assertSame('sk-test-key', $arguments[0]);
         $this->assertNull($arguments[4]); // region should be null by default
     }
@@ -1193,7 +1199,7 @@ public function testOpenAiPlatformWithRegion(?string $region)
         $definition = $container->getDefinition('ai.platform.openai');
         $arguments = $definition->getArguments();
 
-        $this->assertCount(6, $arguments);
+        $this->assertCount(7, $arguments);
         $this->assertSame('sk-test-key', $arguments[0]);
         $this->assertSame($region, $arguments[4]);
     }
@@ -1232,7 +1238,7 @@ public function testPerplexityPlatformConfiguration()
         $definition = $container->getDefinition('ai.platform.perplexity');
         $arguments = $definition->getArguments();
 
-        $this->assertCount(5, $arguments);
+        $this->assertCount(6, $arguments);
         $this->assertSame('pplx-test-key', $arguments[0]);
         $this->assertInstanceOf(Reference::class, $arguments[1]);
         $this->assertSame('http_client', (string) $arguments[1]);
@@ -3971,6 +3977,95 @@ public function testModelConfigurationIsIgnoredForUnknownPlatform()
         $this->assertSame([], $definition->getArguments());
     }
 
+    public function testSpeechProviderListenerCannotBeRegisteredWithoutSpeechProviders()
+    {
+        // An empty "speech" section must register neither the listener nor a provider configuration.
+        $container = $this->buildContainer([
+            'ai' => [
+                'speech' => [],
+            ],
+        ]);
+
+        $this->assertFalse($container->hasDefinition('ai.speech_provider.listener'));
+        $this->assertFalse($container->hasDefinition('ai.speech.eleven_labs.configuration'));
+    }
+
+    public function testElevenLabsSpeechProviderListenerCannotBeRegisteredWithoutPlatform()
+    {
+        // ElevenLabs speech settings without an "eleven_labs" platform entry must make the container build fail.
+        $this->expectException(PlatformRuntimeException::class);
+        $this->expectExceptionMessage('The ElevenLabs platform cannot be found.');
+        $this->expectExceptionCode(0);
+        $this->buildContainer([
+            'ai' => [
+                'speech' => [
+                    'eleven_labs' => [
+                        'tts_model' => 'foo',
+                        'tts_voice' => 'bar',
+                        'tts_extra_options' => [
+                            'foo' => 'bar',
+                        ],
+                        'stt_model' => 'foo',
+                        'stt_extra_options' => [
+                            'foo' => 'bar',
+                        ],
+                    ],
+                ],
+            ],
+        ]);
+    }
+
+    public function testElevenLabsSpeechProviderListenerIsRegisteredWithSpeechProviders()
+    {
+        // With both platform and speech settings, the configuration, provider and listener services are defined.
+        $container = $this->buildContainer([
+            'ai' => [
+                'platform' => [
+                    'eleven_labs' => [
+                        'api_key' => 'foo',
+                    ],
+                ],
+                'speech' => [
+                    'eleven_labs' => [
+                        'tts_model' => 'foo',
+                        'tts_voice' => 'bar',
+                        'tts_extra_options' => [
+                            'foo' => 'bar',
+                        ],
+                        'stt_model' => 'foo',
+                        'stt_extra_options' => [
+                            'foo' => 'bar',
+                        ],
+                    ],
+                ],
+            ],
+        ]);
+
+        $this->assertTrue($container->hasDefinition('ai.speech_provider.listener'));
+        $this->assertTrue($container->hasDefinition('ai.speech.eleven_labs.configuration'));
+
+        $platformDefinition = $container->getDefinition('ai.platform.eleven_labs');
+        $this->assertCount(7, $platformDefinition->getArguments());
+        $this->assertInstanceOf(Reference::class, $platformDefinition->getArgument(5));
+        $this->assertSame('ai.speech.eleven_labs.configuration', (string) $platformDefinition->getArgument(5));
+
+        $providerDefinition = $container->getDefinition('ai.speech_provider.eleven_labs');
+        $this->assertTrue($providerDefinition->isLazy());
+        $this->assertCount(1, $providerDefinition->getArguments());
+        $this->assertSame('ai.platform.eleven_labs', (string) $providerDefinition->getArgument(0));
+
+        $this->assertSame([['interface' => SpeechProviderInterface::class]], $providerDefinition->getTag('proxy'));
+        $this->assertTrue($providerDefinition->hasTag('ai.speech_provider'));
+
+        $listenerDefinition = $container->getDefinition('ai.speech_listener.eleven_labs');
+        $this->assertTrue($listenerDefinition->isLazy());
+        $this->assertCount(1, $listenerDefinition->getArguments());
+        $this->assertSame('ai.platform.eleven_labs', (string) $listenerDefinition->getArgument(0));
+
+        $this->assertSame([['interface' => SpeechListenerInterface::class]], $listenerDefinition->getTag('proxy'));
+        $this->assertTrue($listenerDefinition->hasTag('ai.speech_listener'));
+    }
+
     private function buildContainer(array $configuration):
ContainerBuilder { $container = new ContainerBuilder(); @@ -4399,6 +4491,19 @@ private function getFullConfig(): array 'message_store' => 'cache', ], ], + 'speech' => [ + 'eleven_labs' => [ + 'tts_model' => 'foo', + 'tts_voice' => 'bar', + 'tts_extra_options' => [ + 'foo' => 'bar', + ], + 'stt_model' => 'foo', + 'stt_extra_options' => [ + 'foo' => 'bar', + ], + ], + ], 'vectorizer' => [ 'test_vectorizer' => [ 'platform' => 'mistral_platform_service_id', diff --git a/src/platform/src/Bridge/AiMlApi/PlatformFactory.php b/src/platform/src/Bridge/AiMlApi/PlatformFactory.php index 1592393c7..21c184933 100644 --- a/src/platform/src/Bridge/AiMlApi/PlatformFactory.php +++ b/src/platform/src/Bridge/AiMlApi/PlatformFactory.php @@ -15,6 +15,7 @@ use Symfony\AI\Platform\Bridge\AiMlApi\Embeddings\ModelClient; use Symfony\AI\Platform\Contract; use Symfony\AI\Platform\Platform; +use Symfony\AI\Platform\Speech\SpeechConfiguration; use Symfony\Contracts\HttpClient\HttpClientInterface; /** @@ -27,6 +28,7 @@ public static function create( ?HttpClientInterface $httpClient = null, ?Contract $contract = null, string $hostUrl = 'https://api.aimlapi.com', + ?SpeechConfiguration $speechConfiguration = new SpeechConfiguration(), ?EventDispatcherInterface $eventDispatcher = null, ): Platform { return new Platform( @@ -40,6 +42,7 @@ public static function create( ], new ModelCatalog(), $contract, + $speechConfiguration, $eventDispatcher, ); } diff --git a/src/platform/src/Bridge/Albert/PlatformFactory.php b/src/platform/src/Bridge/Albert/PlatformFactory.php index d83722581..cf8543311 100644 --- a/src/platform/src/Bridge/Albert/PlatformFactory.php +++ b/src/platform/src/Bridge/Albert/PlatformFactory.php @@ -18,6 +18,7 @@ use Symfony\AI\Platform\Exception\InvalidArgumentException; use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface; use Symfony\AI\Platform\Platform; +use Symfony\AI\Platform\Speech\SpeechConfiguration; use Symfony\Component\HttpClient\EventSourceHttpClient; use 
Symfony\Contracts\HttpClient\HttpClientInterface; @@ -31,6 +32,7 @@ public static function create( string $baseUrl, ?HttpClientInterface $httpClient = null, ModelCatalogInterface $modelCatalog = new ModelCatalog(), + ?SpeechConfiguration $speechConfiguration = new SpeechConfiguration(), ?EventDispatcherInterface $eventDispatcher = null, ): Platform { if (!str_starts_with($baseUrl, 'https://')) { @@ -56,6 +58,7 @@ public static function create( [new Gpt\ResultConverter(), new Embeddings\ResultConverter()], $modelCatalog, Contract::create(), + $speechConfiguration, $eventDispatcher, ); } diff --git a/src/platform/src/Bridge/Anthropic/PlatformFactory.php b/src/platform/src/Bridge/Anthropic/PlatformFactory.php index 92c4e817a..5b5739c62 100644 --- a/src/platform/src/Bridge/Anthropic/PlatformFactory.php +++ b/src/platform/src/Bridge/Anthropic/PlatformFactory.php @@ -16,6 +16,7 @@ use Symfony\AI\Platform\Contract; use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface; use Symfony\AI\Platform\Platform; +use Symfony\AI\Platform\Speech\SpeechConfiguration; use Symfony\Component\HttpClient\EventSourceHttpClient; use Symfony\Contracts\HttpClient\HttpClientInterface; @@ -29,6 +30,7 @@ public static function create( ?HttpClientInterface $httpClient = null, ModelCatalogInterface $modelCatalog = new ModelCatalog(), ?Contract $contract = null, + ?SpeechConfiguration $speechConfiguration = new SpeechConfiguration(), ?EventDispatcherInterface $eventDispatcher = null, ): Platform { $httpClient = $httpClient instanceof EventSourceHttpClient ? $httpClient : new EventSourceHttpClient($httpClient); @@ -38,6 +40,7 @@ public static function create( [new ResultConverter()], $modelCatalog, $contract ?? 
AnthropicContract::create(), + $speechConfiguration, $eventDispatcher, ); } diff --git a/src/platform/src/Bridge/Azure/Meta/PlatformFactory.php b/src/platform/src/Bridge/Azure/Meta/PlatformFactory.php index cbb528654..47c24a1aa 100644 --- a/src/platform/src/Bridge/Azure/Meta/PlatformFactory.php +++ b/src/platform/src/Bridge/Azure/Meta/PlatformFactory.php @@ -16,6 +16,7 @@ use Symfony\AI\Platform\Contract; use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface; use Symfony\AI\Platform\Platform; +use Symfony\AI\Platform\Speech\SpeechConfiguration; use Symfony\Component\HttpClient\HttpClient; use Symfony\Contracts\HttpClient\HttpClientInterface; @@ -30,10 +31,11 @@ public static function create( ?HttpClientInterface $httpClient = null, ModelCatalogInterface $modelCatalog = new ModelCatalog(), ?Contract $contract = null, + ?SpeechConfiguration $speechConfiguration = new SpeechConfiguration(), ?EventDispatcherInterface $eventDispatcher = null, ): Platform { $modelClient = new LlamaModelClient($httpClient ?? 
HttpClient::create(), $baseUrl, $apiKey); - return new Platform([$modelClient], [new LlamaResultConverter()], $modelCatalog, $contract, $eventDispatcher); + return new Platform([$modelClient], [new LlamaResultConverter()], $modelCatalog, $contract, $speechConfiguration, $eventDispatcher); } } diff --git a/src/platform/src/Bridge/Azure/OpenAi/PlatformFactory.php b/src/platform/src/Bridge/Azure/OpenAi/PlatformFactory.php index 0ff04f960..afee4ccdd 100644 --- a/src/platform/src/Bridge/Azure/OpenAi/PlatformFactory.php +++ b/src/platform/src/Bridge/Azure/OpenAi/PlatformFactory.php @@ -20,6 +20,7 @@ use Symfony\AI\Platform\Contract; use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface; use Symfony\AI\Platform\Platform; +use Symfony\AI\Platform\Speech\SpeechConfiguration; use Symfony\Component\HttpClient\EventSourceHttpClient; use Symfony\Contracts\HttpClient\HttpClientInterface; @@ -36,6 +37,7 @@ public static function create( ?HttpClientInterface $httpClient = null, ModelCatalogInterface $modelCatalog = new ModelCatalog(), ?Contract $contract = null, + ?SpeechConfiguration $speechConfiguration = new SpeechConfiguration(), ?EventDispatcherInterface $eventDispatcher = null, ): Platform { $httpClient = $httpClient instanceof EventSourceHttpClient ? $httpClient : new EventSourceHttpClient($httpClient); @@ -48,6 +50,7 @@ public static function create( [new Gpt\ResultConverter(), new Embeddings\ResultConverter(), new Whisper\ResultConverter()], $modelCatalog, $contract ?? 
Contract::create(new AudioNormalizer()), + $speechConfiguration, $eventDispatcher, ); } diff --git a/src/platform/src/Bridge/Bedrock/PlatformFactory.php b/src/platform/src/Bridge/Bedrock/PlatformFactory.php index 2d7632fe0..88b062b7a 100644 --- a/src/platform/src/Bridge/Bedrock/PlatformFactory.php +++ b/src/platform/src/Bridge/Bedrock/PlatformFactory.php @@ -26,6 +26,7 @@ use Symfony\AI\Platform\Exception\RuntimeException; use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface; use Symfony\AI\Platform\Platform; +use Symfony\AI\Platform\Speech\SpeechConfiguration; /** * @author Björn Altmann @@ -36,6 +37,7 @@ public static function create( BedrockRuntimeClient $bedrockRuntimeClient = new BedrockRuntimeClient(), ModelCatalogInterface $modelCatalog = new ModelCatalog(), ?Contract $contract = null, + ?SpeechConfiguration $speechConfiguration = new SpeechConfiguration(), ?EventDispatcherInterface $eventDispatcher = null, ): Platform { if (!class_exists(BedrockRuntimeClient::class)) { @@ -70,6 +72,7 @@ public static function create( new NovaContract\ToolNormalizer(), new NovaContract\UserMessageNormalizer(), ), + $speechConfiguration, $eventDispatcher, ); } diff --git a/src/platform/src/Bridge/Cartesia/PlatformFactory.php b/src/platform/src/Bridge/Cartesia/PlatformFactory.php index 8bb80aaba..eb71ed848 100644 --- a/src/platform/src/Bridge/Cartesia/PlatformFactory.php +++ b/src/platform/src/Bridge/Cartesia/PlatformFactory.php @@ -16,6 +16,7 @@ use Symfony\AI\Platform\Contract; use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface; use Symfony\AI\Platform\Platform; +use Symfony\AI\Platform\Speech\SpeechConfiguration; use Symfony\Component\HttpClient\EventSourceHttpClient; use Symfony\Contracts\HttpClient\HttpClientInterface; @@ -30,6 +31,7 @@ public static function create( ?HttpClientInterface $httpClient = null, ModelCatalogInterface $modelCatalog = new ModelCatalog(), ?Contract $contract = null, + ?SpeechConfiguration $speechConfiguration = new 
SpeechConfiguration(), ?EventDispatcherInterface $eventDispatcher = null, ): Platform { $httpClient = $httpClient instanceof EventSourceHttpClient ? $httpClient : new EventSourceHttpClient($httpClient); @@ -39,6 +41,7 @@ public static function create( [new CartesiaResultConverter()], $modelCatalog, $contract ?? CartesiaContract::create(), + $speechConfiguration, $eventDispatcher, ); } diff --git a/src/platform/src/Bridge/Cerebras/PlatformFactory.php b/src/platform/src/Bridge/Cerebras/PlatformFactory.php index c5706812b..3435e7453 100644 --- a/src/platform/src/Bridge/Cerebras/PlatformFactory.php +++ b/src/platform/src/Bridge/Cerebras/PlatformFactory.php @@ -15,6 +15,7 @@ use Symfony\AI\Platform\Contract; use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface; use Symfony\AI\Platform\Platform; +use Symfony\AI\Platform\Speech\SpeechConfiguration; use Symfony\Component\HttpClient\EventSourceHttpClient; use Symfony\Contracts\HttpClient\HttpClientInterface; @@ -28,6 +29,7 @@ public static function create( ?HttpClientInterface $httpClient = null, ModelCatalogInterface $modelCatalog = new ModelCatalog(), ?Contract $contract = null, + ?SpeechConfiguration $speechConfiguration = new SpeechConfiguration(), ?EventDispatcherInterface $eventDispatcher = null, ): Platform { $httpClient = $httpClient instanceof EventSourceHttpClient ? 
$httpClient : new EventSourceHttpClient($httpClient); @@ -37,6 +39,7 @@ public static function create( [new ResultConverter()], $modelCatalog, $contract, + $speechConfiguration, $eventDispatcher, ); } diff --git a/src/platform/src/Bridge/DeepSeek/PlatformFactory.php b/src/platform/src/Bridge/DeepSeek/PlatformFactory.php index ce1d1334e..10306d245 100644 --- a/src/platform/src/Bridge/DeepSeek/PlatformFactory.php +++ b/src/platform/src/Bridge/DeepSeek/PlatformFactory.php @@ -15,6 +15,7 @@ use Symfony\AI\Platform\Contract; use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface; use Symfony\AI\Platform\Platform; +use Symfony\AI\Platform\Speech\SpeechConfiguration; use Symfony\Component\HttpClient\EventSourceHttpClient; use Symfony\Contracts\HttpClient\HttpClientInterface; @@ -25,6 +26,7 @@ public static function create( ?HttpClientInterface $httpClient = null, ModelCatalogInterface $modelCatalog = new ModelCatalog(), ?Contract $contract = null, + ?SpeechConfiguration $speechConfiguration = new SpeechConfiguration(), ?EventDispatcherInterface $eventDispatcher = null, ): Platform { $httpClient = $httpClient instanceof EventSourceHttpClient ? $httpClient : new EventSourceHttpClient($httpClient); @@ -34,6 +36,7 @@ public static function create( [new ResultConverter()], $modelCatalog, $contract ?? 
Contract::create(), + $speechConfiguration, $eventDispatcher, ); } diff --git a/src/platform/src/Bridge/DockerModelRunner/PlatformFactory.php b/src/platform/src/Bridge/DockerModelRunner/PlatformFactory.php index 30fb2c86b..18325fb9e 100644 --- a/src/platform/src/Bridge/DockerModelRunner/PlatformFactory.php +++ b/src/platform/src/Bridge/DockerModelRunner/PlatformFactory.php @@ -15,6 +15,7 @@ use Symfony\AI\Platform\Contract; use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface; use Symfony\AI\Platform\Platform; +use Symfony\AI\Platform\Speech\SpeechConfiguration; use Symfony\Component\HttpClient\EventSourceHttpClient; use Symfony\Contracts\HttpClient\HttpClientInterface; @@ -28,6 +29,7 @@ public static function create( ?HttpClientInterface $httpClient = null, ModelCatalogInterface $modelCatalog = new ModelCatalog(), ?Contract $contract = null, + ?SpeechConfiguration $speechConfiguration = new SpeechConfiguration(), ?EventDispatcherInterface $eventDispatcher = null, ): Platform { $httpClient = $httpClient instanceof EventSourceHttpClient ? $httpClient : new EventSourceHttpClient($httpClient); @@ -43,6 +45,7 @@ public static function create( ], $modelCatalog, $contract, + $speechConfiguration, $eventDispatcher, ); } diff --git a/src/platform/src/Bridge/ElevenLabs/ElevenLabsSpeechListener.php b/src/platform/src/Bridge/ElevenLabs/ElevenLabsSpeechListener.php new file mode 100644 index 000000000..1881ca0cd --- /dev/null +++ b/src/platform/src/Bridge/ElevenLabs/ElevenLabsSpeechListener.php @@ -0,0 +1,53 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */
+
+namespace Symfony\AI\Platform\Bridge\ElevenLabs;
+
+use Symfony\AI\Platform\Capability;
+use Symfony\AI\Platform\Exception\RuntimeException;
+use Symfony\AI\Platform\Message\Content\Text;
+use Symfony\AI\Platform\Message\MessageBag;
+use Symfony\AI\Platform\PlatformInterface;
+use Symfony\AI\Platform\Speech\SpeechListenerInterface;
+
+/**
+ * Speech-to-text listener that transcribes input by invoking the injected platform
+ * with the speech-to-text model named in the platform's speech configuration.
+ *
+ * @author Guillaume Loulier
+ */
+final class ElevenLabsSpeechListener implements SpeechListenerInterface
+{
+    public function __construct(
+        private readonly PlatformInterface $platform,
+    ) {
+    }
+
+    /**
+     * Transcribes the given input and wraps the resulting text in a Text content object.
+     *
+     * When the input is a MessageBag that contains audio, only the audio content of
+     * its user message is sent to the platform; any other input is forwarded unchanged.
+     *
+     * @throws RuntimeException When the platform exposes no speech configuration
+     */
+    public function listen(object|array|string $input, array $options): Text
+    {
+        $speechConfiguration = $this->platform->getSpeechConfiguration();
+
+        // The configuration is nullable (support() checks for it too); fail with a
+        // clear error instead of reading a property on null.
+        if (null === $speechConfiguration) {
+            throw new RuntimeException('The speech configuration is not available on the platform.');
+        }
+
+        $input = ($input instanceof MessageBag && $input->containsAudio()) ? $input->getUserMessage()->getAudioContent() : $input;
+
+        $result = $this->platform->invoke($speechConfiguration->sttModel, $input, $options);
+
+        return new Text($result->asText());
+    }
+
+    /**
+     * Tells whether the platform can transcribe speech.
+     *
+     * Returns false when no speech configuration is available; otherwise checks that
+     * the configured speech-to-text model declares the SPEECH_TO_TEXT capability.
+     * Neither $input nor $options is inspected.
+     */
+    public function support(object|array|string $input, array $options): bool
+    {
+        $speechConfiguration = $this->platform->getSpeechConfiguration();
+
+        if (null === $speechConfiguration) {
+            return false;
+        }
+
+        $model = $this->platform->getModelCatalog()->getModel($speechConfiguration->sttModel);
+
+        return \in_array(Capability::SPEECH_TO_TEXT, $model->getCapabilities(), true);
+    }
+}
diff --git a/src/platform/src/Bridge/ElevenLabs/ElevenLabsSpeechProvider.php b/src/platform/src/Bridge/ElevenLabs/ElevenLabsSpeechProvider.php
new file mode 100644
index 000000000..870bbf41c
--- /dev/null
+++ b/src/platform/src/Bridge/ElevenLabs/ElevenLabsSpeechProvider.php
@@ -0,0 +1,57 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Symfony\AI\Platform\Bridge\ElevenLabs;
+
+use Symfony\AI\Platform\Capability;
+use Symfony\AI\Platform\Exception\RuntimeException;
+use Symfony\AI\Platform\PlatformInterface;
+use Symfony\AI\Platform\Result\DeferredResult;
+use Symfony\AI\Platform\Speech\Speech;
+use Symfony\AI\Platform\Speech\SpeechProviderInterface;
+
+/**
+ * Text-to-speech provider that turns the text of a result into speech by invoking
+ * the injected platform with the text-to-speech model of its speech configuration.
+ *
+ * @author Guillaume Loulier
+ */
+final class ElevenLabsSpeechProvider implements SpeechProviderInterface
+{
+    public function __construct(
+        private readonly PlatformInterface $platform,
+    ) {
+    }
+
+    /**
+     * Generates speech for the text carried by the given result.
+     *
+     * Invocation options are merged in this order, later entries overriding earlier
+     * ones on identical keys: the configured voice, the configured extra options,
+     * then the $options passed to this call.
+     *
+     * @throws RuntimeException When the platform exposes no speech configuration
+     */
+    public function generate(DeferredResult $result, array $options): Speech
+    {
+        $speechConfiguration = $this->platform->getSpeechConfiguration();
+
+        // The configuration is nullable (support() checks for it too); fail with a
+        // clear error instead of reading properties on null.
+        if (null === $speechConfiguration) {
+            throw new RuntimeException('The speech configuration is not available on the platform.');
+        }
+
+        $payload = $result->asText();
+
+        $speechResult = $this->platform->invoke($speechConfiguration->ttsModel, ['text' => $payload], [
+            'voice' => $speechConfiguration->ttsVoice,
+            ...$speechConfiguration->ttsExtraOptions,
+            ...$options,
+        ]);
+
+        return new Speech($payload, $speechResult, 'eleven_labs');
+    }
+
+    /**
+     * Tells whether the platform can generate speech.
+     *
+     * Returns false when no speech configuration is available; otherwise checks that
+     * the configured text-to-speech model declares the TEXT_TO_SPEECH capability.
+     * Neither $result nor $options is inspected.
+     */
+    public function support(DeferredResult $result, array $options): bool
+    {
+        $speechConfiguration = $this->platform->getSpeechConfiguration();
+
+        if (null === $speechConfiguration) {
+            return false;
+        }
+
+        $model = $this->platform->getModelCatalog()->getModel($speechConfiguration->ttsModel);
+
+        return \in_array(Capability::TEXT_TO_SPEECH, $model->getCapabilities(), true);
+    }
+}
diff --git a/src/platform/src/Bridge/ElevenLabs/PlatformFactory.php b/src/platform/src/Bridge/ElevenLabs/PlatformFactory.php
index a1ed7249d..fc6adba29 100644
--- a/src/platform/src/Bridge/ElevenLabs/PlatformFactory.php
+++ b/src/platform/src/Bridge/ElevenLabs/PlatformFactory.php
@@ -16,6 +16,7 @@
 use Symfony\AI\Platform\Contract;
 use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface;
 use Symfony\AI\Platform\Platform;
+use Symfony\AI\Platform\Speech\SpeechConfiguration;
 use Symfony\Component\HttpClient\EventSourceHttpClient;
 use Symfony\Contracts\HttpClient\HttpClientInterface;
 
@@ -30,6 +31,7 @@ public static function create(
         ?HttpClientInterface
$httpClient = null, ModelCatalogInterface $modelCatalog = new ModelCatalog(), ?Contract $contract = null, + ?SpeechConfiguration $speechConfiguration = new SpeechConfiguration(), ?EventDispatcherInterface $eventDispatcher = null, ): Platform { $httpClient = $httpClient instanceof EventSourceHttpClient ? $httpClient : new EventSourceHttpClient($httpClient); @@ -39,6 +41,7 @@ public static function create( [new ElevenLabsResultConverter($httpClient)], $modelCatalog, $contract ?? ElevenLabsContract::create(), + $speechConfiguration, $eventDispatcher, ); } diff --git a/src/platform/src/Bridge/Gemini/PlatformFactory.php b/src/platform/src/Bridge/Gemini/PlatformFactory.php index 584b8ab8e..db135927b 100644 --- a/src/platform/src/Bridge/Gemini/PlatformFactory.php +++ b/src/platform/src/Bridge/Gemini/PlatformFactory.php @@ -20,6 +20,7 @@ use Symfony\AI\Platform\Contract; use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface; use Symfony\AI\Platform\Platform; +use Symfony\AI\Platform\Speech\SpeechConfiguration; use Symfony\Component\HttpClient\EventSourceHttpClient; use Symfony\Contracts\HttpClient\HttpClientInterface; @@ -33,6 +34,7 @@ public static function create( ?HttpClientInterface $httpClient = null, ModelCatalogInterface $modelCatalog = new ModelCatalog(), ?Contract $contract = null, + ?SpeechConfiguration $speechConfiguration = new SpeechConfiguration(), ?EventDispatcherInterface $eventDispatcher = null, ): Platform { $httpClient = $httpClient instanceof EventSourceHttpClient ? $httpClient : new EventSourceHttpClient($httpClient); @@ -42,6 +44,7 @@ public static function create( [new EmbeddingsResultConverter(), new GeminiResultConverter()], $modelCatalog, $contract ?? 
GeminiContract::create(), + $speechConfiguration, $eventDispatcher, ); } diff --git a/src/platform/src/Bridge/HuggingFace/PlatformFactory.php b/src/platform/src/Bridge/HuggingFace/PlatformFactory.php index 52c2b0aad..96ad10fe1 100644 --- a/src/platform/src/Bridge/HuggingFace/PlatformFactory.php +++ b/src/platform/src/Bridge/HuggingFace/PlatformFactory.php @@ -16,6 +16,7 @@ use Symfony\AI\Platform\Contract; use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface; use Symfony\AI\Platform\Platform; +use Symfony\AI\Platform\Speech\SpeechConfiguration; use Symfony\Component\HttpClient\EventSourceHttpClient; use Symfony\Contracts\HttpClient\HttpClientInterface; @@ -30,6 +31,7 @@ public static function create( ?HttpClientInterface $httpClient = null, ModelCatalogInterface $modelCatalog = new ModelCatalog(), ?Contract $contract = null, + ?SpeechConfiguration $speechConfiguration = new SpeechConfiguration(), ?EventDispatcherInterface $eventDispatcher = null, ): Platform { $httpClient = $httpClient instanceof EventSourceHttpClient ? $httpClient : new EventSourceHttpClient($httpClient); @@ -39,6 +41,7 @@ public static function create( [new ResultConverter()], $modelCatalog, $contract ?? 
HuggingFaceContract::create(), + $speechConfiguration, $eventDispatcher, ); } diff --git a/src/platform/src/Bridge/LiteLlm/PlatformFactory.php b/src/platform/src/Bridge/LiteLlm/PlatformFactory.php index e63c6e669..78cd01586 100644 --- a/src/platform/src/Bridge/LiteLlm/PlatformFactory.php +++ b/src/platform/src/Bridge/LiteLlm/PlatformFactory.php @@ -15,6 +15,7 @@ use Symfony\AI\Platform\Contract; use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface; use Symfony\AI\Platform\Platform; +use Symfony\AI\Platform\Speech\SpeechConfiguration; use Symfony\Component\HttpClient\EventSourceHttpClient; use Symfony\Contracts\HttpClient\HttpClientInterface; @@ -29,6 +30,7 @@ public static function create( ?HttpClientInterface $httpClient = null, ModelCatalogInterface $modelCatalog = new ModelCatalog(), ?Contract $contract = null, + ?SpeechConfiguration $speechConfiguration = new SpeechConfiguration(), ?EventDispatcherInterface $eventDispatcher = null, ): Platform { $httpClient = $httpClient instanceof EventSourceHttpClient ? 
$httpClient : new EventSourceHttpClient($httpClient); @@ -42,6 +44,7 @@ public static function create( ], $modelCatalog, $contract, + $speechConfiguration, $eventDispatcher, ); } diff --git a/src/platform/src/Bridge/LmStudio/PlatformFactory.php b/src/platform/src/Bridge/LmStudio/PlatformFactory.php index 411a88e46..1df3a035d 100644 --- a/src/platform/src/Bridge/LmStudio/PlatformFactory.php +++ b/src/platform/src/Bridge/LmStudio/PlatformFactory.php @@ -16,6 +16,7 @@ use Symfony\AI\Platform\Contract; use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface; use Symfony\AI\Platform\Platform; +use Symfony\AI\Platform\Speech\SpeechConfiguration; use Symfony\Component\HttpClient\EventSourceHttpClient; use Symfony\Contracts\HttpClient\HttpClientInterface; @@ -29,6 +30,7 @@ public static function create( ?HttpClientInterface $httpClient = null, ModelCatalogInterface $modelCatalog = new ModelCatalog(), ?Contract $contract = null, + ?SpeechConfiguration $speechConfiguration = new SpeechConfiguration(), ?EventDispatcherInterface $eventDispatcher = null, ): Platform { $httpClient = $httpClient instanceof EventSourceHttpClient ? 
$httpClient : new EventSourceHttpClient($httpClient); @@ -44,6 +46,7 @@ public static function create( ], $modelCatalog, $contract, + $speechConfiguration, $eventDispatcher, ); } diff --git a/src/platform/src/Bridge/Mistral/PlatformFactory.php b/src/platform/src/Bridge/Mistral/PlatformFactory.php index 65ddeb5d4..83e655bf6 100644 --- a/src/platform/src/Bridge/Mistral/PlatformFactory.php +++ b/src/platform/src/Bridge/Mistral/PlatformFactory.php @@ -19,6 +19,7 @@ use Symfony\AI\Platform\Contract; use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface; use Symfony\AI\Platform\Platform; +use Symfony\AI\Platform\Speech\SpeechConfiguration; use Symfony\Component\HttpClient\EventSourceHttpClient; use Symfony\Contracts\HttpClient\HttpClientInterface; @@ -32,6 +33,7 @@ public static function create( ?HttpClientInterface $httpClient = null, ModelCatalogInterface $modelCatalog = new ModelCatalog(), ?Contract $contract = null, + ?SpeechConfiguration $speechConfiguration = new SpeechConfiguration(), ?EventDispatcherInterface $eventDispatcher = null, ): Platform { $httpClient = $httpClient instanceof EventSourceHttpClient ? 
$httpClient : new EventSourceHttpClient($httpClient); @@ -46,6 +48,7 @@ public static function create( new DocumentUrlNormalizer(), new ImageUrlNormalizer(), ), + $speechConfiguration, $eventDispatcher, ); } diff --git a/src/platform/src/Bridge/Ollama/PlatformFactory.php b/src/platform/src/Bridge/Ollama/PlatformFactory.php index bf1d16c06..7472f98ee 100644 --- a/src/platform/src/Bridge/Ollama/PlatformFactory.php +++ b/src/platform/src/Bridge/Ollama/PlatformFactory.php @@ -16,6 +16,7 @@ use Symfony\AI\Platform\Contract; use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface; use Symfony\AI\Platform\Platform; +use Symfony\AI\Platform\Speech\SpeechConfiguration; use Symfony\Component\HttpClient\EventSourceHttpClient; use Symfony\Contracts\HttpClient\HttpClientInterface; @@ -29,6 +30,7 @@ public static function create( ?HttpClientInterface $httpClient = null, ModelCatalogInterface $modelCatalog = new ModelCatalog(), ?Contract $contract = null, + ?SpeechConfiguration $speechConfiguration = new SpeechConfiguration(), ?EventDispatcherInterface $eventDispatcher = null, ): Platform { $httpClient = $httpClient instanceof EventSourceHttpClient ? $httpClient : new EventSourceHttpClient($httpClient); @@ -38,6 +40,7 @@ public static function create( [new OllamaResultConverter()], $modelCatalog, $contract ?? 
OllamaContract::create(), + $speechConfiguration, $eventDispatcher, ); } diff --git a/src/platform/src/Bridge/OpenAi/PlatformFactory.php b/src/platform/src/Bridge/OpenAi/PlatformFactory.php index b52837610..b9a08c495 100644 --- a/src/platform/src/Bridge/OpenAi/PlatformFactory.php +++ b/src/platform/src/Bridge/OpenAi/PlatformFactory.php @@ -16,6 +16,7 @@ use Symfony\AI\Platform\Contract; use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface; use Symfony\AI\Platform\Platform; +use Symfony\AI\Platform\Speech\SpeechConfiguration; use Symfony\Component\HttpClient\EventSourceHttpClient; use Symfony\Contracts\HttpClient\HttpClientInterface; @@ -33,6 +34,7 @@ public static function create( ModelCatalogInterface $modelCatalog = new ModelCatalog(), ?Contract $contract = null, ?string $region = null, + ?SpeechConfiguration $speechConfiguration = new SpeechConfiguration(), ?EventDispatcherInterface $eventDispatcher = null, ): Platform { $httpClient = $httpClient instanceof EventSourceHttpClient ? $httpClient : new EventSourceHttpClient($httpClient); @@ -54,6 +56,7 @@ public static function create( ], $modelCatalog, $contract ?? 
OpenAiContract::create(), + $speechConfiguration, $eventDispatcher, ); } diff --git a/src/platform/src/Bridge/OpenRouter/PlatformFactory.php b/src/platform/src/Bridge/OpenRouter/PlatformFactory.php index 9261a7cce..b033ae100 100644 --- a/src/platform/src/Bridge/OpenRouter/PlatformFactory.php +++ b/src/platform/src/Bridge/OpenRouter/PlatformFactory.php @@ -22,6 +22,7 @@ use Symfony\AI\Platform\Contract; use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface; use Symfony\AI\Platform\Platform; +use Symfony\AI\Platform\Speech\SpeechConfiguration; use Symfony\Component\HttpClient\EventSourceHttpClient; use Symfony\Contracts\HttpClient\HttpClientInterface; @@ -35,6 +36,7 @@ public static function create( ?HttpClientInterface $httpClient = null, ModelCatalogInterface $modelCatalog = new ModelCatalog(), ?Contract $contract = null, + ?SpeechConfiguration $speechConfiguration = new SpeechConfiguration(), ?EventDispatcherInterface $eventDispatcher = null, ): Platform { $httpClient = $httpClient instanceof EventSourceHttpClient ? 
$httpClient : new EventSourceHttpClient($httpClient); @@ -48,6 +50,7 @@ public static function create( new MessageBagNormalizer(), new UserMessageNormalizer(), ), + $speechConfiguration, $eventDispatcher, ); } diff --git a/src/platform/src/Bridge/Perplexity/PlatformFactory.php b/src/platform/src/Bridge/Perplexity/PlatformFactory.php index a30ced9e3..054ba8cb2 100644 --- a/src/platform/src/Bridge/Perplexity/PlatformFactory.php +++ b/src/platform/src/Bridge/Perplexity/PlatformFactory.php @@ -16,6 +16,7 @@ use Symfony\AI\Platform\Contract; use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface; use Symfony\AI\Platform\Platform; +use Symfony\AI\Platform\Speech\SpeechConfiguration; use Symfony\Component\HttpClient\EventSourceHttpClient; use Symfony\Contracts\HttpClient\HttpClientInterface; @@ -29,6 +30,7 @@ public static function create( ?HttpClientInterface $httpClient = null, ModelCatalogInterface $modelCatalog = new ModelCatalog(), ?Contract $contract = null, + ?SpeechConfiguration $speechConfiguration = new SpeechConfiguration(), ?EventDispatcherInterface $eventDispatcher = null, ): Platform { $httpClient = $httpClient instanceof EventSourceHttpClient ? $httpClient : new EventSourceHttpClient($httpClient); @@ -38,6 +40,7 @@ public static function create( [new ResultConverter()], $modelCatalog, $contract ?? 
PerplexityContract::create(), + $speechConfiguration, $eventDispatcher, ); } diff --git a/src/platform/src/Bridge/Replicate/PlatformFactory.php b/src/platform/src/Bridge/Replicate/PlatformFactory.php index 9cddcfd9f..a31ec9d52 100644 --- a/src/platform/src/Bridge/Replicate/PlatformFactory.php +++ b/src/platform/src/Bridge/Replicate/PlatformFactory.php @@ -16,6 +16,7 @@ use Symfony\AI\Platform\Contract; use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface; use Symfony\AI\Platform\Platform; +use Symfony\AI\Platform\Speech\SpeechConfiguration; use Symfony\Component\Clock\Clock; use Symfony\Component\HttpClient\HttpClient; use Symfony\Contracts\HttpClient\HttpClientInterface; @@ -30,6 +31,7 @@ public static function create( ?HttpClientInterface $httpClient = null, ModelCatalogInterface $modelCatalog = new ModelCatalog(), ?Contract $contract = null, + ?SpeechConfiguration $speechConfiguration = new SpeechConfiguration(), ?EventDispatcherInterface $eventDispatcher = null, ): Platform { return new Platform( @@ -37,6 +39,7 @@ public static function create( [new LlamaResultConverter()], $modelCatalog, $contract ?? 
Contract::create(new LlamaMessageBagNormalizer()), + $speechConfiguration, $eventDispatcher, ); } diff --git a/src/platform/src/Bridge/Scaleway/PlatformFactory.php b/src/platform/src/Bridge/Scaleway/PlatformFactory.php index 4acd5e31c..2be1fa3e7 100644 --- a/src/platform/src/Bridge/Scaleway/PlatformFactory.php +++ b/src/platform/src/Bridge/Scaleway/PlatformFactory.php @@ -19,6 +19,7 @@ use Symfony\AI\Platform\Contract; use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface; use Symfony\AI\Platform\Platform; +use Symfony\AI\Platform\Speech\SpeechConfiguration; use Symfony\Component\HttpClient\EventSourceHttpClient; use Symfony\Contracts\HttpClient\HttpClientInterface; @@ -32,6 +33,7 @@ public static function create( ?HttpClientInterface $httpClient = null, ModelCatalogInterface $modelCatalog = new ModelCatalog(), ?Contract $contract = null, + ?SpeechConfiguration $speechConfiguration = new SpeechConfiguration(), ?EventDispatcherInterface $eventDispatcher = null, ): Platform { $httpClient = $httpClient instanceof EventSourceHttpClient ? 
$httpClient : new EventSourceHttpClient($httpClient); @@ -47,6 +49,7 @@ public static function create( ], $modelCatalog, $contract, + $speechConfiguration, $eventDispatcher, ); } diff --git a/src/platform/src/Bridge/TransformersPhp/PlatformFactory.php b/src/platform/src/Bridge/TransformersPhp/PlatformFactory.php index 348533af0..5346894c0 100644 --- a/src/platform/src/Bridge/TransformersPhp/PlatformFactory.php +++ b/src/platform/src/Bridge/TransformersPhp/PlatformFactory.php @@ -16,6 +16,7 @@ use Symfony\AI\Platform\Exception\RuntimeException; use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface; use Symfony\AI\Platform\Platform; +use Symfony\AI\Platform\Speech\SpeechConfiguration; /** * @author Christopher Hertel @@ -24,12 +25,13 @@ final class PlatformFactory { public static function create( ModelCatalogInterface $modelCatalog = new ModelCatalog(), + ?SpeechConfiguration $speechConfiguration = new SpeechConfiguration(), ?EventDispatcherInterface $eventDispatcher = null, ): Platform { if (!class_exists(Transformers::class)) { throw new RuntimeException('For using the TransformersPHP with FFI to run models in PHP, the codewithkyrian/transformers package is required. 
Try running "composer require codewithkyrian/transformers".'); } - return new Platform([new ModelClient()], [new ResultConverter()], $modelCatalog, eventDispatcher: $eventDispatcher); + return new Platform([new ModelClient()], [new ResultConverter()], $modelCatalog, speechConfiguration: $speechConfiguration, eventDispatcher: $eventDispatcher); } } diff --git a/src/platform/src/Bridge/VertexAi/PlatformFactory.php b/src/platform/src/Bridge/VertexAi/PlatformFactory.php index e80865edd..58f158ecf 100644 --- a/src/platform/src/Bridge/VertexAi/PlatformFactory.php +++ b/src/platform/src/Bridge/VertexAi/PlatformFactory.php @@ -22,6 +22,7 @@ use Symfony\AI\Platform\Exception\RuntimeException; use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface; use Symfony\AI\Platform\Platform; +use Symfony\AI\Platform\Speech\SpeechConfiguration; use Symfony\Component\HttpClient\EventSourceHttpClient; use Symfony\Contracts\HttpClient\HttpClientInterface; @@ -36,6 +37,7 @@ public static function create( ?HttpClientInterface $httpClient = null, ModelCatalogInterface $modelCatalog = new ModelCatalog(), ?Contract $contract = null, + ?SpeechConfiguration $speechConfiguration = new SpeechConfiguration(), ?EventDispatcherInterface $eventDispatcher = null, ): Platform { if (!class_exists(ApplicationDefaultCredentials::class)) { @@ -49,6 +51,7 @@ public static function create( [new GeminiResultConverter(), new EmbeddingsResultConverter()], $modelCatalog, $contract ?? 
GeminiContract::create(), + $speechConfiguration, $eventDispatcher, ); } diff --git a/src/platform/src/Bridge/Voyage/PlatformFactory.php b/src/platform/src/Bridge/Voyage/PlatformFactory.php index 4e65b58eb..1d996317f 100644 --- a/src/platform/src/Bridge/Voyage/PlatformFactory.php +++ b/src/platform/src/Bridge/Voyage/PlatformFactory.php @@ -16,6 +16,7 @@ use Symfony\AI\Platform\Contract; use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface; use Symfony\AI\Platform\Platform; +use Symfony\AI\Platform\Speech\SpeechConfiguration; use Symfony\Component\HttpClient\EventSourceHttpClient; use Symfony\Contracts\HttpClient\HttpClientInterface; @@ -29,6 +30,7 @@ public static function create( ?HttpClientInterface $httpClient = null, ModelCatalogInterface $modelCatalog = new ModelCatalog(), ?Contract $contract = null, + ?SpeechConfiguration $speechConfiguration = new SpeechConfiguration(), ?EventDispatcherInterface $eventDispatcher = null, ): Platform { $httpClient = $httpClient instanceof EventSourceHttpClient ? $httpClient : new EventSourceHttpClient($httpClient); @@ -38,6 +40,7 @@ public static function create( [new ResultConverter()], $modelCatalog, $contract ?? 
VoyageContract::create(), + $speechConfiguration, $eventDispatcher, ); } diff --git a/src/platform/src/CachedPlatform.php b/src/platform/src/CachedPlatform.php index b443f8048..513aeeb94 100644 --- a/src/platform/src/CachedPlatform.php +++ b/src/platform/src/CachedPlatform.php @@ -13,6 +13,7 @@ use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface; use Symfony\AI\Platform\Result\DeferredResult; +use Symfony\AI\Platform\Speech\SpeechConfiguration; use Symfony\Component\Cache\Adapter\TagAwareAdapterInterface; use Symfony\Contracts\Cache\CacheInterface; use Symfony\Contracts\Cache\ItemInterface; @@ -66,4 +67,9 @@ public function getModelCatalog(): ModelCatalogInterface { return $this->platform->getModelCatalog(); } + + public function getSpeechConfiguration(): ?SpeechConfiguration + { + return $this->platform->getSpeechConfiguration(); + } } diff --git a/src/platform/src/Message/UserMessage.php b/src/platform/src/Message/UserMessage.php index 445af64c3..efd053bfe 100644 --- a/src/platform/src/Message/UserMessage.php +++ b/src/platform/src/Message/UserMessage.php @@ -11,6 +11,7 @@ namespace Symfony\AI\Platform\Message; +use Symfony\AI\Platform\Exception\RuntimeException; use Symfony\AI\Platform\Message\Content\Audio; use Symfony\AI\Platform\Message\Content\ContentInterface; use Symfony\AI\Platform\Message\Content\Image; @@ -71,6 +72,19 @@ public function hasAudioContent(): bool return false; } + public function getAudioContent(): Audio + { + foreach ($this->content as $content) { + if (!$content instanceof Audio) { + continue; + } + + return $content; + } + + throw new RuntimeException('No Audio content found.'); + } + public function hasImageContent(): bool { foreach ($this->content as $content) { diff --git a/src/platform/src/Platform.php b/src/platform/src/Platform.php index 16ff947c7..71be00bd6 100644 --- a/src/platform/src/Platform.php +++ b/src/platform/src/Platform.php @@ -18,6 +18,7 @@ use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface; use 
Symfony\AI\Platform\Result\DeferredResult; use Symfony\AI\Platform\Result\RawResultInterface; +use Symfony\AI\Platform\Speech\SpeechConfiguration; /** * @author Christopher Hertel @@ -43,6 +44,7 @@ public function __construct( iterable $resultConverters, private readonly ModelCatalogInterface $modelCatalog, private ?Contract $contract = null, + private readonly ?SpeechConfiguration $speechConfiguration = null, private readonly ?EventDispatcherInterface $eventDispatcher = null, ) { $this->contract = $contract ?? Contract::create(); @@ -77,6 +79,11 @@ public function getModelCatalog(): ModelCatalogInterface return $this->modelCatalog; } + public function getSpeechConfiguration(): ?SpeechConfiguration + { + return $this->speechConfiguration; + } + /** * @param array $payload * @param array $options diff --git a/src/platform/src/PlatformInterface.php b/src/platform/src/PlatformInterface.php index 109ad018e..fc3e8d26b 100644 --- a/src/platform/src/PlatformInterface.php +++ b/src/platform/src/PlatformInterface.php @@ -13,6 +13,7 @@ use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface; use Symfony\AI\Platform\Result\DeferredResult; +use Symfony\AI\Platform\Speech\SpeechConfiguration; /** * @author Christopher Hertel @@ -27,4 +28,6 @@ interface PlatformInterface public function invoke(string $model, array|string|object $input, array $options = []): DeferredResult; public function getModelCatalog(): ModelCatalogInterface; + + public function getSpeechConfiguration(): ?SpeechConfiguration; } diff --git a/src/platform/src/Result/BaseResult.php b/src/platform/src/Result/BaseResult.php index fb447594d..93e3392f3 100644 --- a/src/platform/src/Result/BaseResult.php +++ b/src/platform/src/Result/BaseResult.php @@ -12,6 +12,7 @@ namespace Symfony\AI\Platform\Result; use Symfony\AI\Platform\Metadata\MetadataAwareTrait; +use Symfony\AI\Platform\Speech\SpeechBagAwareTrait; /** * Base result of converted result classes. 
@@ -22,4 +23,5 @@ abstract class BaseResult implements ResultInterface { use MetadataAwareTrait; use RawResultAwareTrait; + use SpeechBagAwareTrait; } diff --git a/src/platform/src/Result/DeferredResult.php b/src/platform/src/Result/DeferredResult.php index ea9ce05cd..007df8d91 100644 --- a/src/platform/src/Result/DeferredResult.php +++ b/src/platform/src/Result/DeferredResult.php @@ -15,6 +15,7 @@ use Symfony\AI\Platform\Exception\UnexpectedResultTypeException; use Symfony\AI\Platform\Metadata\MetadataAwareTrait; use Symfony\AI\Platform\ResultConverterInterface; +use Symfony\AI\Platform\Speech\SpeechBagAwareTrait; use Symfony\AI\Platform\Vector\Vector; /** @@ -23,6 +24,7 @@ final class DeferredResult { use MetadataAwareTrait; + use SpeechBagAwareTrait; private bool $isConverted = false; private ResultInterface $convertedResult; @@ -55,6 +57,10 @@ public function getResult(): ResultInterface $this->convertedResult->getMetadata()->set($this->getMetadata()->all()); + foreach ($this->getSpeechBag() as $speech) { + $this->convertedResult->addSpeech($speech); + } + return $this->convertedResult; } diff --git a/src/platform/src/Result/ResultInterface.php b/src/platform/src/Result/ResultInterface.php index 63ac7a435..c5844e241 100644 --- a/src/platform/src/Result/ResultInterface.php +++ b/src/platform/src/Result/ResultInterface.php @@ -13,6 +13,7 @@ use Symfony\AI\Platform\Metadata\Metadata; use Symfony\AI\Platform\Result\Exception\RawResultAlreadySetException; +use Symfony\AI\Platform\Speech\Speech; /** * @author Christopher Hertel @@ -33,4 +34,8 @@ public function getRawResult(): ?RawResultInterface; * @throws RawResultAlreadySetException if the result is tried to be set more than once */ public function setRawResult(RawResultInterface $rawResult): void; + + public function addSpeech(Speech $speech): void; + + public function getSpeech(string $identifier): Speech; } diff --git a/src/platform/src/Speech/Speech.php b/src/platform/src/Speech/Speech.php new file mode 100644 
index 000000000..e5e9ab2d9 --- /dev/null +++ b/src/platform/src/Speech/Speech.php @@ -0,0 +1,48 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\AI\Platform\Speech; + +use Symfony\AI\Platform\Result\DeferredResult; + +/** + * @author Guillaume Loulier + */ +final class Speech +{ + /** + * @param string|array $payload + */ + public function __construct( + private readonly string|array $payload, + private readonly DeferredResult $result, + private readonly string $identifier, + ) { + } + + /** + * @return string|array + */ + public function getPayload(): string|array + { + return $this->payload; + } + + public function asBinary(): string + { + return $this->result->asBinary(); + } + + public function getIdentifier(): string + { + return $this->identifier; + } +} diff --git a/src/platform/src/Speech/SpeechBag.php b/src/platform/src/Speech/SpeechBag.php new file mode 100644 index 000000000..79d6ff73a --- /dev/null +++ b/src/platform/src/Speech/SpeechBag.php @@ -0,0 +1,47 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\AI\Platform\Speech; + +use Symfony\AI\Platform\Exception\InvalidArgumentException; + +/** + * @author Guillaume Loulier + * + * @implements \IteratorAggregate + */ +final class SpeechBag implements \IteratorAggregate, \Countable +{ + /** + * @var Speech[] + */ + private array $speeches = []; + + public function add(Speech $speech): void + { + $this->speeches[$speech->getIdentifier()] = $speech; + } + + public function get(string $identifier): Speech + { + return $this->speeches[$identifier] ?? 
throw new InvalidArgumentException(\sprintf('No speech with identifier "%s" found.', $identifier)); + } + + public function count(): int + { + return \count($this->speeches); + } + + public function getIterator(): \Traversable + { + return new \ArrayIterator($this->speeches); + } +} diff --git a/src/platform/src/Speech/SpeechBagAwareTrait.php b/src/platform/src/Speech/SpeechBagAwareTrait.php new file mode 100644 index 000000000..8f561605f --- /dev/null +++ b/src/platform/src/Speech/SpeechBagAwareTrait.php @@ -0,0 +1,43 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\AI\Platform\Speech; + +/** + * @author Guillaume Loulier + */ +trait SpeechBagAwareTrait +{ + private ?SpeechBag $speechBag = null; + + /** + * Registers a speech in the bag, keyed by its identifier. + * + * The bag is created lazily on first use. + */ + public function addSpeech(Speech $speech): void + { + $this->getSpeechBag()->add($speech); + } + + /** + * @throws \Symfony\AI\Platform\Exception\InvalidArgumentException when no speech matches the identifier + */ + public function getSpeech(string $identifier): Speech + { + return $this->getSpeechBag()->get($identifier); + } + + public function getSpeechBag(): SpeechBag + { + return $this->speechBag ??= new SpeechBag(); + } +} diff --git a/src/platform/src/Speech/SpeechConfiguration.php b/src/platform/src/Speech/SpeechConfiguration.php new file mode 100644 index 000000000..dd84d1714 --- /dev/null +++ b/src/platform/src/Speech/SpeechConfiguration.php @@ -0,0 +1,31 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code.
+ */ + +namespace Symfony\AI\Platform\Speech; + +/** + * @author Guillaume Loulier + */ +final class SpeechConfiguration +{ + /** + * @param array $ttsExtraOptions + * @param array $sttExtraOptions + */ + public function __construct( + public readonly ?string $ttsModel = null, + public readonly ?string $ttsVoice = null, + public readonly array $ttsExtraOptions = [], + public readonly ?string $sttModel = null, + public readonly array $sttExtraOptions = [], + ) { + } +} diff --git a/src/platform/src/Speech/SpeechListenerInterface.php b/src/platform/src/Speech/SpeechListenerInterface.php new file mode 100644 index 000000000..a8dbce764 --- /dev/null +++ b/src/platform/src/Speech/SpeechListenerInterface.php @@ -0,0 +1,32 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\AI\Platform\Speech; + +use Symfony\AI\Platform\Message\Content\Text; + +/** + * @author Guillaume Loulier + */ +interface SpeechListenerInterface +{ + /** + * @param array|string|object $input The input data + * @param array $options The options to customize the text generation + */ + public function listen(array|string|object $input, array $options): Text; + + /** + * @param array|string|object $input + * @param array $options + */ + public function support(array|string|object $input, array $options): bool; +} diff --git a/src/platform/src/Speech/SpeechProviderInterface.php b/src/platform/src/Speech/SpeechProviderInterface.php new file mode 100644 index 000000000..fa4401918 --- /dev/null +++ b/src/platform/src/Speech/SpeechProviderInterface.php @@ -0,0 +1,30 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +namespace Symfony\AI\Platform\Speech; + +use Symfony\AI\Platform\Result\DeferredResult; + +/** + * @author Guillaume Loulier + */ +interface SpeechProviderInterface +{ + /** + * @param array $options + */ + public function generate(DeferredResult $result, array $options): Speech; + + /** + * @param array $options + */ + public function support(DeferredResult $result, array $options): bool; +} diff --git a/src/platform/src/Speech/SpeechProviderListener.php b/src/platform/src/Speech/SpeechProviderListener.php new file mode 100644 index 000000000..d95a97f43 --- /dev/null +++ b/src/platform/src/Speech/SpeechProviderListener.php @@ -0,0 +1,82 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\AI\Platform\Speech; + +use Symfony\AI\Platform\Event\InvocationEvent; +use Symfony\AI\Platform\Event\ResultEvent; +use Symfony\AI\Platform\Message\Message; +use Symfony\AI\Platform\Message\MessageBag; +use Symfony\Component\EventDispatcher\EventSubscriberInterface; + +/** + * @author Guillaume Loulier + */ +final class SpeechProviderListener implements EventSubscriberInterface +{ + /** + * @param SpeechProviderInterface[] $speechProviders + * @param SpeechListenerInterface[] $speechListeners + */ + public function __construct( + private readonly iterable $speechProviders, + private readonly iterable $speechListeners, + ) { + } + + public static function getSubscribedEvents(): array + { + return [ + InvocationEvent::class => ['onInvocation', 255], + ResultEvent::class => 'onResult', + ]; + } + + /** + * Swaps the input for the text heard by the first supporting listener, then stops for every input type. + */ + public function onInvocation(InvocationEvent $event): void + { + $input = $event->getInput(); + $options = $event->getOptions(); + + foreach ($this->speechListeners as $speechListener) { + if (!$speechListener->support($input, $options)) { + continue; + } + + $text = $speechListener->listen($input, $options); + +
// Keep the input shape: a MessageBag stays a MessageBag, anything else becomes the bare text. + $event->setInput($input instanceof MessageBag ? new MessageBag(Message::ofUser($text)) : $text); + + return; + } + } + + /** + * Attaches to the deferred result the speech generated by every supporting provider. + */ + public function onResult(ResultEvent $event): void + { + $deferredResult = $event->getDeferredResult(); + $options = $event->getOptions(); + + foreach ($this->speechProviders as $speechProvider) { + if (!$speechProvider->support($deferredResult, $options)) { + continue; + } + + $deferredResult->addSpeech($speechProvider->generate($deferredResult, $options)); + + $event->setDeferredResult($deferredResult); + } + } +} diff --git a/src/platform/src/Test/InMemoryPlatform.php b/src/platform/src/Test/InMemoryPlatform.php index 1586c0f20..853b609e8 100644 --- a/src/platform/src/Test/InMemoryPlatform.php +++ b/src/platform/src/Test/InMemoryPlatform.php @@ -19,6 +19,7 @@ use Symfony\AI\Platform\Result\InMemoryRawResult; use Symfony\AI\Platform\Result\ResultInterface; use Symfony\AI\Platform\Result\TextResult; +use Symfony\AI\Platform\Speech\SpeechConfiguration; /** * A fake implementation of PlatformInterface that returns fixed or callable responses. @@ -31,6 +32,8 @@ class InMemoryPlatform implements PlatformInterface { private readonly ModelCatalogInterface $modelCatalog; + private readonly SpeechConfiguration $speechConfiguration; + /** * The mock result can be a string or a callable that returns a string. * If it's a closure, it receives the model, input, and optionally options as parameters like a real platform call.
@@ -38,6 +41,7 @@ class InMemoryPlatform implements PlatformInterface public function __construct(private readonly \Closure|string $mockResult) { $this->modelCatalog = new FallbackModelCatalog(); + $this->speechConfiguration = new SpeechConfiguration(); } public function invoke(string $model, array|string|object $input, array $options = []): DeferredResult @@ -62,6 +66,11 @@ public function getModelCatalog(): ModelCatalogInterface return $this->modelCatalog; } + public function getSpeechConfiguration(): ?SpeechConfiguration + { + return $this->speechConfiguration; + } + /** * Creates a ResultPromise from a ResultInterface. * diff --git a/src/platform/tests/Bridge/ElevenLabs/ElevenLabsSpeechListenerTest.php b/src/platform/tests/Bridge/ElevenLabs/ElevenLabsSpeechListenerTest.php new file mode 100644 index 000000000..229e42114 --- /dev/null +++ b/src/platform/tests/Bridge/ElevenLabs/ElevenLabsSpeechListenerTest.php @@ -0,0 +1,127 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +namespace Symfony\AI\Platform\Tests\Bridge\ElevenLabs; + +use PHPUnit\Framework\TestCase; +use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabs; +use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechListener; +use Symfony\AI\Platform\Capability; +use Symfony\AI\Platform\Message\Content\Audio; +use Symfony\AI\Platform\Message\Content\Text; +use Symfony\AI\Platform\Message\Message; +use Symfony\AI\Platform\Message\MessageBag; +use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface; +use Symfony\AI\Platform\PlatformInterface; +use Symfony\AI\Platform\Result\DeferredResult; +use Symfony\AI\Platform\Result\RawResultInterface; +use Symfony\AI\Platform\Result\TextResult; +use Symfony\AI\Platform\ResultConverterInterface; +use Symfony\AI\Platform\Speech\SpeechConfiguration; + +final class ElevenLabsSpeechListenerTest extends TestCase +{ + public function testListenerCannotSupportOnMissingSpeechConfiguration() + { + $platform = $this->createMock(PlatformInterface::class); + $platform->expects($this->once())->method('getSpeechConfiguration')->willReturn(null); + + $speechListener = new ElevenLabsSpeechListener($platform); + + $this->assertFalse($speechListener->support([], [])); + } + + public function testListenerCannotSupportOnWrongModel() + { + $speechConfiguration = new SpeechConfiguration(sttModel: 'foo'); + + $model = new ElevenLabs('foo'); + + $modelCatalog = $this->createMock(ModelCatalogInterface::class); + $modelCatalog->expects($this->once())->method('getModel')->willReturn($model); + + $platform = $this->createMock(PlatformInterface::class); + $platform->expects($this->once())->method('getSpeechConfiguration')->willReturn($speechConfiguration); + $platform->expects($this->once())->method('getModelCatalog')->willReturn($modelCatalog); + + $speechListener = new ElevenLabsSpeechListener($platform); + + $this->assertFalse($speechListener->support([], [])); + } + + public function testListenerCanSupportOnValidModel() + { + $speechConfiguration = new 
SpeechConfiguration(sttModel: 'foo'); + + $model = new ElevenLabs('foo', [ + Capability::SPEECH_TO_TEXT, + ]); + + $modelCatalog = $this->createMock(ModelCatalogInterface::class); + $modelCatalog->expects($this->once())->method('getModel')->willReturn($model); + + $platform = $this->createMock(PlatformInterface::class); + $platform->expects($this->once())->method('getSpeechConfiguration')->willReturn($speechConfiguration); + $platform->expects($this->once())->method('getModelCatalog')->willReturn($modelCatalog); + + $speechListener = new ElevenLabsSpeechListener($platform); + + $this->assertTrue($speechListener->support([], [])); + } + + public function testListenerCanListenOnArrayInput() + { + $configuration = new SpeechConfiguration(sttModel: 'foo'); + + $rawResult = $this->createMock(RawResultInterface::class); + + $resultConverter = $this->createMock(ResultConverterInterface::class); + $resultConverter->expects($this->once())->method('convert')->willReturn(new TextResult('foo')); + + $deferredResult = new DeferredResult($resultConverter, $rawResult); + + $platform = $this->createMock(PlatformInterface::class); + $platform->expects($this->once())->method('getSpeechConfiguration')->willReturn($configuration); + $platform->expects($this->once())->method('invoke')->with('foo')->willReturn($deferredResult); + + $speechListener = new ElevenLabsSpeechListener($platform); + + $text = $speechListener->listen(['text' => 'foo'], []); + + $this->assertInstanceOf(Text::class, $text); + $this->assertSame('foo', $text->getText()); + } + + public function testListenerCanListenOnMessageBag() + { + $configuration = new SpeechConfiguration(sttModel: 'foo'); + + $rawResult = $this->createMock(RawResultInterface::class); + + $resultConverter = $this->createMock(ResultConverterInterface::class); + $resultConverter->expects($this->once())->method('convert')->willReturn(new TextResult('foo')); + + $deferredResult = new DeferredResult($resultConverter, $rawResult); + + $platform = 
$this->createMock(PlatformInterface::class); + $platform->expects($this->once())->method('getSpeechConfiguration')->willReturn($configuration); + $platform->expects($this->once())->method('invoke')->with('foo')->willReturn($deferredResult); + + $speechListener = new ElevenLabsSpeechListener($platform); + + $text = $speechListener->listen(new MessageBag( + Message::ofUser(Audio::fromFile(\dirname(__DIR__, 5).'/fixtures/audio.mp3')), + ), []); + + $this->assertInstanceOf(Text::class, $text); + $this->assertSame('foo', $text->getText()); + } +} diff --git a/src/platform/tests/Bridge/ElevenLabs/ElevenLabsSpeechProviderTest.php b/src/platform/tests/Bridge/ElevenLabs/ElevenLabsSpeechProviderTest.php new file mode 100644 index 000000000..7010b6329 --- /dev/null +++ b/src/platform/tests/Bridge/ElevenLabs/ElevenLabsSpeechProviderTest.php @@ -0,0 +1,115 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\AI\Platform\Tests\Bridge\ElevenLabs; + +use PHPUnit\Framework\TestCase; +use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabs; +use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechProvider; +use Symfony\AI\Platform\Capability; +use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface; +use Symfony\AI\Platform\PlatformInterface; +use Symfony\AI\Platform\Result\DeferredResult; +use Symfony\AI\Platform\Result\RawResultInterface; +use Symfony\AI\Platform\Result\TextResult; +use Symfony\AI\Platform\ResultConverterInterface; +use Symfony\AI\Platform\Speech\SpeechConfiguration; + +final class ElevenLabsSpeechProviderTest extends TestCase +{ + public function testProviderCannotSupportOnMissingSpeechConfiguration() + { + $rawResult = $this->createMock(RawResultInterface::class); + $resultConverter = $this->createMock(ResultConverterInterface::class); + + $deferredResult = new DeferredResult($resultConverter, $rawResult); + + $platform = 
$this->createMock(PlatformInterface::class); + $platform->expects($this->once())->method('getSpeechConfiguration')->willReturn(null); + + $speechListener = new ElevenLabsSpeechProvider($platform); + + $this->assertFalse($speechListener->support($deferredResult, [])); + } + + public function testProviderCannotSupportOnWrongModel() + { + $speechConfiguration = new SpeechConfiguration(ttsModel: 'foo'); + $model = new ElevenLabs('foo'); + + $rawResult = $this->createMock(RawResultInterface::class); + $resultConverter = $this->createMock(ResultConverterInterface::class); + + $deferredResult = new DeferredResult($resultConverter, $rawResult); + + $modelCatalog = $this->createMock(ModelCatalogInterface::class); + $modelCatalog->expects($this->once())->method('getModel')->willReturn($model); + + $platform = $this->createMock(PlatformInterface::class); + $platform->expects($this->once())->method('getSpeechConfiguration')->willReturn($speechConfiguration); + $platform->expects($this->once())->method('getModelCatalog')->willReturn($modelCatalog); + + $speechListener = new ElevenLabsSpeechProvider($platform); + + $this->assertFalse($speechListener->support($deferredResult, [])); + } + + public function testProviderCanSupportOnValidModel() + { + $speechConfiguration = new SpeechConfiguration(ttsModel: 'foo'); + $model = new ElevenLabs('foo', [ + Capability::TEXT_TO_SPEECH, + ]); + + $rawResult = $this->createMock(RawResultInterface::class); + $resultConverter = $this->createMock(ResultConverterInterface::class); + + $deferredResult = new DeferredResult($resultConverter, $rawResult); + + $modelCatalog = $this->createMock(ModelCatalogInterface::class); + $modelCatalog->expects($this->once())->method('getModel')->willReturn($model); + + $platform = $this->createMock(PlatformInterface::class); + $platform->expects($this->once())->method('getSpeechConfiguration')->willReturn($speechConfiguration); + 
$platform->expects($this->once())->method('getModelCatalog')->willReturn($modelCatalog); + + $speechListener = new ElevenLabsSpeechProvider($platform); + + $this->assertTrue($speechListener->support($deferredResult, [])); + } + + public function testProviderCanGenerate() + { + $configuration = new SpeechConfiguration(ttsModel: 'foo', ttsVoice: 'bar'); + + $rawResult = $this->createMock(RawResultInterface::class); + + $resultConverter = $this->createMock(ResultConverterInterface::class); + $resultConverter->expects($this->once())->method('convert')->willReturn(new TextResult('foo')); + + $secondResultConverter = $this->createMock(ResultConverterInterface::class); + $secondResultConverter->expects($this->never())->method('convert'); + + $deferredResult = new DeferredResult($resultConverter, $rawResult); + + $speechResult = new DeferredResult($secondResultConverter, $rawResult); + + $platform = $this->createMock(PlatformInterface::class); + $platform->expects($this->once())->method('getSpeechConfiguration')->willReturn($configuration); + $platform->expects($this->once())->method('invoke')->willReturn($speechResult); + + $speechListener = new ElevenLabsSpeechProvider($platform); + + $speech = $speechListener->generate($deferredResult, []); + + $this->assertSame('eleven_labs', $speech->getIdentifier()); + } +} diff --git a/src/platform/tests/Speech/SpeechBagTest.php b/src/platform/tests/Speech/SpeechBagTest.php new file mode 100644 index 000000000..d26e8847c --- /dev/null +++ b/src/platform/tests/Speech/SpeechBagTest.php @@ -0,0 +1,44 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +namespace Symfony\AI\Platform\Tests\Speech; + +use PHPUnit\Framework\TestCase; +use Symfony\AI\Platform\Exception\InvalidArgumentException; +use Symfony\AI\Platform\Result\DeferredResult; +use Symfony\AI\Platform\Result\RawResultInterface; +use Symfony\AI\Platform\ResultConverterInterface; +use Symfony\AI\Platform\Speech\Speech; +use Symfony\AI\Platform\Speech\SpeechBag; + +final class SpeechBagTest extends TestCase +{ + public function testBagCanStoreSpeech() + { + $converter = $this->createMock(ResultConverterInterface::class); + $rawResult = $this->createMock(RawResultInterface::class); + + $result = new DeferredResult($converter, $rawResult); + + $bag = new SpeechBag(); + + $bag->add(new Speech([], $result, 'foo')); + + $this->assertCount(1, $bag); + + $this->assertInstanceOf(Speech::class, $bag->get('foo')); + + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('No speech with identifier "bar" found.'); + $this->expectExceptionCode(0); + $bag->get('bar'); + } +} diff --git a/src/platform/tests/Speech/SpeechProviderListenerTest.php b/src/platform/tests/Speech/SpeechProviderListenerTest.php new file mode 100644 index 000000000..21534e1a4 --- /dev/null +++ b/src/platform/tests/Speech/SpeechProviderListenerTest.php @@ -0,0 +1,133 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
namespace Symfony\AI\Platform\Tests\Speech;

use PHPUnit\Framework\TestCase;
use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabs;
use Symfony\AI\Platform\Event\InvocationEvent;
use Symfony\AI\Platform\Event\ResultEvent;
use Symfony\AI\Platform\Message\Content\Text;
use Symfony\AI\Platform\Message\MessageBag;
use Symfony\AI\Platform\Result\DeferredResult;
use Symfony\AI\Platform\Result\RawResultInterface;
use Symfony\AI\Platform\ResultConverterInterface;
use Symfony\AI\Platform\Speech\Speech;
use Symfony\AI\Platform\Speech\SpeechListenerInterface;
use Symfony\AI\Platform\Speech\SpeechProviderInterface;
use Symfony\AI\Platform\Speech\SpeechProviderListener;

/**
 * Tests the event subscription of SpeechProviderListener as well as its
 * onInvocation() and onResult() handlers, using mocked speech listeners and
 * speech providers.
 *
 * Throughout these tests the first constructor argument receives the speech
 * providers and the second one the speech listeners.
 */
final class SpeechProviderListenerTest extends TestCase
{
    /**
     * Checks the subscription map: InvocationEvent is bound to
     * ['onInvocation', 255] and ResultEvent to 'onResult'.
     */
    public function testListenerIsConfigured()
    {
        $subscribedEvents = SpeechProviderListener::getSubscribedEvents();

        $this->assertArrayHasKey(InvocationEvent::class, $subscribedEvents);
        $this->assertArrayHasKey(ResultEvent::class, $subscribedEvents);
        $this->assertSame(['onInvocation', 255], $subscribedEvents[InvocationEvent::class]);
        $this->assertSame('onResult', $subscribedEvents[ResultEvent::class]);
    }

    /**
     * A speech listener whose support() answers false must be asked exactly
     * once and its listen() method must never be called.
     */
    public function testListenerCannotBeTriggeredWithoutSupporting()
    {
        $unsupportingListener = $this->createMock(SpeechListenerInterface::class);
        $unsupportingListener
            ->expects($this->once())
            ->method('support')
            ->willReturn(false);
        $unsupportingListener
            ->expects($this->never())
            ->method('listen');

        $subscriber = new SpeechProviderListener([], [$unsupportingListener]);
        $invocation = new InvocationEvent(new ElevenLabs('foo'), []);

        // No explicit assertion: the mock expectations above are the verification.
        $subscriber->onInvocation($invocation);
    }

    /**
     * With an array input and a supporting speech listener returning a Text,
     * the event input is a Text instance after onInvocation().
     */
    public function testListenerCanBeTriggeredWhenSupporting()
    {
        $supportingListener = $this->createMock(SpeechListenerInterface::class);
        $supportingListener
            ->expects($this->once())
            ->method('support')
            ->willReturn(true);
        $supportingListener
            ->expects($this->once())
            ->method('listen')
            ->willReturn(new Text('foo'));

        $subscriber = new SpeechProviderListener([], [$supportingListener]);
        $invocation = new InvocationEvent(new ElevenLabs('foo'), []);

        $subscriber->onInvocation($invocation);

        $this->assertInstanceOf(Text::class, $invocation->getInput());
    }

    /**
     * With an empty MessageBag as input and a supporting speech listener
     * returning a Text, the event input is still a MessageBag after
     * onInvocation() and it counts exactly one element.
     */
    public function testListenerCanBeTriggeredWhenSupportingWithMessageBag()
    {
        $supportingListener = $this->createMock(SpeechListenerInterface::class);
        $supportingListener
            ->expects($this->once())
            ->method('support')
            ->willReturn(true);
        $supportingListener
            ->expects($this->once())
            ->method('listen')
            ->willReturn(new Text('foo'));

        $subscriber = new SpeechProviderListener([], [$supportingListener]);
        $invocation = new InvocationEvent(new ElevenLabs('foo'), new MessageBag());

        $subscriber->onInvocation($invocation);

        $input = $invocation->getInput();
        $this->assertInstanceOf(MessageBag::class, $input);
        $this->assertCount(1, $input);
    }

    /**
     * A speech provider whose support() answers false must be asked exactly
     * once and its generate() method must never be called.
     */
    public function testProviderCannotBeTriggeredWithoutSupporting()
    {
        $rawResultMock = $this->createMock(RawResultInterface::class);
        $converterMock = $this->createMock(ResultConverterInterface::class);
        $platformResult = new DeferredResult($converterMock, $rawResultMock);

        $unsupportingProvider = $this->createMock(SpeechProviderInterface::class);
        $unsupportingProvider
            ->expects($this->once())
            ->method('support')
            ->willReturn(false);
        $unsupportingProvider
            ->expects($this->never())
            ->method('generate');

        $subscriber = new SpeechProviderListener([$unsupportingProvider], []);
        $resultEvent = new ResultEvent(new ElevenLabs('foo'), $platformResult);

        // No explicit assertion: the mock expectations above are the verification.
        $subscriber->onResult($resultEvent);
    }

    /**
     * With a supporting speech provider whose generate() returns a Speech built
     * with the value "foo", the event keeps the very same deferred result after
     * onResult() and that result returns the generated Speech for getSpeech('foo').
     */
    public function testProviderCanBeTriggeredWhenSupporting()
    {
        $rawResultMock = $this->createMock(RawResultInterface::class);
        $converterMock = $this->createMock(ResultConverterInterface::class);

        $platformResult = new DeferredResult($converterMock, $rawResultMock);
        // The Speech carries its own deferred result, distinct from the platform one.
        $generatedSpeech = new Speech([], new DeferredResult($converterMock, $rawResultMock), 'foo');

        $supportingProvider = $this->createMock(SpeechProviderInterface::class);
        $supportingProvider
            ->expects($this->once())
            ->method('support')
            ->willReturn(true);
        $supportingProvider
            ->expects($this->once())
            ->method('generate')
            ->willReturn($generatedSpeech);

        $subscriber = new SpeechProviderListener([$supportingProvider], []);
        $resultEvent = new ResultEvent(new ElevenLabs('foo'), $platformResult);

        $subscriber->onResult($resultEvent);

        $this->assertSame($platformResult, $resultEvent->getDeferredResult());
        $this->assertSame($generatedSpeech, $resultEvent->getDeferredResult()->getSpeech('foo'));
    }
}