diff --git a/demo/config/packages/ai.yaml b/demo/config/packages/ai.yaml index 27b3921fc..df1c3c3ba 100644 --- a/demo/config/packages/ai.yaml +++ b/demo/config/packages/ai.yaml @@ -53,11 +53,15 @@ ai: chroma_db: symfonycon: collection: 'symfony_blog' - indexer: - default: + vectorizer: + openai_embeddings: model: class: 'Symfony\AI\Platform\Bridge\OpenAi\Embeddings' name: !php/const Symfony\AI\Platform\Bridge\OpenAi\Embeddings::TEXT_ADA_002 + indexer: + default: + vectorizer: 'ai.vectorizer.openai_embeddings' + store: 'ai.store.chroma_db.symfonycon' services: _defaults: diff --git a/src/ai-bundle/config/options.php b/src/ai-bundle/config/options.php index ffd5556d0..67b45c32f 100644 --- a/src/ai-bundle/config/options.php +++ b/src/ai-bundle/config/options.php @@ -16,6 +16,7 @@ use Probots\Pinecone\Client as PineconeClient; use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory; use Symfony\AI\Platform\PlatformInterface; +use Symfony\AI\Store\Document\VectorizerInterface; use Symfony\AI\Store\StoreInterface; return static function (DefinitionConfigurator $configurator): void { @@ -371,14 +372,10 @@ ->end() ->end() ->end() - ->arrayNode('indexer') + ->arrayNode('vectorizer') ->useAttributeAsKey('name') ->arrayPrototype() ->children() - ->scalarNode('store') - ->info('Service name of store') - ->defaultValue(StoreInterface::class) - ->end() ->scalarNode('platform') ->info('Service name of platform') ->defaultValue(PlatformInterface::class) @@ -395,6 +392,21 @@ ->end() ->end() ->end() + ->arrayNode('indexer') + ->useAttributeAsKey('name') + ->arrayPrototype() + ->children() + ->scalarNode('vectorizer') + ->info('Service name of vectorizer') + ->defaultValue(VectorizerInterface::class) + ->end() + ->scalarNode('store') + ->info('Service name of store') + ->defaultValue(StoreInterface::class) + ->end() + ->end() + ->end() + ->end() ->end() ; }; diff --git a/src/ai-bundle/doc/index.rst b/src/ai-bundle/doc/index.rst index 6425fe112..4e0b5678f 100644 --- 
a/src/ai-bundle/doc/index.rst +++ b/src/ai-bundle/doc/index.rst @@ -113,13 +113,28 @@ Configuration memory: ollama: strategy: 'manhattan' - indexer: - default: - # platform: 'ai.platform.mistral' - # store: 'ai.store.chroma_db.default' + vectorizer: + # Reusable vectorizer configurations + openai_embeddings: + platform: 'ai.platform.openai' + model: + class: 'Symfony\AI\Platform\Bridge\OpenAi\Embeddings' + name: !php/const Symfony\AI\Platform\Bridge\OpenAi\Embeddings::TEXT_EMBEDDING_3_SMALL + options: + dimensions: 512 + mistral_embeddings: + platform: 'ai.platform.mistral' model: class: 'Symfony\AI\Platform\Bridge\Mistral\Embeddings' name: !php/const Symfony\AI\Platform\Bridge\Mistral\Embeddings::MISTRAL_EMBED + indexer: + default: + vectorizer: 'ai.vectorizer.openai_embeddings' + store: 'ai.store.chroma_db.default' + + research: + vectorizer: 'ai.vectorizer.mistral_embeddings' + store: 'ai.store.memory.research' Usage ----- @@ -319,6 +334,66 @@ To disable token usage tracking for an agent, set the ``track_token_usage`` opti class: 'Symfony\AI\Platform\Bridge\OpenAi\Gpt' name: !php/const Symfony\AI\Platform\Bridge\OpenAi\Gpt::GPT_4O_MINI +Vectorizers +----------- + +Vectorizers are components that convert text documents into vector embeddings for storage and retrieval. +They can be configured once and reused across multiple indexers, providing better maintainability and consistency. + +**Configuring Vectorizers** + +Vectorizers are defined in the ``vectorizer`` section of your configuration: + +.. 
code-block:: yaml + + ai: + vectorizer: + openai_small: + platform: 'ai.platform.openai' + model: + class: 'Symfony\AI\Platform\Bridge\OpenAi\Embeddings' + name: !php/const Symfony\AI\Platform\Bridge\OpenAi\Embeddings::TEXT_EMBEDDING_3_SMALL + options: + dimensions: 512 + + openai_large: + platform: 'ai.platform.openai' + model: + class: 'Symfony\AI\Platform\Bridge\OpenAi\Embeddings' + name: !php/const Symfony\AI\Platform\Bridge\OpenAi\Embeddings::TEXT_EMBEDDING_3_LARGE + + mistral_embed: + platform: 'ai.platform.mistral' + model: + class: 'Symfony\AI\Platform\Bridge\Mistral\Embeddings' + name: !php/const Symfony\AI\Platform\Bridge\Mistral\Embeddings::MISTRAL_EMBED + +**Using Vectorizers in Indexers** + +Once configured, each vectorizer is registered as the service ``ai.vectorizer.<name>`` and can be referenced by that service ID in indexer configurations: + +.. code-block:: yaml + + ai: + indexer: + documents: + vectorizer: 'ai.vectorizer.openai_small' + store: 'ai.store.chroma_db.documents' + + research: + vectorizer: 'ai.vectorizer.openai_large' + store: 'ai.store.chroma_db.research' + + knowledge_base: + vectorizer: 'ai.vectorizer.mistral_embed' + store: 'ai.store.memory.kb' + +**Benefits of Configured Vectorizers** + +* **Reusability**: Define once, use in multiple indexers +* **Consistency**: Ensure all indexers using the same vectorizer have identical embedding configuration +* **Maintainability**: Change vectorizer settings in one place + Profiler -------- diff --git a/src/ai-bundle/src/AiBundle.php b/src/ai-bundle/src/AiBundle.php index aec2af5df..bbcc69299 100644 --- a/src/ai-bundle/src/AiBundle.php +++ b/src/ai-bundle/src/AiBundle.php @@ -148,6 +148,10 @@ public function loadExtension(array $config, ContainerConfigurator $container, C $builder->removeDefinition('ai.command.drop_store'); } + foreach ($config['vectorizer'] ?? 
[] as $vectorizerName => $vectorizer) { + $this->processVectorizerConfig($vectorizerName, $vectorizer, $builder); + } + foreach ($config['indexer'] as $indexerName => $indexer) { $this->processIndexerConfig($indexerName, $indexer, $builder); } @@ -1031,7 +1035,7 @@ private function processStoreConfig(string $type, array $stores, ContainerBuilde /** * @param array $config */ - private function processIndexerConfig(int|string $name, array $config, ContainerBuilder $container): void + private function processVectorizerConfig(int|string $name, array $config, ContainerBuilder $container): void { ['class' => $modelClass, 'name' => $modelName, 'options' => $options] = $config['model']; @@ -1048,16 +1052,23 @@ private function processIndexerConfig(int|string $name, array $config, Container } $modelDefinition->addTag('ai.model.embeddings_model'); - $container->setDefinition('ai.indexer.'.$name.'.model', $modelDefinition); + $container->setDefinition('ai.vectorizer.'.$name.'.model', $modelDefinition); $vectorizerDefinition = new Definition(Vectorizer::class, [ new Reference($config['platform']), - new Reference('ai.indexer.'.$name.'.model'), + new Reference('ai.vectorizer.'.$name.'.model'), ]); - $container->setDefinition('ai.indexer.'.$name.'.vectorizer', $vectorizerDefinition); + $vectorizerDefinition->addTag('ai.vectorizer', ['name' => (string) $name]); + $container->setDefinition('ai.vectorizer.'.$name, $vectorizerDefinition); + } + /** + * @param array $config + */ + private function processIndexerConfig(int|string $name, array $config, ContainerBuilder $container): void + { $definition = new Definition(Indexer::class, [ - new Reference('ai.indexer.'.$name.'.vectorizer'), + new Reference($config['vectorizer']), new Reference($config['store']), new Reference('logger', ContainerInterface::IGNORE_ON_INVALID_REFERENCE), ]); diff --git a/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php b/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php index 29dea496f..4eabe71df 100644 
--- a/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php +++ b/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php @@ -18,6 +18,8 @@ use PHPUnit\Framework\Attributes\UsesClass; use PHPUnit\Framework\TestCase; use Symfony\AI\AiBundle\AiBundle; +use Symfony\AI\Platform\Bridge\OpenAi\Embeddings; +use Symfony\AI\Store\Document\Vectorizer; use Symfony\Component\Config\Definition\Exception\InvalidConfigurationException; use Symfony\Component\DependencyInjection\ContainerBuilder; use Symfony\Component\DependencyInjection\Definition; @@ -591,6 +593,77 @@ public function testOpenAiPlatformWithInvalidRegion() ]); } + public function testVectorizerConfiguration() + { + $container = $this->buildContainer([ + 'ai' => [ + 'vectorizer' => [ + 'my_vectorizer' => [ + 'platform' => 'my_platform_service_id', + 'model' => [ + 'class' => 'Symfony\AI\Platform\Bridge\OpenAi\Embeddings', + 'name' => 'text-embedding-3-small', + 'options' => ['dimensions' => 512], + ], + ], + ], + ], + ]); + + $this->assertTrue($container->hasDefinition('ai.vectorizer.my_vectorizer')); + $this->assertTrue($container->hasDefinition('ai.vectorizer.my_vectorizer.model')); + + $vectorizerDefinition = $container->getDefinition('ai.vectorizer.my_vectorizer'); + $this->assertSame(Vectorizer::class, $vectorizerDefinition->getClass()); + $this->assertTrue($vectorizerDefinition->hasTag('ai.vectorizer')); + + $modelDefinition = $container->getDefinition('ai.vectorizer.my_vectorizer.model'); + $this->assertSame(Embeddings::class, $modelDefinition->getClass()); + $this->assertTrue($modelDefinition->hasTag('ai.model.embeddings_model')); + } + + public function testIndexerWithConfiguredVectorizer() + { + $container = $this->buildContainer([ + 'ai' => [ + 'store' => [ + 'memory' => [ + 'my_store' => [], + ], + ], + 'vectorizer' => [ + 'my_vectorizer' => [ + 'platform' => 'my_platform_service_id', + 'model' => [ + 'class' => 'Symfony\AI\Platform\Bridge\OpenAi\Embeddings', + 'name' => 'text-embedding-3-small', + ], 
+ ], + ], + 'indexer' => [ + 'my_indexer' => [ + 'vectorizer' => 'ai.vectorizer.my_vectorizer', + 'store' => 'ai.store.memory.my_store', + ], + ], + ], + ]); + + $this->assertTrue($container->hasDefinition('ai.indexer.my_indexer')); + $this->assertTrue($container->hasDefinition('ai.vectorizer.my_vectorizer')); + + $indexerDefinition = $container->getDefinition('ai.indexer.my_indexer'); + $arguments = $indexerDefinition->getArguments(); + + // First argument should be a reference to the vectorizer + $this->assertInstanceOf(Reference::class, $arguments[0]); + $this->assertSame('ai.vectorizer.my_vectorizer', (string) $arguments[0]); + + // Should not create model-specific vectorizer when using configured one + $this->assertFalse($container->hasDefinition('ai.indexer.my_indexer.vectorizer')); + $this->assertFalse($container->hasDefinition('ai.indexer.my_indexer.model')); + } + private function buildContainer(array $configuration): ContainerBuilder { $container = new ContainerBuilder(); @@ -838,9 +911,8 @@ private function getFullConfig(): array ], ], ], - 'indexer' => [ - 'my_text_indexer' => [ - 'store' => 'my_azure_search_store_service_id', + 'vectorizer' => [ + 'test_vectorizer' => [ 'platform' => 'mistral_platform_service_id', 'model' => [ 'class' => 'Symfony\AI\Platform\Bridge\Mistral\Embeddings', @@ -849,6 +921,12 @@ private function getFullConfig(): array ], ], ], + 'indexer' => [ + 'my_text_indexer' => [ + 'vectorizer' => 'ai.vectorizer.test_vectorizer', + 'store' => 'my_azure_search_store_service_id', + ], + ], ], ]; }