Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions demo/config/packages/ai.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,15 @@ ai:
chroma_db:
symfonycon:
collection: 'symfony_blog'
indexer:
default:
vectorizer:
openai_embeddings:
model:
class: 'Symfony\AI\Platform\Bridge\OpenAi\Embeddings'
name: !php/const Symfony\AI\Platform\Bridge\OpenAi\Embeddings::TEXT_ADA_002
indexer:
default:
vectorizer: 'ai.vectorizer.openai_embeddings'
store: 'ai.store.chroma_db.symfonycon'

services:
_defaults:
Expand Down
22 changes: 17 additions & 5 deletions src/ai-bundle/config/options.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
use Probots\Pinecone\Client as PineconeClient;
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory;
use Symfony\AI\Platform\PlatformInterface;
use Symfony\AI\Store\Document\VectorizerInterface;
use Symfony\AI\Store\StoreInterface;

return static function (DefinitionConfigurator $configurator): void {
Expand Down Expand Up @@ -371,14 +372,10 @@
->end()
->end()
->end()
->arrayNode('indexer')
->arrayNode('vectorizer')
->useAttributeAsKey('name')
->arrayPrototype()
->children()
->scalarNode('store')
->info('Service name of store')
->defaultValue(StoreInterface::class)
->end()
->scalarNode('platform')
->info('Service name of platform')
->defaultValue(PlatformInterface::class)
Expand All @@ -395,6 +392,21 @@
->end()
->end()
->end()
->arrayNode('indexer')
->useAttributeAsKey('name')
->arrayPrototype()
->children()
->scalarNode('vectorizer')
->info('Service name of vectorizer')
->defaultValue(VectorizerInterface::class)
->end()
->scalarNode('store')
->info('Service name of store')
->defaultValue(StoreInterface::class)
->end()
->end()
->end()
->end()
->end()
;
};
83 changes: 79 additions & 4 deletions src/ai-bundle/doc/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -113,13 +113,28 @@ Configuration
memory:
ollama:
strategy: 'manhattan'
indexer:
default:
# platform: 'ai.platform.mistral'
# store: 'ai.store.chroma_db.default'
vectorizer:
# Reusable vectorizer configurations
openai_embeddings:
platform: 'ai.platform.openai'
model:
class: 'Symfony\AI\Platform\Bridge\OpenAi\Embeddings'
name: !php/const Symfony\AI\Platform\Bridge\OpenAi\Embeddings::TEXT_EMBEDDING_3_SMALL
options:
dimensions: 512
mistral_embeddings:
platform: 'ai.platform.mistral'
model:
class: 'Symfony\AI\Platform\Bridge\Mistral\Embeddings'
name: !php/const Symfony\AI\Platform\Bridge\Mistral\Embeddings::MISTRAL_EMBED
indexer:
default:
vectorizer: 'ai.vectorizer.openai_embeddings'
store: 'ai.store.chroma_db.default'

research:
vectorizer: 'ai.vectorizer.mistral_embeddings'
store: 'ai.store.memory.research'

Usage
-----
Expand Down Expand Up @@ -319,6 +334,66 @@ To disable token usage tracking for an agent, set the ``track_token_usage`` opti
class: 'Symfony\AI\Platform\Bridge\OpenAi\Gpt'
name: !php/const Symfony\AI\Platform\Bridge\OpenAi\Gpt::GPT_4O_MINI

Vectorizers
-----------

Vectorizers are components that convert text documents into vector embeddings for storage and retrieval.
They can be configured once and reused across multiple indexers, providing better maintainability and consistency.

**Configuring Vectorizers**

Vectorizers are defined in the ``vectorizer`` section of your configuration:

.. code-block:: yaml

ai:
vectorizer:
openai_small:
platform: 'ai.platform.openai'
model:
class: 'Symfony\AI\Platform\Bridge\OpenAi\Embeddings'
name: !php/const Symfony\AI\Platform\Bridge\OpenAi\Embeddings::TEXT_EMBEDDING_3_SMALL
options:
dimensions: 512

openai_large:
platform: 'ai.platform.openai'
model:
class: 'Symfony\AI\Platform\Bridge\OpenAi\Embeddings'
name: !php/const Symfony\AI\Platform\Bridge\OpenAi\Embeddings::TEXT_EMBEDDING_3_LARGE

mistral_embed:
platform: 'ai.platform.mistral'
model:
class: 'Symfony\AI\Platform\Bridge\Mistral\Embeddings'
name: !php/const Symfony\AI\Platform\Bridge\Mistral\Embeddings::MISTRAL_EMBED

**Using Vectorizers in Indexers**

Once configured, each vectorizer is registered as the service ``ai.vectorizer.<name>`` and can be referenced by that service ID in indexer configurations:

.. code-block:: yaml

ai:
indexer:
documents:
vectorizer: 'ai.vectorizer.openai_small'
store: 'ai.store.chroma_db.documents'

research:
vectorizer: 'ai.vectorizer.openai_large'
store: 'ai.store.chroma_db.research'

knowledge_base:
vectorizer: 'ai.vectorizer.mistral_embed'
store: 'ai.store.memory.kb'

**Benefits of Configured Vectorizers**

* **Reusability**: Define once, use in multiple indexers
* **Consistency**: Ensure all indexers using the same vectorizer have identical embedding configuration
* **Maintainability**: Change vectorizer settings in one place

Profiler
--------

Expand Down
21 changes: 16 additions & 5 deletions src/ai-bundle/src/AiBundle.php
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,10 @@ public function loadExtension(array $config, ContainerConfigurator $container, C
$builder->removeDefinition('ai.command.drop_store');
}

foreach ($config['vectorizer'] ?? [] as $vectorizerName => $vectorizer) {
$this->processVectorizerConfig($vectorizerName, $vectorizer, $builder);
}

foreach ($config['indexer'] as $indexerName => $indexer) {
$this->processIndexerConfig($indexerName, $indexer, $builder);
}
Expand Down Expand Up @@ -1031,7 +1035,7 @@ private function processStoreConfig(string $type, array $stores, ContainerBuilde
/**
* @param array<string, mixed> $config
*/
private function processIndexerConfig(int|string $name, array $config, ContainerBuilder $container): void
private function processVectorizerConfig(string $name, array $config, ContainerBuilder $container): void
{
['class' => $modelClass, 'name' => $modelName, 'options' => $options] = $config['model'];

Expand All @@ -1048,16 +1052,23 @@ private function processIndexerConfig(int|string $name, array $config, Container
}

$modelDefinition->addTag('ai.model.embeddings_model');
$container->setDefinition('ai.indexer.'.$name.'.model', $modelDefinition);
$container->setDefinition('ai.vectorizer.'.$name.'.model', $modelDefinition);

$vectorizerDefinition = new Definition(Vectorizer::class, [
new Reference($config['platform']),
new Reference('ai.indexer.'.$name.'.model'),
new Reference('ai.vectorizer.'.$name.'.model'),
]);
$container->setDefinition('ai.indexer.'.$name.'.vectorizer', $vectorizerDefinition);
$vectorizerDefinition->addTag('ai.vectorizer', ['name' => $name]);
$container->setDefinition('ai.vectorizer.'.$name, $vectorizerDefinition);
}

/**
* @param array<string, mixed> $config
*/
private function processIndexerConfig(int|string $name, array $config, ContainerBuilder $container): void
{
$definition = new Definition(Indexer::class, [
new Reference('ai.indexer.'.$name.'.vectorizer'),
new Reference($config['vectorizer']),
new Reference($config['store']),
new Reference('logger', ContainerInterface::IGNORE_ON_INVALID_REFERENCE),
]);
Expand Down
84 changes: 81 additions & 3 deletions src/ai-bundle/tests/DependencyInjection/AiBundleTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
use PHPUnit\Framework\Attributes\UsesClass;
use PHPUnit\Framework\TestCase;
use Symfony\AI\AiBundle\AiBundle;
use Symfony\AI\Platform\Bridge\OpenAi\Embeddings;
use Symfony\AI\Store\Document\Vectorizer;
use Symfony\Component\Config\Definition\Exception\InvalidConfigurationException;
use Symfony\Component\DependencyInjection\ContainerBuilder;
use Symfony\Component\DependencyInjection\Definition;
Expand Down Expand Up @@ -591,6 +593,77 @@ public function testOpenAiPlatformWithInvalidRegion()
]);
}

/**
 * An entry under "ai.vectorizer" must register two definitions:
 * the vectorizer itself ("ai.vectorizer.<name>") and its embeddings
 * model ("ai.vectorizer.<name>.model"), each with the expected class and tag.
 */
public function testVectorizerConfiguration()
{
    $container = $this->buildContainer([
        'ai' => [
            'vectorizer' => [
                'my_vectorizer' => [
                    'platform' => 'my_platform_service_id',
                    'model' => [
                        'class' => 'Symfony\AI\Platform\Bridge\OpenAi\Embeddings',
                        'name' => 'text-embedding-3-small',
                        // The OpenAI embeddings option is spelled "dimensions" (plural),
                        // which is also the key used in the bundle documentation.
                        'options' => ['dimensions' => 512],
                    ],
                ],
            ],
        ],
    ]);

    $this->assertTrue($container->hasDefinition('ai.vectorizer.my_vectorizer'));
    $this->assertTrue($container->hasDefinition('ai.vectorizer.my_vectorizer.model'));

    $vectorizerDefinition = $container->getDefinition('ai.vectorizer.my_vectorizer');
    $this->assertSame(Vectorizer::class, $vectorizerDefinition->getClass());
    $this->assertTrue($vectorizerDefinition->hasTag('ai.vectorizer'));
    // The tag carries the configured vectorizer name as its "name" attribute.
    $this->assertSame([['name' => 'my_vectorizer']], $vectorizerDefinition->getTag('ai.vectorizer'));

    $modelDefinition = $container->getDefinition('ai.vectorizer.my_vectorizer.model');
    $this->assertSame(Embeddings::class, $modelDefinition->getClass());
    $this->assertTrue($modelDefinition->hasTag('ai.model.embeddings_model'));
}

/**
 * An indexer that points at a configured vectorizer must receive a reference
 * to that vectorizer service as its first argument, and no per-indexer
 * vectorizer or model definitions may be registered for it.
 */
public function testIndexerWithConfiguredVectorizer()
{
    $storeSection = [
        'memory' => [
            'my_store' => [],
        ],
    ];

    $vectorizerSection = [
        'my_vectorizer' => [
            'platform' => 'my_platform_service_id',
            'model' => [
                'class' => 'Symfony\AI\Platform\Bridge\OpenAi\Embeddings',
                'name' => 'text-embedding-3-small',
            ],
        ],
    ];

    $indexerSection = [
        'my_indexer' => [
            'vectorizer' => 'ai.vectorizer.my_vectorizer',
            'store' => 'ai.store.memory.my_store',
        ],
    ];

    $container = $this->buildContainer([
        'ai' => [
            'store' => $storeSection,
            'vectorizer' => $vectorizerSection,
            'indexer' => $indexerSection,
        ],
    ]);

    // Both the indexer and the shared vectorizer must be registered.
    foreach (['ai.indexer.my_indexer', 'ai.vectorizer.my_vectorizer'] as $expectedServiceId) {
        $this->assertTrue($container->hasDefinition($expectedServiceId));
    }

    // The indexer's first constructor argument is the vectorizer reference.
    $vectorizerArgument = $container->getDefinition('ai.indexer.my_indexer')->getArguments()[0];
    $this->assertInstanceOf(Reference::class, $vectorizerArgument);
    $this->assertSame('ai.vectorizer.my_vectorizer', (string) $vectorizerArgument);

    // No indexer-scoped vectorizer or model services are expected
    // when a configured vectorizer is used.
    foreach (['ai.indexer.my_indexer.vectorizer', 'ai.indexer.my_indexer.model'] as $unexpectedServiceId) {
        $this->assertFalse($container->hasDefinition($unexpectedServiceId));
    }
}

private function buildContainer(array $configuration): ContainerBuilder
{
$container = new ContainerBuilder();
Expand Down Expand Up @@ -838,9 +911,8 @@ private function getFullConfig(): array
],
],
],
'indexer' => [
'my_text_indexer' => [
'store' => 'my_azure_search_store_service_id',
'vectorizer' => [
'test_vectorizer' => [
'platform' => 'mistral_platform_service_id',
'model' => [
'class' => 'Symfony\AI\Platform\Bridge\Mistral\Embeddings',
Expand All @@ -849,6 +921,12 @@ private function getFullConfig(): array
],
],
],
'indexer' => [
'my_text_indexer' => [
'vectorizer' => 'ai.vectorizer.test_vectorizer',
'store' => 'my_azure_search_store_service_id',
],
],
],
];
}
Expand Down