diff --git a/demo/AGENTS.md b/demo/AGENTS.md index b7a5f2926f..fe5d76edd0 100644 --- a/demo/AGENTS.md +++ b/demo/AGENTS.md @@ -62,7 +62,7 @@ symfony console mcp:server - **Agents**: blog, stream, youtube, wikipedia, audio - **Platform**: OpenAI integration - **Store**: ChromaDB vector store -- **Indexer**: Text embedding model +- **Ingester**: Text embedding model ### Chat Pattern - `Chat` class: Message flow and session management @@ -76,4 +76,4 @@ symfony console mcp:server - OpenAI GPT-4o-mini default model - ChromaDB on port 8080 - LiveComponents for real-time UI -- Symfony DI and best practices \ No newline at end of file +- Symfony DI and best practices diff --git a/demo/CLAUDE.md b/demo/CLAUDE.md index 82893752e2..62a10f13f1 100644 --- a/demo/CLAUDE.md +++ b/demo/CLAUDE.md @@ -10,7 +10,7 @@ This is a Symfony 7.3 demo application showcasing AI integration capabilities us ### Core Components - **Chat Systems**: Multiple specialized chat implementations in `src/` (Blog, YouTube, Wikipedia, Audio, Stream) -- **Twig LiveComponents**: Interactive UI components using Symfony UX for real-time chat interfaces +- **Twig LiveComponents**: Interactive UI components using Symfony UX for real-time chat interfaces - **AI Agents**: Configured agents with different models, tools, and system prompts - **Vector Store**: ChromaDB integration for embedding storage and similarity search - **MCP Tools**: Model Context Protocol tools for extending agent capabilities @@ -36,7 +36,7 @@ composer install echo "OPENAI_API_KEY='sk-...'" > .env.local # Initialize vector store -symfony console ai:store:index blog -vv +symfony console ai:store:ingest blog -vv # Test vector store symfony console ai:store:retrieve blog "Week of Symfony" @@ -81,7 +81,7 @@ symfony console mcp:server - **Agents**: Multiple pre-configured agents (blog, stream, youtube, wikipedia, audio) - **Platform**: OpenAI integration with API key from environment - **Store**: ChromaDB vector store for similarity search -- 
**Indexer**: Text embedding model configuration +- **Ingester**: Text embedding model configuration ### Chat Implementations Each chat type follows the pattern: @@ -100,4 +100,4 @@ Chat history stored in Symfony sessions with component-specific keys (e.g., 'blo - ChromaDB runs on port 8080 (mapped from container port 8000) - Application follows Symfony best practices with dependency injection - LiveComponents provide real-time UI updates without custom JavaScript -- MCP server enables tool integration for AI agents \ No newline at end of file +- MCP server enables tool integration for AI agents diff --git a/demo/config/packages/ai.yaml b/demo/config/packages/ai.yaml index a34557c8eb..7c04905f6e 100644 --- a/demo/config/packages/ai.yaml +++ b/demo/config/packages/ai.yaml @@ -89,7 +89,7 @@ ai: openai: platform: 'ai.platform.openai' model: 'text-embedding-ada-002' - indexer: + ingester: blog: loader: 'Symfony\AI\Store\Document\Loader\RssFeedLoader' source: 'https://feeds.feedburner.com/symfony/blog' diff --git a/docs/bundles/ai-bundle.rst b/docs/bundles/ai-bundle.rst index 8e81f6258f..94ab5f86ed 100644 --- a/docs/bundles/ai-bundle.rst +++ b/docs/bundles/ai-bundle.rst @@ -124,7 +124,7 @@ Advanced Example with Multiple Agents mistral_embeddings: platform: 'ai.platform.mistral' model: 'mistral-embed' - indexer: + ingester: default: loader: 'Symfony\AI\Store\Document\Loader\InMemoryLoader' vectorizer: 'ai.vectorizer.openai_embeddings' @@ -721,26 +721,26 @@ The ``ai:store:drop`` command drops the infrastructure for a store (e.g., remove This command only works with stores that implement ``ManagedStoreInterface``. Not all store types support drop operations. -``ai:store:index`` -~~~~~~~~~~~~~~~~~~ +``ai:store:ingest`` +~~~~~~~~~~~~~~~~~~~ -The ``ai:store:index`` command indexes documents into a store using a configured indexer. +The ``ai:store:ingest`` command ingests documents into a store using a configured ingester. .. 
code-block:: terminal - $ php bin/console ai:store:index + $ php bin/console ai:store:ingest - # Index using the default indexer - $ php bin/console ai:store:index default + # Ingest using the default ingester + $ php bin/console ai:store:ingest default # Override the configured source with a single file - $ php bin/console ai:store:index blog --source=/path/to/file.txt + $ php bin/console ai:store:ingest blog --source=/path/to/file.txt # Override with multiple sources - $ php bin/console ai:store:index blog --source=/path/to/file1.txt --source=/path/to/file2.txt + $ php bin/console ai:store:ingest blog --source=/path/to/file1.txt --source=/path/to/file2.txt -The ``--source`` (or ``-s``) option allows you to override the source(s) configured in your indexer. -This is useful for ad-hoc indexing operations or testing different data sources. +The ``--source`` (or ``-s``) option allows you to override the source(s) configured in your ingester. +This is useful for ad-hoc ingesting operations or testing different data sources. Usage ----- @@ -935,7 +935,7 @@ Vectorizers ----------- Vectorizers are components that convert text documents into vector embeddings for storage and retrieval. -They can be configured once and reused across multiple indexers, providing better maintainability and consistency. +They can be configured once and reused across multiple ingesters, providing better maintainability and consistency. Configuring Vectorizers ~~~~~~~~~~~~~~~~~~~~~~~ @@ -961,15 +961,15 @@ Vectorizers are defined in the ``vectorizer`` section of your configuration: platform: 'ai.platform.mistral' model: 'mistral-embed' -Using Vectorizers in Indexers -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Using Vectorizers in Ingesters +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Once configured, vectorizers can be referenced by name in indexer configurations: +Once configured, vectorizers can be referenced by name in ingester configurations: .. 
code-block:: yaml ai: - indexer: + ingester: documents: loader: 'Symfony\AI\Store\Document\Loader\TextFileLoader' vectorizer: 'ai.vectorizer.openai_small' @@ -988,14 +988,14 @@ Once configured, vectorizers can be referenced by name in indexer configurations Benefits of Configured Vectorizers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -* **Reusability**: Define once, use in multiple indexers -* **Consistency**: Ensure all indexers using the same vectorizer have identical embedding configuration +* **Reusability**: Define once, use in multiple ingesters +* **Consistency**: Ensure all ingesters using the same vectorizer have identical embedding configuration * **Maintainability**: Change vectorizer settings in one place Retrievers ---------- -Retrievers are the opposite of indexers. While indexers populate a vector store with documents, +Retrievers are the opposite of ingesters. While ingesters populate a vector store with documents, retrievers allow you to search for documents in a store based on a query string. They vectorize the query and retrieve similar documents from the store. diff --git a/docs/components/store.rst b/docs/components/store.rst index cc644f1d8d..a586288b3e 100644 --- a/docs/components/store.rst +++ b/docs/components/store.rst @@ -19,19 +19,21 @@ implemented by different concrete and vendor-specific implementations, so called On top of those bridges, the Store component provides higher level features to populate and query those stores with and for documents. -Indexing --------- +Ingesting +--------- -One higher level feature is the :class:`Symfony\\AI\\Store\\Indexer`. The purpose of this service is to populate a store with documents. +One higher level feature is the :class:`Symfony\\AI\\Store\\Ingester`. The purpose of this service is to populate a store with documents. 
Therefore it accepts one or multiple :class:`Symfony\\AI\\Store\\Document\\TextDocument` objects, converts them into embeddings and stores them in the used vector store:: use Symfony\AI\Store\Document\TextDocument; use Symfony\AI\Store\Indexer; + use Symfony\AI\Store\Ingester; - $indexer = new Indexer($platform, $model, $store); - $document = new TextDocument('This is a sample document.'); - $indexer->index($document); + $documents = [new TextDocument('This is a sample document.')]; + $loader = new InMemoryLoader($documents); + $ingester = new Ingester($loader, new Indexer($vectorizer, $store)); + $ingester->ingest(); You can find more advanced usage in combination with an Agent using the store for RAG in the examples folder. diff --git a/docs/cookbook/rag-implementation.rst b/docs/cookbook/rag-implementation.rst index d406dbcde0..ff6418e6a2 100644 --- a/docs/cookbook/rag-implementation.rst +++ b/docs/cookbook/rag-implementation.rst @@ -89,17 +89,20 @@ Use a vectorizer to convert documents into embeddings and store them:: use Symfony\AI\Store\Document\Loader\InMemoryLoader; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; + use Symfony\AI\Store\Ingester; $platform = PlatformFactory::create(env('OPENAI_API_KEY')); $vectorizer = new Vectorizer($platform, 'text-embedding-3-small'); - $indexer = new Indexer( + $ingester = new Ingester( new InMemoryLoader($documents), - $vectorizer, - $store + new Indexer( + $vectorizer, + $store + ), ); - $indexer->index($documents); + $ingester->ingest(); -The indexer handles: +The ingester handles: * Loading documents from the source * Generating vector embeddings @@ -324,7 +327,7 @@ Index documents in batches for better performance:: $batchSize = 100; foreach (array_chunk($documents, $batchSize) as $batch) { - $indexer->index($batch); + $ingester->ingest(options: $batch); } Caching Embeddings diff --git a/examples/indexer/index-file-loader.php b/examples/ingester/ingest-file-loader.php similarity index 66% rename 
from examples/indexer/index-file-loader.php rename to examples/ingester/ingest-file-loader.php index 82a4554311..e7422a6b6e 100644 --- a/examples/indexer/index-file-loader.php +++ b/examples/ingester/ingest-file-loader.php @@ -15,6 +15,7 @@ use Symfony\AI\Store\Document\Transformer\TextSplitTransformer; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; use Symfony\AI\Store\InMemory\Store as InMemoryStore; require_once dirname(__DIR__).'/bootstrap.php'; @@ -22,22 +23,23 @@ $platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client()); $store = new InMemoryStore(); $vectorizer = new Vectorizer($platform, 'text-embedding-3-small'); -$indexer = new Indexer( +$ingester = new Ingester( loader: new TextFileLoader(), - vectorizer: $vectorizer, - store: $store, - source: [ - dirname(__DIR__, 2).'/fixtures/movies/gladiator.md', - dirname(__DIR__, 2).'/fixtures/movies/inception.md', - dirname(__DIR__, 2).'/fixtures/movies/jurassic-park.md', - ], - transformers: [ - new TextReplaceTransformer(search: '## Plot', replace: '## Synopsis'), - new TextSplitTransformer(chunkSize: 500, overlap: 100), - ], + indexer: new Indexer( + vectorizer: $vectorizer, + store: $store, + transformers: [ + new TextReplaceTransformer(search: '## Plot', replace: '## Synopsis'), + new TextSplitTransformer(chunkSize: 500, overlap: 100), + ], + ), ); -$indexer->index(); +$ingester->ingest([ + dirname(__DIR__, 2).'/fixtures/movies/gladiator.md', + dirname(__DIR__, 2).'/fixtures/movies/inception.md', + dirname(__DIR__, 2).'/fixtures/movies/jurassic-park.md', +]); $vector = $vectorizer->vectorize('Roman gladiator revenge'); $results = $store->query($vector); diff --git a/examples/indexer/index-inmemory-loader.php b/examples/ingester/ingest-inmemory-loader.php similarity index 86% rename from examples/indexer/index-inmemory-loader.php rename to examples/ingester/ingest-inmemory-loader.php index 2425bba4bf..e2f9daf4fd 100644 --- 
a/examples/indexer/index-inmemory-loader.php +++ b/examples/ingester/ingest-inmemory-loader.php @@ -16,6 +16,7 @@ use Symfony\AI\Store\Document\Transformer\TextSplitTransformer; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; use Symfony\AI\Store\InMemory\Store as InMemoryStore; use Symfony\Component\Uid\Uuid; @@ -38,17 +39,18 @@ ), ]; -$indexer = new Indexer( +$ingester = new Ingester( loader: new InMemoryLoader($documents), - vectorizer: $vectorizer, - store: $store, - source: null, - transformers: [ - new TextSplitTransformer(chunkSize: 100, overlap: 20), - ], + indexer: new Indexer( + vectorizer: $vectorizer, + store: $store, + transformers: [ + new TextSplitTransformer(chunkSize: 100, overlap: 20), + ], + ), ); -$indexer->index(); +$ingester->ingest(); $vector = $vectorizer->vectorize('machine learning artificial intelligence'); $results = $store->query($vector); diff --git a/examples/indexer/index-rss-loader.php b/examples/ingester/ingest-rss-loader.php similarity index 74% rename from examples/indexer/index-rss-loader.php rename to examples/ingester/ingest-rss-loader.php index f21ccfbf94..c0c978b879 100644 --- a/examples/indexer/index-rss-loader.php +++ b/examples/ingester/ingest-rss-loader.php @@ -14,6 +14,7 @@ use Symfony\AI\Store\Document\Transformer\TextSplitTransformer; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; use Symfony\AI\Store\InMemory\Store as InMemoryStore; use Symfony\Component\HttpClient\HttpClient; @@ -22,20 +23,21 @@ $platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client()); $store = new InMemoryStore(); $vectorizer = new Vectorizer($platform, 'text-embedding-3-small'); -$indexer = new Indexer( +$ingester = new Ingester( loader: new RssFeedLoader(HttpClient::create()), - vectorizer: $vectorizer, - store: $store, - source: [ - 'https://feeds.feedburner.com/symfony/blog', - 
'https://www.tagesschau.de/index~rss2.xml', - ], - transformers: [ - new TextSplitTransformer(chunkSize: 500, overlap: 100), - ], + indexer: new Indexer( + vectorizer: $vectorizer, + store: $store, + transformers: [ + new TextSplitTransformer(chunkSize: 500, overlap: 100), + ], + ), ); -$indexer->index(); +$ingester->ingest([ + 'https://feeds.feedburner.com/symfony/blog', + 'https://www.tagesschau.de/index~rss2.xml', +]); $vector = $vectorizer->vectorize('Week of Symfony'); $results = $store->query($vector); diff --git a/examples/indexer/index-with-filters.php b/examples/ingester/ingest-with-filters.php similarity index 92% rename from examples/indexer/index-with-filters.php rename to examples/ingester/ingest-with-filters.php index 4f78943754..e5d38ed4e5 100644 --- a/examples/indexer/index-with-filters.php +++ b/examples/ingester/ingest-with-filters.php @@ -17,6 +17,7 @@ use Symfony\AI\Store\Document\Transformer\TextTrimTransformer; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; use Symfony\AI\Store\InMemory\Store as InMemoryStore; use Symfony\Component\Uid\Uuid; @@ -56,18 +57,19 @@ new TextContainsFilter('SPAM:', caseSensitive: true), ]; -$indexer = new Indexer( +$ingester = new Ingester( loader: new InMemoryLoader($documents), - vectorizer: $vectorizer, - store: $store, - source: null, - filters: $filters, - transformers: [ - new TextTrimTransformer(), - ], + indexer: new Indexer( + vectorizer: $vectorizer, + store: $store, + filters: $filters, + transformers: [ + new TextTrimTransformer(), + ], + ), ); -$indexer->index(); +$ingester->ingest(); $vector = $vectorizer->vectorize('technology artificial intelligence'); $results = $store->query($vector); diff --git a/examples/memory/mariadb.php b/examples/memory/mariadb.php index d3641098cb..e7a7157683 100644 --- a/examples/memory/mariadb.php +++ b/examples/memory/mariadb.php @@ -23,6 +23,7 @@ use Symfony\AI\Store\Document\TextDocument; use 
Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; use Symfony\Component\Uid\Uuid; require_once dirname(__DIR__).'/bootstrap.php'; @@ -57,8 +58,8 @@ // create embeddings for documents as preparation of the chain memory $platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client()); $vectorizer = new Vectorizer($platform, $embeddings = 'text-embedding-3-small'); -$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger()); -$indexer->index($documents); +$ingester = new Ingester(new InMemoryLoader($documents), new Indexer($vectorizer, $store, logger: logger()), logger: logger()); +$ingester->ingest(); // Execute a chat call that is utilizing the memory $embeddingsModel = $platform->getModelCatalog()->getModel($embeddings); diff --git a/examples/ollama/indexer.php b/examples/ollama/ingester.php similarity index 67% rename from examples/ollama/indexer.php rename to examples/ollama/ingester.php index d2005f3c25..2d5758d6ae 100644 --- a/examples/ollama/indexer.php +++ b/examples/ollama/ingester.php @@ -15,6 +15,7 @@ use Symfony\AI\Store\Document\Transformer\TextSplitTransformer; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; use Symfony\AI\Store\InMemory\Store as InMemoryStore; require_once dirname(__DIR__).'/bootstrap.php'; @@ -22,22 +23,23 @@ $platform = PlatformFactory::create(env('OLLAMA_HOST_URL'), http_client()); $store = new InMemoryStore(); $vectorizer = new Vectorizer($platform, env('OLLAMA_EMBEDDINGS'), logger()); -$indexer = new Indexer( +$ingester = new Ingester( loader: new TextFileLoader(), - vectorizer: $vectorizer, - store: $store, - source: [ - dirname(__DIR__, 2).'/fixtures/movies/gladiator.md', - dirname(__DIR__, 2).'/fixtures/movies/inception.md', - dirname(__DIR__, 2).'/fixtures/movies/jurassic-park.md', - ], - transformers: [ - new TextReplaceTransformer(search: '## Plot', replace: '## Synopsis'), - 
new TextSplitTransformer(chunkSize: 500, overlap: 100), - ], + indexer: new Indexer( + vectorizer: $vectorizer, + store: $store, + transformers: [ + new TextReplaceTransformer(search: '## Plot', replace: '## Synopsis'), + new TextSplitTransformer(chunkSize: 500, overlap: 100), + ], + ), ); -$indexer->index(); +$ingester->ingest([ + dirname(__DIR__, 2).'/fixtures/movies/gladiator.md', + dirname(__DIR__, 2).'/fixtures/movies/inception.md', + dirname(__DIR__, 2).'/fixtures/movies/jurassic-park.md', +]); $vector = $vectorizer->vectorize('Roman gladiator revenge'); $results = $store->query($vector); diff --git a/examples/ollama/rag.php b/examples/ollama/rag.php index cb9959ad7b..ad32e944bd 100644 --- a/examples/ollama/rag.php +++ b/examples/ollama/rag.php @@ -22,6 +22,7 @@ use Symfony\AI\Store\Document\TextDocument; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; use Symfony\AI\Store\InMemory\Store as InMemoryStore; use Symfony\Component\Uid\Uuid; @@ -43,8 +44,8 @@ // create embeddings for documents $platform = PlatformFactory::create(env('OLLAMA_HOST_URL'), http_client()); $vectorizer = new Vectorizer($platform, env('OLLAMA_EMBEDDINGS'), logger()); -$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger()); -$indexer->index($documents); +$ingester = new Ingester(new InMemoryLoader($documents), new Indexer($vectorizer, $store, logger: logger()), logger: logger()); +$ingester->ingest(); $similaritySearch = new SimilaritySearch($vectorizer, $store); $toolbox = new Toolbox([$similaritySearch], logger: logger()); diff --git a/examples/rag/cache.php b/examples/rag/cache.php index 3ef15ee061..28ce2eaba9 100644 --- a/examples/rag/cache.php +++ b/examples/rag/cache.php @@ -23,6 +23,7 @@ use Symfony\AI\Store\Document\TextDocument; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; use Symfony\Component\Cache\Adapter\ArrayAdapter; use 
Symfony\Component\Uid\Uuid; @@ -44,8 +45,8 @@ // create embeddings for documents $platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client()); $vectorizer = new Vectorizer($platform, 'text-embedding-3-small', logger()); -$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger()); -$indexer->index($documents); +$ingester = new Ingester(new InMemoryLoader($documents), new Indexer($vectorizer, $store, logger: logger()), logger: logger()); +$ingester->ingest(); $similaritySearch = new SimilaritySearch($vectorizer, $store); $toolbox = new Toolbox([$similaritySearch], logger: logger()); diff --git a/examples/rag/chromadb.php b/examples/rag/chromadb.php index ebdb684d04..4b0dad300d 100644 --- a/examples/rag/chromadb.php +++ b/examples/rag/chromadb.php @@ -24,6 +24,7 @@ use Symfony\AI\Store\Document\TextDocument; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; use Symfony\Component\Uid\Uuid; require_once dirname(__DIR__).'/bootstrap.php'; @@ -51,8 +52,8 @@ // create embeddings for documents $platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client()); $vectorizer = new Vectorizer($platform, 'text-embedding-3-small', logger()); -$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger()); -$indexer->index($documents); +$ingester = new Ingester(new InMemoryLoader($documents), new Indexer($vectorizer, $store, logger: logger()), logger: logger()); +$ingester->ingest(); $similaritySearch = new SimilaritySearch($vectorizer, $store); $toolbox = new Toolbox([$similaritySearch], logger: logger()); diff --git a/examples/rag/clickhouse.php b/examples/rag/clickhouse.php index 52d9c8f2e3..61fe454c9a 100644 --- a/examples/rag/clickhouse.php +++ b/examples/rag/clickhouse.php @@ -23,6 +23,7 @@ use Symfony\AI\Store\Document\TextDocument; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; 
use Symfony\Component\HttpClient\HttpClient; use Symfony\Component\Uid\Uuid; @@ -51,8 +52,8 @@ // create embeddings for documents $platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client()); $vectorizer = new Vectorizer($platform, 'text-embedding-3-small', logger()); -$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger()); -$indexer->index($documents); +$ingester = new Ingester(new InMemoryLoader($documents), new Indexer($vectorizer, $store, logger: logger()), logger: logger()); +$ingester->ingest(); $similaritySearch = new SimilaritySearch($vectorizer, $store); $toolbox = new Toolbox([$similaritySearch], logger: logger()); diff --git a/examples/rag/cloudflare.php b/examples/rag/cloudflare.php index f51e76637d..d9b3ef9624 100644 --- a/examples/rag/cloudflare.php +++ b/examples/rag/cloudflare.php @@ -23,6 +23,7 @@ use Symfony\AI\Store\Document\TextDocument; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; use Symfony\Component\Uid\Uuid; require_once dirname(__DIR__).'/bootstrap.php'; @@ -51,8 +52,8 @@ // create embeddings for documents (keep in mind that upserting vectors is asynchronous) $platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client()); $vectorizer = new Vectorizer($platform, 'text-embedding-3-small', logger()); -$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger()); -$indexer->index($documents); +$ingester = new Ingester(new InMemoryLoader($documents), new Indexer($vectorizer, $store, logger: logger()), logger: logger()); +$ingester->ingest(); $similaritySearch = new SimilaritySearch($vectorizer, $store); $toolbox = new Toolbox([$similaritySearch], logger: logger()); diff --git a/examples/rag/elasticsearch.php b/examples/rag/elasticsearch.php index a23c076f9d..79a3f70443 100644 --- a/examples/rag/elasticsearch.php +++ b/examples/rag/elasticsearch.php @@ -23,6 +23,7 @@ use 
Symfony\AI\Store\Document\TextDocument; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; use Symfony\Component\Uid\Uuid; require_once dirname(__DIR__).'/bootstrap.php'; @@ -50,8 +51,8 @@ // create embeddings for documents $platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client()); $vectorizer = new Vectorizer($platform, 'text-embedding-3-small', logger()); -$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger()); -$indexer->index($documents); +$ingester = new Ingester(new InMemoryLoader($documents), new Indexer($vectorizer, $store, logger: logger()), logger: logger()); +$ingester->ingest(); $similaritySearch = new SimilaritySearch($vectorizer, $store); $toolbox = new Toolbox([$similaritySearch], logger: logger()); diff --git a/examples/rag/in-memory.php b/examples/rag/in-memory.php index 8aff814b6a..a74c25bdb9 100644 --- a/examples/rag/in-memory.php +++ b/examples/rag/in-memory.php @@ -22,6 +22,7 @@ use Symfony\AI\Store\Document\TextDocument; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; use Symfony\AI\Store\InMemory\Store as InMemoryStore; use Symfony\Component\Uid\Uuid; @@ -43,8 +44,8 @@ // create embeddings for documents $platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client()); $vectorizer = new Vectorizer($platform, 'text-embedding-3-small', logger()); -$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger()); -$indexer->index($documents); +$ingester = new Ingester(new InMemoryLoader($documents), new Indexer($vectorizer, $store, logger: logger()), logger: logger()); +$ingester->ingest(); $similaritySearch = new SimilaritySearch($vectorizer, $store); $toolbox = new Toolbox([$similaritySearch], logger: logger()); diff --git a/examples/rag/manticore.php b/examples/rag/manticore.php index 5f8a569dbb..b5b4466888 100644 --- a/examples/rag/manticore.php +++ 
b/examples/rag/manticore.php @@ -23,6 +23,7 @@ use Symfony\AI\Store\Document\TextDocument; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; use Symfony\Component\Uid\Uuid; require_once dirname(__DIR__).'/bootstrap.php'; @@ -51,8 +52,8 @@ // create embeddings for documents $platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client()); $vectorizer = new Vectorizer($platform, 'text-embedding-3-small', logger()); -$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger()); -$indexer->index($documents); +$ingester = new Ingester(new InMemoryLoader($documents), new Indexer($vectorizer, $store, logger: logger()), logger: logger()); +$ingester->ingest(); $similaritySearch = new SimilaritySearch($vectorizer, $store); $toolbox = new Toolbox([$similaritySearch], logger: logger()); diff --git a/examples/rag/mariadb-gemini.php b/examples/rag/mariadb-gemini.php index 2c44feb983..c6080f0d2a 100644 --- a/examples/rag/mariadb-gemini.php +++ b/examples/rag/mariadb-gemini.php @@ -25,6 +25,7 @@ use Symfony\AI\Store\Document\TextDocument; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; use Symfony\Component\Uid\Uuid; require_once dirname(__DIR__).'/bootstrap.php'; @@ -53,8 +54,8 @@ $platform = PlatformFactory::create(env('GEMINI_API_KEY'), http_client()); $model = 'gemini-embedding-exp-03-07?dimensions=768&task_type=SEMANTIC_SIMILARITY'; $vectorizer = new Vectorizer($platform, $model, logger()); -$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger()); -$indexer->index($documents); +$ingester = new Ingester(new InMemoryLoader($documents), new Indexer($vectorizer, $store, logger: logger()), logger: logger()); +$ingester->ingest(); $similaritySearch = new SimilaritySearch($vectorizer, $store); $toolbox = new Toolbox([$similaritySearch], logger: logger()); diff --git 
a/examples/rag/mariadb-openai.php b/examples/rag/mariadb-openai.php index b616958a92..39cdc53141 100644 --- a/examples/rag/mariadb-openai.php +++ b/examples/rag/mariadb-openai.php @@ -25,6 +25,7 @@ use Symfony\AI\Store\Document\TextDocument; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; use Symfony\Component\Uid\Uuid; require_once dirname(__DIR__).'/bootstrap.php'; @@ -52,8 +53,8 @@ // create embeddings for documents $platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client()); $vectorizer = new Vectorizer($platform, 'text-embedding-3-small', logger()); -$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger()); -$indexer->index($documents); +$ingester = new Ingester(new InMemoryLoader($documents), new Indexer($vectorizer, $store, logger: logger()), logger: logger()); +$ingester->ingest(); $similaritySearch = new SimilaritySearch($vectorizer, $store); $toolbox = new Toolbox([$similaritySearch], logger: logger()); diff --git a/examples/rag/meilisearch-hybrid.php b/examples/rag/meilisearch-hybrid.php index cb8346b663..c14d3a34c6 100644 --- a/examples/rag/meilisearch-hybrid.php +++ b/examples/rag/meilisearch-hybrid.php @@ -17,6 +17,7 @@ use Symfony\AI\Store\Document\TextDocument; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; use Symfony\Component\Uid\Uuid; require_once dirname(__DIR__).'/bootstrap.php'; @@ -50,8 +51,8 @@ // Create embeddings for documents $platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client()); $vectorizer = new Vectorizer($platform, 'text-embedding-3-small', logger()); -$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger()); -$indexer->index($documents); +$ingester = new Ingester(new InMemoryLoader($documents), new Indexer($vectorizer, $store, logger: logger()), logger: logger()); +$ingester->ingest(); // Create a query embedding 
$queryText = 'futuristic technology and artificial intelligence'; diff --git a/examples/rag/meilisearch.php b/examples/rag/meilisearch.php index e5ce5742e8..212d0cede4 100644 --- a/examples/rag/meilisearch.php +++ b/examples/rag/meilisearch.php @@ -23,6 +23,7 @@ use Symfony\AI\Store\Document\TextDocument; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; use Symfony\Component\Uid\Uuid; require_once dirname(__DIR__).'/bootstrap.php'; @@ -51,8 +52,8 @@ // create embeddings for documents $platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client()); $vectorizer = new Vectorizer($platform, 'text-embedding-3-small', logger()); -$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger()); -$indexer->index($documents); +$ingester = new Ingester(new InMemoryLoader($documents), new Indexer($vectorizer, $store, logger: logger()), logger: logger()); +$ingester->ingest(); $similaritySearch = new SimilaritySearch($vectorizer, $store); $toolbox = new Toolbox([$similaritySearch], logger: logger()); diff --git a/examples/rag/milvus.php b/examples/rag/milvus.php index 1f8411bf87..3168f25160 100644 --- a/examples/rag/milvus.php +++ b/examples/rag/milvus.php @@ -23,6 +23,7 @@ use Symfony\AI\Store\Document\TextDocument; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; use Symfony\Component\Uid\Uuid; require_once dirname(__DIR__).'/bootstrap.php'; @@ -52,8 +53,8 @@ // create embeddings for documents $platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client()); $vectorizer = new Vectorizer($platform, 'text-embedding-3-small', logger()); -$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger()); -$indexer->index($documents); +$ingester = new Ingester(new InMemoryLoader($documents), new Indexer($vectorizer, $store, logger: logger()), logger: logger()); +$ingester->ingest(); 
$similaritySearch = new SimilaritySearch($vectorizer, $store); $toolbox = new Toolbox([$similaritySearch], logger: logger()); diff --git a/examples/rag/mongodb.php b/examples/rag/mongodb.php index 589f388388..316eba6198 100644 --- a/examples/rag/mongodb.php +++ b/examples/rag/mongodb.php @@ -24,6 +24,7 @@ use Symfony\AI\Store\Document\TextDocument; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; use Symfony\Component\Uid\Uuid; require_once dirname(__DIR__).'/bootstrap.php'; @@ -50,8 +51,8 @@ // create embeddings for documents $platform = PlatformFactory::create(env('OPENAI_API_KEY')); $vectorizer = new Vectorizer($platform, 'text-embedding-3-small', logger()); -$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger()); -$indexer->index($documents); +$ingester = new Ingester(new InMemoryLoader($documents), new Indexer($vectorizer, $store, logger: logger()), logger: logger()); +$ingester->ingest(); // initialize the index $store->setup(); diff --git a/examples/rag/neo4j.php b/examples/rag/neo4j.php index 8a109fbfe3..3c66e1d0e7 100644 --- a/examples/rag/neo4j.php +++ b/examples/rag/neo4j.php @@ -23,6 +23,7 @@ use Symfony\AI\Store\Document\TextDocument; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; use Symfony\Component\Uid\Uuid; require_once dirname(__DIR__).'/bootstrap.php'; @@ -54,8 +55,8 @@ // create embeddings for documents $platform = PlatformFactory::create($_SERVER['OPENAI_API_KEY']); $vectorizer = new Vectorizer($platform, 'text-embedding-3-small', logger()); -$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger()); -$indexer->index($documents); +$ingester = new Ingester(new InMemoryLoader($documents), new Indexer($vectorizer, $store, logger: logger()), logger: logger()); +$ingester->ingest(); $similaritySearch = new SimilaritySearch($vectorizer, $store); $toolbox = new 
Toolbox([$similaritySearch], logger: logger()); diff --git a/examples/rag/opensearch.php b/examples/rag/opensearch.php index a23517fb50..c4050ef2ed 100644 --- a/examples/rag/opensearch.php +++ b/examples/rag/opensearch.php @@ -23,6 +23,7 @@ use Symfony\AI\Store\Document\TextDocument; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; use Symfony\Component\Uid\Uuid; require_once dirname(__DIR__).'/bootstrap.php'; @@ -50,8 +51,8 @@ // create embeddings for documents $platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client()); $vectorizer = new Vectorizer($platform, 'text-embedding-3-small', logger()); -$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger()); -$indexer->index($documents); +$ingester = new Ingester(new InMemoryLoader($documents), new Indexer($vectorizer, $store, logger: logger()), logger: logger()); +$ingester->ingest(); $similaritySearch = new SimilaritySearch($vectorizer, $store); $toolbox = new Toolbox([$similaritySearch], logger: logger()); diff --git a/examples/rag/pinecone.php b/examples/rag/pinecone.php index 072dd6b960..41b0fe8c05 100644 --- a/examples/rag/pinecone.php +++ b/examples/rag/pinecone.php @@ -24,6 +24,7 @@ use Symfony\AI\Store\Document\TextDocument; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; use Symfony\Component\Uid\Uuid; require_once dirname(__DIR__).'/bootstrap.php'; @@ -44,8 +45,8 @@ // create embeddings for documents $platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client()); $vectorizer = new Vectorizer($platform, 'text-embedding-3-small', logger()); -$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger()); -$indexer->index($documents); +$ingester = new Ingester(new InMemoryLoader($documents), new Indexer($vectorizer, $store, logger: logger()), logger: logger()); +$ingester->ingest(); $similaritySearch = new 
SimilaritySearch($vectorizer, $store); $toolbox = new Toolbox([$similaritySearch], logger: logger()); diff --git a/examples/rag/postgres.php b/examples/rag/postgres.php index 5a9dc126a6..2d723453e3 100644 --- a/examples/rag/postgres.php +++ b/examples/rag/postgres.php @@ -25,6 +25,7 @@ use Symfony\AI\Store\Document\TextDocument; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; use Symfony\Component\Uid\Uuid; require_once dirname(__DIR__).'/bootstrap.php'; @@ -51,8 +52,8 @@ // create embeddings for documents $platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client()); $vectorizer = new Vectorizer($platform, 'text-embedding-3-small', logger()); -$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger()); -$indexer->index($documents); +$ingester = new Ingester(new InMemoryLoader($documents), new Indexer($vectorizer, $store, logger: logger()), logger: logger()); +$ingester->ingest(); $similaritySearch = new SimilaritySearch($vectorizer, $store); $toolbox = new Toolbox([$similaritySearch], logger: logger()); diff --git a/examples/rag/qdrant.php b/examples/rag/qdrant.php index 7a142ca535..fd64b4d1f7 100644 --- a/examples/rag/qdrant.php +++ b/examples/rag/qdrant.php @@ -23,6 +23,7 @@ use Symfony\AI\Store\Document\TextDocument; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; use Symfony\Component\Uid\Uuid; require_once dirname(__DIR__).'/bootstrap.php'; @@ -35,7 +36,7 @@ 'movies', ); -// initialize the collection (needs to be called before the indexer) +// initialize the collection (needs to be called before the ingester) $store->setup(); // create embeddings and documents @@ -51,8 +52,8 @@ // create embeddings for documents $platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client()); $vectorizer = new Vectorizer($platform, 'text-embedding-3-small', logger()); -$indexer = new Indexer(new 
InMemoryLoader($documents), $vectorizer, $store, logger: logger()); -$indexer->index($documents); +$ingester = new Ingester(new InMemoryLoader($documents), new Indexer($vectorizer, $store, logger: logger()), logger: logger()); +$ingester->ingest(); $similaritySearch = new SimilaritySearch($vectorizer, $store); $toolbox = new Toolbox([$similaritySearch], logger: logger()); diff --git a/examples/rag/redis.php b/examples/rag/redis.php index eb682a4fff..038f229648 100644 --- a/examples/rag/redis.php +++ b/examples/rag/redis.php @@ -23,6 +23,7 @@ use Symfony\AI\Store\Document\TextDocument; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; use Symfony\Component\Uid\Uuid; require_once dirname(__DIR__).'/bootstrap.php'; @@ -53,8 +54,8 @@ // create embeddings for documents $platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client()); $vectorizer = new Vectorizer($platform, 'text-embedding-3-small', logger()); -$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger()); -$indexer->index($documents); +$ingester = new Ingester(new InMemoryLoader($documents), new Indexer($vectorizer, $store, logger: logger()), logger: logger()); +$ingester->ingest(); $similaritySearch = new SimilaritySearch($vectorizer, $store); $toolbox = new Toolbox([$similaritySearch], logger: logger()); diff --git a/examples/rag/supabase.php b/examples/rag/supabase.php index c7cdfca64f..88a4dc426c 100644 --- a/examples/rag/supabase.php +++ b/examples/rag/supabase.php @@ -23,6 +23,7 @@ use Symfony\AI\Store\Document\TextDocument; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; use Symfony\Component\Uid\Uuid; require_once dirname(__DIR__).'/bootstrap.php'; @@ -51,8 +52,8 @@ functionName: env('SUPABASE_MATCH_FUNCTION'), $vectorizer = new Vectorizer($platform, env('OLLAMA_EMBEDDINGS')); $loader = new InMemoryLoader($documents); -$indexer = new 
Indexer($loader, $vectorizer, $store, logger: logger()); -$indexer->index(); +$ingester = new Ingester($loader, new Indexer($vectorizer, $store, logger: logger()), logger: logger()); +$ingester->ingest(); $similaritySearch = new SimilaritySearch($vectorizer, $store); $toolbox = new Toolbox([$similaritySearch], logger: logger()); diff --git a/examples/rag/surrealdb.php b/examples/rag/surrealdb.php index 4a99a39c07..b0dc36ae9d 100644 --- a/examples/rag/surrealdb.php +++ b/examples/rag/surrealdb.php @@ -23,6 +23,7 @@ use Symfony\AI\Store\Document\TextDocument; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; use Symfony\Component\Uid\Uuid; require_once dirname(__DIR__).'/bootstrap.php'; @@ -54,8 +55,8 @@ // create embeddings for documents $platform = PlatformFactory::create($_SERVER['OPENAI_API_KEY']); $vectorizer = new Vectorizer($platform, 'text-embedding-3-small', logger()); -$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger()); -$indexer->index($documents); +$ingester = new Ingester(new InMemoryLoader($documents), new Indexer($vectorizer, $store, logger: logger()), logger: logger()); +$ingester->ingest(); $similaritySearch = new SimilaritySearch($vectorizer, $store); $toolbox = new Toolbox([$similaritySearch], logger: logger()); diff --git a/examples/rag/typesense.php b/examples/rag/typesense.php index bd2128d02a..9c1785bb4c 100644 --- a/examples/rag/typesense.php +++ b/examples/rag/typesense.php @@ -23,6 +23,7 @@ use Symfony\AI\Store\Document\TextDocument; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; use Symfony\Component\Uid\Uuid; require_once dirname(__DIR__).'/bootstrap.php'; @@ -51,8 +52,8 @@ // create embeddings for documents $platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client()); $vectorizer = new Vectorizer($platform, 'text-embedding-3-small', logger()); -$indexer = new Indexer(new 
InMemoryLoader($documents), $vectorizer, $store, logger: logger()); -$indexer->index($documents); +$ingester = new Ingester(new InMemoryLoader($documents), new Indexer($vectorizer, $store, logger: logger()), logger: logger()); +$ingester->ingest(); $similaritySearch = new SimilaritySearch($vectorizer, $store); $toolbox = new Toolbox([$similaritySearch], logger: logger()); diff --git a/examples/rag/weaviate.php b/examples/rag/weaviate.php index 15e14c46f4..ce7844545c 100644 --- a/examples/rag/weaviate.php +++ b/examples/rag/weaviate.php @@ -24,6 +24,7 @@ use Symfony\AI\Store\Document\TextDocument; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; use Symfony\Component\Uid\Uuid; require_once dirname(__DIR__).'/bootstrap.php'; @@ -52,8 +53,8 @@ // create embeddings for documents $platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client()); $vectorizer = new Vectorizer($platform, 'text-embedding-3-small', logger()); -$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger()); -$indexer->index($documents); +$ingester = new Ingester(new InMemoryLoader($documents), new Indexer($vectorizer, $store, logger: logger()), logger: logger()); +$ingester->ingest(); $similaritySearch = new SimilaritySearch($vectorizer, $store); $toolbox = new Toolbox([$similaritySearch], logger: logger()); diff --git a/examples/retriever/basic.php b/examples/retriever/basic.php index 9b6364783b..e2b55afb69 100644 --- a/examples/retriever/basic.php +++ b/examples/retriever/basic.php @@ -14,6 +14,7 @@ use Symfony\AI\Store\Document\Transformer\TextSplitTransformer; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; use Symfony\AI\Store\InMemory\Store as InMemoryStore; use Symfony\AI\Store\Retriever; @@ -24,20 +25,21 @@ $platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client()); $vectorizer = new Vectorizer($platform, 
'text-embedding-3-small'); -$indexer = new Indexer( +$ingester = new Ingester( loader: new TextFileLoader(), - vectorizer: $vectorizer, - store: $store, - source: [ - dirname(__DIR__, 2).'/fixtures/movies/gladiator.md', - dirname(__DIR__, 2).'/fixtures/movies/inception.md', - dirname(__DIR__, 2).'/fixtures/movies/jurassic-park.md', - ], - transformers: [ - new TextSplitTransformer(chunkSize: 500, overlap: 100), - ], + indexer: new Indexer( + vectorizer: $vectorizer, + store: $store, + transformers: [ + new TextSplitTransformer(chunkSize: 500, overlap: 100), + ], + ), ); -$indexer->index(); +$ingester->ingest([ + dirname(__DIR__, 2).'/fixtures/movies/gladiator.md', + dirname(__DIR__, 2).'/fixtures/movies/inception.md', + dirname(__DIR__, 2).'/fixtures/movies/jurassic-park.md', +]); $retriever = new Retriever( vectorizer: $vectorizer, diff --git a/examples/retriever/movies.php b/examples/retriever/movies.php index 804dc7a155..de89fc2806 100644 --- a/examples/retriever/movies.php +++ b/examples/retriever/movies.php @@ -16,6 +16,7 @@ use Symfony\AI\Store\Document\TextDocument; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; use Symfony\AI\Store\InMemory\Store as InMemoryStore; use Symfony\AI\Store\Retriever; use Symfony\Component\Uid\Uuid; @@ -36,8 +37,8 @@ $platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client()); $vectorizer = new Vectorizer($platform, 'text-embedding-3-small', logger()); -$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger()); -$indexer->index(); +$ingester = new Ingester(new InMemoryLoader($documents), new Indexer($vectorizer, $store, logger: logger()), logger: logger()); +$ingester->ingest(); $retriever = new Retriever($vectorizer, $store, logger()); diff --git a/src/ai-bundle/CLAUDE.md b/src/ai-bundle/CLAUDE.md index 206179662f..30ae28caa7 100644 --- a/src/ai-bundle/CLAUDE.md +++ b/src/ai-bundle/CLAUDE.md @@ -4,7 +4,7 @@ This file 
provides guidance to Claude Code (claude.ai/code) when working with co ## Project Overview -The Symfony AI Bundle is an integration bundle that provides Symfony dependency injection configuration for the Symfony AI components (Platform, Agent, Store). It enables declarative configuration of AI agents, platforms, vector stores, and indexers through semantic YAML configuration and PHP attributes. +The Symfony AI Bundle is an integration bundle that provides Symfony dependency injection configuration for the Symfony AI components (Platform, Agent, Store). It enables declarative configuration of AI agents, platforms, vector stores, and ingesters through semantic YAML configuration and PHP attributes. ## Architecture @@ -114,4 +114,4 @@ The test suite demonstrates: - Security integration testing with mock authorization checker - Profiler data collection and tracing functionality -Tests use PHPUnit 11 with strict configuration and coverage requirements. \ No newline at end of file +Tests use PHPUnit 11 with strict configuration and coverage requirements. 
diff --git a/src/ai-bundle/config/options.php b/src/ai-bundle/config/options.php index adc0a656cb..96da73a135 100644 --- a/src/ai-bundle/config/options.php +++ b/src/ai-bundle/config/options.php @@ -1184,7 +1184,7 @@ ->end() ->end() ->end() - ->arrayNode('indexer') + ->arrayNode('ingester') ->useAttributeAsKey('name') ->arrayPrototype() ->children() diff --git a/src/ai-bundle/config/services.php b/src/ai-bundle/config/services.php index 6980570d1b..dc69ccfe74 100644 --- a/src/ai-bundle/config/services.php +++ b/src/ai-bundle/config/services.php @@ -68,7 +68,7 @@ use Symfony\AI\Platform\StructuredOutput\ResponseFormatFactory; use Symfony\AI\Platform\StructuredOutput\ResponseFormatFactoryInterface; use Symfony\AI\Store\Command\DropStoreCommand; -use Symfony\AI\Store\Command\IndexCommand; +use Symfony\AI\Store\Command\IngestCommand; use Symfony\AI\Store\Command\RetrieveCommand; use Symfony\AI\Store\Command\SetupStoreCommand; use Symfony\Component\ExpressionLanguage\ExpressionLanguage; @@ -238,9 +238,9 @@ tagged_locator('ai.store', 'name'), ]) ->tag('console.command') - ->set('ai.command.index', IndexCommand::class) + ->set('ai.command.ingest', IngestCommand::class) ->args([ - tagged_locator('ai.indexer', 'name'), + tagged_locator('ai.ingester', 'name'), ]) ->tag('console.command') ->set('ai.command.retrieve', RetrieveCommand::class) diff --git a/src/ai-bundle/src/AiBundle.php b/src/ai-bundle/src/AiBundle.php index 6810c6c031..2fd137048c 100644 --- a/src/ai-bundle/src/AiBundle.php +++ b/src/ai-bundle/src/AiBundle.php @@ -110,7 +110,8 @@ use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Document\VectorizerInterface; use Symfony\AI\Store\Indexer; -use Symfony\AI\Store\IndexerInterface; +use Symfony\AI\Store\Ingester; +use Symfony\AI\Store\IngesterInterface; use Symfony\AI\Store\InMemory\Store as InMemoryStore; use Symfony\AI\Store\ManagedStoreInterface; use Symfony\AI\Store\Retriever; @@ -266,11 +267,11 @@ public function loadExtension(array $config, 
ContainerConfigurator $container, C $this->processVectorizerConfig($vectorizerName, $vectorizer, $builder); } - foreach ($config['indexer'] as $indexerName => $indexer) { - $this->processIndexerConfig($indexerName, $indexer, $builder); + foreach ($config['ingester'] as $ingesterName => $ingester) { + $this->processIngesterConfig($ingesterName, $ingester, $builder); } - if (1 === \count($config['indexer']) && isset($indexerName)) { - $builder->setAlias(IndexerInterface::class, 'ai.indexer.'.$indexerName); + if (1 === \count($config['ingester']) && isset($ingesterName)) { + $builder->setAlias(IngesterInterface::class, 'ai.ingester.'.$ingesterName); } foreach ($config['retriever'] ?? [] as $retrieverName => $retriever) { @@ -1969,7 +1970,7 @@ private function processVectorizerConfig(string $name, array $config, ContainerB /** * @param array $config */ - private function processIndexerConfig(int|string $name, array $config, ContainerBuilder $container): void + private function processIngesterConfig(int|string $name, array $config, ContainerBuilder $container): void { $transformers = []; foreach ($config['transformers'] as $transformer) { @@ -1981,20 +1982,23 @@ private function processIndexerConfig(int|string $name, array $config, Container $filters[] = new Reference($filter); } - $definition = new Definition(Indexer::class, [ + $definition = new Definition(Ingester::class, [ new Reference($config['loader']), - new Reference($config['vectorizer']), - new Reference($config['store']), + new Definition(Indexer::class, [ + new Reference($config['vectorizer']), + new Reference($config['store']), + $filters, + $transformers, + new Reference('logger', ContainerInterface::IGNORE_ON_INVALID_REFERENCE), + ]), $config['source'], - $filters, - $transformers, new Reference('logger', ContainerInterface::IGNORE_ON_INVALID_REFERENCE), ]); - $definition->addTag('ai.indexer', ['name' => $name]); + $definition->addTag('ai.ingester', ['name' => $name]); - $serviceId = 'ai.indexer.'.$name; 
+ $serviceId = 'ai.ingester.'.$name; $container->setDefinition($serviceId, $definition); - $container->registerAliasForArgument($serviceId, IndexerInterface::class, (new Target((string) $name))->getParsedName()); + $container->registerAliasForArgument($serviceId, IngesterInterface::class, (new Target((string) $name))->getParsedName()); } /** diff --git a/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php b/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php index c59c9f5f71..e04ee7595b 100644 --- a/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php +++ b/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php @@ -69,7 +69,8 @@ use Symfony\AI\Store\Document\Transformer\TextTrimTransformer; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Document\VectorizerInterface; -use Symfony\AI\Store\IndexerInterface; +use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\IngesterInterface; use Symfony\AI\Store\InMemory\Store as InMemoryStore; use Symfony\AI\Store\ManagedStoreInterface; use Symfony\AI\Store\RetrieverInterface; @@ -5074,7 +5075,7 @@ public function testInjectionVectorizerAliasIsRegistered() $this->assertTrue($container->hasAlias(VectorizerInterface::class.' 
$another')); } - public function testIndexerWithConfiguredVectorizer() + public function testIngesterWithConfiguredVectorizer() { $container = $this->buildContainer([ 'ai' => [ @@ -5089,8 +5090,8 @@ public function testIndexerWithConfiguredVectorizer() 'model' => 'text-embedding-3-small', ], ], - 'indexer' => [ - 'my_indexer' => [ + 'ingester' => [ + 'my_ingester' => [ 'loader' => InMemoryLoader::class, 'vectorizer' => 'ai.vectorizer.my_vectorizer', 'store' => 'ai.store.memory.my_store', @@ -5099,24 +5100,26 @@ public function testIndexerWithConfiguredVectorizer() ], ]); - $this->assertTrue($container->hasDefinition('ai.indexer.my_indexer')); + $this->assertTrue($container->hasDefinition('ai.ingester.my_ingester')); $this->assertTrue($container->hasDefinition('ai.vectorizer.my_vectorizer')); - $indexerDefinition = $container->getDefinition('ai.indexer.my_indexer'); - $arguments = $indexerDefinition->getArguments(); + $ingesterDefinition = $container->getDefinition('ai.ingester.my_ingester'); + $arguments = $ingesterDefinition->getArguments(); $this->assertInstanceOf(Reference::class, $arguments[0]); $this->assertSame(InMemoryLoader::class, (string) $arguments[0]); - $this->assertInstanceOf(Reference::class, $arguments[1]); - $this->assertSame('ai.vectorizer.my_vectorizer', (string) $arguments[1]); + $this->assertInstanceOf(Definition::class, $arguments[1]); + $this->assertSame(Indexer::class, $arguments[1]->getClass()); + $this->assertSame('ai.vectorizer.my_vectorizer', (string) $arguments[1]->getArguments()[0]); + $this->assertSame('ai.store.memory.my_store', (string) $arguments[1]->getArguments()[1]); // Should not create model-specific vectorizer when using configured one - $this->assertFalse($container->hasDefinition('ai.indexer.my_indexer.vectorizer')); - $this->assertFalse($container->hasDefinition('ai.indexer.my_indexer.model')); + $this->assertFalse($container->hasDefinition('ai.ingester.my_ingester.vectorizer')); + 
$this->assertFalse($container->hasDefinition('ai.ingester.my_ingester.model')); } - public function testIndexerWithStringSource() + public function testIngesterWithStringSource() { $container = $this->buildContainer([ 'ai' => [ @@ -5125,8 +5128,8 @@ public function testIndexerWithStringSource() 'my_store' => [], ], ], - 'indexer' => [ - 'my_indexer' => [ + 'ingester' => [ + 'my_ingester' => [ 'loader' => InMemoryLoader::class, 'source' => 'https://example.com/feed.xml', 'vectorizer' => 'my_vectorizer_service', @@ -5136,14 +5139,14 @@ public function testIndexerWithStringSource() ], ]); - $this->assertTrue($container->hasDefinition('ai.indexer.my_indexer')); - $indexerDefinition = $container->getDefinition('ai.indexer.my_indexer'); - $arguments = $indexerDefinition->getArguments(); + $this->assertTrue($container->hasDefinition('ai.ingester.my_ingester')); + $ingesterDefinition = $container->getDefinition('ai.ingester.my_ingester'); + $arguments = $ingesterDefinition->getArguments(); - $this->assertSame('https://example.com/feed.xml', $arguments[3]); + $this->assertSame('https://example.com/feed.xml', $arguments[2]); } - public function testIndexerWithArraySource() + public function testIngesterWithArraySource() { $container = $this->buildContainer([ 'ai' => [ @@ -5152,8 +5155,8 @@ public function testIndexerWithArraySource() 'my_store' => [], ], ], - 'indexer' => [ - 'my_indexer' => [ + 'ingester' => [ + 'my_ingester' => [ 'loader' => InMemoryLoader::class, 'source' => [ '/path/to/file1.txt', @@ -5167,20 +5170,20 @@ public function testIndexerWithArraySource() ], ]); - $this->assertTrue($container->hasDefinition('ai.indexer.my_indexer')); - $indexerDefinition = $container->getDefinition('ai.indexer.my_indexer'); - $arguments = $indexerDefinition->getArguments(); + $this->assertTrue($container->hasDefinition('ai.ingester.my_ingester')); + $ingesterDefinition = $container->getDefinition('ai.ingester.my_ingester'); + $arguments = $ingesterDefinition->getArguments(); - 
$this->assertIsArray($arguments[3]); - $this->assertCount(3, $arguments[3]); + $this->assertIsArray($arguments[2]); + $this->assertCount(3, $arguments[2]); $this->assertSame([ '/path/to/file1.txt', '/path/to/file2.txt', 'https://example.com/feed.xml', - ], $arguments[3]); + ], $arguments[2]); } - public function testIndexerWithNullSource() + public function testIngesterWithNullSource() { $container = $this->buildContainer([ 'ai' => [ @@ -5189,8 +5192,8 @@ public function testIndexerWithNullSource() 'my_store' => [], ], ], - 'indexer' => [ - 'my_indexer' => [ + 'ingester' => [ + 'my_ingester' => [ 'loader' => InMemoryLoader::class, 'vectorizer' => 'my_vectorizer_service', 'store' => 'ai.store.memory.my_store', @@ -5200,14 +5203,14 @@ public function testIndexerWithNullSource() ], ]); - $this->assertTrue($container->hasDefinition('ai.indexer.my_indexer')); - $indexerDefinition = $container->getDefinition('ai.indexer.my_indexer'); - $arguments = $indexerDefinition->getArguments(); + $this->assertTrue($container->hasDefinition('ai.ingester.my_ingester')); + $ingesterDefinition = $container->getDefinition('ai.ingester.my_ingester'); + $arguments = $ingesterDefinition->getArguments(); - $this->assertNull($arguments[3]); + $this->assertNull($arguments[2]); } - public function testIndexerWithConfiguredTransformers() + public function testIngesterWithConfiguredTransformers() { $container = $this->buildContainer([ 'ai' => [ @@ -5216,8 +5219,8 @@ public function testIndexerWithConfiguredTransformers() 'my_store' => [], ], ], - 'indexer' => [ - 'my_indexer' => [ + 'ingester' => [ + 'my_ingester' => [ 'loader' => InMemoryLoader::class, 'transformers' => [ TextTrimTransformer::class, @@ -5230,22 +5233,24 @@ public function testIndexerWithConfiguredTransformers() ], ]); - $this->assertTrue($container->hasDefinition('ai.indexer.my_indexer')); - $indexerDefinition = $container->getDefinition('ai.indexer.my_indexer'); - $arguments = $indexerDefinition->getArguments(); + 
$this->assertTrue($container->hasDefinition('ai.ingester.my_ingester')); + $ingesterDefinition = $container->getDefinition('ai.ingester.my_ingester'); + $arguments = $ingesterDefinition->getArguments(); + + $indexerArguments = $arguments[1]->getArguments(); - $this->assertSame([], $arguments[4]); // Empty filters - $this->assertIsArray($arguments[5]); - $this->assertCount(2, $arguments[5]); + $this->assertSame([], $indexerArguments[2]); // Empty filters + $this->assertIsArray($indexerArguments[3]); + $this->assertCount(2, $indexerArguments[3]); - $this->assertInstanceOf(Reference::class, $arguments[5][0]); - $this->assertSame(TextTrimTransformer::class, (string) $arguments[5][0]); + $this->assertInstanceOf(Reference::class, $indexerArguments[3][0]); + $this->assertSame(TextTrimTransformer::class, (string) $indexerArguments[3][0]); - $this->assertInstanceOf(Reference::class, $arguments[5][1]); - $this->assertSame('App\CustomTransformer', (string) $arguments[5][1]); + $this->assertInstanceOf(Reference::class, $indexerArguments[3][1]); + $this->assertSame('App\CustomTransformer', (string) $indexerArguments[3][1]); } - public function testIndexerWithEmptyTransformers() + public function testIngesterWithEmptyTransformers() { $container = $this->buildContainer([ 'ai' => [ @@ -5254,8 +5259,8 @@ public function testIndexerWithEmptyTransformers() 'my_store' => [], ], ], - 'indexer' => [ - 'my_indexer' => [ + 'ingester' => [ + 'my_ingester' => [ 'loader' => InMemoryLoader::class, 'transformers' => [], 'vectorizer' => 'my_vectorizer_service', @@ -5265,15 +5270,16 @@ public function testIndexerWithEmptyTransformers() ], ]); - $this->assertTrue($container->hasDefinition('ai.indexer.my_indexer')); - $indexerDefinition = $container->getDefinition('ai.indexer.my_indexer'); - $arguments = $indexerDefinition->getArguments(); + $this->assertTrue($container->hasDefinition('ai.ingester.my_ingester')); + $ingesterDefinition = $container->getDefinition('ai.ingester.my_ingester'); + 
$arguments = $ingesterDefinition->getArguments(); + $indexerArguments = $arguments[1]->getArguments(); - $this->assertSame([], $arguments[4]); // Empty filters - $this->assertSame([], $arguments[5]); // Empty transformers + $this->assertSame([], $indexerArguments[2]); // Empty filters + $this->assertSame([], $indexerArguments[3]); // Empty transformers } - public function testIndexerWithoutTransformers() + public function testIngesterWithoutTransformers() { $container = $this->buildContainer([ 'ai' => [ @@ -5282,8 +5288,8 @@ public function testIndexerWithoutTransformers() 'my_store' => [], ], ], - 'indexer' => [ - 'my_indexer' => [ + 'ingester' => [ + 'my_ingester' => [ 'loader' => InMemoryLoader::class, 'vectorizer' => 'my_vectorizer_service', 'store' => 'ai.store.memory.my_store', @@ -5293,15 +5299,16 @@ public function testIndexerWithoutTransformers() ], ]); - $this->assertTrue($container->hasDefinition('ai.indexer.my_indexer')); - $indexerDefinition = $container->getDefinition('ai.indexer.my_indexer'); - $arguments = $indexerDefinition->getArguments(); + $this->assertTrue($container->hasDefinition('ai.ingester.my_ingester')); + $ingesterDefinition = $container->getDefinition('ai.ingester.my_ingester'); + $arguments = $ingesterDefinition->getArguments(); + $indexerArguments = $arguments[1]->getArguments(); - $this->assertSame([], $arguments[4]); // Empty filters - $this->assertSame([], $arguments[5]); // Empty transformers + $this->assertSame([], $indexerArguments[2]); // Empty filters + $this->assertSame([], $indexerArguments[3]); // Empty transformers } - public function testIndexerWithSourceAndTransformers() + public function testIngesterWithSourceAndTransformers() { $container = $this->buildContainer([ 'ai' => [ @@ -5310,8 +5317,8 @@ public function testIndexerWithSourceAndTransformers() 'my_store' => [], ], ], - 'indexer' => [ - 'my_indexer' => [ + 'ingester' => [ + 'my_ingester' => [ 'loader' => InMemoryLoader::class, 'source' => [ '/path/to/file1.txt', 
@@ -5327,34 +5334,36 @@ public function testIndexerWithSourceAndTransformers() ], ]); - $this->assertTrue($container->hasDefinition('ai.indexer.my_indexer')); - $indexerDefinition = $container->getDefinition('ai.indexer.my_indexer'); - $arguments = $indexerDefinition->getArguments(); + $this->assertTrue($container->hasDefinition('ai.ingester.my_ingester')); + $ingesterDefinition = $container->getDefinition('ai.ingester.my_ingester'); + $arguments = $ingesterDefinition->getArguments(); $this->assertInstanceOf(Reference::class, $arguments[0]); $this->assertSame(InMemoryLoader::class, (string) $arguments[0]); - $this->assertInstanceOf(Reference::class, $arguments[1]); - $this->assertSame('my_vectorizer_service', (string) $arguments[1]); + $indexerArguments = $arguments[1]->getArguments(); - $this->assertInstanceOf(Reference::class, $arguments[2]); - $this->assertSame('ai.store.memory.my_store', (string) $arguments[2]); + $this->assertInstanceOf(Reference::class, $indexerArguments[0]); + $this->assertSame('my_vectorizer_service', (string) $indexerArguments[0]); - $this->assertIsArray($arguments[3]); - $this->assertCount(2, $arguments[3]); + $this->assertInstanceOf(Reference::class, $indexerArguments[1]); + $this->assertSame('ai.store.memory.my_store', (string) $indexerArguments[1]); + + $this->assertIsArray($arguments[2]); + $this->assertCount(2, $arguments[2]); $this->assertSame([ '/path/to/file1.txt', '/path/to/file2.txt', - ], $arguments[3]); + ], $arguments[2]); - $this->assertSame([], $arguments[4]); // Empty filters - $this->assertIsArray($arguments[5]); - $this->assertCount(1, $arguments[5]); - $this->assertInstanceOf(Reference::class, $arguments[5][0]); - $this->assertSame(TextTrimTransformer::class, (string) $arguments[5][0]); + $this->assertSame([], $indexerArguments[2]); // Empty filters + $this->assertIsArray($indexerArguments[3]); + $this->assertCount(1, $indexerArguments[3]); + $this->assertInstanceOf(Reference::class, $indexerArguments[3][0]); + 
$this->assertSame(TextTrimTransformer::class, (string) $indexerArguments[3][0]); } - public function testIndexerWithConfiguredFilters() + public function testIngesterWithConfiguredFilters() { $container = $this->buildContainer([ 'ai' => [ @@ -5363,8 +5372,8 @@ public function testIndexerWithConfiguredFilters() 'my_store' => [], ], ], - 'indexer' => [ - 'my_indexer' => [ + 'ingester' => [ + 'my_ingester' => [ 'loader' => InMemoryLoader::class, 'filters' => [ TextContainsFilter::class, @@ -5377,25 +5386,26 @@ public function testIndexerWithConfiguredFilters() ], ]); - $this->assertTrue($container->hasDefinition('ai.indexer.my_indexer')); - $indexerDefinition = $container->getDefinition('ai.indexer.my_indexer'); - $arguments = $indexerDefinition->getArguments(); + $this->assertTrue($container->hasDefinition('ai.ingester.my_ingester')); + $ingesterDefinition = $container->getDefinition('ai.ingester.my_ingester'); + $arguments = $ingesterDefinition->getArguments(); + $indexerArguments = $arguments[1]->getArguments(); - // Verify filters are in the correct position (index 4, before transformers) - $this->assertIsArray($arguments[4]); - $this->assertCount(2, $arguments[4]); + // Verify filters are in the correct position + $this->assertIsArray($indexerArguments[2]); + $this->assertCount(2, $indexerArguments[2]); - $this->assertInstanceOf(Reference::class, $arguments[4][0]); - $this->assertSame(TextContainsFilter::class, (string) $arguments[4][0]); + $this->assertInstanceOf(Reference::class, $indexerArguments[2][0]); + $this->assertSame(TextContainsFilter::class, (string) $indexerArguments[2][0]); - $this->assertInstanceOf(Reference::class, $arguments[4][1]); - $this->assertSame('App\CustomFilter', (string) $arguments[4][1]); + $this->assertInstanceOf(Reference::class, $indexerArguments[2][1]); + $this->assertSame('App\CustomFilter', (string) $indexerArguments[2][1]); - // Verify transformers are in the correct position (index 5, after filters) - $this->assertSame([], 
$arguments[5]); // Empty transformers + // Verify transformers are in the correct position (index 3, after filters) + $this->assertSame([], $indexerArguments[3]); // Empty transformers } - public function testIndexerWithEmptyFilters() + public function testIngesterWithEmptyFilters() { $container = $this->buildContainer([ 'ai' => [ @@ -5404,8 +5414,8 @@ public function testIndexerWithEmptyFilters() 'my_store' => [], ], ], - 'indexer' => [ - 'my_indexer' => [ + 'ingester' => [ + 'my_ingester' => [ 'loader' => InMemoryLoader::class, 'filters' => [], 'vectorizer' => 'my_vectorizer_service', @@ -5415,14 +5425,15 @@ public function testIndexerWithEmptyFilters() ], ]); - $this->assertTrue($container->hasDefinition('ai.indexer.my_indexer')); - $indexerDefinition = $container->getDefinition('ai.indexer.my_indexer'); - $arguments = $indexerDefinition->getArguments(); + $this->assertTrue($container->hasDefinition('ai.ingester.my_ingester')); + $ingesterDefinition = $container->getDefinition('ai.ingester.my_ingester'); + $arguments = $ingesterDefinition->getArguments(); + $indexerArguments = $arguments[1]->getArguments(); - $this->assertSame([], $arguments[4]); // Empty filters + $this->assertSame([], $indexerArguments[2]); // Empty filters } - public function testIndexerWithoutFilters() + public function testIngesterWithoutFilters() { $container = $this->buildContainer([ 'ai' => [ @@ -5431,8 +5442,8 @@ public function testIndexerWithoutFilters() 'my_store' => [], ], ], - 'indexer' => [ - 'my_indexer' => [ + 'ingester' => [ + 'my_ingester' => [ 'loader' => InMemoryLoader::class, 'vectorizer' => 'my_vectorizer_service', 'store' => 'ai.store.memory.my_store', @@ -5442,14 +5453,16 @@ public function testIndexerWithoutFilters() ], ]); - $this->assertTrue($container->hasDefinition('ai.indexer.my_indexer')); - $indexerDefinition = $container->getDefinition('ai.indexer.my_indexer'); - $arguments = $indexerDefinition->getArguments(); + 
$this->assertTrue($container->hasDefinition('ai.ingester.my_ingester')); + $ingesterDefinition = $container->getDefinition('ai.ingester.my_ingester'); + $arguments = $ingesterDefinition->getArguments(); + + $indexerArguments = $arguments[1]->getArguments(); - $this->assertSame([], $arguments[4]); // Empty filters + $this->assertSame([], $indexerArguments[2]); // Empty filters } - public function testIndexerWithFiltersAndTransformers() + public function testIngesterWithFiltersAndTransformers() { $container = $this->buildContainer([ 'ai' => [ @@ -5458,8 +5471,8 @@ public function testIndexerWithFiltersAndTransformers() 'my_store' => [], ], ], - 'indexer' => [ - 'my_indexer' => [ + 'ingester' => [ + 'my_ingester' => [ 'loader' => InMemoryLoader::class, 'filters' => [ TextContainsFilter::class, @@ -5474,24 +5487,26 @@ public function testIndexerWithFiltersAndTransformers() ], ]); - $this->assertTrue($container->hasDefinition('ai.indexer.my_indexer')); - $indexerDefinition = $container->getDefinition('ai.indexer.my_indexer'); - $arguments = $indexerDefinition->getArguments(); + $this->assertTrue($container->hasDefinition('ai.ingester.my_ingester')); + $ingesterDefinition = $container->getDefinition('ai.ingester.my_ingester'); + $arguments = $ingesterDefinition->getArguments(); - // Verify filters are at index 4 - $this->assertIsArray($arguments[4]); - $this->assertCount(1, $arguments[4]); - $this->assertInstanceOf(Reference::class, $arguments[4][0]); - $this->assertSame(TextContainsFilter::class, (string) $arguments[4][0]); + $indexerArguments = $arguments[1]->getArguments(); - // Verify transformers are at index 5 - $this->assertIsArray($arguments[5]); - $this->assertCount(1, $arguments[5]); - $this->assertInstanceOf(Reference::class, $arguments[5][0]); - $this->assertSame(TextTrimTransformer::class, (string) $arguments[5][0]); + // Verify filters are at index 2 + $this->assertIsArray($indexerArguments[2]); + $this->assertCount(1, $indexerArguments[2]); + 
$this->assertInstanceOf(Reference::class, $indexerArguments[2][0]); + $this->assertSame(TextContainsFilter::class, (string) $indexerArguments[2][0]); + + // Verify transformers are at index 3 + $this->assertIsArray($indexerArguments[3]); + $this->assertCount(1, $indexerArguments[3]); + $this->assertInstanceOf(Reference::class, $indexerArguments[3][0]); + $this->assertSame(TextTrimTransformer::class, (string) $indexerArguments[3][0]); } - public function testIndexerWithSourceFiltersAndTransformers() + public function testIngesterWithSourceFiltersAndTransformers() { $container = $this->buildContainer([ 'ai' => [ @@ -5500,8 +5515,8 @@ public function testIndexerWithSourceFiltersAndTransformers() 'my_store' => [], ], ], - 'indexer' => [ - 'my_indexer' => [ + 'ingester' => [ + 'my_ingester' => [ 'loader' => InMemoryLoader::class, 'source' => [ '/path/to/file1.txt', @@ -5520,36 +5535,40 @@ public function testIndexerWithSourceFiltersAndTransformers() ], ]); - $this->assertTrue($container->hasDefinition('ai.indexer.my_indexer')); - $indexerDefinition = $container->getDefinition('ai.indexer.my_indexer'); - $arguments = $indexerDefinition->getArguments(); + $this->assertTrue($container->hasDefinition('ai.ingester.my_ingester')); + $ingesterDefinition = $container->getDefinition('ai.ingester.my_ingester'); + $arguments = $ingesterDefinition->getArguments(); + $indexerArguments = $arguments[1]->getArguments(); // Verify correct order: loader, vectorizer, store, source, filters, transformers, logger $this->assertInstanceOf(Reference::class, $arguments[0]); // loader $this->assertSame(InMemoryLoader::class, (string) $arguments[0]); - $this->assertInstanceOf(Reference::class, $arguments[1]); // vectorizer - $this->assertSame('my_vectorizer_service', (string) $arguments[1]); + $this->assertInstanceOf(Reference::class, $indexerArguments[0]); // vectorizer + $this->assertSame('my_vectorizer_service', (string) $indexerArguments[0]); + + $this->assertInstanceOf(Reference::class, 
$indexerArguments[1]); // store + $this->assertSame('ai.store.memory.my_store', (string) $indexerArguments[1]); - $this->assertInstanceOf(Reference::class, $arguments[2]); // store - $this->assertSame('ai.store.memory.my_store', (string) $arguments[2]); + $this->assertIsArray($arguments[2]); // source + $this->assertCount(2, $arguments[2]); + $this->assertSame(['/path/to/file1.txt', '/path/to/file2.txt'], $arguments[2]); - $this->assertIsArray($arguments[3]); // source - $this->assertCount(2, $arguments[3]); - $this->assertSame(['/path/to/file1.txt', '/path/to/file2.txt'], $arguments[3]); + $this->assertIsArray($indexerArguments[2]); // filters + $this->assertCount(1, $indexerArguments[2]); + $this->assertInstanceOf(Reference::class, $indexerArguments[2][0]); + $this->assertSame(TextContainsFilter::class, (string) $indexerArguments[2][0]); - $this->assertIsArray($arguments[4]); // filters - $this->assertCount(1, $arguments[4]); - $this->assertInstanceOf(Reference::class, $arguments[4][0]); - $this->assertSame(TextContainsFilter::class, (string) $arguments[4][0]); + $this->assertIsArray($indexerArguments[3]); // transformers + $this->assertCount(1, $indexerArguments[3]); + $this->assertInstanceOf(Reference::class, $indexerArguments[3][0]); + $this->assertSame(TextTrimTransformer::class, (string) $indexerArguments[3][0]); - $this->assertIsArray($arguments[5]); // transformers - $this->assertCount(1, $arguments[5]); - $this->assertInstanceOf(Reference::class, $arguments[5][0]); - $this->assertSame(TextTrimTransformer::class, (string) $arguments[5][0]); + $this->assertInstanceOf(Reference::class, $indexerArguments[4]); // logger + $this->assertSame('logger', (string) $indexerArguments[4]); - $this->assertInstanceOf(Reference::class, $arguments[6]); // logger - $this->assertSame('logger', (string) $arguments[6]); + $this->assertInstanceOf(Reference::class, $arguments[3]); // logger + $this->assertSame('logger', (string) $arguments[3]); } public function 
testInjectionIndexerAliasIsRegistered() @@ -5561,8 +5580,8 @@ public function testInjectionIndexerAliasIsRegistered() 'my_store' => [], ], ], - 'indexer' => [ - 'my_indexer' => [ + 'ingester' => [ + 'my_ingester' => [ 'loader' => InMemoryLoader::class, 'transformers' => [], 'vectorizer' => 'my_vectorizer_service', @@ -5578,8 +5597,8 @@ public function testInjectionIndexerAliasIsRegistered() ], ]); - $this->assertTrue($container->hasAlias(IndexerInterface::class.' $myIndexer')); - $this->assertTrue($container->hasAlias(IndexerInterface::class.' $another')); + $this->assertTrue($container->hasAlias(IngesterInterface::class.' $myIngester')); + $this->assertTrue($container->hasAlias(IngesterInterface::class.' $another')); } public function testRetrieverWithConfiguredVectorizer() @@ -7536,8 +7555,8 @@ private function getFullConfig(): array ], ], ], - 'indexer' => [ - 'my_text_indexer' => [ + 'ingester' => [ + 'my_text_ingester' => [ 'loader' => InMemoryLoader::class, 'vectorizer' => 'ai.vectorizer.test_vectorizer', 'store' => 'my_azuresearch_store_service_id', diff --git a/src/store/AGENTS.md b/src/store/AGENTS.md index c793fceaf1..f53c2c08f7 100644 --- a/src/store/AGENTS.md +++ b/src/store/AGENTS.md @@ -11,7 +11,7 @@ Low-level abstraction for vector stores enabling RAG applications. 
Unified inter ### Core Interfaces - **StoreInterface**: Main interface with `add()` and `query()` methods - **ManagedStoreInterface**: Extends with `setup()` and `drop()` lifecycle methods -- **Indexer**: High-level service converting TextDocuments to VectorDocuments +- **Ingester**: High-level service converting TextDocuments to VectorDocuments ### Bridge Pattern Multiple vector store implementations: @@ -59,4 +59,4 @@ composer install - PHPUnit 11+ with strict configuration - Document preprocessing with transformers - Batch indexing for performance -- Unified interface across all vector store types \ No newline at end of file +- Unified interface across all vector store types diff --git a/src/store/CHANGELOG.md b/src/store/CHANGELOG.md index fa54f16f1d..845f5ba121 100644 --- a/src/store/CHANGELOG.md +++ b/src/store/CHANGELOG.md @@ -23,7 +23,7 @@ CHANGELOG - `Vectorizer` for converting TextDocuments to VectorDocuments - Batch vectorization support for compatible platforms - Single document vectorization with fallback - * Add high-level `Indexer` service: + * Add high-level `Ingester` service: - Orchestrates document processing pipeline - Accepts TextDocuments, vectorizes and stores in chunks - Configurable batch processing diff --git a/src/store/CLAUDE.md b/src/store/CLAUDE.md index 3006c32635..740a63dfd1 100644 --- a/src/store/CLAUDE.md +++ b/src/store/CLAUDE.md @@ -41,7 +41,7 @@ composer install ### Core Interfaces - **StoreInterface**: Main interface defining `add()` and `query()` methods for vector document storage and retrieval - **ManagedStoreInterface**: Extension interface providing `setup()` and `drop()` methods for store lifecycle management -- **Indexer**: High-level service that converts TextDocuments to VectorDocuments and stores them in batches +- **Ingester**: High-level service that converts TextDocuments to VectorDocuments and stores them in batches ### Bridge Pattern Architecture The component follows a bridge pattern with implementations for 
multiple vector stores: @@ -64,4 +64,4 @@ The component follows a bridge pattern with implementations for multiple vector - **symfony/http-client**: For HTTP-based vector store communication ### Test Architecture -Tests follow the same bridge structure as source code, with each store implementation having corresponding test classes. Tests use PHPUnit 11+ with strict configuration for coverage and error handling. \ No newline at end of file +Tests follow the same bridge structure as source code, with each store implementation having corresponding test classes. Tests use PHPUnit 11+ with strict configuration for coverage and error handling. diff --git a/src/store/src/Command/IndexCommand.php b/src/store/src/Command/IngestCommand.php similarity index 63% rename from src/store/src/Command/IndexCommand.php rename to src/store/src/Command/IngestCommand.php index 65fc752245..74c73ba8c3 100644 --- a/src/store/src/Command/IndexCommand.php +++ b/src/store/src/Command/IngestCommand.php @@ -12,7 +12,7 @@ namespace Symfony\AI\Store\Command; use Symfony\AI\Store\Exception\RuntimeException; -use Symfony\AI\Store\IndexerInterface; +use Symfony\AI\Store\IngesterInterface; use Symfony\Component\Console\Attribute\AsCommand; use Symfony\Component\Console\Command\Command; use Symfony\Component\Console\Completion\CompletionInput; @@ -28,34 +28,34 @@ * @author Oskar Stark */ #[AsCommand( - name: 'ai:store:index', + name: 'ai:store:ingest', description: 'Index documents into a store', )] -final class IndexCommand extends Command +final class IngestCommand extends Command { /** - * @param ServiceLocator $indexers + * @param ServiceLocator $ingesters */ public function __construct( - private readonly ServiceLocator $indexers, + private readonly ServiceLocator $ingesters, ) { parent::__construct(); } public function complete(CompletionInput $input, CompletionSuggestions $suggestions): void { - if ($input->mustSuggestArgumentValuesFor('indexer')) { - 
$suggestions->suggestValues(array_keys($this->indexers->getProvidedServices())); + if ($input->mustSuggestArgumentValuesFor('ingester')) { + $suggestions->suggestValues(array_keys($this->ingesters->getProvidedServices())); } } protected function configure(): void { $this - ->addArgument('indexer', InputArgument::REQUIRED, 'Name of the indexer to run') - ->addOption('source', 's', InputOption::VALUE_OPTIONAL | InputOption::VALUE_IS_ARRAY, 'Source(s) to index (overrides configured source)') + ->addArgument('ingester', InputArgument::REQUIRED, 'Name of the ingester to run') + ->addOption('source', 's', InputOption::VALUE_OPTIONAL | InputOption::VALUE_IS_ARRAY, 'Source(s) to ingest (overrides configured source)') ->setHelp(<<<'EOF' -The %command.name% command indexes documents into a store using the specified indexer. +The %command.name% command ingests documents into a store using the specified ingester. Basic usage: php %command.full_name% blog @@ -74,7 +74,7 @@ protected function execute(InputInterface $input, OutputInterface $output): int { $io = new SymfonyStyle($input, $output); - $indexer = $input->getArgument('indexer'); + $ingester = $input->getArgument('ingester'); $sources = $input->getOption('source'); $source = match (true) { [] === $sources => null, @@ -82,24 +82,24 @@ protected function execute(InputInterface $input, OutputInterface $output): int default => $sources, }; - if (!$this->indexers->has($indexer)) { - throw new RuntimeException(\sprintf('The "%s" indexer does not exist.', $indexer)); + if (!$this->ingesters->has($ingester)) { + throw new RuntimeException(\sprintf('The "%s" ingester does not exist.', $ingester)); } - $indexerService = $this->indexers->get($indexer); + $ingesterService = $this->ingesters->get($ingester); if (null !== $source) { - $indexerService = $indexerService->withSource($source); + $ingesterService = $ingesterService->withSource($source); } - $io->title(\sprintf('Indexing documents using "%s" indexer', $indexer)); +
$io->title(\sprintf('Indexing documents using "%s" ingester', $ingester)); try { - $indexerService->index([]); + $ingesterService->ingest($source); - $io->success(\sprintf('Documents indexed successfully using "%s" indexer.', $indexer)); + $io->success(\sprintf('Documents ingested successfully using "%s" ingester.', $ingester)); } catch (\Exception $e) { - throw new RuntimeException(\sprintf('An error occurred while indexing with "%s": ', $indexer).$e->getMessage(), previous: $e); + throw new RuntimeException(\sprintf('An error occurred while ingesting with "%s": ', $ingester).$e->getMessage(), previous: $e); } return Command::SUCCESS; diff --git a/src/store/src/Indexer.php b/src/store/src/Indexer.php index d797c05f89..507dec1a27 100644 --- a/src/store/src/Indexer.php +++ b/src/store/src/Indexer.php @@ -13,9 +13,7 @@ use Psr\Log\LoggerInterface; use Psr\Log\NullLogger; -use Symfony\AI\Store\Document\EmbeddableDocumentInterface; use Symfony\AI\Store\Document\FilterInterface; -use Symfony\AI\Store\Document\LoaderInterface; use Symfony\AI\Store\Document\TransformerInterface; use Symfony\AI\Store\Document\VectorizerInterface; @@ -26,47 +24,22 @@ class Indexer implements IndexerInterface { /** - * @var array - */ - private array $sources = []; - - /** - * @param string|array|null $source Source identifier(s) for data loading (file paths, URLs, etc.) - * @param FilterInterface[] $filters Filters to apply after loading documents to remove unwanted content - * @param TransformerInterface[] $transformers Transformers to mutate documents after filtering (chunking, cleaning, etc.) + * @param FilterInterface[] $filters Filters to apply after loading documents to remove unwanted content + * @param TransformerInterface[] $transformers Transformers to mutate documents after filtering (chunking, cleaning, etc.) 
*/ public function __construct( - private LoaderInterface $loader, private VectorizerInterface $vectorizer, private StoreInterface $store, - string|array|null $source = null, private array $filters = [], private array $transformers = [], private LoggerInterface $logger = new NullLogger(), ) { - $this->sources = null === $source ? [] : (array) $source; } - public function withSource(string|array $source): self + public function index(iterable $documents, array $options = []): void { - return new self($this->loader, $this->vectorizer, $this->store, $source, $this->filters, $this->transformers, $this->logger); - } - - public function index(array $options = []): void - { - $this->logger->debug('Starting document processing', ['sources' => $this->sources, 'options' => $options]); - - $documents = []; - if ([] === $this->sources) { - $documents = $this->loadSource(null); - } else { - foreach ($this->sources as $singleSource) { - $documents = array_merge($documents, $this->loadSource($singleSource)); - } - } - - if ([] === $documents) { - $this->logger->debug('No documents to process', ['sources' => $this->sources]); + if (!$documents) { + $this->logger->debug('No documents to process.'); return; } @@ -98,17 +71,4 @@ public function index(array $options = []): void $this->logger->debug('Document processing completed', ['total_documents' => $counter]); } - - /** - * @return EmbeddableDocumentInterface[] - */ - private function loadSource(?string $source): array - { - $documents = []; - foreach ($this->loader->load($source) as $document) { - $documents[] = $document; - } - - return $documents; - } } diff --git a/src/store/src/IndexerInterface.php b/src/store/src/IndexerInterface.php index fcb27494f3..161567bbeb 100644 --- a/src/store/src/IndexerInterface.php +++ b/src/store/src/IndexerInterface.php @@ -11,6 +11,8 @@ namespace Symfony\AI\Store; +use Symfony\AI\Store\Document\EmbeddableDocumentInterface; + /** * Handles the complete document processing pipeline: load → 
transform → vectorize → store. * @@ -19,16 +21,10 @@ interface IndexerInterface { /** - * Process sources through the complete document pipeline: load → transform → vectorize → store. - * - * @param array{chunk_size?: int} $options Processing options - */ - public function index(array $options = []): void; - - /** - * Create a new instance with a different source. + * Process documents through the document pipeline: transform → vectorize → store. * - * @param string|array $source Source identifier (file path, URL, etc.) or array of sources + * @param iterable $documents + * @param array{chunk_size?: int} $options Processing options */ - public function withSource(string|array $source): self; + public function index(iterable $documents, array $options = []): void; } diff --git a/src/store/src/Ingester.php b/src/store/src/Ingester.php new file mode 100644 index 0000000000..a7640b3c5d --- /dev/null +++ b/src/store/src/Ingester.php @@ -0,0 +1,63 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +namespace Symfony\AI\Store; + +use Psr\Log\LoggerInterface; +use Psr\Log\NullLogger; +use Symfony\AI\Store\Document\LoaderInterface; + +/** + * @author Christopher Hertel + * @author Oskar Stark + */ +class Ingester implements IngesterInterface +{ + /** + * @param string|array $sources + */ + public function __construct( + private LoaderInterface $loader, + private IndexerInterface $indexer, + private string|array $sources = [], + private LoggerInterface $logger = new NullLogger(), + ) { + $this->sources = (array) $sources; + } + + public function withSource(string|array $source): self + { + return new self($this->loader, $this->indexer, $source, $this->logger); + } + + public function ingest(array $options = []): void + { + $this->logger->debug('Starting document processing', ['sources' => $this->sources, 'options' => $options]); + + if ($this->sources) { + $documents = (function () { + foreach ($this->sources as $singleSource) { + yield from $this->loader->load($singleSource); + } + })(); + } else { + $documents = $this->loader->load(null); + } + + if ([] === $documents) { + $this->logger->debug('No documents to process', ['sources' => $this->sources]); + + return; + } + + $this->indexer->index($documents, $options); + } +} diff --git a/src/store/src/IngesterInterface.php b/src/store/src/IngesterInterface.php new file mode 100644 index 0000000000..43f305450a --- /dev/null +++ b/src/store/src/IngesterInterface.php @@ -0,0 +1,34 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\AI\Store; + +/** + * Handles the complete document processing pipeline: load → transform → vectorize → store. + * + * @author Oskar Stark + */ +interface IngesterInterface +{ + /** + * Process sources through the complete document pipeline: load → transform → vectorize → store. 
+ * + * @param array{chunk_size?: int} $options Processing options + */ + public function ingest(array $options = []): void; + + /** + * Create a new instance with a different source. + * + * @param string|array $source Source identifier (file path, URL, etc.) or array of sources + */ + public function withSource(string|array $source): self; +} diff --git a/src/store/src/RetrieverInterface.php b/src/store/src/RetrieverInterface.php index 26eea5aa63..23a4fd853e 100644 --- a/src/store/src/RetrieverInterface.php +++ b/src/store/src/RetrieverInterface.php @@ -16,7 +16,7 @@ /** * Retrieves documents from a vector store based on a query string. * - * The opposite of IndexerInterface - while the Indexer loads, transforms, vectorizes and stores documents, + * The opposite of IngesterInterface - while the Ingester loads, transforms, vectorizes and stores documents, * the Retriever vectorizes a query and retrieves similar documents from the store. * * @author Oskar Stark diff --git a/src/store/tests/IndexerTest.php b/src/store/tests/IngesterTest.php similarity index 81% rename from src/store/tests/IndexerTest.php rename to src/store/tests/IngesterTest.php index 6ee8083833..1bb66b975c 100644 --- a/src/store/tests/IndexerTest.php +++ b/src/store/tests/IngesterTest.php @@ -23,11 +23,12 @@ use Symfony\AI\Store\Document\VectorDocument; use Symfony\AI\Store\Document\Vectorizer; use Symfony\AI\Store\Indexer; +use Symfony\AI\Store\Ingester; use Symfony\AI\Store\Tests\Double\PlatformTestHandler; use Symfony\AI\Store\Tests\Double\TestStore; use Symfony\Component\Uid\Uuid; -final class IndexerTest extends TestCase +final class IngesterTest extends TestCase { public function testIndexSingleDocument() { @@ -36,8 +37,8 @@ public function testIndexSingleDocument() $loader = new InMemoryLoader([$document]); $vectorizer = new Vectorizer(PlatformTestHandler::createPlatform(new VectorResult($vector)), 'text-embedding-3-small'); - $indexer = new Indexer($loader, $vectorizer, $store = new 
TestStore()); - $indexer->index(); + $ingester = new Ingester($loader, new Indexer($vectorizer, $store = new TestStore())); + $ingester->ingest(); $this->assertCount(1, $store->documents); $this->assertInstanceOf(VectorDocument::class, $store->documents[0]); @@ -50,8 +51,8 @@ public function testIndexEmptyDocumentList() $loader = new InMemoryLoader([]); $vectorizer = new Vectorizer(PlatformTestHandler::createPlatform(), 'text-embedding-3-small'); - $indexer = new Indexer($loader, $vectorizer, $store = new TestStore()); - $indexer->index(); + $ingester = new Ingester($loader, new Indexer($vectorizer, $store = new TestStore())); + $ingester->ingest(); $this->assertSame([], $store->documents); } @@ -64,8 +65,8 @@ public function testIndexDocumentWithMetadata() $loader = new InMemoryLoader([$document]); $vectorizer = new Vectorizer(PlatformTestHandler::createPlatform(new VectorResult($vector)), 'text-embedding-3-small'); - $indexer = new Indexer($loader, $vectorizer, $store = new TestStore()); - $indexer->index(); + $ingester = new Ingester($loader, new Indexer($vectorizer, $store = new TestStore())); + $ingester->ingest(); $this->assertSame(1, $store->addCalls); $this->assertCount(1, $store->documents); @@ -84,19 +85,19 @@ public function testWithSource() $loader = new InMemoryLoader([$document1]); $vectorizer = new Vectorizer(PlatformTestHandler::createPlatform(new VectorResult($vector)), 'text-embedding-3-small'); - $indexer = new Indexer($loader, $vectorizer, $store = new TestStore(), 'source1'); + $ingester = new Ingester($loader, new Indexer($vectorizer, $store = new TestStore()), 'source1'); - $indexerWithNewSource = $indexer->withSource('source2'); + $ingesterWithNewSource = $ingester->withSource('source2'); - $this->assertNotSame($indexer, $indexerWithNewSource); + $this->assertNotSame($ingester, $ingesterWithNewSource); // Both can index successfully - $indexer->index(); + $ingester->ingest(); $this->assertCount(1, $store->documents); $store2 = new 
TestStore(); - $indexer2 = new Indexer($loader, $vectorizer, $store2, 'source2'); - $indexer2->index(); + $ingester2 = new Ingester($loader, new Indexer($vectorizer, $store2), 'source2'); + $ingester2->ingest(); $this->assertCount(1, $store2->documents); } @@ -113,23 +114,23 @@ public function testWithSourceArray() // InMemoryLoader returns all documents regardless of source $loader = new InMemoryLoader([$document1, $document2]); - // Need 6 vectors total: 2 for first indexer, then 2 for each source in the second indexer (2 sources * 2 docs = 4) + // Need 6 vectors total: 2 for first ingester, then 2 for each source in the second ingester (2 sources * 2 docs = 4) $vectorizer = new Vectorizer(PlatformTestHandler::createPlatform(new VectorResult($vector1, $vector2, $vector3, $vector4, $vector5, $vector6)), 'test-embedding-model'); - // Create indexer with single source - $indexer = new Indexer($loader, $vectorizer, $store1 = new TestStore(), 'source1'); + // Create ingester with single source + $ingester = new Ingester($loader, new Indexer($vectorizer, $store1 = new TestStore()), 'source1'); - $indexerWithMultipleSources = $indexer->withSource(['source2', 'source3']); + $ingesterWithMultipleSources = $ingester->withSource(['source2', 'source3']); - $this->assertNotSame($indexer, $indexerWithMultipleSources); + $this->assertNotSame($ingester, $ingesterWithMultipleSources); // Since InMemoryLoader ignores source, both will index all documents - $indexer->index(); + $ingester->ingest(); $this->assertCount(2, $store1->documents); $store2 = new TestStore(); - $indexer2 = new Indexer($loader, $vectorizer, $store2, ['source2', 'source3']); - $indexer2->index(); + $ingester2 = new Ingester($loader, new Indexer($vectorizer, $store2), ['source2', 'source3']); + $ingester2->ingest(); // With array sources, loadSource is called for each source // Since InMemoryLoader ignores source, it returns all docs each time // So with 2 sources and 2 docs each time = 4 documents total @@ 
-150,8 +151,8 @@ public function testIndexWithTextContainsFilter() $vectorizer = new Vectorizer(PlatformTestHandler::createPlatform(new VectorResult($vector1, $vector2)), 'test-embedding-model'); $filter = new TextContainsFilter('Week of Symfony'); - $indexer = new Indexer($loader, $vectorizer, $store = new TestStore(), null, [$filter]); - $indexer->index(); + $ingester = new Ingester($loader, new Indexer($vectorizer, $store = new TestStore(), [$filter])); + $ingester->ingest(); // Should only have 2 documents (the "Week of Symfony" one should be filtered out) $this->assertCount(2, $store->documents); @@ -175,8 +176,8 @@ public function testIndexWithMultipleFilters() new TextContainsFilter('SPAM'), ]; - $indexer = new Indexer($loader, $vectorizer, $store = new TestStore(), null, $filters); - $indexer->index(); + $ingester = new Ingester($loader, new Indexer($vectorizer, $store = new TestStore(), $filters)); + $ingester->ingest(); // Should only have 2 documents (filtered out "Week of Symfony" and "SPAM") $this->assertCount(2, $store->documents); @@ -207,8 +208,8 @@ public function transform(iterable $documents, array $options = []): iterable } }; - $indexer = new Indexer($loader, $vectorizer, $store = new TestStore(), null, [$filter], [$transformer]); - $indexer->index(); + $ingester = new Ingester($loader, new Indexer($vectorizer, $store = new TestStore(), [$filter], [$transformer])); + $ingester->ingest(); // Should have 2 documents (filtered out "Week of Symfony"), and transformation should have occurred $this->assertCount(2, $store->documents); @@ -253,8 +254,8 @@ public function transform(iterable $documents, array $options = []): iterable } }; - $indexer = new Indexer($loader, $vectorizer, $store = new TestStore(), null, [$filter], [$transformer]); - $indexer->index(); + $ingester = new Ingester($loader, new Indexer($vectorizer, $store = new TestStore(), [$filter], [$transformer])); + $ingester->ingest(); // Should have 2 documents (one filtered out) 
$this->assertCount(2, $store->documents); @@ -272,8 +273,8 @@ public function testIndexWithNoFilters() $loader = new InMemoryLoader([$document]); $vectorizer = new Vectorizer(PlatformTestHandler::createPlatform(new VectorResult($vector)), 'text-embedding-3-small'); - $indexer = new Indexer($loader, $vectorizer, $store = new TestStore(), null, []); - $indexer->index(); + $ingester = new Ingester($loader, new Indexer($vectorizer, $store = new TestStore(), [])); + $ingester->ingest(); $this->assertCount(1, $store->documents); } @@ -286,12 +287,12 @@ public function testWithSourcePreservesFilters() $vectorizer = new Vectorizer(PlatformTestHandler::createPlatform(new VectorResult($vector)), 'text-embedding-3-small'); $filter = new TextContainsFilter('nonexistent'); - $indexer = new Indexer($loader, $vectorizer, $store = new TestStore(), 'source1', [$filter]); - $indexerWithNewSource = $indexer->withSource('source2'); + $ingester = new Ingester($loader, new Indexer($vectorizer, $store = new TestStore(), [$filter]), 'source1'); + $ingesterWithNewSource = $ingester->withSource('source2'); - $this->assertNotSame($indexer, $indexerWithNewSource); + $this->assertNotSame($ingester, $ingesterWithNewSource); - $indexerWithNewSource->index(); + $ingesterWithNewSource->ingest(); $this->assertCount(1, $store->documents); // Filter should still work } }