diff --git a/src/store/src/Document/Loader/TextFileLoader.php b/src/store/src/Document/Loader/TextFileLoader.php index 7c7c01c7e..975437681 100644 --- a/src/store/src/Document/Loader/TextFileLoader.php +++ b/src/store/src/Document/Loader/TextFileLoader.php @@ -22,7 +22,7 @@ */ final readonly class TextFileLoader implements LoaderInterface { - public function __invoke(string $source, array $options = []): iterable + public function load(string $source, array $options = []): iterable { if (!is_file($source)) { throw new RuntimeException(\sprintf('File "%s" does not exist.', $source)); diff --git a/src/store/src/Document/LoaderInterface.php b/src/store/src/Document/LoaderInterface.php index 6b4aa689e..549283d79 100644 --- a/src/store/src/Document/LoaderInterface.php +++ b/src/store/src/Document/LoaderInterface.php @@ -22,5 +22,5 @@ interface LoaderInterface * * @return iterable iterable of TextDocuments loaded from the source */ - public function __invoke(string $source, array $options = []): iterable; + public function load(string $source, array $options = []): iterable; } diff --git a/src/store/src/Document/Transformer/ChainTransformer.php b/src/store/src/Document/Transformer/ChainTransformer.php index 686e1c49c..832b7174a 100644 --- a/src/store/src/Document/Transformer/ChainTransformer.php +++ b/src/store/src/Document/Transformer/ChainTransformer.php @@ -28,10 +28,10 @@ public function __construct(iterable $transformers) $this->transformers = $transformers instanceof \Traversable ? iterator_to_array($transformers) : $transformers; } - public function __invoke(iterable $documents, array $options = []): iterable + public function transform(iterable $documents, array $options = []): iterable { foreach ($this->transformers as $transformer) { - $documents = $transformer($documents, $options); + $documents = $transformer->transform($documents, $options); } return $documents; diff --git a/src/store/src/Document/Transformer/ChunkDelayTransformer.php b/src/store/src/Document/Transformer/ChunkDelayTransformer.php index 045149eb4..7124abba0 100644 --- a/src/store/src/Document/Transformer/ChunkDelayTransformer.php +++ b/src/store/src/Document/Transformer/ChunkDelayTransformer.php @@ -33,7 +33,7 @@ public function __construct( /** * @param array{chunk_size?: int, delay?: int} $options */ - public function __invoke(iterable $documents, array $options = []): iterable + public function transform(iterable $documents, array $options = []): iterable { $chunkSize = $options[self::OPTION_CHUNK_SIZE] ?? 50; $delay = $options[self::OPTION_DELAY] ?? 10; diff --git a/src/store/src/Document/Transformer/TextSplitTransformer.php b/src/store/src/Document/Transformer/TextSplitTransformer.php index 7094914ad..772de9bf8 100644 --- a/src/store/src/Document/Transformer/TextSplitTransformer.php +++ b/src/store/src/Document/Transformer/TextSplitTransformer.php @@ -32,7 +32,7 @@ /** * @param array{chunk_size?: int, overlap?: int} $options */ - public function __invoke(iterable $documents, array $options = []): iterable + public function transform(iterable $documents, array $options = []): iterable { $chunkSize = $options[self::OPTION_CHUNK_SIZE] ?? 1000; $overlap = $options[self::OPTION_OVERLAP] ?? 200; diff --git a/src/store/src/Document/TransformerInterface.php b/src/store/src/Document/TransformerInterface.php index cdb022bdd..f86885d1a 100644 --- a/src/store/src/Document/TransformerInterface.php +++ b/src/store/src/Document/TransformerInterface.php @@ -26,5 +26,5 @@ interface TransformerInterface * * @return iterable */ - public function __invoke(iterable $documents, array $options = []): iterable; + public function transform(iterable $documents, array $options = []): iterable; } diff --git a/src/store/src/Document/Vectorizer.php b/src/store/src/Document/Vectorizer.php index a3f27b120..7a2d0bbc8 100644 --- a/src/store/src/Document/Vectorizer.php +++ b/src/store/src/Document/Vectorizer.php @@ -26,7 +26,7 @@ public function __construct( ) { } - public function __invoke(array $documents): array + public function vectorize(array $documents): array { $documentCount = \count($documents); $this->logger->info('Starting vectorization process', ['document_count' => $documentCount]); diff --git a/src/store/src/Document/VectorizerInterface.php b/src/store/src/Document/VectorizerInterface.php index 9f8ab665a..d87edce34 100644 --- a/src/store/src/Document/VectorizerInterface.php +++ b/src/store/src/Document/VectorizerInterface.php @@ -23,5 +23,5 @@ interface VectorizerInterface * * @return VectorDocument[] */ - public function __invoke(array $documents): array; + public function vectorize(array $documents): array; } diff --git a/src/store/src/Indexer.php b/src/store/src/Indexer.php index dff99b66e..b3257ea6f 100644 --- a/src/store/src/Indexer.php +++ b/src/store/src/Indexer.php @@ -45,13 +45,13 @@ public function index(TextDocument|iterable $documents, int $chunkSize = 50): vo ++$counter; if ($chunkSize === \count($chunk)) { - $this->store->add(...($this->vectorizer)($chunk)); + $this->store->add(...$this->vectorizer->vectorize($chunk)); $chunk = []; } } if (\count($chunk) > 0) { - $this->store->add(...($this->vectorizer)($chunk)); + $this->store->add(...$this->vectorizer->vectorize($chunk)); } $this->logger->debug(0 === $counter ? 'No documents to index' : \sprintf('Indexed %d documents', $counter)); diff --git a/src/store/tests/Document/Loader/TextFileLoaderTest.php b/src/store/tests/Document/Loader/TextFileLoaderTest.php index fb15fc6a8..b39a710de 100644 --- a/src/store/tests/Document/Loader/TextFileLoaderTest.php +++ b/src/store/tests/Document/Loader/TextFileLoaderTest.php @@ -27,14 +27,14 @@ public function testLoadWithInvalidSource() $this->expectException(RuntimeException::class); $this->expectExceptionMessage('File "/invalid/source.txt" does not exist.'); - iterator_to_array($loader('/invalid/source.txt')); + iterator_to_array($loader->load('/invalid/source.txt')); } public function testLoadWithValidSource() { $loader = new TextFileLoader(); - $documents = iterator_to_array($loader(\dirname(__DIR__, 5).'/fixtures/lorem.txt')); + $documents = iterator_to_array($loader->load(\dirname(__DIR__, 5).'/fixtures/lorem.txt')); $this->assertCount(1, $documents); $this->assertInstanceOf(TextDocument::class, $document = $documents[0]); @@ -48,7 +48,7 @@ public function testSourceIsPresentInMetadata() $loader = new TextFileLoader(); $source = \dirname(__DIR__, 5).'/fixtures/lorem.txt'; - $documents = iterator_to_array($loader($source)); + $documents = iterator_to_array($loader->load($source)); $this->assertCount(1, $documents); $this->assertInstanceOf(TextDocument::class, $document = $documents[0]); diff --git a/src/store/tests/Document/Transformer/ChainTransformerTest.php b/src/store/tests/Document/Transformer/ChainTransformerTest.php index cc93dc62e..b7fd4a7a6 100644 --- a/src/store/tests/Document/Transformer/ChainTransformerTest.php +++ b/src/store/tests/Document/Transformer/ChainTransformerTest.php @@ -24,7 +24,7 @@ final class ChainTransformerTest extends TestCase public function testChainTransformerAppliesAllTransformersInOrder() { $transformerA = new class implements TransformerInterface { - public function __invoke(iterable $documents, array $options = []): iterable + public function transform(iterable $documents, array $options = []): iterable { foreach ($documents as $document) { yield new TextDocument($document->id, $document->content.'-A'); @@ -33,7 +33,7 @@ public function __invoke(iterable $documents, array $options = []): iterable }; $transformerB = new class implements TransformerInterface { - public function __invoke(iterable $documents, array $options = []): iterable + public function transform(iterable $documents, array $options = []): iterable { foreach ($documents as $document) { yield new TextDocument($document->id, $document->content.'-B'); @@ -47,7 +47,7 @@ public function __invoke(iterable $documents, array $options = []): iterable new TextDocument(Uuid::v4(), 'bar'), ]; - $result = iterator_to_array($chain->__invoke($documents)); + $result = iterator_to_array($chain->transform($documents)); $this->assertSame('foo-A-B', $result[0]->content); $this->assertSame('bar-A-B', $result[1]->content); @@ -58,7 +58,7 @@ public function testChainTransformerWithNoTransformersReturnsInput() $chain = new ChainTransformer([]); $documents = [new TextDocument(Uuid::v4(), 'baz')]; - $result = iterator_to_array($chain->__invoke($documents)); + $result = iterator_to_array($chain->transform($documents)); $this->assertSame('baz', $result[0]->content); } diff --git a/src/store/tests/Document/Transformer/ChunkDelayTransformerTest.php b/src/store/tests/Document/Transformer/ChunkDelayTransformerTest.php index ecf1a7ac1..719ccab5f 100644 --- a/src/store/tests/Document/Transformer/ChunkDelayTransformerTest.php +++ b/src/store/tests/Document/Transformer/ChunkDelayTransformerTest.php @@ -35,7 +35,7 @@ public function testDefaultChunkSizeAndDelay() $documents[] = new TextDocument(Uuid::v4(), 'content-'.$i); } - $result = iterator_to_array($transformer($documents)); + $result = iterator_to_array($transformer->transform($documents)); $this->assertCount(30, $result); for ($i = 0; $i < 30; ++$i) { @@ -57,7 +57,7 @@ public function testSleepsAfterChunkSize() $documents[] = new TextDocument(Uuid::v4(), 'content-'.$i); } - $result = iterator_to_array($transformer($documents, [ + $result = iterator_to_array($transformer->transform($documents, [ ChunkDelayTransformer::OPTION_CHUNK_SIZE => 50, ChunkDelayTransformer::OPTION_DELAY => 5, ])); @@ -79,7 +79,7 @@ public function testCustomChunkSizeAndDelay() $documents[] = new TextDocument(Uuid::v4(), 'content-'.$i); } - $result = iterator_to_array($transformer($documents, [ + $result = iterator_to_array($transformer->transform($documents, [ ChunkDelayTransformer::OPTION_CHUNK_SIZE => 10, ChunkDelayTransformer::OPTION_DELAY => 2, ])); @@ -100,7 +100,7 @@ public function testNoSleepWhenDelayIsZero() $documents[] = new TextDocument(Uuid::v4(), 'content-'.$i); } - $result = iterator_to_array($transformer($documents, [ + $result = iterator_to_array($transformer->transform($documents, [ ChunkDelayTransformer::OPTION_CHUNK_SIZE => 5, ChunkDelayTransformer::OPTION_DELAY => 0, ])); @@ -119,7 +119,7 @@ public function testYieldsDocumentsInCorrectOrder() new TextDocument(Uuid::v4(), 'third'), ]; - $result = iterator_to_array($transformer($documents, [ + $result = iterator_to_array($transformer->transform($documents, [ ChunkDelayTransformer::OPTION_CHUNK_SIZE => 2, ChunkDelayTransformer::OPTION_DELAY => 1, ])); @@ -137,7 +137,7 @@ public function testHandlesEmptyIterable() $transformer = new ChunkDelayTransformer($clock); - $result = iterator_to_array($transformer([])); + $result = iterator_to_array($transformer->transform([])); $this->assertCount(0, $result); } @@ -153,7 +153,7 @@ public function testSingleDocument() $documents = [new TextDocument(Uuid::v4(), 'single')]; - $result = iterator_to_array($transformer($documents, [ + $result = iterator_to_array($transformer->transform($documents, [ ChunkDelayTransformer::OPTION_CHUNK_SIZE => 1, ChunkDelayTransformer::OPTION_DELAY => 5, ])); @@ -176,7 +176,7 @@ public function testExactlyChunkSizeDocuments() $documents[] = new TextDocument(Uuid::v4(), 'content-'.$i); } - $result = iterator_to_array($transformer($documents, [ + $result = iterator_to_array($transformer->transform($documents, [ ChunkDelayTransformer::OPTION_CHUNK_SIZE => 10, ChunkDelayTransformer::OPTION_DELAY => 3, ])); @@ -198,7 +198,7 @@ public function testMultipleExactChunks() $documents[] = new TextDocument(Uuid::v4(), 'content-'.$i); } - $result = iterator_to_array($transformer($documents, [ + $result = iterator_to_array($transformer->transform($documents, [ ChunkDelayTransformer::OPTION_CHUNK_SIZE => 5, ChunkDelayTransformer::OPTION_DELAY => 1, ])); @@ -220,7 +220,7 @@ public function testLazyEvaluation() $documents[] = new TextDocument(Uuid::v4(), 'content-'.$i); } - $generator = $transformer($documents, [ + $generator = $transformer->transform($documents, [ ChunkDelayTransformer::OPTION_CHUNK_SIZE => 3, ChunkDelayTransformer::OPTION_DELAY => 1, ]); @@ -248,7 +248,7 @@ public function testWithMockClock() $startTime = $clock->now(); - iterator_to_array($transformer($documents, [ + iterator_to_array($transformer->transform($documents, [ ChunkDelayTransformer::OPTION_CHUNK_SIZE => 5, ChunkDelayTransformer::OPTION_DELAY => 30, ])); diff --git a/src/store/tests/Document/Transformer/TextSplitTransformerTest.php b/src/store/tests/Document/Transformer/TextSplitTransformerTest.php index bfea94eaa..98d092caa 100644 --- a/src/store/tests/Document/Transformer/TextSplitTransformerTest.php +++ b/src/store/tests/Document/Transformer/TextSplitTransformerTest.php @@ -33,7 +33,7 @@ public function testSplitReturnsSingleChunkForShortText() { $document = new TextDocument(Uuid::v4(), 'short text'); - $chunks = iterator_to_array(($this->transformer)([$document])); + $chunks = iterator_to_array($this->transformer->transform([$document])); $this->assertCount(1, $chunks); $this->assertSame('short text', $chunks[0]->content); @@ -48,7 +48,7 @@ public function testSplitSplitsLongTextWithOverlap() { $document = new TextDocument(Uuid::v4(), $this->getLongText()); - $chunks = iterator_to_array(($this->transformer)([$document])); + $chunks = iterator_to_array($this->transformer->transform([$document])); $this->assertCount(2, $chunks); @@ -63,7 +63,7 @@ public function testSplitWithCustomChunkSizeAndOverlap() { $document = new TextDocument(Uuid::v4(), $this->getLongText()); - $chunks = iterator_to_array(($this->transformer)([$document], [ + $chunks = iterator_to_array($this->transformer->transform([$document], [ TextSplitTransformer::OPTION_CHUNK_SIZE => 150, TextSplitTransformer::OPTION_OVERLAP => 25, ])); @@ -111,7 +111,7 @@ public function testSplitWithZeroOverlap() { $document = new TextDocument(Uuid::v4(), $this->getLongText()); - $chunks = iterator_to_array(($this->transformer)([$document], [ + $chunks = iterator_to_array($this->transformer->transform([$document], [ TextSplitTransformer::OPTION_OVERLAP => 0, ])); @@ -124,7 +124,7 @@ public function testParentIdIsSetInMetadata() { $document = new TextDocument(Uuid::v4(), $this->getLongText()); - $chunks = iterator_to_array(($this->transformer)([$document], [ + $chunks = iterator_to_array($this->transformer->transform([$document], [ TextSplitTransformer::OPTION_CHUNK_SIZE => 1000, TextSplitTransformer::OPTION_OVERLAP => 200, ])); @@ -141,7 +141,7 @@ public function testMetadataIsInherited() 'foo' => 'bar', ])); - $chunks = iterator_to_array(($this->transformer)([$document])); + $chunks = iterator_to_array($this->transformer->transform([$document])); $this->assertCount(2, $chunks); $this->assertSame('value', $chunks[0]->metadata['key']); @@ -154,7 +154,7 @@ public function testSplitWithChunkSizeLargerThanText() { $document = new TextDocument(Uuid::v4(), 'tiny'); - $chunks = iterator_to_array(($this->transformer)([$document])); + $chunks = iterator_to_array($this->transformer->transform([$document])); $this->assertCount(1, $chunks); $this->assertSame('tiny', $chunks[0]->content); @@ -166,7 +166,7 @@ public function testSplitWithOverlapGreaterThanChunkSize() $this->expectException(InvalidArgumentException::class); $this->expectExceptionMessage('Overlap must be non-negative and less than chunk size.'); - iterator_to_array(($this->transformer)([$document], [ + iterator_to_array($this->transformer->transform([$document], [ TextSplitTransformer::OPTION_CHUNK_SIZE => 10, TextSplitTransformer::OPTION_OVERLAP => 20, ])); @@ -178,7 +178,7 @@ public function testSplitWithNegativeOverlap() $this->expectException(InvalidArgumentException::class); $this->expectExceptionMessage('Overlap must be non-negative and less than chunk size.'); - iterator_to_array(($this->transformer)([$document], [ + iterator_to_array($this->transformer->transform([$document], [ TextSplitTransformer::OPTION_CHUNK_SIZE => 10, TextSplitTransformer::OPTION_OVERLAP => -1, ])); diff --git a/src/store/tests/Document/VectorizerTest.php b/src/store/tests/Document/VectorizerTest.php index b8fb24cf2..3e4059944 100644 --- a/src/store/tests/Document/VectorizerTest.php +++ b/src/store/tests/Document/VectorizerTest.php @@ -67,7 +67,7 @@ public function testVectorizeDocumentsWithBatchSupport() $model = new Embeddings(); $vectorizer = new Vectorizer($platform, $model); - $vectorDocuments = $vectorizer($documents); + $vectorDocuments = $vectorizer->vectorize($documents); $this->assertCount(3, $vectorDocuments); @@ -88,7 +88,7 @@ public function testVectorizeDocumentsWithSingleDocument() $model = new Embeddings(); $vectorizer = new Vectorizer($platform, $model); - $vectorDocuments = $vectorizer([$document]); + $vectorDocuments = $vectorizer->vectorize([$document]); $this->assertCount(1, $vectorDocuments); $this->assertInstanceOf(VectorDocument::class, $vectorDocuments[0]); @@ -103,7 +103,7 @@ public function testVectorizeEmptyDocumentsArray() $model = new Embeddings(); $vectorizer = new Vectorizer($platform, $model); - $vectorDocuments = $vectorizer([]); + $vectorDocuments = $vectorizer->vectorize([]); $this->assertSame([], $vectorDocuments); } @@ -127,7 +127,7 @@ public function testVectorizeDocumentsPreservesMetadata() $model = new Embeddings(); $vectorizer = new Vectorizer($platform, $model); - $vectorDocuments = $vectorizer($documents); + $vectorDocuments = $vectorizer->vectorize($documents); $this->assertCount(2, $vectorDocuments); $this->assertSame($metadata1, $vectorDocuments[0]->metadata); @@ -158,7 +158,7 @@ public function testVectorizeDocumentsPreservesDocumentIds() $model = new Embeddings(); $vectorizer = new Vectorizer($platform, $model); - $vectorDocuments = $vectorizer($documents); + $vectorDocuments = $vectorizer->vectorize($documents); $this->assertCount(3, $vectorDocuments); $this->assertSame($id1, $vectorDocuments[0]->id); @@ -187,7 +187,7 @@ public function testVectorizeVariousDocumentCounts(int $count) $model = new Embeddings(); $vectorizer = new Vectorizer($platform, $model); - $vectorDocuments = $vectorizer($documents); + $vectorDocuments = $vectorizer->vectorize($documents); $this->assertCount($count, $vectorDocuments); @@ -226,7 +226,7 @@ public function testVectorizeDocumentsWithLargeVectors() $model = new Embeddings(); $vectorizer = new Vectorizer($platform, $model); - $vectorDocuments = $vectorizer([$document]); + $vectorDocuments = $vectorizer->vectorize([$document]); $this->assertCount(1, $vectorDocuments); $this->assertEquals($vector, $vectorDocuments[0]->vector); @@ -250,7 +250,7 @@ public function testVectorizeDocumentsWithSpecialCharacters() $model = new Embeddings(); $vectorizer = new Vectorizer($platform, $model); - $vectorDocuments = $vectorizer($documents); + $vectorDocuments = $vectorizer->vectorize($documents); $this->assertCount(3, $vectorDocuments); @@ -313,7 +313,7 @@ public function convert(RawResultInterface $result, array $options = []): Result $platform = new Platform([$handler], [$handler]); $vectorizer = new Vectorizer($platform, $model); - $vectorDocuments = $vectorizer($documents); + $vectorDocuments = $vectorizer->vectorize($documents); $this->assertCount(2, $vectorDocuments); $this->assertEquals($vectors[0], $vectorDocuments[0]->vector);