diff --git a/examples/document/vectorizing-text-documents.php b/examples/document/vectorizing-text-documents.php index 1106222b6..24e74d094 100644 --- a/examples/document/vectorizing-text-documents.php +++ b/examples/document/vectorizing-text-documents.php @@ -26,6 +26,6 @@ ]; $vectorizer = new Vectorizer($platform, 'text-embedding-3-large'); -$vectorDocuments = $vectorizer->vectorizeTextDocuments($textDocuments); +$vectorDocuments = $vectorizer->vectorizeEmbeddableDocuments($textDocuments); dump(array_map(fn (VectorDocument $document) => $document->vector->getDimensions(), $vectorDocuments)); diff --git a/src/store/src/Document/EmbeddableDocumentInterface.php b/src/store/src/Document/EmbeddableDocumentInterface.php new file mode 100644 index 000000000..672ae4a30 --- /dev/null +++ b/src/store/src/Document/EmbeddableDocumentInterface.php @@ -0,0 +1,21 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\AI\Store\Document; + +interface EmbeddableDocumentInterface +{ + public function getId(): mixed; + + public function getContent(): string; + + public function getMetadata(): Metadata; +} diff --git a/src/store/src/Document/Filter/TextContainsFilter.php b/src/store/src/Document/Filter/TextContainsFilter.php index 37a4172dd..4f370245c 100644 --- a/src/store/src/Document/Filter/TextContainsFilter.php +++ b/src/store/src/Document/Filter/TextContainsFilter.php @@ -49,7 +49,7 @@ public function filter(iterable $documents, array $options = []): iterable $caseSensitive = $options[self::OPTION_CASE_SENSITIVE] ?? 
$this->caseSensitive; foreach ($documents as $document) { - $content = $document->content; + $content = $document->getContent(); if ($caseSensitive) { $contains = str_contains($content, $needle); diff --git a/src/store/src/Document/FilterInterface.php b/src/store/src/Document/FilterInterface.php index 6fc8050a2..ffaee8195 100644 --- a/src/store/src/Document/FilterInterface.php +++ b/src/store/src/Document/FilterInterface.php @@ -20,10 +20,10 @@ interface FilterInterface { /** - * @param iterable $documents - * @param array $options + * @param iterable $documents + * @param array $options * - * @return iterable + * @return iterable */ public function filter(iterable $documents, array $options = []): iterable; } diff --git a/src/store/src/Document/Loader/InMemoryLoader.php b/src/store/src/Document/Loader/InMemoryLoader.php index 5c09a35ac..a41055d3e 100644 --- a/src/store/src/Document/Loader/InMemoryLoader.php +++ b/src/store/src/Document/Loader/InMemoryLoader.php @@ -11,11 +11,11 @@ namespace Symfony\AI\Store\Document\Loader; +use Symfony\AI\Store\Document\EmbeddableDocumentInterface; use Symfony\AI\Store\Document\LoaderInterface; -use Symfony\AI\Store\Document\TextDocument; /** - * Loader that returns pre-loaded TextDocuments from memory. + * Loader that returns preloaded documents from memory. * Useful for testing or when documents are already available as objects. 
* * @author Oskar Stark @@ -23,7 +23,7 @@ final readonly class InMemoryLoader implements LoaderInterface { /** - * @param TextDocument[] $documents + * @param EmbeddableDocumentInterface[] $documents */ public function __construct( private array $documents = [], diff --git a/src/store/src/Document/LoaderInterface.php b/src/store/src/Document/LoaderInterface.php index 339ebb6b2..123f4eab2 100644 --- a/src/store/src/Document/LoaderInterface.php +++ b/src/store/src/Document/LoaderInterface.php @@ -20,7 +20,7 @@ interface LoaderInterface * @param string|null $source Identifier for the loader to load the documents from, e.g. file path, folder, or URL. Can be null for InMemoryLoader. * @param array $options loader specific set of options to control the loading process * - * @return iterable iterable of TextDocuments loaded from the source + * @return iterable iterable of embeddable documents loaded from the source */ public function load(?string $source, array $options = []): iterable; } diff --git a/src/store/src/Document/TextDocument.php b/src/store/src/Document/TextDocument.php index 0cab882bf..04ad92847 100644 --- a/src/store/src/Document/TextDocument.php +++ b/src/store/src/Document/TextDocument.php @@ -17,12 +17,12 @@ /** * @author Christopher Hertel */ -final readonly class TextDocument +final readonly class TextDocument implements EmbeddableDocumentInterface { public function __construct( - public Uuid $id, - public string $content, - public Metadata $metadata = new Metadata(), + private Uuid $id, + private string $content, + private Metadata $metadata = new Metadata(), ) { if ('' === trim($this->content)) { throw new InvalidArgumentException('The content shall not be an empty string.'); @@ -33,4 +33,19 @@ public function withContent(string $content): self { return new self($this->id, $content, $this->metadata); } + + public function getId(): Uuid + { + return $this->id; + } + + public function getContent(): string + { + return $this->content; + } + + public 
function getMetadata(): Metadata + { + return $this->metadata; + } } diff --git a/src/store/src/Document/Transformer/TextReplaceTransformer.php b/src/store/src/Document/Transformer/TextReplaceTransformer.php index 22d3b891a..b6a63d9bc 100644 --- a/src/store/src/Document/Transformer/TextReplaceTransformer.php +++ b/src/store/src/Document/Transformer/TextReplaceTransformer.php @@ -11,6 +11,7 @@ namespace Symfony\AI\Store\Document\Transformer; +use Symfony\AI\Store\Document\TextDocument; use Symfony\AI\Store\Document\TransformerInterface; use Symfony\AI\Store\Exception\InvalidArgumentException; @@ -32,6 +33,7 @@ public function __construct( } /** + * @param iterable $documents * @param array{search?: string, replace?: string} $options */ public function transform(iterable $documents, array $options = []): iterable @@ -42,7 +44,7 @@ public function transform(iterable $documents, array $options = []): iterable self::validate($search, $replace); foreach ($documents as $document) { - yield $document->withContent(str_replace($search, $replace, $document->content)); + yield $document->withContent(str_replace($search, $replace, $document->getContent())); } } diff --git a/src/store/src/Document/Transformer/TextSplitTransformer.php b/src/store/src/Document/Transformer/TextSplitTransformer.php index b062b87fe..7b2b7ff8e 100644 --- a/src/store/src/Document/Transformer/TextSplitTransformer.php +++ b/src/store/src/Document/Transformer/TextSplitTransformer.php @@ -51,13 +51,13 @@ public function transform(iterable $documents, array $options = []): iterable } foreach ($documents as $document) { - if (mb_strlen($document->content) <= $chunkSize) { + if (mb_strlen($document->getContent()) <= $chunkSize) { yield $document; continue; } - $text = $document->content; + $text = $document->getContent(); $length = mb_strlen($text); $start = 0; @@ -66,9 +66,9 @@ public function transform(iterable $documents, array $options = []): iterable $chunkText = mb_substr($text, $start, $end - $start); 
yield new TextDocument(Uuid::v4(), $chunkText, new Metadata([ - Metadata::KEY_PARENT_ID => $document->id, + Metadata::KEY_PARENT_ID => $document->getId(), Metadata::KEY_TEXT => $chunkText, - ...$document->metadata, + ...$document->getMetadata(), ])); $start += ($chunkSize - $overlap); diff --git a/src/store/src/Document/Transformer/TextTrimTransformer.php b/src/store/src/Document/Transformer/TextTrimTransformer.php index 1a47fec63..cb9c1231d 100644 --- a/src/store/src/Document/Transformer/TextTrimTransformer.php +++ b/src/store/src/Document/Transformer/TextTrimTransformer.php @@ -11,6 +11,7 @@ namespace Symfony\AI\Store\Document\Transformer; +use Symfony\AI\Store\Document\TextDocument; use Symfony\AI\Store\Document\TransformerInterface; /** @@ -20,10 +21,13 @@ */ final readonly class TextTrimTransformer implements TransformerInterface { + /** + * @param iterable<TextDocument> $documents + */ public function transform(iterable $documents, array $options = []): iterable { foreach ($documents as $document) { - yield $document->withContent(trim($document->content)); + yield $document->withContent(trim($document->getContent())); } } } diff --git a/src/store/src/Document/TransformerInterface.php b/src/store/src/Document/TransformerInterface.php index f86885d1a..7c8477b66 100644 --- a/src/store/src/Document/TransformerInterface.php +++ b/src/store/src/Document/TransformerInterface.php @@ -12,7 +12,7 @@ namespace Symfony\AI\Store\Document; /** - * A Transformer is designed to mutate a stream of TextDocuments with the purpose of preparing them for indexing. + * A Transformer is designed to mutate a stream of embeddable documents with the purpose of preparing them for indexing. * It can reduce or expand the number of documents, modify their content or metadata. * It should not act blocking, but is expected to iterate over incoming documents and yield prepared ones. 
* @@ -21,10 +21,10 @@ interface TransformerInterface { /** - * @param iterable $documents - * @param array $options + * @param iterable $documents + * @param array $options * - * @return iterable + * @return iterable */ public function transform(iterable $documents, array $options = []): iterable; } diff --git a/src/store/src/Document/Vectorizer.php b/src/store/src/Document/Vectorizer.php index 8e10ef3e5..f8e08f65e 100644 --- a/src/store/src/Document/Vectorizer.php +++ b/src/store/src/Document/Vectorizer.php @@ -27,14 +27,14 @@ public function __construct( ) { } - public function vectorizeTextDocuments(array $documents, array $options = []): array + public function vectorizeEmbeddableDocuments(array $documents, array $options = []): array { $documentCount = \count($documents); $this->logger->info('Starting vectorization process', ['document_count' => $documentCount]); if ($this->platform->getModelCatalog()->getModel($this->model)->supports(Capability::INPUT_MULTIPLE)) { $this->logger->debug('Using batch vectorization with model that supports multiple inputs'); - $result = $this->platform->invoke($this->model, array_map(fn (TextDocument $document) => $document->content, $documents), $options); + $result = $this->platform->invoke($this->model, array_map(fn (EmbeddableDocumentInterface $document) => $document->getContent(), $documents), $options); $vectors = $result->asVectors(); $this->logger->debug('Batch vectorization completed', ['vector_count' => \count($vectors)]); @@ -42,8 +42,8 @@ public function vectorizeTextDocuments(array $documents, array $options = []): a $this->logger->debug('Using sequential vectorization for model without multiple input support'); $results = []; foreach ($documents as $i => $document) { - $this->logger->debug('Vectorizing document', ['document_index' => $i, 'document_id' => $document->id]); - $results[] = $this->platform->invoke($this->model, $document->content, $options); + $this->logger->debug('Vectorizing document', 
['document_index' => $i, 'document_id' => $document->getId()]); + $results[] = $this->platform->invoke($this->model, $document->getContent(), $options); } $vectors = []; @@ -55,7 +55,7 @@ public function vectorizeTextDocuments(array $documents, array $options = []): a $vectorDocuments = []; foreach ($documents as $i => $document) { - $vectorDocuments[] = new VectorDocument($document->id, $vectors[$i], $document->metadata); + $vectorDocuments[] = new VectorDocument($document->getId(), $vectors[$i], $document->getMetadata()); } $this->logger->info('Vectorization process completed', [ diff --git a/src/store/src/Document/VectorizerInterface.php b/src/store/src/Document/VectorizerInterface.php index 3b95279eb..3f08999a2 100644 --- a/src/store/src/Document/VectorizerInterface.php +++ b/src/store/src/Document/VectorizerInterface.php @@ -14,7 +14,7 @@ use Symfony\AI\Platform\Vector\Vector; /** - * Interface for converting a collection of TextDocuments into VectorDocuments + * Interface for converting a collection of embeddable documents into VectorDocuments * and for vectorizing individual strings. * * @author Oskar Stark @@ -22,12 +22,12 @@ interface VectorizerInterface { /** - * @param TextDocument[] $documents - * @param array $options Options to pass to the underlying platform + * @param EmbeddableDocumentInterface[] $documents + * @param array $options Options to pass to the underlying platform * * @return VectorDocument[] */ - public function vectorizeTextDocuments(array $documents, array $options = []): array; + public function vectorizeEmbeddableDocuments(array $documents, array $options = []): array; /** * Vectorizes a single string or Stringable object into a Vector. 
diff --git a/src/store/src/Indexer.php b/src/store/src/Indexer.php index 684cd9394..34bec91dd 100644 --- a/src/store/src/Indexer.php +++ b/src/store/src/Indexer.php @@ -13,9 +13,9 @@ use Psr\Log\LoggerInterface; use Psr\Log\NullLogger; +use Symfony\AI\Store\Document\EmbeddableDocumentInterface; use Symfony\AI\Store\Document\FilterInterface; use Symfony\AI\Store\Document\LoaderInterface; -use Symfony\AI\Store\Document\TextDocument; use Symfony\AI\Store\Document\TransformerInterface; use Symfony\AI\Store\Document\VectorizerInterface; @@ -87,20 +87,20 @@ public function index(array $options = []): void ++$counter; if ($chunkSize === \count($chunk)) { - $this->store->add(...$this->vectorizer->vectorizeTextDocuments($chunk)); + $this->store->add(...$this->vectorizer->vectorizeEmbeddableDocuments($chunk)); $chunk = []; } } if ([] !== $chunk) { - $this->store->add(...$this->vectorizer->vectorizeTextDocuments($chunk)); + $this->store->add(...$this->vectorizer->vectorizeEmbeddableDocuments($chunk)); } $this->logger->debug('Document processing completed', ['total_documents' => $counter]); } /** - * @return TextDocument[] + * @return EmbeddableDocumentInterface[] */ private function loadSource(?string $source): array { diff --git a/src/store/tests/Document/Filter/TextContainsFilterTest.php b/src/store/tests/Document/Filter/TextContainsFilterTest.php index 3efbc4012..e59552260 100644 --- a/src/store/tests/Document/Filter/TextContainsFilterTest.php +++ b/src/store/tests/Document/Filter/TextContainsFilterTest.php @@ -36,8 +36,8 @@ public function testFilterWithConstructorParameters() $result = iterator_to_array($filter->filter($documents)); $this->assertCount(2, $result); - $this->assertSame('This is a regular blog post', $result[0]->content); - $this->assertSame('Another regular post', $result[1]->content); + $this->assertSame('This is a regular blog post', $result[0]->getContent()); + $this->assertSame('Another regular post', $result[1]->getContent()); } public function 
testFilterWithOptions() @@ -54,8 +54,8 @@ public function testFilterWithOptions() ])); $this->assertCount(2, $result); - $this->assertSame('Keep this document', $result[0]->content); - $this->assertSame('Another good document', $result[1]->content); + $this->assertSame('Keep this document', $result[0]->getContent()); + $this->assertSame('Another good document', $result[1]->getContent()); } public function testOptionsOverrideConstructorParameters() @@ -72,8 +72,8 @@ public function testOptionsOverrideConstructorParameters() ])); $this->assertCount(2, $result); - $this->assertSame('Regular post', $result[0]->content); - $this->assertSame('Week of Symfony news', $result[1]->content); + $this->assertSame('Regular post', $result[0]->getContent()); + $this->assertSame('Week of Symfony news', $result[1]->getContent()); } public function testFilterCaseInsensitive() @@ -89,7 +89,7 @@ public function testFilterCaseInsensitive() $result = iterator_to_array($filter->filter($documents)); $this->assertCount(1, $result); - $this->assertSame('Clean content', $result[0]->content); + $this->assertSame('Clean content', $result[0]->getContent()); } public function testFilterCaseSensitive() @@ -105,9 +105,9 @@ public function testFilterCaseSensitive() $result = iterator_to_array($filter->filter($documents)); $this->assertCount(3, $result); - $this->assertSame('This contains spam', $result[0]->content); - $this->assertSame('This contains Spam', $result[1]->content); - $this->assertSame('Clean content', $result[2]->content); + $this->assertSame('This contains spam', $result[0]->getContent()); + $this->assertSame('This contains Spam', $result[1]->getContent()); + $this->assertSame('Clean content', $result[2]->getContent()); } public function testFilterWithCaseSensitivityOption() @@ -123,8 +123,8 @@ public function testFilterWithCaseSensitivityOption() ])); $this->assertCount(2, $result); // With case sensitivity, 'Test' != 'test' - $this->assertSame('This has Test', $result[0]->content); 
- $this->assertSame('Clean content', $result[1]->content); + $this->assertSame('This has Test', $result[0]->getContent()); + $this->assertSame('Clean content', $result[1]->getContent()); } public function testFilterPreservesMetadata() @@ -139,8 +139,8 @@ public function testFilterPreservesMetadata() $result = iterator_to_array($filter->filter($documents)); $this->assertCount(1, $result); - $this->assertSame('keep this', $result[0]->content); - $this->assertSame($metadata, $result[0]->metadata); + $this->assertSame('keep this', $result[0]->getContent()); + $this->assertSame($metadata, $result[0]->getMetadata()); } public function testFilterPreservesDocumentId() @@ -155,7 +155,7 @@ public function testFilterPreservesDocumentId() $result = iterator_to_array($filter->filter($documents)); $this->assertCount(1, $result); - $this->assertSame($id, $result[0]->id); + $this->assertSame($id, $result[0]->getId()); } public function testFilterWithEmptyDocuments() @@ -179,8 +179,8 @@ public function testFilterWithNoMatches() $result = iterator_to_array($filter->filter($documents)); $this->assertCount(2, $result); // All documents should pass through - $this->assertSame('First document', $result[0]->content); - $this->assertSame('Second document', $result[1]->content); + $this->assertSame('First document', $result[0]->getContent()); + $this->assertSame('Second document', $result[1]->getContent()); } public function testFilterWithAllMatches() @@ -208,7 +208,7 @@ public function testFilterWithPartialMatch() $result = iterator_to_array($filter->filter($documents)); $this->assertCount(1, $result); - $this->assertSame('Clean content', $result[0]->content); + $this->assertSame('Clean content', $result[0]->getContent()); } public function testPartialOptionsUseConstructorDefaults() @@ -225,7 +225,7 @@ public function testPartialOptionsUseConstructorDefaults() ])); $this->assertCount(1, $result); // Case sensitive, 'test' found in first document, so it's filtered out - 
$this->assertSame('Clean content', $result[0]->content); + $this->assertSame('Clean content', $result[0]->getContent()); } #[TestWith([''])] diff --git a/src/store/tests/Document/Loader/InMemoryLoaderTest.php b/src/store/tests/Document/Loader/InMemoryLoaderTest.php index 6810254e3..fc5ca0b50 100644 --- a/src/store/tests/Document/Loader/InMemoryLoaderTest.php +++ b/src/store/tests/Document/Loader/InMemoryLoaderTest.php @@ -39,7 +39,7 @@ public function testLoadWithSingleDocument() $this->assertCount(1, $documents); $this->assertSame($document, $documents[0]); - $this->assertSame('This is test content', $documents[0]->content); + $this->assertSame('This is test content', $documents[0]->getContent()); } public function testLoadWithMultipleDocuments() @@ -53,9 +53,9 @@ public function testLoadWithMultipleDocuments() $this->assertCount(2, $documents); $this->assertSame($document1, $documents[0]); $this->assertSame($document2, $documents[1]); - $this->assertSame('First document', $documents[0]->content); - $this->assertSame('Second document', $documents[1]->content); - $this->assertSame('test', $documents[1]->metadata['type']); + $this->assertSame('First document', $documents[0]->getContent()); + $this->assertSame('Second document', $documents[1]->getContent()); + $this->assertSame('test', $documents[1]->getMetadata()['type']); } public function testLoadIgnoresSourceParameter() diff --git a/src/store/tests/Document/Loader/RssFeedLoaderTest.php b/src/store/tests/Document/Loader/RssFeedLoaderTest.php index b704dbff6..5222d89d1 100644 --- a/src/store/tests/Document/Loader/RssFeedLoaderTest.php +++ b/src/store/tests/Document/Loader/RssFeedLoaderTest.php @@ -46,11 +46,11 @@ public function testLoadWithValidRssFeed() $firstDocument = $documents[0]; $this->assertInstanceOf(TextDocument::class, $firstDocument); - $this->assertStringStartsWith('Title: Save the date, SymfonyDay Montreal 2026!', $firstDocument->content); - $this->assertStringContainsString('Date: 2025-09-11 14:30', 
$firstDocument->content); - $this->assertStringContainsString('SymfonyDay Montreal is happening on', $firstDocument->content); + $this->assertStringStartsWith('Title: Save the date, SymfonyDay Montreal 2026!', $firstDocument->getContent()); + $this->assertStringContainsString('Date: 2025-09-11 14:30', $firstDocument->getContent()); + $this->assertStringContainsString('SymfonyDay Montreal is happening on', $firstDocument->getContent()); - $firstMetadata = $firstDocument->metadata; + $firstMetadata = $firstDocument->getMetadata(); $this->assertSame('Save the date, SymfonyDay Montreal 2026!', $firstMetadata['title']); $this->assertSame('https://symfony.com/blog/save-the-date-symfonyday-montreal-2026?utm_source=Symfony%20Blog%20Feed&utm_medium=feed', $firstMetadata['link']); $this->assertSame('Paola Suárez', $firstMetadata['author']); @@ -125,8 +125,8 @@ public function testLoadReturnsIterableOfTextDocuments() foreach ($result as $document) { $this->assertInstanceOf(TextDocument::class, $document); - $this->assertInstanceOf(Uuid::class, $document->id); - $this->assertNotEmpty($document->content); + $this->assertInstanceOf(Uuid::class, $document->getId()); + $this->assertNotEmpty($document->getContent()); } } @@ -145,7 +145,7 @@ public function testLoadGeneratesConsistentUuids() $this->assertCount(10, $documents2); // UUIDs should be identical for same content - $this->assertEquals($documents1[0]->id, $documents2[0]->id); - $this->assertEquals($documents1[1]->id, $documents2[1]->id); + $this->assertEquals($documents1[0]->getId(), $documents2[0]->getId()); + $this->assertEquals($documents1[1]->getId(), $documents2[1]->getId()); } } diff --git a/src/store/tests/Document/Loader/TextFileLoaderTest.php b/src/store/tests/Document/Loader/TextFileLoaderTest.php index 7972057b2..efbabf2dd 100644 --- a/src/store/tests/Document/Loader/TextFileLoaderTest.php +++ b/src/store/tests/Document/Loader/TextFileLoaderTest.php @@ -47,9 +47,9 @@ public function testLoadWithValidSource() 
$this->assertCount(1, $documents); $this->assertInstanceOf(TextDocument::class, $document = $documents[0]); - $this->assertStringStartsWith('Lorem ipsum', $document->content); - $this->assertStringEndsWith('nonummy id, met', $document->content); - $this->assertSame(1500, \strlen($document->content)); + $this->assertStringStartsWith('Lorem ipsum', $document->getContent()); + $this->assertStringEndsWith('nonummy id, met', $document->getContent()); + $this->assertSame(1500, \strlen($document->getContent())); } public function testSourceIsPresentInMetadata() @@ -61,7 +61,7 @@ public function testSourceIsPresentInMetadata() $this->assertCount(1, $documents); $this->assertInstanceOf(TextDocument::class, $document = $documents[0]); - $this->assertSame($source, $document->metadata['_source']); - $this->assertSame($source, $document->metadata->getSource()); + $this->assertSame($source, $document->getMetadata()['_source']); + $this->assertSame($source, $document->getMetadata()->getSource()); } } diff --git a/src/store/tests/Document/TextDocumentTest.php b/src/store/tests/Document/TextDocumentTest.php index 1982215b9..5f9c876ad 100644 --- a/src/store/tests/Document/TextDocumentTest.php +++ b/src/store/tests/Document/TextDocumentTest.php @@ -31,9 +31,9 @@ public function testConstructorWithValidContent() $document = new TextDocument($id, $content, $metadata); - $this->assertSame($id, $document->id); - $this->assertSame($content, $document->content); - $this->assertSame($metadata, $document->metadata); + $this->assertSame($id, $document->getId()); + $this->assertSame($content, $document->getContent()); + $this->assertSame($metadata, $document->getMetadata()); } #[TestDox('Creates document with default empty metadata when not provided')] @@ -44,10 +44,10 @@ public function testConstructorWithDefaultMetadata() $document = new TextDocument($id, $content); - $this->assertSame($id, $document->id); - $this->assertSame($content, $document->content); - 
$this->assertInstanceOf(Metadata::class, $document->metadata); - $this->assertCount(0, $document->metadata); + $this->assertSame($id, $document->getId()); + $this->assertSame($content, $document->getContent()); + $this->assertInstanceOf(Metadata::class, $document->getMetadata()); + $this->assertCount(0, $document->getMetadata()); } #[TestWith([''])] @@ -85,8 +85,8 @@ public function testConstructorAcceptsValidContent(string $content) $document = new TextDocument($id, $content); - $this->assertSame($id, $document->id); - $this->assertSame($content, $document->content); + $this->assertSame($id, $document->getId()); + $this->assertSame($content, $document->getContent()); } #[TestDox('Accepts very long text content')] @@ -97,8 +97,8 @@ public function testConstructorAcceptsVeryLongContent() $document = new TextDocument($id, $content); - $this->assertSame($id, $document->id); - $this->assertSame($content, $document->content); + $this->assertSame($id, $document->getId()); + $this->assertSame($content, $document->getContent()); } #[TestDox('Properties are publicly accessible and readonly')] @@ -110,9 +110,9 @@ public function testReadonlyProperties() $document = new TextDocument($id, $content, $metadata); - $this->assertSame($id, $document->id); - $this->assertSame($content, $document->content); - $this->assertSame($metadata, $document->metadata); + $this->assertSame($id, $document->getId()); + $this->assertSame($content, $document->getContent()); + $this->assertSame($metadata, $document->getMetadata()); } #[TestDox('Metadata contents can be modified even though the property is readonly')] @@ -128,8 +128,8 @@ public function testMetadataCanBeModified() $metadata['key'] = 'value'; $metadata->setSource('test.txt'); - $this->assertSame('value', $document->metadata['key']); - $this->assertSame('test.txt', $document->metadata->getSource()); + $this->assertSame('value', $document->getMetadata()['key']); + $this->assertSame('test.txt', $document->getMetadata()->getSource()); } 
#[DataProvider('uuidVersionProvider')] @@ -140,8 +140,8 @@ public function testDifferentUuidVersions(string $version, Uuid $uuid) $document = new TextDocument($uuid, $content); - $this->assertSame($uuid, $document->id); - $this->assertSame($content, $document->content); + $this->assertSame($uuid, $document->getId()); + $this->assertSame($content, $document->getContent()); } /** @@ -191,7 +191,7 @@ public function testDocumentWithComplexMetadata() '_source' => 'source.pdf', ]; - $this->assertSame($expected, $document->metadata->getArrayCopy()); + $this->assertSame($expected, $document->getMetadata()->getArrayCopy()); } #[TestDox('Multiple documents can share the same content with different IDs and metadata')] @@ -204,10 +204,10 @@ public function testMultipleDocumentsWithSameContent() $document1 = new TextDocument(Uuid::v4(), $content, $metadata1); $document2 = new TextDocument(Uuid::v4(), $content, $metadata2); - $this->assertSame($content, $document1->content); - $this->assertSame($content, $document2->content); - $this->assertNotSame($document1->id, $document2->id); - $this->assertNotSame($document1->metadata, $document2->metadata); + $this->assertSame($content, $document1->getContent()); + $this->assertSame($content, $document2->getContent()); + $this->assertNotSame($document1->getId(), $document2->getId()); + $this->assertNotSame($document1->getMetadata(), $document2->getMetadata()); } #[TestDox('Documents can have the same ID but different content')] @@ -218,9 +218,9 @@ public function testDocumentWithSameIdButDifferentContent() $document1 = new TextDocument($id, 'Content 1'); $document2 = new TextDocument($id, 'Content 2'); - $this->assertSame($id, $document1->id); - $this->assertSame($id, $document2->id); - $this->assertNotSame($document1->content, $document2->content); + $this->assertSame($id, $document1->getId()); + $this->assertSame($id, $document2->getId()); + $this->assertNotSame($document1->getContent(), $document2->getContent()); } #[TestDox('Content 
with whitespace is stored as-is without trimming')] @@ -233,7 +233,7 @@ public function testTrimBehaviorValidation() $document = new TextDocument($id, $contentWithWhitespace); // The content is stored as-is, not trimmed - $this->assertSame($contentWithWhitespace, $document->content); + $this->assertSame($contentWithWhitespace, $document->getContent()); } #[TestDox('Exception message is correct for empty content')] @@ -257,10 +257,10 @@ public function testWithContent() $updatedDocument = $originalDocument->withContent($newContent); $this->assertNotSame($originalDocument, $updatedDocument); - $this->assertSame($id, $updatedDocument->id); - $this->assertSame($newContent, $updatedDocument->content); - $this->assertSame($metadata, $updatedDocument->metadata); - $this->assertSame($originalContent, $originalDocument->content); + $this->assertSame($id, $updatedDocument->getId()); + $this->assertSame($newContent, $updatedDocument->getContent()); + $this->assertSame($metadata, $updatedDocument->getMetadata()); + $this->assertSame($originalContent, $originalDocument->getContent()); } #[TestDox('withContent validates new content')] diff --git a/src/store/tests/Document/Transformer/ChainTransformerTest.php b/src/store/tests/Document/Transformer/ChainTransformerTest.php index f713efefa..87720265e 100644 --- a/src/store/tests/Document/Transformer/ChainTransformerTest.php +++ b/src/store/tests/Document/Transformer/ChainTransformerTest.php @@ -25,7 +25,7 @@ public function testChainTransformerAppliesAllTransformersInOrder() public function transform(iterable $documents, array $options = []): iterable { foreach ($documents as $document) { - yield new TextDocument($document->id, $document->content.'-A'); + yield new TextDocument($document->getId(), $document->getContent().'-A'); } } }; @@ -34,7 +34,7 @@ public function transform(iterable $documents, array $options = []): iterable public function transform(iterable $documents, array $options = []): iterable { foreach ($documents as 
$document) { - yield new TextDocument($document->id, $document->content.'-B'); + yield new TextDocument($document->getId(), $document->getContent().'-B'); } } }; @@ -47,8 +47,8 @@ public function transform(iterable $documents, array $options = []): iterable $result = iterator_to_array($chain->transform($documents)); - $this->assertSame('foo-A-B', $result[0]->content); - $this->assertSame('bar-A-B', $result[1]->content); + $this->assertSame('foo-A-B', $result[0]->getContent()); + $this->assertSame('bar-A-B', $result[1]->getContent()); } public function testChainTransformerWithNoTransformersReturnsInput() @@ -58,6 +58,6 @@ public function testChainTransformerWithNoTransformersReturnsInput() $result = iterator_to_array($chain->transform($documents)); - $this->assertSame('baz', $result[0]->content); + $this->assertSame('baz', $result[0]->getContent()); } } diff --git a/src/store/tests/Document/Transformer/ChunkDelayTransformerTest.php b/src/store/tests/Document/Transformer/ChunkDelayTransformerTest.php index 1f272a091..360c43644 100644 --- a/src/store/tests/Document/Transformer/ChunkDelayTransformerTest.php +++ b/src/store/tests/Document/Transformer/ChunkDelayTransformerTest.php @@ -37,7 +37,7 @@ public function testDefaultChunkSizeAndDelay() $this->assertCount(30, $result); for ($i = 0; $i < 30; ++$i) { - $this->assertSame('content-'.$i, $result[$i]->content); + $this->assertSame('content-'.$i, $result[$i]->getContent()); } } @@ -122,9 +122,9 @@ public function testYieldsDocumentsInCorrectOrder() ChunkDelayTransformer::OPTION_DELAY => 1, ])); - $this->assertSame('first', $result[0]->content); - $this->assertSame('second', $result[1]->content); - $this->assertSame('third', $result[2]->content); + $this->assertSame('first', $result[0]->getContent()); + $this->assertSame('second', $result[1]->getContent()); + $this->assertSame('third', $result[2]->getContent()); } public function testHandlesEmptyIterable() @@ -157,7 +157,7 @@ public function testSingleDocument() ])); 
$this->assertCount(1, $result); - $this->assertSame('single', $result[0]->content); + $this->assertSame('single', $result[0]->getContent()); } public function testExactlyChunkSizeDocuments() diff --git a/src/store/tests/Document/Transformer/TextReplaceTransformerTest.php b/src/store/tests/Document/Transformer/TextReplaceTransformerTest.php index 7b191d974..f07d6bff0 100644 --- a/src/store/tests/Document/Transformer/TextReplaceTransformerTest.php +++ b/src/store/tests/Document/Transformer/TextReplaceTransformerTest.php @@ -31,7 +31,7 @@ public function testReplaceWithConstructorParameters() $result = iterator_to_array($transformer->transform([$document])); $this->assertCount(1, $result); - $this->assertSame('bar is bar', $result[0]->content); + $this->assertSame('bar is bar', $result[0]->getContent()); } public function testReplaceWithOptions() @@ -45,7 +45,7 @@ public function testReplaceWithOptions() ])); $this->assertCount(1, $result); - $this->assertSame('goodbye world', $result[0]->content); + $this->assertSame('goodbye world', $result[0]->getContent()); } public function testOptionsOverrideConstructorParameters() @@ -59,7 +59,7 @@ public function testOptionsOverrideConstructorParameters() ])); $this->assertCount(1, $result); - $this->assertSame('foo world', $result[0]->content); + $this->assertSame('foo world', $result[0]->getContent()); } public function testReplaceMultipleOccurrences() @@ -70,7 +70,7 @@ public function testReplaceMultipleOccurrences() $result = iterator_to_array($transformer->transform([$document])); $this->assertCount(1, $result); - $this->assertSame('b b b', $result[0]->content); + $this->assertSame('b b b', $result[0]->getContent()); } public function testReplaceWithEmptyString() @@ -81,7 +81,7 @@ public function testReplaceWithEmptyString() $result = iterator_to_array($transformer->transform([$document])); $this->assertCount(1, $result); - $this->assertSame(' this word', $result[0]->content); + $this->assertSame(' this word', 
$result[0]->getContent()); } public function testReplacePreservesMetadata() @@ -93,8 +93,8 @@ public function testReplacePreservesMetadata() $result = iterator_to_array($transformer->transform([$document])); $this->assertCount(1, $result); - $this->assertSame('new text', $result[0]->content); - $this->assertSame($metadata, $result[0]->metadata); + $this->assertSame('new text', $result[0]->getContent()); + $this->assertSame($metadata, $result[0]->getMetadata()); } public function testReplacePreservesDocumentId() @@ -106,7 +106,7 @@ public function testReplacePreservesDocumentId() $result = iterator_to_array($transformer->transform([$document])); $this->assertCount(1, $result); - $this->assertSame($id, $result[0]->id); + $this->assertSame($id, $result[0]->getId()); } public function testReplaceProcessesMultipleDocuments() @@ -121,9 +121,9 @@ public function testReplaceProcessesMultipleDocuments() $result = iterator_to_array($transformer->transform($documents)); $this->assertCount(3, $result); - $this->assertSame('y marks the spot', $result[0]->content); - $this->assertSame('find y here', $result[1]->content); - $this->assertSame('no match', $result[2]->content); + $this->assertSame('y marks the spot', $result[0]->getContent()); + $this->assertSame('find y here', $result[1]->getContent()); + $this->assertSame('no match', $result[2]->getContent()); } public function testReplaceCaseSensitive() @@ -134,7 +134,7 @@ public function testReplaceCaseSensitive() $result = iterator_to_array($transformer->transform([$document])); $this->assertCount(1, $result); - $this->assertSame('Goodbye hello HELLO', $result[0]->content); + $this->assertSame('Goodbye hello HELLO', $result[0]->getContent()); } public function testReplaceHandlesNoMatch() @@ -145,7 +145,7 @@ public function testReplaceHandlesNoMatch() $result = iterator_to_array($transformer->transform([$document])); $this->assertCount(1, $result); - $this->assertSame('original text', $result[0]->content); + 
$this->assertSame('original text', $result[0]->getContent()); } public function testConstructorThrowsExceptionWhenSearchEqualsReplace() @@ -189,6 +189,6 @@ public function testPartialOptionsUseConstructorDefaults() ])); $this->assertCount(1, $result); - $this->assertSame('default replacement', $result[0]->content); + $this->assertSame('default replacement', $result[0]->getContent()); } } diff --git a/src/store/tests/Document/Transformer/TextSplitTransformerTest.php b/src/store/tests/Document/Transformer/TextSplitTransformerTest.php index 417829ee8..641a9b34a 100644 --- a/src/store/tests/Document/Transformer/TextSplitTransformerTest.php +++ b/src/store/tests/Document/Transformer/TextSplitTransformerTest.php @@ -34,7 +34,7 @@ public function testSplitReturnsSingleChunkForShortText() $chunks = iterator_to_array($this->transformer->transform([$document])); $this->assertCount(1, $chunks); - $this->assertSame('short text', $chunks[0]->content); + $this->assertSame('short text', $chunks[0]->getContent()); } public function testTextLength() @@ -50,11 +50,11 @@ public function testSplitSplitsLongTextWithOverlap() $this->assertCount(2, $chunks); - $this->assertSame(1000, mb_strlen($chunks[0]->content)); - $this->assertSame(substr($this->getLongText(), 0, 1000), $chunks[0]->content); + $this->assertSame(1000, mb_strlen($chunks[0]->getContent())); + $this->assertSame(substr($this->getLongText(), 0, 1000), $chunks[0]->getContent()); - $this->assertSame(700, mb_strlen($chunks[1]->content)); - $this->assertSame(substr($this->getLongText(), 800, 700), $chunks[1]->content); + $this->assertSame(700, mb_strlen($chunks[1]->getContent())); + $this->assertSame(substr($this->getLongText(), 800, 700), $chunks[1]->getContent()); } public function testSplitWithCustomChunkSizeAndOverlap() @@ -68,41 +68,41 @@ public function testSplitWithCustomChunkSizeAndOverlap() $this->assertCount(12, $chunks); - $this->assertSame(150, mb_strlen($chunks[0]->content)); - 
$this->assertSame(substr($this->getLongText(), 0, 150), $chunks[0]->content); + $this->assertSame(150, mb_strlen($chunks[0]->getContent())); + $this->assertSame(substr($this->getLongText(), 0, 150), $chunks[0]->getContent()); - $this->assertSame(150, mb_strlen($chunks[1]->content)); - $this->assertSame(substr($this->getLongText(), 125, 150), $chunks[1]->content); + $this->assertSame(150, mb_strlen($chunks[1]->getContent())); + $this->assertSame(substr($this->getLongText(), 125, 150), $chunks[1]->getContent()); - $this->assertSame(150, mb_strlen($chunks[2]->content)); - $this->assertSame(substr($this->getLongText(), 250, 150), $chunks[2]->content); + $this->assertSame(150, mb_strlen($chunks[2]->getContent())); + $this->assertSame(substr($this->getLongText(), 250, 150), $chunks[2]->getContent()); - $this->assertSame(150, mb_strlen($chunks[3]->content)); - $this->assertSame(substr($this->getLongText(), 375, 150), $chunks[3]->content); + $this->assertSame(150, mb_strlen($chunks[3]->getContent())); + $this->assertSame(substr($this->getLongText(), 375, 150), $chunks[3]->getContent()); - $this->assertSame(150, mb_strlen($chunks[4]->content)); - $this->assertSame(substr($this->getLongText(), 500, 150), $chunks[4]->content); + $this->assertSame(150, mb_strlen($chunks[4]->getContent())); + $this->assertSame(substr($this->getLongText(), 500, 150), $chunks[4]->getContent()); - $this->assertSame(150, mb_strlen($chunks[5]->content)); - $this->assertSame(substr($this->getLongText(), 625, 150), $chunks[5]->content); + $this->assertSame(150, mb_strlen($chunks[5]->getContent())); + $this->assertSame(substr($this->getLongText(), 625, 150), $chunks[5]->getContent()); - $this->assertSame(150, mb_strlen($chunks[6]->content)); - $this->assertSame(substr($this->getLongText(), 750, 150), $chunks[6]->content); + $this->assertSame(150, mb_strlen($chunks[6]->getContent())); + $this->assertSame(substr($this->getLongText(), 750, 150), $chunks[6]->getContent()); - $this->assertSame(150, 
mb_strlen($chunks[7]->content)); - $this->assertSame(substr($this->getLongText(), 875, 150), $chunks[7]->content); + $this->assertSame(150, mb_strlen($chunks[7]->getContent())); + $this->assertSame(substr($this->getLongText(), 875, 150), $chunks[7]->getContent()); - $this->assertSame(150, mb_strlen($chunks[8]->content)); - $this->assertSame(substr($this->getLongText(), 1000, 150), $chunks[8]->content); + $this->assertSame(150, mb_strlen($chunks[8]->getContent())); + $this->assertSame(substr($this->getLongText(), 1000, 150), $chunks[8]->getContent()); - $this->assertSame(150, mb_strlen($chunks[9]->content)); - $this->assertSame(substr($this->getLongText(), 1125, 150), $chunks[9]->content); + $this->assertSame(150, mb_strlen($chunks[9]->getContent())); + $this->assertSame(substr($this->getLongText(), 1125, 150), $chunks[9]->getContent()); - $this->assertSame(150, mb_strlen($chunks[10]->content)); - $this->assertSame(substr($this->getLongText(), 1250, 150), $chunks[10]->content); + $this->assertSame(150, mb_strlen($chunks[10]->getContent())); + $this->assertSame(substr($this->getLongText(), 1250, 150), $chunks[10]->getContent()); - $this->assertSame(125, mb_strlen($chunks[11]->content)); - $this->assertSame(substr($this->getLongText(), 1375, 150), $chunks[11]->content); + $this->assertSame(125, mb_strlen($chunks[11]->getContent())); + $this->assertSame(substr($this->getLongText(), 1375, 150), $chunks[11]->getContent()); } public function testSplitWithZeroOverlap() @@ -114,8 +114,8 @@ public function testSplitWithZeroOverlap() ])); $this->assertCount(2, $chunks); - $this->assertSame(substr($this->getLongText(), 0, 1000), $chunks[0]->content); - $this->assertSame(substr($this->getLongText(), 1000, 500), $chunks[1]->content); + $this->assertSame(substr($this->getLongText(), 0, 1000), $chunks[0]->getContent()); + $this->assertSame(substr($this->getLongText(), 1000, 500), $chunks[1]->getContent()); } public function testParentIdIsSetInMetadata() @@ -128,8 +128,8 @@ public 
function testParentIdIsSetInMetadata() ])); $this->assertCount(2, $chunks); - $this->assertSame($document->id, $chunks[0]->metadata['_parent_id']); - $this->assertSame($document->id, $chunks[1]->metadata['_parent_id']); + $this->assertSame($document->getId(), $chunks[0]->getMetadata()['_parent_id']); + $this->assertSame($document->getId(), $chunks[1]->getMetadata()['_parent_id']); } public function testMetadataIsInherited() @@ -142,10 +142,10 @@ public function testMetadataIsInherited() $chunks = iterator_to_array($this->transformer->transform([$document])); $this->assertCount(2, $chunks); - $this->assertSame('value', $chunks[0]->metadata['key']); - $this->assertSame('bar', $chunks[0]->metadata['foo']); - $this->assertSame('value', $chunks[1]->metadata['key']); - $this->assertSame('bar', $chunks[1]->metadata['foo']); + $this->assertSame('value', $chunks[0]->getMetadata()['key']); + $this->assertSame('bar', $chunks[0]->getMetadata()['foo']); + $this->assertSame('value', $chunks[1]->getMetadata()['key']); + $this->assertSame('bar', $chunks[1]->getMetadata()['foo']); } public function testSplitWithChunkSizeLargerThanText() @@ -155,7 +155,7 @@ public function testSplitWithChunkSizeLargerThanText() $chunks = iterator_to_array($this->transformer->transform([$document])); $this->assertCount(1, $chunks); - $this->assertSame('tiny', $chunks[0]->content); + $this->assertSame('tiny', $chunks[0]->getContent()); } public function testSplitWithOverlapGreaterThanChunkSize() @@ -190,7 +190,7 @@ public function testConstructorWithValidParameters() $chunks = iterator_to_array($transformer->transform([$document])); $this->assertCount(1, $chunks); - $this->assertSame('short text', $chunks[0]->content); + $this->assertSame('short text', $chunks[0]->getContent()); } public function testConstructorWithDefaultParameters() @@ -201,7 +201,7 @@ public function testConstructorWithDefaultParameters() $chunks = iterator_to_array($transformer->transform([$document])); $this->assertCount(1, 
$chunks); - $this->assertSame('short text', $chunks[0]->content); + $this->assertSame('short text', $chunks[0]->getContent()); } public function testConstructorWithNegativeOverlap() @@ -236,7 +236,7 @@ public function testConstructorParametersAreUsedAsDefaults() $chunks = iterator_to_array($transformer->transform([$document])); $this->assertCount(12, $chunks); - $this->assertSame(150, mb_strlen($chunks[0]->content)); + $this->assertSame(150, mb_strlen($chunks[0]->getContent())); } private function getLongText(): string diff --git a/src/store/tests/Document/Transformer/TextTrimTransformerTest.php b/src/store/tests/Document/Transformer/TextTrimTransformerTest.php index d3b09c46f..164d74a02 100644 --- a/src/store/tests/Document/Transformer/TextTrimTransformerTest.php +++ b/src/store/tests/Document/Transformer/TextTrimTransformerTest.php @@ -39,7 +39,7 @@ public function testTrim(string $input, string $expected) $result = iterator_to_array($transformer->transform([$document])); $this->assertCount(1, $result); - $this->assertSame($expected, $result[0]->content); + $this->assertSame($expected, $result[0]->getContent()); } public function testTrimHandlesOnlyWhitespace() @@ -64,9 +64,9 @@ public function testTrimProcessesMultipleDocuments() $result = iterator_to_array($transformer->transform($documents)); $this->assertCount(3, $result); - $this->assertSame('first', $result[0]->content); - $this->assertSame('second', $result[1]->content); - $this->assertSame('third', $result[2]->content); + $this->assertSame('first', $result[0]->getContent()); + $this->assertSame('second', $result[1]->getContent()); + $this->assertSame('third', $result[2]->getContent()); } public function testTrimPreservesMetadata() @@ -78,8 +78,8 @@ public function testTrimPreservesMetadata() $result = iterator_to_array($transformer->transform([$document])); $this->assertCount(1, $result); - $this->assertSame('text', $result[0]->content); - $this->assertSame($metadata, $result[0]->metadata); + 
$this->assertSame('text', $result[0]->getContent()); + $this->assertSame($metadata, $result[0]->getMetadata()); } public function testTrimPreservesDocumentId() @@ -91,6 +91,6 @@ public function testTrimPreservesDocumentId() $result = iterator_to_array($transformer->transform([$document])); $this->assertCount(1, $result); - $this->assertSame($id, $result[0]->id); + $this->assertSame($id, $result[0]->getId()); } } diff --git a/src/store/tests/Document/VectorizerTest.php b/src/store/tests/Document/VectorizerTest.php index 67596ab22..374f29835 100644 --- a/src/store/tests/Document/VectorizerTest.php +++ b/src/store/tests/Document/VectorizerTest.php @@ -65,15 +65,15 @@ public function testVectorizeDocumentsWithBatchSupport() $platform = PlatformTestHandler::createPlatform(new VectorResult(...$vectors), $modelCatalog); $vectorizer = new Vectorizer($platform, 'test-embedding-with-batch'); - $vectorDocuments = $vectorizer->vectorizeTextDocuments($documents); + $vectorDocuments = $vectorizer->vectorizeEmbeddableDocuments($documents); $this->assertCount(3, $vectorDocuments); foreach ($vectorDocuments as $i => $vectorDoc) { $this->assertInstanceOf(VectorDocument::class, $vectorDoc); - $this->assertSame($documents[$i]->id, $vectorDoc->id); + $this->assertSame($documents[$i]->getId(), $vectorDoc->id); $this->assertEquals($vectors[$i], $vectorDoc->vector); - $this->assertSame($documents[$i]->metadata, $vectorDoc->metadata); + $this->assertSame($documents[$i]->getMetadata(), $vectorDoc->metadata); } } @@ -84,20 +84,20 @@ public function testVectorizeDocumentsWithSingleDocument() $platform = PlatformTestHandler::createPlatform(new VectorResult($vector)); $vectorizer = new Vectorizer($platform, 'text-embedding-3-small'); - $vectorDocuments = $vectorizer->vectorizeTextDocuments([$document]); + $vectorDocuments = $vectorizer->vectorizeEmbeddableDocuments([$document]); $this->assertCount(1, $vectorDocuments); $this->assertInstanceOf(VectorDocument::class, $vectorDocuments[0]); - 
$this->assertSame($document->id, $vectorDocuments[0]->id); + $this->assertSame($document->getId(), $vectorDocuments[0]->id); $this->assertEquals($vector, $vectorDocuments[0]->vector); - $this->assertSame($document->metadata, $vectorDocuments[0]->metadata); + $this->assertSame($document->getMetadata(), $vectorDocuments[0]->metadata); } public function testVectorizeEmptyDocumentsArray() { $platform = PlatformTestHandler::createPlatform(new VectorResult()); $vectorizer = new Vectorizer($platform, 'text-embedding-3-small'); - $vectorDocuments = $vectorizer->vectorizeTextDocuments([]); + $vectorDocuments = $vectorizer->vectorizeEmbeddableDocuments([]); $this->assertSame([], $vectorDocuments); } @@ -119,7 +119,7 @@ public function testVectorizeDocumentsPreservesMetadata() $platform = PlatformTestHandler::createPlatform(new VectorResult(...$vectors)); $vectorizer = new Vectorizer($platform, 'text-embedding-3-small'); - $vectorDocuments = $vectorizer->vectorizeTextDocuments($documents); + $vectorDocuments = $vectorizer->vectorizeEmbeddableDocuments($documents); $this->assertCount(2, $vectorDocuments); $this->assertSame($metadata1, $vectorDocuments[0]->metadata); @@ -148,7 +148,7 @@ public function testVectorizeDocumentsPreservesDocumentIds() $platform = PlatformTestHandler::createPlatform(new VectorResult(...$vectors)); $vectorizer = new Vectorizer($platform, 'text-embedding-3-small'); - $vectorDocuments = $vectorizer->vectorizeTextDocuments($documents); + $vectorDocuments = $vectorizer->vectorizeEmbeddableDocuments($documents); $this->assertCount(3, $vectorDocuments); $this->assertSame($id1, $vectorDocuments[0]->id); @@ -175,15 +175,15 @@ public function testVectorizeVariousDocumentCounts(int $count) $count > 0 ? 
new VectorResult(...$vectors) : new VectorResult() ); $vectorizer = new Vectorizer($platform, 'text-embedding-3-small'); - $vectorDocuments = $vectorizer->vectorizeTextDocuments($documents); + $vectorDocuments = $vectorizer->vectorizeEmbeddableDocuments($documents); $this->assertCount($count, $vectorDocuments); foreach ($vectorDocuments as $i => $vectorDoc) { $this->assertInstanceOf(VectorDocument::class, $vectorDoc); - $this->assertSame($documents[$i]->id, $vectorDoc->id); + $this->assertSame($documents[$i]->getId(), $vectorDoc->id); $this->assertEquals($vectors[$i], $vectorDoc->vector); - $this->assertSame($documents[$i]->metadata, $vectorDoc->metadata); + $this->assertSame($documents[$i]->getMetadata(), $vectorDoc->metadata); $this->assertSame(['index' => $i], $vectorDoc->metadata->getArrayCopy()); } } @@ -212,7 +212,7 @@ public function testVectorizeDocumentsWithLargeVectors() $platform = PlatformTestHandler::createPlatform(new VectorResult($vector)); $vectorizer = new Vectorizer($platform, 'text-embedding-3-small'); - $vectorDocuments = $vectorizer->vectorizeTextDocuments([$document]); + $vectorDocuments = $vectorizer->vectorizeEmbeddableDocuments([$document]); $this->assertCount(1, $vectorDocuments); $this->assertEquals($vector, $vectorDocuments[0]->vector); @@ -234,12 +234,12 @@ public function testVectorizeDocumentsWithSpecialCharacters() $platform = PlatformTestHandler::createPlatform(new VectorResult(...$vectors)); $vectorizer = new Vectorizer($platform, 'text-embedding-3-small'); - $vectorDocuments = $vectorizer->vectorizeTextDocuments($documents); + $vectorDocuments = $vectorizer->vectorizeEmbeddableDocuments($documents); $this->assertCount(3, $vectorDocuments); foreach ($vectorDocuments as $i => $vectorDoc) { - $this->assertSame($documents[$i]->id, $vectorDoc->id); + $this->assertSame($documents[$i]->getId(), $vectorDoc->id); $this->assertEquals($vectors[$i], $vectorDoc->vector); } } @@ -272,7 +272,7 @@ public function 
testVectorizeDocumentsWithoutBatchSupportUsesNonBatchMode() $platform = PlatformTestHandler::createPlatform(new VectorResult(...$vectors), $modelCatalog); $vectorizer = new Vectorizer($platform, 'test-embedding-no-batch'); - $vectorDocuments = $vectorizer->vectorizeTextDocuments($documents); + $vectorDocuments = $vectorizer->vectorizeEmbeddableDocuments($documents); $this->assertCount(2, $vectorDocuments); $this->assertEquals($vectors[0], $vectorDocuments[0]->vector); @@ -344,7 +344,7 @@ public function testVectorizeTextDocumentsPassesOptionsToInvoke() // This ensures batch mode is used and the test expectation matches the behavior $platform = PlatformTestHandler::createPlatform(new VectorResult($vector)); $vectorizer = new Vectorizer($platform, 'test-embedding-with-batch'); - $result = $vectorizer->vectorizeTextDocuments($documents, $options); + $result = $vectorizer->vectorizeEmbeddableDocuments($documents, $options); $this->assertCount(1, $result); $this->assertEquals($vector, $result[0]->vector); @@ -362,7 +362,7 @@ public function testVectorizeTextDocumentsWithEmptyOptions() // This ensures batch mode is used and the test expectation matches the behavior $platform = PlatformTestHandler::createPlatform(new VectorResult($vector)); $vectorizer = new Vectorizer($platform, 'test-embedding-with-batch'); - $result = $vectorizer->vectorizeTextDocuments($documents); + $result = $vectorizer->vectorizeEmbeddableDocuments($documents); $this->assertCount(1, $result); $this->assertEquals($vector, $result[0]->vector); @@ -441,7 +441,7 @@ public function testVectorizeTextDocumentsWithoutBatchSupportPassesOptions() $platform = PlatformTestHandler::createPlatform(new VectorResult(...$vectors), $modelCatalog); $vectorizer = new Vectorizer($platform, 'test-embedding-no-batch-with-options'); - $result = $vectorizer->vectorizeTextDocuments($documents, $options); + $result = $vectorizer->vectorizeEmbeddableDocuments($documents, $options); $this->assertCount(2, $result); 
$this->assertEquals($vectors[0], $result[0]->vector); diff --git a/src/store/tests/IndexerTest.php b/src/store/tests/IndexerTest.php index 0f71668fc..6ee808383 100644 --- a/src/store/tests/IndexerTest.php +++ b/src/store/tests/IndexerTest.php @@ -199,10 +199,10 @@ public function testIndexWithFiltersAndTransformers() public function transform(iterable $documents, array $options = []): iterable { foreach ($documents as $document) { - $metadata = new Metadata($document->metadata->getArrayCopy()); + $metadata = new Metadata($document->getMetadata()->getArrayCopy()); $metadata['transformed'] = true; - $metadata['original_content'] = $document->content; - yield new TextDocument($document->id, strtoupper($document->content), $metadata); + $metadata['original_content'] = $document->getContent(); + yield new TextDocument($document->getId(), strtoupper($document->getContent()), $metadata); } } }; @@ -235,7 +235,7 @@ public function testIndexWithFiltersAndTransformersAppliesBoth() public function filter(iterable $documents, array $options = []): iterable { foreach ($documents as $document) { - if (!str_contains($document->content, 'Remove')) { + if (!str_contains($document->getContent(), 'Remove')) { yield $document; } } @@ -246,9 +246,9 @@ public function filter(iterable $documents, array $options = []): iterable public function transform(iterable $documents, array $options = []): iterable { foreach ($documents as $document) { - $metadata = new Metadata($document->metadata->getArrayCopy()); + $metadata = new Metadata($document->getMetadata()->getArrayCopy()); $metadata['transformed'] = true; - yield new TextDocument($document->id, $document->content, $metadata); + yield new TextDocument($document->getId(), $document->getContent(), $metadata); } } };