From 7c64af328dd2d951616eb48b622329573504f775 Mon Sep 17 00:00:00 2001 From: Guillaume Loulier Date: Thu, 14 Aug 2025 15:14:53 +0200 Subject: [PATCH] feat(store): Weaviate support --- examples/.env | 4 + examples/compose.yaml | 21 +- examples/rag/weaviate.php | 71 +++++ src/ai-bundle/config/options.php | 10 + src/ai-bundle/src/AiBundle.php | 19 ++ .../DependencyInjection/AiBundleTest.php | 7 + src/store/CHANGELOG.md | 1 + src/store/doc/index.rst | 4 + src/store/src/Bridge/Weaviate/Store.php | 135 ++++++++++ src/store/tests/Bridge/Weaviate/StoreTest.php | 255 ++++++++++++++++++ 10 files changed, 526 insertions(+), 1 deletion(-) create mode 100644 examples/rag/weaviate.php create mode 100644 src/store/src/Bridge/Weaviate/Store.php create mode 100644 src/store/tests/Bridge/Weaviate/StoreTest.php diff --git a/examples/.env b/examples/.env index 5592e1e4a..0bd80c166 100644 --- a/examples/.env +++ b/examples/.env @@ -128,3 +128,7 @@ CHROMADB_PORT=8001 CLICKHOUSE_HOST=http://symfony:symfony@127.0.0.1:8123 CLICKHOUSE_DATABASE=symfony CLICKHOUSE_TABLE=symfony + +# Weaviate (store) +WEAVIATE_HOST=http://127.0.0.1:8080 +WEAVIATE_API_KEY=symfony diff --git a/examples/compose.yaml b/examples/compose.yaml index 4f611dc5c..3b46e6cc9 100644 --- a/examples/compose.yaml +++ b/examples/compose.yaml @@ -68,7 +68,6 @@ services: MINIO_SECRET_KEY: minioadmin ports: - '9001:9001' - - '9000:9000' volumes: - minio_vlm:/minio_data command: minio server /minio_data --console-address ":9001" @@ -145,9 +144,29 @@ services: ports: - '8108:8108' + weaviate: + image: cr.weaviate.io/semitechnologies/weaviate:1.32.4 + command: ['--host', '0.0.0.0', '--port', '8080', '--scheme', 'http'] + environment: + QUERY_DEFAULTS_LIMIT: 25 + AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'false' + AUTHENTICATION_APIKEY_ENABLED: 'true' + AUTHENTICATION_APIKEY_ALLOWED_KEYS: 'symfony' + AUTHENTICATION_APIKEY_USERS: 'symfony' + PERSISTENCE_DATA_PATH: '/var/lib/weaviate' + ENABLE_API_BASED_MODULES: 'true' + 
CLUSTER_HOSTNAME: 'node1'
+      RAFT_ENABLE_ONE_NODE_RECOVERY: 'true' # See https://github.com/weaviate/weaviate/issues/5491#issuecomment-2416929309
+    volumes:
+      - weaviate_data:/var/lib/weaviate
+    ports:
+      - '8080:8080'
+      - '50051:50051'
+
 volumes:
   typesense_data:
   etcd_vlm:
   minio_vlm:
   milvus_vlm:
   chroma_vlm:
+  weaviate_data:
diff --git a/examples/rag/weaviate.php b/examples/rag/weaviate.php
new file mode 100644
index 000000000..131332b20
--- /dev/null
+++ b/examples/rag/weaviate.php
@@ -0,0 +1,71 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+use Symfony\AI\Agent\Agent;
+use Symfony\AI\Agent\Toolbox\AgentProcessor;
+use Symfony\AI\Agent\Toolbox\Tool\SimilaritySearch;
+use Symfony\AI\Agent\Toolbox\Toolbox;
+use Symfony\AI\Fixtures\Movies;
+use Symfony\AI\Platform\Bridge\OpenAi\Embeddings;
+use Symfony\AI\Platform\Bridge\OpenAi\Gpt;
+use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory;
+use Symfony\AI\Platform\Message\Message;
+use Symfony\AI\Platform\Message\MessageBag;
+use Symfony\AI\Store\Bridge\Weaviate\Store;
+use Symfony\AI\Store\Document\Metadata;
+use Symfony\AI\Store\Document\TextDocument;
+use Symfony\AI\Store\Document\Vectorizer;
+use Symfony\AI\Store\Indexer;
+use Symfony\Component\Uid\Uuid;
+
+require_once dirname(__DIR__).'/bootstrap.php';
+
+// initialize the store, pointing at the "Movies" collection of the configured Weaviate instance
+$store = new Store(
+    httpClient: http_client(),
+    endpointUrl: env('WEAVIATE_HOST'),
+    apiKey: env('WEAVIATE_API_KEY'),
+    collection: 'Movies',
+);
+
+// create the collection on the Weaviate side
+$store->setup();
+
+// build one text document per movie; the whole movie array is kept as metadata
+$documents = [];
+foreach (Movies::all() as $movie) {
+    $documents[] = new TextDocument(
+        id: Uuid::v4(),
+        content: 'Title: '.$movie['title'].\PHP_EOL.'Director: '.$movie['director'].\PHP_EOL.'Description: '.$movie['description'],
+        metadata: new Metadata($movie),
+    );
+}
+
+// create embeddings for the documents and index them into the store
+$platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client());
+$embeddings = new Embeddings();
+$vectorizer = new Vectorizer($platform, $embeddings);
+$indexer = new Indexer($vectorizer, $store, logger());
+$indexer->index($documents);
+
+$model = new Gpt(Gpt::GPT_4O_MINI);
+
+// expose the store to the agent through the similarity search tool (same embeddings model as above)
+$similaritySearch = new SimilaritySearch($platform, $embeddings, $store);
+$toolbox = new Toolbox([$similaritySearch], logger: logger());
+$processor = new AgentProcessor($toolbox);
+$agent = new Agent($platform, $model, [$processor], [$processor], logger());
+
+$messages = new MessageBag(
+    Message::forSystem('Please answer all user questions only using SimilaritySearch function.'),
+    Message::ofUser('Which movie fits the theme of technology?')
+);
+$result = $agent->call($messages);
+
+echo $result->getContent().\PHP_EOL;
diff --git a/src/ai-bundle/config/options.php b/src/ai-bundle/config/options.php
index 830a671ea..cafafc09e 100644
--- a/src/ai-bundle/config/options.php
+++ b/src/ai-bundle/config/options.php
@@ -337,6 +337,16 @@
                                 ->end()
                             ->end()
                         ->end()
+                        ->arrayNode('weaviate')
+                            ->useAttributeAsKey('name')
+                            ->arrayPrototype()
+                                ->children()
+                                    ->scalarNode('endpoint')->cannotBeEmpty()->end()
+                                    ->scalarNode('api_key')->isRequired()->end()
+                                    ->scalarNode('collection')->isRequired()->end()
+                                ->end()
+                            ->end()
+                        ->end()
                     ->end()
                 ->end()
                 ->arrayNode('indexer')
diff --git a/src/ai-bundle/src/AiBundle.php b/src/ai-bundle/src/AiBundle.php
index b5d446d06..873470d20 100644
--- a/src/ai-bundle/src/AiBundle.php
+++ b/src/ai-bundle/src/AiBundle.php
@@ -62,6 +62,7 @@
 use Symfony\AI\Store\Bridge\Qdrant\Store as QdrantStore;
 use Symfony\AI\Store\Bridge\SurrealDb\Store as SurrealDbStore;
 use Symfony\AI\Store\Bridge\Typesense\Store as TypesenseStore;
+use Symfony\AI\Store\Bridge\Weaviate\Store as WeaviateStore;
 use Symfony\AI\Store\Document\Vectorizer;
 use Symfony\AI\Store\Indexer;
 use Symfony\AI\Store\StoreInterface;
@@ -945,6 +946,24 @@ private function processStoreConfig(string $type, array $stores,
ContainerBuilde $container->setDefinition('ai.store.'.$type.'.'.$name, $definition); } } + + if ('weaviate' === $type) { + foreach ($stores as $name => $store) { + $arguments = [ + new Reference('http_client'), + $store['endpoint'], + $store['api_key'], + $store['collection'], + ]; + + $definition = new Definition(WeaviateStore::class); + $definition + ->addTag('ai.store') + ->setArguments($arguments); + + $container->setDefinition('ai.store.'.$type.'.'.$name, $definition); + } + } } /** diff --git a/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php b/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php index 7db3c4b25..204037ebd 100644 --- a/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php +++ b/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php @@ -528,6 +528,13 @@ private function getFullConfig(): array 'dimensions' => 768, ], ], + 'weaviate' => [ + 'my_weaviate_store' => [ + 'endpoint' => 'http://localhost:8080', + 'api_key' => 'bar', + 'collection' => 'my_weaviate_collection', + ], + ], ], 'indexer' => [ 'my_text_indexer' => [ diff --git a/src/store/CHANGELOG.md b/src/store/CHANGELOG.md index c9fee33d1..747409fe1 100644 --- a/src/store/CHANGELOG.md +++ b/src/store/CHANGELOG.md @@ -46,6 +46,7 @@ CHANGELOG - Qdrant - SurrealDB - Typesense + - Weaviate * Add Retrieval Augmented Generation (RAG) support: - Document embedding storage - Similarity search for relevant documents diff --git a/src/store/doc/index.rst b/src/store/doc/index.rst index 45c9eac1f..b99e1ced3 100644 --- a/src/store/doc/index.rst +++ b/src/store/doc/index.rst @@ -48,6 +48,7 @@ You can find more advanced usage in combination with an Agent using the store fo * `Similarity Search with SurrealDB (RAG)`_ * `Similarity Search with Symfony Cache (RAG)`_ * `Similarity Search with Typesense (RAG)`_ +* `Similarity Search with Weaviate (RAG)`_ .. note:: @@ -72,6 +73,7 @@ Supported Stores * `SurrealDB`_ * `Symfony Cache`_ * `Typesense`_ +* `Weaviate`_ .. 
note:: @@ -113,6 +115,7 @@ This leads to a store implementing two methods:: .. _`Similarity Search with Qdrant (RAG)`: https://github.com/symfony/ai/blob/main/examples/rag/qdrant.php .. _`Similarity Search with SurrealDB (RAG)`: https://github.com/symfony/ai/blob/main/examples/rag/surrealdb.php .. _`Similarity Search with Typesense (RAG)`: https://github.com/symfony/ai/blob/main/examples/rag/typesense.php +.. _`Similarity Search with Weaviate (RAG)`: https://github.com/symfony/ai/blob/main/examples/rag/weaviate.php .. _`Azure AI Search`: https://azure.microsoft.com/products/ai-services/ai-search .. _`Chroma`: https://www.trychroma.com/ .. _`MariaDB`: https://mariadb.org/projects/mariadb-vector/ @@ -128,3 +131,4 @@ This leads to a store implementing two methods:: .. _`Typesense`: https://typesense.org/ .. _`GitHub`: https://github.com/symfony/ai/issues/16 .. _`Symfony Cache`: https://symfony.com/doc/current/components/cache.html +.. _`Weaviate`: https://weaviate.io/ diff --git a/src/store/src/Bridge/Weaviate/Store.php b/src/store/src/Bridge/Weaviate/Store.php new file mode 100644 index 000000000..e3197c6a4 --- /dev/null +++ b/src/store/src/Bridge/Weaviate/Store.php @@ -0,0 +1,135 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */
+
+namespace Symfony\AI\Store\Bridge\Weaviate;
+
+use Symfony\AI\Platform\Vector\NullVector;
+use Symfony\AI\Platform\Vector\Vector;
+use Symfony\AI\Store\Document\Metadata;
+use Symfony\AI\Store\Document\VectorDocument;
+use Symfony\AI\Store\Exception\InvalidArgumentException;
+use Symfony\AI\Store\ManagedStoreInterface;
+use Symfony\AI\Store\StoreInterface;
+use Symfony\Component\Uid\Uuid;
+use Symfony\Contracts\HttpClient\HttpClientInterface;
+
+/**
+ * Store persisting vector documents in a Weaviate collection through Weaviate's HTTP API.
+ *
+ * Every call is authenticated with the API key sent as a bearer token.
+ *
+ * @author Guillaume Loulier
+ */
+final readonly class Store implements ManagedStoreInterface, StoreInterface
+{
+    public function __construct(
+        private HttpClientInterface $httpClient,
+        private string $endpointUrl,
+        #[\SensitiveParameter] private string $apiKey,
+        private string $collection,
+    ) {
+    }
+
+    /**
+     * Creates the collection by posting its name as a class to "v1/schema".
+     *
+     * @throws InvalidArgumentException When any option is passed, as none is supported
+     */
+    public function setup(array $options = []): void
+    {
+        if ([] !== $options) {
+            throw new InvalidArgumentException('No supported options.');
+        }
+
+        $this->request('POST', 'v1/schema', [
+            'class' => $this->collection,
+        ]);
+    }
+
+    /**
+     * Sends the given documents to "v1/batch/objects" in a single request.
+     *
+     * Calling it without any document is a no-op.
+     */
+    public function add(VectorDocument ...$documents): void
+    {
+        // Nothing to index: skip the round trip instead of posting an empty batch.
+        if ([] === $documents) {
+            return;
+        }
+
+        $this->request('POST', 'v1/batch/objects', [
+            'fields' => [
+                'ALL',
+            ],
+            // array_values() guarantees a JSON list even if the variadic was spread from a keyed array.
+            'objects' => array_map($this->convertToIndexableArray(...), array_values($documents)),
+        ]);
+    }
+
+    /**
+     * Runs a "nearVector" GraphQL search against the collection.
+     *
+     * The $options argument is accepted for interface compatibility but is not used by this store.
+     *
+     * @return VectorDocument[]
+     */
+    public function query(Vector $vector, array $options = []): array
+    {
+        $results = $this->request('POST', 'v1/graphql', [
+            'query' => \sprintf('{
+                Get {
+                    %s (
+                        nearVector: {
+                            vector: [%s]
+                        }
+                    ) {
+                        uuid,
+                        vector,
+                        _metadata
+                    }
+                }
+            }', $this->collection, implode(', ', $vector->getData())),
+        ]);
+
+        return array_map($this->convertToVectorDocument(...), $results['data']['Get'][$this->collection]);
+    }
+
+    /**
+     * Deletes the collection (and therefore its objects) from the Weaviate schema.
+     */
+    public function drop(): void
+    {
+        $this->request('DELETE', \sprintf('v1/schema/%s', $this->collection), []);
+    }
+
+    /**
+     * Performs an authenticated call and returns the decoded JSON response.
+     *
+     * @param array<string, mixed> $payload JSON body of the request; no body is sent when empty
+     *
+     * @return array<mixed> The decoded response, or an empty array when the response has no body
+     */
+    private function request(string $method, string $endpoint, array $payload): array
+    {
+        // Tolerate a trailing slash in the configured endpoint so the URL never contains "//".
+        $url = \sprintf('%s/%s', rtrim($this->endpointUrl, '/'), $endpoint);
+
+        $requestOptions = [
+            'auth_bearer' => $this->apiKey,
+        ];
+
+        if ([] !== $payload) {
+            $requestOptions['json'] = $payload;
+        }
+
+        $response = $this->httpClient->request($method, $url, $requestOptions);
+
+        // toArray() fails on an empty body, which is a legitimate answer for calls such as the
+        // schema deletion. getContent() still throws on error status codes, like toArray() does.
+        if ('' === $response->getContent()) {
+            return [];
+        }
+
+        return $response->toArray();
+    }
+
+    /**
+     * Converts a document into the object structure expected by the batch endpoint.
+     *
+     * The id, vector and JSON-encoded metadata are duplicated as properties because query()
+     * reads them back from the "uuid", "vector" and "_metadata" properties.
+     *
+     * @return array<string, mixed>
+     *
+     * @throws \JsonException When the metadata cannot be encoded
+     */
+    private function convertToIndexableArray(VectorDocument $document): array
+    {
+        $id = $document->id->toRfc4122();
+        $vector = $document->vector->getData();
+
+        return [
+            'class' => $this->collection,
+            'id' => $id,
+            'vector' => $vector,
+            'properties' => [
+                'uuid' => $id,
+                'vector' => $vector,
+                '_metadata' => json_encode($document->metadata->getArrayCopy(), \JSON_THROW_ON_ERROR),
+            ],
+        ];
+    }
+
+    /**
+     * Rebuilds a document from one entry of the GraphQL result.
+     *
+     * @param array<string, mixed> $data
+     *
+     * @throws InvalidArgumentException When the entry has no "uuid"
+     * @throws \JsonException           When the stored metadata is not valid JSON
+     */
+    private function convertToVectorDocument(array $data): VectorDocument
+    {
+        $id = $data['uuid'] ?? throw new InvalidArgumentException('Missing "uuid" field in the document data.');
+
+        // A missing or null vector is represented by a NullVector.
+        $vector = isset($data['vector']) ? new Vector($data['vector']) : new NullVector();
+
+        // Entries without metadata yield an empty Metadata instead of failing on a null payload.
+        $metadata = isset($data['_metadata'])
+            ? json_decode($data['_metadata'], true, flags: \JSON_THROW_ON_ERROR)
+            : [];
+
+        return new VectorDocument(Uuid::fromString($id), $vector, new Metadata($metadata));
+    }
+}
diff --git a/src/store/tests/Bridge/Weaviate/StoreTest.php b/src/store/tests/Bridge/Weaviate/StoreTest.php
new file mode 100644
index 000000000..f7e36a37b
--- /dev/null
+++ b/src/store/tests/Bridge/Weaviate/StoreTest.php
@@ -0,0 +1,255 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Symfony\AI\Store\Tests\Bridge\Weaviate;
+
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\Attributes\UsesClass;
+use PHPUnit\Framework\TestCase;
+use Symfony\AI\Platform\Vector\Vector;
+use Symfony\AI\Store\Bridge\Weaviate\Store;
+use Symfony\AI\Store\Document\VectorDocument;
+use Symfony\AI\Store\Exception\InvalidArgumentException;
+use Symfony\Component\HttpClient\Exception\ClientException;
+use Symfony\Component\HttpClient\MockHttpClient;
+use Symfony\Component\HttpClient\Response\JsonMockResponse;
+use Symfony\Component\Uid\Uuid;
+
+/**
+ * Exercises the Weaviate store against a mocked HTTP client: every operation is checked once
+ * with a successful response and once with a 422 response.
+ */
+#[CoversClass(Store::class)]
+#[UsesClass(VectorDocument::class)]
+#[UsesClass(Vector::class)]
+final class StoreTest extends TestCase
+{
+    public function testStoreCannotSetupWithExtraOptions()
+    {
+        $store = $this->createStore(new MockHttpClient());
+
+        $this->expectException(InvalidArgumentException::class);
+        $this->expectExceptionMessage('No supported options.');
+        $this->expectExceptionCode(0);
+        $store->setup([
+            'foo' => 'bar',
+        ]);
+    }
+
+    public function testStoreCannotSetupOnInvalidResponse()
+    {
+        $store = $this->createStore($this->createFailingHttpClient());
+
+        $this->expectException(ClientException::class);
+        $this->expectExceptionMessage('HTTP 422 returned for "http://127.0.0.1:8080/v1/schema".');
+        $this->expectExceptionCode(422);
+        $store->setup();
+    }
+
+    public function testStoreCanSetup()
+    {
+        $httpClient = $this->createHttpClient([
+            'class' => 'test',
+        ]);
+
+        $this->createStore($httpClient)->setup();
+
+        $this->assertSame(1, $httpClient->getRequestsCount());
+    }
+
+    public function testStoreCannotDropOnInvalidResponse()
+    {
+        $store = $this->createStore($this->createFailingHttpClient());
+
+        $this->expectException(ClientException::class);
+        $this->expectExceptionMessage('HTTP 422 returned for "http://127.0.0.1:8080/v1/schema/test".');
+        $this->expectExceptionCode(422);
+        $store->drop();
+    }
+
+    public function testStoreCanDrop()
+    {
+        $httpClient = $this->createHttpClient([]);
+
+        $this->createStore($httpClient)->drop();
+
+        $this->assertSame(1, $httpClient->getRequestsCount());
+    }
+
+    public function testStoreCannotAddOnInvalidResponse()
+    {
+        $store = $this->createStore($this->createFailingHttpClient());
+
+        $this->expectException(ClientException::class);
+        $this->expectExceptionMessage('HTTP 422 returned for "http://127.0.0.1:8080/v1/batch/objects".');
+        $this->expectExceptionCode(422);
+        $store->add($this->createDocument());
+    }
+
+    public function testStoreCanAdd()
+    {
+        $httpClient = $this->createHttpClient([
+            'objects' => [
+                [
+                    'class' => 'test',
+                    'id' => Uuid::v4()->toRfc4122(),
+                    'vector' => [0.1, 0.2, 0.3],
+                ],
+            ],
+        ]);
+
+        $this->createStore($httpClient)->add($this->createDocument());
+
+        $this->assertSame(1, $httpClient->getRequestsCount());
+    }
+
+    public function testStoreCannotQueryOnInvalidResponse()
+    {
+        $store = $this->createStore($this->createFailingHttpClient());
+
+        $this->expectException(ClientException::class);
+        $this->expectExceptionMessage('HTTP 422 returned for "http://127.0.0.1:8080/v1/graphql".');
+        $this->expectExceptionCode(422);
+        $store->query(new Vector([0.1, 0.2, 0.3]));
+    }
+
+    public function testStoreCanQuery()
+    {
+        $httpClient = $this->createHttpClient([
+            'data' => [
+                'Get' => [
+                    'test' => [
+                        $this->createStoredObject(),
+                        $this->createStoredObject(),
+                    ],
+                ],
+            ],
+        ]);
+
+        $results = $this->createStore($httpClient)->query(new Vector([0.1, 0.2, 0.3]));
+
+        $this->assertCount(2, $results);
+        $this->assertSame(1, $httpClient->getRequestsCount());
+    }
+
+    /**
+     * Store under test, bound to the "test" collection with "test" as API key.
+     */
+    private function createStore(MockHttpClient $httpClient): Store
+    {
+        return new Store(
+            $httpClient,
+            'http://127.0.0.1:8080',
+            'test',
+            'test',
+        );
+    }
+
+    /**
+     * Client answering a single request with the given JSON body and status code.
+     *
+     * @param array<mixed> $body
+     */
+    private function createHttpClient(array $body, int $statusCode = 200): MockHttpClient
+    {
+        return new MockHttpClient([
+            new JsonMockResponse($body, [
+                'http_code' => $statusCode,
+            ]),
+        ], 'http://127.0.0.1:8080');
+    }
+
+    /**
+     * Client answering a single request with a 422 error payload.
+     */
+    private function createFailingHttpClient(): MockHttpClient
+    {
+        return $this->createHttpClient([
+            'error' => [
+                'message' => 'foo',
+            ],
+        ], 422);
+    }
+
+    /**
+     * Document with a random identifier and a three-dimensional vector.
+     */
+    private function createDocument(): VectorDocument
+    {
+        return new VectorDocument(Uuid::v4(), new Vector([0.1, 0.2, 0.3]));
+    }
+
+    /**
+     * One entry of a GraphQL "Get" result, shaped like the properties the store reads back.
+     *
+     * @return array<string, mixed>
+     */
+    private function createStoredObject(): array
+    {
+        return [
+            'uuid' => Uuid::v4()->toRfc4122(),
+            'vector' => [0.1, 0.2, 0.3],
+            '_metadata' => json_encode(['foo' => 'bar']),
+        ];
+    }
+}