From e55333d25e4ac141cf4ade17d5e9a477923f27ef Mon Sep 17 00:00:00 2001 From: Guillaume Loulier Date: Fri, 29 Aug 2025 13:56:13 +0200 Subject: [PATCH] feat(store): cloudflare --- examples/.env | 4 + examples/rag/cloudflare.php | 71 +++++ src/ai-bundle/config/options.php | 13 + src/ai-bundle/src/AiBundle.php | 31 +++ .../DependencyInjection/AiBundleTest.php | 10 + src/store/CHANGELOG.md | 1 + src/store/composer.json | 1 + src/store/doc/index.rst | 4 + src/store/src/Bridge/Cloudflare/Store.php | 145 +++++++++++ .../tests/Bridge/Cloudflare/StoreTest.php | 245 ++++++++++++++++++ 10 files changed, 525 insertions(+) create mode 100644 examples/rag/cloudflare.php create mode 100644 src/store/src/Bridge/Cloudflare/Store.php create mode 100644 src/store/tests/Bridge/Cloudflare/StoreTest.php diff --git a/examples/.env b/examples/.env index 0bd80c166..7a8741e1a 100644 --- a/examples/.env +++ b/examples/.env @@ -118,6 +118,10 @@ MILVUS_HOST=http://127.0.0.1:19530 MILVUS_API_KEY=root:Milvus MILVUS_DATABASE=symfony +# Cloudflare (store) +CLOUDFLARE_ACCOUNT_ID= +CLOUDFLARE_API_KEY= + # Cerebras CEREBRAS_API_KEY= diff --git a/examples/rag/cloudflare.php b/examples/rag/cloudflare.php new file mode 100644 index 000000000..0423be412 --- /dev/null +++ b/examples/rag/cloudflare.php @@ -0,0 +1,71 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +use Symfony\AI\Agent\Agent; +use Symfony\AI\Agent\Toolbox\AgentProcessor; +use Symfony\AI\Agent\Toolbox\Tool\SimilaritySearch; +use Symfony\AI\Agent\Toolbox\Toolbox; +use Symfony\AI\Fixtures\Movies; +use Symfony\AI\Platform\Bridge\OpenAi\Embeddings; +use Symfony\AI\Platform\Bridge\OpenAi\Gpt; +use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory; +use Symfony\AI\Platform\Message\Message; +use Symfony\AI\Platform\Message\MessageBag; +use Symfony\AI\Store\Bridge\Cloudflare\Store; +use Symfony\AI\Store\Document\Metadata; +use Symfony\AI\Store\Document\TextDocument; +use Symfony\AI\Store\Document\Vectorizer; +use Symfony\AI\Store\Indexer; +use Symfony\Component\Uid\Uuid; + +require_once dirname(__DIR__).'/bootstrap.php'; + +// initialize the store +$store = new Store( + httpClient: http_client(), + accountId: env('CLOUDFLARE_ACCOUNT_ID'), + apiKey: env('CLOUDFLARE_API_KEY'), + index: 'movies', +); + +// initialize the index +$store->setup(); + +// create embeddings and documents +$documents = []; +foreach (Movies::all() as $i => $movie) { + $documents[] = new TextDocument( + id: Uuid::v4(), + content: 'Title: '.$movie['title'].\PHP_EOL.'Director: '.$movie['director'].\PHP_EOL.'Description: '.$movie['description'], + metadata: new Metadata($movie), + ); +} + +// create embeddings for documents (keep in mind that upserting vectors is asynchronous) +$platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client()); +$vectorizer = new Vectorizer($platform, $embeddings = new Embeddings()); +$indexer = new Indexer($vectorizer, $store, logger()); +$indexer->index($documents); + +$model = new Gpt(Gpt::GPT_4O_MINI); + +$similaritySearch = new SimilaritySearch($platform, $embeddings, $store); +$toolbox = new Toolbox([$similaritySearch], logger: logger()); +$processor = new AgentProcessor($toolbox); +$agent = new Agent($platform, $model, [$processor], [$processor], logger()); + +$messages = new MessageBag( + Message::forSystem('Please answer all user questions 
only using SimilaritySearch function.'), + Message::ofUser('Which movie fits the theme of technology?') +); +$result = $agent->call($messages); + +echo $result->getContent().\PHP_EOL; diff --git a/src/ai-bundle/config/options.php b/src/ai-bundle/config/options.php index 18fa1649b..ffd5556d0 100644 --- a/src/ai-bundle/config/options.php +++ b/src/ai-bundle/config/options.php @@ -227,6 +227,19 @@ ->end() ->end() ->end() + ->arrayNode('cloudflare') + ->useAttributeAsKey('name') + ->arrayPrototype() + ->children() + ->scalarNode('account_id')->cannotBeEmpty()->end() + ->scalarNode('api_key')->cannotBeEmpty()->end() + ->scalarNode('index_name')->cannotBeEmpty()->end() + ->integerNode('dimensions')->end() + ->scalarNode('metric')->end() + ->scalarNode('endpoint_url')->end() + ->end() + ->end() + ->end() ->arrayNode('meilisearch') ->useAttributeAsKey('name') ->arrayPrototype() diff --git a/src/ai-bundle/src/AiBundle.php b/src/ai-bundle/src/AiBundle.php index bc21de8bd..8ef001951 100644 --- a/src/ai-bundle/src/AiBundle.php +++ b/src/ai-bundle/src/AiBundle.php @@ -50,6 +50,7 @@ use Symfony\AI\Store\Bridge\Azure\SearchStore as AzureSearchStore; use Symfony\AI\Store\Bridge\ChromaDb\Store as ChromaDbStore; use Symfony\AI\Store\Bridge\ClickHouse\Store as ClickHouseStore; +use Symfony\AI\Store\Bridge\Cloudflare\Store as CloudflareStore; use Symfony\AI\Store\Bridge\Local\CacheStore; use Symfony\AI\Store\Bridge\Local\DistanceCalculator; use Symfony\AI\Store\Bridge\Local\DistanceStrategy; @@ -685,6 +686,36 @@ private function processStoreConfig(string $type, array $stores, ContainerBuilde } } + if ('cloudflare' === $type) { + foreach ($stores as $name => $store) { + $arguments = [ + new Reference('http_client'), + $store['account_id'], + $store['api_key'], + $store['index_name'], + ]; + + if (\array_key_exists('dimensions', $store)) { + $arguments[4] = $store['dimensions']; + } + + if (\array_key_exists('metric', $store)) { + $arguments[5] = $store['metric']; + } + + if 
(\array_key_exists('endpoint_url', $store)) { + $arguments[6] = $store['endpoint_url']; // key matches the "endpoint_url" config node + } + + $definition = new Definition(CloudflareStore::class); + $definition + ->addTag('ai.store') + ->setArguments($arguments); + + $container->setDefinition('ai.store.'.$type.'.'.$name, $definition); + } + } + if ('meilisearch' === $type) { foreach ($stores as $name => $store) { $arguments = [ diff --git a/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php b/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php index 8dc71b696..ffba0118a 100644 --- a/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php +++ b/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php @@ -691,6 +691,16 @@ private function getFullConfig(): array 'table' => 'my_table', ], ], + 'cloudflare' => [ + 'my_cloudflare_store' => [ + 'account_id' => 'foo', + 'api_key' => 'bar', + 'index_name' => 'random', + 'dimensions' => 1536, + 'metric' => 'cosine', + 'endpoint_url' => 'https://api.cloudflare.com/client/v5/accounts', + ], + ], 'meilisearch' => [ 'my_meilisearch_store' => [ 'endpoint' => 'http://127.0.0.1:7700', diff --git a/src/store/CHANGELOG.md b/src/store/CHANGELOG.md index 747409fe1..1c3543789 100644 --- a/src/store/CHANGELOG.md +++ b/src/store/CHANGELOG.md @@ -37,6 +37,7 @@ CHANGELOG - Azure AI Search - ChromaDB - ClickHouse + - Cloudflare - MariaDB - Meilisearch - MongoDB diff --git a/src/store/composer.json b/src/store/composer.json index 01d629246..fac481639 100644 --- a/src/store/composer.json +++ b/src/store/composer.json @@ -7,6 +7,7 @@ "azure", "chromadb", "clickhouse", + "cloudflare", "mariadb", "meilisearch", "milvus", diff --git a/src/store/doc/index.rst b/src/store/doc/index.rst index b99e1ced3..4db06f34e 100644 --- a/src/store/doc/index.rst +++ b/src/store/doc/index.rst @@ -37,6 +37,7 @@ used vector store:: You can find more advanced usage in combination with an Agent using the store for RAG in the examples folder: +* `Similarity Search with Cloudflare (RAG)`_ * `Similarity Search 
with MariaDB (RAG)`_ * `Similarity Search with Meilisearch (RAG)`_ * `Similarity Search with memory storage (RAG)`_ @@ -61,6 +62,7 @@ Supported Stores * `Azure AI Search`_ * `Chroma`_ (requires `codewithkyrian/chromadb-php` as additional dependency) +* `Cloudflare`_ * `InMemory`_ * `MariaDB`_ (requires `ext-pdo`) * `Meilisearch`_ @@ -104,6 +106,7 @@ This leads to a store implementing two methods:: } .. _`Retrieval Augmented Generation`: https://de.wikipedia.org/wiki/Retrieval-Augmented_Generation +.. _`Similarity Search with Cloudflare (RAG)`: https://github.com/symfony/ai/blob/main/examples/rag/cloudflare.php .. _`Similarity Search with MariaDB (RAG)`: https://github.com/symfony/ai/blob/main/examples/rag/mariadb-gemini.php .. _`Similarity Search with Meilisearch (RAG)`: https://github.com/symfony/ai/blob/main/examples/rag/meilisearch.php .. _`Similarity Search with memory storage (RAG)`: https://github.com/symfony/ai/blob/main/examples/rag/in-memory.php @@ -118,6 +121,7 @@ This leads to a store implementing two methods:: .. _`Similarity Search with Weaviate (RAG)`: https://github.com/symfony/ai/blob/main/examples/rag/weaviate.php .. _`Azure AI Search`: https://azure.microsoft.com/products/ai-services/ai-search .. _`Chroma`: https://www.trychroma.com/ +.. _`Cloudflare`: https://developers.cloudflare.com/vectorize/ .. _`MariaDB`: https://mariadb.org/projects/mariadb-vector/ .. _`Pinecone`: https://www.pinecone.io/ .. _`Postgres`: https://www.postgresql.org/about/news/pgvector-070-released-2852/ diff --git a/src/store/src/Bridge/Cloudflare/Store.php b/src/store/src/Bridge/Cloudflare/Store.php new file mode 100644 index 000000000..0356cf1a9 --- /dev/null +++ b/src/store/src/Bridge/Cloudflare/Store.php @@ -0,0 +1,145 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +namespace Symfony\AI\Store\Bridge\Cloudflare; + +use Symfony\AI\Platform\Vector\NullVector; +use Symfony\AI\Platform\Vector\Vector; +use Symfony\AI\Store\Document\Metadata; +use Symfony\AI\Store\Document\VectorDocument; +use Symfony\AI\Store\Exception\InvalidArgumentException; +use Symfony\AI\Store\ManagedStoreInterface; +use Symfony\AI\Store\StoreInterface; +use Symfony\Component\Uid\Uuid; +use Symfony\Contracts\HttpClient\HttpClientInterface; + +/** + * @author Guillaume Loulier + */ +final readonly class Store implements ManagedStoreInterface, StoreInterface +{ + public function __construct( + private HttpClientInterface $httpClient, + private string $accountId, + #[\SensitiveParameter] private string $apiKey, + private string $index, + private int $dimensions = 1536, + private string $metric = 'cosine', + private string $endpointUrl = 'https://api.cloudflare.com/client/v4/accounts', + ) { + } + + public function setup(array $options = []): void + { + if ([] !== $options) { + throw new InvalidArgumentException('No supported options.'); + } + + $this->request('POST', 'vectorize/v2/indexes', [ + 'config' => [ + 'dimensions' => $this->dimensions, + 'metric' => $this->metric, + ], + 'name' => $this->index, + ]); + } + + public function drop(): void + { + $this->request('DELETE', \sprintf('vectorize/v2/indexes/%s', $this->index)); + } + + public function add(VectorDocument ...$documents): void + { + $payload = array_map( + $this->convertToIndexableArray(...), + $documents, + ); + + $this->request('POST', \sprintf('vectorize/v2/indexes/%s/upsert', $this->index), function () use ($payload) { + foreach ($payload as $entry) { + yield json_encode($entry, \JSON_THROW_ON_ERROR).\PHP_EOL; // throw instead of emitting an empty NDJSON line when encoding fails + } + }); + } + + public function query(Vector $vector, array $options = []): array + { + $results = $this->request('POST', \sprintf('vectorize/v2/indexes/%s/query', $this->index), [ + 'vector' => $vector->getData(), + 'returnValues' => true, + 'returnMetadata' => 'all', + ]); + + return 
array_map($this->convertToVectorDocument(...), $results['result']['matches']); + } + + /** + * @param array $payload + * + * @return array + */ + private function request(string $method, string $endpoint, \Closure|array $payload = []): array + { + $url = \sprintf('%s/%s/%s', $this->endpointUrl, $this->accountId, $endpoint); + + $options = [ + 'auth_bearer' => $this->apiKey, + ]; + + if ($payload instanceof \Closure) { + $options['headers'] = [ + 'Content-Type' => 'application/x-ndjson', + ]; + + $options['body'] = $payload(); + } + + if (\is_array($payload)) { + $options['json'] = $payload; + } + + $response = $this->httpClient->request($method, $url, $options); + + return $response->toArray(); + } + + /** + * @return array + */ + private function convertToIndexableArray(VectorDocument $document): array + { + return [ + 'id' => $document->id->toRfc4122(), + 'values' => $document->vector->getData(), + 'metadata' => $document->metadata->getArrayCopy(), + ]; + } + + /** + * @param array $data a single match of the query response; "id" is required, "values", "metadata" and "score" are treated as optional + */ + private function convertToVectorDocument(array $data): VectorDocument + { + $id = $data['id'] ?? throw new InvalidArgumentException('Missing "id" field in the document data.'); + + $vector = !\array_key_exists('values', $data) || null === $data['values'] + ? new NullVector() + : new Vector($data['values']); + + return new VectorDocument( + id: Uuid::fromString($id), + vector: $vector, + metadata: new Metadata($data['metadata'] ?? []), + score: $data['score'] ?? null + ); + } +} diff --git a/src/store/tests/Bridge/Cloudflare/StoreTest.php b/src/store/tests/Bridge/Cloudflare/StoreTest.php new file mode 100644 index 000000000..13a0c5e64 --- /dev/null +++ b/src/store/tests/Bridge/Cloudflare/StoreTest.php @@ -0,0 +1,245 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +namespace Symfony\AI\Store\Tests\Bridge\Cloudflare; + +use PHPUnit\Framework\Attributes\CoversClass; +use PHPUnit\Framework\Attributes\UsesClass; +use PHPUnit\Framework\TestCase; +use Symfony\AI\Platform\Vector\Vector; +use Symfony\AI\Store\Bridge\Cloudflare\Store; +use Symfony\AI\Store\Document\VectorDocument; +use Symfony\AI\Store\Exception\InvalidArgumentException; +use Symfony\Component\HttpClient\Exception\ClientException; +use Symfony\Component\HttpClient\MockHttpClient; +use Symfony\Component\HttpClient\Response\JsonMockResponse; +use Symfony\Component\Uid\Uuid; + +#[CoversClass(Store::class)] +#[UsesClass(VectorDocument::class)] +#[UsesClass(Vector::class)] +final class StoreTest extends TestCase +{ + public function testStoreCannotSetupWithExtraOptions() + { + $store = new Store( + new MockHttpClient(), + 'foo', + 'bar', + 'random' + ); + + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('No supported options.'); + $this->expectExceptionCode(0); + $store->setup([ + 'foo' => 'bar', + ]); + } + + public function testStoreCannotSetupOnInvalidResponse() + { + $mockHttpClient = new MockHttpClient([ + new JsonMockResponse([], [ + 'http_code' => 400, + ]), + ]); + + $store = new Store( + $mockHttpClient, + 'foo', + 'bar', + 'random', + ); + + $this->expectException(ClientException::class); + $this->expectExceptionMessage('HTTP 400 returned for "https://api.cloudflare.com/client/v4/accounts/foo/vectorize/v2/indexes".'); + $this->expectExceptionCode(400); + $store->setup(); + } + + public function testStoreCanSetup() + { + $mockHttpClient = new MockHttpClient([ + new JsonMockResponse([ + 'result' => [ + 'config' => [ + 'dimensions' => 1536, + 'metric' => 'cosine', + ], + 'name' => 'random', + ], + ]), + ]); + + $store = new Store( + $mockHttpClient, + 'foo', + 'bar', + 'random', + ); + + $store->setup(); + + $this->assertSame(1, $mockHttpClient->getRequestsCount()); + } + + public function 
testStoreCannotDropOnInvalidResponse() + { + $mockHttpClient = new MockHttpClient([ + new JsonMockResponse([], [ + 'http_code' => 400, + ]), + ]); + + $store = new Store( + $mockHttpClient, + 'foo', + 'bar', + 'random', + ); + + $this->expectException(ClientException::class); + $this->expectExceptionMessage('HTTP 400 returned for "https://api.cloudflare.com/client/v4/accounts/foo/vectorize/v2/indexes/random".'); + $this->expectExceptionCode(400); + $store->drop(); + } + + public function testStoreCanDrop() + { + $mockHttpClient = new MockHttpClient([ + new JsonMockResponse([ + 'messages' => [ + 'code' => 1000, + 'message' => 'foo', + ], + 'result' => [], + 'success' => true, + ]), + ]); + + $store = new Store( + $mockHttpClient, + 'foo', + 'bar', + 'random', + ); + + $store->drop(); + + $this->assertSame(1, $mockHttpClient->getRequestsCount()); + } + + public function testStoreCannotAddOnInvalidResponse() + { + $mockHttpClient = new MockHttpClient([ + new JsonMockResponse([], [ + 'http_code' => 400, + ]), + ]); + + $store = new Store( + $mockHttpClient, + 'foo', + 'bar', + 'random', + ); + + $this->expectException(ClientException::class); + $this->expectExceptionMessage('HTTP 400 returned for "https://api.cloudflare.com/client/v4/accounts/foo/vectorize/v2/indexes/random/upsert".'); + $this->expectExceptionCode(400); + $store->add(new VectorDocument(Uuid::v4(), new Vector([0.1, 0.2, 0.3]))); + } + + public function testStoreCanAdd() + { + $mockHttpClient = new MockHttpClient([ + new JsonMockResponse([ + 'result' => [ + 'mutationId' => '1', + ], + 'success' => true, + ], [ + 'http_code' => 200, + ]), + ]); + + $store = new Store( + $mockHttpClient, + 'foo', + 'bar', + 'random', + ); + + $store->add(new VectorDocument(Uuid::v4(), new Vector([0.1, 0.2, 0.3]))); + + $this->assertSame(1, $mockHttpClient->getRequestsCount()); + } + + public function testStoreCannotQueryOnInvalidResponse() + { + $mockHttpClient = new MockHttpClient([ + new JsonMockResponse([], [ + 
'http_code' => 400, + ]), + ]); + + $store = new Store( + $mockHttpClient, + 'foo', + 'bar', + 'random', + ); + + $this->expectException(ClientException::class); + $this->expectExceptionMessage('HTTP 400 returned for "https://api.cloudflare.com/client/v4/accounts/foo/vectorize/v2/indexes/random/query".'); + $this->expectExceptionCode(400); + $store->query(new Vector([0.1, 0.2, 0.3])); + } + + public function testStoreCanQuery() + { + $mockHttpClient = new MockHttpClient([ + new JsonMockResponse([ + 'result' => [ + 'matches' => [ + [ + 'score' => 1.0, + 'id' => Uuid::v4()->toRfc4122(), + 'values' => [0.1, 0.2, 0.3], + 'metadata' => [], + ], + [ + 'score' => 1.0, + 'id' => Uuid::v4()->toRfc4122(), + 'values' => [0.1, 0.2, 0.3], + 'metadata' => [], + ], + ], + ], + ], [ + 'http_code' => 200, + ]), + ]); + + $store = new Store( + $mockHttpClient, + 'foo', + 'bar', + 'random', + ); + + $results = $store->query(new Vector([0.1, 0.2, 0.3])); + + $this->assertCount(2, $results); + $this->assertSame(1, $mockHttpClient->getRequestsCount()); + } +}