From c59f31a03d5abe7a721a3b4970b683f55d09dc37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Po=C5=BAniak?= Date: Thu, 5 Jun 2025 00:20:12 -0700 Subject: [PATCH 1/4] Add script for creating vectorsets configuration, do not flush database on init, make noquant default --- engine/clients/vectorsets/configure.py | 2 - engine/clients/vectorsets/upload.py | 2 +- .../configurations/create-vectorsets.py | 38 +++++++++++++++++++ 3 files changed, 39 insertions(+), 3 deletions(-) create mode 100644 experiments/configurations/create-vectorsets.py diff --git a/engine/clients/vectorsets/configure.py b/engine/clients/vectorsets/configure.py index 5c5a06ae..95d111ba 100644 --- a/engine/clients/vectorsets/configure.py +++ b/engine/clients/vectorsets/configure.py @@ -20,7 +20,6 @@ def __init__(self, host, collection_params: dict, connection_params: dict): self.client = redis_constructor( host=host, port=REDIS_PORT, password=REDIS_AUTH, username=REDIS_USER ) - self.client.flushall() def clean(self): conns = [self.client] @@ -30,7 +29,6 @@ def clean(self): for node in self.client.get_primaries() ] for conn in conns: - index = conn.ft() try: conn.flushall() except redis.ResponseError as e: diff --git a/engine/clients/vectorsets/upload.py b/engine/clients/vectorsets/upload.py index ccd16dd9..a46d87dc 100644 --- a/engine/clients/vectorsets/upload.py +++ b/engine/clients/vectorsets/upload.py @@ -33,7 +33,7 @@ def upload_batch( hnsw_params = upload_params.get("hnsw_config") M = hnsw_params.get("M", 16) efc = hnsw_params.get("EF_CONSTRUCTION", 200) - quant = hnsw_params.get("quant") + quant = hnsw_params.get("quant", "NOQUANT") p = cls.client.pipeline(transaction=False) for i in range(len(ids)): diff --git a/experiments/configurations/create-vectorsets.py b/experiments/configurations/create-vectorsets.py new file mode 100644 index 00000000..23db1855 --- /dev/null +++ b/experiments/configurations/create-vectorsets.py @@ -0,0 +1,38 @@ +import json + +ms = [16] +ef_constructs = [100] +ef_runtimes = [20, 40, 80] +# qants = ["NOQUANT", "Q8", "BIN"] +qants = ["NOQUANT"] +configs = [] +for m in ms: + for ef_construct in ef_constructs: + for quant in qants: + config = { + "name": f"redis-intel-vectorsets-m-{m}-ef-{ef_construct}-quant-{quant}", + "engine": "vectorsets", + "connection_params": {}, + "collection_params": {}, + "search_params": [], + "upload_params": { + "parallel": 128, + "hnsw_config": { + "M": m, + "EF_CONSTRUCTION": ef_construct, + "quant": quant, + }, + }, + } + for client in [1, 8]: + for ef_runtime in ef_runtimes: + test_config = { + "parallel": client, + "search_params": {"ef": ef_runtime}, + } + config["search_params"].append(test_config) + configs.append(config) + fname = f"redis-intel-vectorsets.json" + with open(fname, "w") as json_fd: + json.dump(configs, json_fd, indent=2) + print(f"created {len(configs)} configs for {fname}.") From e4a02ad8bbdc30a3480216c3f8fa3a120bf072c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Po=C5=BAniak?= Date: Thu, 5 Jun 2025 01:45:55 -0700 Subject: [PATCH 2/4] Add topKs to create-vectorsets script --- experiments/configurations/create-vectorsets.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/experiments/configurations/create-vectorsets.py b/experiments/configurations/create-vectorsets.py index 23db1855..70393e7e 100644 --- a/experiments/configurations/create-vectorsets.py +++ b/experiments/configurations/create-vectorsets.py @@ -2,10 +2,11 @@ ms = [16] ef_constructs = [100] -ef_runtimes = [20, 40, 80] +ef_runtimes = [40, 80] # qants = ["NOQUANT", "Q8", "BIN"] qants = ["NOQUANT"] configs = [] +topKs = [10] for m in ms: for ef_construct in ef_constructs: for quant in qants: @@ -25,12 +26,14 @@ }, } for client in [1, 8]: - for ef_runtime in ef_runtimes: - test_config = { - "parallel": client, - "search_params": {"ef": ef_runtime}, - } - config["search_params"].append(test_config) + for top in topKs: + for ef_runtime in ef_runtimes: + test_config = { + "top": top, + "parallel": client, + "search_params": {"ef": ef_runtime}, + } + config["search_params"].append(test_config) configs.append(config) fname = f"redis-intel-vectorsets.json" with open(fname, "w") as json_fd: From 45454b1a31814eedebaec5035d0de684b3fd1937 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Po=C5=BAniak?= Date: Mon, 9 Jun 2025 01:34:39 -0700 Subject: [PATCH 3/4] Add get_memory_usage function to vectorsets --- engine/clients/vectorsets/upload.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/engine/clients/vectorsets/upload.py b/engine/clients/vectorsets/upload.py index a46d87dc..057f5c03 100644 --- a/engine/clients/vectorsets/upload.py +++ b/engine/clients/vectorsets/upload.py @@ -23,7 +23,15 @@ def init_client(cls, host, distance, connection_params, upload_params): cls.client = redis_constructor( host=host, port=REDIS_PORT, password=REDIS_AUTH, username=REDIS_USER ) + cls.client_decode = redis_constructor( + host=host, + port=REDIS_PORT, + password=REDIS_AUTH, + username=REDIS_USER, + decode_responses=True, + ) cls.upload_params = upload_params + cls._is_cluster = True if REDIS_CLUSTER else False @classmethod def upload_batch( @@ -46,3 +54,18 @@ def upload_batch( @classmethod def post_upload(cls, _distance): return {} + + def get_memory_usage(cls): + used_memory = [] + conns = [cls.client_decode] + if cls._is_cluster: + conns = [ + cls.client_decode.get_redis_connection(node) + for node in cls.client_decode.get_primaries() + ] + for conn in conns: + used_memory_shard = conn.info("memory")["used_memory"] + used_memory.append(used_memory_shard) + + return {"used_memory": sum(used_memory), + "shards": len(used_memory)} \ No newline at end of file From 97519297eab69811ca19ad1d68c503cdca6e6e59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Po=C5=BAniak?= Date: Mon, 9 Jun 2025 01:36:40 -0700 Subject: [PATCH 4/4] Add empty line --- engine/clients/vectorsets/upload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/engine/clients/vectorsets/upload.py b/engine/clients/vectorsets/upload.py index 057f5c03..aec62dfb 100644 --- a/engine/clients/vectorsets/upload.py +++ b/engine/clients/vectorsets/upload.py @@ -68,4 +68,4 @@ def get_memory_usage(cls): used_memory.append(used_memory_shard) return {"used_memory": sum(used_memory), - "shards": len(used_memory)} \ No newline at end of file + "shards": len(used_memory)}