Skip to content

Commit ca8f161

Browse files
committed
feat: Add ability to skip searches if results exist
1 parent ce5b6d8 commit ca8f161

File tree

2 files changed, with 22 additions and 2 deletions.

engine/base_client/client.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import json
22
from datetime import datetime
3+
from pathlib import Path
34
from typing import List
45

56
from benchmark import ROOT_DIR
@@ -54,7 +55,11 @@ def save_upload_results(
5455
out.write(json.dumps(upload_stats, indent=2))
5556

5657
def run_experiment(
57-
self, dataset: Dataset, skip_upload: bool = False, skip_search: bool = False
58+
self,
59+
dataset: Dataset,
60+
skip_upload: bool = False,
61+
skip_search: bool = False,
62+
skip_if_exists: bool = False,
5863
):
5964
execution_params = self.configurator.execution_params(
6065
distance=dataset.config.distance, vector_size=dataset.config.vector_size
@@ -82,6 +87,18 @@ def run_experiment(
8287
if not skip_search:
8388
print("Experiment stage: Search")
8489
for search_id, searcher in enumerate(self.searchers):
90+
91+
if skip_if_exists:
92+
existing_results = RESULTS_DIR.glob(
93+
f"{self.name}-{dataset.config.name}-search-{search_id}-*.json"
94+
)
95+
if len(existing_results) == 1:
96+
print(
97+
f"Skipping search {search_id} as it already exists in",
98+
existing_results[0],
99+
)
100+
continue
101+
85102
search_params = {**searcher.search_params}
86103
search_stats = searcher.search_all(
87104
dataset.config.distance, reader.read_queries()

run.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ def run(
2020
host: str = "localhost",
2121
skip_upload: bool = False,
2222
skip_search: bool = False,
23+
skip_if_exists: bool = True,
2324
exit_on_error: bool = True,
2425
timeout: float = 86400.0,
2526
):
@@ -49,7 +50,9 @@ def run(
4950
dataset.download()
5051
try:
5152
with stopit.ThreadingTimeout(timeout) as tt:
52-
client.run_experiment(dataset, skip_upload, skip_search)
53+
client.run_experiment(
54+
dataset, skip_upload, skip_search, skip_if_exists
55+
)
5356

5457
# If the timeout is reached, the server might be still in the
5558
# middle of some background processing, like creating the index.

0 commit comments

Comments
 (0)