diff --git a/README.md b/README.md index 77de62e..3c0f301 100644 --- a/README.md +++ b/README.md @@ -101,8 +101,11 @@ INSERT BULK INTO COLLECTION articles VALUES [{'text': '...'}, {'text': '...'}] -- Search SEARCH articles SIMILAR TO 'query' LIMIT 10 SEARCH articles SIMILAR TO 'query' LIMIT 10 WHERE year >= 2020 +SEARCH articles SIMILAR TO 'query' LIMIT 10 WHERE active = true SEARCH articles SIMILAR TO 'query' LIMIT 10 USING HYBRID SEARCH articles SIMILAR TO 'query' LIMIT 10 USING HYBRID FUSION 'dbsf' +SEARCH articles SIMILAR TO 'query' LIMIT 10 WITH { indexed_only: true } +SEARCH articles SIMILAR TO 'query' LIMIT 10 WITH { quantization: { ignore: true, oversampling: 2 } } SEARCH articles SIMILAR TO 'query' LIMIT 10 USING HYBRID RERANK -- Scroll diff --git a/docs/filters.md b/docs/filters.md index 2434546..a4f5961 100644 --- a/docs/filters.md +++ b/docs/filters.md @@ -12,6 +12,9 @@ The `WHERE` clause lets you filter on any payload field using SQL-style predicat -- Exact match SEARCH articles SIMILAR TO 'ml' LIMIT 10 WHERE category = 'paper' +-- Boolean match +SEARCH articles SIMILAR TO 'ml' LIMIT 10 WHERE active = true + -- Not equal SEARCH articles SIMILAR TO 'ml' LIMIT 10 WHERE status != 'draft' ``` @@ -43,6 +46,7 @@ SEARCH articles SIMILAR TO 'history of ai' LIMIT 10 WHERE year BETWEEN 2018 AND ```sql SEARCH articles SIMILAR TO 'retrieval' LIMIT 10 WHERE status IN ('published', 'reviewed') SEARCH articles SIMILAR TO 'retrieval' LIMIT 10 WHERE status NOT IN ('deleted', 'archived') +SEARCH articles SIMILAR TO 'retrieval' LIMIT 10 WHERE active IN (true, false) ``` --- diff --git a/docs/programmatic.md b/docs/programmatic.md index 4eadae1..e44b0f7 100644 --- a/docs/programmatic.md +++ b/docs/programmatic.md @@ -138,7 +138,7 @@ class ExecutionResult: | INSERT BULK | `None` (count in `result.message`) | | SELECT | `{"id": str, "payload": dict}` or `None` when not found | | SEARCH | `[{"id": str, "score": float, "payload": dict}, ...]` | -| SCROLL | `{"points": [{"id": str, "payload": dict}, ...], "next_offset": str \| None}` | +| SCROLL | `{"points": [{"id": str, "payload": dict}, ...], "next_offset": str \| int \| None}` | | RECOMMEND | `[{"id": str, "score": float, "payload": dict}, ...]` | | SHOW COLLECTIONS | `["name1", "name2", ...]` | | SHOW COLLECTION | `{"name": str, "status": str, "points_count": int \| None, "indexed_vectors_count": int \| None, "segments_count": int, "topology": str, "vectors": dict, "sparse_vectors": dict \| None, "quantization": str \| None, "hnsw_config": dict, "payload_schema": dict \| None, "sharding": dict}` | diff --git a/docs/search.md b/docs/search.md index f973cd3..d60c842 100644 --- a/docs/search.md +++ b/docs/search.md @@ -17,7 +17,7 @@ SEARCH SIMILAR TO '' LIMIT USING HYBRID SEARCH SIMILAR TO '' LIMIT USING HYBRID [FUSION 'rrf|dbsf'] [DENSE MODEL ''] [SPARSE MODEL ''] [WHERE ] SEARCH SIMILAR TO '' LIMIT USING SPARSE [MODEL ''] SEARCH SIMILAR TO '' LIMIT EXACT -SEARCH SIMILAR TO '' LIMIT [USING ...] [WHERE ] [RERANK] WITH { hnsw_ef: , exact: true|false, acorn: true|false } +SEARCH SIMILAR TO '' LIMIT [USING ...] [WHERE ] [RERANK] WITH { hnsw_ef: , exact: true|false, acorn: true|false, indexed_only: true|false, quantization: { ignore: true|false, rescore: true|false, oversampling: } } SEARCH SIMILAR TO '' LIMIT [USING ...] [WHERE ] RERANK [MODEL ''] ``` @@ -102,10 +102,12 @@ Use these when you want to debug retrieval quality or tune recall without changi | `WITH { hnsw_ef: 128 }` | Increase HNSW exploration at query time | | `WITH { exact: true }` | Force exact KNN explicitly | | `WITH { acorn: true }` | Enable ACORN for filtered queries | +| `WITH { indexed_only: true }` | Restrict the query to indexed segments only | +| `WITH { quantization: { ... } }` | Tune quantized-search behavior at query time | - `EXACT` can appear after `LIMIT` or after `RERANK` - `WITH { ... }` can appear after `WHERE` and/or `RERANK` -- Supported `WITH` keys are only `hnsw_ef`, `exact`, and `acorn` +- Supported top-level `WITH` keys are `hnsw_ef`, `exact`, `acorn`, `indexed_only`, and `quantization` ```sql -- Exact KNN baseline @@ -116,6 +118,12 @@ SEARCH articles SIMILAR TO 'transformers' LIMIT 10 WITH { hnsw_ef: 256 } -- Filtered search with ACORN SEARCH articles SIMILAR TO 'RAG' LIMIT 10 WHERE tag = 'li' WITH { acorn: true } + +-- Restrict to indexed segments only +SEARCH articles SIMILAR TO 'retrieval' LIMIT 10 WITH { indexed_only: true } + +-- Quantized-search tuning +SEARCH articles SIMILAR TO 'vector db' LIMIT 10 WITH { quantization: { ignore: true, oversampling: 2 } } ``` --- @@ -142,6 +150,7 @@ SCROLL FROM articles AFTER 'cursor-id' LIMIT 50 **Behavior:** - Returns points in ID order with payloads. - Returns a `next_offset` cursor when more points are available. +- `next_offset` preserves the native point-id type (`string` or integer). - Use `AFTER ` to fetch the next page. --- @@ -230,7 +239,7 @@ RECOMMEND FROM POSITIVE IDS (, ...) STRATEGY '' RECOMMEND FROM POSITIVE IDS (, ...) LIMIT WHERE RECOMMEND FROM POSITIVE IDS (, ...) LIMIT OFFSET RECOMMEND FROM POSITIVE IDS (, ...) LIMIT SCORE THRESHOLD -RECOMMEND FROM POSITIVE IDS (, ...) LIMIT WITH { exact: true, hnsw_ef: } +RECOMMEND FROM POSITIVE IDS (, ...) LIMIT WITH { exact: true, hnsw_ef: , indexed_only: true|false, quantization: { ignore: true|false, rescore: true|false, oversampling: } } RECOMMEND FROM POSITIVE IDS (, ...) LIMIT LOOKUP FROM RECOMMEND FROM POSITIVE IDS (, ...) LIMIT LOOKUP FROM VECTOR '' RECOMMEND FROM POSITIVE IDS (, ...) LIMIT USING '' diff --git a/src/qql/ast_nodes.py b/src/qql/ast_nodes.py index cfb817c..935c56b 100644 --- a/src/qql/ast_nodes.py +++ b/src/qql/ast_nodes.py @@ -27,6 +27,15 @@ class SearchWith: hnsw_ef: int | None = None exact: bool = False acorn: bool = False + indexed_only: bool = False + quantization: "QuantizationSearchWith | None" = None + + +@dataclass(frozen=True) +class QuantizationSearchWith: + ignore: bool | None = None + rescore: bool | None = None + oversampling: float | None = None # ── Filter expression leaf nodes ────────────────────────────────────────────── @@ -36,7 +45,7 @@ class CompareExpr: """field op literal — covers =, !=, >, >=, <, <=""" field: str op: str # one of: "=", "!=", ">", ">=", "<", "<=" - value: str | int | float + value: str | int | float | bool @dataclass(frozen=True) @@ -51,14 +60,14 @@ class BetweenExpr: class InExpr: """field IN (v1, v2, ...)""" field: str - values: tuple[str | int | float, ...] + values: tuple[str | int | float | bool, ...] @dataclass(frozen=True) class NotInExpr: """field NOT IN (v1, v2, ...)""" field: str - values: tuple[str | int | float, ...] + values: tuple[str | int | float | bool, ...] @dataclass(frozen=True) diff --git a/src/qql/cli.py b/src/qql/cli.py index e88e81a..f3357c1 100644 --- a/src/qql/cli.py +++ b/src/qql/cli.py @@ -70,7 +70,7 @@ Optional: [yellow]WHERE[/yellow] (e.g. WHERE year > 2020 AND status = 'ok') Optional: [yellow]RERANK[/yellow] [MODEL ''] rerank results with a cross-encoder Optional: [yellow]EXACT[/yellow] bypass HNSW and perform exact search - Optional: [yellow]WITH[/yellow] { hnsw_ef: , exact: , acorn: } search parameters + Optional: [yellow]WITH[/yellow] { hnsw_ef: , exact: , acorn: , indexed_only: , quantization: { ignore: , rescore: , oversampling: } } search parameters Optional: [yellow]GROUP BY[/yellow] [[yellow]GROUP_SIZE[/yellow] ] Group results by a payload field value (default GROUP_SIZE: 3). Field must be keyword or integer type. RERANK and GROUP BY cannot be combined. diff --git a/src/qql/executor.py b/src/qql/executor.py index b4e8651..944c59c 100644 --- a/src/qql/executor.py +++ b/src/qql/executor.py @@ -35,6 +35,7 @@ Prefetch, ProductQuantization, ProductQuantizationConfig, + QuantizationSearchParams, Range, RecommendInput, RecommendQuery, @@ -559,7 +560,7 @@ def _execute_scroll(self, node: ScrollStmt) -> ExecutionResult: return ExecutionResult( success=True, message=f"Scrolled {len(points)} point(s) from '{node.collection}'", - data={"points": points, "next_offset": None if next_offset is None else str(next_offset)}, + data={"points": points, "next_offset": next_offset}, ) def _execute_select(self, node: SelectStmt) -> ExecutionResult: @@ -678,6 +679,7 @@ def _execute_search(self, node: SearchStmt) -> ExecutionResult: using="sparse", limit=fetch_limit, query_filter=qdrant_filter, + search_params=search_params, ) except UnexpectedResponse as e: raise QQLRuntimeError(f"Qdrant error during SEARCH: {e}") from e @@ -825,9 +827,18 @@ def _execute_recommend(self, node: RecommendStmt) -> ExecutionResult: def _build_search_params(self, with_clause: SearchWith | None) -> SearchParams | None: if with_clause is None: return None + quantization = None + if with_clause.quantization is not None: + quantization = QuantizationSearchParams( + ignore=with_clause.quantization.ignore, + rescore=with_clause.quantization.rescore, + oversampling=with_clause.quantization.oversampling, + ) return SearchParams( hnsw_ef=with_clause.hnsw_ef, exact=with_clause.exact, + quantization=quantization, + indexed_only=True if with_clause.indexed_only else None, acorn=AcornSearchParams(enable=True) if with_clause.acorn else None, ) diff --git a/src/qql/parser.py b/src/qql/parser.py index 4add845..7f9f187 100644 --- a/src/qql/parser.py +++ b/src/qql/parser.py @@ -23,6 +23,7 @@ NotExpr, NotInExpr, OrExpr, + QuantizationSearchWith, QuantizationConfig, QuantizationType, RecommendStmt, @@ -414,6 +415,8 @@ def _parse_search(self) -> SearchStmt: hnsw_ef=with_clause.hnsw_ef, exact=True, acorn=with_clause.acorn, + indexed_only=with_clause.indexed_only, + quantization=with_clause.quantization, ) if self._peek().kind == TokenKind.WITH: self._advance() # consume WITH @@ -425,6 +428,8 @@ def _parse_search(self) -> SearchStmt: hnsw_ef=parsed_with.hnsw_ef or with_clause.hnsw_ef, exact=parsed_with.exact or with_clause.exact, acorn=parsed_with.acorn or with_clause.acorn, + indexed_only=parsed_with.indexed_only or with_clause.indexed_only, + quantization=parsed_with.quantization or with_clause.quantization, ) group_by: str | None = None group_size: int = 3 @@ -760,8 +765,8 @@ def _parse_field_path(self) -> str: f"Expected a field name, got '{tok.value}'", tok.pos ) - def _parse_literal(self) -> str | int | float: - """STRING | INTEGER | FLOAT""" + def _parse_literal(self) -> str | int | float | bool: + """STRING | INTEGER | FLOAT | boolean""" tok = self._peek() if tok.kind == TokenKind.STRING: self._advance() @@ -772,8 +777,16 @@ def _parse_literal(self) -> str | int | float: if tok.kind == TokenKind.FLOAT: self._advance() return float(tok.value) + if tok.kind == TokenKind.IDENTIFIER: + upper = tok.value.upper() + if upper == "TRUE": + self._advance() + return True + if upper == "FALSE": + self._advance() + return False raise QQLSyntaxError( - f"Expected a literal value (string, integer, or float), got '{tok.value}'", + f"Expected a literal value (string, integer, float, or boolean), got '{tok.value}'", tok.pos, ) @@ -790,10 +803,10 @@ def _parse_number(self) -> int | float: f"Expected a number, got '{tok.value}'", tok.pos ) - def _parse_literal_list(self) -> list[str | int | float]: + def _parse_literal_list(self) -> list[str | int | float | bool]: """'(' literal { ',' literal } [','] ')' — used by IN / NOT IN.""" self._expect(TokenKind.LPAREN) - items: list[str | int | float] = [] + items: list[str | int | float | bool] = [] if self._peek().kind == TokenKind.RPAREN: self._advance() return items @@ -942,13 +955,15 @@ def _parse_value(self) -> Any: return self._parse_list() raise QQLSyntaxError(f"Unexpected value token '{tok.value}'", tok.pos) - # ── WITH clause: { hnsw_ef: N, exact: true, acorn: true } ── + # ── WITH clause: { hnsw_ef: N, exact: true, acorn: true, ... } ── def _parse_with_clause(self) -> SearchWith: self._expect(TokenKind.LBRACE) hnsw_ef: int | None = None exact: bool = False acorn: bool = False + indexed_only: bool = False + quantization: QuantizationSearchWith | None = None while self._peek().kind != TokenKind.RBRACE: key_tok = self._peek() if key_tok.kind not in ( @@ -969,9 +984,14 @@ def _parse_with_clause(self) -> SearchWith: exact = self._parse_bool() elif key == "acorn": acorn = self._parse_bool() + elif key == "indexed_only": + indexed_only = self._parse_bool() + elif key == "quantization": + quantization = self._parse_quantization_search_with() else: raise QQLSyntaxError( - f"Unknown WITH parameter '{key}'. Expected: hnsw_ef, exact, acorn", + "Unknown WITH parameter " + f"'{key}'. Expected: hnsw_ef, exact, acorn, indexed_only, quantization", key_tok.pos, ) if self._peek().kind == TokenKind.COMMA: @@ -985,6 +1005,44 @@ def _parse_with_clause(self) -> SearchWith: hnsw_ef=hnsw_ef, exact=exact, acorn=acorn, + indexed_only=indexed_only, + quantization=quantization, + ) + + def _parse_quantization_search_with(self) -> QuantizationSearchWith: + self._expect(TokenKind.LBRACE) + ignore: bool | None = None + rescore: bool | None = None + oversampling: float | None = None + + while self._peek().kind != TokenKind.RBRACE: + key_tok = self._expect(TokenKind.IDENTIFIER) + key = key_tok.value.lower() + self._expect(TokenKind.COLON) + if key == "ignore": + ignore = self._parse_bool() + elif key == "rescore": + rescore = self._parse_bool() + elif key == "oversampling": + oversampling = float(self._parse_number()) + else: + raise QQLSyntaxError( + "Unknown quantization parameter " + f"'{key}'. Expected: ignore, rescore, oversampling", + key_tok.pos, + ) + if self._peek().kind == TokenKind.COMMA: + self._advance() + if self._peek().kind == TokenKind.RBRACE: + break + else: + break + + self._expect(TokenKind.RBRACE) + return QuantizationSearchWith( + ignore=ignore, + rescore=rescore, + oversampling=oversampling, ) def _parse_bool(self) -> bool: diff --git a/tests/test_executor.py b/tests/test_executor.py index 294f93a..cfae5af 100644 --- a/tests/test_executor.py +++ b/tests/test_executor.py @@ -8,6 +8,7 @@ InsertBulkStmt, InsertStmt, QuantizationConfig, + QuantizationSearchWith, QuantizationType, RecommendStmt, SelectStmt, @@ -606,6 +607,19 @@ def test_scroll_returns_points_and_next_offset(self, executor, mock_client, mock "next_offset": "next-1", } + def test_scroll_preserves_numeric_next_offset_type(self, executor, mock_client, mocker): + mock_client.collection_exists.return_value = True + rec = mocker.MagicMock() + rec.id = 1 + rec.payload = {"text": "first"} + mock_client.scroll.return_value = ([rec], 42) + + node = ScrollStmt(collection="notes", limit=1) + result = executor.execute(node) + + assert result.success is True + assert result.data["next_offset"] == 42 + def test_scroll_with_after_and_filter(self, executor, mock_client, mocker): from qql.ast_nodes import CompareExpr from qdrant_client.models import Filter @@ -728,6 +742,57 @@ def test_search_with_acorn_forwards_search_params( assert search_params.hnsw_ef == 128 assert search_params.acorn.enable is True + def test_search_with_indexed_only_and_quantization_forwards_search_params( + self, executor, mock_client, mocker + ): + mock_client.collection_exists.return_value = True + mock_response = mocker.MagicMock() + mock_response.points = [] + mock_client.query_points.return_value = mock_response + + node = SearchStmt( + collection="notes", + query_text="hello", + limit=5, + model=None, + with_clause=SearchWith( + indexed_only=True, + quantization=QuantizationSearchWith( + ignore=True, + rescore=False, + oversampling=2.5, + ), + ), + ) + executor.execute(node) + + search_params = mock_client.query_points.call_args.kwargs["search_params"] + assert search_params.indexed_only is True + assert search_params.quantization is not None + assert search_params.quantization.ignore is True + assert search_params.quantization.rescore is False + assert search_params.quantization.oversampling == pytest.approx(2.5) + + def test_sparse_search_forwards_search_params(self, executor, mock_client, mocker): + mock_client.collection_exists.return_value = True + mock_response = mocker.MagicMock() + mock_response.points = [] + mock_client.query_points.return_value = mock_response + + node = SearchStmt( + collection="notes", + query_text="hello", + limit=5, + model=None, + sparse_only=True, + with_clause=SearchWith(exact=True, indexed_only=True), + ) + executor.execute(node) + + search_params = mock_client.query_points.call_args.kwargs["search_params"] + assert search_params.exact is True + assert search_params.indexed_only is True + def test_dense_search_against_hybrid_collection_uses_dense_vector_name( self, executor, mock_client, mocker ): @@ -940,6 +1005,27 @@ def test_recommend_forwards_search_params(self, executor, mock_client, mocker): assert search_params.exact is True assert search_params.hnsw_ef == 128 + def test_recommend_forwards_indexed_only_and_quantization(self, executor, mock_client, mocker): + mock_client.collection_exists.return_value = True + mock_response = mocker.MagicMock() + mock_response.points = [] + mock_client.query_points.return_value = mock_response + + node = RecommendStmt( + collection="notes", + positive_ids=("a",), + limit=5, + with_clause=SearchWith( + indexed_only=True, + quantization=QuantizationSearchWith(rescore=True), + ), + ) + executor.execute(node) + search_params = mock_client.query_points.call_args.kwargs["search_params"] + assert search_params.indexed_only is True + assert search_params.quantization is not None + assert search_params.quantization.rescore is True + def test_recommend_offset_zero_passes_none(self, executor, mock_client, mocker): mock_client.collection_exists.return_value = True mock_response = mocker.MagicMock() diff --git a/tests/test_parser.py b/tests/test_parser.py index 1320fe1..0983ea7 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -336,6 +336,16 @@ def test_recommend_with_clause_hnsw_ef(self): assert node.with_clause is not None assert node.with_clause.hnsw_ef == 128 + def test_recommend_with_indexed_only_and_quantization(self): + node = parse( + "RECOMMEND FROM notes POSITIVE IDS ('a') LIMIT 10 " + "WITH { indexed_only: true, quantization: { rescore: true } }" + ) + assert node.with_clause is not None + assert node.with_clause.indexed_only is True + assert node.with_clause.quantization is not None + assert node.with_clause.quantization.rescore is True + def test_recommend_lookup_from(self): node = parse( "RECOMMEND FROM target_collection POSITIVE IDS ('a') " @@ -459,6 +469,20 @@ def test_in_expr(self): assert f.field == "status" assert f.values == ("a", "b") + def test_boolean_equality_filter(self): + node = parse("SEARCH docs SIMILAR TO 'ml' LIMIT 5 WHERE active = true") + f = node.query_filter + assert isinstance(f, CompareExpr) + assert f.field == "active" + assert f.op == "=" + assert f.value is True + + def test_boolean_in_expr(self): + node = parse("SEARCH docs SIMILAR TO 'x' LIMIT 5 WHERE active IN (true, false)") + f = node.query_filter + assert isinstance(f, InExpr) + assert f.values == (True, False) + def test_in_with_trailing_comma(self): node = parse("SEARCH docs SIMILAR TO 'x' LIMIT 5 WHERE status IN ('a', 'b',)") assert isinstance(node.query_filter, InExpr) @@ -899,6 +923,22 @@ def test_with_multiple_params(self): assert node.with_clause.hnsw_ef == 256 assert node.with_clause.acorn is True + def test_with_indexed_only(self): + node = parse("SEARCH col SIMILAR TO 'q' LIMIT 5 WITH { indexed_only: true }") + assert node.with_clause is not None + assert node.with_clause.indexed_only is True + + def test_with_quantization(self): + node = parse( + "SEARCH col SIMILAR TO 'q' LIMIT 5 " + "WITH { quantization: { ignore: true, rescore: false, oversampling: 2 } }" + ) + assert node.with_clause is not None + assert node.with_clause.quantization is not None + assert node.with_clause.quantization.ignore is True + assert node.with_clause.quantization.rescore is False + assert node.with_clause.quantization.oversampling == pytest.approx(2.0) + def test_with_after_where(self): node = parse( "SEARCH col SIMILAR TO 'q' LIMIT 5 WHERE year > 2020 WITH { hnsw_ef: 128 }" @@ -933,6 +973,13 @@ def test_with_trailing_comma(self): node = parse("SEARCH col SIMILAR TO 'q' LIMIT 5 WITH { hnsw_ef: 256, }") assert node.with_clause.hnsw_ef == 256 + def test_with_quantization_unknown_key_raises(self): + with pytest.raises(QQLSyntaxError): + parse( + "SEARCH col SIMILAR TO 'q' LIMIT 5 " + "WITH { quantization: { unknown: true } }" + ) + class TestSparseOnlySearch: def test_using_sparse_sets_flag(self):