diff --git a/README.md b/README.md index 3c0f301..9493022 100644 --- a/README.md +++ b/README.md @@ -102,6 +102,7 @@ INSERT BULK INTO COLLECTION articles VALUES [{'text': '...'}, {'text': '...'}] SEARCH articles SIMILAR TO 'query' LIMIT 10 SEARCH articles SIMILAR TO 'query' LIMIT 10 WHERE year >= 2020 SEARCH articles SIMILAR TO 'query' LIMIT 10 WHERE active = true +SEARCH articles SIMILAR TO 'query' LIMIT 10 WITH { mmr_diversity: 0.5, mmr_candidates: 50 } SEARCH articles SIMILAR TO 'query' LIMIT 10 USING HYBRID SEARCH articles SIMILAR TO 'query' LIMIT 10 USING HYBRID FUSION 'dbsf' SEARCH articles SIMILAR TO 'query' LIMIT 10 WITH { indexed_only: true } diff --git a/docs/search.md b/docs/search.md index d60c842..6a05b63 100644 --- a/docs/search.md +++ b/docs/search.md @@ -17,7 +17,7 @@ SEARCH SIMILAR TO '' LIMIT USING HYBRID SEARCH SIMILAR TO '' LIMIT USING HYBRID [FUSION 'rrf|dbsf'] [DENSE MODEL ''] [SPARSE MODEL ''] [WHERE ] SEARCH SIMILAR TO '' LIMIT USING SPARSE [MODEL ''] SEARCH SIMILAR TO '' LIMIT EXACT -SEARCH SIMILAR TO '' LIMIT [USING ...] [WHERE ] [RERANK] WITH { hnsw_ef: , exact: true|false, acorn: true|false, indexed_only: true|false, quantization: { ignore: true|false, rescore: true|false, oversampling: } } +SEARCH SIMILAR TO '' LIMIT [USING ...] [WHERE ] [RERANK] WITH { hnsw_ef: , exact: true|false, acorn: true|false, indexed_only: true|false, quantization: { ignore: true|false, rescore: true|false, oversampling: }, mmr_diversity: <0..1>, mmr_candidates: } SEARCH SIMILAR TO '' LIMIT [USING ...] [WHERE ] RERANK [MODEL ''] ``` @@ -55,6 +55,11 @@ Search with query-time HNSW tuning: SEARCH articles SIMILAR TO 'attention mechanism' LIMIT 10 WITH { hnsw_ef: 128 } ``` +Search with native MMR diversification: +```sql +SEARCH articles SIMILAR TO 'attention mechanism' LIMIT 10 WITH { mmr_diversity: 0.5, mmr_candidates: 50 } +``` + **Output:** Results are displayed as a table with three columns: @@ -102,12 +107,14 @@ Use these when you want to debug retrieval quality or tune recall without changi | `WITH { hnsw_ef: 128 }` | Increase HNSW exploration at query time | | `WITH { exact: true }` | Force exact KNN explicitly | | `WITH { acorn: true }` | Enable ACORN for filtered queries | -| `WITH { indexed_only: true }` | Restrict the query to indexed segments only | -| `WITH { quantization: { ... } }` | Tune quantized-search behavior at query time | +| `WITH { indexed_only: true, quantization: { rescore: true } }` | Prefer indexed vectors and apply quantization controls | +| `WITH { mmr_diversity: 0.5, mmr_candidates: 50 }` | Apply native MMR diversification after nearest-neighbor retrieval | - `EXACT` can appear after `LIMIT` or after `RERANK` - `WITH { ... }` can appear after `WHERE` and/or `RERANK` -- Supported top-level `WITH` keys are `hnsw_ef`, `exact`, `acorn`, `indexed_only`, and `quantization` +- Supported top-level `WITH` keys are `hnsw_ef`, `exact`, `acorn`, `indexed_only`, `quantization`, `mmr_diversity`, and `mmr_candidates` +- MMR is currently supported for dense `SEARCH` and dense `SEARCH ... GROUP BY` +- MMR is not yet supported with `USING HYBRID`, `USING SPARSE`, or `RECOMMEND` ```sql -- Exact KNN baseline @@ -124,6 +131,9 @@ SEARCH articles SIMILAR TO 'retrieval' LIMIT 10 WITH { indexed_only: true } -- Quantized-search tuning SEARCH articles SIMILAR TO 'vector db' LIMIT 10 WITH { quantization: { ignore: true, oversampling: 2 } } + +-- Diversify top-k results with native MMR +SEARCH articles SIMILAR TO 'retrieval systems' LIMIT 10 WITH { mmr_diversity: 0.5, mmr_candidates: 50 } ``` --- diff --git a/src/qql/ast_nodes.py b/src/qql/ast_nodes.py index 935c56b..9c92adf 100644 --- a/src/qql/ast_nodes.py +++ b/src/qql/ast_nodes.py @@ -29,6 +29,8 @@ class SearchWith: acorn: bool = False indexed_only: bool = False quantization: "QuantizationSearchWith | None" = None + mmr_diversity: float | None = None + mmr_candidates: int | None = None @dataclass(frozen=True) diff --git a/src/qql/cli.py b/src/qql/cli.py index f3357c1..457a497 100644 --- a/src/qql/cli.py +++ b/src/qql/cli.py @@ -70,7 +70,7 @@ Optional: [yellow]WHERE[/yellow] (e.g. WHERE year > 2020 AND status = 'ok') Optional: [yellow]RERANK[/yellow] [MODEL ''] rerank results with a cross-encoder Optional: [yellow]EXACT[/yellow] bypass HNSW and perform exact search - Optional: [yellow]WITH[/yellow] { hnsw_ef: , exact: , acorn: , indexed_only: , quantization: { ignore: , rescore: , oversampling: } } search parameters + Optional: [yellow]WITH[/yellow] { hnsw_ef: , exact: , acorn: , indexed_only: , quantization: { ignore: , rescore: , oversampling: }, mmr_diversity: <0..1>, mmr_candidates: } search parameters Optional: [yellow]GROUP BY[/yellow] [[yellow]GROUP_SIZE[/yellow] ] Group results by a payload field value (default GROUP_SIZE: 3). Field must be keyword or integer type. RERANK and GROUP BY cannot be combined. diff --git a/src/qql/executor.py b/src/qql/executor.py index 944c59c..65fdd71 100644 --- a/src/qql/executor.py +++ b/src/qql/executor.py @@ -27,7 +27,9 @@ MatchText, MatchTextAny, MatchValue, + Mmr, Modifier, + NearestQuery, PayloadField, PayloadSchemaType, PointStruct, @@ -602,6 +604,7 @@ def _execute_search(self, node: SearchStmt) -> ExecutionResult: ) search_params = self._build_search_params(node.with_clause) + self._validate_search_mmr_usage(node) # When reranking is requested, fetch more candidates so the reranker has # enough material to reorder; only `node.limit` results are returned. @@ -712,7 +715,7 @@ def _execute_search(self, node: SearchStmt) -> ExecutionResult: query_using = self._get_dense_vector_name(node.collection) response = self._client.query_points( collection_name=node.collection, - query=vector, + query=self._build_dense_query(vector, node.with_clause), using=query_using, limit=fetch_limit, query_filter=qdrant_filter, @@ -790,6 +793,8 @@ def _execute_recommend(self, node: RecommendStmt) -> ExecutionResult: ) search_params = self._build_search_params(node.with_clause) + if self._has_mmr(node.with_clause): + raise QQLRuntimeError("MMR is supported only for SEARCH statements") lookup_from: LookupLocation | None = None if node.lookup_from is not None: @@ -842,6 +847,34 @@ def _build_search_params(self, with_clause: SearchWith | None) -> SearchParams | acorn=AcornSearchParams(enable=True) if with_clause.acorn else None, ) + def _has_mmr(self, with_clause: SearchWith | None) -> bool: + return with_clause is not None and ( + with_clause.mmr_diversity is not None or with_clause.mmr_candidates is not None + ) + + def _validate_search_mmr_usage(self, node: SearchStmt) -> None: + if not self._has_mmr(node.with_clause): + return + if node.hybrid: + raise QQLRuntimeError("MMR is not supported with USING HYBRID yet") + if node.sparse_only: + raise QQLRuntimeError("MMR is not supported with USING SPARSE yet") + + def _build_dense_query( + self, + vector: list[float], + with_clause: SearchWith | None, + ) -> list[float] | NearestQuery: + if not self._has_mmr(with_clause): + return vector + return NearestQuery( + nearest=vector, + mmr=Mmr( + diversity=with_clause.mmr_diversity, + candidates_limit=with_clause.mmr_candidates, + ), + ) + def _parse_recommend_strategy( self, strategy: str | None ) -> RecommendStrategy | None: @@ -1029,7 +1062,7 @@ def _execute_search_groups( response = self._client.query_points_groups( collection_name=node.collection, group_by=node.group_by, - query=vector, + query=self._build_dense_query(vector, node.with_clause), using=query_using, limit=node.limit, group_size=node.group_size, diff --git a/src/qql/parser.py b/src/qql/parser.py index 7f9f187..2beed4b 100644 --- a/src/qql/parser.py +++ b/src/qql/parser.py @@ -26,6 +26,7 @@ QuantizationSearchWith, QuantizationConfig, QuantizationType, + QuantizationSearchWith, RecommendStmt, SelectStmt, ScrollStmt, @@ -417,6 +418,8 @@ def _parse_search(self) -> SearchStmt: acorn=with_clause.acorn, indexed_only=with_clause.indexed_only, quantization=with_clause.quantization, + mmr_diversity=with_clause.mmr_diversity, + mmr_candidates=with_clause.mmr_candidates, ) if self._peek().kind == TokenKind.WITH: self._advance() # consume WITH @@ -430,6 +433,12 @@ def _parse_search(self) -> SearchStmt: acorn=parsed_with.acorn or with_clause.acorn, indexed_only=parsed_with.indexed_only or with_clause.indexed_only, quantization=parsed_with.quantization or with_clause.quantization, + mmr_diversity=( + parsed_with.mmr_diversity + if parsed_with.mmr_diversity is not None + else with_clause.mmr_diversity + ), + mmr_candidates=parsed_with.mmr_candidates or with_clause.mmr_candidates, ) group_by: str | None = None group_size: int = 3 @@ -964,6 +973,8 @@ def _parse_with_clause(self) -> SearchWith: acorn: bool = False indexed_only: bool = False quantization: QuantizationSearchWith | None = None + mmr_diversity: float | None = None + mmr_candidates: int | None = None while self._peek().kind != TokenKind.RBRACE: key_tok = self._peek() if key_tok.kind not in ( @@ -988,10 +999,24 @@ def _parse_with_clause(self) -> SearchWith: indexed_only = self._parse_bool() elif key == "quantization": quantization = self._parse_quantization_search_with() + elif key == "mmr_diversity": + mmr_diversity = float(self._parse_number()) + if not 0.0 <= mmr_diversity <= 1.0: + raise QQLSyntaxError( + f"mmr_diversity must be between 0 and 1, got {mmr_diversity}", + key_tok.pos, + ) + elif key == "mmr_candidates": + mmr_candidates = int(self._expect(TokenKind.INTEGER).value) + if mmr_candidates <= 0: + raise QQLSyntaxError( + f"mmr_candidates must be a positive integer, got {mmr_candidates}", + key_tok.pos, + ) else: raise QQLSyntaxError( "Unknown WITH parameter " - f"'{key}'. Expected: hnsw_ef, exact, acorn, indexed_only, quantization", + f"'{key}'. Expected: hnsw_ef, exact, acorn, indexed_only, quantization, mmr_diversity, mmr_candidates", key_tok.pos, ) if self._peek().kind == TokenKind.COMMA: @@ -1007,6 +1032,8 @@ def _parse_with_clause(self) -> SearchWith: acorn=acorn, indexed_only=indexed_only, quantization=quantization, + mmr_diversity=mmr_diversity, + mmr_candidates=mmr_candidates, ) def _parse_quantization_search_with(self) -> QuantizationSearchWith: diff --git a/tests/test_executor.py b/tests/test_executor.py index cfae5af..c35016d 100644 --- a/tests/test_executor.py +++ b/tests/test_executor.py @@ -792,7 +792,6 @@ def test_sparse_search_forwards_search_params(self, executor, mock_client, mocke search_params = mock_client.query_points.call_args.kwargs["search_params"] assert search_params.exact is True assert search_params.indexed_only is True - def test_dense_search_against_hybrid_collection_uses_dense_vector_name( self, executor, mock_client, mocker ): @@ -811,6 +810,55 @@ def test_dense_search_against_hybrid_collection_uses_dense_vector_name( assert mock_client.query_points.call_args.kwargs["using"] == "dense" + def test_dense_search_with_mmr_uses_nearest_query(self, executor, mock_client, mocker): + from qdrant_client.models import NearestQuery + + mock_client.collection_exists.return_value = True + mock_response = mocker.MagicMock() + mock_response.points = [] + mock_client.query_points.return_value = mock_response + + node = SearchStmt( + collection="notes", + query_text="hello", + limit=5, + model=None, + with_clause=SearchWith(mmr_diversity=0.4, mmr_candidates=25), + ) + executor.execute(node) + + query = mock_client.query_points.call_args.kwargs["query"] + assert isinstance(query, NearestQuery) + assert query.mmr is not None + assert query.mmr.diversity == pytest.approx(0.4) + assert query.mmr.candidates_limit == 25 + + def test_hybrid_search_with_mmr_raises(self, executor, mock_client): + mock_client.collection_exists.return_value = True + node = SearchStmt( + collection="notes", + query_text="hello", + limit=5, + model=None, + hybrid=True, + with_clause=SearchWith(mmr_diversity=0.5), + ) + with pytest.raises(QQLRuntimeError, match="MMR is not supported with USING HYBRID yet"): + executor.execute(node) + + def test_sparse_search_with_mmr_raises(self, executor, mock_client): + mock_client.collection_exists.return_value = True + node = SearchStmt( + collection="notes", + query_text="hello", + limit=5, + model=None, + sparse_only=True, + with_clause=SearchWith(mmr_diversity=0.5), + ) + with pytest.raises(QQLRuntimeError, match="MMR is not supported with USING SPARSE yet"): + executor.execute(node) + class TestRecommend: def test_recommend_calls_qdrant_query_points(self, executor, mock_client, mocker): @@ -1026,6 +1074,17 @@ def test_recommend_forwards_indexed_only_and_quantization(self, executor, mock_c assert search_params.quantization is not None assert search_params.quantization.rescore is True + def test_recommend_with_mmr_raises(self, executor, mock_client): + mock_client.collection_exists.return_value = True + node = RecommendStmt( + collection="notes", + positive_ids=("a",), + limit=5, + with_clause=SearchWith(mmr_diversity=0.5), + ) + with pytest.raises(QQLRuntimeError, match="MMR is supported only for SEARCH statements"): + executor.execute(node) + def test_recommend_offset_zero_passes_none(self, executor, mock_client, mocker): mock_client.collection_exists.return_value = True mock_response = mocker.MagicMock() @@ -2268,12 +2327,35 @@ def test_group_by_hybrid_uses_query_points_groups(self, executor, mock_client, m collection="articles", query_text="q", limit=3, model=None, hybrid=True, group_by="category", group_size=2, ) - result = executor.execute(node) + executor.execute(node) mock_client.query_points_groups.assert_called_once() kwargs = mock_client.query_points_groups.call_args.kwargs assert kwargs["group_by"] == "category" assert "prefetch" in kwargs + def test_group_by_dense_with_mmr_uses_nearest_query(self, executor, mock_client, mocker): + from qdrant_client.models import NearestQuery + + mock_client.collection_exists.return_value = True + mock_response = mocker.MagicMock() + mock_response.groups = [] + mock_client.query_points_groups.return_value = mock_response + + node = SearchStmt( + collection="articles", + query_text="ai", + limit=5, + model=None, + group_by="category", + with_clause=SearchWith(mmr_diversity=0.35, mmr_candidates=40), + ) + executor.execute(node) + query = mock_client.query_points_groups.call_args.kwargs["query"] + assert isinstance(query, NearestQuery) + assert query.mmr is not None + assert query.mmr.diversity == pytest.approx(0.35) + assert query.mmr.candidates_limit == 40 + class TestUpdateVector: def test_update_vector_calls_update_vectors(self, executor, mock_client): @@ -2288,7 +2370,6 @@ def test_update_vector_calls_update_vectors(self, executor, mock_client): def test_update_vector_passes_correct_point_id(self, executor, mock_client): from qql.ast_nodes import UpdateVectorStmt - from qdrant_client.models import PointVectors mock_client.collection_exists.return_value = True mock_client.get_collection.return_value.config.params.vectors = {} # non-dict → unnamed node = UpdateVectorStmt( @@ -2480,7 +2561,6 @@ def test_update_vector_unnamed_collection_sends_plain_list(self, executor, mock_ from qql.ast_nodes import UpdateVectorStmt mock_client.collection_exists.return_value = True # Unnamed collection: get_collection returns non-dict vectors - mock_vectors = mocker.MagicMock() if False else type("V", (), {})() info = mock_client.get_collection.return_value info.config.params.vectors = [None] # list → not a dict → unnamed diff --git a/tests/test_parser.py b/tests/test_parser.py index 0983ea7..2c61b66 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -22,6 +22,7 @@ NotInExpr, OrExpr, QuantizationType, + QuantizationSearchWith, RecommendStmt, SelectStmt, ScrollStmt, @@ -916,13 +917,6 @@ def test_with_acorn(self): assert node.with_clause is not None assert node.with_clause.acorn is True - def test_with_multiple_params(self): - node = parse( - "SEARCH col SIMILAR TO 'q' LIMIT 5 WITH { hnsw_ef: 256, acorn: true }" - ) - assert node.with_clause.hnsw_ef == 256 - assert node.with_clause.acorn is True - def test_with_indexed_only(self): node = parse("SEARCH col SIMILAR TO 'q' LIMIT 5 WITH { indexed_only: true }") assert node.with_clause is not None @@ -939,6 +933,22 @@ def test_with_quantization(self): assert node.with_clause.quantization.rescore is False assert node.with_clause.quantization.oversampling == pytest.approx(2.0) + def test_with_multiple_params(self): + node = parse( + "SEARCH col SIMILAR TO 'q' LIMIT 5 WITH { hnsw_ef: 256, acorn: true }" + ) + assert node.with_clause.hnsw_ef == 256 + assert node.with_clause.acorn is True + + def test_with_mmr_params(self): + node = parse( + "SEARCH col SIMILAR TO 'q' LIMIT 5 " + "WITH { mmr_diversity: 0.5, mmr_candidates: 50 }" + ) + assert node.with_clause is not None + assert node.with_clause.mmr_diversity == pytest.approx(0.5) + assert node.with_clause.mmr_candidates == 50 + def test_with_after_where(self): node = parse( "SEARCH col SIMILAR TO 'q' LIMIT 5 WHERE year > 2020 WITH { hnsw_ef: 128 }" @@ -969,6 +979,18 @@ def test_with_unknown_keyword_raises(self): with pytest.raises(QQLSyntaxError): parse("SEARCH col SIMILAR TO 'q' LIMIT 5 WITH { diversity: 0.5 }") + def test_with_mmr_diversity_out_of_range_raises(self): + with pytest.raises(QQLSyntaxError, match="mmr_diversity must be between 0 and 1"): + parse("SEARCH col SIMILAR TO 'q' LIMIT 5 WITH { mmr_diversity: 1.5 }") + + def test_with_mmr_candidates_non_positive_raises(self): + with pytest.raises(QQLSyntaxError, match="mmr_candidates must be a positive integer"): + parse("SEARCH col SIMILAR TO 'q' LIMIT 5 WITH { mmr_candidates: 0 }") + + def test_with_quantization_unknown_key_raises(self): + with pytest.raises(QQLSyntaxError): + parse("SEARCH col SIMILAR TO 'q' LIMIT 5 WITH { quantization: { unknown: true } }") + def test_with_trailing_comma(self): node = parse("SEARCH col SIMILAR TO 'q' LIMIT 5 WITH { hnsw_ef: 256, }") assert node.with_clause.hnsw_ef == 256 @@ -1326,7 +1348,6 @@ def test_update_vector_parses_float_list(self): assert all(isinstance(v, float) for v in node.vector) def test_update_vector_collection_stored(self): - from qql.ast_nodes import UpdateVectorStmt node = parse("UPDATE my_col SET VECTOR WHERE id = 99 [0.5]") assert node.collection == "my_col" @@ -1399,7 +1420,6 @@ def test_update_payload_dict_values_preserved(self): assert node.payload["score"] == pytest.approx(0.99) def test_update_payload_collection_stored(self): - from qql.ast_nodes import UpdatePayloadStmt node = parse("UPDATE my_notes SET PAYLOAD WHERE id = 7 {'tag': 'ai'}") assert node.collection == "my_notes"