From 9ed565d5d58f0c21be393a266e4eff6a3e5c8c22 Mon Sep 17 00:00:00 2001 From: srimon12 Date: Thu, 21 May 2026 13:19:23 +0530 Subject: [PATCH 1/2] feat: enable MMR diversity for hybrid search MMR (Maximal Marginal Relevance) was artificially blocked for hybrid search in QQL, even though the Qdrant SDK supports it: Prefetch.query accepts NearestQuery, which carries an MMR field. - Remove the hybrid guard from _validate_search_mmr_usage - Wire _build_dense_query() into the dense prefetch of both flat and GROUP BY hybrid paths, so MMR params produce a NearestQuery(mmr=...) instead of a raw vector - Keep the sparse-only and recommend guards (MMR is a dense-space concept and RecommendInput has no MMR field in the Qdrant API) - Replace test_hybrid_search_with_mmr_raises with a new test that asserts NearestQuery + MMR is passed in the dense prefetch --- src/qql/executor.py | 6 ++---- tests/test_executor.py | 29 +++++++++++++++++++++++++---- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/src/qql/executor.py b/src/qql/executor.py index 29c2e63..d0a43cf 100644 --- a/src/qql/executor.py +++ b/src/qql/executor.py @@ -872,7 +872,7 @@ def _execute_search(self, node: SearchStmt) -> ExecutionResult: collection_name=node.collection, prefetch=[ Prefetch( - query=dense_vector, + query=self._build_dense_query(dense_vector, node.with_clause), using=topology.dense_using(node.dense_vector), limit=node.limit * _HYBRID_PREFETCH_MULTIPLIER, params=search_params, @@ -1460,8 +1460,6 @@ def _has_mmr(self, with_clause: SearchWith | None) -> bool: def _validate_search_mmr_usage(self, node: SearchStmt) -> None: if not self._has_mmr(node.with_clause): return - if node.hybrid: - raise QQLRuntimeError("MMR is not supported with USING HYBRID yet") if node.sparse_only: raise QQLRuntimeError("MMR is not supported with USING SPARSE yet") @@ -1635,7 +1633,7 @@ def _execute_search_groups( group_by=node.group_by, prefetch=[ Prefetch( - query=dense_vector, + query=self._build_dense_query(dense_vector, node.with_clause), using=topology.dense_using(node.dense_vector), limit=node.limit * _HYBRID_PREFETCH_MULTIPLIER, params=search_params, diff --git a/tests/test_executor.py b/tests/test_executor.py index 4ed4864..cae643e 100644 --- a/tests/test_executor.py +++ b/tests/test_executor.py @@ -1242,18 +1242,39 @@ def test_dense_search_with_mmr_uses_nearest_query(self, executor, mock_client, m assert query.mmr.diversity == pytest.approx(0.4) assert query.mmr.candidates_limit == 25 - def test_hybrid_search_with_mmr_raises(self, executor, mock_client): + def test_hybrid_search_with_mmr_uses_nearest_query_in_prefetch(self, executor, mock_client, mocker): + from qdrant_client.models import NearestQuery + + mocker.patch("qql.executor.Embedder", return_value=mocker.MagicMock()) + mocker.patch("qql.executor.SparseEmbedder", return_value=mocker.MagicMock()) mock_client.collection_exists.return_value = True + + collection_info = mocker.MagicMock() + collection_info.config.params.vectors = {"dense": {}} + collection_info.config.params.sparse_vectors = {"sparse": {}} + mock_client.get_collection.return_value = collection_info + + mock_response = mocker.MagicMock() + mock_response.points = [] + mock_client.query_points.return_value = mock_response + node = SearchStmt( collection="notes", query_text="hello", limit=5, model=None, hybrid=True, - with_clause=SearchWith(mmr_diversity=0.5), + with_clause=SearchWith(mmr_diversity=0.5, mmr_candidates=30), ) - with pytest.raises(QQLRuntimeError, match="MMR is not supported with USING HYBRID yet"): - executor.execute(node) + executor.execute(node) + + prefetch = mock_client.query_points.call_args.kwargs["prefetch"] + assert prefetch is not None + dense_query = prefetch[0].query + assert isinstance(dense_query, NearestQuery) + assert dense_query.mmr is not None + assert dense_query.mmr.diversity == pytest.approx(0.5) + assert dense_query.mmr.candidates_limit == 30 def test_sparse_search_with_mmr_raises(self, executor, mock_client): mock_client.collection_exists.return_value = True From 0140d97d420ab368687719c48f37963af19f6546 Mon Sep 17 00:00:00 2001 From: srimon12 Date: Thu, 21 May 2026 13:59:53 +0530 Subject: [PATCH 2/2] feat: add hybrid search test with MMR grouped prefetch functionality --- tests/test_executor.py | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/tests/test_executor.py b/tests/test_executor.py index cae643e..a2f4c3f 100644 --- a/tests/test_executor.py +++ b/tests/test_executor.py @@ -1270,7 +1270,40 @@ def test_hybrid_search_with_mmr_uses_nearest_query_in_prefetch(self, executor, m prefetch = mock_client.query_points.call_args.kwargs["prefetch"] assert prefetch is not None - dense_query = prefetch[0].query + dense_prefetch = next(p for p in prefetch if p.using == "dense") + dense_query = dense_prefetch.query + assert isinstance(dense_query, NearestQuery) + assert dense_query.mmr is not None + assert dense_query.mmr.diversity == pytest.approx(0.5) + assert dense_query.mmr.candidates_limit == 30 + + def test_hybrid_search_with_mmr_grouped_uses_nearest_query_in_prefetch(self, executor, mock_client, mocker): + from qdrant_client.models import NearestQuery + + _mock_hybrid_collection(mock_client) + mock_response = mocker.MagicMock() + mock_response.groups = [] + mock_client.query_points_groups.return_value = mock_response + + mock_sparse_embedder = mocker.MagicMock() + mock_sparse_embedder.query_embed.return_value = {"indices": [0, 1], "values": [0.5, 0.5]} + mocker.patch("qql.executor.SparseEmbedder", return_value=mock_sparse_embedder) + + node = SearchStmt( + collection="articles", + query_text="hello", + limit=5, + model=None, + hybrid=True, + group_by="category", + with_clause=SearchWith(mmr_diversity=0.5, mmr_candidates=30), + ) + executor.execute(node) + + prefetch = mock_client.query_points_groups.call_args.kwargs["prefetch"] + assert prefetch is not None + dense_prefetch = next(p for p in prefetch if p.using == "dense") + dense_query = dense_prefetch.query assert isinstance(dense_query, NearestQuery) assert dense_query.mmr is not None assert dense_query.mmr.diversity == pytest.approx(0.5)