pavanjava · pavanjava · May 15, 2026 · May 15, 2026 · May 15, 2026
diff --git a/README.md b/README.md
@@ -102,6 +102,7 @@ INSERT BULK INTO COLLECTION articles VALUES [{'text': '...'}, {'text': '...'}]
 SEARCH articles SIMILAR TO 'query' LIMIT 10
 SEARCH articles SIMILAR TO 'query' LIMIT 10 WHERE year >= 2020
 SEARCH articles SIMILAR TO 'query' LIMIT 10 WHERE active = true
+SEARCH articles SIMILAR TO 'query' LIMIT 10 WITH { mmr_diversity: 0.5, mmr_candidates: 50 }
 SEARCH articles SIMILAR TO 'query' LIMIT 10 USING HYBRID
 SEARCH articles SIMILAR TO 'query' LIMIT 10 USING HYBRID FUSION 'dbsf'
 SEARCH articles SIMILAR TO 'query' LIMIT 10 WITH { indexed_only: true }

diff --git a/docs/search.md b/docs/search.md
@@ -17,7 +17,7 @@ SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING HYBRID
 SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING HYBRID [FUSION 'rrf|dbsf'] [DENSE MODEL '<model>'] [SPARSE MODEL '<model>'] [WHERE <filter>]
 SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING SPARSE [MODEL '<sparse_model>']
 SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> EXACT
-SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> [USING ...] [WHERE <filter>] [RERANK] WITH { hnsw_ef: <n>, exact: true|false, acorn: true|false, indexed_only: true|false, quantization: { ignore: true|false, rescore: true|false, oversampling: <n> } }
+SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> [USING ...] [WHERE <filter>] [RERANK] WITH { hnsw_ef: <n>, exact: true|false, acorn: true|false, indexed_only: true|false, quantization: { ignore: true|false, rescore: true|false, oversampling: <n> }, mmr_diversity: <0..1>, mmr_candidates: <n> }
 SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> [USING ...] [WHERE <filter>] RERANK [MODEL '<reranker_model>']
 ```
 
@@ -55,6 +55,11 @@ Search with query-time HNSW tuning:
 SEARCH articles SIMILAR TO 'attention mechanism' LIMIT 10 WITH { hnsw_ef: 128 }
 ```
 
+Search with native MMR diversification:
+```sql
+SEARCH articles SIMILAR TO 'attention mechanism' LIMIT 10 WITH { mmr_diversity: 0.5, mmr_candidates: 50 }
+```
+
 **Output:**
 
 Results are displayed as a table with three columns:
@@ -102,12 +107,14 @@ Use these when you want to debug retrieval quality or tune recall without changi
 | `WITH { hnsw_ef: 128 }` | Increase HNSW exploration at query time |
 | `WITH { exact: true }` | Force exact KNN explicitly |
 | `WITH { acorn: true }` | Enable ACORN for filtered queries |
-| `WITH { indexed_only: true }` | Restrict the query to indexed segments only |
-| `WITH { quantization: { ... } }` | Tune quantized-search behavior at query time |
+| `WITH { indexed_only: true, quantization: { rescore: true } }` | Restrict the query to indexed segments and tune quantized-search behavior at query time |
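+| `WITH { mmr_diversity: 0.5, mmr_candidates: 50 }` | Re-rank nearest-neighbor candidates with native MMR; `mmr_diversity` ranges from 0 (favor relevance) to 1 (favor diversity), and `mmr_candidates` is the number of candidates considered |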
 
 - `EXACT` can appear after `LIMIT` or after `RERANK`
 - `WITH { ... }` can appear after `WHERE` and/or `RERANK`
-- Supported top-level `WITH` keys are `hnsw_ef`, `exact`, `acorn`, `indexed_only`, and `quantization`
+- Supported top-level `WITH` keys are `hnsw_ef`, `exact`, `acorn`, `indexed_only`, `quantization`, `mmr_diversity`, and `mmr_candidates`
+- MMR is currently supported for dense `SEARCH` and dense `SEARCH ... GROUP BY`
+- MMR is not yet supported with `USING HYBRID`, `USING SPARSE`, or `RECOMMEND`; combining it with any of these raises a runtime error
 
 ```sql
 -- Exact KNN baseline
@@ -124,6 +131,9 @@ SEARCH articles SIMILAR TO 'retrieval' LIMIT 10 WITH { indexed_only: true }
 
 -- Quantized-search tuning
 SEARCH articles SIMILAR TO 'vector db' LIMIT 10 WITH { quantization: { ignore: true, oversampling: 2 } }
+
+-- Diversify top-k results with native MMR
+SEARCH articles SIMILAR TO 'retrieval systems' LIMIT 10 WITH { mmr_diversity: 0.5, mmr_candidates: 50 }
 ```
 
 ---

diff --git a/src/qql/ast_nodes.py b/src/qql/ast_nodes.py
@@ -29,6 +29,8 @@ class SearchWith:
     acorn: bool = False
     indexed_only: bool = False
     quantization: "QuantizationSearchWith | None" = None
+    mmr_diversity: float | None = None
+    mmr_candidates: int | None = None
 
 
 @dataclass(frozen=True)

diff --git a/src/qql/cli.py b/src/qql/cli.py
@@ -70,7 +70,7 @@
       Optional: [yellow]WHERE[/yellow] <filter>   (e.g. WHERE year > 2020 AND status = 'ok')
       Optional: [yellow]RERANK[/yellow] [MODEL '<model>']   rerank results with a cross-encoder
       Optional: [yellow]EXACT[/yellow]   bypass HNSW and perform exact search
-      Optional: [yellow]WITH[/yellow] { hnsw_ef: <int>, exact: <bool>, acorn: <bool>, indexed_only: <bool>, quantization: { ignore: <bool>, rescore: <bool>, oversampling: <n> } }   search parameters
+      Optional: [yellow]WITH[/yellow] { hnsw_ef: <int>, exact: <bool>, acorn: <bool>, indexed_only: <bool>, quantization: { ignore: <bool>, rescore: <bool>, oversampling: <n> }, mmr_diversity: <0..1>, mmr_candidates: <int> }   search parameters
       Optional: [yellow]GROUP BY[/yellow] <field> [[yellow]GROUP_SIZE[/yellow] <n>]
                   Group results by a payload field value (default GROUP_SIZE: 3).
                   Field must be keyword or integer type. RERANK and GROUP BY cannot be combined.

diff --git a/src/qql/executor.py b/src/qql/executor.py
@@ -27,7 +27,9 @@
     MatchText,
     MatchTextAny,
     MatchValue,
+    Mmr,
     Modifier,
+    NearestQuery,
     PayloadField,
     PayloadSchemaType,
     PointStruct,
@@ -602,6 +604,7 @@ def _execute_search(self, node: SearchStmt) -> ExecutionResult:
             )
 
         search_params = self._build_search_params(node.with_clause)
+        self._validate_search_mmr_usage(node)
 
         # When reranking is requested, fetch more candidates so the reranker has
         # enough material to reorder; only `node.limit` results are returned.
@@ -712,7 +715,7 @@ def _execute_search(self, node: SearchStmt) -> ExecutionResult:
             query_using = self._get_dense_vector_name(node.collection)
             response = self._client.query_points(
                 collection_name=node.collection,
-                query=vector,
+                query=self._build_dense_query(vector, node.with_clause),
                 using=query_using,
                 limit=fetch_limit,
                 query_filter=qdrant_filter,
@@ -790,6 +793,8 @@ def _execute_recommend(self, node: RecommendStmt) -> ExecutionResult:
         )
 
         search_params = self._build_search_params(node.with_clause)
+        if self._has_mmr(node.with_clause):
+            raise QQLRuntimeError("MMR is supported only for SEARCH statements")
 
         lookup_from: LookupLocation | None = None
         if node.lookup_from is not None:
@@ -842,6 +847,34 @@ def _build_search_params(self, with_clause: SearchWith | None) -> SearchParams |
             acorn=AcornSearchParams(enable=True) if with_clause.acorn else None,
         )
 
+    def _has_mmr(self, with_clause: SearchWith | None) -> bool:
+        return with_clause is not None and (
+            with_clause.mmr_diversity is not None or with_clause.mmr_candidates is not None
+        )
+
+    def _validate_search_mmr_usage(self, node: SearchStmt) -> None:
+        if not self._has_mmr(node.with_clause):
+            return
+        if node.hybrid:
+            raise QQLRuntimeError("MMR is not supported with USING HYBRID yet")
+        if node.sparse_only:
+            raise QQLRuntimeError("MMR is not supported with USING SPARSE yet")
+
+    def _build_dense_query(
+        self,
+        vector: list[float],
+        with_clause: SearchWith | None,
+    ) -> list[float] | NearestQuery:
+        if not self._has_mmr(with_clause):
+            return vector
+        return NearestQuery(
+            nearest=vector,
+            mmr=Mmr(
+                diversity=with_clause.mmr_diversity,
+                candidates_limit=with_clause.mmr_candidates,
+            ),
+        )
+
     def _parse_recommend_strategy(
         self, strategy: str | None
     ) -> RecommendStrategy | None:
@@ -1029,7 +1062,7 @@ def _execute_search_groups(
                 response = self._client.query_points_groups(
                     collection_name=node.collection,
                     group_by=node.group_by,
-                    query=vector,
+                    query=self._build_dense_query(vector, node.with_clause),
                     using=query_using,
                     limit=node.limit,
                     group_size=node.group_size,

diff --git a/src/qql/parser.py b/src/qql/parser.py
@@ -26,6 +26,7 @@
     QuantizationSearchWith,
     QuantizationConfig,
     QuantizationType,
+    QuantizationSearchWith,
     RecommendStmt,
     SelectStmt,
     ScrollStmt,
@@ -417,6 +418,8 @@ def _parse_search(self) -> SearchStmt:
                     acorn=with_clause.acorn,
                     indexed_only=with_clause.indexed_only,
                     quantization=with_clause.quantization,
+                    mmr_diversity=with_clause.mmr_diversity,
+                    mmr_candidates=with_clause.mmr_candidates,
                 )
         if self._peek().kind == TokenKind.WITH:
             self._advance()  # consume WITH
@@ -430,6 +433,12 @@ def _parse_search(self) -> SearchStmt:
                     acorn=parsed_with.acorn or with_clause.acorn,
                     indexed_only=parsed_with.indexed_only or with_clause.indexed_only,
                     quantization=parsed_with.quantization or with_clause.quantization,
+                    mmr_diversity=(
+                        parsed_with.mmr_diversity
+                        if parsed_with.mmr_diversity is not None
+                        else with_clause.mmr_diversity
+                    ),
+                    mmr_candidates=parsed_with.mmr_candidates or with_clause.mmr_candidates,
                 )
         group_by: str | None = None
         group_size: int = 3
@@ -964,6 +973,8 @@ def _parse_with_clause(self) -> SearchWith:
         acorn: bool = False
         indexed_only: bool = False
         quantization: QuantizationSearchWith | None = None
+        mmr_diversity: float | None = None
+        mmr_candidates: int | None = None
         while self._peek().kind != TokenKind.RBRACE:
             key_tok = self._peek()
             if key_tok.kind not in (
@@ -988,10 +999,24 @@ def _parse_with_clause(self) -> SearchWith:
                 indexed_only = self._parse_bool()
             elif key == "quantization":
                 quantization = self._parse_quantization_search_with()
+            elif key == "mmr_diversity":
+                mmr_diversity = float(self._parse_number())
+                if not 0.0 <= mmr_diversity <= 1.0:
+                    raise QQLSyntaxError(
+                        f"mmr_diversity must be between 0 and 1, got {mmr_diversity}",
+                        key_tok.pos,
+                    )
+            elif key == "mmr_candidates":
+                mmr_candidates = int(self._expect(TokenKind.INTEGER).value)
+                if mmr_candidates <= 0:
+                    raise QQLSyntaxError(
+                        f"mmr_candidates must be a positive integer, got {mmr_candidates}",
+                        key_tok.pos,
+                    )
             else:
                 raise QQLSyntaxError(
                     "Unknown WITH parameter "
-                    f"'{key}'. Expected: hnsw_ef, exact, acorn, indexed_only, quantization",
+                    f"'{key}'. Expected: hnsw_ef, exact, acorn, indexed_only, quantization, mmr_diversity, mmr_candidates",
                     key_tok.pos,
                 )
             if self._peek().kind == TokenKind.COMMA:
@@ -1007,6 +1032,8 @@ def _parse_with_clause(self) -> SearchWith:
             acorn=acorn,
             indexed_only=indexed_only,
             quantization=quantization,
+            mmr_diversity=mmr_diversity,
+            mmr_candidates=mmr_candidates,
         )
 
     def _parse_quantization_search_with(self) -> QuantizationSearchWith:

diff --git a/tests/test_executor.py b/tests/test_executor.py
@@ -792,7 +792,6 @@ def test_sparse_search_forwards_search_params(self, executor, mock_client, mocke
         search_params = mock_client.query_points.call_args.kwargs["search_params"]
         assert search_params.exact is True
         assert search_params.indexed_only is True
-
     def test_dense_search_against_hybrid_collection_uses_dense_vector_name(
         self, executor, mock_client, mocker
     ):
@@ -811,6 +810,55 @@ def test_dense_search_against_hybrid_collection_uses_dense_vector_name(
 
         assert mock_client.query_points.call_args.kwargs["using"] == "dense"
 
+    def test_dense_search_with_mmr_uses_nearest_query(self, executor, mock_client, mocker):
+        from qdrant_client.models import NearestQuery
+
+        mock_client.collection_exists.return_value = True
+        mock_response = mocker.MagicMock()
+        mock_response.points = []
+        mock_client.query_points.return_value = mock_response
+
+        node = SearchStmt(
+            collection="notes",
+            query_text="hello",
+            limit=5,
+            model=None,
+            with_clause=SearchWith(mmr_diversity=0.4, mmr_candidates=25),
+        )
+        executor.execute(node)
+
+        query = mock_client.query_points.call_args.kwargs["query"]
+        assert isinstance(query, NearestQuery)
+        assert query.mmr is not None
+        assert query.mmr.diversity == pytest.approx(0.4)
+        assert query.mmr.candidates_limit == 25
+
+    def test_hybrid_search_with_mmr_raises(self, executor, mock_client):
+        mock_client.collection_exists.return_value = True
+        node = SearchStmt(
+            collection="notes",
+            query_text="hello",
+            limit=5,
+            model=None,
+            hybrid=True,
+            with_clause=SearchWith(mmr_diversity=0.5),
+        )
+        with pytest.raises(QQLRuntimeError, match="MMR is not supported with USING HYBRID yet"):
+            executor.execute(node)
+
+    def test_sparse_search_with_mmr_raises(self, executor, mock_client):
+        mock_client.collection_exists.return_value = True
+        node = SearchStmt(
+            collection="notes",
+            query_text="hello",
+            limit=5,
+            model=None,
+            sparse_only=True,
+            with_clause=SearchWith(mmr_diversity=0.5),
+        )
+        with pytest.raises(QQLRuntimeError, match="MMR is not supported with USING SPARSE yet"):
+            executor.execute(node)
+
 
 class TestRecommend:
     def test_recommend_calls_qdrant_query_points(self, executor, mock_client, mocker):
@@ -1026,6 +1074,17 @@ def test_recommend_forwards_indexed_only_and_quantization(self, executor, mock_c
         assert search_params.quantization is not None
         assert search_params.quantization.rescore is True
 
+    def test_recommend_with_mmr_raises(self, executor, mock_client):
+        mock_client.collection_exists.return_value = True
+        node = RecommendStmt(
+            collection="notes",
+            positive_ids=("a",),
+            limit=5,
+            with_clause=SearchWith(mmr_diversity=0.5),
+        )
+        with pytest.raises(QQLRuntimeError, match="MMR is supported only for SEARCH statements"):
+            executor.execute(node)
+
     def test_recommend_offset_zero_passes_none(self, executor, mock_client, mocker):
         mock_client.collection_exists.return_value = True
         mock_response = mocker.MagicMock()
@@ -2268,12 +2327,35 @@ def test_group_by_hybrid_uses_query_points_groups(self, executor, mock_client, m
             collection="articles", query_text="q", limit=3, model=None,
             hybrid=True, group_by="category", group_size=2,
         )
-        result = executor.execute(node)
+        executor.execute(node)
         mock_client.query_points_groups.assert_called_once()
         kwargs = mock_client.query_points_groups.call_args.kwargs
         assert kwargs["group_by"] == "category"
         assert "prefetch" in kwargs
 
+    def test_group_by_dense_with_mmr_uses_nearest_query(self, executor, mock_client, mocker):
+        from qdrant_client.models import NearestQuery
+
+        mock_client.collection_exists.return_value = True
+        mock_response = mocker.MagicMock()
+        mock_response.groups = []
+        mock_client.query_points_groups.return_value = mock_response
+
+        node = SearchStmt(
+            collection="articles",
+            query_text="ai",
+            limit=5,
+            model=None,
+            group_by="category",
+            with_clause=SearchWith(mmr_diversity=0.35, mmr_candidates=40),
+        )
+        executor.execute(node)
+        query = mock_client.query_points_groups.call_args.kwargs["query"]
+        assert isinstance(query, NearestQuery)
+        assert query.mmr is not None
+        assert query.mmr.diversity == pytest.approx(0.35)
+        assert query.mmr.candidates_limit == 40
+
 
 class TestUpdateVector:
     def test_update_vector_calls_update_vectors(self, executor, mock_client):
@@ -2288,7 +2370,6 @@ def test_update_vector_calls_update_vectors(self, executor, mock_client):
 
     def test_update_vector_passes_correct_point_id(self, executor, mock_client):
         from qql.ast_nodes import UpdateVectorStmt
-        from qdrant_client.models import PointVectors
         mock_client.collection_exists.return_value = True
         mock_client.get_collection.return_value.config.params.vectors = {}  # non-dict → unnamed
         node = UpdateVectorStmt(
@@ -2480,7 +2561,6 @@ def test_update_vector_unnamed_collection_sends_plain_list(self, executor, mock_
         from qql.ast_nodes import UpdateVectorStmt
         mock_client.collection_exists.return_value = True
         # Unnamed collection: get_collection returns non-dict vectors
-        mock_vectors = mocker.MagicMock() if False else type("V", (), {})()
         info = mock_client.get_collection.return_value
         info.config.params.vectors = [None]  # list → not a dict → unnamed