Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ INSERT BULK INTO COLLECTION articles VALUES [{'text': '...'}, {'text': '...'}]
SEARCH articles SIMILAR TO 'query' LIMIT 10
SEARCH articles SIMILAR TO 'query' LIMIT 10 WHERE year >= 2020
SEARCH articles SIMILAR TO 'query' LIMIT 10 WHERE active = true
SEARCH articles SIMILAR TO 'query' LIMIT 10 WITH { mmr_diversity: 0.5, mmr_candidates: 50 }
SEARCH articles SIMILAR TO 'query' LIMIT 10 USING HYBRID
SEARCH articles SIMILAR TO 'query' LIMIT 10 USING HYBRID FUSION 'dbsf'
SEARCH articles SIMILAR TO 'query' LIMIT 10 WITH { indexed_only: true }
Expand Down
18 changes: 14 additions & 4 deletions docs/search.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING HYBRID
SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING HYBRID [FUSION 'rrf|dbsf'] [DENSE MODEL '<model>'] [SPARSE MODEL '<model>'] [WHERE <filter>]
SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING SPARSE [MODEL '<sparse_model>']
SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> EXACT
SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> [USING ...] [WHERE <filter>] [RERANK] WITH { hnsw_ef: <n>, exact: true|false, acorn: true|false, indexed_only: true|false, quantization: { ignore: true|false, rescore: true|false, oversampling: <n> } }
SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> [USING ...] [WHERE <filter>] [RERANK] WITH { hnsw_ef: <n>, exact: true|false, acorn: true|false, indexed_only: true|false, quantization: { ignore: true|false, rescore: true|false, oversampling: <n> }, mmr_diversity: <0..1>, mmr_candidates: <n> }
SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> [USING ...] [WHERE <filter>] RERANK [MODEL '<reranker_model>']
```

Expand Down Expand Up @@ -55,6 +55,11 @@ Search with query-time HNSW tuning:
SEARCH articles SIMILAR TO 'attention mechanism' LIMIT 10 WITH { hnsw_ef: 128 }
```

Search with native MMR diversification:
```sql
SEARCH articles SIMILAR TO 'attention mechanism' LIMIT 10 WITH { mmr_diversity: 0.5, mmr_candidates: 50 }
```

**Output:**

Results are displayed as a table with three columns:
Expand Down Expand Up @@ -102,12 +107,14 @@ Use these when you want to debug retrieval quality or tune recall without changi
| `WITH { hnsw_ef: 128 }` | Increase HNSW exploration at query time |
| `WITH { exact: true }` | Force exact KNN explicitly |
| `WITH { acorn: true }` | Enable ACORN for filtered queries |
| `WITH { indexed_only: true }` | Restrict the query to indexed segments only |
| `WITH { quantization: { ... } }` | Tune quantized-search behavior at query time |
| `WITH { indexed_only: true, quantization: { rescore: true } }` | Restrict the query to indexed segments and apply quantization controls |
| `WITH { mmr_diversity: 0.5, mmr_candidates: 50 }` | Apply native MMR diversification after nearest-neighbor retrieval |

- `EXACT` can appear after `LIMIT` or after `RERANK`
- `WITH { ... }` can appear after `WHERE` and/or `RERANK`
- Supported top-level `WITH` keys are `hnsw_ef`, `exact`, `acorn`, `indexed_only`, and `quantization`
- Supported top-level `WITH` keys are `hnsw_ef`, `exact`, `acorn`, `indexed_only`, `quantization`, `mmr_diversity`, and `mmr_candidates`
- MMR is currently supported for dense `SEARCH` and dense `SEARCH ... GROUP BY`
- MMR is not yet supported with `USING HYBRID`, `USING SPARSE`, or `RECOMMEND`

```sql
-- Exact KNN baseline
Expand All @@ -124,6 +131,9 @@ SEARCH articles SIMILAR TO 'retrieval' LIMIT 10 WITH { indexed_only: true }

-- Quantized-search tuning
SEARCH articles SIMILAR TO 'vector db' LIMIT 10 WITH { quantization: { ignore: true, oversampling: 2 } }

-- Diversify top-k results with native MMR
SEARCH articles SIMILAR TO 'retrieval systems' LIMIT 10 WITH { mmr_diversity: 0.5, mmr_candidates: 50 }
```

---
Expand Down
2 changes: 2 additions & 0 deletions src/qql/ast_nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ class SearchWith:
acorn: bool = False
indexed_only: bool = False
quantization: "QuantizationSearchWith | None" = None
mmr_diversity: float | None = None
mmr_candidates: int | None = None


@dataclass(frozen=True)
Expand Down
2 changes: 1 addition & 1 deletion src/qql/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
Optional: [yellow]WHERE[/yellow] <filter> (e.g. WHERE year > 2020 AND status = 'ok')
Optional: [yellow]RERANK[/yellow] [MODEL '<model>'] rerank results with a cross-encoder
Optional: [yellow]EXACT[/yellow] bypass HNSW and perform exact search
Optional: [yellow]WITH[/yellow] { hnsw_ef: <int>, exact: <bool>, acorn: <bool>, indexed_only: <bool>, quantization: { ignore: <bool>, rescore: <bool>, oversampling: <n> } } search parameters
Optional: [yellow]WITH[/yellow] { hnsw_ef: <int>, exact: <bool>, acorn: <bool>, indexed_only: <bool>, quantization: { ignore: <bool>, rescore: <bool>, oversampling: <n> }, mmr_diversity: <0..1>, mmr_candidates: <int> } search parameters
Optional: [yellow]GROUP BY[/yellow] <field> [[yellow]GROUP_SIZE[/yellow] <n>]
Group results by a payload field value (default GROUP_SIZE: 3).
Field must be keyword or integer type. RERANK and GROUP BY cannot be combined.
Expand Down
37 changes: 35 additions & 2 deletions src/qql/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@
MatchText,
MatchTextAny,
MatchValue,
Mmr,
Modifier,
NearestQuery,
PayloadField,
PayloadSchemaType,
PointStruct,
Expand Down Expand Up @@ -602,6 +604,7 @@ def _execute_search(self, node: SearchStmt) -> ExecutionResult:
)

search_params = self._build_search_params(node.with_clause)
self._validate_search_mmr_usage(node)

# When reranking is requested, fetch more candidates so the reranker has
# enough material to reorder; only `node.limit` results are returned.
Expand Down Expand Up @@ -712,7 +715,7 @@ def _execute_search(self, node: SearchStmt) -> ExecutionResult:
query_using = self._get_dense_vector_name(node.collection)
response = self._client.query_points(
collection_name=node.collection,
query=vector,
query=self._build_dense_query(vector, node.with_clause),
using=query_using,
limit=fetch_limit,
query_filter=qdrant_filter,
Expand Down Expand Up @@ -790,6 +793,8 @@ def _execute_recommend(self, node: RecommendStmt) -> ExecutionResult:
)

search_params = self._build_search_params(node.with_clause)
if self._has_mmr(node.with_clause):
raise QQLRuntimeError("MMR is supported only for SEARCH statements")

lookup_from: LookupLocation | None = None
if node.lookup_from is not None:
Expand Down Expand Up @@ -842,6 +847,34 @@ def _build_search_params(self, with_clause: SearchWith | None) -> SearchParams |
acorn=AcornSearchParams(enable=True) if with_clause.acorn else None,
)

def _has_mmr(self, with_clause: SearchWith | None) -> bool:
    """Report whether a WITH clause sets any MMR option.

    Returns ``True`` when ``with_clause`` is present and at least one of
    ``mmr_diversity`` / ``mmr_candidates`` is not ``None``; returns
    ``False`` otherwise, including when there is no WITH clause at all.
    """
    if with_clause is None:
        return False
    # Compare against None rather than truthiness so that an explicit
    # mmr_diversity of 0.0 still counts as "MMR requested".
    if with_clause.mmr_diversity is not None:
        return True
    return with_clause.mmr_candidates is not None

def _validate_search_mmr_usage(self, node: SearchStmt) -> None:
    """Reject SEARCH statements that combine MMR with an unsupported mode.

    Does nothing when the statement's WITH clause carries no MMR option.

    Raises:
        QQLRuntimeError: if MMR options are present and the statement is
            flagged ``hybrid`` or ``sparse_only``. ``hybrid`` is checked
            first, so it wins when both flags are set.
    """
    mmr_requested = self._has_mmr(node.with_clause)
    if mmr_requested and node.hybrid:
        raise QQLRuntimeError("MMR is not supported with USING HYBRID yet")
    if mmr_requested and node.sparse_only:
        raise QQLRuntimeError("MMR is not supported with USING SPARSE yet")

def _build_dense_query(
    self,
    vector: list[float],
    with_clause: SearchWith | None,
) -> list[float] | NearestQuery:
    """Build the ``query`` argument for a dense search.

    Args:
        vector: The dense query embedding.
        with_clause: Parsed WITH options, or ``None`` when absent.

    Returns:
        ``vector`` itself when no MMR option is set; otherwise a
        ``NearestQuery`` wrapping ``vector`` with an ``Mmr`` config whose
        ``diversity`` and ``candidates_limit`` come from the clause.
        Either value may be ``None`` when only the other one was given.
    """
    if self._has_mmr(with_clause):
        mmr_options = Mmr(
            diversity=with_clause.mmr_diversity,
            candidates_limit=with_clause.mmr_candidates,
        )
        return NearestQuery(nearest=vector, mmr=mmr_options)
    # No MMR requested: pass the raw vector through untouched.
    return vector

def _parse_recommend_strategy(
self, strategy: str | None
) -> RecommendStrategy | None:
Expand Down Expand Up @@ -1029,7 +1062,7 @@ def _execute_search_groups(
response = self._client.query_points_groups(
collection_name=node.collection,
group_by=node.group_by,
query=vector,
query=self._build_dense_query(vector, node.with_clause),
using=query_using,
limit=node.limit,
group_size=node.group_size,
Expand Down
29 changes: 28 additions & 1 deletion src/qql/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
QuantizationSearchWith,
QuantizationConfig,
QuantizationType,
QuantizationSearchWith,
RecommendStmt,
SelectStmt,
ScrollStmt,
Expand Down Expand Up @@ -417,6 +418,8 @@ def _parse_search(self) -> SearchStmt:
acorn=with_clause.acorn,
indexed_only=with_clause.indexed_only,
quantization=with_clause.quantization,
mmr_diversity=with_clause.mmr_diversity,
mmr_candidates=with_clause.mmr_candidates,
)
if self._peek().kind == TokenKind.WITH:
self._advance() # consume WITH
Expand All @@ -430,6 +433,12 @@ def _parse_search(self) -> SearchStmt:
acorn=parsed_with.acorn or with_clause.acorn,
indexed_only=parsed_with.indexed_only or with_clause.indexed_only,
quantization=parsed_with.quantization or with_clause.quantization,
mmr_diversity=(
parsed_with.mmr_diversity
if parsed_with.mmr_diversity is not None
else with_clause.mmr_diversity
),
mmr_candidates=parsed_with.mmr_candidates or with_clause.mmr_candidates,
)
group_by: str | None = None
group_size: int = 3
Expand Down Expand Up @@ -964,6 +973,8 @@ def _parse_with_clause(self) -> SearchWith:
acorn: bool = False
indexed_only: bool = False
quantization: QuantizationSearchWith | None = None
mmr_diversity: float | None = None
mmr_candidates: int | None = None
while self._peek().kind != TokenKind.RBRACE:
key_tok = self._peek()
if key_tok.kind not in (
Expand All @@ -988,10 +999,24 @@ def _parse_with_clause(self) -> SearchWith:
indexed_only = self._parse_bool()
elif key == "quantization":
quantization = self._parse_quantization_search_with()
elif key == "mmr_diversity":
mmr_diversity = float(self._parse_number())
if not 0.0 <= mmr_diversity <= 1.0:
raise QQLSyntaxError(
f"mmr_diversity must be between 0 and 1, got {mmr_diversity}",
key_tok.pos,
)
elif key == "mmr_candidates":
mmr_candidates = int(self._expect(TokenKind.INTEGER).value)
if mmr_candidates <= 0:
raise QQLSyntaxError(
f"mmr_candidates must be a positive integer, got {mmr_candidates}",
key_tok.pos,
)
else:
raise QQLSyntaxError(
"Unknown WITH parameter "
f"'{key}'. Expected: hnsw_ef, exact, acorn, indexed_only, quantization",
f"'{key}'. Expected: hnsw_ef, exact, acorn, indexed_only, quantization, mmr_diversity, mmr_candidates",
key_tok.pos,
)
if self._peek().kind == TokenKind.COMMA:
Expand All @@ -1007,6 +1032,8 @@ def _parse_with_clause(self) -> SearchWith:
acorn=acorn,
indexed_only=indexed_only,
quantization=quantization,
mmr_diversity=mmr_diversity,
mmr_candidates=mmr_candidates,
)

def _parse_quantization_search_with(self) -> QuantizationSearchWith:
Expand Down
88 changes: 84 additions & 4 deletions tests/test_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -792,7 +792,6 @@ def test_sparse_search_forwards_search_params(self, executor, mock_client, mocke
search_params = mock_client.query_points.call_args.kwargs["search_params"]
assert search_params.exact is True
assert search_params.indexed_only is True

def test_dense_search_against_hybrid_collection_uses_dense_vector_name(
self, executor, mock_client, mocker
):
Expand All @@ -811,6 +810,55 @@ def test_dense_search_against_hybrid_collection_uses_dense_vector_name(

assert mock_client.query_points.call_args.kwargs["using"] == "dense"

def test_dense_search_with_mmr_uses_nearest_query(self, executor, mock_client, mocker):
    """A dense SEARCH carrying MMR options sends a NearestQuery to query_points."""
    from qdrant_client.models import NearestQuery

    # Collection exists and the client answers with an empty result set.
    mock_client.collection_exists.return_value = True
    mock_client.query_points.return_value = mocker.MagicMock(points=[])

    mmr_options = SearchWith(mmr_diversity=0.4, mmr_candidates=25)
    stmt = SearchStmt(
        collection="notes",
        query_text="hello",
        limit=5,
        model=None,
        with_clause=mmr_options,
    )
    executor.execute(stmt)

    sent_query = mock_client.query_points.call_args.kwargs["query"]
    assert isinstance(sent_query, NearestQuery)
    mmr = sent_query.mmr
    assert mmr is not None
    assert mmr.diversity == pytest.approx(0.4)
    assert mmr.candidates_limit == 25

def test_hybrid_search_with_mmr_raises(self, executor, mock_client):
    """Combining USING HYBRID with an MMR option is rejected at execution time."""
    mock_client.collection_exists.return_value = True
    expected_message = "MMR is not supported with USING HYBRID yet"
    stmt = SearchStmt(
        collection="notes",
        query_text="hello",
        limit=5,
        model=None,
        hybrid=True,
        with_clause=SearchWith(mmr_diversity=0.5),
    )
    with pytest.raises(QQLRuntimeError, match=expected_message):
        executor.execute(stmt)

def test_sparse_search_with_mmr_raises(self, executor, mock_client):
    """Combining USING SPARSE with an MMR option is rejected at execution time."""
    mock_client.collection_exists.return_value = True
    expected_message = "MMR is not supported with USING SPARSE yet"
    stmt = SearchStmt(
        collection="notes",
        query_text="hello",
        limit=5,
        model=None,
        sparse_only=True,
        with_clause=SearchWith(mmr_diversity=0.5),
    )
    with pytest.raises(QQLRuntimeError, match=expected_message):
        executor.execute(stmt)


class TestRecommend:
def test_recommend_calls_qdrant_query_points(self, executor, mock_client, mocker):
Expand Down Expand Up @@ -1026,6 +1074,17 @@ def test_recommend_forwards_indexed_only_and_quantization(self, executor, mock_c
assert search_params.quantization is not None
assert search_params.quantization.rescore is True

def test_recommend_with_mmr_raises(self, executor, mock_client):
    """RECOMMEND statements carrying an MMR option are rejected at execution time."""
    mock_client.collection_exists.return_value = True
    expected_message = "MMR is supported only for SEARCH statements"
    stmt = RecommendStmt(
        collection="notes",
        positive_ids=("a",),
        limit=5,
        with_clause=SearchWith(mmr_diversity=0.5),
    )
    with pytest.raises(QQLRuntimeError, match=expected_message):
        executor.execute(stmt)

def test_recommend_offset_zero_passes_none(self, executor, mock_client, mocker):
mock_client.collection_exists.return_value = True
mock_response = mocker.MagicMock()
Expand Down Expand Up @@ -2268,12 +2327,35 @@ def test_group_by_hybrid_uses_query_points_groups(self, executor, mock_client, m
collection="articles", query_text="q", limit=3, model=None,
hybrid=True, group_by="category", group_size=2,
)
result = executor.execute(node)
executor.execute(node)
mock_client.query_points_groups.assert_called_once()
kwargs = mock_client.query_points_groups.call_args.kwargs
assert kwargs["group_by"] == "category"
assert "prefetch" in kwargs

def test_group_by_dense_with_mmr_uses_nearest_query(self, executor, mock_client, mocker):
    """A dense SEARCH ... GROUP BY with MMR options sends a NearestQuery to query_points_groups."""
    from qdrant_client.models import NearestQuery

    # Collection exists and the grouped query answers with no groups.
    mock_client.collection_exists.return_value = True
    mock_client.query_points_groups.return_value = mocker.MagicMock(groups=[])

    mmr_options = SearchWith(mmr_diversity=0.35, mmr_candidates=40)
    stmt = SearchStmt(
        collection="articles",
        query_text="ai",
        limit=5,
        model=None,
        group_by="category",
        with_clause=mmr_options,
    )
    executor.execute(stmt)

    sent_query = mock_client.query_points_groups.call_args.kwargs["query"]
    assert isinstance(sent_query, NearestQuery)
    mmr = sent_query.mmr
    assert mmr is not None
    assert mmr.diversity == pytest.approx(0.35)
    assert mmr.candidates_limit == 40


class TestUpdateVector:
def test_update_vector_calls_update_vectors(self, executor, mock_client):
Expand All @@ -2288,7 +2370,6 @@ def test_update_vector_calls_update_vectors(self, executor, mock_client):

def test_update_vector_passes_correct_point_id(self, executor, mock_client):
from qql.ast_nodes import UpdateVectorStmt
from qdrant_client.models import PointVectors
mock_client.collection_exists.return_value = True
mock_client.get_collection.return_value.config.params.vectors = {} # non-dict → unnamed
node = UpdateVectorStmt(
Expand Down Expand Up @@ -2480,7 +2561,6 @@ def test_update_vector_unnamed_collection_sends_plain_list(self, executor, mock_
from qql.ast_nodes import UpdateVectorStmt
mock_client.collection_exists.return_value = True
# Unnamed collection: get_collection returns non-dict vectors
mock_vectors = mocker.MagicMock() if False else type("V", (), {})()
info = mock_client.get_collection.return_value
info.config.params.vectors = [None] # list → not a dict → unnamed

Expand Down
Loading
Loading