pavanjava · pavanjava · May 15, 2026 · May 15, 2026
diff --git a/README.md b/README.md
@@ -101,8 +101,11 @@ INSERT BULK INTO COLLECTION articles VALUES [{'text': '...'}, {'text': '...'}]
 -- Search
 SEARCH articles SIMILAR TO 'query' LIMIT 10
 SEARCH articles SIMILAR TO 'query' LIMIT 10 WHERE year >= 2020
+SEARCH articles SIMILAR TO 'query' LIMIT 10 WHERE active = true
 SEARCH articles SIMILAR TO 'query' LIMIT 10 USING HYBRID
 SEARCH articles SIMILAR TO 'query' LIMIT 10 USING HYBRID FUSION 'dbsf'
+SEARCH articles SIMILAR TO 'query' LIMIT 10 WITH { indexed_only: true }
+SEARCH articles SIMILAR TO 'query' LIMIT 10 WITH { quantization: { ignore: true, oversampling: 2 } }
 SEARCH articles SIMILAR TO 'query' LIMIT 10 USING HYBRID RERANK
 
 -- Scroll

diff --git a/docs/filters.md b/docs/filters.md
@@ -12,6 +12,9 @@ The `WHERE` clause lets you filter on any payload field using SQL-style predicat
 -- Exact match
 SEARCH articles SIMILAR TO 'ml' LIMIT 10 WHERE category = 'paper'
 
+-- Boolean match
+SEARCH articles SIMILAR TO 'ml' LIMIT 10 WHERE active = true
+
 -- Not equal
 SEARCH articles SIMILAR TO 'ml' LIMIT 10 WHERE status != 'draft'
 ```
@@ -43,6 +46,7 @@ SEARCH articles SIMILAR TO 'history of ai' LIMIT 10 WHERE year BETWEEN 2018 AND
 ```sql
 SEARCH articles SIMILAR TO 'retrieval' LIMIT 10 WHERE status IN ('published', 'reviewed')
 SEARCH articles SIMILAR TO 'retrieval' LIMIT 10 WHERE status NOT IN ('deleted', 'archived')
+SEARCH articles SIMILAR TO 'retrieval' LIMIT 10 WHERE active IN (true, false)
 ```
 
 ---

diff --git a/docs/programmatic.md b/docs/programmatic.md
@@ -138,7 +138,7 @@ class ExecutionResult:
 | INSERT BULK | `None` (count in `result.message`) |
 | SELECT | `{"id": str, "payload": dict}` or `None` when not found |
 | SEARCH | `[{"id": str, "score": float, "payload": dict}, ...]` |
-| SCROLL | `{"points": [{"id": str, "payload": dict}, ...], "next_offset": str \| None}` |
+| SCROLL | `{"points": [{"id": str, "payload": dict}, ...], "next_offset": str \| int \| None}` |
 | RECOMMEND | `[{"id": str, "score": float, "payload": dict}, ...]` |
 | SHOW COLLECTIONS | `["name1", "name2", ...]` |
 | SHOW COLLECTION | `{"name": str, "status": str, "points_count": int \| None, "indexed_vectors_count": int \| None, "segments_count": int, "topology": str, "vectors": dict, "sparse_vectors": dict \| None, "quantization": str \| None, "hnsw_config": dict, "payload_schema": dict \| None, "sharding": dict}` |

diff --git a/docs/search.md b/docs/search.md
@@ -17,7 +17,7 @@ SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING HYBRID
 SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING HYBRID [FUSION 'rrf|dbsf'] [DENSE MODEL '<model>'] [SPARSE MODEL '<model>'] [WHERE <filter>]
 SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING SPARSE [MODEL '<sparse_model>']
 SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> EXACT
-SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> [USING ...] [WHERE <filter>] [RERANK] WITH { hnsw_ef: <n>, exact: true|false, acorn: true|false }
+SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> [USING ...] [WHERE <filter>] [RERANK] WITH { hnsw_ef: <n>, exact: true|false, acorn: true|false, indexed_only: true|false, quantization: { ignore: true|false, rescore: true|false, oversampling: <n> } }
 SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> [USING ...] [WHERE <filter>] RERANK [MODEL '<reranker_model>']
 ```
 
@@ -102,10 +102,12 @@ Use these when you want to debug retrieval quality or tune recall without changi
 | `WITH { hnsw_ef: 128 }` | Increase HNSW exploration at query time |
 | `WITH { exact: true }` | Force exact KNN explicitly |
 | `WITH { acorn: true }` | Enable ACORN for filtered queries |
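+| `WITH { indexed_only: true }` | Search only indexed (or small) segments; avoids slow scans of unindexed data, but points that are not indexed yet may be missing from the results |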
+| `WITH { quantization: { ... } }` | Tune quantized search at query time via the nested keys `ignore`, `rescore`, and `oversampling` |
 
 - `EXACT` can appear after `LIMIT` or after `RERANK`
 - `WITH { ... }` can appear after `WHERE` and/or `RERANK`
-- Supported `WITH` keys are only `hnsw_ef`, `exact`, and `acorn`
+- Supported top-level `WITH` keys are `hnsw_ef`, `exact`, `acorn`, `indexed_only`, and `quantization`
 
 ```sql
 -- Exact KNN baseline
@@ -116,6 +118,12 @@ SEARCH articles SIMILAR TO 'transformers' LIMIT 10 WITH { hnsw_ef: 256 }
 
 -- Filtered search with ACORN
 SEARCH articles SIMILAR TO 'RAG' LIMIT 10 WHERE tag = 'li' WITH { acorn: true }
+
+-- Restrict to indexed segments only
+SEARCH articles SIMILAR TO 'retrieval' LIMIT 10 WITH { indexed_only: true }
+
+-- Quantized-search tuning
+SEARCH articles SIMILAR TO 'vector db' LIMIT 10 WITH { quantization: { ignore: true, oversampling: 2 } }
 ```
 
 ---
@@ -142,6 +150,7 @@ SCROLL FROM articles AFTER 'cursor-id' LIMIT 50
 **Behavior:**
 - Returns points in ID order with payloads.
 - Returns a `next_offset` cursor when more points are available.
+- `next_offset` preserves the native point-id type (string or integer).
 - Use `AFTER <next_offset>` to fetch the next page.
 
 ---
@@ -230,7 +239,7 @@ RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) STRATEGY '<strategy>'
 RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) LIMIT <n> WHERE <filter>
 RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) LIMIT <n> OFFSET <n>
 RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) LIMIT <n> SCORE THRESHOLD <f>
-RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) LIMIT <n> WITH { exact: true, hnsw_ef: <n> }
+RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) LIMIT <n> WITH { exact: true, hnsw_ef: <n>, indexed_only: true|false, quantization: { ignore: true|false, rescore: true|false, oversampling: <n> } }
 RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) LIMIT <n> LOOKUP FROM <collection>
 RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) LIMIT <n> LOOKUP FROM <collection> VECTOR '<name>'
 RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) LIMIT <n> USING '<vector_name>'

diff --git a/src/qql/ast_nodes.py b/src/qql/ast_nodes.py
@@ -27,6 +27,15 @@ class SearchWith:
     hnsw_ef: int | None = None
     exact: bool = False
     acorn: bool = False
+    indexed_only: bool = False
+    quantization: "QuantizationSearchWith | None" = None
+
+
+@dataclass(frozen=True)
+class QuantizationSearchWith:  # nested `quantization: { ... }` options of a search WITH clause
+    ignore: bool | None = None  # None means the key was not written in the query
+    rescore: bool | None = None  # None means the key was not written in the query
+    oversampling: float | None = None  # parser stores the number as float; None means not written
 
 
 # ── Filter expression leaf nodes ──────────────────────────────────────────────
@@ -36,7 +45,7 @@ class CompareExpr:
     """field op literal  — covers =, !=, >, >=, <, <="""
     field: str
     op: str   # one of: "=", "!=", ">", ">=", "<", "<="
-    value: str | int | float
+    value: str | int | float | bool
 
 
 @dataclass(frozen=True)
@@ -51,14 +60,14 @@ class BetweenExpr:
 class InExpr:
     """field IN (v1, v2, ...)"""
     field: str
-    values: tuple[str | int | float, ...]
+    values: tuple[str | int | float | bool, ...]
 
 
 @dataclass(frozen=True)
 class NotInExpr:
     """field NOT IN (v1, v2, ...)"""
     field: str
-    values: tuple[str | int | float, ...]
+    values: tuple[str | int | float | bool, ...]
 
 
 @dataclass(frozen=True)

diff --git a/src/qql/cli.py b/src/qql/cli.py
@@ -70,7 +70,7 @@
       Optional: [yellow]WHERE[/yellow] <filter>   (e.g. WHERE year > 2020 AND status = 'ok')
       Optional: [yellow]RERANK[/yellow] [MODEL '<model>']   rerank results with a cross-encoder
       Optional: [yellow]EXACT[/yellow]   bypass HNSW and perform exact search
-      Optional: [yellow]WITH[/yellow] { hnsw_ef: <int>, exact: <bool>, acorn: <bool> }   search parameters
+      Optional: [yellow]WITH[/yellow] { hnsw_ef: <int>, exact: <bool>, acorn: <bool>, indexed_only: <bool>, quantization: { ignore: <bool>, rescore: <bool>, oversampling: <n> } }   search parameters
       Optional: [yellow]GROUP BY[/yellow] <field> [[yellow]GROUP_SIZE[/yellow] <n>]
                   Group results by a payload field value (default GROUP_SIZE: 3).
                   Field must be keyword or integer type. RERANK and GROUP BY cannot be combined.

diff --git a/src/qql/executor.py b/src/qql/executor.py
@@ -35,6 +35,7 @@
     Prefetch,
     ProductQuantization,
     ProductQuantizationConfig,
+    QuantizationSearchParams,
     Range,
     RecommendInput,
     RecommendQuery,
@@ -559,7 +560,7 @@ def _execute_scroll(self, node: ScrollStmt) -> ExecutionResult:
         return ExecutionResult(
             success=True,
             message=f"Scrolled {len(points)} point(s) from '{node.collection}'",
-            data={"points": points, "next_offset": None if next_offset is None else str(next_offset)},
+            data={"points": points, "next_offset": next_offset},
         )
 
     def _execute_select(self, node: SelectStmt) -> ExecutionResult:
@@ -678,6 +679,7 @@ def _execute_search(self, node: SearchStmt) -> ExecutionResult:
                     using="sparse",
                     limit=fetch_limit,
                     query_filter=qdrant_filter,
+                    search_params=search_params,
                 )
             except UnexpectedResponse as e:
                 raise QQLRuntimeError(f"Qdrant error during SEARCH: {e}") from e
@@ -825,9 +827,18 @@ def _execute_recommend(self, node: RecommendStmt) -> ExecutionResult:
     def _build_search_params(self, with_clause: SearchWith | None) -> SearchParams | None:
         if with_clause is None:
             return None
+        quantization = None  # stays None unless the WITH clause carried a quantization block
+        if with_clause.quantization is not None:
+            quantization = QuantizationSearchParams(
+                ignore=with_clause.quantization.ignore,
+                rescore=with_clause.quantization.rescore,
+                oversampling=with_clause.quantization.oversampling,
+            )
         return SearchParams(
             hnsw_ef=with_clause.hnsw_ef,
             exact=with_clause.exact,
+            quantization=quantization,
+            indexed_only=True if with_clause.indexed_only else None,  # a false flag is passed as None, not as an explicit False
             acorn=AcornSearchParams(enable=True) if with_clause.acorn else None,
         )
 

diff --git a/src/qql/parser.py b/src/qql/parser.py
@@ -23,6 +23,7 @@
     NotExpr,
     NotInExpr,
     OrExpr,
+    QuantizationSearchWith,
     QuantizationConfig,
     QuantizationType,
     RecommendStmt,
@@ -414,6 +415,8 @@ def _parse_search(self) -> SearchStmt:
                     hnsw_ef=with_clause.hnsw_ef,
                     exact=True,
                     acorn=with_clause.acorn,
+                    indexed_only=with_clause.indexed_only,
+                    quantization=with_clause.quantization,
                 )
         if self._peek().kind == TokenKind.WITH:
             self._advance()  # consume WITH
@@ -425,6 +428,8 @@ def _parse_search(self) -> SearchStmt:
                     hnsw_ef=parsed_with.hnsw_ef or with_clause.hnsw_ef,
                     exact=parsed_with.exact or with_clause.exact,
                     acorn=parsed_with.acorn or with_clause.acorn,
+                    indexed_only=parsed_with.indexed_only or with_clause.indexed_only,
+                    quantization=parsed_with.quantization or with_clause.quantization,
                 )
         group_by: str | None = None
         group_size: int = 3
@@ -760,8 +765,8 @@ def _parse_field_path(self) -> str:
             f"Expected a field name, got '{tok.value}'", tok.pos
         )
 
-    def _parse_literal(self) -> str | int | float:
-        """STRING | INTEGER | FLOAT"""
+    def _parse_literal(self) -> str | int | float | bool:
+        """STRING | INTEGER | FLOAT | boolean"""
         tok = self._peek()
         if tok.kind == TokenKind.STRING:
             self._advance()
@@ -772,8 +777,16 @@ def _parse_literal(self) -> str | int | float:
         if tok.kind == TokenKind.FLOAT:
             self._advance()
             return float(tok.value)
+        if tok.kind == TokenKind.IDENTIFIER:
+            upper = tok.value.upper()
+            if upper == "TRUE":
+                self._advance()
+                return True
+            if upper == "FALSE":
+                self._advance()
+                return False
         raise QQLSyntaxError(
-            f"Expected a literal value (string, integer, or float), got '{tok.value}'",
+            f"Expected a literal value (string, integer, float, or boolean), got '{tok.value}'",
             tok.pos,
         )
 
@@ -790,10 +803,10 @@ def _parse_number(self) -> int | float:
             f"Expected a number, got '{tok.value}'", tok.pos
         )
 
-    def _parse_literal_list(self) -> list[str | int | float]:
+    def _parse_literal_list(self) -> list[str | int | float | bool]:
         """'(' literal { ',' literal } [','] ')'  — used by IN / NOT IN."""
         self._expect(TokenKind.LPAREN)
-        items: list[str | int | float] = []
+        items: list[str | int | float | bool] = []
         if self._peek().kind == TokenKind.RPAREN:
             self._advance()
             return items
@@ -942,13 +955,15 @@ def _parse_value(self) -> Any:
             return self._parse_list()
         raise QQLSyntaxError(f"Unexpected value token '{tok.value}'", tok.pos)
 
-    # ── WITH clause: { hnsw_ef: N, exact: true, acorn: true } ──
+    # ── WITH clause: { hnsw_ef: N, exact: true, acorn: true, ... } ──
 
     def _parse_with_clause(self) -> SearchWith:
         self._expect(TokenKind.LBRACE)
         hnsw_ef: int | None = None
         exact: bool = False
         acorn: bool = False
+        indexed_only: bool = False
+        quantization: QuantizationSearchWith | None = None
         while self._peek().kind != TokenKind.RBRACE:
             key_tok = self._peek()
             if key_tok.kind not in (
@@ -969,9 +984,14 @@ def _parse_with_clause(self) -> SearchWith:
                 exact = self._parse_bool()
             elif key == "acorn":
                 acorn = self._parse_bool()
+            elif key == "indexed_only":
+                indexed_only = self._parse_bool()
+            elif key == "quantization":
+                quantization = self._parse_quantization_search_with()
             else:
                 raise QQLSyntaxError(
-                    f"Unknown WITH parameter '{key}'. Expected: hnsw_ef, exact, acorn",
+                    "Unknown WITH parameter "
+                    f"'{key}'. Expected: hnsw_ef, exact, acorn, indexed_only, quantization",
                     key_tok.pos,
                 )
             if self._peek().kind == TokenKind.COMMA:
@@ -985,6 +1005,44 @@ def _parse_with_clause(self) -> SearchWith:
             hnsw_ef=hnsw_ef,
             exact=exact,
             acorn=acorn,
+            indexed_only=indexed_only,
+            quantization=quantization,
+        )
+
+    def _parse_quantization_search_with(self) -> QuantizationSearchWith:  # parses `{ ignore: <bool>, rescore: <bool>, oversampling: <n> }`
+        self._expect(TokenKind.LBRACE)
+        ignore: bool | None = None  # each option stays None unless its key appears in the block
+        rescore: bool | None = None
+        oversampling: float | None = None
+
+        while self._peek().kind != TokenKind.RBRACE:
+            key_tok = self._expect(TokenKind.IDENTIFIER)
+            key = key_tok.value.lower()  # keys are matched case-insensitively
+            self._expect(TokenKind.COLON)
+            if key == "ignore":
+                ignore = self._parse_bool()
+            elif key == "rescore":
+                rescore = self._parse_bool()
+            elif key == "oversampling":
+                oversampling = float(self._parse_number())  # integer literals are widened to float
+            else:
+                raise QQLSyntaxError(
+                    "Unknown quantization parameter "
+                    f"'{key}'. Expected: ignore, rescore, oversampling",
+                    key_tok.pos,
+                )
+            if self._peek().kind == TokenKind.COMMA:
+                self._advance()
+                if self._peek().kind == TokenKind.RBRACE:  # a trailing comma before '}' is accepted
+                    break
+            else:
+                break  # no comma: the block must close here, enforced by the RBRACE expect below
+
+        self._expect(TokenKind.RBRACE)
+        return QuantizationSearchWith(
+            ignore=ignore,
+            rescore=rescore,
+            oversampling=oversampling,
+        )
 
     def _parse_bool(self) -> bool: