pavanjava · pavanjava · May 15, 2026 · May 15, 2026
diff --git a/README.md b/README.md
@@ -123,11 +123,15 @@ SELECT * FROM articles WHERE id = '3f2e1a4b-...'
 -- Collections
 CREATE COLLECTION articles
 CREATE COLLECTION articles HYBRID
+CREATE COLLECTION articles HNSW { payload_m: 16 }
 CREATE COLLECTION articles QUANTIZE SCALAR
 CREATE COLLECTION articles QUANTIZE TURBO
 CREATE COLLECTION articles QUANTIZE TURBO BITS 2
 CREATE COLLECTION articles QUANTIZE TURBO BITS 1.5 ALWAYS RAM
 CREATE INDEX ON COLLECTION articles FOR year TYPE integer
+CREATE INDEX ON COLLECTION articles FOR tenant_id TYPE keyword WITH { is_tenant: true, on_disk: true }
+CREATE INDEX ON COLLECTION articles FOR doc_id TYPE uuid
+CREATE INDEX ON COLLECTION articles FOR title TYPE text WITH { tokenizer: 'word', min_token_len: 2, lowercase: true }
 SHOW COLLECTIONS
 SHOW COLLECTION articles
 DROP COLLECTION articles

diff --git a/docs/collections.md b/docs/collections.md
@@ -93,9 +93,10 @@ CREATE COLLECTION <collection_name> HYBRID
 CREATE COLLECTION <collection_name> USING MODEL '<model_name>'
 CREATE COLLECTION <collection_name> USING HYBRID
 CREATE COLLECTION <collection_name> USING HYBRID DENSE MODEL '<model>'
+CREATE COLLECTION <collection_name> HNSW { payload_m: <int> }
 ```
 
-Any of the above forms can be followed by an optional `QUANTIZE` clause — see [Quantization](#quantization--quantize-clause) below.
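+Any of the above forms can be followed by an optional `QUANTIZE` clause (see [Quantization](#quantization--quantize-clause) below) and/or an `HNSW { payload_m: <int> }` clause (see [HNSW clause](#hnsw-clause) below).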
 
 **Examples:**
 
@@ -119,8 +120,25 @@ Hybrid collection with a custom dense model:
 CREATE COLLECTION research_papers USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5'
 ```
 
+Dense collection with payload-aware HNSW links:
+```sql
+CREATE COLLECTION research_papers HNSW {payload_m: 16}
+```
+
 When `USING MODEL` is omitted, the collection uses the **default embedding model's dimensions** (384 for `all-MiniLM-L6-v2`). If the collection already exists, the command succeeds with a message and does nothing.
 
+### HNSW clause
+
+QQL currently supports one explicit HNSW knob during collection creation:
+
+- `payload_m` — integer; sets the number of links per node (the HNSW `M` value) for the additional payload-aware HNSW links that Qdrant uses for filtered / tenant-aware workloads
+
+Example:
+
+```sql
+CREATE COLLECTION tenant_docs USING HYBRID HNSW {payload_m: 16}
+```
+
 ---
 
 ## Quantization — QUANTIZE clause
@@ -239,6 +257,7 @@ Creates a payload index on a collection field. Payload indexes speed up `WHERE`
 **Syntax:**
 ```
 CREATE INDEX ON COLLECTION <collection_name> FOR <field_name> TYPE <schema_type>
+CREATE INDEX ON COLLECTION <collection_name> FOR <field_name> TYPE <schema_type> WITH { ... }
 ```
 
 **Supported schema types:**
@@ -252,19 +271,41 @@ CREATE INDEX ON COLLECTION <collection_name> FOR <field_name> TYPE <schema_type>
 | `text` | Full-text search (enables `MATCH` operators) |
 | `geo` | Geospatial coordinates |
 | `datetime` | Date/time values |
+| `uuid` | UUID payload values |
 
 **Examples:**
 
 ```sql
 CREATE INDEX ON COLLECTION articles FOR category TYPE keyword
+CREATE INDEX ON COLLECTION articles FOR tenant_id TYPE keyword WITH {is_tenant: true, on_disk: true, enable_hnsw: true}
 CREATE INDEX ON COLLECTION articles FOR year TYPE integer
+CREATE INDEX ON COLLECTION articles FOR doc_id TYPE uuid
 CREATE INDEX ON COLLECTION articles FOR title TYPE text
+CREATE INDEX ON COLLECTION articles FOR title TYPE text WITH {tokenizer: 'word', min_token_len: 2, max_token_len: 20, lowercase: true, phrase_matching: true}
 CREATE INDEX ON COLLECTION articles FOR meta.author TYPE keyword
 ```
 
+**Advanced options currently supported:**
+
+- `keyword` / `uuid`
+  - `is_tenant: true|false`
+  - `on_disk: true|false`
+  - `enable_hnsw: true|false`
+- `text`
+  - `tokenizer: 'prefix'|'whitespace'|'word'|'multilingual'`
+  - `min_token_len: <int>`
+  - `max_token_len: <int>`
+  - `lowercase: true|false`
+  - `ascii_folding: true|false`
+  - `phrase_matching: true|false`
+  - `stopwords: 'english'` or `stopwords: ['a', 'the']`
+  - `on_disk: true|false`
+  - `enable_hnsw: true|false`
+
 **Rules:**
 - The collection must already exist. Raises an error otherwise.
 - Indexes are idempotent — creating the same index twice succeeds silently.
+- Advanced `WITH { ... }` options are currently supported only for `keyword`, `uuid`, and `text`.
 
 ---
 

diff --git a/docs/programmatic.md b/docs/programmatic.md
@@ -88,7 +88,7 @@ result = run_query(
 )
 print(result.data["topology"])         # "dense" or "hybrid"
 print(result.data["vectors"])          # {"": {...}} or {"dense": {...}, ...}
-print(result.data["payload_schema"])   # {"field": "keyword", ...} or None
+print(result.data["payload_schema"])   # {"field": {"type": "keyword", ...}, ...} or None
 ```
 
 ---

diff --git a/docs/reference.md b/docs/reference.md
@@ -192,5 +192,6 @@ Expected output: **500 tests passing**.
 | `Vector elements must be numeric; got invalid value: ...` | A non-numeric value (string or null) was present in the vector array for `UPDATE SET VECTOR` | Ensure all vector elements are floats: `UPDATE … [0.1, 0.2, …, 0.N]` |
 | `GROUP_SIZE must be a positive integer, got N` | `GROUP_SIZE 0` or a negative value was specified | Use a positive integer: `GROUP_SIZE 3` |
 | `Qdrant error during SCROLL: ...` | Qdrant rejected scroll request | Verify collection state, filter, and cursor (`AFTER`) value |
-| `Unknown index type '...'` | Invalid schema type in CREATE INDEX | Use one of: `keyword`, `integer`, `float`, `bool`, `text`, `geo`, `datetime` |
+| `Unknown index type '...'` | Invalid schema type in CREATE INDEX | Use one of: `keyword`, `integer`, `float`, `bool`, `text`, `geo`, `datetime`, `uuid` |
+| `Unknown CREATE INDEX option '...'` | Unsupported advanced option for the chosen payload index type | Check which `WITH { ... }` keys are supported for `keyword`, `uuid`, or `text` |
 | `Qdrant error during CREATE INDEX: ...` | Qdrant rejected the index creation | Check field name and collection state |
diff --git a/src/qql/ast_nodes.py b/src/qql/ast_nodes.py
@@ -172,13 +172,15 @@ class CreateCollectionStmt:
     hybrid: bool = False                      # if True, create with dense + sparse named vectors
     model: str | None = None                  # dense model; None → use config default
     quantization: QuantizationConfig | None = None  # optional QUANTIZE clause
+    payload_m: int | None = None              # optional HNSW { payload_m: N } clause
 
 
 @dataclass(frozen=True)
 class CreateIndexStmt:
     collection: str
     field_name: str
     schema: str
+    options: dict[str, Any] | None = None  # optional WITH { ... } clause options
 
 
 @dataclass(frozen=True)

diff --git a/src/qql/cli.py b/src/qql/cli.py
@@ -38,6 +38,7 @@
       Create a new collection. Add HYBRID for dense+sparse BM25 vectors.
       Optional: [yellow]USING MODEL[/yellow] '<model>'
       Optional: [yellow]USING HYBRID[/yellow] [DENSE MODEL '<model>']
+      Optional: [yellow]HNSW[/yellow] { payload_m: <int> }
       Optional: [yellow]QUANTIZE SCALAR[/yellow] [QUANTILE <0.0–1.0>] [ALWAYS RAM]
       Optional: [yellow]QUANTIZE BINARY[/yellow] [ALWAYS RAM]
       Optional: [yellow]QUANTIZE PRODUCT[/yellow] [ALWAYS RAM]   (4× compression)
@@ -46,6 +47,11 @@
   [yellow]DROP COLLECTION[/yellow] <name>
       Delete a collection and all its points.
 
+  [yellow]CREATE INDEX ON COLLECTION[/yellow] <name> [yellow]FOR[/yellow] <field> [yellow]TYPE[/yellow] <schema>
+      Create a payload index for filtering or text search.
+      Optional: [yellow]WITH[/yellow] { is_tenant, on_disk, enable_hnsw } for keyword/uuid
+      Optional: [yellow]WITH[/yellow] { tokenizer, min_token_len, max_token_len, lowercase, ascii_folding, phrase_matching, stopwords, on_disk, enable_hnsw } for text
+
   [yellow]SHOW COLLECTIONS[/yellow]
       List all collections in the connected Qdrant instance.
 
@@ -420,8 +426,16 @@ def _format_collection_diagnostics(data: dict) -> str:
     schema = data["payload_schema"]
     if schema:
         lines.append("  Payload indexes:")
-        for field, dtype in schema.items():
-            lines.append(f"    {field}: {dtype}")
+        for field, index_info in schema.items():
+            if isinstance(index_info, dict):
+                line = f"    {field}: {index_info.get('type')}"
+                params = index_info.get("params")
+                if params:
+                    rendered = ", ".join(f"{k}={v}" for k, v in params.items())
+                    line += f" ({rendered})"
+                lines.append(line)
+            else:
+                lines.append(f"    {field}: {index_info}")
     else:
         lines.append("  Payload indexes      : none")