From 93ce2e93d8e087d78e86019862e82d83ed8256cc Mon Sep 17 00:00:00 2001 From: Srimon Date: Sat, 25 Apr 2026 12:22:18 +0530 Subject: [PATCH 1/2] feat: implement CREATE INDEX statement and associated functionality --- src/qql/ast_nodes.py | 11 +++++- src/qql/executor.py | 61 +++++++++++++++++++++++++++++- src/qql/lexer.py | 8 ++++ src/qql/parser.py | 86 +++++++++++++++++++++++++----------------- tests/test_executor.py | 30 +++++++++++++++ tests/test_parser.py | 17 +++++++++ 6 files changed, 175 insertions(+), 38 deletions(-) diff --git a/src/qql/ast_nodes.py b/src/qql/ast_nodes.py index 7bd64bf..1f282f9 100644 --- a/src/qql/ast_nodes.py +++ b/src/qql/ast_nodes.py @@ -145,6 +145,13 @@ class CreateCollectionStmt: model: str | None = None # dense model; None → use config default +@dataclass(frozen=True) +class CreateIndexStmt: + collection: str + field_name: str + schema: str + + @dataclass(frozen=True) class DropCollectionStmt: collection: str @@ -188,7 +195,8 @@ class RecommendStmt: @dataclass(frozen=True) class DeleteStmt: collection: str - point_id: str | int + point_id: str | int | None = None + query_filter: FilterExpr | None = None # Union type for all top-level statement nodes @@ -196,6 +204,7 @@ class DeleteStmt: InsertStmt | InsertBulkStmt | CreateCollectionStmt + | CreateIndexStmt | DropCollectionStmt | ShowCollectionsStmt | SearchStmt diff --git a/src/qql/executor.py b/src/qql/executor.py index ee78a69..1a8d8fa 100644 --- a/src/qql/executor.py +++ b/src/qql/executor.py @@ -26,6 +26,7 @@ MatchValue, Modifier, PayloadField, + PayloadSchemaType, PointStruct, Prefetch, Range, @@ -44,6 +45,7 @@ BetweenExpr, CompareExpr, CreateCollectionStmt, + CreateIndexStmt, DeleteStmt, DropCollectionStmt, FilterExpr, @@ -93,6 +95,8 @@ def execute(self, node: ASTNode) -> ExecutionResult: return self._execute_insert(node) if isinstance(node, CreateCollectionStmt): return self._execute_create(node) + if isinstance(node, CreateIndexStmt): + return self._execute_create_index(node) 
if isinstance(node, DropCollectionStmt): return self._execute_drop(node) if isinstance(node, ShowCollectionsStmt): @@ -321,6 +325,43 @@ def _execute_create(self, node: CreateCollectionStmt) -> ExecutionResult: message=f"Collection '{node.collection}' created ({dims}-dimensional vectors, cosine distance)", ) + def _execute_create_index(self, node: CreateIndexStmt) -> ExecutionResult: + if not self._client.collection_exists(node.collection): + raise QQLRuntimeError(f"Collection '{node.collection}' does not exist") + + schema_map = { + "keyword": PayloadSchemaType.KEYWORD, + "integer": PayloadSchemaType.INTEGER, + "float": PayloadSchemaType.FLOAT, + "bool": PayloadSchemaType.BOOL, + "text": PayloadSchemaType.TEXT, + "geo": PayloadSchemaType.GEO, + "datetime": PayloadSchemaType.DATETIME, + } + try: + field_schema = schema_map[node.schema] + except KeyError as e: + raise QQLRuntimeError( + "Unknown index type '" + f"{node.schema}'. Expected one of: keyword, integer, float, bool, text, geo, datetime" + ) from e + + try: + self._client.create_payload_index( + collection_name=node.collection, + field_name=node.field_name, + field_schema=field_schema, + ) + except UnexpectedResponse as e: + raise QQLRuntimeError(f"Qdrant error during CREATE INDEX: {e}") from e + + return ExecutionResult( + success=True, + message=( + f"Created index on '{node.collection}.{node.field_name}' as '{node.schema}'" + ), + ) + def _execute_drop(self, node: DropCollectionStmt) -> ExecutionResult: if not self._client.collection_exists(node.collection): raise QQLRuntimeError(f"Collection '{node.collection}' does not exist") @@ -648,9 +689,25 @@ def _execute_delete(self, node: DeleteStmt) -> ExecutionResult: if not self._client.collection_exists(node.collection): raise QQLRuntimeError(f"Collection '{node.collection}' does not exist") - from qdrant_client.models import PointIdsList - try: + if node.query_filter is not None: + self._client.delete( + collection_name=node.collection, + wait=True, + 
points_selector=self._wrap_as_filter( + self._build_qdrant_filter(node.query_filter) + ), + ) + return ExecutionResult( + success=True, + message=f"Deleted points from '{node.collection}' by filter", + ) + + from qdrant_client.models import PointIdsList + + if node.point_id is None: + raise QQLRuntimeError("DELETE requires either a point id or a filter") + self._client.delete( collection_name=node.collection, wait=True, diff --git a/src/qql/lexer.py b/src/qql/lexer.py index ae49247..49f4683 100644 --- a/src/qql/lexer.py +++ b/src/qql/lexer.py @@ -21,6 +21,8 @@ class TokenKind(Enum): WITH = auto() ACORN = auto() CREATE = auto() + INDEX = auto() + ON = auto() DROP = auto() SHOW = auto() COLLECTIONS = auto() @@ -42,6 +44,8 @@ class TokenKind(Enum): FROM = auto() WHERE = auto() ID = auto() + FOR = auto() + TYPE = auto() # ── Filter keywords ─────────────────────────────────────────────────── AND = auto() OR = auto() @@ -96,6 +100,8 @@ class TokenKind(Enum): "WITH": TokenKind.WITH, "ACORN": TokenKind.ACORN, "CREATE": TokenKind.CREATE, + "INDEX": TokenKind.INDEX, + "ON": TokenKind.ON, "DROP": TokenKind.DROP, "SHOW": TokenKind.SHOW, "COLLECTIONS": TokenKind.COLLECTIONS, @@ -117,6 +123,8 @@ class TokenKind(Enum): "FROM": TokenKind.FROM, "WHERE": TokenKind.WHERE, "ID": TokenKind.ID, + "FOR": TokenKind.FOR, + "TYPE": TokenKind.TYPE, # Filter keywords "AND": TokenKind.AND, "OR": TokenKind.OR, diff --git a/src/qql/parser.py b/src/qql/parser.py index 54ac119..97ec325 100644 --- a/src/qql/parser.py +++ b/src/qql/parser.py @@ -6,6 +6,7 @@ BetweenExpr, CompareExpr, CreateCollectionStmt, + CreateIndexStmt, DeleteStmt, DropCollectionStmt, FilterExpr, @@ -150,34 +151,45 @@ def _parse_insert_bulk_body(self) -> InsertBulkStmt: def _parse_create(self) -> CreateCollectionStmt: self._expect(TokenKind.CREATE) - self._expect(TokenKind.COLLECTION) - collection = self._parse_identifier() - hybrid: bool = False - model: str | None = None - - if self._peek().kind == TokenKind.HYBRID: - # Bare 
HYBRID shorthand — backward compat + if self._peek().kind == TokenKind.COLLECTION: self._advance() - hybrid = True - elif self._peek().kind == TokenKind.USING: - self._advance() # consume USING + collection = self._parse_identifier() + hybrid: bool = False + model: str | None = None + if self._peek().kind == TokenKind.HYBRID: - self._advance() # consume HYBRID + # Bare HYBRID shorthand — backward compat + self._advance() hybrid = True - # Optional DENSE MODEL sub-clause - if self._peek().kind == TokenKind.DENSE: - self._advance() # consume DENSE + elif self._peek().kind == TokenKind.USING: + self._advance() # consume USING + if self._peek().kind == TokenKind.HYBRID: + self._advance() # consume HYBRID + hybrid = True + # Optional DENSE MODEL sub-clause + if self._peek().kind == TokenKind.DENSE: + self._advance() # consume DENSE + self._expect(TokenKind.MODEL) + model = self._expect(TokenKind.STRING).value + else: self._expect(TokenKind.MODEL) model = self._expect(TokenKind.STRING).value - else: - self._expect(TokenKind.MODEL) - model = self._expect(TokenKind.STRING).value - return CreateCollectionStmt( - collection=collection, - hybrid=hybrid, - model=model, - ) + return CreateCollectionStmt( + collection=collection, + hybrid=hybrid, + model=model, + ) + + self._expect(TokenKind.INDEX) + self._expect(TokenKind.ON) + self._expect(TokenKind.COLLECTION) + collection = self._parse_identifier() + self._expect(TokenKind.FOR) + field_name = self._parse_field_path() + self._expect(TokenKind.TYPE) + schema = self._expect(TokenKind.IDENTIFIER).value.lower() + return CreateIndexStmt(collection=collection, field_name=field_name, schema=schema) def _parse_drop(self) -> DropCollectionStmt: self._expect(TokenKind.DROP) @@ -356,20 +368,24 @@ def _parse_delete(self) -> DeleteStmt: self._expect(TokenKind.FROM) collection = self._parse_identifier() self._expect(TokenKind.WHERE) - self._expect(TokenKind.ID) - self._expect(TokenKind.EQUALS) - tok = self._peek() - if tok.kind == 
TokenKind.STRING: - self._advance() - point_id: str | int = tok.value - elif tok.kind == TokenKind.INTEGER: + if self._peek().kind == TokenKind.ID: self._advance() - point_id = int(tok.value) - else: - raise QQLSyntaxError( - f"Expected string or integer for point id, got '{tok.value}'", tok.pos - ) - return DeleteStmt(collection=collection, point_id=point_id) + self._expect(TokenKind.EQUALS) + tok = self._peek() + if tok.kind == TokenKind.STRING: + self._advance() + point_id: str | int = tok.value + elif tok.kind == TokenKind.INTEGER: + self._advance() + point_id = int(tok.value) + else: + raise QQLSyntaxError( + f"Expected string or integer for point id, got '{tok.value}'", tok.pos + ) + return DeleteStmt(collection=collection, point_id=point_id) + + query_filter = self._parse_filter_expr() + return DeleteStmt(collection=collection, query_filter=query_filter) # ── WHERE clause filter parsing (precedence: NOT > AND > OR) ───────── diff --git a/tests/test_executor.py b/tests/test_executor.py index 5ec3ebd..14aab9b 100644 --- a/tests/test_executor.py +++ b/tests/test_executor.py @@ -2,6 +2,7 @@ from qql.ast_nodes import ( CreateCollectionStmt, + CreateIndexStmt, DeleteStmt, DropCollectionStmt, InsertBulkStmt, @@ -246,6 +247,21 @@ def test_create_existing_collection_is_noop(self, executor, mock_client): assert "already exists" in result.message +class TestCreateIndex: + def test_create_index_calls_qdrant(self, executor, mock_client): + mock_client.collection_exists.return_value = True + node = CreateIndexStmt(collection="articles", field_name="category", schema="keyword") + result = executor.execute(node) + mock_client.create_payload_index.assert_called_once() + assert result.success is True + + def test_create_index_nonexistent_collection_raises(self, executor, mock_client): + mock_client.collection_exists.return_value = False + node = CreateIndexStmt(collection="ghost", field_name="category", schema="keyword") + with pytest.raises(QQLRuntimeError, match="does not 
exist"): + executor.execute(node) + + class TestCreateWithModel: def test_create_with_model_passes_model_to_embedder(self, mock_client, cfg, mocker): mock_emb = mocker.MagicMock() @@ -630,6 +646,20 @@ def test_delete_calls_qdrant_delete(self, executor, mock_client): mock_client.delete.assert_called_once() assert result.success is True + def test_delete_by_filter_calls_qdrant_delete_with_filter(self, executor, mock_client): + from qdrant_client.models import Filter + from qql.ast_nodes import CompareExpr + + mock_client.collection_exists.return_value = True + node = DeleteStmt( + collection="articles", + query_filter=CompareExpr(field="category", op="=", value="archived"), + ) + result = executor.execute(node) + selector = mock_client.delete.call_args.kwargs["points_selector"] + assert isinstance(selector, Filter) + assert result.success is True + def test_delete_nonexistent_collection_raises(self, executor, mock_client): mock_client.collection_exists.return_value = False node = DeleteStmt(collection="ghost", point_id="x") diff --git a/tests/test_parser.py b/tests/test_parser.py index 1d5c22e..6804561 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -5,6 +5,7 @@ BetweenExpr, CompareExpr, CreateCollectionStmt, + CreateIndexStmt, DeleteStmt, DropCollectionStmt, InExpr, @@ -165,6 +166,13 @@ def test_create_collection(self): assert isinstance(node, CreateCollectionStmt) assert node.collection == "my_col" + def test_create_index(self): + node = parse("CREATE INDEX ON COLLECTION articles FOR category TYPE keyword") + assert isinstance(node, CreateIndexStmt) + assert node.collection == "articles" + assert node.field_name == "category" + assert node.schema == "keyword" + class TestDrop: def test_drop_collection(self): @@ -199,12 +207,21 @@ def test_delete_by_string_id(self): assert isinstance(node, DeleteStmt) assert node.collection == "notes" assert node.point_id == "abc-123" + assert node.query_filter is None def test_delete_by_integer_id(self): node = 
parse("DELETE FROM notes WHERE id = 99") assert isinstance(node, DeleteStmt) assert node.point_id == 99 + def test_delete_by_filter(self): + node = parse("DELETE FROM articles WHERE category = 'archived'") + assert isinstance(node, DeleteStmt) + assert node.point_id is None + assert isinstance(node.query_filter, CompareExpr) + assert node.query_filter.field == "category" + assert node.query_filter.value == "archived" + class TestRecommend: def test_recommend_with_positive_ids(self): From 8b7eed494fa6e0273ecb3e4e85e9e12f87d58230 Mon Sep 17 00:00:00 2001 From: Srimon Date: Tue, 28 Apr 2026 23:50:25 +0530 Subject: [PATCH 2/2] feat: add CREATE INDEX documentation and update DELETE syntax in README --- README.md | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 67 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 18e157a..d6a3c09 100644 --- a/README.md +++ b/README.md @@ -44,8 +44,9 @@ qql> SEARCH notes SIMILAR TO 'vector databases' LIMIT 5 USING HYBRID RERANK - [Cross-Encoder Reranking (RERANK)](#cross-encoder-reranking-rerank) - [SHOW COLLECTIONS — list collections](#show-collections--list-collections) - [CREATE COLLECTION — create a collection](#create-collection--create-a-collection) + - [CREATE INDEX — create a payload index](#create-index--create-a-payload-index) - [DROP COLLECTION — delete a collection](#drop-collection--delete-a-collection) - - [DELETE — remove a point](#delete--remove-a-point) + - [DELETE — remove points](#delete--remove-points) - [Script Files](#script-files) - [EXECUTE — run a script file](#execute--run-a-qql-script-file) - [DUMP COLLECTION — export to script](#dump-collection--export-collection-to-a-qql-script-file) @@ -902,6 +903,56 @@ If the collection already exists, the command succeeds with a message and does n --- +### CREATE INDEX — create a payload index + +Creates a payload index on a collection field. 
Payload indexes speed up `WHERE` clause filtering by allowing Qdrant to efficiently match on indexed fields. + +**Syntax:** +``` +CREATE INDEX ON COLLECTION <collection_name> FOR <field_name> TYPE <schema_type> +``` + +**Supported schema types:** + +| Type | Description | +|---|---| +| `keyword` | Exact string match (e.g. status, category) | +| `integer` | Whole numbers | +| `float` | Decimal numbers | +| `bool` | Boolean values | +| `text` | Full-text search (enables `MATCH` operators) | +| `geo` | Geospatial coordinates | +| `datetime` | Date/time values | + +**Examples:** + +Create a keyword index on a string field: +```sql +CREATE INDEX ON COLLECTION articles FOR category TYPE keyword +``` + +Create an integer index on a numeric field: +```sql +CREATE INDEX ON COLLECTION articles FOR year TYPE integer +``` + +Create a text index for full-text search: +```sql +CREATE INDEX ON COLLECTION articles FOR title TYPE text +``` + +Nested field (dot notation): +```sql +CREATE INDEX ON COLLECTION articles FOR meta.author TYPE keyword +``` + +**Rules:** +- The collection must already exist. Raises an error otherwise. +- The schema type must be one of: `keyword`, `integer`, `float`, `bool`, `text`, `geo`, `datetime`. +- Indexes are idempotent — creating the same index twice succeeds silently. + +--- + ### DROP COLLECTION — delete a collection Permanently deletes a collection and **all points inside it**. This operation is irreversible. @@ -920,14 +971,15 @@ Raises an error if the collection does not exist. --- -### DELETE — remove a point +### DELETE — remove points -Deletes a single point from a collection by its ID. The ID may be an integer or a UUID string, either generated by QQL or supplied explicitly on INSERT. +Deletes one or more points from a collection. You can delete by specific ID or by a `WHERE` filter that matches multiple points. 
**Syntax:** ``` DELETE FROM <collection_name> WHERE id = '<point_id>' DELETE FROM <collection_name> WHERE id = <integer_id> +DELETE FROM <collection_name> WHERE <filter> ``` **Examples:** @@ -942,6 +994,16 @@ Delete by integer ID: DELETE FROM articles WHERE id = 42 ``` +Delete all points matching a filter: +```sql +DELETE FROM articles WHERE category = 'archived' +``` + +Delete with a compound filter: +```sql +DELETE FROM articles WHERE year < 2020 AND status = 'draft' +``` + To find a point's ID, run a SEARCH first and copy the ID from the results table. --- @@ -1399,3 +1461,5 @@ Expected output: **212 tests passing**. | `Expected a filter operator after field '...'` | Unknown operator in WHERE clause | Use one of: `=`, `!=`, `>`, `>=`, `<`, `<=`, `IN`, `NOT IN`, `BETWEEN`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `MATCH` | | `Expected ')' ...` | Unclosed parenthesis in WHERE clause | Add the missing `)` to close the group | | `Qdrant error during SEARCH: ...` | Hybrid search on a non-hybrid collection, or wrong vector names | Ensure the collection was created with `HYBRID` before using `USING HYBRID` in INSERT/SEARCH | +| `Unknown index type '...'` | Invalid schema type in CREATE INDEX | Use one of: `keyword`, `integer`, `float`, `bool`, `text`, `geo`, `datetime` | +| `Qdrant error during CREATE INDEX: ...` | Qdrant rejected the index creation | Check field name and collection state |