Skip to content

Parser rejects boolean comparison after COUNT(DISTINCT ...) during projection #123

@prrao87

Description

@prrao87

The Cypher parser fails when a comparison operator (e.g. `> 0`) is applied to an aggregate expression such as `COUNT(DISTINCT ...)` in the RETURN clause.

Version

Tested on lance-graph 0.5.0 (Python).

Repro

import pyarrow as pa
from lance_graph import CypherQuery, GraphConfig


def build_graph() -> tuple[GraphConfig, dict[str, pa.Table]]:
    """Build the graph configuration and in-memory tables for the repro.

    Returns:
        A ``(config, tables)`` pair. The config registers the ``Person`` and
        ``Post`` node labels and the ``likePost`` relationship; ``tables``
        maps each of those three names to a one-row ``pyarrow`` table.
    """
    builder = GraphConfig.builder()
    builder = builder.with_node_label("Person", "id")
    builder = builder.with_node_label("Post", "id")
    builder = builder.with_relationship("likePost", "src", "dst")
    graph_config = builder.build()

    # One person, one post, and a single likePost row (src=1, dst=1).
    person_table = pa.table(
        {"id": [1], "firstname": ["Bill"], "lastname": ["Moore"]}
    )
    post_table = pa.table({"id": [1]})
    like_table = pa.table({"src": [1], "dst": [1]})

    tables: dict[str, pa.Table] = {
        "Person": person_table,
        "Post": post_table,
        "likePost": like_table,
    }
    return graph_config, tables


def execute(query: str, cfg: GraphConfig, datasets: dict[str, pa.Table]):
    """Run a Cypher *query* against *datasets* using graph config *cfg*.

    Returns whatever ``CypherQuery.execute`` returns. Exceptions raised by
    ``lance_graph`` are not caught here and propagate to the caller.
    """
    configured_query = CypherQuery(query).with_config(cfg)
    return configured_query.execute(datasets)


def main() -> None:
    """Run the two repro queries and print their results.

    The first query returns the bare ``COUNT(DISTINCT ...)`` aggregate; the
    second applies ``> 0`` to the same aggregate. Any exception from the
    second query is caught and printed instead of aborting the script.
    """
    graph_config, tables = build_graph()

    # The query text is kept verbatim: the reported parse-error position
    # depends on the exact characters, including leading whitespace.
    plain_query = """
        MATCH (p:Post)<-[:likePost]-(p2:Person)
        WHERE p2.firstname = "Bill" AND p2.lastname = "Moore"
          AND p.id = 1
        RETURN COUNT(DISTINCT p.id) AS liked
    """
    comparison_query = """
        MATCH (p:Post)<-[:likePost]-(p2:Person)
        WHERE p2.firstname = "Bill" AND p2.lastname = "Moore"
          AND p.id = 2
        RETURN COUNT(DISTINCT p.id) > 0 AS liked
    """

    print("Running query without comparison:")
    plain_result = execute(plain_query, graph_config, tables)
    print(plain_result)

    print("\nRunning query with comparison (expected to fail):")
    try:
        comparison_result = execute(comparison_query, graph_config, tables)
        print(comparison_result)
    except Exception as err:  # noqa: BLE001 - minimal repro script
        print(f"ERROR: {err}")


# Run the repro only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

This gives:

❯ uv run repro_count_distinct_bool.py
Running query without comparison:
pyarrow.Table
liked: int64 not null
----
liked: [[1]]

Running query with comparison (expected to fail):
ERROR: Cypher parse error at position 170: Unexpected input after query: > 0 AS liked

Expected

The second query should parse successfully and return a single boolean column `liked`: TRUE when `COUNT(DISTINCT p.id)` is greater than 0, and FALSE otherwise.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No fields configured for Bug.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions