From 9c42964d3529ee30363a9cea8e468207e90a912a Mon Sep 17 00:00:00 2001 From: Adam Schill Collberg Date: Tue, 28 Jun 2022 15:48:58 +0200 Subject: [PATCH 1/2] Support pivoting list properties --- graphdatascience/graph/graph_proc_runner.py | 18 ++++++++++++------ .../tests/integration/test_graph_ops.py | 18 +++++++++++------- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/graphdatascience/graph/graph_proc_runner.py b/graphdatascience/graph/graph_proc_runner.py index 2fc7f5e26..b083827a8 100644 --- a/graphdatascience/graph/graph_proc_runner.py +++ b/graphdatascience/graph/graph_proc_runner.py @@ -113,10 +113,12 @@ def streamNodeProperties( # new format was requested, but the query was run via Cypher if separate_property_columns and "propertyValue" in result.keys(): - return result.pivot_table("propertyValue", "nodeId", columns="nodeProperty").reset_index() + result = result.pivot(index="nodeId", columns="nodeProperty", values="propertyValue") + result = result.reset_index() + result.columns.name = None # old format was requested but the query was run via Arrow elif not separate_property_columns and "propertyValue" not in result.keys(): - return result.melt(id_vars=["nodeId"]).rename( + result = result.melt(id_vars=["nodeId"]).rename( columns={"variable": "nodeProperty", "value": "propertyValue"} ) @@ -147,12 +149,16 @@ def streamRelationshipProperties( # new format was requested, but the query was run via Cypher if separate_property_columns and "propertyValue" in result.keys(): - return result.pivot_table( - "propertyValue", ["sourceNodeId", "targetNodeId", "relationshipType"], columns="relationshipProperty" - ).reset_index() + result = result.pivot( + index=["sourceNodeId", "targetNodeId", "relationshipType"], + columns="relationshipProperty", + values="propertyValue", + ) + result = result.reset_index() + result.columns.name = None # old format was requested but the query was run via Arrow elif not separate_property_columns and "propertyValue" not in result.keys(): - return result.melt(id_vars=["sourceNodeId", "targetNodeId", "relationshipType"]).rename( + result = result.melt(id_vars=["sourceNodeId", "targetNodeId", "relationshipType"]).rename( columns={"variable": "relationshipProperty", "value": "propertyValue"} ) diff --git a/graphdatascience/tests/integration/test_graph_ops.py b/graphdatascience/tests/integration/test_graph_ops.py index 9e45ed6bc..54abe384d 100644 --- a/graphdatascience/tests/integration/test_graph_ops.py +++ b/graphdatascience/tests/integration/test_graph_ops.py @@ -20,9 +20,9 @@ def run_around_tests(runner: Neo4jQueryRunner) -> Generator[None, None, None]: runner.run_query( """ CREATE - (a: Node {x: 1, y: 2}), - (b: Node {x: 2, y: 3}), - (c: Node {x: 3, y: 4}), + (a: Node {x: 1, y: 2, z: [42]}), + (b: Node {x: 2, y: 3, z: [1337]}), + (c: Node {x: 3, y: 4, z: [9]}), (a)-[:REL {relX: 4, relY: 5}]->(b), (a)-[:REL {relX: 5, relY: 6}]->(c), (b)-[:REL {relX: 6, relY: 7}]->(c), @@ -217,13 +217,17 @@ def test_graph_streamNodeProperties_without_arrow(gds_without_arrow: GraphDataSc def test_graph_streamNodeProperties_without_arrow_separate_property_columns( gds_without_arrow: GraphDataScience, ) -> None: - G, _ = gds_without_arrow.graph.project(GRAPH_NAME, {"Node": {"properties": ["x", "y"]}}, "*") + G, _ = gds_without_arrow.graph.project(GRAPH_NAME, {"Node": {"properties": ["x", "z"]}}, "*") - result = gds_without_arrow.graph.streamNodeProperties(G, ["x", "y"], separate_property_columns=True, concurrency=2) + result = gds_without_arrow.graph.streamNodeProperties(G, ["x", "z"], separate_property_columns=True, concurrency=2) + + assert list(result.keys()) == ["nodeId", "x", "z"] - assert list(result.keys()) == ["nodeId", "x", "y"] assert {e for e in result["x"]} == {1, 2, 3} - assert {e for e in result["y"]} == {2, 3, 4} + + assert len(result["z"]) == 3 + for e in result["z"]: + assert e in [[9], [42], [1337]] def test_graph_streamRelationshipProperty_with_arrow(gds: GraphDataScience) -> None: From e56e9becab2ca7ae9186ea79189d2c3010bd376e Mon Sep 17 00:00:00 2001 From: Adam Schill Collberg Date: Tue, 28 Jun 2022 15:51:05 +0200 Subject: [PATCH 2/2] Add changelog entry about fixing separate_property_columns bug --- changelog/1.2.0.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/changelog/1.2.0.md b/changelog/1.2.0.md index e30c13912..274d88b41 100644 --- a/changelog/1.2.0.md +++ b/changelog/1.2.0.md @@ -9,6 +9,8 @@ ## Bug fixes +* Fixed a bug where the `separate_property_columns=True` option of `gds.graph.streamNodeProperties` did not handle list node properties correctly. + ## Improvements