Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions changelog/1.2.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@

## Bug fixes

* Fixed a bug where the `separate_property_columns=True` option of `gds.graph.streamNodeProperties` did not handle list node properties correctly.


## Improvements

Expand Down
18 changes: 12 additions & 6 deletions graphdatascience/graph/graph_proc_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,12 @@ def streamNodeProperties(

# new format was requested, but the query was run via Cypher
if separate_property_columns and "propertyValue" in result.keys():
return result.pivot_table("propertyValue", "nodeId", columns="nodeProperty").reset_index()
result = result.pivot(index="nodeId", columns="nodeProperty", values="propertyValue")
result = result.reset_index()
result.columns.name = None
# old format was requested, but the query was run via Arrow
elif not separate_property_columns and "propertyValue" not in result.keys():
return result.melt(id_vars=["nodeId"]).rename(
result = result.melt(id_vars=["nodeId"]).rename(
columns={"variable": "nodeProperty", "value": "propertyValue"}
)

Expand Down Expand Up @@ -147,12 +149,16 @@ def streamRelationshipProperties(

# new format was requested, but the query was run via Cypher
if separate_property_columns and "propertyValue" in result.keys():
return result.pivot_table(
"propertyValue", ["sourceNodeId", "targetNodeId", "relationshipType"], columns="relationshipProperty"
).reset_index()
result = result.pivot(
index=["sourceNodeId", "targetNodeId", "relationshipType"],
columns="relationshipProperty",
values="propertyValue",
)
result = result.reset_index()
result.columns.name = None
# old format was requested, but the query was run via Arrow
elif not separate_property_columns and "propertyValue" not in result.keys():
return result.melt(id_vars=["sourceNodeId", "targetNodeId", "relationshipType"]).rename(
result = result.melt(id_vars=["sourceNodeId", "targetNodeId", "relationshipType"]).rename(
columns={"variable": "relationshipProperty", "value": "propertyValue"}
)

Expand Down
18 changes: 11 additions & 7 deletions graphdatascience/tests/integration/test_graph_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ def run_around_tests(runner: Neo4jQueryRunner) -> Generator[None, None, None]:
runner.run_query(
"""
CREATE
(a: Node {x: 1, y: 2}),
(b: Node {x: 2, y: 3}),
(c: Node {x: 3, y: 4}),
(a: Node {x: 1, y: 2, z: [42]}),
(b: Node {x: 2, y: 3, z: [1337]}),
(c: Node {x: 3, y: 4, z: [9]}),
(a)-[:REL {relX: 4, relY: 5}]->(b),
(a)-[:REL {relX: 5, relY: 6}]->(c),
(b)-[:REL {relX: 6, relY: 7}]->(c),
Expand Down Expand Up @@ -217,13 +217,17 @@ def test_graph_streamNodeProperties_without_arrow(gds_without_arrow: GraphDataSc
def test_graph_streamNodeProperties_without_arrow_separate_property_columns(
gds_without_arrow: GraphDataScience,
) -> None:
G, _ = gds_without_arrow.graph.project(GRAPH_NAME, {"Node": {"properties": ["x", "y"]}}, "*")
G, _ = gds_without_arrow.graph.project(GRAPH_NAME, {"Node": {"properties": ["x", "z"]}}, "*")

result = gds_without_arrow.graph.streamNodeProperties(G, ["x", "y"], separate_property_columns=True, concurrency=2)
result = gds_without_arrow.graph.streamNodeProperties(G, ["x", "z"], separate_property_columns=True, concurrency=2)

assert list(result.keys()) == ["nodeId", "x", "z"]

assert list(result.keys()) == ["nodeId", "x", "y"]
assert {e for e in result["x"]} == {1, 2, 3}
assert {e for e in result["y"]} == {2, 3, 4}

assert len(result["z"]) == 3
for e in result["z"]:
assert e in [[9], [42], [1337]]


def test_graph_streamRelationshipProperty_with_arrow(gds: GraphDataScience) -> None:
Expand Down