From ea32e795c20ce395339e6cfc6394ca7d1d5d2ed0 Mon Sep 17 00:00:00 2001 From: estelle Date: Wed, 20 Aug 2025 11:30:43 +0200 Subject: [PATCH 1/5] Fix error due to wrong property type --- .../experimental/components/graph_pruning.py | 27 +++++++++- .../experimental/components/schema.py | 12 +++++ .../experimental/components/types.py | 2 +- .../components/test_graph_pruning.py | 52 +++++++++++++++++++ 4 files changed, 91 insertions(+), 2 deletions(-) diff --git a/src/neo4j_graphrag/experimental/components/graph_pruning.py b/src/neo4j_graphrag/experimental/components/graph_pruning.py index c8bf647f3..2d61a13c6 100644 --- a/src/neo4j_graphrag/experimental/components/graph_pruning.py +++ b/src/neo4j_graphrag/experimental/components/graph_pruning.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import enum +import json import logging from typing import Optional, Any, TypeVar, Generic, Union @@ -391,11 +392,15 @@ def _enforce_properties( ) -> dict[str, Any]: """ Enforce properties: + - Ensure property type: for now, just prevent having invalid property types (e.g. map) - Filter out those that are not in schema (i.e., valid properties) if allowed properties is False. - Check that all required properties are present and not null. 
""" + type_safe_properties = self._ensure_property_types( + item.properties, schema_item, pruning_stats + ) filtered_properties = self._filter_properties( - item.properties, + type_safe_properties, schema_item.properties, schema_item.additional_properties, item.token, # label or type @@ -453,3 +458,23 @@ def _check_required_properties( if filtered_properties.get(req_prop) is None: missing_required_properties.append(req_prop) return missing_required_properties + + def _ensure_property_types( + self, + filtered_properties: dict[str, Any], + schema_item: Union[NodeType, RelationshipType], + pruning_stats: PruningStats, + ): + type_safe_properties = {} + for prop_name, prop_value in filtered_properties.items(): + if isinstance(prop_value, dict): + # just ensure the type will not raise error on insert, while preserving data + type_safe_properties[prop_name] = json.dumps( + prop_value, default=str + ) + continue + + # this is where we could check types of other properties + # but keep it simple for now + type_safe_properties[prop_name] = prop_value + return type_safe_properties diff --git a/src/neo4j_graphrag/experimental/components/schema.py b/src/neo4j_graphrag/experimental/components/schema.py index 8e3855dd1..7826cbd15 100644 --- a/src/neo4j_graphrag/experimental/components/schema.py +++ b/src/neo4j_graphrag/experimental/components/schema.py @@ -111,6 +111,12 @@ def validate_additional_properties(self) -> Self: ) return self + def property_type_from_name(self, name: str) -> Optional[PropertyType]: + for prop in self.properties: + if prop.name == name: + return prop + return None + class RelationshipType(BaseModel): """ @@ -141,6 +147,12 @@ def validate_additional_properties(self) -> Self: ) return self + def property_type_from_name(self, name: str) -> Optional[PropertyType]: + for prop in self.properties: + if prop.name == name: + return prop + return None + class GraphSchema(DataModel): """This model represents the expected diff --git 
a/src/neo4j_graphrag/experimental/components/types.py b/src/neo4j_graphrag/experimental/components/types.py index 363767ef3..35d18c741 100644 --- a/src/neo4j_graphrag/experimental/components/types.py +++ b/src/neo4j_graphrag/experimental/components/types.py @@ -18,7 +18,7 @@ import uuid from typing import Any, Dict, Optional -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, field_validator from neo4j_graphrag.experimental.pipeline.component import DataModel diff --git a/tests/unit/experimental/components/test_graph_pruning.py b/tests/unit/experimental/components/test_graph_pruning.py index 4aee8949f..62f97b59c 100644 --- a/tests/unit/experimental/components/test_graph_pruning.py +++ b/tests/unit/experimental/components/test_graph_pruning.py @@ -14,6 +14,7 @@ # limitations under the License. from __future__ import annotations +import datetime from typing import Any, Optional from unittest.mock import ANY, Mock, patch @@ -101,6 +102,57 @@ def test_graph_pruning_filter_properties( assert filtered_properties == expected_filtered_properties +@pytest.mark.parametrize( + "properties, valid_properties, expected_filtered_properties", + [ + ( + # all good, no bad types + { + "name": "John Does", + "age": 25, + "is_active": True, + }, + [ + # not used for now + ], + { + "name": "John Does", + "age": 25, + "is_active": True, + }, + ), + ( + # map must be serialized + { + "age": {"dob": datetime.date(2000, 1, 1), "age_in_2025": 25}, + }, + [ + # not used for now + ], + { + "age": '{"dob": "2000-01-01", "age_in_2025": 25}', + }, + ), + ], +) +def test_graph_pruning_ensure_property_type( + properties: dict[str, Any], + valid_properties: list[PropertyType], + expected_filtered_properties: dict[str, Any], +) -> None: + pruner = GraphPruning() + node_type = NodeType( + label="Label", + properties=valid_properties, + ) + type_safe_properties = pruner._ensure_property_types( + properties, + node_type, + pruning_stats=PruningStats(), + ) + assert 
type_safe_properties == expected_filtered_properties + + @pytest.fixture(scope="module") def node_type_no_properties() -> NodeType: return NodeType(label="Person") From 43ab2818ea41176219d7ee651594d605cf1a616f Mon Sep 17 00:00:00 2001 From: estelle Date: Wed, 20 Aug 2025 13:56:37 +0200 Subject: [PATCH 2/5] Ruff --- src/neo4j_graphrag/experimental/components/graph_pruning.py | 4 +--- src/neo4j_graphrag/experimental/components/types.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/neo4j_graphrag/experimental/components/graph_pruning.py b/src/neo4j_graphrag/experimental/components/graph_pruning.py index 2d61a13c6..ac265a199 100644 --- a/src/neo4j_graphrag/experimental/components/graph_pruning.py +++ b/src/neo4j_graphrag/experimental/components/graph_pruning.py @@ -469,9 +469,7 @@ def _ensure_property_types( for prop_name, prop_value in filtered_properties.items(): if isinstance(prop_value, dict): # just ensure the type will not raise error on insert, while preserving data - type_safe_properties[prop_name] = json.dumps( - prop_value, default=str - ) + type_safe_properties[prop_name] = json.dumps(prop_value, default=str) continue # this is where we could check types of other properties diff --git a/src/neo4j_graphrag/experimental/components/types.py b/src/neo4j_graphrag/experimental/components/types.py index 35d18c741..363767ef3 100644 --- a/src/neo4j_graphrag/experimental/components/types.py +++ b/src/neo4j_graphrag/experimental/components/types.py @@ -18,7 +18,7 @@ import uuid from typing import Any, Dict, Optional -from pydantic import BaseModel, Field, field_validator +from pydantic import BaseModel, Field from neo4j_graphrag.experimental.pipeline.component import DataModel From 51efd882c2e151fb2808f2a1eb88f0e1bd991e4a Mon Sep 17 00:00:00 2001 From: estelle Date: Wed, 20 Aug 2025 14:26:17 +0200 Subject: [PATCH 3/5] Mypy --- src/neo4j_graphrag/experimental/components/graph_pruning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/src/neo4j_graphrag/experimental/components/graph_pruning.py b/src/neo4j_graphrag/experimental/components/graph_pruning.py index ac265a199..87059e001 100644 --- a/src/neo4j_graphrag/experimental/components/graph_pruning.py +++ b/src/neo4j_graphrag/experimental/components/graph_pruning.py @@ -464,7 +464,7 @@ def _ensure_property_types( filtered_properties: dict[str, Any], schema_item: Union[NodeType, RelationshipType], pruning_stats: PruningStats, - ): + ) -> dict[str, Any]: type_safe_properties = {} for prop_name, prop_value in filtered_properties.items(): if isinstance(prop_value, dict): From 45fff02fd051fbde8fe0a55d85fd98e1066d62b6 Mon Sep 17 00:00:00 2001 From: estelle Date: Wed, 27 Aug 2025 12:56:08 +0200 Subject: [PATCH 4/5] Remove unused parameters --- .../experimental/components/graph_pruning.py | 4 +--- .../experimental/components/test_graph_pruning.py | 15 +-------------- 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/src/neo4j_graphrag/experimental/components/graph_pruning.py b/src/neo4j_graphrag/experimental/components/graph_pruning.py index 87059e001..62ec209f8 100644 --- a/src/neo4j_graphrag/experimental/components/graph_pruning.py +++ b/src/neo4j_graphrag/experimental/components/graph_pruning.py @@ -397,7 +397,7 @@ def _enforce_properties( - Check that all required properties are present and not null. 
""" type_safe_properties = self._ensure_property_types( - item.properties, schema_item, pruning_stats + item.properties, ) filtered_properties = self._filter_properties( type_safe_properties, @@ -462,8 +462,6 @@ def _check_required_properties( def _ensure_property_types( self, filtered_properties: dict[str, Any], - schema_item: Union[NodeType, RelationshipType], - pruning_stats: PruningStats, ) -> dict[str, Any]: type_safe_properties = {} for prop_name, prop_value in filtered_properties.items(): diff --git a/tests/unit/experimental/components/test_graph_pruning.py b/tests/unit/experimental/components/test_graph_pruning.py index 62f97b59c..f91419453 100644 --- a/tests/unit/experimental/components/test_graph_pruning.py +++ b/tests/unit/experimental/components/test_graph_pruning.py @@ -103,7 +103,7 @@ def test_graph_pruning_filter_properties( @pytest.mark.parametrize( - "properties, valid_properties, expected_filtered_properties", + "properties, expected_filtered_properties", [ ( # all good, no bad types @@ -112,9 +112,6 @@ def test_graph_pruning_filter_properties( "age": 25, "is_active": True, }, - [ - # not used for now - ], { "name": "John Does", "age": 25, @@ -126,9 +123,6 @@ def test_graph_pruning_filter_properties( { "age": {"dob": datetime.date(2000, 1, 1), "age_in_2025": 25}, }, - [ - # not used for now - ], { "age": '{"dob": "2000-01-01", "age_in_2025": 25}', }, @@ -137,18 +131,11 @@ def test_graph_pruning_filter_properties( ) def test_graph_pruning_ensure_property_type( properties: dict[str, Any], - valid_properties: list[PropertyType], expected_filtered_properties: dict[str, Any], ) -> None: pruner = GraphPruning() - node_type = NodeType( - label="Label", - properties=valid_properties, - ) type_safe_properties = pruner._ensure_property_types( properties, - node_type, - pruning_stats=PruningStats(), ) assert type_safe_properties == expected_filtered_properties From 789e0cdc73fe4aa97f10e0df88318d7f2e8717f1 Mon Sep 17 00:00:00 2001 From: estelle Date: Wed, 27 
Aug 2025 15:32:42 +0200 Subject: [PATCH 5/5] Update CHANGELOG --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6cdb942d7..f14f845f8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,11 @@ ## Next +### Fixed + +- Fixed an edge case where the LLM could output a property of type 'map', which caused errors during import because 'map' is not a valid property type in Neo4j. + + ## 1.9.1 ### Fixed