# In [1]:
from neo4j import GraphDatabase
import json
import uuid

# Connection settings handed to load_json_to_neo4j by the __main__ guard.
# NOTE(review): these look like local-development placeholders — confirm
# before pointing the script at a shared database.
NEO4J_URI = "neo4j://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "password"

# JSON document tree that the script imports when run directly.
JSON_FILE_PATH = "output/json_data/Sb_1991_455_2024-01-01_IZ.json"


class Neo4jLoader:
    """Write a nested document tree into Neo4j as nodes and relationships.

    Every input dict becomes one node labelled with the dict's ``type`` and
    carrying ``id``, ``title``, ``content`` and ``metadata`` properties. The
    ``references`` and ``agencies`` entries found in a node's metadata become
    extra nodes linked from it, and each child dict is linked from its parent.
    """

    # Relationship used to attach a child to its parent, keyed by the child's
    # type. Any type that is not listed falls back to the default below.
    _CHILD_RELATIONSHIPS = {
        "Part": "HAS_PART",
        "Chapter": "HAS_CHAPTER",
        "Section": "HAS_SECTION",
        "Subsection": "HAS_SUBSECTION",
    }
    _DEFAULT_CHILD_RELATIONSHIP = "HAS_PARAGRAPH"

    def __init__(self, uri, user, password):
        """Create the driver for ``uri`` using basic ``(user, password)`` auth."""
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        """Close the underlying driver."""
        self.driver.close()

    @staticmethod
    def _quote_identifier(name):
        """Return ``name`` as a backtick-quoted Cypher identifier.

        Labels and relationship types cannot be sent as query parameters, so
        they are interpolated into the query text. Quoting keeps names that
        contain spaces or punctuation valid and stops a crafted name from
        closing the identifier early.

        Raises:
            ValueError: If ``name`` is empty.
        """
        text = str(name)
        if not text:
            raise ValueError("Cypher identifier must not be empty")
        # Treat the escaped form of a backtick like a real one: some Cypher
        # parsers expand \u0060 before parsing the query.
        text = text.replace("\\u0060", "`")
        return "`" + text.replace("`", "``") + "`"

    @staticmethod
    def _parse_metadata(raw):
        """Normalise a node's raw metadata.

        Args:
            raw: The value of the node's ``metadata`` key: a JSON string, an
                already-decoded object, or ``None`` when the key is absent.

        Returns:
            A ``(stored, parsed)`` pair. ``stored`` is the JSON text saved on
            the node; ``parsed`` is the decoded dict, or ``{}`` when the
            metadata is empty or is not a JSON object.

        Raises:
            json.JSONDecodeError: If ``raw`` is a non-empty string that is not
                valid JSON.
        """
        if raw is None or raw == "" or raw == {}:
            return "{}", {}
        if isinstance(raw, str):
            # Keep the original text so the stored property is unchanged.
            stored, parsed = raw, json.loads(raw)
        else:
            # Property values cannot be maps, so store the JSON text instead.
            stored, parsed = json.dumps(raw, ensure_ascii=False), raw
        return stored, (parsed if isinstance(parsed, dict) else {})

    def create_node(self, tx, node_type, title, content, metadata, node_id):
        """Create a node labelled ``node_type`` with the given properties.

        Args:
            tx: Transaction object supplied by ``session.execute_write``.
            node_type: Label for the new node.
            title: Value of the ``title`` property.
            content: Value of the ``content`` property.
            metadata: Value of the ``metadata`` property.
            node_id: Value of the ``id`` property.
        """
        label = self._quote_identifier(node_type)
        query = (
            f"CREATE (n:{label} {{id: $node_id, title: $title, content: $content, metadata: $metadata}}) "
            "RETURN n"
        )
        tx.run(query, node_id=node_id, title=title, content=content, metadata=metadata)

    def create_relationship(self, tx, parent_id, child_id, relationship_type):
        """Create a ``relationship_type`` edge from one node to another.

        Both endpoints are looked up by their ``id`` property without a label.
        """
        rel_type = self._quote_identifier(relationship_type)
        query = (
            "MATCH (parent {id: $parent_id}), (child {id: $child_id}) "
            f"CREATE (parent)-[:{rel_type}]->(child)"
        )
        tx.run(query, parent_id=parent_id, child_id=child_id)

    def process_node(self, node_data, parent_id=None):
        """Recursively store ``node_data`` and everything beneath it.

        Args:
            node_data: Dict with optional ``type``, ``title``, ``content``,
                ``metadata`` and ``children`` keys.
            parent_id: ``id`` of an existing node to attach this one to, or
                ``None`` for a root.
        """
        # One session serves the whole tree instead of one per nesting level.
        with self.driver.session() as session:
            self._process_node(session, node_data, parent_id)

    def _process_node(self, session, node_data, parent_id):
        """Store one node, its metadata-derived nodes, then its children."""
        node_id = str(uuid.uuid4())
        node_type = node_data.get("type", "Unknown")
        title = node_data.get("title", "")
        content = node_data.get("content", "")
        metadata = node_data.get("metadata")
        try:
            stored_metadata, metadata_json = self._parse_metadata(metadata)
            session.execute_write(
                self.create_node, node_type, title, content, stored_metadata, node_id
            )
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON metadata for node {node_id}: {e}, metadata: {metadata}")
            return
        except Exception as e:
            print(f"Error creating node {node_id}: {e}, node_type: {node_type}, title: {title}, content: {content}, metadata: {metadata}")
            return

        # A failure on one reference or agency is reported and the rest continue.
        for ref in metadata_json.get("references") or []:
            ref_id = str(uuid.uuid4())
            try:
                session.execute_write(
                    self.create_node, ref["type"], ref["title"], "", "", ref_id
                )
                session.execute_write(
                    self.create_relationship, node_id, ref_id, "REFERENCES"
                )
            except Exception as e:
                print(f"Error creating reference node {ref_id}: {e}, ref: {ref}")

        for agency in metadata_json.get("agencies") or []:
            agency_id = str(uuid.uuid4())
            try:
                session.execute_write(
                    self.create_node, "Agency", agency, "", "", agency_id
                )
                session.execute_write(
                    self.create_relationship, node_id, agency_id, "HAS_AGENCY"
                )
            except Exception as e:
                print(f"Error creating agency node {agency_id}: {e}, agency: {agency}")

        if parent_id:
            relationship_type = self._CHILD_RELATIONSHIPS.get(
                node_type, self._DEFAULT_CHILD_RELATIONSHIP
            )
            try:
                session.execute_write(
                    self.create_relationship, parent_id, node_id, relationship_type
                )
            except Exception as e:
                print(f"Error creating relationship from {parent_id} to {node_id}: {e}, relationship_type: {relationship_type}")

        for child in node_data.get("children", []):
            self._process_node(session, child, node_id)


def load_json_to_neo4j(json_file_path, uri, user, password):
    """Read a JSON document tree from disk and import it into Neo4j.

    Args:
        json_file_path: Path of the UTF-8 encoded JSON file to read.
        uri: Connection URI given to ``Neo4jLoader``.
        user: User name given to ``Neo4jLoader``.
        password: Password given to ``Neo4jLoader``.
    """
    # Parse the file completely before any connection is set up.
    with open(json_file_path, "r", encoding="utf-8") as handle:
        document = json.loads(handle.read())

    neo4j_loader = Neo4jLoader(uri, user, password)
    try:
        neo4j_loader.process_node(document)
    finally:
        # Release the driver whether or not the import succeeded.
        neo4j_loader.close()


# Run the import with the module-level settings when executed as a script.
if __name__ == "__main__":
    load_json_to_neo4j(
        json_file_path=JSON_FILE_PATH,
        uri=NEO4J_URI,
        user=NEO4J_USER,
        password=NEO4J_PASSWORD,
    )