# Convert JSON to GraphDocument and insert into Neo4j

- No help from LLM
- We manually inspect the data and specify the schema

In [2]:
# notebook/02_neo4j_build_graph_no_llm.ipynb

import sys
import os

from dotenv import load_dotenv
sys.path.append(os.path.abspath('..'))
load_dotenv('../.env',override=True)

import json
import xml.etree.ElementTree as ET
from neo4j import GraphDatabase

# Neo4j connection settings: each value is read from the environment and
# falls back to the literal default below when the variable is not set.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "password")

class Neo4jGraph:
    """Thin wrapper around a Neo4j driver for running Cypher queries.

    Instances can also be used as context managers (``with Neo4jGraph(...)``)
    so the driver is closed even when a query raises.
    """

    def __init__(self, uri, user, password):
        """Create a driver for *uri*, authenticating as *user* / *password*."""
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def __enter__(self):
        """Return this wrapper so it can be bound by a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the driver on leaving the ``with`` block; never swallow errors."""
        self.close()
        return False

    def close(self):
        """Close the underlying driver."""
        self.driver.close()

    def run_query(self, query, parameters=None):
        """Run *query* in its own session and return its records as a list.

        Args:
            query: Cypher text to execute.
            parameters: Optional mapping of query parameters.

        Returns:
            A list holding every record the query produced (empty for
            write-only queries).
        """
        with self.driver.session() as session:
            # Read the records while the session is still open: a result
            # handed back after the ``with`` block exits belongs to a closed
            # session and can no longer be iterated by the caller.
            return list(session.run(query, parameters))

    def clear_database(self):
        """Deletes all nodes and relationships in Neo4j"""
        self.run_query("MATCH (n) DETACH DELETE n")
       


def _formatted_names(name):
    """Return ``(first, last, full)`` from the first translation of *name*.

    All three are ``None`` when *name* is ``None`` or has no translations; a
    field absent from the translation is likewise reported as ``None``.
    """
    translations = (name or {}).get("Translations") or []
    if not translations:
        return None, None, None
    translation = translations[0]
    return (
        translation.get("FormattedFirstName"),
        translation.get("FormattedLastName"),
        translation.get("FormattedFullName"),
    )


def _feature_value(features, key):
    """Return the ``Value`` stored under *key* in *features*, or ``None``."""
    return (features.get(key) or {}).get("Value")


def load_json_to_neo4j(json_data, graph):
    """Insert every entry of ``json_data["individuals"]`` into the graph.

    For each individual this writes a ``Person`` node, one ``Alias`` node per
    non-primary "A.K.A." name (linked with ``HAS_ALIAS``) and one ``Sanction``
    node per program value (linked with ``SANCTIONED_BY``).

    Args:
        json_data: Mapping with an optional ``"individuals"`` list. Each
            individual must carry ``"EntityID"`` and
            ``"GeneralInfo"["EntityType"]``; ``"Names"``, ``"Features"`` and
            ``"Sanctions"`` are optional.
        graph: Object exposing ``run_query(query, parameters)``.

    Raises:
        KeyError: If an individual lacks ``"EntityID"``, ``"GeneralInfo"`` or
            ``"EntityType"``.
    """
    for individual in json_data.get("individuals", []):
        entity_id = individual["EntityID"]
        general_info = individual["GeneralInfo"]
        # ``or`` also covers an explicit JSON null, not only a missing key.
        names = individual.get("Names") or []
        features = individual.get("Features") or {}
        sanctions = (individual.get("Sanctions") or {}).get("Programs") or []

        # Find the primary name; entries without an "IsPrimary" flag are
        # simply not primary instead of aborting the whole load.
        primary_name = next(
            (name for name in names if name.get("IsPrimary") == "true"), None
        )
        first_name, last_name, full_name = _formatted_names(primary_name)

        # Create Person node with additional attributes
        query = """
        MERGE (p:Person {EntityID: $entity_id, EntityType: $entity_type})
        SET p.FormattedFirstName = $first_name,
            p.FormattedLastName = $last_name,
            p.FormattedFullName = $full_name,
            p.Birthdate = $birthdate,
            p.PlaceOfBirth = $place_of_birth,
            p.Gender = $gender,
            p.NationalityCountry = $nationality
        """
        graph.run_query(query, {
            "entity_id": entity_id,
            "entity_type": general_info["EntityType"],
            "first_name": first_name,
            "last_name": last_name,
            "full_name": full_name,
            "birthdate": _feature_value(features, "Birthdate"),
            "place_of_birth": _feature_value(features, "Place of Birth"),
            "gender": _feature_value(features, "Gender"),
            "nationality": _feature_value(features, "Nationality Country"),
        })

        # Handle Alias Names (Non-Primary)
        for name in names:
            if name.get("IsPrimary") != "false" or name.get("AliasType") != "A.K.A.":
                continue
            if not name.get("Translations"):
                # Nothing to store for an alias without any translation.
                continue
            alias_first_name, alias_last_name, alias_full_name = _formatted_names(name)

            # Create Alias node and link to Person.
            # NOTE(review): Neo4j is documented to reject MERGE on a null
            # property, so a translation lacking one of these fields should
            # still fail loudly at the database - confirm against your server.
            query = """
            MATCH (p:Person {EntityID: $entity_id})
            MERGE (a:Alias {FormattedFirstName: $alias_first_name,
                            FormattedLastName: $alias_last_name,
                            FormattedFullName: $alias_full_name})
            MERGE (p)-[:HAS_ALIAS]->(a)
            """
            graph.run_query(query, {
                "entity_id": entity_id,
                "alias_first_name": alias_first_name,
                "alias_last_name": alias_last_name,
                "alias_full_name": alias_full_name,
            })

        # Handle Sanctions
        for sanction in sanctions:
            program_name = sanction.get("Value")
            if not program_name:
                continue
            # Create the Sanction node and the SANCTIONED_BY relationship in a
            # single round trip; the Sanction is merged first so it exists
            # regardless of whether the Person match succeeds.
            query = """
            MERGE (s:Sanction {Program: $program})
            WITH s
            MATCH (p:Person {EntityID: $entity_id})
            MERGE (p)-[:SANCTIONED_BY]->(s)
            """
            graph.run_query(query, {
                "entity_id": entity_id,
                "program": program_name,
            })



In [3]:

# Load JSON data
# Explicit encoding so non-ASCII names decode identically on every platform
# instead of depending on the locale default.
with open("ofac_data_small.json", "r", encoding="utf-8") as f:
    json_data = json.load(f)

# Connect to Neo4j and load data
print("Connecting to Neo4j")
graph = Neo4jGraph(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)

try:
    # Clear old data; report it only after the delete has actually run.
    graph.clear_database()
    print("Old data cleared from Neo4j")

    print("Inserting Graph Data")
    load_json_to_neo4j(json_data, graph)
finally:
    # Close connection even when clearing or loading raised.
    graph.close()

print("Graph successfully created in Neo4j!")

Connecting to Neo4j
Old data cleared from Neo4j
Inserting Graph Data
Graph successfully created in Neo4j!
