In [3]:
import ast
import getpass
import os
import re
from typing import List, Dict, Any

import pandas as pd
from neo4j import GraphDatabase


In [6]:
def load_data_into_neo4j(df: pd.DataFrame, uri: str, user: str, password: str):
    """Load the processed dataframe into a Neo4j graph database.

    One write transaction is executed per row. Each transaction merges a
    ``Document`` node and, for every named entity of that row, an
    ``EntityType`` node and a ``NamedEntity`` node, connected as
    ``(Document)-[:HAS_ENTITY_TYPE]->(EntityType)-[:HAS_ENTITY]->(NamedEntity)``.
    No direct ``Document`` -> ``NamedEntity`` relationship is created.

    Args:
        df: Frame providing the columns ``Image_ID``, ``Document`` and
            ``Named_Entities``. Every ``Named_Entities`` value is handed to
            Cypher ``UNWIND`` and its elements are read through ``ne_type``
            and ``ne_span``, so it is expected to be a list of dicts carrying
            those two keys.
        uri: Connection URI passed to ``GraphDatabase.driver``.
        user: User name used for authentication.
        password: Password used for authentication.

    Raises:
        Whatever the driver raises while connecting or writing. The driver is
        closed before the exception propagates.
    """
    driver = GraphDatabase.driver(uri, auth=(user, password))

    def insert_data(tx, doc_id, document, named_entities):
        """Upsert one document and its named entities inside transaction ``tx``."""
        query = (
            # MERGE only on the identifying key. Merging on {id, text} would
            # create a second Document with the same id whenever the text of
            # an already-loaded document differs; SET keeps it a true upsert.
            "MERGE (d:Document {id: $doc_id}) "
            "SET d.text = $document "
            "WITH d "
            "UNWIND $named_entities AS ne "
            "MERGE (t:EntityType {type: ne.ne_type}) "
            "MERGE (e:NamedEntity {text: ne.ne_span}) "
            "MERGE (d)-[:HAS_ENTITY_TYPE]->(t) "
            "MERGE (t)-[:HAS_ENTITY]->(e)"
        )
        tx.run(query, doc_id=doc_id, document=document, named_entities=named_entities)

    # try/finally guarantees the driver is released even when a row fails.
    try:
        with driver.session() as session:
            for _, row in df.iterrows():
                session.execute_write(
                    insert_data,
                    row['Image_ID'],
                    row['Document'],
                    row['Named_Entities'],
                )
    finally:
        driver.close()

# Load the processed CSV
processed_data_filepath = '../data/processed/processed_data.csv'
df = pd.read_csv(processed_data_filepath)

# Convert Named_Entities from its string form to a list of dictionaries.
# SECURITY: this used to call eval(), which executes arbitrary code found in
# the CSV cells. ast.literal_eval only accepts Python literals (lists, dicts,
# strings, numbers, ...) and raises ValueError/SyntaxError on anything else.
df['Named_Entities'] = df['Named_Entities'].apply(ast.literal_eval)


# Load into Neo4j.
# Connection settings come from the environment so that no credential is
# stored in the notebook; if NEO4J_PASSWORD is unset the password is prompted for.
neo4j_uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
neo4j_user = os.environ.get("NEO4J_USER", "neo4j")
neo4j_password = os.environ.get("NEO4J_PASSWORD") or getpass.getpass("Neo4j password: ")
load_data_into_neo4j(df, neo4j_uri, neo4j_user, neo4j_password)

In [5]:
def delete_graph(uri: str, user: str, password: str):
    """Delete all nodes and relationships in the Neo4j database.

    Runs ``MATCH (n) DETACH DELETE n`` in a single auto-commit query.

    Args:
        uri: Connection URI passed to ``GraphDatabase.driver``.
        user: User name used for authentication.
        password: Password used for authentication.

    Raises:
        Whatever the driver raises while connecting or running the query.
        The driver is closed before the exception propagates.
    """
    driver = GraphDatabase.driver(uri, auth=(user, password))
    # try/finally guarantees the driver is released even when the query fails.
    try:
        with driver.session() as session:
            # consume() drains the result so the delete has finished, and any
            # server-side error is raised here, before the session is closed.
            session.run("MATCH (n) DETACH DELETE n").consume()
    finally:
        driver.close()

# WARNING: this cell wipes the whole database. It sits after the load cell, so
# a top-to-bottom "Run All" loads the data and then deletes it again.
# Connection settings come from the environment so that no credential is
# stored in the notebook; if NEO4J_PASSWORD is unset the password is prompted for.
neo4j_uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
neo4j_user = os.environ.get("NEO4J_USER", "neo4j")
neo4j_password = os.environ.get("NEO4J_PASSWORD") or getpass.getpass("Neo4j password: ")
delete_graph(neo4j_uri, neo4j_user, neo4j_password)