In [1]:
import json
import os
from getpass import getpass

import numpy as np
import pandas as pd
from neo4j import GraphDatabase


In [2]:
class DataFrameToNeo4jLoader:
    """Load pandas DataFrames into Neo4j as ``Table`` and ``Record`` nodes.

    Each DataFrame becomes one ``(:Table {name, columns})`` node. Each row
    becomes a ``(:Record:<table label> {unique_id, ...})`` node carrying the
    row's non-missing values as properties, linked to its table node with a
    ``BELONGS_TO`` relationship. Every write is a ``MERGE``, so loading the
    same table name again updates the existing nodes rather than duplicating
    them.

    The loader can be used as a context manager; the driver is closed on exit.
    """

    def __init__(self, uri, user, password):
        """Create the Neo4j driver used by all subsequent loads.

        :param uri: Connection URI passed to ``GraphDatabase.driver``
        :param user: User name for basic authentication
        :param password: Password for basic authentication
        """
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def __enter__(self):
        """Return the loader itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the driver; exceptions from the ``with`` body propagate."""
        self.close()
        return False

    @staticmethod
    def _record_label(table_name):
        """Return ``table_name`` as a backtick-quoted Cypher label.

        Labels cannot be sent as query parameters, so the name has to be
        embedded in the query text. Quoting it keeps arbitrary characters from
        being interpreted as Cypher. Spaces are still replaced by underscores,
        so the label for a given table name is the same one the unquoted form
        produced.

        :param table_name: Non-empty table name
        :return: The quoted label, e.g. ``"`Table_1`"``
        :raises TypeError: If ``table_name`` is not a string
        :raises ValueError: If ``table_name`` is empty
        """
        if not isinstance(table_name, str):
            raise TypeError(
                f"table_name must be a string, got {type(table_name).__name__}"
            )
        if not table_name:
            raise ValueError("table_name must not be empty")

        label = table_name.replace(" ", "_")
        # A literal "\u0060" is the unicode escape for a backtick; normalise it
        # first so it cannot be used to close the quoted name.
        label = label.replace("\\u0060", "`")
        # Inside a backtick-quoted name a backtick is written as two backticks.
        label = label.replace("`", "``")
        return "`" + label + "`"

    @staticmethod
    def _clean_record(record):
        """Convert one row dict into a property map of native Python values.

        * Missing scalars (``None``, ``NaN``, ``NaT``, ``pd.NA``) are dropped,
          including NaN values that are still numpy floats.
        * numpy integer, float and bool scalars become Python ``int``,
          ``float`` and ``bool``.
        * numpy arrays become lists.
        * Everything else, including lists and other containers, is passed
          through unchanged.

        :param record: Mapping of column name to cell value
        :return: New dict containing only the cleaned, non-missing entries
        """
        clean_record = {}
        for key, value in record.items():
            if isinstance(value, np.ndarray):
                clean_record[key] = value.tolist()
                continue
            # pd.isna() returns an array for list-like input, which cannot be
            # used as a condition, so only test scalars for missingness.
            if pd.api.types.is_scalar(value) and pd.isna(value):
                continue
            if isinstance(value, (np.integer, np.floating, np.bool_)):
                value = value.item()
            clean_record[key] = value
        return clean_record

    def _dataframe_to_neo4j(self, df, table_name):
        """Write one DataFrame to Neo4j as a table node plus one node per row.

        Rows are identified by ``"<table_name>_<position>"``, where position is
        the zero-based row order in ``df`` (not the DataFrame index).

        :param df: DataFrame to load
        :param table_name: Name stored on the ``Table`` node and used (quoted)
            as the extra label on every record node
        :raises TypeError: If ``table_name`` is not a string
        :raises ValueError: If ``table_name`` is empty
        """
        # Validate and quote the label before anything is written.
        label = self._record_label(table_name)

        # The query text is identical for every row, so build it once.
        record_query = (
            "MERGE (r:Record:" + label + " {unique_id: $unique_id})\n"
            "SET r += $properties\n"
            "WITH r\n"
            "MATCH (t:Table {name: $table_name})\n"
            "MERGE (r)-[:BELONGS_TO]->(t)"
        )

        records = df.to_dict(orient='records')

        with self.driver.session() as session:
            # Create (or update) the table node
            session.run(
                """
                MERGE (t:Table {name: $table_name})
                SET t.columns = $columns
                """,
                {
                    "table_name": table_name,
                    "columns": list(df.columns)
                }
            )

            # Create (or update) one node per row
            for index, record in enumerate(records):
                session.run(
                    record_query,
                    {
                        "unique_id": f"{table_name}_{index}",
                        "table_name": table_name,
                        "properties": self._clean_record(record)
                    }
                )

    def load_dataframes(self, dataframes_list, table_names=None):
        """
        Load multiple DataFrames from a list into Neo4j

        :param dataframes_list: Iterable of DataFrames
        :param table_names: Optional iterable of table names, one per
            DataFrame and in the same order. When omitted, tables are named
            ``Table_1``, ``Table_2``, ... in iteration order.
        :raises ValueError: If ``table_names`` is given and its length differs
            from the number of DataFrames
        """
        if table_names is None:
            named_frames = (
                (f"Table_{index}", df)
                for index, df in enumerate(dataframes_list, 1)
            )
        else:
            frames = list(dataframes_list)
            names = list(table_names)
            if len(names) != len(frames):
                raise ValueError(
                    f"got {len(names)} table names for {len(frames)} DataFrames"
                )
            named_frames = zip(names, frames)

        for table_name, df in named_frames:
            print(f"Loading table: {table_name}")
            self._dataframe_to_neo4j(df, table_name)

    def close(self):
        """Close the underlying Neo4j driver."""
        self.driver.close()

In [3]:
# Example usage
def main():
    """Load two small sample DataFrames into Neo4j as a smoke test.

    Connection settings are read from the environment so that no credential
    is stored in the notebook:

    * ``NEO4J_URI``      -- defaults to ``bolt://localhost:7687``
    * ``NEO4J_USER``     -- defaults to ``neo4j``
    * ``NEO4J_PASSWORD`` -- no default; prompted for interactively when unset
    """
    # Sample DataFrames to load
    dataframes = [
        pd.DataFrame({
            'name': ['Alice', 'Bob', 'Charlie'],
            'age': [25, 30, 35]
        }),
        pd.DataFrame({
            'product': ['Laptop', 'Phone', 'Tablet'],
            'price': [1000, 500, 300]
        })
    ]

    # Neo4j connection parameters
    uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
    user = os.environ.get("NEO4J_USER", "neo4j")
    password = os.environ.get("NEO4J_PASSWORD")
    if password is None:
        # Prompt instead of falling back to a password committed in the file.
        password = getpass(f"Neo4j password for {user}@{uri}: ")

    # Create loader
    loader = DataFrameToNeo4jLoader(uri, user, password)

    try:
        # Load DataFrames
        loader.load_dataframes(dataframes)
    finally:
        # Always release the driver, even if a load fails part-way.
        loader.close()

# Verification Query
def verify_data(driver):
    """Print, for every ``Table`` node, how many nodes point at it.

    Runs a single query that counts, per table, the nodes with a
    ``BELONGS_TO`` relationship to it, then prints one line per result row.

    :param driver: Object providing ``session()``; the session is used as a
        context manager and must provide ``run(query)``
    """
    count_query = """
        MATCH (t:Table)
        CALL {
            WITH t
            MATCH (r)-[:BELONGS_TO]->(t)
            RETURN COUNT(r) AS node_count
        }
        RETURN t.name AS table_name, node_count
        """

    with driver.session() as db_session:
        for row in db_session.run(count_query):
            table_name = row['table_name']
            node_count = row['node_count']
            print(f"Table: {table_name}, Nodes: {node_count}")

# Run the example load only when this code is executed directly (as a script
# or in a notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

Loading table: Table_1
Loading table: Table_2


In [None]:
# Cypher (not Python): run this in the Neo4j Browser to inspect everything that was loaded.
# MATCH (n) RETURN (n)