In [None]:
!pip install neo4j

In [None]:
from neo4j import GraphDatabase
from neo4j.exceptions import ServiceUnavailable, AuthError
import pandas as pd

import os

# Connection settings. Values are read from the environment when present so
# that real credentials do not have to be written into the notebook; the
# fallbacks are the notebook's original placeholder values.
uri = os.environ.get("NEO4J_URI", "blank")
user = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "blank")

In [None]:
import os
import pandas as pd
from neo4j import GraphDatabase

# Reuse a driver that is already bound in this namespace; otherwise build one
# from the uri / user / password values defined in the earlier cells, then
# check that the server can actually be reached.
try:
    driver_missing = 'driver' not in locals() or driver is None
    if driver_missing:
        driver = GraphDatabase.driver(uri, auth=(user, password))
    driver.verify_connectivity()
    print("Connection established.")
except NameError:
    # One of the names used above has not been defined yet
    print("Error: Please ensure 'uri', 'user', and 'password' are defined and run in the previous cells.")
except Exception as conn_error:
    print(f"Connection failed: {conn_error}")

In [None]:
def save_graph_to_csv(driver, output_folder):
    """Write one CSV per node label and one CSV per relationship type.

    Node files are named ``nodes_<label>.csv`` and hold the node properties
    plus an ``_id`` column (the node's elementId). Relationship files are
    named ``rels_<type>.csv`` and hold the relationship properties plus
    ``_start_id`` / ``_end_id`` columns (elementIds of the two endpoints).

    ``output_folder`` is created when it does not exist. A failure while
    exporting a single label or type is printed and the export moves on to
    the next one. Nothing is returned.
    """
    folder_missing = not os.path.exists(output_folder)
    if folder_missing:
        os.makedirs(output_folder)
        print(f"Created directory: {output_folder}")

    with driver.session() as db:
        # ---- Nodes: one file per label ----
        print("Fetching node labels...")
        node_labels = [rec["label"] for rec in db.run("CALL db.labels()")]

        for node_label in node_labels:
            print(f"Exporting nodes with label: {node_label}...")
            # elementId() is the Neo4j 5+ way to read the system identifier
            node_query = f"MATCH (n:`{node_label}`) RETURN elementId(n) as _id, properties(n) as props"
            try:
                # Each row is the property map with the system id appended
                node_rows = [
                    {**rec["props"], "_id": rec["_id"]}
                    for rec in db.run(node_query)
                ]
                if not node_rows:
                    print(f"  No nodes found for label {node_label}")
                    continue

                node_frame = pd.DataFrame(node_rows)
                node_csv = os.path.join(output_folder, f"nodes_{node_label}.csv")
                node_frame.to_csv(node_csv, index=False)
                print(f"  Saved {len(node_frame)} nodes to {node_csv}")
            except Exception as err:
                print(f"  Error exporting label {node_label}: {err}")

        # ---- Relationships: one file per type ----
        print("Fetching relationship types...")
        relationship_types = [
            rec["relationshipType"] for rec in db.run("CALL db.relationshipTypes()")
        ]

        for rel_type in relationship_types:
            print(f"Exporting relationships of type: {rel_type}...")
            rel_query = f"""
            MATCH (a)-[r:`{rel_type}`]->(b)
            RETURN elementId(startNode(r)) as _start_id, elementId(endNode(r)) as _end_id, properties(r) as props
            """
            try:
                # Each row is the property map with both endpoint ids appended
                rel_rows = [
                    {**rec["props"], "_start_id": rec["_start_id"], "_end_id": rec["_end_id"]}
                    for rec in db.run(rel_query)
                ]
                if not rel_rows:
                    print(f"  No relationships found for type {rel_type}")
                    continue

                rel_frame = pd.DataFrame(rel_rows)
                rel_csv = os.path.join(output_folder, f"rels_{rel_type}.csv")
                rel_frame.to_csv(rel_csv, index=False)
                print(f"  Saved {len(rel_frame)} relationships to {rel_csv}")
            except Exception as err:
                print(f"  Error exporting relationship {rel_type}: {err}")

# Folder that receives the exported CSV files
output_directory = "neo4j_export" # @param {type:"string"}

# Export only when a driver object is bound in this namespace
if 'driver' not in locals():
    print("Driver not defined. Cannot export.")
else:
    save_graph_to_csv(driver, output_directory)
    print(f"\nExport completed. Files are saved in '{output_directory}'")

In [None]:
def clear_database(driver, batch_size=200):
    """Delete every node (and its relationships) in batches of ``batch_size``.

    The same DETACH DELETE statement is repeated until a pass reports zero
    deleted nodes, so no single transaction has to remove the whole graph.
    Progress is printed after every pass. Any exception is printed and then
    re-raised to the caller.
    """
    print(f"Clearing database with batch size {batch_size}...")
    delete_batch_query = f"""
    MATCH (n)
    WITH n LIMIT {batch_size}
    DETACH DELETE n
    RETURN count(n) as deleted_count
    """

    removed_so_far = 0
    with driver.session() as db:
        last_batch = None
        # Stop once a pass finds nothing left to delete
        while last_batch != 0:
            try:
                last_batch = db.run(delete_batch_query).single()["deleted_count"]
                removed_so_far += last_batch
                print(f"  Deleted {last_batch} nodes/relationships...")
            except Exception as err:
                print(f"Error clearing database: {err}")
                raise err
    print(f"Database cleared. Total deleted nodes: {removed_so_far}")

In [None]:
import math

def import_from_csv(driver, input_folder, batch_size=1000):
    """Import nodes and relationships from the CSV files in ``input_folder``.

    Expected files:
      * ``nodes_<label>.csv`` with an ``_id`` column; every other column is
        written as a node property.
      * ``rels_<type>.csv`` with ``_start_id`` and ``_end_id`` columns; every
        other column is written as a relationship property.
    Cells that pandas reads as missing (NaN) are not written as properties.

    While importing, every node carries a temporary ``_MigrationNode`` label
    and a ``migration_id`` property (the CSV ``_id``) so that relationships
    can look up their endpoints. Nodes are MERGEd on ``migration_id``: a node
    that appears in several ``nodes_*.csv`` files (one per label it carries)
    becomes a single node holding all of those labels instead of one copy per
    file. The temporary label, property and constraint/index are removed at
    the end.

    Args:
        driver: Neo4j driver used to open the session.
        input_folder: Folder containing the CSV files. If it does not exist a
            message is printed and the function returns without doing anything.
        batch_size: Maximum number of rows sent per statement; also used as
            the batch size for the final cleanup.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if not os.path.exists(input_folder):
        print(f"Input folder '{input_folder}' does not exist.")
        return
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    csv_suffix = ".csv"

    # Helper to clean NaN values from dictionary (Neo4j doesn't like NaNs)
    def clean_row(row):
        return {k: v for k, v in row.items() if pd.notna(v)}

    def list_exports(prefix):
        """Return (name, filename) for every '<prefix><name>.csv' file, sorted by filename."""
        # Slice the prefix/suffix off instead of str.replace(), which would also
        # strip those substrings from the middle of a label or type name.
        return [
            (fname[len(prefix):-len(csv_suffix)], fname)
            for fname in sorted(os.listdir(input_folder))
            if fname.startswith(prefix) and fname.endswith(csv_suffix)
        ]

    def iter_batches(df):
        """Yield consecutive slices of ``df`` holding at most ``batch_size`` rows."""
        for start in range(0, len(df), batch_size):
            yield df.iloc[start:start + batch_size]

    def is_migration_schema(record):
        """True when a SHOW CONSTRAINTS / SHOW INDEXES row targets _MigrationNode.migration_id."""
        labels, props = record["labelsOrTypes"], record["properties"]
        return bool(labels and "_MigrationNode" in labels and props and "migration_id" in props)

    with driver.session() as session:
        # 1. Create Constraint/Index for fast lookup during relationship creation
        print("Creating temporary index for import...")
        # Using a generic label '_MigrationNode' for all imported nodes to allow global lookup
        try:
            # Try creating a constraint (preferred for uniqueness) or index.
            # consume() makes a failure surface here, inside the try.
            session.run(
                "CREATE CONSTRAINT IF NOT EXISTS FOR (n:_MigrationNode) REQUIRE n.migration_id IS UNIQUE"
            ).consume()
        except Exception as e:
            print(f"  Note: Could not create constraint (might be not supported on this edition), trying index. Error: {e}")
            session.run("CREATE INDEX IF NOT EXISTS FOR (n:_MigrationNode) ON (n.migration_id)").consume()

        # 2. Import Nodes
        for label, file in list_exports("nodes_"):
            print(f"Importing nodes for label: {label}...")
            # Set low_memory=False to avoid DtypeWarning
            df = pd.read_csv(os.path.join(input_folder, file), low_memory=False)
            total_rows = len(df)

            # MERGE (not CREATE) so a node exported under several labels is
            # created once and simply gains each additional label.
            # migration_id is set last so a property of the same name in
            # row.props cannot overwrite the lookup key.
            # Note: Curly braces are escaped as {{...}} for Python f-string
            query = f"""
            UNWIND $batch AS row
            MERGE (n:_MigrationNode {{migration_id: row.migration_id}})
            SET n:`{label}`, n += row.props, n.migration_id = row.migration_id
            """
            for chunk in iter_batches(df):
                batch_data = []
                for _, row in chunk.iterrows():
                    row_dict = row.to_dict()
                    migration_id = row_dict.pop("_id")  # Extract migration ID
                    batch_data.append({"migration_id": migration_id, "props": clean_row(row_dict)})
                # consume() so an error is raised at the batch that caused it
                session.run(query, batch=batch_data).consume()
            print(f"  Imported {total_rows} nodes.")

        # 3. Import Relationships
        for rel_type, file in list_exports("rels_"):
            print(f"Importing relationships of type: {rel_type}...")
            df = pd.read_csv(os.path.join(input_folder, file), low_memory=False)
            total_rows = len(df)

            # Match both endpoints by migration_id and create the relationship
            query = f"""
            UNWIND $batch AS row
            MATCH (start:_MigrationNode {{migration_id: row.start_id}})
            MATCH (end:_MigrationNode {{migration_id: row.end_id}})
            CREATE (start)-[r:`{rel_type}`]->(end)
            SET r += row.props
            """
            for chunk in iter_batches(df):
                batch_data = []
                for _, row in chunk.iterrows():
                    row_dict = row.to_dict()
                    start_id = row_dict.pop("_start_id")
                    end_id = row_dict.pop("_end_id")
                    batch_data.append({"start_id": start_id, "end_id": end_id, "props": clean_row(row_dict)})
                session.run(query, batch=batch_data).consume()
            print(f"  Imported {total_rows} relationships.")

        # 4. Cleanup
        print("Cleaning up temporary migration properties...")
        # Done in batches so the cleanup never touches every node in a single
        # transaction. A cleaned node loses the label and is therefore not
        # matched again, so the loop ends when a pass reports 0.
        cleanup_query = """
        MATCH (n:_MigrationNode)
        WITH n LIMIT $limit
        REMOVE n:_MigrationNode, n.migration_id
        RETURN count(n) AS cleaned
        """
        while True:
            cleaned = session.run(cleanup_query, limit=batch_size).single()["cleaned"]
            if cleaned == 0:
                break

        print("Dropping temporary indices/constraints...")
        try:
            # Drop Constraints by name. The listing is materialised first so
            # it is not being iterated while further statements run.
            constraints = list(session.run("SHOW CONSTRAINTS YIELD name, labelsOrTypes, properties"))
            for record in constraints:
                if is_migration_schema(record):
                    print(f"  Dropping constraint: {record['name']}")
                    session.run(f"DROP CONSTRAINT {record['name']}").consume()

            # Drop Indexes by name
            indexes = list(session.run("SHOW INDEXES YIELD name, labelsOrTypes, properties"))
            for record in indexes:
                if is_migration_schema(record):
                    print(f"  Dropping index: {record['name']}")
                    session.run(f"DROP INDEX {record['name']}").consume()
        except Exception as e:
            print(f"  Warning: Cleanup of index/constraint failed: {e}")

        print("Import complete.")

In [None]:
# Run the process: wipe the database, then reload it from the exported CSV files
if 'driver' in locals():
    try:
        print("Restarting database reload with final cleanup fix...")
        import_folder = "neo4j_export"
        # Clearing is irreversible, so refuse to start unless there is
        # something to reload from afterwards.
        has_export = os.path.isdir(import_folder) and any(
            name.endswith(".csv") for name in os.listdir(import_folder)
        )
        if not has_export:
            raise FileNotFoundError(
                f"No exported CSV files found in '{import_folder}'; database was not cleared."
            )
        # Use smaller batch sizes to prevent MemoryPoolOutOfMemoryError
        clear_database(driver, batch_size=200)
        import_from_csv(driver, import_folder, batch_size=500)
    except Exception as e:
        print(f"Process stopped due to error: {e}")
else:
    print("Driver not defined.")

In [None]:
# Open a driver for the summary queries (closed again in the finally clause)
driver = GraphDatabase.driver(uri, auth=(user, password))

try:
    with driver.session() as session:
        # How many nodes does the graph hold?
        result_nodes = session.run("MATCH (n) RETURN count(n) AS TotalNodes")
        total_nodes = result_nodes.single()["TotalNodes"]

        # How many relationships?
        result_rels = session.run("MATCH ()-[r]->() RETURN count(r) AS TotalRelationships")
        total_rels = result_rels.single()["TotalRelationships"]

        # Two-row summary table: one row per metric
        df_invariants = pd.DataFrame({
            "Metric": ["Total Nodes", "Total Relationships"],
            "Count": [total_nodes, total_rels],
        })

    # Show the summary once the session has been released
    print(df_invariants)

finally:
    # Always release the driver, even when a query fails
    driver.close()

In [None]:
import numpy as np
from scipy.sparse import coo_matrix
from scipy.sparse.linalg import eigs

# Initialize the Neo4j driver (closed again in the finally clause)
driver = GraphDatabase.driver(uri, auth=(user, password))

try:
    with driver.session() as session:
        # 1. Calculate Degree Extremes
        # count{(n)--()} counts the relationships attached to n in either direction.
        # Both aggregates are null (None in Python) when the graph has no nodes.
        query_degree = """
        MATCH (n)
        RETURN max(count{(n)--()}) AS MaxDegree, min(count{(n)--()}) AS MinDegree
        """
        result_degree = session.run(query_degree).single()
        max_degree = result_degree["MaxDegree"]
        min_degree = result_degree["MinDegree"]

        # 2. Fetch Graph Structure for Spectral Radius
        # Fetch all node IDs to create a mapping
        # Using elementId() instead of deprecated id()
        query_nodes = "MATCH (n) RETURN elementId(n) AS id"
        result_nodes = session.run(query_nodes)
        node_ids = [record["id"] for record in result_nodes]

        # Map Neo4j element IDs to sequential indices 0..N-1
        id_to_index = {node_id: i for i, node_id in enumerate(node_ids)}
        num_nodes = len(node_ids)

        # Fetch all relationships (source, target)
        query_rels = "MATCH (s)-[]->(t) RETURN elementId(s) AS source, elementId(t) AS target"
        result_rels = session.run(query_rels)

        sources = []
        targets = []
        for record in result_rels:
            # Only include relationships where both nodes are in our node list (snapshot consistency)
            if record["source"] in id_to_index and record["target"] in id_to_index:
                sources.append(id_to_index[record["source"]])
                targets.append(id_to_index[record["target"]])

        # 3. Build Adjacency Matrix
        # One entry of 1.0 per directed relationship
        data = np.ones(len(sources))

        # Construct sparse matrix (N x N)
        adj_matrix = coo_matrix((data, (sources, targets)), shape=(num_nodes, num_nodes), dtype=float)

        # 4. Calculate Spectral Radius (largest eigenvalue magnitude)
        if num_nodes == 0:
            # Empty graph: there are no eigenvalues; report a radius of 0
            evals = np.array([])
            evecs = np.empty((0, 0))
            spectral_radius = 0.0
        elif num_nodes < 3:
            # eigs requires k < N - 1, so k=1 is rejected for 1- and 2-node
            # graphs; a dense eigendecomposition is trivial at this size.
            evals, evecs = np.linalg.eig(adj_matrix.toarray())
            spectral_radius = np.abs(evals).max()
        else:
            # k=1 for one eigenvalue, which='LM' for largest magnitude
            evals, evecs = eigs(adj_matrix, k=1, which='LM')
            spectral_radius = np.abs(evals[0])

        # 5. Display Results
        print(f"Max Degree: {max_degree}")
        print(f"Min Degree: {min_degree}")
        print(f"Spectral Radius: {spectral_radius:.4f}")

finally:
    driver.close()