# AIA25 GraphML 04 Graph DB - modified to run on local machine
follow the instructions to instantiate the Node4J database...

In [34]:
# import packages
import plotly.graph_objects as go
from neo4j import GraphDatabase
import pandas as pd
import networkx as nx
from tqdm import tqdm
import tkinter as tk

In [35]:
# load the csv files
edges_01 = pd.read_csv(r'edges_01a.csv') #check path here
nodes_01 = pd.read_csv(r'nodes_01a.csv') #check path here

In [36]:
# interrogate the dataframes
edges_01.head()

Unnamed: 0.1,Unnamed: 0,source,target,attributes,relation_type
0,0,15vDMMAar37xgrny$3QDMH,2lv$85Dg92KhFiPH_MQvgk,{'relation': 'SURROUNDS'},SURROUNDS
1,1,15vDMMAar37xgrny$3QDMH,3rfBVMzcvEWBXMzUPg1fJ$,{'relation': 'SURROUNDS'},SURROUNDS
2,2,15vDMMAar37xgrny$3QDMH,0ns$ibjbL5iOi6E3kgDXJo,{'relation': 'SURROUNDS'},SURROUNDS
3,3,15vDMMAar37xgrny$3QDMH,0$5P4mS9DE8wVf23cU6OOb,{'relation': 'SURROUNDS'},SURROUNDS
4,4,15vDMMAar37xgrny$3QDMH,0iPTdIpHLD1fWb73ScRQE7,{'relation': 'SURROUNDS'},SURROUNDS


In [37]:
nodes_01.head()

Unnamed: 0.1,Unnamed: 0,GlobalId,Name,Description,ObjectType,IfcType,category
0,15vDMMAar37xgrny$3QDMH,15vDMMAar37xgrny$3QDMH,LVL1_1B_63,,,IfcSpace,IfcSpace
1,0_4V1TmqHFng4fJUGleJ8y,0_4V1TmqHFng4fJUGleJ8y,LVL1_1B_74,,,IfcSpace,IfcSpace
2,2xwSvUz2HC39UNEW6gW5VM,2xwSvUz2HC39UNEW6gW5VM,LVL1_1B_82,,,IfcSpace,IfcSpace
3,2Nam78vBrA_BtikMcO_hhP,2Nam78vBrA_BtikMcO_hhP,LVL1_1B_85,,,IfcSpace,IfcSpace
4,1_4zoklETABBqZ92jqi7TQ,1_4zoklETABBqZ92jqi7TQ,LVL1_1B_93,,,IfcSpace,IfcSpace


In [38]:
# Connect to Neo4j
URI = "bolt://localhost:7687"

USERNAME = "neo4j"
PASSWORD = "macad2025"

# Create a Neo4j driver
driver = GraphDatabase.driver(URI, auth=(USERNAME, PASSWORD))

# Verify connectivity
try:
    driver.verify_connectivity()
    print("Connection successful!")
except Exception as e:
    print("Connection failed:", e)

Connection successful!


# Load nodes and edges to database

In [39]:
from collections import defaultdict

def merge_node(tx, node_data):
    """
    Merges a node into Neo4j using a dynamic label derived from its 'IfcType'
    and sets all properties from the CSV on the node.

    The node is matched on its GlobalId to prevent duplicates.
    """
    # Use the 'IfcType' column as the node label (e.g., IfcSpace, IfcWall, etc.)
    label = node_data["IfcType"]

    # Convert the pandas row (Series) into a dictionary containing all properties.
    props = node_data.to_dict()

    # Build a Cypher query with a dynamic label.
    # MERGE finds or creates the node by GlobalId, then SET updates all properties.
    query = (
        f"MERGE (n:{label} {{GlobalId: $GlobalId}}) "  # Dynamic label here
        "SET n = $props"
    )

    # Execute the query using GlobalId for matching and the full properties dict.
    tx.run(query, GlobalId=props["GlobalId"], props=props)

def batch_merge_nodes(tx, batch):
    """
    Merges a batch of nodes into Neo4j with dynamic labels from 'IfcType'
    """

    label_groups = defaultdict(list)
    for row in batch:
        label = row.get("IfcType")
        if label:
            label_groups[label].append(row)

    for label, records in label_groups.items():
        query = f"""
        UNWIND $rows AS row
        MERGE (n:{label} {{GlobalId: row.GlobalId}})
        SET n += row
        """
        tx.run(query, rows=records)


In [40]:
# Batch size
batch_size = 500
with driver.session() as session:
    for i in tqdm(range(0, len(nodes_01), batch_size), desc="Batch merging nodes"):
        batch = nodes_01.iloc[i:i+batch_size].to_dict('records')
        session.execute_write(batch_merge_nodes, batch)

driver.close()
print("Nodes loaded successfully!")

Batch merging nodes: 100%|██████████| 6/6 [00:03<00:00,  1.56it/s]

Nodes loaded successfully!





In [41]:
def batch_merge_edges_without_apoc(tx, relation_type, batch):
    query = f"""
    UNWIND $rows AS row
    MATCH (a {{GlobalId: row.source}})
    MATCH (b {{GlobalId: row.target}})
    MERGE (a)-[r:{relation_type}]->(b)
    SET r += row.props
    """
    tx.run(query, rows=batch)

edges_data = []
for _, row in edges_01.iterrows():
    props = row.drop(['source', 'target', 'relation_type']).dropna().to_dict()
    edges_data.append({
        'source': row['source'],
        'target': row['target'],
        'relation_type': row['relation_type'],
        'props': props
    })

grouped_edges = defaultdict(list)
for row in edges_data:
    grouped_edges[row['relation_type']].append(row)

with driver.session() as session:
    for relation_type, group in grouped_edges.items():
        for i in tqdm(range(0, len(group), batch_size), desc=f"Merging {relation_type}"):
            batch = group[i:i+batch_size]
            session.execute_write(batch_merge_edges_without_apoc, relation_type, batch)

driver.close()
print("Edges loaded successfully!")


Using a driver after it has been closed is deprecated. Future versions of the driver will raise an error.

Merging SURROUNDS: 100%|██████████| 6/6 [00:14<00:00,  2.41s/it]
Merging VOIDS: 100%|██████████| 1/1 [00:01<00:00,  1.04s/it]

Edges loaded successfully!





In [42]:
def merge_relationship(tx, edge_data):
    """
    Merges a relationship between two nodes based on the 'source' and 'target'
    GlobalIds from the CSV. Uses the actual relation type from the CSV.

    This query:
      - MATCHes the two nodes using their GlobalId properties.
      - MERGEs a relationship of the specified type between them.
      - Optionally sets a 'relation' property on the relationship.
    """
    relation_type = edge_data['relation_type']

    query = f"""
    MATCH (a {{GlobalId: $source}})
    MATCH (b {{GlobalId: $target}})
    MERGE (a)-[r:{relation_type}]->(b)
    SET r.relation = $relation
    """

    tx.run(query,
           source=edge_data["source"],
           target=edge_data["target"],
           relation=edge_data['relation_type'])

In [43]:
#works
def batch_merge_edges(tx, batch):
    query = """
    UNWIND $rows AS row
    MATCH (a {GlobalId: row.source})
    MATCH (b {GlobalId: row.target})
    MERGE (a)-[r:RELATED_TO]->(b)
    SET r += row.props
    """
    tx.run(query, rows=batch)

# Prepare data with source, target, and optional properties
edges_data = []
for _, row in edges_01.iterrows():
    edges_data.append({
        'source': row['source'],
        'target': row['target'],
        'props': row.drop(['source', 'target']).dropna().to_dict()
    })

# Batch size
batch_size = 500
with driver.session() as session:
    for i in tqdm(range(0, len(edges_data), batch_size), desc="Merging edges"):
        batch = edges_data[i:i + batch_size]
        session.execute_write(batch_merge_edges, batch)

driver.close()
print("Edges loaded successfully!")


Using a driver after it has been closed is deprecated. Future versions of the driver will raise an error.

Merging edges: 100%|██████████| 7/7 [00:11<00:00,  1.69s/it]

Edges loaded successfully!





In [None]:
# # Retrieve data from Neo4j database
# def get_nodes(tx):
#     """
#     Retrieve all nodes with their GlobalId and IfcType properties.
#     Returns a list of dictionaries.
#     """
#     query = "MATCH (n) RETURN n.GlobalId AS id, n.IfcType AS ifcType"
#     return [{"id": record["id"], "ifcType": record["ifcType"]} for record in tx.run(query)]

# def get_edges(tx):
#     """
#     Retrieve all relationships (edges) between nodes.
#     This query returns edges using the GlobalId of both endpoints.
#     """
#     query = "MATCH (a)-[r]->(b) RETURN a.GlobalId AS source, b.GlobalId AS target"
#     return [{"source": record["source"], "target": record["target"]} for record in tx.run(query)]

# with driver.session() as session:
#     nodes_data = session.execute_read(get_nodes)
#     edges = session.execute_read(get_edges)

# G = nx.Graph()

# # Add nodes with their IfcType as an attribute.
# for node in nodes_data:
#     G.add_node(node["id"], ifcType=node["ifcType"])

# # Add edges if both nodes exist in the graph.
# for edge in edges:
#     source = edge["source"]
#     target = edge["target"]
#     if source in G.nodes and target in G.nodes:
#         G.add_edge(source, target)

# # Remove isolated nodes (nodes with no connections).
# isolated_nodes = list(nx.isolates(G))
# G.remove_nodes_from(isolated_nodes)

# #  3D Layout for the Graph
# pos = nx.spring_layout(G, dim=3, seed=42)

# def get_color_for_ifc_type(ifc_type):
#     """
#     Return a consistent color string for a given IfcType.
#     This example uses the built-in hash to generate a color.
#     """
#     # Use bitwise AND to ensure the hex value is within range.
#     return f"#{hash(ifc_type) & 0xFFFFFF:06x}"

# # Build edge traces in 3D.
# edge_x, edge_y, edge_z = [], [], []
# for edge in G.edges():
#     x0, y0, z0 = pos[edge[0]]
#     x1, y1, z1 = pos[edge[1]]
#     edge_x.extend([x0, x1, None])
#     edge_y.extend([y0, y1, None])
#     edge_z.extend([z0, z1, None])

# edge_trace = go.Scatter3d(
#     x=edge_x, y=edge_y, z=edge_z,
#     mode='lines',
#     line=dict(color='red', width=2),
#     hoverinfo='none'
# )

# # Build node traces in 3D.
# node_x, node_y, node_z = [], [], []
# node_colors = []  # To hold the color based on IfcType.
# node_text = []   # Hover text for each node.
# for node in G.nodes(data=True):
#     nid = node[0]
#     attr = node[1]
#     x, y, z = pos[nid]
#     node_x.append(x)
#     node_y.append(y)
#     node_z.append(z)
#     # Use the node's IfcType for color coding.
#     ifc_type = attr.get("ifcType", "Unknown")
#     node_colors.append(get_color_for_ifc_type(ifc_type))
#     node_text.append(f"GlobalId: {nid}<br>IfcType: {ifc_type}")

# node_trace = go.Scatter3d(
#     x=node_x, y=node_y, z=node_z,
#     mode='markers',
#     marker=dict(
#         size=6,
#         color=node_colors,   # Each node gets its color based on its IfcType.
#         line=dict(width=0)
#     ),
#     hoverinfo='text',
#     text=node_text
# )


# fig = go.Figure(data=[edge_trace, node_trace],
#                 layout=go.Layout(
#                     title=dict(text="3D Graph Visualization from Neo4j", font=dict(size=16)),
#                     showlegend=False,
#                     width=1200, height=800,
#                     margin=dict(l=0, r=0, b=0, t=40),
#                     scene=dict(
#                         xaxis=dict(showbackground=True, showticklabels=True, title=''),
#                         yaxis=dict(showbackground=True, showticklabels=True, title=''),
#                         zaxis=dict(showbackground=True, showticklabels=True, title='')
#                     )
#                 ))
# # fig.show()

# driver.close()



Using a driver after it has been closed is deprecated. Future versions of the driver will raise an error.



ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

In [45]:
!pip install --upgrade nbformat

Defaulting to user installation because normal site-packages is not writeable
Collecting nbformat
  Downloading nbformat-5.10.4-py3-none-any.whl.metadata (3.6 kB)
Collecting fastjsonschema>=2.15 (from nbformat)
  Using cached fastjsonschema-2.21.1-py3-none-any.whl.metadata (2.2 kB)
Downloading nbformat-5.10.4-py3-none-any.whl (78 kB)
Using cached fastjsonschema-2.21.1-py3-none-any.whl (23 kB)
Installing collected packages: fastjsonschema, nbformat
Successfully installed fastjsonschema-2.21.1 nbformat-5.10.4



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip
