# AIA25 GraphML 04 Graph DB - modified to run on local machine
Follow the instructions to instantiate the Neo4j database...

In [6]:
# import packages
import os
import zlib
from collections import defaultdict

import networkx as nx
import pandas as pd
import plotly.graph_objects as go
from neo4j import GraphDatabase
from tqdm import tqdm

In [3]:
# Read the node table and the edge list from their CSV exports.
# Both paths are relative to the notebook's working directory; adjust them if
# the files live somewhere else.
nodes_01 = pd.read_csv(r'csv_files/nodes_02.csv')
edges_01 = pd.read_csv(r'csv_files/edges_02.csv')

In [4]:
# Preview the first five rows of the edge table
edges_01.head(n=5)


Unnamed: 0.1,Unnamed: 0,source,target,attributes,relation_type
0,0,3sM$t_Teb2q92lD380TwKD,1_2cirtfH1ARPCSLMw_JEe,{'relation': 'SURROUNDS'},SURROUNDS
1,1,3sM$t_Teb2q92lD380TwKD,2zdWlwgSz8o9G2jpwMqeL$,{'relation': 'SURROUNDS'},SURROUNDS
2,2,3sM$t_Teb2q92lD380TwKD,15a1uvAib5Cx3_GHZZjT$N,{'relation': 'SURROUNDS'},SURROUNDS
3,3,3sM$t_Teb2q92lD380TwKD,0Fo_vRoRX5uQXTgGB2Ak7E,{'relation': 'SURROUNDS'},SURROUNDS
4,4,3sM$t_Teb2q92lD380TwKD,1x0N$dxyL8LOQp$IETASUc,{'relation': 'SURROUNDS'},SURROUNDS


In [4]:
# Preview the first five rows of the node table
nodes_01.head(n=5)

Unnamed: 0.1,Unnamed: 0,GlobalId,Name,Description,ObjectType,IfcType,LocationX,LocationY,LocationZ,category,SolarRad,Tag
0,3sM$t_Teb2q92lD380TwKD,3sM$t_Teb2q92lD380TwKD,LVL1_1B_70,,,IfcSpace,-39.208039,26.376091,4.5,IfcSpace,0.0,0.0
1,1jSziHgjf3ZR_L0Dj1KHuc,1jSziHgjf3ZR_L0Dj1KHuc,LVL1_1B_71,,,IfcSpace,-39.208039,23.376091,4.5,IfcSpace,0.0,0.0
2,0hlmsO3TX3kfSCEYMAtENu,0hlmsO3TX3kfSCEYMAtENu,LVL1_1B_72,,,IfcSpace,-39.208039,20.376091,4.5,IfcSpace,0.0,0.0
3,3X95yfnqPDVQbtUITG$ppG,3X95yfnqPDVQbtUITG$ppG,LVL1_1B_80,,,IfcSpace,-39.208039,62.376091,4.5,IfcSpace,0.0,0.0
4,2W18n9Wmn10BQxqA98Gkq5,2W18n9Wmn10BQxqA98Gkq5,LVL1_1B_98,,,IfcSpace,-30.208038,20.376091,4.5,IfcSpace,0.0,0.0


In [5]:
# Connect to Neo4j
# Connection settings can be overridden with the NEO4J_URI, NEO4J_USERNAME and
# NEO4J_PASSWORD environment variables, so credentials do not have to be
# edited into (and shared with) the notebook. The fallbacks are the original
# local-machine values.
URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")

USERNAME = os.environ.get("NEO4J_USERNAME", "neo4j")
PASSWORD = os.environ.get("NEO4J_PASSWORD", "macad2025")

# Create a Neo4j driver (shared by every cell below)
driver = GraphDatabase.driver(URI, auth=(USERNAME, PASSWORD))

# Verify connectivity
# Best effort: a failure is reported but not re-raised, so later cells will
# fail on their own if the database is unreachable.
try:
    driver.verify_connectivity()
    print("Connection successful!")
except Exception as e:
    print("Connection failed:", e)

Connection successful!


# Load nodes and edges into the database

In [7]:
def batch_merge_nodes(tx, batch):
    """
    Merges a batch of nodes into Neo4j with dynamic labels from 'IfcType'.

    Rows are grouped by their 'IfcType' value and one UNWIND/MERGE query is
    run per label. Nodes are matched on 'GlobalId', and every key of the row
    is written onto the node as a property.

    Args:
        tx: Object exposing ``run(query, **params)``; here the transaction
            passed in by ``session.execute_write``.
        batch: Iterable of dict rows. A row is only merged when its
            'IfcType' is a non-empty string.

    Rows whose 'IfcType' is missing, empty, or not a string (for example the
    float NaN pandas produces for an empty CSV cell) are skipped.
    """

    label_groups = defaultdict(list)
    for row in batch:
        label = row.get("IfcType")
        # NaN is truthy, so a plain truthiness check would let it through and
        # create nodes labelled "nan".
        if isinstance(label, str) and label:
            label_groups[label].append(row)

    for label, records in label_groups.items():
        # The label is embedded in the query text, so backtick-quote it
        # (doubling any embedded backtick). This keeps labels containing
        # spaces or punctuation valid and prevents them from altering the query.
        safe_label = "`" + label.replace("`", "``") + "`"
        query = f"""
        UNWIND $rows AS row
        MERGE (n:{safe_label} {{GlobalId: row.GlobalId}})
        SET n += row
        """
        tx.run(query, rows=records)

In [8]:
# Merge the node table in fixed-size batches, one write transaction per batch.
batch_size = 500
with driver.session() as session:
    for i in tqdm(range(0, len(nodes_01), batch_size), desc="Batch merging nodes"):
        batch = nodes_01.iloc[i:i+batch_size].to_dict('records')
        session.execute_write(batch_merge_nodes, batch)

# The driver is deliberately NOT closed here: the cells below open further
# sessions on the same driver, and closing it mid-notebook leaves them running
# against a closed driver. It is closed once, at the end of the notebook.
print("Nodes loaded successfully!")

Batch merging nodes: 100%|██████████| 8/8 [00:02<00:00,  3.25it/s]

Nodes loaded successfully!





In [9]:
def batch_merge_edges_without_apoc(tx, relation_type, batch):
    """
    Merge a batch of edges that all share one relationship type.

    For each row, the endpoints are looked up by 'GlobalId', a relationship
    of type ``relation_type`` is merged from source to target, and the row's
    'props' map is written onto it.

    Args:
        tx: Object exposing ``run(query, **params)``; here the transaction
            passed in by ``session.execute_write``.
        relation_type: Relationship type to create; embedded in the query text.
        batch: List of dicts with 'source', 'target' and 'props' keys.
    """
    # The type is embedded in the query text, so backtick-quote it (doubling
    # any embedded backtick). This keeps unusual type names valid and prevents
    # them from altering the query.
    safe_type = "`" + str(relation_type).replace("`", "``") + "`"
    # NOTE(review): the MATCH clauses carry no label, so the lookups presumably
    # cannot use a label-based index on GlobalId - confirm if loading is slow.
    query = f"""
    UNWIND $rows AS row
    MATCH (a {{GlobalId: row.source}})
    MATCH (b {{GlobalId: row.target}})
    MERGE (a)-[r:{safe_type}]->(b)
    SET r += row.props
    """
    tx.run(query, rows=batch)

# Flatten each edge row into the shape the Cypher query expects: the two
# endpoints, the relationship type, and a 'props' map holding every remaining
# non-null column (this includes the CSV index columns and 'attributes').
edges_data = []
for _, row in edges_01.iterrows():
    props = row.drop(['source', 'target', 'relation_type']).dropna().to_dict()
    edges_data.append({
        'source': row['source'],
        'target': row['target'],
        'relation_type': row['relation_type'],
        'props': props
    })

# Group by relationship type: the type is embedded in the query text, so each
# query can only handle one type.
grouped_edges = defaultdict(list)
for row in edges_data:
    grouped_edges[row['relation_type']].append(row)

# batch_size is the value set in the node-loading cell.
with driver.session() as session:
    for relation_type, group in grouped_edges.items():
        for i in tqdm(range(0, len(group), batch_size), desc=f"Merging {relation_type}"):
            batch = group[i:i+batch_size]
            session.execute_write(batch_merge_edges_without_apoc, relation_type, batch)

# The driver is deliberately NOT closed here: later cells open further sessions
# on the same driver, and closing it mid-notebook leaves them running against
# a closed driver. It is closed once, at the end of the notebook.
print("Edges loaded successfully!")

  with driver.session() as session:
Merging SURROUNDS: 100%|██████████| 8/8 [00:09<00:00,  1.16s/it]
Merging VOIDS: 100%|██████████| 1/1 [00:00<00:00,  1.22it/s]

Edges loaded successfully!





In [10]:
#works
def batch_merge_edges(tx, batch):
    """
    Merge a batch of edges as RELATED_TO relationships.

    Each row's endpoints are looked up by 'GlobalId', a RELATED_TO
    relationship is merged from source to target, and the row's 'props' map
    is written onto it.

    Args:
        tx: Object exposing ``run(query, **params)``.
        batch: List of dicts with 'source', 'target' and 'props' keys.
    """
    cypher = """
    UNWIND $rows AS row
    MATCH (a {GlobalId: row.source})
    MATCH (b {GlobalId: row.target})
    MERGE (a)-[r:RELATED_TO]->(b)
    SET r += row.props
    """
    tx.run(cypher, rows=batch)

# Prepare data with source, target, and optional properties
# Prepare data with source, target, and optional properties.
# 'props' holds every other non-null column of the row, including
# 'relation_type', since only the endpoints are dropped here.
edges_data = []
for _, row in edges_01.iterrows():
    edges_data.append({
        'source': row['source'],
        'target': row['target'],
        'props': row.drop(['source', 'target']).dropna().to_dict()
    })

# Batch size
batch_size = 500
with driver.session() as session:
    for i in tqdm(range(0, len(edges_data), batch_size), desc="Merging edges"):
        batch = edges_data[i:i + batch_size]
        session.execute_write(batch_merge_edges, batch)

# The driver is deliberately NOT closed here: the retrieval/visualization cell
# opens another session on the same driver, and closing it mid-notebook leaves
# that cell running against a closed driver.
print("Edges loaded successfully!")

  with driver.session() as session:
Merging edges: 100%|██████████| 9/9 [00:09<00:00,  1.07s/it]

Edges loaded successfully!





In [11]:

# Retrieve data from Neo4j database
def get_nodes(tx):
    """
    Fetch the property map of every node in the database.

    Args:
        tx: Object exposing ``run(query)`` that yields records indexable by
            column name.

    Returns:
        A list with one entry per returned record: the value of its 'props'
        column.
    """
    result = tx.run("MATCH (n) RETURN properties(n) AS props")
    props_per_node = []
    for record in result:
        props_per_node.append(record["props"])
    return props_per_node

def get_edges(tx):
    """
    Fetch the endpoints of every relationship, identified by GlobalId.

    Args:
        tx: Object exposing ``run(query)`` that yields records indexable by
            column name.

    Returns:
        A list of ``{"source": ..., "target": ...}`` dicts, one per returned
        record. Relationship type and properties are not included.
    """
    result = tx.run("MATCH (a)-[r]->(b) RETURN a.GlobalId AS source, b.GlobalId AS target")
    endpoint_pairs = []
    for record in result:
        endpoint_pairs.append({"source": record["source"], "target": record["target"]})
    return endpoint_pairs

# Start session and fetch data
# Read all nodes and relationships back from the database
with driver.session() as session:
    nodes_data = session.execute_read(get_nodes)
    edges = session.execute_read(get_edges)

# Undirected graph: direction and parallel relationships between the same
# pair of nodes are collapsed into a single edge.
G = nx.Graph()

# Register every node that has a GlobalId, keeping all of its properties
# as node attributes
for node in nodes_data:
    node_id = node.get("GlobalId")
    if not node_id:
        continue
    G.add_node(node_id, **node)

# Connect only endpoints that were registered above
for edge in edges:
    source, target = edge["source"], edge["target"]
    if source in G.nodes and target in G.nodes:
        G.add_edge(source, target)

# Drop nodes that ended up without any edge (materialized first, because the
# graph must not change while it is being iterated)
isolated_nodes = list(nx.isolates(G))
G.remove_nodes_from(isolated_nodes)

# Node positions come straight from the stored LocationX/Y/Z properties
# (0 where a coordinate is missing); no layout algorithm is run.
pos = {
    node_id: tuple(attrs.get(axis, 0) for axis in ("LocationX", "LocationY", "LocationZ"))
    for node_id, attrs in G.nodes(data=True)
}

def get_color_for_ifc_type(ifc_type):
    """
    Generate a hex color from IfcType string.

    The color is derived from a CRC-32 checksum of the type name instead of
    the built-in ``hash()``. Python salts ``str`` hashes per process, so
    ``hash()`` would give each IfcType a different color after every kernel
    restart.

    Args:
        ifc_type: Type name; converted with ``str()`` before hashing.

    Returns:
        A ``"#rrggbb"`` string built from the low 24 bits of the checksum.
    """
    checksum = zlib.crc32(str(ifc_type).encode("utf-8"))
    return f"#{checksum & 0xFFFFFF:06x}"

# Build edge traces
# Edge trace: each edge contributes its two endpoints followed by None, which
# separates it from the next edge in the shared coordinate lists.
edge_x, edge_y, edge_z = [], [], []
for edge in G.edges():
    start_xyz, end_xyz = pos[edge[0]], pos[edge[1]]
    for axis_values, start, end in zip((edge_x, edge_y, edge_z), start_xyz, end_xyz):
        axis_values += [start, end, None]

edge_trace = go.Scatter3d(
    x=edge_x,
    y=edge_y,
    z=edge_z,
    mode='lines',
    line=dict(color='gray', width=2),
    hoverinfo='none',
)

# Node trace: coordinates, one color per IfcType, and a hover label listing
# every attribute of the node.
node_ids = list(G.nodes)
node_x = [pos[nid][0] for nid in node_ids]
node_y = [pos[nid][1] for nid in node_ids]
node_z = [pos[nid][2] for nid in node_ids]
node_colors = [
    get_color_for_ifc_type(attr.get("IfcType", "Unknown"))
    for _, attr in G.nodes(data=True)
]
node_text = [
    "<br>".join(f"{k}: {v}" for k, v in attr.items())
    for _, attr in G.nodes(data=True)
]

node_trace = go.Scatter3d(
    x=node_x,
    y=node_y,
    z=node_z,
    mode='markers',
    marker=dict(size=6, color=node_colors, line=dict(width=0)),
    hoverinfo='text',
    text=node_text,
)

# Assemble the 3D figure; all three axes share the same display settings
axis_style = dict(showbackground=True, showticklabels=True, title='')
layout = go.Layout(
    title=dict(text="3D Graph Visualization from Neo4j", font=dict(size=16)),
    showlegend=False,
    width=1200,
    height=800,
    margin=dict(l=0, r=0, b=0, t=40),
    scene=dict(
        xaxis=dict(axis_style),
        yaxis=dict(axis_style),
        zaxis=dict(axis_style),
    ),
)
fig = go.Figure(data=[edge_trace, node_trace], layout=layout)

fig.show()
# Last use of the database in this cell: release the driver's connections
driver.close()


  with driver.session() as session:
