AIA25-GraphML IFC to Graph v3

# 1. Import packages

In [1]:
import ifcopenshell
import networkx as nx
import plotly.graph_objects as go
import pandas as pd
import os

# 2. Extract data from IFC

## 2.1 Utility Function

In [2]:
def extract_properties(entity):
    """Extracts general and identity-related properties from an IFC entity."""
    data = {
        "GlobalId": entity.GlobalId,
        "Name": entity.Name,
        "Description": getattr(entity, "Description", None),
        "ObjectType": getattr(entity, "ObjectType", None),
        "IfcType": entity.is_a()
    }

    # Fallback naming and identity-related property extraction
    if hasattr(entity, "IsDefinedBy"):
        for rel in entity.IsDefinedBy:
            if not rel.is_a("IfcRelDefinesByProperties"):
                continue
            prop_def = getattr(rel, "RelatingPropertyDefinition", None)
            if not prop_def or not prop_def.is_a("IfcPropertySet"):
                continue

            for prop in prop_def.HasProperties:
                if not prop.is_a("IfcPropertySingleValue"):
                    continue

                name = prop.Name
                value = getattr(prop.NominalValue, "wrappedValue", None)

                # Try to extract identity-related properties
                if name.lower() in ["locationx", "locationy", "locationz", "solarrad"]:
                    data[name] = value

                # Set name if not yet set
                if data["Name"] is None and name.lower() in ["name", "longname"]:
                    data["Name"] = value

                # Handle cases where value is a pointer to another property name
                if isinstance(value, str) and value.lower() in ["name", "longname"]:
                    for inner_prop in prop_def.HasProperties:
                        if inner_prop.is_a("IfcPropertySingleValue") and inner_prop.Name.lower() == value.lower():
                            data["Name"] = getattr(inner_prop.NominalValue, "wrappedValue", None)
                            break

    # Fallback: try LongName attribute directly
    if data["Name"] is None and hasattr(entity, "LongName"):
        data["Name"] = entity.LongName

    # Extract direct Tag attribute if not already set
    if hasattr(entity, "Tag") and "Tag" not in data:
        data["Tag"] = entity.Tag

    return data


## 2.2 Load IFC File and Extract Data

In [3]:
ifc_file = ifcopenshell.open(os.path.join('ifc_files','50_70_0_10_3000_5000_True_0.3_36_25_50_60.ifc'))
rooms = ifc_file.by_type("IfcSpace")
space_boundaries = ifc_file.by_type("IfcRelSpaceBoundary")

In [4]:
# Identify element types in space boundaries
element_types = set()
for rel in space_boundaries:
    if rel.RelatedBuildingElement:
        element_types.add(rel.RelatedBuildingElement.is_a())

print("Unique element types in space boundaries:")
for element_type in sorted(element_types):
    print("-", element_type)

Unique element types in space boundaries:
- IfcColumn
- IfcDoor
- IfcSlab
- IfcStair
- IfcWall
- IfcWindow


In [5]:
# Define categories and colors
categories = {room.is_a(): "red" for room in rooms}
for rel in space_boundaries:
    if rel.RelatedBuildingElement:
        categories.setdefault(
            rel.RelatedBuildingElement.is_a(),
            f"#{hash(rel.RelatedBuildingElement.is_a()) & 0xFFFFFF:06x}"
        )

def get_node_color(ifc_type):
    """Return a color based on the IFC type or default to gray."""
    return categories.get(ifc_type, "gray")

## 2.3 Build graph

RDF (Resource Description Framework) represents data as triples: subject, predicate, and object. It’s commonly used in the Semantic Web to maintain a universal, flexible model of linked data. Relationships are defined via standardized vocabularies (e.g., RDF Schema, OWL), enabling robust data integration and inference across different domains.

LPG (Labeled Property Graph) organizes data into nodes (entities) and edges (relationships), both of which can have labels and properties. This approach is popular in many graph databases for direct, application-focused queries (often using query languages like Cypher). It’s simpler to model certain graph use cases without the semantic overhead of RDF vocabularies.

Our workflow is based on Labeled Property Graph


In [6]:
G = nx.Graph()

# Add Rooms
for room in rooms:
    room_id = room.GlobalId
    G.add_node(room_id, **extract_properties(room), category="IfcSpace")

# Add Room-Element Connections
#    (Any building element bounding a room goes here with 'SURROUNDS'.)
for rel in space_boundaries:
    room = rel.RelatingSpace
    element = rel.RelatedBuildingElement
    if room and element:
        if element.GlobalId not in G.nodes:
            G.add_node(element.GlobalId, **extract_properties(element), category=element.is_a())
        # Connect the room to this element
        G.add_edge(room.GlobalId, element.GlobalId, relation="SURROUNDS")

# Add Wall-Window/Door Connections
#    (Direct 'VOIDS' edge from IfcWall* types to IfcDoor or IfcWindow.)
for rel in ifc_file.by_type("IfcRelVoidsElement"):
    wall = rel.RelatingBuildingElement
    opening = rel.RelatedOpeningElement

    for fill_rel in ifc_file.by_type("IfcRelFillsElement"):
        if fill_rel.RelatingOpeningElement == opening:
            filled_element = fill_rel.RelatedBuildingElement
            # Strict check: IfcWall*, IfcWallStandardCase, IfcCurtainWall --> [IfcDoor | IfcWindow]
            if (
                wall
                and filled_element
                and "Wall" in wall.is_a()
                and filled_element.is_a() in ["IfcDoor", "IfcWindow"]
            ):
                # Ensure both nodes (Wall + Door/Window) exist in the graph
                if wall.GlobalId not in G.nodes:
                    G.add_node(wall.GlobalId, **extract_properties(wall), category=wall.is_a())
                if filled_element.GlobalId not in G.nodes:
                    G.add_node(filled_element.GlobalId, **extract_properties(filled_element), category=filled_element.is_a())

                # Create a direct connection for the wall-door/window
                # print(f"VOIDS edge found: {wall.GlobalId} -> {filled_element.GlobalId}")
                G.add_edge(wall.GlobalId, filled_element.GlobalId, relation="VOIDS")


## 2.4 Generate 3D Layout and Visualization

In [7]:
# Find nodes missing any of the required position attributes
nodes_missing_location = [
    node for node in G.nodes
    if not all(k in G.nodes[node] for k in ("LocationX", "LocationY", "LocationZ"))
]

print("Nodes missing LocationX, LocationY, or LocationZ:")
for node in nodes_missing_location:
    print(f"- {node}: {G.nodes[node]}")


Nodes missing LocationX, LocationY, or LocationZ:


In [8]:
# Remove unconnected nodes
unconnected_nodes = [node for node, degree in G.degree() if degree == 0]
G.remove_nodes_from(unconnected_nodes)

# Assign positions from node attributes
pos = {
    node: (
        G.nodes[node].get("LocationX", 0),
        G.nodes[node].get("LocationY", 0),
        G.nodes[node].get("LocationZ", 0)
    )
    for node in G.nodes
}

# Extract updated node positions and colors
x_nodes, y_nodes, z_nodes = zip(*[pos[node] for node in G.nodes])
colors = [get_node_color(G.nodes[node].get("IfcType", "Undefined")) for node in G.nodes]

# Separate edge coordinates by relation type
voids_x, voids_y, voids_z = [], [], []
surrounds_x, surrounds_y, surrounds_z = [], [], []
other_x, other_y, other_z = [], [], []

for u, v, data in G.edges(data=True):
    x0, y0, z0 = pos[u]
    x1, y1, z1 = pos[v]

    # Add coordinates in Plotly "line segment" style: [x0, x1, None] so lines don't connect across edges
    if data.get("relation") == "VOIDS":
        voids_x.extend([x0, x1, None])
        voids_y.extend([y0, y1, None])
        voids_z.extend([z0, z1, None])
    elif data.get("relation") == "SURROUNDS":
        surrounds_x.extend([x0, x1, None])
        surrounds_y.extend([y0, y1, None])
        surrounds_z.extend([z0, z1, None])
    else:
        other_x.extend([x0, x1, None])
        other_y.extend([y0, y1, None])
        other_z.extend([z0, z1, None])

# Create separate edge traces
voids_trace = go.Scatter3d(
    x=voids_x, y=voids_y, z=voids_z,
    mode='lines',
    line=dict(width=2, color='red'),
    hoverinfo='none'
)

surrounds_trace = go.Scatter3d(
    x=surrounds_x, y=surrounds_y, z=surrounds_z,
    mode='lines',
    line=dict(width=2, color='gray'),
    opacity=0.5,
    hoverinfo='none'
)

other_trace = go.Scatter3d(
    x=other_x, y=other_y, z=other_z,
    mode='lines',
    line=dict(width=2, color='lightgray'),
    hoverinfo='none'
)

# Create the node trace
node_trace = go.Scatter3d(
    x=x_nodes, y=y_nodes, z=z_nodes,
    mode='markers',
    marker=dict(size=5, color=colors, opacity=0.8),
    text=[
        f"{G.nodes[node].get('Name', 'N/A')} (" \
        f"{G.nodes[node].get('IfcType', 'Undefined')})"
        for node in G.nodes
    ],
    hoverinfo='text'
)

# Build figure
layout = go.Layout(
    title="3D IFC Wall-Door/Window Visualization",
    width=1200, height=800,
    scene=dict(xaxis=dict(title='X'),
               yaxis=dict(title='Y'),
               zaxis=dict(title='Z')),
    showlegend=False
)

fig = go.Figure(
    data=[voids_trace, surrounds_trace, other_trace, node_trace],
    layout=layout
)
fig.show()


## 2.5 Plot IfcWalls and TfcDoors/IfcWindows connections

In [9]:
# Remove unconnected nodes
unconnected_nodes = [node for node, degree in G.degree() if degree == 0]
G.remove_nodes_from(unconnected_nodes)

# Assign positions from node attributes
pos = {
    node: (
        G.nodes[node].get("LocationX", 0),
        G.nodes[node].get("LocationY", 0),
        G.nodes[node].get("LocationZ", 0)
    )
    for node in G.nodes
}


# Extract node positions and colors (only for Walls, Doors, Windows)
wall_door_window_nodes = [
    node
    for node in G.nodes
    if G.nodes[node].get("IfcType") in [
        "IfcWall", "IfcDoor", "IfcWindow"
    ]
]

x_nodes, y_nodes, z_nodes = zip(*[pos[node] for node in wall_door_window_nodes])
colors = [
    get_node_color(G.nodes[node].get("IfcType", "Undefined"))
    for node in wall_door_window_nodes
]

# Separate edge coordinates by relation type
voids_x, voids_y, voids_z = [], [], []
surrounds_x, surrounds_y, surrounds_z = [], [], []
other_x, other_y, other_z = [], [], []

for u, v, data in G.edges(data=True):
    x0, y0, z0 = pos[u]
    x1, y1, z1 = pos[v]

    # Add coordinates in Plotly "line segment" style: [x0, x1, None] so lines don't connect across edges
    if data.get("relation") == "VOIDS":
        voids_x.extend([x0, x1, None])
        voids_y.extend([y0, y1, None])
        voids_z.extend([z0, z1, None])
    elif data.get("relation") == "SURROUNDS":
        surrounds_x.extend([x0, x1, None])
        surrounds_y.extend([y0, y1, None])
        surrounds_z.extend([z0, z1, None])
    else:
        other_x.extend([x0, x1, None])
        other_y.extend([y0, y1, None])
        other_z.extend([z0, z1, None])

# Create separate edge traces
voids_trace = go.Scatter3d(
    x=voids_x, y=voids_y, z=voids_z,
    mode='lines',
    line=dict(width=2, color='red'),
    hoverinfo='none'
)

surrounds_trace = go.Scatter3d(
    x=surrounds_x, y=surrounds_y, z=surrounds_z,
    mode='lines',
    line=dict(width=2, color='gray'),
    opacity=0.05,
    hoverinfo='none'
)

other_trace = go.Scatter3d(
    x=other_x, y=other_y, z=other_z,
    mode='lines',
    line=dict(width=2, color='lightgray'),
    hoverinfo='none'
)

# Create the node trace
node_trace = go.Scatter3d(
    x=x_nodes, y=y_nodes, z=z_nodes,
    mode='markers',
    marker=dict(size=5, color=colors, opacity=0.8),
    text=[
        f"{G.nodes[node].get('Name', 'N/A')} (" \
        f"{G.nodes[node].get('IfcType', 'Undefined')})"
        for node in wall_door_window_nodes
    ],
    hoverinfo='text'
)

# Build figure
layout = go.Layout(
    title="3D IFC Wall-Door/Window Visualization",
    width=1200, height=800,
    scene=dict(xaxis=dict(title='X'),
               yaxis=dict(title='Y'),
               zaxis=dict(title='Z')),
    showlegend=False
)

fig = go.Figure(
    data=[voids_trace, surrounds_trace, other_trace, node_trace],
    layout=layout
)
fig.show()


## 2.6 Data check

Check and clean data to avoid duplicates!

In [10]:
nodes = pd.DataFrame.from_dict(dict(G.nodes(data=True)), orient='index')
nodes.head()

Unnamed: 0,GlobalId,Name,Description,ObjectType,IfcType,LocationX,LocationY,LocationZ,category,SolarRad,Tag
3sM$t_Teb2q92lD380TwKD,3sM$t_Teb2q92lD380TwKD,LVL1_1B_70,,,IfcSpace,-39.208039,26.376091,4.5,IfcSpace,,
1jSziHgjf3ZR_L0Dj1KHuc,1jSziHgjf3ZR_L0Dj1KHuc,LVL1_1B_71,,,IfcSpace,-39.208039,23.376091,4.5,IfcSpace,,
0hlmsO3TX3kfSCEYMAtENu,0hlmsO3TX3kfSCEYMAtENu,LVL1_1B_72,,,IfcSpace,-39.208039,20.376091,4.5,IfcSpace,,
3X95yfnqPDVQbtUITG$ppG,3X95yfnqPDVQbtUITG$ppG,LVL1_1B_80,,,IfcSpace,-39.208039,62.376091,4.5,IfcSpace,,
2W18n9Wmn10BQxqA98Gkq5,2W18n9Wmn10BQxqA98Gkq5,LVL1_1B_98,,,IfcSpace,-30.208038,20.376091,4.5,IfcSpace,,


In [11]:
print(f"NaN elements in 'LocationX' column:{len(nodes[nodes['LocationX'].isna()])}")

NaN elements in 'LocationX' column:0


In [12]:
nodes[nodes['LocationX'].notna()]['IfcType'].value_counts()

IfcType
IfcColumn    1629
IfcWall      1056
IfcSlab       596
IfcWindow     326
IfcSpace       99
IfcDoor        93
IfcStair        6
Name: count, dtype: int64

In [13]:
print(f"NaN elements in 'SolarRad' column:{len(nodes[nodes['SolarRad'].isna()])}")

NaN elements in 'SolarRad' column:2927


In [14]:
nodes[nodes['SolarRad'].notna()]['IfcType'].value_counts()

IfcType
IfcWall    878
Name: count, dtype: int64

In [15]:
nodes.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3805 entries, 3sM$t_Teb2q92lD380TwKD to 0pMIq5Xzz1s824SIA5lU4R
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   GlobalId     3805 non-null   object 
 1   Name         3805 non-null   object 
 2   Description  0 non-null      object 
 3   ObjectType   6 non-null      object 
 4   IfcType      3805 non-null   object 
 5   LocationX    3805 non-null   float64
 6   LocationY    3805 non-null   float64
 7   LocationZ    3805 non-null   float64
 8   category     3805 non-null   object 
 9   SolarRad     878 non-null    float64
 10  Tag          0 non-null      float64
dtypes: float64(5), object(6)
memory usage: 485.8+ KB


In [16]:
def replace_nans(df):
    for col in df.columns:
        if pd.api.types.is_numeric_dtype(df[col]):
            df[col].fillna(0, inplace=True)
        else:
            df[col].fillna('N/A', inplace=True)
    return df

nodes = replace_nans(nodes)

nodes.head()


A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.




A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.





Unnamed: 0,GlobalId,Name,Description,ObjectType,IfcType,LocationX,LocationY,LocationZ,category,SolarRad,Tag
3sM$t_Teb2q92lD380TwKD,3sM$t_Teb2q92lD380TwKD,LVL1_1B_70,,,IfcSpace,-39.208039,26.376091,4.5,IfcSpace,0.0,0.0
1jSziHgjf3ZR_L0Dj1KHuc,1jSziHgjf3ZR_L0Dj1KHuc,LVL1_1B_71,,,IfcSpace,-39.208039,23.376091,4.5,IfcSpace,0.0,0.0
0hlmsO3TX3kfSCEYMAtENu,0hlmsO3TX3kfSCEYMAtENu,LVL1_1B_72,,,IfcSpace,-39.208039,20.376091,4.5,IfcSpace,0.0,0.0
3X95yfnqPDVQbtUITG$ppG,3X95yfnqPDVQbtUITG$ppG,LVL1_1B_80,,,IfcSpace,-39.208039,62.376091,4.5,IfcSpace,0.0,0.0
2W18n9Wmn10BQxqA98Gkq5,2W18n9Wmn10BQxqA98Gkq5,LVL1_1B_98,,,IfcSpace,-30.208038,20.376091,4.5,IfcSpace,0.0,0.0


In [17]:
nodes.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3805 entries, 3sM$t_Teb2q92lD380TwKD to 0pMIq5Xzz1s824SIA5lU4R
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   GlobalId     3805 non-null   object 
 1   Name         3805 non-null   object 
 2   Description  3805 non-null   object 
 3   ObjectType   3805 non-null   object 
 4   IfcType      3805 non-null   object 
 5   LocationX    3805 non-null   float64
 6   LocationY    3805 non-null   float64
 7   LocationZ    3805 non-null   float64
 8   category     3805 non-null   object 
 9   SolarRad     3805 non-null   float64
 10  Tag          3805 non-null   float64
dtypes: float64(5), object(6)
memory usage: 485.8+ KB


In [18]:
nodes['IfcType'].value_counts()

IfcType
IfcColumn    1629
IfcWall      1056
IfcSlab       596
IfcWindow     326
IfcSpace       99
IfcDoor        93
IfcStair        6
Name: count, dtype: int64

In [20]:
edges = pd.DataFrame(G.edges(data=True), columns=['source', 'target', 'attributes'])
edges.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4248 entries, 0 to 4247
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   source      4248 non-null   object
 1   target      4248 non-null   object
 2   attributes  4248 non-null   object
dtypes: object(3)
memory usage: 99.7+ KB


In [21]:
edges.head()

Unnamed: 0,source,target,attributes
0,3sM$t_Teb2q92lD380TwKD,1_2cirtfH1ARPCSLMw_JEe,{'relation': 'SURROUNDS'}
1,3sM$t_Teb2q92lD380TwKD,2zdWlwgSz8o9G2jpwMqeL$,{'relation': 'SURROUNDS'}
2,3sM$t_Teb2q92lD380TwKD,15a1uvAib5Cx3_GHZZjT$N,{'relation': 'SURROUNDS'}
3,3sM$t_Teb2q92lD380TwKD,0Fo_vRoRX5uQXTgGB2Ak7E,{'relation': 'SURROUNDS'}
4,3sM$t_Teb2q92lD380TwKD,1x0N$dxyL8LOQp$IETASUc,{'relation': 'SURROUNDS'}


In [22]:
# Check for edges types
edges['relation_type'] = edges['attributes'].apply(lambda x: x.get('relation', None))
unique_relations = edges['relation_type'].unique()
print(unique_relations)

['SURROUNDS' 'VOIDS']


In [24]:
nodes.to_csv(os.path.join('csv_files','nodes_02.csv'))
edges.to_csv(os.path.join('csv_files','edges_02.csv'))