# 1. Connect to Google Drive and install libraries

In [8]:
import zlib

import ifcopenshell
import networkx as nx
import pandas as pd
import plotly.graph_objects as go

# 2. Extract data from IFC
## 2.1. Utility Function

### Introduction to IFC Structure (Industry Foundation Classes)

**IFC (Industry Foundation Classes)** is a standardized data model used to describe building and construction industry data. It enables interoperability between different software applications in Building Information Modeling (BIM).

At its core, an IFC file is made up of a large hierarchy of **entities**—objects like walls, doors, beams, or more abstract things like relationships and property sets. Each entity belongs to a specific **type**, e.g., `IfcWall`, `IfcDoor`, `IfcPropertySet`, and so on.

Key concepts relevant to the function:

- **IfcEntity:** A generic object in the IFC model. Each entity has basic properties such as GlobalId, Name, and Description.

- **IfcRelDefinesByProperties:** A relationship that links an entity to a set of properties or quantities.

- **IfcPropertySet:** A collection of properties (e.g., material, fire rating), where each property can be a single value (IfcPropertySingleValue).

- **IfcElementQuantity:** Stores measurable quantities (e.g., length, area, volume) associated with the entity.

- **wrappedValue:** A helper attribute used to access the actual value inside some IFC data types.

```
IFC Structure Overview (as used in extract_properties function)

            +------------------------+
            |      IfcEntity        |         (e.g., IfcWall, IfcDoor)
            +------------------------+
            | - GlobalId             |
            | - Name                 |
            | - Description          |
            | - ObjectType           |
            +------------------------+
                      |
                      | IsDefinedBy
                      v
        +-----------------------------+
        |  IfcRelDefinesByProperties |
        +-----------------------------+
                      |
                      | RelatingPropertyDefinition
        +-----------------------------+
        |       /             \       |
        v                             v
+------------------+      +------------------------+
|  IfcPropertySet  |      |  IfcElementQuantity    |
+------------------+      +------------------------+
| - HasProperties  |      | - Quantities           |
+------------------+      +------------------------+
        |                            |
        v                            v

+-------------------------+    +------------------------+
| IfcPropertySingleValue  |    |   Quantity (Length,    |
+-------------------------+    |    Area, Volume, ...)  |
| - Name                  |    +------------------------+
| - NominalValue          |    | - Name                 |
|   - wrappedValue        |    | - Value (wrappedValue) |
+-------------------------+    +------------------------+

```


In [2]:
def _unwrap_nominal_value(prop):
    """Return the plain value behind a property's NominalValue, or None if absent."""
    return getattr(prop.NominalValue, "wrappedValue", None)


def _name_from_property_set(prop_def):
    """Return the name stored in a single IfcPropertySet, or None if it holds none."""
    single_values = [
        prop for prop in prop_def.HasProperties if prop.is_a("IfcPropertySingleValue")
    ]

    if prop_def.Name == "Pset_SpaceCommon":
        # In Pset_SpaceCommon only a property literally called "Name" counts.
        for prop in single_values:
            if prop.Name == "Name":
                return _unwrap_nominal_value(prop)
        return None

    for prop in single_values:
        if prop.Name in ("Name", "name", "LongName", "longname"):
            return _unwrap_nominal_value(prop)
        # A property whose *value* is "Name"/"LongName" is treated as a pointer to
        # the property of that name inside the same set.
        referenced = _unwrap_nominal_value(prop)
        if referenced in ("Name", "LongName"):
            for candidate in single_values:
                if candidate.Name == referenced:
                    return _unwrap_nominal_value(candidate)
            return None
    return None


def extract_properties(entity):
    """Extract the identifying attributes of an IFC entity as a flat dict.

    The entity's own ``Name`` is used when it is set. Otherwise the property
    sets reachable through ``IsDefinedBy`` are scanned in order and the first
    one that yields a name wins; ``entity.LongName`` is the last resort.

    Args:
        entity: IFC entity exposing ``GlobalId``, ``Name`` and ``is_a()``.

    Returns:
        dict: Keys ``GlobalId``, ``Name``, ``Description``, ``ObjectType`` and
        ``IfcType``. ``Description`` and ``ObjectType`` are ``None`` when the
        entity has no such attribute; ``Name`` is ``None`` when no source
        provides one.
    """
    data = {
        "GlobalId": entity.GlobalId,
        "Name": entity.Name,  # Initialize with entity.Name
        "Description": getattr(entity, "Description", None),
        "ObjectType": getattr(entity, "ObjectType", None),
        "IfcType": entity.is_a()
    }

    if data["Name"] is None and hasattr(entity, "IsDefinedBy"):
        for rel in entity.IsDefinedBy:
            if not (rel.is_a("IfcRelDefinesByProperties") and hasattr(rel, "RelatingPropertyDefinition")):
                continue
            prop_def = rel.RelatingPropertyDefinition
            if not prop_def.is_a("IfcPropertySet"):
                continue
            data["Name"] = _name_from_property_set(prop_def)
            if data["Name"] is not None:
                # Stop at the first hit so a later property set cannot overwrite
                # (or reset to None) the name that was already found.
                break

    # Check if Name is still None and LongName is available
    if data["Name"] is None and hasattr(entity, 'LongName'):
        data["Name"] = entity.LongName  # Assign LongName to Name if Name is None

    return data

**TODO:** add a location parameter.



## 2.2. Load IFC File and Extract Data

In [30]:
# Open the IFC model, then fetch every room (IfcSpace) and every
# space-boundary relationship (IfcRelSpaceBoundary) from it.
ifc_file = ifcopenshell.open('scott_home_2.ifc')
rooms, space_boundaries = (
    ifc_file.by_type(ifc_class)
    for ifc_class in ("IfcSpace", "IfcRelSpaceBoundary")
)

In [31]:
# Identify element types in space boundaries.
# Boundaries without a related building element are skipped.
element_types = {
    boundary.RelatedBuildingElement.is_a()
    for boundary in space_boundaries
    if boundary.RelatedBuildingElement
}

print("Unique element types in space boundaries:")
for type_name in sorted(element_types):
    print("-", type_name)

Unique element types in space boundaries:
- IfcDoor
- IfcSlab
- IfcWall
- IfcWindow


In [32]:
# Define categories and colors.
# Rooms are always red. Every other IFC class seen in the space boundaries gets a
# colour derived from a CRC32 checksum of its class name. The built-in hash() is
# randomised per interpreter session for strings, so it would assign different
# colours after every kernel restart; the checksum is stable across runs.
categories = {room.is_a(): "red" for room in rooms}
for rel in space_boundaries:
    if rel.RelatedBuildingElement:
        ifc_class = rel.RelatedBuildingElement.is_a()
        color_value = zlib.crc32(ifc_class.encode("utf-8")) & 0xFFFFFF
        # setdefault keeps the first colour registered for a class (e.g. "red" for rooms).
        categories.setdefault(ifc_class, f"#{color_value:06x}")

def get_node_color(ifc_type):
    """Look up the colour registered for an IFC type; unknown types map to gray."""
    if ifc_type in categories:
        return categories[ifc_type]
    return "gray"

## 2.3. Build graph

**RDF (Resource Description Framework)** represents data as triples: subject, predicate, and object. It’s commonly used in the Semantic Web to maintain a universal, flexible model of linked data. Relationships are defined via standardized vocabularies (e.g., RDF Schema, OWL), enabling robust data integration and inference across different domains.

**LPG (Labeled Property Graph)** organizes data into nodes (entities) and edges (relationships), both of which can have labels and properties. This approach is popular in many graph databases for direct, application-focused queries (often using query languages like Cypher). It’s simpler to model certain graph use cases without the semantic overhead of RDF vocabularies.

Our workflow is based on the **Labeled Property Graph** model.

In [33]:
# Undirected graph; every node is keyed by the GlobalId of its IFC entity.
G = nx.Graph()

# Add Rooms
for space in rooms:
    space_attrs = extract_properties(space)
    G.add_node(space.GlobalId, **space_attrs, category="IfcSpace")

# Add Room-Element Connections
#    (Any building element bounding a room goes here with 'SURROUNDS'.)
for boundary in space_boundaries:
    space, bounding_element = boundary.RelatingSpace, boundary.RelatedBuildingElement
    if not (space and bounding_element):
        # A boundary missing either side cannot form an edge.
        continue
    element_id = bounding_element.GlobalId
    if not G.has_node(element_id):
        G.add_node(
            element_id,
            **extract_properties(bounding_element),
            category=bounding_element.is_a(),
        )
    # Connect the room to this element
    G.add_edge(space.GlobalId, element_id, relation="SURROUNDS")

# Add Wall-Window/Door Connections
#    (Direct 'VOIDS' edge from IfcWall* types to IfcDoor or IfcWindow.)
# Index the filling elements by the STEP id of their opening once, instead of
# re-querying and re-scanning every IfcRelFillsElement for each IfcRelVoidsElement.
fillings_by_opening = {}
for fill_rel in ifc_file.by_type("IfcRelFillsElement"):
    fill_opening = fill_rel.RelatingOpeningElement
    if fill_opening is not None:
        fillings_by_opening.setdefault(fill_opening.id(), []).append(fill_rel.RelatedBuildingElement)

for rel in ifc_file.by_type("IfcRelVoidsElement"):
    wall = rel.RelatingBuildingElement
    opening = rel.RelatedOpeningElement
    # Strict check, host side: IfcWall*, IfcWallStandardCase, IfcCurtainWall
    if not wall or opening is None or "Wall" not in wall.is_a():
        continue

    for filled_element in fillings_by_opening.get(opening.id(), ()):
        # Strict check, filling side: only [IfcDoor | IfcWindow]
        if not filled_element or filled_element.is_a() not in ("IfcDoor", "IfcWindow"):
            continue

        # Ensure both nodes (Wall + Door/Window) exist in the graph
        for entity in (wall, filled_element):
            if entity.GlobalId not in G.nodes:
                G.add_node(entity.GlobalId, **extract_properties(entity), category=entity.is_a())

        # Create a direct connection for the wall-door/window
        G.add_edge(wall.GlobalId, filled_element.GlobalId, relation="VOIDS")


## 2.4. Generate 3D Layout and Visualization

In [34]:
# Remove unconnected nodes
unconnected_nodes = [node for node, degree in G.degree() if degree == 0]
G.remove_nodes_from(unconnected_nodes)

# Recalculate layout (3D); the fixed seed keeps the layout reproducible
pos = nx.spring_layout(G, dim=3, seed=42)

# Extract updated node positions and colors.
# With an empty graph zip(*...) yields nothing and the 3-way unpacking would
# raise, so fall back to three empty axes in that case.
node_positions = [pos[node] for node in G.nodes]
x_nodes, y_nodes, z_nodes = zip(*node_positions) if node_positions else ((), (), ())
colors = [get_node_color(G.nodes[node].get("IfcType", "Undefined")) for node in G.nodes]

# Separate edge coordinates by relation type
voids_x, voids_y, voids_z = [], [], []
surrounds_x, surrounds_y, surrounds_z = [], [], []
other_x, other_y, other_z = [], [], []
axes_by_relation = {
    "VOIDS": (voids_x, voids_y, voids_z),
    "SURROUNDS": (surrounds_x, surrounds_y, surrounds_z),
}
other_axes = (other_x, other_y, other_z)

for u, v, data in G.edges(data=True):
    # Any relation other than VOIDS/SURROUNDS (or a missing one) lands in "other".
    axes = axes_by_relation.get(data.get("relation"), other_axes)
    # Add coordinates in Plotly "line segment" style: [start, end, None] so lines don't connect across edges
    for axis_values, start, end in zip(axes, pos[u], pos[v]):
        axis_values.extend([start, end, None])

# Create separate edge traces
voids_trace = go.Scatter3d(
    x=voids_x, y=voids_y, z=voids_z,
    mode='lines',
    line=dict(width=2, color='red'),
    hoverinfo='none'
)

surrounds_trace = go.Scatter3d(
    x=surrounds_x, y=surrounds_y, z=surrounds_z,
    mode='lines',
    line=dict(width=2, color='gray'),
    opacity=0.5,
    hoverinfo='none'
)

other_trace = go.Scatter3d(
    x=other_x, y=other_y, z=other_z,
    mode='lines',
    line=dict(width=2, color='lightgray'),
    hoverinfo='none'
)

# Create the node trace; hover text reads "<Name> (<IfcType>)"
node_trace = go.Scatter3d(
    x=x_nodes, y=y_nodes, z=z_nodes,
    mode='markers',
    marker=dict(size=5, color=colors, opacity=0.8),
    text=[
        f"{G.nodes[node].get('Name', 'N/A')} (" \
        f"{G.nodes[node].get('IfcType', 'Undefined')})"
        for node in G.nodes
    ],
    hoverinfo='text'
)

# Build figure
layout = go.Layout(
    title="3D IFC Wall-Door/Window Visualization",
    width=1200, height=800,
    scene=dict(xaxis=dict(title='X'),
               yaxis=dict(title='Y'),
               zaxis=dict(title='Z')),
    showlegend=False
)

fig = go.Figure(
    data=[voids_trace, surrounds_trace, other_trace, node_trace],
    layout=layout
)
# fig.show()


**TODO:** add a location parameter.

## 2.5. Plot IfcWalls and IfcDoors/IfcWindows connections

In [35]:
# Remove unconnected nodes
unconnected_nodes = [node for node, degree in G.degree() if degree == 0]
G.remove_nodes_from(unconnected_nodes)

# Recalculate layout (3D); the fixed seed keeps the layout reproducible
pos = nx.spring_layout(G, dim=3, seed=42)

# Extract node positions and colors (only for Walls, Doors, Windows).
# Walls are matched the same way as when the VOIDS edges are built ("Wall" in
# the type name), so IfcWallStandardCase / IfcCurtainWall nodes get a marker too
# instead of only the exact type "IfcWall".
wall_door_window_nodes = []
for node in G.nodes:
    ifc_type = G.nodes[node].get("IfcType") or ""
    if ifc_type in ("IfcDoor", "IfcWindow") or "Wall" in ifc_type:
        wall_door_window_nodes.append(node)

# With no matching node zip(*...) yields nothing and the 3-way unpacking would
# raise, so fall back to three empty axes in that case.
node_positions = [pos[node] for node in wall_door_window_nodes]
x_nodes, y_nodes, z_nodes = zip(*node_positions) if node_positions else ((), (), ())
colors = [
    get_node_color(G.nodes[node].get("IfcType", "Undefined"))
    for node in wall_door_window_nodes
]

# Separate edge coordinates by relation type
voids_x, voids_y, voids_z = [], [], []
surrounds_x, surrounds_y, surrounds_z = [], [], []
other_x, other_y, other_z = [], [], []
axes_by_relation = {
    "VOIDS": (voids_x, voids_y, voids_z),
    "SURROUNDS": (surrounds_x, surrounds_y, surrounds_z),
}
other_axes = (other_x, other_y, other_z)

for u, v, data in G.edges(data=True):
    # Any relation other than VOIDS/SURROUNDS (or a missing one) lands in "other".
    axes = axes_by_relation.get(data.get("relation"), other_axes)
    # Add coordinates in Plotly "line segment" style: [start, end, None] so lines don't connect across edges
    for axis_values, start, end in zip(axes, pos[u], pos[v]):
        axis_values.extend([start, end, None])

# Create separate edge traces
voids_trace = go.Scatter3d(
    x=voids_x, y=voids_y, z=voids_z,
    mode='lines',
    line=dict(width=2, color='red'),
    hoverinfo='none'
)

# SURROUNDS edges are kept but almost transparent so the VOIDS edges stand out.
surrounds_trace = go.Scatter3d(
    x=surrounds_x, y=surrounds_y, z=surrounds_z,
    mode='lines',
    line=dict(width=2, color='gray'),
    opacity=0.05,
    hoverinfo='none'
)

other_trace = go.Scatter3d(
    x=other_x, y=other_y, z=other_z,
    mode='lines',
    line=dict(width=2, color='lightgray'),
    hoverinfo='none'
)

# Create the node trace; hover text reads "<Name> (<IfcType>)"
node_trace = go.Scatter3d(
    x=x_nodes, y=y_nodes, z=z_nodes,
    mode='markers',
    marker=dict(size=5, color=colors, opacity=0.8),
    text=[
        f"{G.nodes[node].get('Name', 'N/A')} (" \
        f"{G.nodes[node].get('IfcType', 'Undefined')})"
        for node in wall_door_window_nodes
    ],
    hoverinfo='text'
)

#  Build figure
layout = go.Layout(
    title="3D IFC Wall-Door/Window Visualization",
    width=1200, height=800,
    scene=dict(xaxis=dict(title='X'),
               yaxis=dict(title='Y'),
               zaxis=dict(title='Z')),
    showlegend=False
)

fig = go.Figure(
    data=[voids_trace, surrounds_trace, other_trace, node_trace],
    layout=layout
)
# fig.show()


## 2.6. Data check

Check and clean data to avoid duplicates!

In [36]:
# Tabulate the graph nodes: one row per node id, one column per node attribute.
nodes = pd.DataFrame.from_dict(
    {node_id: attrs for node_id, attrs in G.nodes(data=True)},
    orient='index',
)

In [37]:
# Display the node table for a visual check.
nodes

Unnamed: 0,GlobalId,Name,Description,ObjectType,IfcType,category
3q7Twobe9ENvh43NYJYCkZ,3q7Twobe9ENvh43NYJYCkZ,LEVEL_1,,,IfcSpace,IfcSpace
33GCg72APAegkpE39K6OCT,33GCg72APAegkpE39K6OCT,slab,,,IfcSlab,IfcSlab
15ttRC7BL7Ngs5NHJr5Ynb,15ttRC7BL7Ngs5NHJr5Ynb,wall_1,,,IfcWall,IfcWall
3jHerqfRDEbB68GwT$MLmP,3jHerqfRDEbB68GwT$MLmP,wall_1,,,IfcWall,IfcWall
2G21b5UMHFNB45OoEIXvNO,2G21b5UMHFNB45OoEIXvNO,wall_1,,,IfcWall,IfcWall
1LfjqzwfXEHh5CANx8rM4X,1LfjqzwfXEHh5CANx8rM4X,wall_1,,,IfcWall,IfcWall
04snYQkcbEGfjItRCcaWig,04snYQkcbEGfjItRCcaWig,wall_1,,,IfcWall,IfcWall
24EAjVia58eBxT8G8zKI9s,24EAjVia58eBxT8G8zKI9s,wall_1,,,IfcWall,IfcWall
0_$8hcBNr8OhV23ievP0wA,0_$8hcBNr8OhV23ievP0wA,wall_1,,,IfcWall,IfcWall
00aB8euCbDg81JtJMGNNA_,00aB8euCbDg81JtJMGNNA_,wall_1,,,IfcWall,IfcWall


In [38]:
# # Remove columns that contain a certain percentage of NaNs
# threshold = 0.5 * len(nodes)
# nodes = nodes.dropna(axis=1, thresh=threshold)  # Drop columns below the threshold

In [39]:
# Summarise column dtypes and non-null counts before missing values are filled.
nodes.info()

<class 'pandas.core.frame.DataFrame'>
Index: 45 entries, 3q7Twobe9ENvh43NYJYCkZ to 2Zzn$FcnjFt8vTHwYtfFLz
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   GlobalId     45 non-null     object
 1   Name         35 non-null     object
 2   Description  0 non-null      object
 3   ObjectType   0 non-null      object
 4   IfcType      45 non-null     object
 5   category     45 non-null     object
dtypes: object(6)
memory usage: 2.5+ KB


In [40]:
# Preview the first three rows of the node table.
nodes.head(3)

Unnamed: 0,GlobalId,Name,Description,ObjectType,IfcType,category
3q7Twobe9ENvh43NYJYCkZ,3q7Twobe9ENvh43NYJYCkZ,LEVEL_1,,,IfcSpace,IfcSpace
33GCg72APAegkpE39K6OCT,33GCg72APAegkpE39K6OCT,slab,,,IfcSlab,IfcSlab
15ttRC7BL7Ngs5NHJr5Ynb,15ttRC7BL7Ngs5NHJr5Ynb,wall_1,,,IfcWall,IfcWall


In [41]:
# Replace NaNs with the string 'N/A' (rebinding instead of mutating in place)
nodes = nodes.fillna('N/A')

In [42]:
# Re-check the summary: after the fill every column should be fully non-null.
nodes.info()

<class 'pandas.core.frame.DataFrame'>
Index: 45 entries, 3q7Twobe9ENvh43NYJYCkZ to 2Zzn$FcnjFt8vTHwYtfFLz
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   GlobalId     45 non-null     object
 1   Name         45 non-null     object
 2   Description  45 non-null     object
 3   ObjectType   45 non-null     object
 4   IfcType      45 non-null     object
 5   category     45 non-null     object
dtypes: object(6)
memory usage: 2.5+ KB


In [43]:
# Count nodes per IFC type.
nodes['IfcType'].value_counts()

IfcType
IfcWall      26
IfcDoor      10
IfcWindow     7
IfcSpace      1
IfcSlab       1
Name: count, dtype: int64

In [44]:
# Count nodes per category; these counts are expected to mirror the IfcType counts.
nodes['category'].value_counts()

category
IfcWall      26
IfcDoor      10
IfcWindow     7
IfcSpace      1
IfcSlab       1
Name: count, dtype: int64

In [45]:
# Tabulate the graph edges: one row per edge with its endpoints and attribute dict.
edges = pd.DataFrame(
    [(source, target, attrs) for source, target, attrs in G.edges(data=True)],
    columns=['source', 'target', 'attributes'],
)

In [46]:
# Summarise column dtypes and non-null counts of the edge table.
edges.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 61 entries, 0 to 60
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   source      61 non-null     object
 1   target      61 non-null     object
 2   attributes  61 non-null     object
dtypes: object(3)
memory usage: 1.6+ KB


In [47]:
# Preview the first rows of the edge table.
edges.head()

Unnamed: 0,source,target,attributes
0,3q7Twobe9ENvh43NYJYCkZ,33GCg72APAegkpE39K6OCT,{'relation': 'SURROUNDS'}
1,3q7Twobe9ENvh43NYJYCkZ,15ttRC7BL7Ngs5NHJr5Ynb,{'relation': 'SURROUNDS'}
2,3q7Twobe9ENvh43NYJYCkZ,3jHerqfRDEbB68GwT$MLmP,{'relation': 'SURROUNDS'}
3,3q7Twobe9ENvh43NYJYCkZ,2G21b5UMHFNB45OoEIXvNO,{'relation': 'SURROUNDS'}
4,3q7Twobe9ENvh43NYJYCkZ,1LfjqzwfXEHh5CANx8rM4X,{'relation': 'SURROUNDS'}


In [48]:
# Check for edge types: pull the 'relation' label out of each attribute dict
# (None when an edge has no such key) and list the distinct labels.
edges['relation_type'] = edges['attributes'].map(lambda attrs: attrs.get('relation'))
unique_relations = edges['relation_type'].unique()
print(unique_relations)


['SURROUNDS' 'VOIDS']


## 2.7. Save `csv` files for nodes and edges

In [49]:
# prompt: create directory in drive /content/drive/MyDrive/GraphML/
# Runs a shell `mkdir -p` to create the CSV folder and any missing parent folders.
# NOTE(review): the recorded output ("The syntax of the command is incorrect.") suggests
# this failed in the shell it last ran under — `mkdir -p` is POSIX syntax; confirm the
# target environment. The to_csv calls in this notebook use relative file names, so they
# do not write into this folder.
!mkdir -p /content/drive/MyDrive/GraphML/CSV/

The syntax of the command is incorrect.


In [50]:
# Export the node and edge tables as CSV files (relative paths, so they land in
# the current working directory).
nodes.to_csv(path_or_buf='nodes_01b.csv')
edges.to_csv(path_or_buf='edges_01b.csv')