In [2]:
import os
from difflib import SequenceMatcher
from pprint import pprint

import ifcopenshell
import rdflib
from rdflib import Graph, URIRef, Literal, Namespace
from rdflib.namespace import RDF, OWL

In [3]:
# Root folder of the data set. Set the MD2MV_DATA_DIR environment variable to run the
# notebook from another location; the default keeps the paths originally hard-coded here.
# Trailing separators are stripped so the joined paths never contain a doubled slash.
DATA_DIR = os.environ.get(
    "MD2MV_DATA_DIR",
    "C:/Users/yanpe/OneDrive - Metropolia Ammattikorkeakoulu Oy/Research/MD2MV/data",
).rstrip("/\\")

# Turtle files of the three discipline models, all resolved under DATA_DIR.
ttl_ark = f"{DATA_DIR}/TTL/01ARK/ARK_MET.ttl"        # parsed below as "Architecture"
ttl_rak = f"{DATA_DIR}/TTL/02RAK/RAK_MET.ttl"        # parsed below as "Structure"
ttl_lvi_iv = f"{DATA_DIR}/TTL/03LVI/LVI_IV_MET.ttl"  # parsed below as "HVAC_IV"

In [4]:
# --- Namespaces ---
# ifcOWL namespace, here set to IFC2x3 TC1. Adjust it to match the schema version of
# your specific file (check the file header!). None of the helpers defined in this
# cell reference it.
IFC = Namespace("https://standards.buildingsmart.org/IFC/DEV/IFC2x3/TC1/OWL#") 
# BOT namespace; supplies the Site / Building / Storey / Space classes that
# extract_bot_skeleton queries by rdf:type.
BOT = Namespace("https://w3id.org/bot#")

def _local_name(uri):
    """Return the part of ``uri`` after its last '#', or after its last '/' when it has no '#'."""
    text = str(uri)
    separator = '#' if '#' in text else '/'
    return text.rsplit(separator, 1)[-1]


def get_proxy_name(graph, subject):
    """
    Return a short identifying name for ``subject``.

    Resolution order:
      1. The local name of the first owl:sameAs object that has a non-empty one.
         Example:
           Subject:   inst:storey_GUID
           Predicate: owl:sameAs
           Object:    inst:IfcBuildingStorey_181
         Returns: "IfcBuildingStorey_181"
      2. The stripped value of the first non-blank literal whose predicate's
         local name contains "name" (case-insensitive).
      3. The string "Unknown".

    Args:
        graph: Graph-like object providing ``objects(subject, predicate)`` and
            ``predicate_objects(subject)``.
        subject: Node whose name is looked up.

    Returns:
        str: The resolved name, or "Unknown" when nothing usable is found.
    """
    # 1. Preferred: the local name of an owl:sameAs target.
    for o in graph.objects(subject, OWL.sameAs):
        token = _local_name(o)
        # A URI ending in its separator has an empty local name; try the next object.
        if token:
            return token

    # 2. Fallback: a literal attached through a name-like property.
    for p, o in graph.predicate_objects(subject):
        if not isinstance(o, Literal):
            continue
        # Test only the predicate's local name. Matching the whole URI would also
        # accept unrelated properties whose namespace merely contains "name".
        if "name" not in _local_name(p).lower():
            continue
        name = str(o).strip()
        # A blank name identifies nothing; keep looking instead of returning "".
        if name:
            return name

    return "Unknown"

def extract_bot_skeleton(ttl_file_path, label):
    """
    Parse a Turtle file and index its BOT spatial elements by proxy name.

    Args:
        ttl_file_path: Location of the Turtle file to parse.
        label: Human-readable model name, used only in the printed messages.

    Returns:
        dict: ``{'sites': {...}, 'buildings': {...}, 'storeys': {...}, 'spaces': {...}}``
        where each inner dict maps the name returned by ``get_proxy_name`` to the
        element's node in the graph. When two elements of one level resolve to the
        same name, the first one is kept and a warning is printed for each later one.
    """
    print(f"Parsing {label} ({ttl_file_path})...")
    model_graph = Graph()
    model_graph.parse(ttl_file_path, format="ttl")

    # Spatial levels in the order they are extracted, paired with their BOT class.
    bot_levels = (
        ('sites', BOT.Site),
        ('buildings', BOT.Building),
        ('storeys', BOT.Storey),
        ('spaces', BOT.Space),
    )
    skeleton = {level: {} for level, _ in bot_levels}

    for level, bot_class in bot_levels:
        level_index = skeleton[level]
        for element in model_graph.subjects(RDF.type, bot_class):
            name = get_proxy_name(model_graph, element)
            if name in level_index:
                # Never overwrite: the first element seen under a name wins.
                print(f"  [Warning] Duplicate ID found in {label}: {name}")
                continue
            level_index[name] = element

    print(f"  Found: {len(skeleton['sites'])} Sites, {len(skeleton['buildings'])} Bldgs, "
          f"{len(skeleton['storeys'])} Storeys, {len(skeleton['spaces'])} Spaces")
    return skeleton


In [5]:
# --- Main Execution ---

# 1. Extract Skeletons
# One (Turtle path, label) pair per discipline model, parsed in this order.
skeleton_sources = [
    (ttl_ark, "Architecture"),
    (ttl_lvi_iv, "HVAC_IV"),
    (ttl_rak, "Structure"),
]
arch_data, hvac_iv_data, rak_data = [
    extract_bot_skeleton(source_path, source_label)
    for source_path, source_label in skeleton_sources
]

Parsing Architecture (C:/Users/yanpe/OneDrive - Metropolia Ammattikorkeakoulu Oy/Research/MD2MV/data/TTL/01ARK/ARK_MET.ttl)...
  Found: 1 Sites, 1 Bldgs, 12 Storeys, 1404 Spaces
Parsing HVAC_IV (C:/Users/yanpe/OneDrive - Metropolia Ammattikorkeakoulu Oy/Research/MD2MV/data/TTL/03LVI/LVI_IV_MET.ttl)...
  Found: 1 Sites, 1 Bldgs, 9 Storeys, 0 Spaces
Parsing Structure (C:/Users/yanpe/OneDrive - Metropolia Ammattikorkeakoulu Oy/Research/MD2MV/data/TTL/02RAK/RAK_MET.ttl)...
  Found: 1 Sites, 8 Bldgs, 49 Storeys, 0 Spaces


In [10]:
# Inspect the architecture skeleton: {level: {proxy name: element URI}}.
# This prints every entry of every level, so the output is long.
pprint(arch_data)

{'buildings': {'IfcBuilding_118': rdflib.term.URIRef('https://lbd.example.com/building_28cb49f1-9b69-4870-aca3-d3f3628a7f64')},
 'sites': {'IfcSite_23678881': rdflib.term.URIRef('https://lbd.example.com/site_28cb49f1-9b69-4870-aca3-d3f3628a7f67')},
 'spaces': {'IfcSpace_1029': rdflib.term.URIRef('https://lbd.example.com/space_611eab86-f489-465e-bcd9-3a3dbbfc9996'),
            'IfcSpace_10398': rdflib.term.URIRef('https://lbd.example.com/space_f90bef97-ba5a-4f16-a512-56b7145e3092'),
            'IfcSpace_104011': rdflib.term.URIRef('https://lbd.example.com/space_df7623d4-2393-46f5-ad58-cefa159096e7'),
            'IfcSpace_104781': rdflib.term.URIRef('https://lbd.example.com/space_df7623d4-2393-46f5-ad58-cefa1590a9c1'),
            'IfcSpace_10552': rdflib.term.URIRef('https://lbd.example.com/space_f90bef97-ba5a-4f16-a512-56b7145e309d'),
            'IfcSpace_105874': rdflib.term.URIRef('https://lbd.example.com/space_df7623d4-2393-46f5-ad58-cefa1590a837'),
            'IfcSpace_108820'

In [33]:
# Parse the architecture Turtle file into its own graph for property inspection.
g = Graph()
g.parse(ttl_ark, format="turtle")

BOT = Namespace("https://w3id.org/bot#")
PROPS = Namespace("http://lbd.arch.rwth-aachen.de/props#")

# For each bot:Storey, keep only the predicates in the props: namespace,
# storing predicate and value as plain strings keyed by the storey URI.
storeys = g.subjects(RDF.type, BOT.Storey)
props_prefix = str(PROPS)
storey_props = {}

for storey in storeys:
    prop_values = {}
    for pred, value in g.predicate_objects(storey):
        pred_iri = str(pred)
        if pred_iri.startswith(props_prefix):
            prop_values[pred_iri] = str(value)
    storey_props[str(storey)] = prop_values

In [34]:
# Redundant with the pprint import in the first cell; harmless.
from pprint import pprint
# Report how many storeys were collected, then dump their props: attributes.
print(f'{len(storey_props)} storeys with properties found:')
pprint(storey_props)

12 storeys with properties found:
{'https://lbd.example.com/storey_28cb49f1-9b69-4870-aca3-d3f39d30d6fc': {'http://lbd.arch.rwth-aachen.de/props#elevationIfcBuildingStorey_attribute_simple': '58100.0000000035',
                                                                         'http://lbd.arch.rwth-aachen.de/props#globalIdIfcRoot_attribute_simple': '0eoqdncsb8SAoZq$ETCDRy',
                                                                         'http://lbd.arch.rwth-aachen.de/props#longNameIfcSpatialStructureElement_attribute_simple': 'IV-Kammio'},
 'https://lbd.example.com/storey_28cb49f1-9b69-4870-aca3-d3f39d72d36b': {'http://lbd.arch.rwth-aachen.de/props#elevationIfcBuildingStorey_attribute_simple': '41100.0000000061',
                                                                         'http://lbd.arch.rwth-aachen.de/props#globalIdIfcRoot_attribute_simple': '0eoqdncsb8SAoZq$ETSjDh',
                                                                         'http://lbd.arch

In [12]:
# Open the architectural IFC model; arch_ifc is reused by the cells below.
arch_ifc = ifcopenshell.open("C:/Users/yanpe/OneDrive - Metropolia Ammattikorkeakoulu Oy/Research/MD2MV/data/IFC/01ARK/ARK_MET.ifc")
# Take the first IfcSite; the [0] index fails if the model contains none.
arch_site = arch_ifc.by_type("IfcSite")[0]
# Show all attributes of that site as a dictionary.
arch_site.get_info()

{'id': 23678881,
 'type': 'IfcSite',
 'GlobalId': '0eoqdncsb8SAoZq$DYYdzd',
 'OwnerHistory': #41=IfcOwnerHistory(#38,#5,$,.NOCHANGE.,$,$,$,0),
 'Name': 'Default',
 'Description': None,
 'ObjectType': '',
 'ObjectPlacement': #23678880=IfcLocalPlacement($,#23678879),
 'Representation': None,
 'LongName': None,
 'CompositionType': 'ELEMENT',
 'RefLatitude': (60, 13, 29, 994506),
 'RefLongitude': (25, 4, 32, 885),
 'RefElevation': 0.0,
 'LandTitleNumber': None,
 'SiteAddress': None}

In [22]:
# Look up the entity with STEP id 127 (an IfcBuildingStorey, per the cell output).
arch_ifc.by_id(127)

#127=IfcBuildingStorey('0eoqdncsb8SAoZq$ETTygP',#41,'Merenpinta',$,$,#125,$,'Merenpinta',.ELEMENT.,0.)

In [None]:
# Show all attributes of the first IfcBuilding in the architectural model.
building = arch_ifc.by_type("IfcBuilding")[0]
building.get_info()

{'id': 118,
 'type': 'IfcBuilding',
 'GlobalId': '0eoqdncsb8SAoZq$DYYdza',
 'OwnerHistory': #41=IfcOwnerHistory(#38,#5,$,.NOCHANGE.,$,$,$,0),
 'Name': '',
 'Description': None,
 'ObjectType': None,
 'ObjectPlacement': #32=IfcLocalPlacement(#23678880,#31),
 'Representation': None,
 'LongName': '',
 'CompositionType': 'ELEMENT',
 'ElevationOfRefHeight': None,
 'ElevationOfTerrain': None,
 'BuildingAddress': #114=IfcPostalAddress($,$,$,$,(),$,$,$,$,$)}

In [None]:
# Print the attribute dictionary of every IfcBuildingStorey in the architectural model.
storyes_ark = arch_ifc.by_type("IfcBuildingStorey")
for storey in storyes_ark:
    print(storey.get_info())

{'id': 127, 'type': 'IfcBuildingStorey', 'GlobalId': '0eoqdncsb8SAoZq$ETTygP', 'OwnerHistory': #41=IfcOwnerHistory(#38,#5,$,.NOCHANGE.,$,$,$,0), 'Name': 'Merenpinta', 'Description': None, 'ObjectType': None, 'ObjectPlacement': #125=IfcLocalPlacement(#32,#124), 'Representation': None, 'LongName': 'Merenpinta', 'CompositionType': 'ELEMENT', 'Elevation': 0.0}
{'id': 133, 'type': 'IfcBuildingStorey', 'GlobalId': '0eoqdncsb8SAoZq$ETVUGE', 'OwnerHistory': #41=IfcOwnerHistory(#38,#5,$,.NOCHANGE.,$,$,$,0), 'Name': 'R.kerros', 'Description': None, 'ObjectType': None, 'ObjectPlacement': #132=IfcLocalPlacement(#32,#131), 'Representation': None, 'LongName': 'R.kerros', 'CompositionType': 'ELEMENT', 'Elevation': 15899.9999999986}
{'id': 139, 'type': 'IfcBuildingStorey', 'GlobalId': '0eoqdncsb8SAoZq$ETSjH9', 'OwnerHistory': #41=IfcOwnerHistory(#38,#5,$,.NOCHANGE.,$,$,$,0), 'Name': 'K.kerros', 'Description': None, 'ObjectType': None, 'ObjectPlacement': #138=IfcLocalPlacement(#32,#137), 'Representatio

In [20]:
# Open the HVAC IFC model; hvac_ifc is reused by the storey alignment cell.
# NOTE(review): this opens LVI_LJ_MET.ifc, while the HVAC Turtle file configured at
# the top of the notebook is LVI_IV_MET.ttl. The alignment cell looks up proxy IDs
# taken from that Turtle file in this IFC file -- confirm the two describe the same model.
hvac_ifc = ifcopenshell.open("C:/Users/yanpe/OneDrive - Metropolia Ammattikorkeakoulu Oy/Research/MD2MV/data/IFC/03LVI/LVI_LJ_MET.ifc")
# Take the first IfcSite; the [0] index fails if the model contains none.
site_lvi = hvac_ifc.by_type("IfcSite")[0]
# Show all attributes of that site as a dictionary.
site_lvi.get_info()

{'id': 31,
 'type': 'IfcSite',
 'GlobalId': '2Fm4WKyh10RANaa_0BW2kW',
 'OwnerHistory': #5=IfcOwnerHistory(#8,#9,$,.NOCHANGE.,$,$,$,1562874076),
 'Name': 'Metropolia',
 'Description': 'Testi',
 'ObjectType': None,
 'ObjectPlacement': #32=IfcLocalPlacement($,#28),
 'Representation': None,
 'LongName': None,
 'CompositionType': 'ELEMENT',
 'RefLatitude': (0, 0, 0, 0),
 'RefLongitude': (0, 0, 0, 0),
 'RefElevation': 0.0,
 'LandTitleNumber': None,
 'SiteAddress': None}

In [None]:
# Show all attributes of the first IfcBuilding in the HVAC model.
building_lvi = hvac_ifc.by_type("IfcBuilding")[0]
building_lvi.get_info() 

{'id': 34,
 'type': 'IfcBuilding',
 'GlobalId': '0DgGiPlffFo9_7V0ZN1flI',
 'OwnerHistory': #5=IfcOwnerHistory(#8,#9,$,.NOCHANGE.,$,$,$,1562874076),
 'Name': 'mc-building',
 'Description': None,
 'ObjectType': None,
 'ObjectPlacement': #35=IfcLocalPlacement($,#28),
 'Representation': None,
 'LongName': None,
 'CompositionType': 'ELEMENT',
 'ElevationOfRefHeight': None,
 'ElevationOfTerrain': None,
 'BuildingAddress': None}

In [None]:
# Print the attribute dictionary of every IfcBuildingStorey in the HVAC model.
storyes_lvi = hvac_ifc.by_type("IfcBuildingStorey")
for storey in storyes_lvi:
    print(storey.get_info())

{'id': 38, 'type': 'IfcBuildingStorey', 'GlobalId': '0w4y3u2_fFXf$tPPWTaivY', 'OwnerHistory': #5=IfcOwnerHistory(#8,#9,$,.NOCHANGE.,$,$,$,1562874076), 'Name': 'Kellari', 'Description': None, 'ObjectType': None, 'ObjectPlacement': #39=IfcLocalPlacement(#35,#37), 'Representation': None, 'LongName': None, 'CompositionType': 'ELEMENT', 'Elevation': 19900.0}
{'id': 471111, 'type': 'IfcBuildingStorey', 'GlobalId': '3OElruOf1EA9xZz8W4zq6h', 'OwnerHistory': #5=IfcOwnerHistory(#8,#9,$,.NOCHANGE.,$,$,$,1562874076), 'Name': '0 kerros', 'Description': None, 'ObjectType': None, 'ObjectPlacement': #471112=IfcLocalPlacement(#35,#471110), 'Representation': None, 'LongName': None, 'CompositionType': 'ELEMENT', 'Elevation': 23700.0}
{'id': 952737, 'type': 'IfcBuildingStorey', 'GlobalId': '3_Ijz3EZ1B6BYQpbCLflPr', 'OwnerHistory': #5=IfcOwnerHistory(#8,#9,$,.NOCHANGE.,$,$,$,1562874076), 'Name': '1.kerros', 'Description': None, 'ObjectType': None, 'ObjectPlacement': #952738=IfcLocalPlacement(#35,#952736), 

In [None]:
# --- Align the architecture and HVAC spatial structures ---
# Produces:
#   matches               -- list of (architecture URI, HVAC URI) pairs to link
#   matched_storeys_count -- number of architecture storeys that found an HVAC storey
# Storeys are the anchor: site and building pairs are only added to `matches` when at
# least one storey pair was found, so the warning printed at the end is truthful.

# Largest elevation difference at which two storeys still count as the same level.
# It is compared against the raw Elevation values of both IFC files, so it assumes
# both models use the same length unit -- TODO confirm.
ELEVATION_TOLERANCE = 0.1


def _storey_elevation(ifc_file, proxy_name):
    """Return the Elevation of the entity named '<Type>_<STEP id>', or None if unavailable.

    None is returned when the text after the last '_' is not an integer (for example
    the "Unknown" placeholder) or when the entity's Elevation attribute is None.
    """
    try:
        step_id = int(proxy_name.split("_")[-1])
    except ValueError:
        return None
    return ifc_file.by_id(step_id).Elevation


matches = []
matched_storeys_count = 0

# 1. Match Sites and Buildings
# A pair is only unambiguous when each model has exactly one element at that level.
# The pairs are held back until the storey alignment below has succeeded.
spatial_root_matches = []
for level in ('sites', 'buildings'):
    arch_uris = list(arch_data[level].values())
    hvac_uris = list(hvac_iv_data[level].values())
    if len(arch_uris) == 1 and len(hvac_uris) == 1:
        spatial_root_matches.append((arch_uris[0], hvac_uris[0]))

# 2. Match Storeys (The Anchor)
print("\n--- Aligning Storeys ---")

# Resolve every HVAC storey elevation once instead of once per architecture storey.
hvac_storey_elevations = {}
for h_name, h_uri in hvac_iv_data['storeys'].items():
    h_elevation = _storey_elevation(hvac_ifc, h_name)
    if h_elevation is None:
        print(f"SKIP: HVAC storey '{h_name}' has no usable elevation")
        continue
    hvac_storey_elevations[h_name] = (h_uri, h_elevation)

storey_matches = []
for a_name, a_uri in arch_data['storeys'].items():
    a_elevation = _storey_elevation(arch_ifc, a_name)
    if a_elevation is None:
        print(f"SKIP: Storey '{a_name}' has no usable elevation")
        continue

    # Keep the closest HVAC storey that lies strictly within the tolerance.
    best_match_uri = None
    best_match_name = None
    best_diff = ELEVATION_TOLERANCE
    for h_name, (h_uri, h_elevation) in hvac_storey_elevations.items():
        elevation_diff = abs(a_elevation - h_elevation)
        if elevation_diff < best_diff:
            best_diff = elevation_diff
            best_match_uri = h_uri
            best_match_name = h_name

    if best_match_uri is not None:
        storey_matches.append((a_uri, best_match_uri))
        matched_storeys_count += 1
        print(f"MATCH: Storey '{a_name}' == '{best_match_name}'")

if matched_storeys_count > 0:
    # Same ordering as before: site/building pairs first, then storey pairs.
    matches = spatial_root_matches + storey_matches
    print(f"\nTotal matched storeys: {matched_storeys_count}")
else:
    # `matches` stays empty, so no site/building link reaches the linkset.
    print("\nWarning: No storeys matched. Aborting Site/Building linkage to avoid false positives.")




--- Aligning Storeys ---
MATCH: Storey 'IfcBuildingStorey_157' == 'IfcBuildingStorey_1613518'
MATCH: Storey 'IfcBuildingStorey_163' == 'IfcBuildingStorey_2383850'
MATCH: Storey 'IfcBuildingStorey_169' == 'IfcBuildingStorey_3134010'
MATCH: Storey 'IfcBuildingStorey_181' == 'IfcBuildingStorey_4765043'
MATCH: Storey 'IfcBuildingStorey_175' == 'IfcBuildingStorey_3881292'
MATCH: Storey 'IfcBuildingStorey_139' == 'IfcBuildingStorey_38'
MATCH: Storey 'IfcBuildingStorey_145' == 'IfcBuildingStorey_471111'
MATCH: Storey 'IfcBuildingStorey_187' == 'IfcBuildingStorey_5487123'
MATCH: Storey 'IfcBuildingStorey_151' == 'IfcBuildingStorey_952737'

Total matched storeys: 9


In [26]:
# 4. Generate Linkset
# Every (architecture URI, HVAC URI) pair in `matches` becomes one owl:sameAs triple,
# and the resulting graph is written to links.ttl in the working directory.
linkset = Graph()
# The bot prefix is bound as well; useful if you want to extend with bot types later.
for prefix, namespace in (("owl", OWL), ("bot", BOT)):
    linkset.bind(prefix, namespace)

print("\n--- Generating 'links.ttl' ---")
for arch_uri, hvac_uri in matches:
    same_as_triple = (arch_uri, OWL.sameAs, hvac_uri)
    linkset.add(same_as_triple)

linkset.serialize(destination="links.ttl", format="ttl")
print("Done.")


--- Generating 'links.ttl' ---
Done.
