## Generic function that converts an RDF knowledge graph (KG) to a NetworkX graph

In [1]:
import enum

import networkx as nx
from rdflib import Graph, URIRef, Literal, Namespace
from rdflib.namespace import RDF, RDFS, XSD
import sys

def networkx(rdf_graph):
    """
    Convert an RDFLib graph to a NetworkX directed graph.

    - Nodes: every triple subject, plus every non-literal object reached
      through a predicate other than rdf:type.
    - Edges: a non-literal object creates a directed edge subject -> object
      whose ``predicate`` attribute holds the predicate URI.
    - Attributes: a literal object is stored on the subject node under the
      predicate URI; the rdf:type object is stored as a node attribute too.

    Args:
        rdf_graph: Either an ``rdflib.Graph`` or the path (str) of a Turtle
            file whose name ends in ``.ttl``.

    Returns:
        The populated ``nx.DiGraph``, or ``None`` (after printing a message)
        when the input type is unsupported, the path is not a ``.ttl`` file,
        or the file cannot be parsed.
    """
    if isinstance(rdf_graph, str):
        if not rdf_graph.endswith('.ttl'):
            print("Unexpected input format. Provide a Turtle (.ttl) file path.")
            # Stop here: falling through would try to iterate the path string
            # as if it were a graph of triples.
            return None
        try:
            rdf_graph = Graph().parse(rdf_graph, format='turtle')
        except Exception as e:
            # Best-effort loader: report the parse problem and signal failure.
            print(f"Error parsing Turtle file: {e}")
            return None
    elif not isinstance(rdf_graph, Graph):
        print("Unexpected input format. Provide an RDFLib Graph or a .ttl file path.")
        return None

    # Initialize the directed graph
    nx_graph = nx.DiGraph()

    # Loop-invariant key under which the rdf:type of a node is stored.
    rdf_type_key = RDF.type.toPython()

    # Process the RDF graph
    for s, p, o in rdf_graph:
        s, p = s.toPython(), p.toPython()
        # Ensure subject is a node (add_node leaves an existing node's attributes untouched)
        nx_graph.add_node(s)

        if isinstance(o, Literal):
            # Data property -> store as a node attribute.
            # A repeated predicate on the same subject keeps only the last value seen.
            nx_graph.nodes[s][p] = o.toPython()
            continue

        # Object property -> add an edge (or record the type)
        o = o.toPython()
        # Enum members are converted to their string form before being used as a node key
        if isinstance(o, enum.Enum):
            o = str(o)
        if p == rdf_type_key:
            nx_graph.nodes[s][rdf_type_key] = o
        else:
            # NOTE: a DiGraph holds one edge per (s, o) pair, so a second predicate
            # between the same two nodes replaces the stored ``predicate`` value.
            nx_graph.add_edge(s, o, predicate=p)

    return nx_graph

## Generic function that extracts features from any NetworkX graph

In [29]:
# Parse the traffic knowledge-graph Turtle file into a NetworkX DiGraph.
# networkx() returns None (after printing a message) if the file cannot be parsed.
# The path is an absolute, machine-specific Windows path (raw string keeps the backslashes literal).
g_nx_traffic  = networkx(r"D:\Master\Thesis\Code\traffic\metr-la\LA_traffic_KG.ttl")

In [176]:
import re
from rdflib import RDF

def shorten_uri(uri, ontology_base):
    """
    Extract the local name from a full URI.

    Strips the known ontology base when the URI starts with it and keeps the
    suffix (e.g. ``Pipe_9`` or ``diameter``). Otherwise falls back to the text
    after the last ``#``, then the text after the last ``/``.

    Args:
        uri: The URI (or any plain string) to shorten.
        ontology_base: Base URI to strip from the front of ``uri``. An empty
            string or ``None`` means "no known base": only the ``#`` / ``/``
            fallbacks are applied.

    Returns:
        The shortened name, or ``uri`` unchanged when it contains neither the
        base prefix nor a ``#`` or ``/`` separator.
    """
    if ontology_base and uri.startswith(ontology_base):
        # Remove only the leading prefix; str.replace would also delete any
        # later occurrence of the base inside the remaining text.
        return uri[len(ontology_base):]
    if "#" in uri:
        return uri.rsplit("#", 1)[-1]
    if "/" in uri:
        return uri.rsplit("/", 1)[-1]
    return uri

def extract_features_from_nx_graph(nx_graph, ontology_base, edge_predicates, node_type_filter, attribute_keys):
    """
    Build a ``{short_node_id: {feature_name: value}}`` dictionary from a NetworkX graph.

    Args:
        nx_graph: NetworkX graph whose node attributes are keyed by URIs and
            whose edges carry a ``predicate`` attribute.
        ontology_base: Base URI passed to ``shorten_uri`` for every key,
            string value, node id and edge target.
        edge_predicates: List of shortened predicate names whose outgoing
            edge targets are added as features, or None to ignore edges.
        node_type_filter: Shortened rdf:type value a node must have to be
            kept, or None to keep every node.
        attribute_keys: List of shortened attribute names to keep, or None to
            keep every attribute except rdf:type.

    Returns:
        dict mapping each kept node's shortened id to its feature dict. An
        edge predicate with one target is stored as a plain value; further
        targets for the same name turn the entry into a list.
    """
    type_key = shorten_uri(str(RDF.type), ontology_base)
    result = {}

    for node, raw_attrs in nx_graph.nodes(data=True):
        # Shorten every attribute name, and every attribute value that is a string
        attrs = {}
        for raw_key, raw_value in raw_attrs.items():
            short_key = shorten_uri(raw_key, ontology_base)
            if isinstance(raw_value, str):
                attrs[short_key] = shorten_uri(raw_value, ontology_base)
            else:
                attrs[short_key] = raw_value

        # Skip nodes of another type when a type filter is given
        if node_type_filter and attrs.get(type_key) != node_type_filter:
            continue

        short_node = shorten_uri(node, ontology_base)

        if attribute_keys:
            # Keep only the requested attributes that the node actually has
            selected = {}
            for wanted in attribute_keys:
                if wanted in attrs:
                    selected[wanted] = attrs[wanted]
        else:
            # Keep everything except rdf:type
            selected = dict(attrs)
            selected.pop(type_key, None)

        if edge_predicates:
            # Fold the targets of the requested outgoing predicates into the features
            for _, target, edge_attrs in nx_graph.out_edges(node, data=True):
                predicate = shorten_uri(edge_attrs.get("predicate", ""), ontology_base)
                if predicate not in edge_predicates:
                    continue

                short_target = shorten_uri(target, ontology_base)
                if predicate not in selected:
                    selected[predicate] = short_target  # first value stays a scalar
                elif isinstance(selected[predicate], list):
                    selected[predicate].append(short_target)
                else:
                    selected[predicate] = [selected[predicate], short_target]

        result[short_node] = selected

    return result


### Traffic Semantic Features

In [144]:
# Step 1: Extract traffic sensor data
# Keeps nodes whose shortened rdf:type is "TrafficSensor". attribute_keys=None keeps all
# data attributes except rdf:type, and the targets of outgoing "installedOn" edges are
# added under the "installedOn" key (a plain value for one target, a list for several).
sensor_features = extract_features_from_nx_graph(
    nx_graph=g_nx_traffic,
    ontology_base="http://example.org/traffic#",
    edge_predicates=["installedOn"],
    node_type_filter="TrafficSensor",
    attribute_keys=None,
)

# Step 2: Extract road segments with their attributes
# Keeps nodes whose shortened rdf:type is "RoadSegment" and only the three listed
# attributes (those a node lacks are simply absent); edges are ignored.
road_features = extract_features_from_nx_graph(
    nx_graph=g_nx_traffic,
    ontology_base="http://example.org/traffic#",
    edge_predicates=None,
    node_type_filter="RoadSegment",
    attribute_keys=["roadType", "maxSpeed", "numberOfLanes"]
)

In [145]:
# Step 3: Link sensors to roads and build rows
# Produces one row per sensor that is installed on a road with extracted attributes.
traffic_features = []
for sensor_id, sensor_data in sensor_features.items():
    linked_roads = sensor_data.get("installedOn")
    if not linked_roads:
        continue
    # extract_features_from_nx_graph stores a single edge target as a plain string but
    # several targets as a list; normalise so a list is never used as a dictionary key.
    if not isinstance(linked_roads, list):
        linked_roads = [linked_roads]
    # Keep one row per sensor: use the first linked road that has extracted attributes.
    road_data = next(
        (road_features[road_id] for road_id in linked_roads if road_features.get(road_id)),
        None,
    )
    if not road_data:
        continue
    row = {
        "sensor_id": sensor_id,
        "road_type": road_data.get("roadType", ""),
        # Missing numeric attributes default to 0
        "max_speed": float(road_data.get("maxSpeed", 0)),
        "num_lanes": int(road_data.get("numberOfLanes", 0))
    }
    traffic_features.append(row)

In [146]:
traffic_features

[{'sensor_id': 'sensor_759591',
  'road_type': 'motorway',
  'max_speed': 65.0,
  'num_lanes': 5},
 {'sensor_id': 'sensor_718204',
  'road_type': 'motorway',
  'max_speed': 65.0,
  'num_lanes': 5},
 {'sensor_id': 'sensor_767509',
  'road_type': 'motorway',
  'max_speed': 65.0,
  'num_lanes': 4},
 {'sensor_id': 'sensor_717452',
  'road_type': 'motorway',
  'max_speed': 55.0,
  'num_lanes': 4},
 {'sensor_id': 'sensor_717465',
  'road_type': 'motorway',
  'max_speed': 55.0,
  'num_lanes': 4},
 {'sensor_id': 'sensor_767554',
  'road_type': 'motorway',
  'max_speed': 65.0,
  'num_lanes': 4},
 {'sensor_id': 'sensor_761599',
  'road_type': 'motorway',
  'max_speed': 65.0,
  'num_lanes': 5},
 {'sensor_id': 'sensor_768469',
  'road_type': 'motorway',
  'max_speed': 55.0,
  'num_lanes': 6},
 {'sensor_id': 'sensor_764424',
  'road_type': 'motorway',
  'max_speed': 65.0,
  'num_lanes': 5},
 {'sensor_id': 'sensor_716960',
  'road_type': 'motorway',
  'max_speed': 65.0,
  'num_lanes': 4},
 {'sensor_

### Pipe Semantic Features

In [147]:
# Parse the LeakDB knowledge-graph Turtle file (path relative to the working directory)
# into a NetworkX DiGraph; networkx() returns None if the file cannot be parsed.
g_nx_leakdb = networkx("KGS/knowledge_graph1-LeakDB.ttl")

In [148]:
# Extract the four listed attributes for every node whose shortened rdf:type is "Pipe".
pipe_features = extract_features_from_nx_graph(
    g_nx_leakdb,
   "https://raw.githubusercontent.com/DiTEC-project/wdn-knowledge-graph/refs/heads/main/wdn_ontology.ttl",
    node_type_filter="Pipe",  # compared against the shortened type value, so pass the local name, not a full URI
    attribute_keys=["diameter", "length", "roughness", "status"],
    edge_predicates=None
)

In [149]:
pipe_features

{'Pipe_9': {'diameter': 0.9925071,
  'length': 815.7009,
  'roughness': 132.6997,
  'status': 'Open'},
 'Pipe_3': {'diameter': 0.9429965,
  'length': 942.114,
  'roughness': 142.5124,
  'status': 'Open'},
 'Pipe_18': {'diameter': 0.6055914,
  'length': 782.5734,
  'roughness': 120.8565,
  'status': 'Open'},
 'Pipe_8': {'diameter': 1.038202,
  'length': 857.5721,
  'roughness': 125.6884,
  'status': 'Open'},
 'Pipe_11': {'diameter': 0.8017894,
  'length': 1238.727,
  'roughness': 134.9172,
  'status': 'Open'},
 'Pipe_23': {'diameter': 1.046681,
  'length': 2714.027,
  'roughness': 118.6827,
  'status': 'Open'},
 'Pipe_22': {'diameter': 0.3135412,
  'length': 506.56,
  'roughness': 120.6469,
  'status': 'Open'},
 'Pipe_16': {'diameter': 0.3693999,
  'length': 2858.393,
  'roughness': 131.5999,
  'status': 'Open'},
 'Pipe_19': {'diameter': 0.5864745,
  'length': 403.2065,
  'roughness': 132.9782,
  'status': 'Open'},
 'Pipe_4': {'diameter': 1.046316,
  'length': 1177.574,
  'roughness': 1

### MotionSense Semantic Features

In [89]:
# Raw string: the Windows path contains backslashes that must not be read as escape sequences.
g_nx_motionsense = networkx(r"D:\Master\Thesis\Code\motion-sense\motion_sense_ssn_kg.ttl")

In [150]:
# Extract the "hasGender" attribute for every node whose shortened rdf:type is "Participant".
motionSense_features = extract_features_from_nx_graph(
    g_nx_motionsense,
   "http://example.org/motion-sense#",
    node_type_filter="Participant",  # compared against the shortened type value, so pass the local name, not a full URI
    attribute_keys= ["hasGender"],
    edge_predicates=None
)

In [151]:
motionSense_features

{'Participant_P014': {'hasGender': 1},
 'Participant_P005': {'hasGender': 0},
 'Participant_P021': {'hasGender': 1},
 'Participant_P010': {'hasGender': 0},
 'Participant_P024': {'hasGender': 0},
 'Participant_P004': {'hasGender': 1},
 'Participant_P016': {'hasGender': 0},
 'Participant_P013': {'hasGender': 1},
 'Participant_P020': {'hasGender': 1},
 'Participant_P017': {'hasGender': 1},
 'Participant_P019': {'hasGender': 0},
 'Participant_P009': {'hasGender': 1},
 'Participant_P023': {'hasGender': 0},
 'Participant_P018': {'hasGender': 0},
 'Participant_P008': {'hasGender': 0},
 'Participant_P003': {'hasGender': 0},
 'Participant_P012': {'hasGender': 1},
 'Participant_P001': {'hasGender': 1},
 'Participant_P002': {'hasGender': 1},
 'Participant_P007': {'hasGender': 0},
 'Participant_P006': {'hasGender': 1},
 'Participant_P011': {'hasGender': 1},
 'Participant_P022': {'hasGender': 1},
 'Participant_P015': {'hasGender': 1}}

### LBNL Semantic Features

In [162]:
# Raw string: the Windows path contains backslashes that must not be read as escape sequences.
g_nx_lbnl = networkx(r"D:\Master\Thesis\Code\LBNL_FDD_Dataset_FCU\LBNL_Ontology_Original.ttl")

In [177]:
# Extract, for every node, its shortened rdf:type (under the "type" key) and the
# targets of its outgoing "hasPoint" edges.
# NOTE(review): the base URI below is the MotionSense namespace; confirm it is the
# intended base for the LBNL graph. Names that do not start with it are still
# shortened by shorten_uri's '#' / '/' fallback.
lbnl_features = extract_features_from_nx_graph(
    g_nx_lbnl,
   "http://example.org/motion-sense#",
    node_type_filter=None,  # None disables type filtering: every node is kept
    attribute_keys= ["type"],
    edge_predicates=["hasPoint"]
)

In [178]:
lbnl_features

{'FCU': {'type': 'FCU',
  'hasPoint': ['FCU_DA_CFM',
   'FCU_RA_HUMD',
   'FCU_CTRL',
   'FCU_RAT',
   'FCU_DA_HUMD',
   'FCU_MAT',
   'FCU_DAT',
   'FCU_OAT',
   'FCU_OA_CFM',
   'FCU_OA_HUMD',
   'FCU_MA_HUMD']},
 'Supply_air_fan': {'type': 'Fan',
  'hasPoint': ['FCU_SPD', 'FCU_WAT', 'FAN_CTRL']},
 'FCU_OA_HUMD': {'type': 'Outside_Air_Humidity_Sensor'},
 'Cooling_coil': {'type': 'Chilled_Water_Coil',
  'hasPoint': ['FCU_CLG_GPM',
   'FCU_CLG_RWT',
   'FCU_CVLV',
   'FCU_CLG_EWT',
   'FCU_CVLV_DM']},
 'FCU_CLG_GPM': {'type': 'Chilled_Water_Supply_Flow_Sensor'},
 'Outdoor_air_damper': {'type': 'Outside_Damper',
  'hasPoint': ['FCU_DMPR', 'FCU_DMPR_DM']},
 'FCU_DMPR': {'type': 'Damper_Position_Sensor'},
 'FCU_MA_HUMD': {'type': 'Mixed_Air_Humidity_Sensor'},
 'FCU_HTG_EWT': {'type': 'Entering_Water_Temperature_Sensor'},
 'FCU_SPD': {'type': 'Speed_Status'},
 'FCU_DAT': {'type': 'Discharge_Air_Temperature_Sensor'},
 'Zone': {'type': 'Zone', 'hasPoint': ['RMHTGSPT', 'RM_TEMP', 'RMCLGSPT']}

### OfficeGraph Semantic Features

In [155]:
# Raw string: the Windows path contains backslashes that must not be read as escape sequences.
g_nx_officeGraph = networkx(r"D:\Master\Thesis\Code\OfficeGraph\devices_in_rooms.ttl")

In [156]:
# Extract every node with all of its data attributes except rdf:type;
# no type filter is applied and edges are ignored.
officeGraph_features = extract_features_from_nx_graph(
    g_nx_officeGraph,
   "https://saref.etsi.org/saref4bldg/",
    node_type_filter=None,  # None disables type filtering: every node is kept
    attribute_keys= None,
    edge_predicates=None
)

In [157]:
officeGraph_features

{'zone_VL-F1-O4-M50': {'comment': 'support_zone'},
 'zone_VL-F1-W1-W40': {'comment': 'support_zone'},
 'zone_VL-F2-W4-38': {'comment': 'support_zone'},
 'VL_floor_2': {},
 'zone_VL-F3-O3-A-29': {'comment': 'support_zone'},
 'zone_VL-F1-O3-A-11': {'comment': 'support_zone'},
 'R5_177': {},
 'device_urn:Device:SmartThings:44f87a6f-4660-4441-beb7-5e1e1a8a34d2': {},
 'zone_VL-F4-O4-M38': {'comment': 'support_zone'},
 'R5_14': {},
 'roomname_3.067': {'comment': 'room'},
 'zone_VL-F5-WEST-M-48': {'comment': 'support_zone'},
 'VL_floor_5': {},
 'roomname_4.005': {'comment': 'room'},
 'device_urn:Device:SmartThings:81b4c38c-0622-47a0-a932-8087d28097b4': {},
 'R5_114': {},
 'roomname_3.015': {'comment': 'room'},
 'zone_VL-F2-W-W7': {'comment': 'support_zone'},
 'zone_VL-F3-O3-W29C': {'comment': 'support_zone'},
 'zone_VL-F3-O4-T40': {'comment': 'support_zone'},
 'device_urn:Device:SmartThings:0a1498b7-650a-48d5-9c69-37731e181bbb': {},
 'zone_Energie_verbruik_gebouw_videolab': {'comment': 'suppo