In [23]:
from rdflib import Graph, URIRef, Namespace
from rdflib.namespace import split_uri

def get_namespaces(graph):
    """Collect the namespace of every URIRef that appears in the graph.

    Args:
        graph: An iterable of (subject, predicate, object) triples, such as
            an rdflib ``Graph``.

    Returns:
        A ``(namespaces, node_to_ns)`` tuple. ``namespaces`` is the set of
        namespace strings produced by ``split_uri``. ``node_to_ns`` maps each
        URIRef seen in the graph to its namespace string, or to ``None`` when
        ``split_uri`` raised for that URI.
    """
    namespaces = set()
    node_to_ns = {}
    for s, p, o in graph:
        for node in (s, p, o):
            if not isinstance(node, URIRef):
                continue
            # The same URI usually occurs in many triples; its split result
            # cannot change, so each distinct node is split only once.
            if node in node_to_ns:
                continue
            try:
                ns, _ = split_uri(str(node))
            except Exception:
                # split_uri raises when it cannot find a namespace/local-name
                # boundary; record the node so the caller can deal with it.
                node_to_ns[node] = None
            else:
                namespaces.add(ns)
                node_to_ns[node] = ns
    return namespaces, node_to_ns

def is_namespace_bound(graph, ns):
    """Report whether some prefix in the graph's namespace manager maps to ``ns``.

    Each bound namespace is converted with ``str()`` before being compared to
    ``ns``, so ``ns`` is expected to be a plain string.

    Returns:
        ``True`` if at least one binding matches, otherwise ``False``.
    """
    bindings = graph.namespace_manager.namespaces()
    return any(str(bound_uri) == ns for _prefix, bound_uri in bindings)

def _remove_triples_with_warning(graph, triples, message):
    """Print ``message`` and each triple, then remove the triples from ``graph``.

    Does nothing (and prints nothing) when ``triples`` is empty.
    """
    if not triples:
        return
    print(message)
    for triple in triples:
        print("   ", triple)
    for triple in triples:
        graph.remove(triple)


def bind_or_remove_with_warning(graph):
    """Bind a generated prefix to every namespace in the graph that lacks one.

    Namespaces are taken from ``get_namespaces`` and processed in sorted
    order. A namespace that already has a prefix is skipped; the others are
    bound to ``ns1``, ``ns2``, ... (the counter advances only after a
    successful bind).

    The graph is modified in place:
      * If binding a namespace raises, every triple containing a URIRef whose
        extracted namespace is exactly that namespace is removed.
      * Every triple containing a URIRef whose namespace could not be
        extracted at all is removed.
    Each removal is reported on stdout with a WARNING line followed by the
    removed triples.

    Args:
        graph: The rdflib graph to update.

    Returns:
        None.
    """
    namespaces, node_to_ns = get_namespaces(graph)
    prefix_counter = 1
    for ns in sorted(namespaces):
        if is_namespace_bound(graph, ns):
            # Namespace already has a prefix, skip binding
            continue
        prefix = f'ns{prefix_counter}'
        try:
            graph.bind(prefix, Namespace(ns))
        except Exception:
            # Match on the namespace recorded for each node rather than on a
            # string prefix: a startswith() test would also hit URIs from a
            # different, longer namespace that merely begins with ``ns``.
            triples_to_remove = [
                triple for triple in graph
                if any(
                    isinstance(node, URIRef) and node_to_ns.get(node) == ns
                    for node in triple
                )
            ]
            _remove_triples_with_warning(
                graph,
                triples_to_remove,
                f"WARNING: Could not bind namespace '{ns}'; removing {len(triples_to_remove)} triples:",
            )
        else:
            prefix_counter += 1

    # Handle nodes with unextractable namespaces
    unsplittable_nodes = [node for node, ns in node_to_ns.items() if ns is None]
    for node in unsplittable_nodes:
        # Re-scan per node: a triple already removed for an earlier node must
        # not be reported again.
        triples_to_remove = [triple for triple in graph if node in triple]
        _remove_triples_with_warning(
            graph,
            triples_to_remove,
            f"WARNING: Could not extract namespace for node '{node}'; removing {len(triples_to_remove)} triples:",
        )


In [24]:
# Parse 'brick-example.ttl' into a fresh rdflib Graph bound to `g`.
# parse() is the last expression, so the notebook displays its return value.
g = Graph()
g.parse('brick-example.ttl')
# Alternative input, currently disabled:
# g.parse('ontologies/Brick14.ttl')

<Graph identifier=Nf8c0db53b17847c1a846719539588ff4 (<class 'rdflib.graph.Graph'>)>

In [25]:
# Bind generated prefixes to the unbound namespaces in `g`. This modifies `g`
# in place: triples the function cannot handle are removed and printed below.
bind_or_remove_with_warning(g)

    (rdflib.term.URIRef('urn:example#'), rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), rdflib.term.URIRef('http://www.w3.org/2002/07/owl#Ontology'))


In [26]:
# NOTE(review): the return value is not assigned. The Graph displayed below has
# a different identifier from the one shown when `g` was parsed, which suggests
# skolemize() returns a new graph and leaves `g` unchanged — confirm whether
# the result was meant to be kept (e.g. assigned to a new name).
g.skolemize()

<Graph identifier=N83021634a74f451ab1473f416cb31fcc (<class 'rdflib.graph.Graph'>)>

In [27]:
# Print the serialization of `g` in the 'ttl' format for inspection.
print(g.serialize(format = 'ttl'))

@prefix : <urn:example#> .
@prefix brick: <https://brickschema.org/schema/Brick#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

:multiple-zone-ahu_name_0 a brick:AHU ;
    rdfs:label "AHU" ;
    brick:feeds :vav-cooling-only_name_0_0,
        :vav-cooling-only_name_0_1,
        :vav-cooling-only_name_0_10,
        :vav-cooling-only_name_0_11,
        :vav-cooling-only_name_0_12,
        :vav-cooling-only_name_0_13,
        :vav-cooling-only_name_0_14,
        :vav-cooling-only_name_0_2,
        :vav-cooling-only_name_0_3,
        :vav-cooling-only_name_0_4,
        :vav-cooling-only_name_0_5,
        :vav-cooling-only_name_0_6,
        :vav-cooling-only_name_0_7,
        :vav-cooling-only_name_0_8,
        :vav-cooling-only_name_0_9,
        :vav-with-reheat_name_0_0,
        :vav-with-reheat_name_0_1,
        :vav-with-reheat_name_0_10,
        :vav-with-reheat_name_0_11,
        :vav-with-reheat_name_0_12,
        :vav-with-reheat_name_0_13,
        :vav-with-reheat_name

In [28]:
# Serialize `g` to the file 'brick-example-parsed.ttl'. No format is passed, so
# rdflib's default applies (presumably Turtle — confirm for the installed version).
g.serialize('brick-example-parsed.ttl')

<Graph identifier=Nf8c0db53b17847c1a846719539588ff4 (<class 'rdflib.graph.Graph'>)>

In [9]:
# Re-parse 'brick-example.ttl' into a new Graph (rebinding `g`) and display len(g).
# NOTE(review): this cell's execution count (9) is lower than the cells above
# (23-28), so it was run earlier in the session than its position suggests.
g = Graph()
g.parse('brick-example.ttl')
len(g)

3429

In [10]:
# Parse 's223-example.ttl' into a new Graph (rebinding `g`) and display len(g).
# NOTE(review): execution count 10 is lower than the cells above (23-28), so
# this cell was run earlier in the session than its position suggests.
g = Graph()
g.parse('s223-example.ttl')
len(g)

6901