In [2]:
import re
from rdflib import Graph, URIRef, Literal
from rdflib.namespace import RDFS, RDF, OWL
import rdflib.util
from owlready2 import get_ontology, OwlReadyOntologyParsingError, Thing

def normalize_name(name):
    """
    Build a canonical form of a name for separator-insensitive comparison.

    The name is lowercased and every run of whitespace, hyphens and
    underscores is dropped, so "Material_Property" and "material property"
    both become "materialproperty".

    Args:
    - name (str): The name to normalize.

    Returns:
    - str: The lowercased name with all separators removed.
    """
    lowered = name.lower()
    # Splitting on separator runs and gluing the pieces back together removes
    # the separators without touching any other character.
    fragments = re.split(r'[\s\-_]+', lowered)
    return ''.join(fragments)

In [3]:
def get_local_name(uri):
    """
    Return the trailing local-name part of a URI.

    For a URIRef, the result is whatever follows the last '/' and, within
    that, whatever follows the last '#'. Any other value is simply converted
    to its string form.

    Args:
    - uri (URIRef): The URI to extract the local name from.

    Returns:
    - str: The local name.
    """
    if not isinstance(uri, URIRef):
        return str(uri)

    # Cut at the last path separator first, then at the last fragment marker.
    after_last_slash = uri.rsplit('/', 1)[-1]
    return after_last_slash.rsplit('#', 1)[-1]

def load_ontology_files(file_paths):
    """
    Read each ontology file and collect the parsed result under its name.

    Files with an ``owl`` extension are first loaded with owlready2; if that
    raises ``OwlReadyOntologyParsingError`` the file is parsed with rdflib as
    RDF/XML instead. All other files go straight to rdflib: ``xrdf`` is read
    as RDF/XML, ``ttl`` as Turtle, and any other extension uses the format
    returned by ``rdflib.util.guess_format``.

    Args:
    - file_paths (dict): A dictionary where keys are the names and values are the file paths of the ontology files.

    Returns:
    - dict: A dictionary where keys are the names and values are the loaded RDF graphs or OWL ontologies.
    """
    # Extensions whose rdflib parser is fixed rather than guessed.
    explicit_formats = {'xrdf': 'xml', 'ttl': 'turtle'}

    loaded = {}
    for label, path in file_paths.items():
        suffix = path.split('.')[-1].lower()

        if suffix != 'owl':
            # Plain RDF file: parse with rdflib.
            rdf_format = explicit_formats.get(suffix)
            if rdf_format is None:
                rdf_format = rdflib.util.guess_format(path)
            rdf_graph = Graph()
            rdf_graph.parse(path, format=rdf_format)
            loaded[label] = rdf_graph
            continue

        try:
            # Prefer owlready2 for OWL files.
            loaded[label] = get_ontology(path).load()
        except OwlReadyOntologyParsingError:
            # owlready2 could not parse the file; fall back to rdflib.
            fallback_graph = Graph()
            fallback_graph.parse(path, format='xml')
            loaded[label] = fallback_graph

    return loaded

In [4]:
def extract_classes_with_labels(graph):
    """
    Collect every class in an RDF graph or OWL ontology together with its label.

    For an rdflib Graph, subjects typed as ``rdfs:Class`` are visited first and
    subjects typed as ``owl:Class`` second; a class found by both keeps the
    value written by the later visit. For any other object the owlready2
    ``classes()`` iterator is used.

    Args:
    - graph (Graph or Ontology): The RDF graph or OWL ontology.

    Returns:
    - dict: A dictionary where keys are class local names and values are labels (if available).
    """
    if not isinstance(graph, Graph):
        # Ontology loaded with owlready2: take the first label, if any.
        return {
            owl_class.name: (owl_class.label.first() if owl_class.label else None)
            for owl_class in graph.classes()
        }

    labels_by_class = {}
    for class_type in (RDFS.Class, OWL.Class):
        for subject in graph.subjects(RDF.type, class_type):
            label = graph.value(subject, RDFS.label)
            labels_by_class[get_local_name(subject)] = str(label) if label else None
    return labels_by_class

In [5]:
def extract_normalized_names(graph):
    """
    Gather the normalized form of every name that appears in an RDF graph.

    Each subject, predicate and object of every triple contributes one name:
    the local name for a URIRef, the string value for a Literal. Terms of any
    other kind are ignored in this pass. The objects of all ``rdfs:label``
    triples are then added as well, using their string form.

    Args:
    - graph (Graph): The RDF graph.

    Returns:
    - set: A set of normalized names.
    """
    collected = set()

    for subject, predicate, obj in graph:
        for term in (subject, predicate, obj):
            if isinstance(term, URIRef):
                raw_name = get_local_name(term)
            elif isinstance(term, Literal):
                raw_name = str(term)
            else:
                # Neither a URI nor a literal: nothing to name.
                continue
            collected.add(normalize_name(raw_name))

    collected.update(
        normalize_name(str(label_value))
        for _, _, label_value in graph.triples((None, RDFS.label, None))
    )

    return collected

In [6]:
def _record_subclass_link(relations, subclass_name, superclass_name):
    """Register one subclass -> superclass edge on both endpoints of `relations`."""
    for class_name in (subclass_name, superclass_name):
        relations.setdefault(class_name, {'subclasses': set(), 'superclasses': set()})
    relations[subclass_name]['superclasses'].add(superclass_name)
    relations[superclass_name]['subclasses'].add(subclass_name)


def extract_subclass_superclass(graph):
    """
    Extract all subclass and superclass relationships from an RDF graph or OWL ontology.

    For an rdflib Graph every ``rdfs:subClassOf`` triple contributes one
    relation. For an owlready2 ontology every entry of each class's ``is_a``
    list that is itself a class contributes one relation; entries that are
    not classes are skipped.

    Args:
    - graph (Graph or Ontology): The RDF graph or OWL ontology.

    Returns:
    - dict: A dictionary where keys are class local names and values are dictionaries with 'subclasses' and 'superclasses'
      (both sets of class local names). Every class that appears on either side of a relation has an entry.
    """
    relations = {}

    if isinstance(graph, Graph):
        for subclass, superclass in graph.subject_objects(RDFS.subClassOf):
            _record_subclass_link(
                relations, get_local_name(subclass), get_local_name(superclass)
            )
        return relations

    # Handle OWL ontology loaded with owlready2.
    # Superclasses in `is_a` are classes, i.e. instances of Thing's metaclass,
    # not instances of Thing itself. Testing `isinstance(x, Thing)` therefore
    # rejects every named superclass, so the check is made against the
    # metaclass instead.
    owl_class_type = type(Thing)
    for cls in graph.classes():
        for superclass in cls.is_a:
            if isinstance(superclass, owl_class_type):
                _record_subclass_link(relations, cls.name, superclass.name)

    return relations

In [7]:
def get_all_class_names_with_labels(loaded_graphs):
    """
    Build a per-ontology index of class names and labels.

    Each loaded graph or ontology is passed to ``extract_classes_with_labels``
    and the result is stored under the same name it was loaded with.

    Args:
    - loaded_graphs (dict): A dictionary where keys are the names and values are the loaded RDF graphs or OWL ontologies.

    Returns:
    - dict: A dictionary where keys are the graph names and values are dictionaries of class local names and labels.
    """
    return {
        graph_name: extract_classes_with_labels(ontology)
        for graph_name, ontology in loaded_graphs.items()
    }

In [8]:
# Ontology files to load, keyed by the name each one is referred to by.
# Entries that are commented out are currently disabled.
file_paths = {
    'pmdco': 'owlapi.xrdf',
    # 'ncit': 'ncit.owl',
    # 'Incoterms': 'dr.owl',
    'BFO': 'bfo.owl',
    # 'EMMO': 'emmo.ttl',
    'nfdicore': 'nfdicore_2.ttl',
    'EMMO_OWL': 'emmo.owl',
    'pmdco_TTL': 'pmdco_core.ttl',
    'materialsmine': 'materialsmine.ttl',
}

In [10]:
# Parse every ontology configured in file_paths. loaded_graphs maps each
# configured name to the rdflib Graph or owlready2 ontology returned for it.
loaded_graphs = load_ontology_files(file_paths)
