In [None]:
######   Find all classes common to the ontologies and save them, including their definitions

In [9]:
# Ontology files to compare (relative paths).
# Commented-out entries are kept as a record of files that were tried and
# excluded; the reason is given where one was noted.
ontology_files = [
    # "../Ontologies/materialsmine.ttl",  # excluded: file is not complete
    "../Ontologies/materialsmine_converted.ttl",
    "../Ontologies/pmdco_core.ttl",
    "../Ontologies/nfdicore_2.ttl",
    # "../Ontologies/bfo.owl",  # excluded: takes a long time to process
    "../Ontologies/emmo.ttl",
    # "../Ontologies/owlapi.xrdf",
    "../Ontologies/schemaorg.owl",
    # "../Ontologies/MaterialsMine.xrdf",
    # '../Ontologies/emmo.owl',  # excluded: problem reading the file
    # "../Ontologies/Physical_Activity_Ontology_V2.owl",
    # "../Ontologies/Physical_Activity_Ontology_V2.xrdf",
    # "../Ontologies/oboe.owl",
    # Add more file paths as needed
]

In [11]:
import csv
import re
from rdflib import Graph, RDF, RDFS, OWL, SKOS, Namespace, URIRef, Literal

# Define namespaces.
# NOTE(review): `skos`, `owl` and `rdfs` duplicate the upper-case SKOS, OWL and
# RDFS namespaces imported from rdflib above; the functions visible in this
# cell only use the upper-case ones. Of the four names below, only `dcterms`
# is referenced in this cell (by get_class_descriptions). Confirm that no
# other cell relies on the lower-case names before removing them.
skos = Namespace("http://www.w3.org/2004/02/skos/core#")
owl = Namespace("http://www.w3.org/2002/07/owl#")
rdfs = Namespace("http://www.w3.org/2000/01/rdf-schema#")
dcterms = Namespace("http://purl.org/dc/terms/")

# Function to normalize string
def normalize_string(s):
    """Return a case- and separator-insensitive form of ``s`` for comparison.

    The string is lower-cased, every run of underscores, hyphens, plus signs
    and whitespace is removed, and any literal ``...`` that remains is dropped.

    Args:
        s: The string to normalize.

    Returns:
        The normalized string.
    """
    # Lower-case first, then strip separator runs in a single regex pass.
    without_separators = re.sub(r'[_\-+\s]+', '', s.lower())
    # Ellipses are removed after separators, so ". . ." also disappears.
    return without_separators.replace('...', '')

def get_class_label(g, cls):
    """Return the first label found for ``cls`` in graph ``g``.

    Predicates are tried in this order: ``skos:altLabel``, ``skos:prefLabel``,
    ``rdfs:label``. The first object yielded by the first predicate that has
    any value is returned as-is.

    NOTE(review): alternative labels take precedence over preferred labels
    here; confirm that this ordering is intentional.

    Args:
        g: Graph-like object exposing ``objects(subject, predicate)``.
        cls: The class node to look up.

    Returns:
        The first matching label node, or ``None`` if the class has no label
        under any of the three predicates.
    """
    for label_predicate in (SKOS.altLabel, SKOS.prefLabel, RDFS.label):
        for candidate in g.objects(cls, label_predicate):
            # The first hit in priority order wins.
            return candidate
    return None

def get_class_descriptions(g, cls):
    """Return all textual descriptions of ``cls`` in graph ``g`` as one string.

    Values of ``dcterms:description``, ``skos:definition`` and
    ``rdfs:comment`` are collected in that order, converted with ``str`` and
    joined with single spaces.

    Args:
        g: Graph-like object exposing ``objects(subject, predicate)``.
        cls: The class node to look up.

    Returns:
        The joined description text, or ``None`` when the class has no value
        for any of the three predicates.
    """
    texts = []
    for description_predicate in (dcterms.description, SKOS.definition, RDFS.comment):
        texts.extend(str(value) for value in g.objects(cls, description_predicate))

    if not texts:
        return None
    return " ".join(texts)

def load_and_collect_classes(file_path):
    """Parse an ontology file and return its named classes as normalized strings.

    Every subject typed as ``owl:Class`` or ``rdfs:Class`` is considered.
    Only ``URIRef`` subjects are kept; each URI is converted to ``str`` and
    passed through ``normalize_string``.

    Args:
        file_path: Location of the ontology file, handed to ``Graph.parse``.

    Returns:
        A set of normalized class-URI strings.
    """
    graph = Graph()
    graph.parse(file_path)

    return {
        normalize_string(str(subject))
        for class_type in (OWL.Class, RDFS.Class)
        for subject in graph.subjects(RDF.type, class_type)
        if isinstance(subject, URIRef)
    }

def _collect_class_uris(g):
    """Index the named classes of ``g`` by their normalized URI string.

    Returns a dict mapping ``normalize_string(str(uri))`` to the set of
    original ``URIRef`` objects that normalize to that key.
    """
    uris_by_key = {}
    for class_type in (OWL.Class, RDFS.Class):
        for cls in g.subjects(RDF.type, class_type):
            # Only URIRef subjects are indexed; other node kinds are skipped.
            if isinstance(cls, URIRef):
                uris_by_key.setdefault(normalize_string(str(cls)), set()).add(cls)
    return uris_by_key


def find_common_classes_in_ontologies(ontology_files, output_csv):
    """Write the classes shared by all ontologies, with labels and definitions, to CSV.

    Two classes are treated as the same when their full URIs are equal after
    ``normalize_string``. For every ontology, one row is written per original
    class URI whose normalized form occurs in *all* ontologies.

    The normalized string is used only as the matching key. Labels and
    definitions are looked up with the original URI, because the normalized
    form is lower-cased and stripped of separators and therefore usually does
    not exist in the graph. The ``Class URI`` column holds the original URI.

    Args:
        ontology_files: Iterable of ontology file paths, each handed to
            ``Graph.parse``. If empty, only the header row is written.
        output_csv: Path of the CSV file to create (overwritten if present).

    Returns:
        None. Results are written to ``output_csv`` and a confirmation
        message is printed.
    """
    # Parse each ontology once and keep the graph together with its class
    # index, so the lookups below do not need a second parse.
    loaded = []
    for ontology_file in ontology_files:
        g = Graph()
        g.parse(ontology_file)
        loaded.append((ontology_file, g, _collect_class_uris(g)))

    # Keys present in every ontology; empty input yields an empty set.
    if loaded:
        common_classes = set.intersection(*(set(index) for _, _, index in loaded))
    else:
        common_classes = set()

    # Write common classes to CSV
    with open(output_csv, 'w', newline='', encoding='utf-8') as csvfile:
        csv_writer = csv.writer(csvfile)
        csv_writer.writerow(['File Path', 'Class URI', 'Label', 'Definition'])

        for ontology_file, g, index in loaded:
            # Sorted iteration keeps the output reproducible between runs.
            for key in sorted(common_classes):
                for cls in sorted(index[key], key=str):
                    label = get_class_label(g, cls)
                    definition = get_class_descriptions(g, cls)
                    csv_writer.writerow([ontology_file, str(cls), label, definition])

    print(f"Common classes found in all ontologies have been saved to '{output_csv}'.")

# Run the comparison on the `ontology_files` list and write the result to a
# CSV file. `ontology_files` is not defined in this cell, so the cell that
# defines it must be run first.
# To try other inputs, rebind the list before the call, e.g.:
# ontology_files = ['ontology1.ttl', 'ontology2.owl', 'ontology3.xrdf']
output_csv = 'common_classes.csv'

find_common_classes_in_ontologies(ontology_files, output_csv)


Common classes found in all ontologies have been saved to 'common_classes.csv'.
