In [None]:
###Version 3

In [1]:
# Class names to look up; they are compared with ontology labels after
# normalisation (case, spaces, '_', '-' and '+' are ignored).
initial_class_names_to_check = [
    'Compression',
    'AmperePerJoule',
    'nfdi',
    'stress',
]

In [15]:
# Ontology files to scan. Only uncommented entries are processed; the loader
# accepts paths ending in .ttl, .owl or .xrdf.
ontology_files = [
    "../Ontologies/materialsmine_converted.ttl",
    # --- currently disabled; uncomment to include ---
    # "../Ontologies/materialsmine.ttl",  # is not complete!
    # "../Ontologies/pmdco_core.ttl",
    # "../Ontologies/nfdicore_2.ttl",
    # "../Ontologies/bfo.owl",
    # "../Ontologies/emmo.ttl",
    # "../Ontologies/owlapi.xrdf",
    # "../Ontologies/schemaorg.owl",
    # "../Ontologies/MaterialsMine.xrdf",
    # "../Ontologies/emmo.owl",  # has problem of reading file
    # "../Ontologies/Physical_Activity_Ontology_V2.owl",
    # "../Ontologies/Physical_Activity_Ontology_V2.xrdf",
    # "../Ontologies/oboe.owl",
    # Add more file paths as needed
]

In [None]:
import csv
import re
from rdflib import Graph, RDF, RDFS, OWL, SKOS, Namespace, URIRef, Literal

# dcterms = Namespace("http://purl.org/dc/terms/")
# isPartOf = dcterms.isPartOf


# Domain / project vocabularies.
ex = Namespace("http://example.org/ontology/")
materialsmine = Namespace("http://materialsmine.org/ns/")
sio = Namespace("http://semanticscience.org/resource/")
bibo = Namespace("http://purl.org/ontology/bibo/")
foaf = Namespace("http://xmlns.com/foaf/0.1/")

# W3C core vocabularies. These lower-case names are distinct from the
# upper-case RDF / RDFS / OWL / SKOS objects imported from rdflib above.
rdf = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
rdfs = Namespace("http://www.w3.org/2000/01/rdf-schema#")
owl = Namespace("http://www.w3.org/2002/07/owl#")
skos = Namespace("http://www.w3.org/2004/02/skos/core#")
xsd = Namespace("http://www.w3.org/2001/XMLSchema#")
xml = Namespace("http://www.w3.org/XML/1998/namespace")

# Dublin Core terms; DCTERMS is the name the helper functions below read.
dcterms = Namespace("http://purl.org/dc/terms/")
DCTERMS = Namespace("http://purl.org/dc/terms/")
isPartOf = dcterms.isPartOf

# print(isPartOf)

def normalize_string(s):
    """Return a comparison key for ``s``.

    The text is lower-cased, every run of underscores, hyphens, plus signs
    and whitespace is removed, and any remaining '...' is dropped.
    """
    separator_runs = re.compile(r'[_\-+\s]+')
    compact = separator_runs.sub('', s.lower())
    return compact.replace('...', '')

def get_class_label(g, cls):
    """Return the first label of ``cls`` in graph ``g``, or None if it has none.

    Predicates are tried in this order: skos:altLabel, skos:prefLabel,
    rdfs:label.
    """
    for predicate in (SKOS.altLabel, SKOS.prefLabel, RDFS.label):
        for value in g.objects(cls, predicate):
            return value
    return None

def get_class_description(g, cls):
    """Return the first description of ``cls`` in graph ``g``, or None.

    Predicates are tried in this order: dcterms:description, skos:definition,
    rdfs:comment.
    """
    for predicate in (DCTERMS.description, SKOS.definition, RDFS.comment):
        for value in g.objects(cls, predicate):
            return value
    return None


def load_and_collect_classes_and_relations(file_path, class_names_to_check):
    """Parse one ontology file and collect the classes whose labels were requested.

    Args:
        file_path: Path ending in .ttl, .owl or .xrdf (matched case-insensitively).
        class_names_to_check: Iterable of names; each is compared with class
            labels after ``normalize_string``.

    Returns:
        A ``(data, relations, found_class_labels)`` tuple:
        * data: rows ``[file_path, class URI, label, description]``, one per
          matching label.
        * relations: rows ``[file_path, class URI, label, relation name,
          object URI, object label]`` for the subClassOf, equivalentClass and
          isPartOf statements of each matching class, in that order.
        * found_class_labels: set of normalised labels of every class in the
          file, whether requested or not.
        A missing label or description is stored as the string ``'None'``.

    Raises:
        ValueError: If ``file_path`` has none of the supported suffixes.
    """
    # Suffix -> rdflib parser name; .owl and .xrdf are both read as RDF/XML.
    parser_by_suffix = {'.ttl': 'ttl', '.owl': 'xml', '.xrdf': 'xml'}
    lowered_path = file_path.lower()
    file_format = None
    for suffix, parser_name in parser_by_suffix.items():
        if lowered_path.endswith(suffix):
            file_format = parser_name
            break
    if file_format is None:
        raise ValueError("Unsupported file format. Only .ttl, .owl and .xrdf files are supported.")

    g = Graph()
    g.parse(file_path, format=file_format)

    normalized_class_names_to_check = {normalize_string(name) for name in class_names_to_check}

    classes = set(g.subjects(RDF.type, OWL.Class)).union(g.subjects(RDF.type, RDFS.Class))

    # Relations exported for each matching class, in output order.
    relation_predicates = (
        ('subClassOf', RDFS.subClassOf),
        ('equivalentClass', OWL.equivalentClass),
        ('isPartOf', DCTERMS.isPartOf),
    )

    data = []
    relations = []
    found_class_labels = set()
    for cls in classes:
        labels = list(g.objects(cls, SKOS.altLabel)) + list(g.objects(cls, SKOS.prefLabel)) + list(g.objects(cls, RDFS.label))
        description = get_class_description(g, cls)
        for label in labels:
            normalized_label = normalize_string(label)
            # Every label is recorded, so the caller can look the rest up in a later pass.
            found_class_labels.add(normalized_label)
            if normalized_label not in normalized_class_names_to_check:
                continue
            data.append([file_path, str(cls), str(label), str(description)])
            for relation_name, predicate in relation_predicates:
                for obj in g.objects(cls, predicate):
                    obj_label = get_class_label(g, obj)
                    relations.append([file_path, str(cls), str(label), relation_name, str(obj), str(obj_label)])

    return data, relations, found_class_labels

def filter_relations(all_relations, initial_class_names_to_check):
    """Keep the relation rows that mention one of the requested class names.

    A row is kept when its subject label (index 2) or its object label
    (index 5) normalises to one of the normalised requested names.
    """
    wanted = set()
    for name in initial_class_names_to_check:
        wanted.add(normalize_string(name))

    kept = []
    for row in all_relations:
        if normalize_string(row[2]) in wanted or normalize_string(row[5]) in wanted:
            kept.append(row)
    return kept

def print_hierarchy(class_name, relations, g, writer):
    """Print and export the chain of relations that starts at ``class_name``.

    Every row of ``relations`` whose subject label (index 2) normalises to the
    same key as ``class_name`` is written to ``writer`` and printed; the walk
    then continues from that row's object label (index 5), indented by two
    spaces per level.

    Args:
        class_name: Label to start from; compared after ``normalize_string``.
        relations: Rows laid out as ``[file, subject URI, subject label,
            relation name, object URI, object label]``.
        g: Graph queried for the subject and object descriptions.
        writer: Object with a ``writerow`` method (e.g. a ``csv.writer``).
    """
    # Index the rows once by normalised subject label (order preserved) so
    # each level is a dictionary lookup instead of a scan over all rows.
    rows_by_subject = {}
    for relation in relations:
        rows_by_subject.setdefault(normalize_string(relation[2]), []).append(relation)

    def recursive_print(class_name, depth=0, path=()):
        key = normalize_string(class_name)
        if key in path:
            # This label is already on the current branch: following it again
            # would loop forever (e.g. mutual equivalentClass statements).
            return
        for relation in rows_by_subject.get(key, ()):
            subject_description = get_class_description(g, URIRef(relation[1]))
            object_description = get_class_description(g, URIRef(relation[4]))
            writer.writerow([relation[1], relation[2], subject_description, relation[3], relation[4], relation[5], object_description, relation[0]])
            indent = '  ' * depth
            print(f"{indent}{relation[1]} {relation[2]} is {relation[3]} {relation[4]} {relation[5]} (from {relation[0]})")
            recursive_print(relation[5], depth + 1, path + (key,))

    recursive_print(class_name)

# Output locations.
output_hierarchy_file = "class_hierarchy.csv"
class_output_file = "ontology_classes.csv"
relations_output_file = "ontology_relations.csv"

all_data = []
all_relations = []
all_found_class_labels = set()
# Normalised labels seen in each ontology file; filled while loading so the
# per-file report below does not have to parse any file again.
labels_by_file = {}

# Normalised once; every filter below compares against this set.
normalized_initial_class_names = {normalize_string(name) for name in initial_class_names_to_check}

class_names_to_check = initial_class_names_to_check

max_iterations = 2
iteration_count = 0

# Pass 1 looks up the requested names. Pass 2 looks up every other label that
# was seen, which supplies the relation rows needed to walk the hierarchy.
while class_names_to_check and iteration_count < max_iterations:
    iteration_count += 1
    new_data = []
    new_relations = []
    new_found_class_labels = set()

    for ontology_file in ontology_files:
        file_data, file_relations, found_class_labels = load_and_collect_classes_and_relations(ontology_file, class_names_to_check)
        new_data.extend(file_data)
        new_relations.extend(file_relations)
        new_found_class_labels.update(found_class_labels)
        labels_by_file.setdefault(ontology_file, set()).update(found_class_labels)

    all_data.extend(new_data)
    all_relations.extend(new_relations)
    all_found_class_labels.update(new_found_class_labels)

    # The loader returns labels already normalised, so a set difference is enough.
    class_names_to_check = sorted(new_found_class_labels - normalized_initial_class_names)

# Filter and save class data: only rows for the requested names.
filtered_data = [row for row in all_data if normalize_string(row[2]) in normalized_initial_class_names]
with open(class_output_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["File", "Class URI", "Label", "Description"])
    writer.writerows(filtered_data)

# Filter and save class relations: rows whose subject or object is a requested name.
filtered_relations = filter_relations(all_relations, initial_class_names_to_check)
with open(relations_output_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["File", "Subject Class URI", "Subject Label", "Relation", "Object Class URI", "Object Label"])
    writer.writerows(filtered_relations)

print(f"Filtered class data has been saved to {class_output_file}")
print(f"Filtered class relations have been saved to {relations_output_file}")

# Report, per requested name, the files that contain a label equal to it after
# normalisation. This is the same exact-match rule used for the CSV rows above,
# so a name is never reported as found without at least one file listed.
print("\nInitial class names found in the output:")
for class_name in initial_class_names_to_check:
    normalized_class_name = normalize_string(class_name)
    files_with_class = [
        ontology_file
        for ontology_file in ontology_files
        if normalized_class_name in labels_by_file.get(ontology_file, ())
    ]
    if files_with_class:
        print(f"Class '{class_name}' found in:")
        for ontology_file in files_with_class:
            print(f"- {ontology_file}")
    else:
        print(f"Class '{class_name}' not found in the output.")

# print_hierarchy reads descriptions from a graph. Passing an empty Graph()
# left both description columns blank, so build a small lookup graph from the
# descriptions already collected in all_data (first description seen per URI).
description_graph = Graph()
described_uris = set()
for row in all_data:
    class_uri, description = row[1], row[3]
    # The loader stores a missing description as the string 'None'.
    if description != 'None' and class_uri not in described_uris:
        described_uris.add(class_uri)
        description_graph.add((URIRef(class_uri), RDFS.comment, Literal(description)))

print("\nSaving class hierarchy to CSV file:")
with open(output_hierarchy_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["Class URI", "Class Name", "Class Description", "Relation Type", "Related Class URI", "Related Class Name", "Related Class Description", "File"])
    for class_name in initial_class_names_to_check:
        normalized_class_name = normalize_string(class_name)
        print_hierarchy(normalized_class_name, all_relations, description_graph, writer)

print(f"Class hierarchy has been saved to {output_hierarchy_file}")



In [None]:
import csv
import re
from rdflib import Graph, RDF, RDFS, OWL, SKOS, Namespace, URIRef, Literal

# Domain / project vocabularies.
ex = Namespace("http://example.org/ontology/")
materialsmine = Namespace("http://materialsmine.org/ns/")
sio = Namespace("http://semanticscience.org/resource/")
bibo = Namespace("http://purl.org/ontology/bibo/")
foaf = Namespace("http://xmlns.com/foaf/0.1/")

# W3C core vocabularies. These lower-case names are distinct from the
# upper-case RDF / RDFS / OWL / SKOS objects imported from rdflib above.
rdf = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
rdfs = Namespace("http://www.w3.org/2000/01/rdf-schema#")
owl = Namespace("http://www.w3.org/2002/07/owl#")
skos = Namespace("http://www.w3.org/2004/02/skos/core#")
xsd = Namespace("http://www.w3.org/2001/XMLSchema#")
xml = Namespace("http://www.w3.org/XML/1998/namespace")

# Dublin Core terms; DCTERMS is the name the helper functions below read.
dcterms = Namespace("http://purl.org/dc/terms/")
DCTERMS = Namespace("http://purl.org/dc/terms/")
isPartOf = dcterms.isPartOf

def normalize_string(s):
    """Return a comparison key for ``s``.

    The text is lower-cased, every run of underscores, hyphens, plus signs
    and whitespace is removed, and any remaining '...' is dropped.
    """
    separator_runs = re.compile(r'[_\-+\s]+')
    compact = separator_runs.sub('', s.lower())
    return compact.replace('...', '')

def get_class_label(g, cls):
    """Return the first label of ``cls`` in graph ``g``, or None if it has none.

    Predicates are tried in this order: skos:altLabel, skos:prefLabel,
    rdfs:label.
    """
    for predicate in (SKOS.altLabel, SKOS.prefLabel, RDFS.label):
        for value in g.objects(cls, predicate):
            return value
    return None

def get_class_description(g, cls):
    """Return the first description of ``cls`` in graph ``g``, or None.

    Predicates are tried in this order: dcterms:description, skos:definition,
    rdfs:comment.
    """
    for predicate in (DCTERMS.description, SKOS.definition, RDFS.comment):
        for value in g.objects(cls, predicate):
            return value
    return None

def load_and_collect_classes_and_relations(file_path, class_names_to_check):
    """Parse one ontology file and collect the classes whose labels were requested.

    Args:
        file_path: Path ending in .ttl, .owl or .xrdf (matched case-insensitively).
        class_names_to_check: Iterable of names; each is compared with class
            labels after ``normalize_string``.

    Returns:
        A ``(data, relations, found_class_labels)`` tuple:
        * data: rows ``[file_path, class URI, label, description]``, one per
          matching label.
        * relations: rows ``[file_path, class URI, label, relation name,
          object URI, object label, object description]`` for the subClassOf,
          equivalentClass and isPartOf statements of each matching class, in
          that order.
        * found_class_labels: set of normalised labels of every class in the
          file, whether requested or not.
        A missing label or description is stored as the string ``'None'``.

    Raises:
        ValueError: If ``file_path`` has none of the supported suffixes.
    """
    # Suffix -> rdflib parser name; .owl and .xrdf are both read as RDF/XML.
    parser_by_suffix = {'.ttl': 'ttl', '.owl': 'xml', '.xrdf': 'xml'}
    lowered_path = file_path.lower()
    file_format = None
    for suffix, parser_name in parser_by_suffix.items():
        if lowered_path.endswith(suffix):
            file_format = parser_name
            break
    if file_format is None:
        raise ValueError("Unsupported file format. Only .ttl, .owl and .xrdf files are supported.")

    g = Graph()
    g.parse(file_path, format=file_format)

    normalized_class_names_to_check = {normalize_string(name) for name in class_names_to_check}

    classes = set(g.subjects(RDF.type, OWL.Class)).union(g.subjects(RDF.type, RDFS.Class))

    # Relations exported for each matching class, in output order.
    relation_predicates = (
        ('subClassOf', RDFS.subClassOf),
        ('equivalentClass', OWL.equivalentClass),
        ('isPartOf', DCTERMS.isPartOf),
    )

    data = []
    relations = []
    found_class_labels = set()
    for cls in classes:
        labels = list(g.objects(cls, SKOS.altLabel)) + list(g.objects(cls, SKOS.prefLabel)) + list(g.objects(cls, RDFS.label))
        description = get_class_description(g, cls)
        for label in labels:
            normalized_label = normalize_string(label)
            # Every label is recorded, so the caller can look the rest up in a later pass.
            found_class_labels.add(normalized_label)
            if normalized_label not in normalized_class_names_to_check:
                continue
            data.append([file_path, str(cls), str(label), str(description)])
            for relation_name, predicate in relation_predicates:
                for obj in g.objects(cls, predicate):
                    obj_label = get_class_label(g, obj)
                    obj_description = get_class_description(g, obj)
                    relations.append([file_path, str(cls), str(label), relation_name, str(obj), str(obj_label), str(obj_description)])

    return data, relations, found_class_labels

def filter_relations(all_relations, initial_class_names_to_check):
    """Keep the relation rows that mention one of the requested class names.

    A row is kept when its subject label (index 2) or its object label
    (index 5) normalises to one of the normalised requested names.
    """
    wanted = set()
    for name in initial_class_names_to_check:
        wanted.add(normalize_string(name))

    kept = []
    for row in all_relations:
        if normalize_string(row[2]) in wanted or normalize_string(row[5]) in wanted:
            kept.append(row)
    return kept

def print_hierarchy(class_name, relations, g, writer):
    """Print and export the chain of relations that starts at ``class_name``.

    Every row of ``relations`` whose subject label (index 2) normalises to the
    same key as ``class_name`` is written to ``writer`` and printed; the walk
    then continues from that row's object label (index 5), indented by two
    spaces per level.

    Args:
        class_name: Label to start from; compared after ``normalize_string``.
        relations: Rows laid out as ``[file, subject URI, subject label,
            relation name, object URI, object label, object description]``;
            the last column is optional.
        g: Graph queried for the subject and object descriptions.
        writer: Object with a ``writerow`` method (e.g. a ``csv.writer``).
    """
    # Index the rows once by normalised subject label (order preserved) so
    # each level is a dictionary lookup instead of a scan over all rows.
    rows_by_subject = {}
    for relation in relations:
        rows_by_subject.setdefault(normalize_string(relation[2]), []).append(relation)

    def recursive_print(class_name, depth=0, path=()):
        key = normalize_string(class_name)
        if key in path:
            # This label is already on the current branch: following it again
            # would loop forever (e.g. mutual equivalentClass statements).
            return
        for relation in rows_by_subject.get(key, ()):
            subject_description = get_class_description(g, URIRef(relation[1]))
            object_description = get_class_description(g, URIRef(relation[4]))
            # Fall back to the description carried in the row when the graph
            # has none; the loader stores a missing one as the string 'None'.
            if object_description is None and len(relation) > 6 and relation[6] != 'None':
                object_description = relation[6]
            writer.writerow([relation[1], relation[2], subject_description, relation[3], relation[4], relation[5], object_description, relation[0]])
            indent = '  ' * depth
            print(f"{indent}{relation[1]} {relation[2]} is {relation[3]} {relation[4]} {relation[5]} (from {relation[0]})")
            recursive_print(relation[5], depth + 1, path + (key,))

    recursive_print(class_name)

# Output locations.
output_hierarchy_file = "class_hierarchy.csv"
class_output_file = "ontology_classes.csv"
relations_output_file = "ontology_relations.csv"

all_data = []
all_relations = []
all_found_class_labels = set()
# Normalised labels seen in each ontology file; filled while loading so the
# per-file report below does not have to parse any file again.
labels_by_file = {}

# Normalised once; every filter below compares against this set.
normalized_initial_class_names = {normalize_string(name) for name in initial_class_names_to_check}

class_names_to_check = initial_class_names_to_check

max_iterations = 2
iteration_count = 0

# Pass 1 looks up the requested names. Pass 2 looks up every other label that
# was seen, which supplies the relation rows needed to walk the hierarchy.
while class_names_to_check and iteration_count < max_iterations:
    iteration_count += 1
    new_data = []
    new_relations = []
    new_found_class_labels = set()

    for ontology_file in ontology_files:
        file_data, file_relations, found_class_labels = load_and_collect_classes_and_relations(ontology_file, class_names_to_check)
        new_data.extend(file_data)
        new_relations.extend(file_relations)
        new_found_class_labels.update(found_class_labels)
        labels_by_file.setdefault(ontology_file, set()).update(found_class_labels)

    all_data.extend(new_data)
    all_relations.extend(new_relations)
    all_found_class_labels.update(new_found_class_labels)

    # The loader returns labels already normalised, so a set difference is enough.
    class_names_to_check = sorted(new_found_class_labels - normalized_initial_class_names)

# Filter and save class data: only rows for the requested names.
filtered_data = [row for row in all_data if normalize_string(row[2]) in normalized_initial_class_names]
with open(class_output_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["File", "Class URI", "Label", "Description"])
    writer.writerows(filtered_data)

# Filter and save class relations: rows whose subject or object is a requested name.
filtered_relations = filter_relations(all_relations, initial_class_names_to_check)
with open(relations_output_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["File", "Subject Class URI", "Subject Label", "Relation", "Object Class URI", "Object Label", "Object Description"])
    writer.writerows(filtered_relations)

print(f"Filtered class data has been saved to {class_output_file}")
print(f"Filtered class relations have been saved to {relations_output_file}")

# Report, per requested name, the files that contain a label equal to it after
# normalisation. This is the same exact-match rule used for the CSV rows above,
# so a name is never reported as found without at least one file listed.
print("\nInitial class names found in the output:")
for class_name in initial_class_names_to_check:
    normalized_class_name = normalize_string(class_name)
    files_with_class = [
        ontology_file
        for ontology_file in ontology_files
        if normalized_class_name in labels_by_file.get(ontology_file, ())
    ]
    if files_with_class:
        print(f"Class '{class_name}' found in:")
        for ontology_file in files_with_class:
            print(f"- {ontology_file}")
    else:
        print(f"Class '{class_name}' not found in the output.")

# print_hierarchy reads descriptions from a graph. Passing an empty Graph()
# left both description columns blank, so build a small lookup graph from the
# descriptions already collected in all_data (first description seen per URI).
description_graph = Graph()
described_uris = set()
for row in all_data:
    class_uri, description = row[1], row[3]
    # The loader stores a missing description as the string 'None'.
    if description != 'None' and class_uri not in described_uris:
        described_uris.add(class_uri)
        description_graph.add((URIRef(class_uri), RDFS.comment, Literal(description)))

print("\nSaving class hierarchy to CSV file:")
with open(output_hierarchy_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["Class URI", "Class Name", "Class Description", "Relation Type", "Related Class URI", "Related Class Name", "Related Class Description", "File"])
    for class_name in initial_class_names_to_check:
        normalized_class_name = normalize_string(class_name)
        print_hierarchy(normalized_class_name, all_relations, description_graph, writer)

print(f"Class hierarchy has been saved to {output_hierarchy_file}")


In [3]:
import csv
import re
from rdflib import Graph, RDF, RDFS, OWL, SKOS, Namespace, URIRef, Literal

# Domain / project vocabularies.
ex = Namespace("http://example.org/ontology/")
materialsmine = Namespace("http://materialsmine.org/ns/")
sio = Namespace("http://semanticscience.org/resource/")
bibo = Namespace("http://purl.org/ontology/bibo/")
foaf = Namespace("http://xmlns.com/foaf/0.1/")

# W3C core vocabularies. These lower-case names are distinct from the
# upper-case RDF / RDFS / OWL / SKOS objects imported from rdflib above.
rdf = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
rdfs = Namespace("http://www.w3.org/2000/01/rdf-schema#")
owl = Namespace("http://www.w3.org/2002/07/owl#")
skos = Namespace("http://www.w3.org/2004/02/skos/core#")
xsd = Namespace("http://www.w3.org/2001/XMLSchema#")
xml = Namespace("http://www.w3.org/XML/1998/namespace")

# Dublin Core terms; DCTERMS is the name the helper functions below read.
dcterms = Namespace("http://purl.org/dc/terms/")
DCTERMS = Namespace("http://purl.org/dc/terms/")
isPartOf = dcterms.isPartOf

def normalize_string(s):
    """Return a comparison key for ``s``.

    The text is lower-cased, every run of underscores, hyphens, plus signs
    and whitespace is removed, and any remaining '...' is dropped.
    """
    separator_runs = re.compile(r'[_\-+\s]+')
    compact = separator_runs.sub('', s.lower())
    return compact.replace('...', '')

def get_class_label(g, cls):
    """Return the first label of ``cls`` in graph ``g``, or None if it has none.

    Predicates are tried in this order: skos:altLabel, skos:prefLabel,
    rdfs:label.
    """
    for predicate in (SKOS.altLabel, SKOS.prefLabel, RDFS.label):
        for value in g.objects(cls, predicate):
            return value
    return None

def get_class_description(g, cls):
    """Return the first description of ``cls`` in graph ``g``, or None.

    Predicates are tried in this order: dcterms:description, skos:definition,
    rdfs:comment.
    """
    for predicate in (DCTERMS.description, SKOS.definition, RDFS.comment):
        for value in g.objects(cls, predicate):
            return value
    return None

def load_and_collect_classes_and_relations(file_path, class_names_to_check):
    """Parse one ontology file and collect the classes whose labels were requested.

    Args:
        file_path: Path ending in .ttl, .owl or .xrdf (matched case-insensitively).
        class_names_to_check: Iterable of names; each is compared with class
            labels after ``normalize_string``.

    Returns:
        A ``(data, relations, found_class_labels)`` tuple:
        * data: rows ``[file_path, class URI, label, description]``, one per
          matching label.
        * relations: rows ``[file_path, class URI, label, relation name,
          object URI, object label, object description]`` for the subClassOf,
          equivalentClass and isPartOf statements of each matching class, in
          that order.
        * found_class_labels: set of normalised labels of every class in the
          file, whether requested or not.
        A missing label or description is stored as the string ``'None'``.

    Raises:
        ValueError: If ``file_path`` has none of the supported suffixes.
    """
    # Suffix -> rdflib parser name; .owl and .xrdf are both read as RDF/XML.
    parser_by_suffix = {'.ttl': 'ttl', '.owl': 'xml', '.xrdf': 'xml'}
    lowered_path = file_path.lower()
    file_format = None
    for suffix, parser_name in parser_by_suffix.items():
        if lowered_path.endswith(suffix):
            file_format = parser_name
            break
    if file_format is None:
        raise ValueError("Unsupported file format. Only .ttl, .owl and .xrdf files are supported.")

    g = Graph()
    g.parse(file_path, format=file_format)

    normalized_class_names_to_check = {normalize_string(name) for name in class_names_to_check}

    classes = set(g.subjects(RDF.type, OWL.Class)).union(g.subjects(RDF.type, RDFS.Class))

    # Relations exported for each matching class, in output order.
    relation_predicates = (
        ('subClassOf', RDFS.subClassOf),
        ('equivalentClass', OWL.equivalentClass),
        ('isPartOf', DCTERMS.isPartOf),
    )

    data = []
    relations = []
    found_class_labels = set()
    for cls in classes:
        labels = list(g.objects(cls, SKOS.altLabel)) + list(g.objects(cls, SKOS.prefLabel)) + list(g.objects(cls, RDFS.label))
        description = get_class_description(g, cls)
        for label in labels:
            normalized_label = normalize_string(label)
            # Every label is recorded, so the caller can look the rest up in a later pass.
            found_class_labels.add(normalized_label)
            if normalized_label not in normalized_class_names_to_check:
                continue
            data.append([file_path, str(cls), str(label), str(description)])
            for relation_name, predicate in relation_predicates:
                for obj in g.objects(cls, predicate):
                    obj_label = get_class_label(g, obj)
                    obj_description = get_class_description(g, obj)
                    relations.append([file_path, str(cls), str(label), relation_name, str(obj), str(obj_label), str(obj_description)])

    return data, relations, found_class_labels

def filter_relations(all_relations, initial_class_names_to_check):
    """Keep the relation rows that mention one of the requested class names.

    A row is kept when its subject label (index 2) or its object label
    (index 5) normalises to one of the normalised requested names.
    """
    wanted = set()
    for name in initial_class_names_to_check:
        wanted.add(normalize_string(name))

    kept = []
    for row in all_relations:
        if normalize_string(row[2]) in wanted or normalize_string(row[5]) in wanted:
            kept.append(row)
    return kept

def print_hierarchy(class_name, relations, g, writer):
    """Print and export the chain of relations that starts at ``class_name``.

    Every row of ``relations`` whose subject label (index 2) normalises to the
    same key as ``class_name`` is written to ``writer`` and printed; the walk
    then continues from that row's object label (index 5), indented by two
    spaces per level.

    Args:
        class_name: Label to start from; compared after ``normalize_string``.
        relations: Rows laid out as ``[file, subject URI, subject label,
            relation name, object URI, object label, object description]``;
            the last column is optional.
        g: Graph queried for the subject and object descriptions.
        writer: Object with a ``writerow`` method (e.g. a ``csv.writer``).
    """
    # Index the rows once by normalised subject label (order preserved) so
    # each level is a dictionary lookup instead of a scan over all rows.
    rows_by_subject = {}
    for relation in relations:
        rows_by_subject.setdefault(normalize_string(relation[2]), []).append(relation)

    def recursive_print(class_name, depth=0, path=()):
        key = normalize_string(class_name)
        if key in path:
            # This label is already on the current branch: following it again
            # would loop forever (e.g. mutual equivalentClass statements).
            return
        for relation in rows_by_subject.get(key, ()):
            subject_description = get_class_description(g, URIRef(relation[1]))
            object_description = get_class_description(g, URIRef(relation[4]))
            # Fall back to the description carried in the row when the graph
            # has none; the loader stores a missing one as the string 'None'.
            if object_description is None and len(relation) > 6 and relation[6] != 'None':
                object_description = relation[6]
            writer.writerow([relation[1], relation[2], subject_description, relation[3], relation[4], relation[5], object_description, relation[0]])
            indent = '  ' * depth
            print(f"{indent}{relation[1]} {relation[2]} is {relation[3]} {relation[4]} {relation[5]} (from {relation[0]})")
            recursive_print(relation[5], depth + 1, path + (key,))

    recursive_print(class_name)


# Output locations.
output_hierarchy_file = "class_hierarchy.csv"
class_output_file = "ontology_classes.csv"
relations_output_file = "ontology_relations.csv"

all_data = []
all_relations = []
all_found_class_labels = set()
# Normalised labels seen in each ontology file; filled while loading so the
# per-file report below does not have to parse any file again.
labels_by_file = {}

# Normalised once; every filter below compares against this set.
normalized_initial_class_names = {normalize_string(name) for name in initial_class_names_to_check}

class_names_to_check = initial_class_names_to_check

max_iterations = 2
iteration_count = 0

# Pass 1 looks up the requested names. Pass 2 looks up every other label that
# was seen, which supplies the relation rows needed to walk the hierarchy.
while class_names_to_check and iteration_count < max_iterations:
    iteration_count += 1
    new_data = []
    new_relations = []
    new_found_class_labels = set()

    for ontology_file in ontology_files:
        file_data, file_relations, found_class_labels = load_and_collect_classes_and_relations(ontology_file, class_names_to_check)
        new_data.extend(file_data)
        new_relations.extend(file_relations)
        new_found_class_labels.update(found_class_labels)
        labels_by_file.setdefault(ontology_file, set()).update(found_class_labels)

    all_data.extend(new_data)
    all_relations.extend(new_relations)
    all_found_class_labels.update(new_found_class_labels)

    # The loader returns labels already normalised, so a set difference is enough.
    class_names_to_check = sorted(new_found_class_labels - normalized_initial_class_names)

# Filter and save class data: only rows for the requested names.
filtered_data = [row for row in all_data if normalize_string(row[2]) in normalized_initial_class_names]
with open(class_output_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["File", "Class URI", "Label", "Description"])
    writer.writerows(filtered_data)

# Filter and save class relations: rows whose subject or object is a requested name.
filtered_relations = filter_relations(all_relations, initial_class_names_to_check)
with open(relations_output_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["File", "Subject Class URI", "Subject Label", "Relation", "Object Class URI", "Object Label", "Object Description"])
    writer.writerows(filtered_relations)

print(f"Filtered class data has been saved to {class_output_file}")
print(f"Filtered class relations have been saved to {relations_output_file}")

# Report, per requested name, the files that contain a label equal to it after
# normalisation. This is the same exact-match rule used for the CSV rows above,
# so a name is never reported as found without at least one file listed.
print("\nInitial class names found in the output:")
for class_name in initial_class_names_to_check:
    normalized_class_name = normalize_string(class_name)
    files_with_class = [
        ontology_file
        for ontology_file in ontology_files
        if normalized_class_name in labels_by_file.get(ontology_file, ())
    ]
    if files_with_class:
        print(f"Class '{class_name}' found in:")
        for ontology_file in files_with_class:
            print(f"- {ontology_file}")
    else:
        print(f"Class '{class_name}' not found in the output.")

# print_hierarchy reads descriptions from a graph. Passing an empty Graph()
# left both description columns blank, so build a small lookup graph from the
# descriptions already collected in all_data (first description seen per URI).
description_graph = Graph()
described_uris = set()
for row in all_data:
    class_uri, description = row[1], row[3]
    # The loader stores a missing description as the string 'None'.
    if description != 'None' and class_uri not in described_uris:
        described_uris.add(class_uri)
        description_graph.add((URIRef(class_uri), RDFS.comment, Literal(description)))

print("\nSaving class hierarchy to CSV file:")
with open(output_hierarchy_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["Class URI", "Class Name", "Class Description", "Relation Type", "Related Class URI", "Related Class Name", "Related Class Description", "File"])
    for class_name in initial_class_names_to_check:
        normalized_class_name = normalize_string(class_name)
        print_hierarchy(normalized_class_name, all_relations, description_graph, writer)

print(f"Class hierarchy has been saved to {output_hierarchy_file}")


Filtered class data has been saved to ontology_classes.csv
Filtered class relations have been saved to ontology_relations.csv

Initial class names found in the output:
Class 'Compression' found in:
- ../Ontologies/materialsmine_converted.ttl
Class 'AmperePerJoule' found in:
- ../Ontologies/emmo.ttl
Class 'nfdi' not found in the output.
Class 'stress' found in:
- ../Ontologies/materialsmine_converted.ttl
- ../Ontologies/pmdco_core.ttl
- ../Ontologies/emmo.ttl

Saving class hierarchy to CSV file:
http://materialsmine.org/ns/Compression Compression is subClassOf http://materialsmine.org/ns/ViscoelasticProperty Viscoelastic Property (from ../Ontologies/materialsmine_converted.ttl)
  http://materialsmine.org/ns/ViscoelasticProperty Viscoelastic Property is subClassOf http://semanticscience.org/resource/Quantity Amount (from ../Ontologies/materialsmine_converted.ttl)
    http://semanticscience.org/resource/Quantity Amount is subClassOf http://semanticscience.org/resource/MeasurementValue mea

In [4]:
import csv
import re
from rdflib import Graph, RDF, RDFS, OWL, SKOS, Namespace, URIRef, Literal

# Domain / project vocabularies.
ex = Namespace("http://example.org/ontology/")
materialsmine = Namespace("http://materialsmine.org/ns/")
sio = Namespace("http://semanticscience.org/resource/")
bibo = Namespace("http://purl.org/ontology/bibo/")
foaf = Namespace("http://xmlns.com/foaf/0.1/")

# W3C core vocabularies. These lower-case names are distinct from the
# upper-case RDF / RDFS / OWL / SKOS objects imported from rdflib above.
rdf = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
rdfs = Namespace("http://www.w3.org/2000/01/rdf-schema#")
owl = Namespace("http://www.w3.org/2002/07/owl#")
skos = Namespace("http://www.w3.org/2004/02/skos/core#")
xsd = Namespace("http://www.w3.org/2001/XMLSchema#")
xml = Namespace("http://www.w3.org/XML/1998/namespace")

# Dublin Core terms; DCTERMS is the name the helper functions below read.
dcterms = Namespace("http://purl.org/dc/terms/")
DCTERMS = Namespace("http://purl.org/dc/terms/")
isPartOf = dcterms.isPartOf

def normalize_string(s):
    """Return a comparison key for ``s``.

    The text is lower-cased, every run of underscores, hyphens, plus signs
    and whitespace is removed, and any remaining '...' is dropped.
    """
    separator_runs = re.compile(r'[_\-+\s]+')
    compact = separator_runs.sub('', s.lower())
    return compact.replace('...', '')

def get_class_label(g, cls):
    """Return the first label of ``cls`` in graph ``g``, or None if it has none.

    Predicates are tried in this order: skos:altLabel, skos:prefLabel,
    rdfs:label.
    """
    for predicate in (SKOS.altLabel, SKOS.prefLabel, RDFS.label):
        for value in g.objects(cls, predicate):
            return value
    return None

def get_class_description(g, cls):
    """Return the first description of ``cls`` in graph ``g``, or None.

    Predicates are tried in this order: dcterms:description, skos:definition,
    rdfs:comment.
    """
    for predicate in (DCTERMS.description, SKOS.definition, RDFS.comment):
        for value in g.objects(cls, predicate):
            return value
    return None

def load_and_collect_classes_and_relations(file_path, class_names_to_check):
    """Parse one ontology file and collect the classes whose labels were requested.

    Args:
        file_path: Path ending in .ttl, .owl or .xrdf (matched case-insensitively).
        class_names_to_check: Iterable of names; each is compared with class
            labels after ``normalize_string``.

    Returns:
        A ``(data, relations, found_class_labels)`` tuple:
        * data: rows ``[file_path, class URI, label, description]``, one per
          matching label.
        * relations: rows ``[file_path, class URI, label, relation name,
          object URI, object label, object description]`` for the subClassOf,
          equivalentClass and isPartOf statements of each matching class, in
          that order.
        * found_class_labels: set of normalised labels of every class in the
          file, whether requested or not.
        A missing label or description is stored as the string ``'None'``.

    Raises:
        ValueError: If ``file_path`` has none of the supported suffixes.
    """
    # Suffix -> rdflib parser name; .owl and .xrdf are both read as RDF/XML.
    parser_by_suffix = {'.ttl': 'ttl', '.owl': 'xml', '.xrdf': 'xml'}
    lowered_path = file_path.lower()
    file_format = None
    for suffix, parser_name in parser_by_suffix.items():
        if lowered_path.endswith(suffix):
            file_format = parser_name
            break
    if file_format is None:
        raise ValueError("Unsupported file format. Only .ttl, .owl and .xrdf files are supported.")

    g = Graph()
    g.parse(file_path, format=file_format)

    normalized_class_names_to_check = {normalize_string(name) for name in class_names_to_check}

    classes = set(g.subjects(RDF.type, OWL.Class)).union(g.subjects(RDF.type, RDFS.Class))

    # Relations exported for each matching class, in output order.
    relation_predicates = (
        ('subClassOf', RDFS.subClassOf),
        ('equivalentClass', OWL.equivalentClass),
        ('isPartOf', DCTERMS.isPartOf),
    )

    data = []
    relations = []
    found_class_labels = set()
    for cls in classes:
        labels = list(g.objects(cls, SKOS.altLabel)) + list(g.objects(cls, SKOS.prefLabel)) + list(g.objects(cls, RDFS.label))
        description = get_class_description(g, cls)
        for label in labels:
            normalized_label = normalize_string(label)
            # Every label is recorded, so the caller can look the rest up in a later pass.
            found_class_labels.add(normalized_label)
            if normalized_label not in normalized_class_names_to_check:
                continue
            data.append([file_path, str(cls), str(label), str(description)])
            for relation_name, predicate in relation_predicates:
                for obj in g.objects(cls, predicate):
                    obj_label = get_class_label(g, obj)
                    obj_description = get_class_description(g, obj)
                    relations.append([file_path, str(cls), str(label), relation_name, str(obj), str(obj_label), str(obj_description)])

    return data, relations, found_class_labels

def filter_relations(all_relations, initial_class_names_to_check):
    """Keep the relation rows that mention one of the requested class names.

    A row is kept when its subject label (index 2) or its object label
    (index 5) normalises to one of the normalised requested names.
    """
    wanted = set()
    for name in initial_class_names_to_check:
        wanted.add(normalize_string(name))

    kept = []
    for row in all_relations:
        if normalize_string(row[2]) in wanted or normalize_string(row[5]) in wanted:
            kept.append(row)
    return kept

def print_hierarchy(class_name, relations, g, writer):
    """Print and record the chain of relations that starts at ``class_name``.

    Every row of ``relations`` whose subject label (index 2) normalizes to the
    same string as the current class name is written to ``writer`` and printed
    with two spaces of indentation per depth level; the walk then continues
    from that row's object label (index 5).

    Args:
        class_name: Label of the class to start from.
        relations: Rows shaped ``[file, subject URI, subject label, relation,
            object URI, object label, object description]``.
        g: Graph in which the subject and object descriptions are looked up
            with ``get_class_description``.
        writer: Object exposing ``writerow`` (e.g. a ``csv.writer``).
    """
    def recursive_print(class_name, depth=0, active_path=frozenset()):
        current = normalize_string(class_name)
        # Labels on the current recursion path.  Following an edge back to one
        # of them would recurse forever (mutually equivalent classes, or a
        # parent that shares its child's label), so such an edge is still
        # reported but not descended into.
        path = active_path | {current}
        for relation in relations:
            if normalize_string(relation[2]) != current:
                continue
            subject_description = get_class_description(g, URIRef(relation[1]))
            object_description = get_class_description(g, URIRef(relation[4]))
            writer.writerow([relation[1], relation[2], subject_description, relation[3], relation[4], relation[5], object_description, relation[0]])
            indent = '  ' * depth
            print(f"{indent}{relation[1]} {relation[2]} is {relation[3]} {relation[4]} {relation[5]} (from {relation[0]})")
            if normalize_string(relation[5]) not in path:
                recursive_print(relation[5], depth + 1, path)

    recursive_print(class_name)


# Output files, written to the notebook's working directory.
output_hierarchy_file = "class_hierarchy.csv"
class_output_file = "ontology_classes.csv"
relations_output_file = "ontology_relations.csv"

all_data = []
all_relations = []
all_found_class_labels = set()
# Normalized labels seen per ontology file, kept so that the report further
# down can tell which file contains a class without parsing the files again.
found_labels_by_file = {}

# Computed once; used by the loop, the class filter and the report.
normalized_initial_class_names = {normalize_string(name) for name in initial_class_names_to_check}

class_names_to_check = initial_class_names_to_check

# Pass 1 looks up the requested names; pass 2 looks up every other label found
# in pass 1, so all_relations also holds the relations of non-requested
# classes, which print_hierarchy walks.
max_iterations = 2
iteration_count = 0

while class_names_to_check and iteration_count < max_iterations:
    iteration_count += 1
    new_data = []
    new_relations = []
    new_found_class_labels = set()

    for ontology_file in ontology_files:
        file_data, file_relations, found_class_labels = load_and_collect_classes_and_relations(ontology_file, class_names_to_check)
        new_data.extend(file_data)
        new_relations.extend(file_relations)
        new_found_class_labels.update(found_class_labels)
        found_labels_by_file.setdefault(ontology_file, set()).update(found_class_labels)

    all_data.extend(new_data)
    all_relations.extend(new_relations)
    all_found_class_labels.update(new_found_class_labels)

    # Next pass: everything that was found, minus what was asked for initially.
    class_names_to_check = {str(label) for label in new_found_class_labels} - normalized_initial_class_names
    class_names_to_check = [normalize_string(name) for name in class_names_to_check]

# Filter and save class data
filtered_data = [row for row in all_data if normalize_string(row[2]) in normalized_initial_class_names]
with open(class_output_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["File", "Class URI", "Label", "Description"])
    writer.writerows(filtered_data)

# Filter and save class relations
filtered_relations = filter_relations(all_relations, initial_class_names_to_check)
with open(relations_output_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["File", "Subject Class URI", "Subject Label", "Relation", "Object Class URI", "Object Label", "Object Description"])
    writer.writerows(filtered_relations)

print(f"Filtered class data has been saved to {class_output_file}")
print(f"Filtered class relations have been saved to {relations_output_file}")

print("\nInitial class names found in the output:")
for class_name in initial_class_names_to_check:
    normalized_class_name = normalize_string(class_name)
    # A name counts as found when it is a substring of any collected label;
    # a file is listed only when it holds an exactly matching label, so a
    # "found in:" header can be followed by no file at all.
    found = any(normalized_class_name in label for label in all_found_class_labels)
    if found:
        print(f"Class '{class_name}' found in:")
        for ontology_file in ontology_files:
            if normalized_class_name in found_labels_by_file.get(ontology_file, ()):
                print(f"- {ontology_file}")
    else:
        print(f"Class '{class_name}' not found in the output.")

print("\nSaving class hierarchy to CSV file:")
with open(output_hierarchy_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["Class URI", "Class Name", "Class Description", "Relation Type", "Related Class URI", "Related Class Name", "Related Class Description", "File"])
    for class_name in initial_class_names_to_check:
        normalized_class_name = normalize_string(class_name)
        # NOTE(review): an empty Graph is passed here, so the description
        # lookups inside print_hierarchy find nothing and both description
        # columns of this CSV are written empty.
        print_hierarchy(normalized_class_name, all_relations, Graph(), writer)

print(f"Class hierarchy has been saved to {output_hierarchy_file}")


Filtered class data has been saved to ontology_classes.csv
Filtered class relations have been saved to ontology_relations.csv

Initial class names found in the output:
Class 'Compression' found in:
- ../Ontologies/materialsmine_converted.ttl
Class 'AmperePerJoule' found in:
- ../Ontologies/emmo.ttl
Class 'nfdi' not found in the output.
Class 'stress' found in:
- ../Ontologies/materialsmine_converted.ttl
- ../Ontologies/pmdco_core.ttl
- ../Ontologies/emmo.ttl

Saving class hierarchy to CSV file:
http://materialsmine.org/ns/Compression Compression is subClassOf http://materialsmine.org/ns/ViscoelasticProperty Viscoelastic Property (from ../Ontologies/materialsmine_converted.ttl)
  http://materialsmine.org/ns/ViscoelasticProperty Viscoelastic Property is subClassOf http://semanticscience.org/resource/Quantity Amount (from ../Ontologies/materialsmine_converted.ttl)
    http://semanticscience.org/resource/Quantity Amount is subClassOf http://semanticscience.org/resource/MeasurementValue mea

In [3]:
#### removing blank nodes

In [5]:
import csv
import re
from rdflib import Graph, RDF, RDFS, OWL, SKOS, Namespace, URIRef, Literal

# Define namespaces
# Of the names below, only DCTERMS is referenced by the code in this cell;
# the lowercase ones are not used here (other cells may rely on them - TODO
# confirm before removing any).
ex = Namespace("http://example.org/ontology/")
sio = Namespace("http://semanticscience.org/resource/")
skos = Namespace("http://www.w3.org/2004/02/skos/core#")
owl = Namespace("http://www.w3.org/2002/07/owl#")
rdfs = Namespace("http://www.w3.org/2000/01/rdf-schema#")
materialsmine = Namespace("http://materialsmine.org/ns/")
bibo = Namespace("http://purl.org/ontology/bibo/")
rdf = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
xsd = Namespace("http://www.w3.org/2001/XMLSchema#")
xml = Namespace("http://www.w3.org/XML/1998/namespace")
foaf = Namespace("http://xmlns.com/foaf/0.1/")
# dcterms and DCTERMS are built from the same base URI.
dcterms = Namespace("http://purl.org/dc/terms/")
isPartOf = dcterms.isPartOf
DCTERMS = Namespace("http://purl.org/dc/terms/")

def normalize_string(s):
    """Return the comparison key of a label.

    The text is lower-cased, every run of underscores, hyphens, plus signs
    and whitespace is removed, and finally each literal ``...`` is removed.
    """
    lowered = s.lower()
    without_separators = re.sub(r'[_\-+\s]+', '', lowered)
    return without_separators.replace('...', '')

def get_class_label(g, cls):
    """Return one label of ``cls`` from graph ``g``, or ``None`` if it has none.

    ``skos:altLabel`` values take precedence over ``skos:prefLabel`` values,
    which take precedence over ``rdfs:label`` values.
    """
    for predicate in (SKOS.altLabel, SKOS.prefLabel, RDFS.label):
        for candidate in g.objects(cls, predicate):
            # The first value of the highest-priority predicate wins.
            return candidate
    return None

def get_class_description(g, cls):
    """Return one description of ``cls`` from graph ``g``, or ``None``.

    ``dcterms:description`` values take precedence over ``skos:definition``
    values, which take precedence over ``rdfs:comment`` values.
    """
    for predicate in (DCTERMS.description, SKOS.definition, RDFS.comment):
        for candidate in g.objects(cls, predicate):
            # The first value of the highest-priority predicate wins.
            return candidate
    return None

def load_and_collect_classes_and_relations(file_path, class_names_to_check):
    """Parse one ontology file and collect the classes whose labels match.

    Args:
        file_path: Path to a ``.ttl``, ``.owl`` or ``.xrdf`` file.  The
            extension (compared case-insensitively) selects the parser format.
        class_names_to_check: Iterable of class names.  Names and labels are
            compared after ``normalize_string`` has been applied to both.

    Returns:
        A ``(data, relations, found_class_labels)`` tuple:

        * ``data``: one ``[file_path, class URI, label, description]`` row per
          matching label.
        * ``relations``: one ``[file_path, class URI, label, relation name,
          object URI, object label, object description]`` row per
          ``subClassOf`` / ``equivalentClass`` / ``isPartOf`` object of a
          matching class; only objects that are URIs are kept.
        * ``found_class_labels``: normalized labels of every URI-named class
          in the file, whether or not they match.

        Missing descriptions and labels are stored as the string ``'None'``
        because every cell goes through ``str``.

    Raises:
        ValueError: If the file extension is not one of the supported ones.
    """
    lowered_path = file_path.lower()
    if lowered_path.endswith('.ttl'):
        file_format = 'ttl'
    elif lowered_path.endswith(('.owl', '.xrdf')):
        file_format = 'xml'
    else:
        raise ValueError("Unsupported file format. Only .ttl, .owl and .xrdf files are supported.")

    g = Graph()
    g.parse(file_path, format=file_format)

    normalized_class_names_to_check = {normalize_string(name) for name in class_names_to_check}

    classes = set(g.subjects(RDF.type, OWL.Class)).union(g.subjects(RDF.type, RDFS.Class))

    # Relations to export, in the order their rows are emitted per label.
    relation_predicates = (
        (RDFS.subClassOf, 'subClassOf'),
        (OWL.equivalentClass, 'equivalentClass'),
        (DCTERMS.isPartOf, 'isPartOf'),
    )

    data = []
    relations = []
    found_class_labels = set()
    for cls in classes:
        if not isinstance(cls, URIRef):  # Skip anything that is not a URI
            continue
        labels = list(g.objects(cls, SKOS.altLabel)) + list(g.objects(cls, SKOS.prefLabel)) + list(g.objects(cls, RDFS.label))
        description = get_class_description(g, cls)
        for label in labels:
            normalized_label = normalize_string(label)
            found_class_labels.add(normalized_label)
            if normalized_label not in normalized_class_names_to_check:
                continue
            data.append([file_path, str(cls), str(label), str(description)])
            for predicate, relation_name in relation_predicates:
                for obj in g.objects(cls, predicate):
                    if not isinstance(obj, URIRef):  # Skip non-URI objects
                        continue
                    obj_label = get_class_label(g, obj)
                    obj_description = get_class_description(g, obj)
                    relations.append([file_path, str(cls), str(label), relation_name, str(obj), str(obj_label), str(obj_description)])

    return data, relations, found_class_labels


def filter_relations(all_relations, initial_class_names_to_check):
    """Select the relation rows that mention an initially requested class.

    The subject label (index 2) is tested first and the object label
    (index 5) second; a row passes if either normalizes to one of the
    normalized requested names.  Input order is preserved.
    """
    requested = {normalize_string(name) for name in initial_class_names_to_check}

    def mentions_requested(row):
        if normalize_string(row[2]) in requested:
            return True
        return normalize_string(row[5]) in requested

    return [row for row in all_relations if mentions_requested(row)]

def print_hierarchy(class_name, relations, g, writer):
    """Print and record the chain of relations that starts at ``class_name``.

    Every row of ``relations`` whose subject label (index 2) normalizes to the
    same string as the current class name is written to ``writer`` and printed
    with two spaces of indentation per depth level; the walk then continues
    from that row's object label (index 5).

    Args:
        class_name: Label of the class to start from.
        relations: Rows shaped ``[file, subject URI, subject label, relation,
            object URI, object label, object description]``.
        g: Graph in which the subject and object descriptions are looked up
            with ``get_class_description``.
        writer: Object exposing ``writerow`` (e.g. a ``csv.writer``).
    """
    def recursive_print(class_name, depth=0, active_path=frozenset()):
        current = normalize_string(class_name)
        # Labels on the current recursion path.  Following an edge back to one
        # of them would recurse forever (mutually equivalent classes, or a
        # parent that shares its child's label), so such an edge is still
        # reported but not descended into.
        path = active_path | {current}
        for relation in relations:
            if normalize_string(relation[2]) != current:
                continue
            subject_description = get_class_description(g, URIRef(relation[1]))
            object_description = get_class_description(g, URIRef(relation[4]))
            writer.writerow([relation[1], relation[2], subject_description, relation[3], relation[4], relation[5], object_description, relation[0]])
            indent = '  ' * depth
            print(f"{indent}{relation[1]} {relation[2]} is {relation[3]} {relation[4]} {relation[5]} (from {relation[0]})")
            if normalize_string(relation[5]) not in path:
                recursive_print(relation[5], depth + 1, path)

    recursive_print(class_name)


# Output files, written to the notebook's working directory.
output_hierarchy_file = "class_hierarchy.csv"
class_output_file = "ontology_classes.csv"
relations_output_file = "ontology_relations.csv"

all_data = []
all_relations = []
all_found_class_labels = set()
# Normalized labels seen per ontology file, kept so that the report further
# down can tell which file contains a class without parsing the files again.
found_labels_by_file = {}

# Computed once; used by the loop, the class filter and the report.
normalized_initial_class_names = {normalize_string(name) for name in initial_class_names_to_check}

class_names_to_check = initial_class_names_to_check

# Pass 1 looks up the requested names; pass 2 looks up every other label found
# in pass 1, so all_relations also holds the relations of non-requested
# classes, which print_hierarchy walks.
max_iterations = 2
iteration_count = 0

while class_names_to_check and iteration_count < max_iterations:
    iteration_count += 1
    new_data = []
    new_relations = []
    new_found_class_labels = set()

    for ontology_file in ontology_files:
        file_data, file_relations, found_class_labels = load_and_collect_classes_and_relations(ontology_file, class_names_to_check)
        new_data.extend(file_data)
        new_relations.extend(file_relations)
        new_found_class_labels.update(found_class_labels)
        found_labels_by_file.setdefault(ontology_file, set()).update(found_class_labels)

    all_data.extend(new_data)
    all_relations.extend(new_relations)
    all_found_class_labels.update(new_found_class_labels)

    # Next pass: everything that was found, minus what was asked for initially.
    class_names_to_check = {str(label) for label in new_found_class_labels} - normalized_initial_class_names
    class_names_to_check = [normalize_string(name) for name in class_names_to_check]

# Filter and save class data
filtered_data = [row for row in all_data if normalize_string(row[2]) in normalized_initial_class_names]
with open(class_output_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["File", "Class URI", "Label", "Description"])
    writer.writerows(filtered_data)

# Filter and save class relations
filtered_relations = filter_relations(all_relations, initial_class_names_to_check)
with open(relations_output_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["File", "Subject Class URI", "Subject Label", "Relation", "Object Class URI", "Object Label", "Object Description"])
    writer.writerows(filtered_relations)

print(f"Filtered class data has been saved to {class_output_file}")
print(f"Filtered class relations have been saved to {relations_output_file}")

print("\nInitial class names found in the output:")
for class_name in initial_class_names_to_check:
    normalized_class_name = normalize_string(class_name)
    # A name counts as found when it is a substring of any collected label;
    # a file is listed only when it holds an exactly matching label, so a
    # "found in:" header can be followed by no file at all.
    found = any(normalized_class_name in label for label in all_found_class_labels)
    if found:
        print(f"Class '{class_name}' found in:")
        for ontology_file in ontology_files:
            if normalized_class_name in found_labels_by_file.get(ontology_file, ()):
                print(f"- {ontology_file}")
    else:
        print(f"Class '{class_name}' not found in the output.")

print("\nSaving class hierarchy to CSV file:")
with open(output_hierarchy_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["Class URI", "Class Name", "Class Description", "Relation Type", "Related Class URI", "Related Class Name", "Related Class Description", "File"])
    for class_name in initial_class_names_to_check:
        normalized_class_name = normalize_string(class_name)
        # NOTE(review): an empty Graph is passed here, so the description
        # lookups inside print_hierarchy find nothing and both description
        # columns of this CSV are written empty.
        print_hierarchy(normalized_class_name, all_relations, Graph(), writer)

print(f"Class hierarchy has been saved to {output_hierarchy_file}")


Filtered class data has been saved to ontology_classes.csv
Filtered class relations have been saved to ontology_relations.csv

Initial class names found in the output:
Class 'Compression' found in:
- ../Ontologies/materialsmine_converted.ttl
Class 'AmperePerJoule' found in:
- ../Ontologies/emmo.ttl
Class 'nfdi' not found in the output.
Class 'stress' found in:
- ../Ontologies/materialsmine_converted.ttl
- ../Ontologies/emmo.ttl

Saving class hierarchy to CSV file:
http://materialsmine.org/ns/Compression Compression is subClassOf http://materialsmine.org/ns/ViscoelasticProperty Viscoelastic Property (from ../Ontologies/materialsmine_converted.ttl)
  http://materialsmine.org/ns/ViscoelasticProperty Viscoelastic Property is subClassOf http://semanticscience.org/resource/Quantity Amount (from ../Ontologies/materialsmine_converted.ttl)
    http://semanticscience.org/resource/Quantity Amount is subClassOf http://semanticscience.org/resource/MeasurementValue measurement value (from ../Ontolog

In [16]:
import csv
import re
from rdflib import Graph, RDF, RDFS, OWL, SKOS, Namespace, URIRef, Literal

# Define namespaces
# Of the names below, only DCTERMS is referenced by the code in this cell;
# the lowercase ones are not used here (other cells may rely on them - TODO
# confirm before removing any).
ex = Namespace("http://example.org/ontology/")
sio = Namespace("http://semanticscience.org/resource/")
skos = Namespace("http://www.w3.org/2004/02/skos/core#")
owl = Namespace("http://www.w3.org/2002/07/owl#")
rdfs = Namespace("http://www.w3.org/2000/01/rdf-schema#")
materialsmine = Namespace("http://materialsmine.org/ns/")
bibo = Namespace("http://purl.org/ontology/bibo/")
rdf = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
xsd = Namespace("http://www.w3.org/2001/XMLSchema#")
xml = Namespace("http://www.w3.org/XML/1998/namespace")
foaf = Namespace("http://xmlns.com/foaf/0.1/")
# dcterms and DCTERMS are built from the same base URI.
dcterms = Namespace("http://purl.org/dc/terms/")
isPartOf = dcterms.isPartOf
DCTERMS = Namespace("http://purl.org/dc/terms/")

def normalize_string(s):
    """Reduce a label to the key used for name comparison.

    Lower-cases the text, strips all runs of ``_``, ``-``, ``+`` and
    whitespace, then strips every literal ``...``.
    """
    return re.sub(r'[_\-+\s]+', '', s.lower()).replace('...', '')

def get_class_label(g, cls):
    """Look up a single label for ``cls`` in ``g``; ``None`` when there is none.

    Candidates are taken from ``skos:altLabel`` first, then
    ``skos:prefLabel``, then ``rdfs:label``.
    """
    candidates = []
    for predicate in (SKOS.altLabel, SKOS.prefLabel, RDFS.label):
        candidates.extend(g.objects(cls, predicate))
    if not candidates:
        return None
    return candidates[0]

def get_class_description(g, cls):
    """Return all descriptions of ``cls`` in ``g`` joined by single spaces.

    Values are gathered from ``dcterms:description``, ``skos:definition`` and
    ``rdfs:comment``, in that order.  Returns ``None`` when none exist.
    """
    texts = []
    for predicate in (DCTERMS.description, SKOS.definition, RDFS.comment):
        texts.extend(str(value) for value in g.objects(cls, predicate))
    if not texts:
        return None
    return " ".join(texts)

def load_and_collect_classes_and_relations(file_path, class_names_to_check):
    """Parse one ontology file and collect the classes whose labels match.

    Args:
        file_path: Path to a ``.ttl``, ``.owl`` or ``.xrdf`` file.  The
            extension (compared case-insensitively) selects the parser format.
        class_names_to_check: Iterable of class names.  Names and labels are
            compared after ``normalize_string`` has been applied to both.

    Returns:
        A ``(data, relations, found_class_labels)`` tuple:

        * ``data``: one ``[file_path, class URI, label, description]`` row per
          matching label.
        * ``relations``: one ``[file_path, class URI, label, relation name,
          object URI, object label, object description]`` row per
          ``subClassOf`` / ``equivalentClass`` / ``isPartOf`` object of a
          matching class; only objects that are URIs are kept.
        * ``found_class_labels``: normalized labels of every URI-named class
          in the file, whether or not they match.

        Missing descriptions and labels are stored as the string ``'None'``
        because every cell goes through ``str``.

    Raises:
        ValueError: If the file extension is not one of the supported ones.
    """
    lowered_path = file_path.lower()
    if lowered_path.endswith('.ttl'):
        file_format = 'ttl'
    elif lowered_path.endswith(('.owl', '.xrdf')):
        file_format = 'xml'
    else:
        raise ValueError("Unsupported file format. Only .ttl, .owl and .xrdf files are supported.")

    g = Graph()
    g.parse(file_path, format=file_format)

    normalized_class_names_to_check = {normalize_string(name) for name in class_names_to_check}

    classes = set(g.subjects(RDF.type, OWL.Class)).union(g.subjects(RDF.type, RDFS.Class))

    # Relations to export, in the order their rows are emitted per label.
    relation_predicates = (
        (RDFS.subClassOf, 'subClassOf'),
        (OWL.equivalentClass, 'equivalentClass'),
        (DCTERMS.isPartOf, 'isPartOf'),
    )

    data = []
    relations = []
    found_class_labels = set()
    for cls in classes:
        if not isinstance(cls, URIRef):  # Skip anything that is not a URI
            continue
        labels = list(g.objects(cls, SKOS.altLabel)) + list(g.objects(cls, SKOS.prefLabel)) + list(g.objects(cls, RDFS.label))
        description = get_class_description(g, cls)
        for label in labels:
            normalized_label = normalize_string(label)
            found_class_labels.add(normalized_label)
            if normalized_label not in normalized_class_names_to_check:
                continue
            data.append([file_path, str(cls), str(label), str(description)])
            for predicate, relation_name in relation_predicates:
                for obj in g.objects(cls, predicate):
                    if not isinstance(obj, URIRef):  # Skip non-URI objects
                        continue
                    obj_label = get_class_label(g, obj)
                    obj_description = get_class_description(g, obj)
                    relations.append([file_path, str(cls), str(label), relation_name, str(obj), str(obj_label), str(obj_description)])

    return data, relations, found_class_labels


def filter_relations(all_relations, initial_class_names_to_check):
    """Keep the relation rows that involve one of the initially requested classes.

    A row is kept when its subject label (index 2) or its object label
    (index 5) normalizes to one of the normalized requested names.  The
    order of the surviving rows is the order of ``all_relations``.
    """
    wanted = {normalize_string(name) for name in initial_class_names_to_check}
    kept = []
    for row in all_relations:
        subject_matches = normalize_string(row[2]) in wanted
        if subject_matches or normalize_string(row[5]) in wanted:
            kept.append(row)
    return kept

def print_hierarchy(class_name, relations, g, writer):
    """Print and record the chain of relations that starts at ``class_name``.

    Every row of ``relations`` whose subject label (index 2) normalizes to the
    same string as the current class name is written to ``writer`` and printed
    with two spaces of indentation per depth level; the walk then continues
    from that row's object label (index 5).

    Args:
        class_name: Label of the class to start from.
        relations: Rows shaped ``[file, subject URI, subject label, relation,
            object URI, object label, object description]``.
        g: Graph in which the subject and object descriptions are looked up
            with ``get_class_description``.
        writer: Object exposing ``writerow`` (e.g. a ``csv.writer``).
    """
    def recursive_print(class_name, depth=0, active_path=frozenset()):
        current = normalize_string(class_name)
        # Labels on the current recursion path.  Following an edge back to one
        # of them would recurse forever (mutually equivalent classes, or a
        # parent that shares its child's label), so such an edge is still
        # reported but not descended into.
        path = active_path | {current}
        for relation in relations:
            if normalize_string(relation[2]) != current:
                continue
            subject_description = get_class_description(g, URIRef(relation[1]))
            object_description = get_class_description(g, URIRef(relation[4]))
            writer.writerow([relation[1], relation[2], subject_description, relation[3], relation[4], relation[5], object_description, relation[0]])
            indent = '  ' * depth
            print(f"{indent}{relation[1]} {relation[2]} is {relation[3]} {relation[4]} {relation[5]} (from {relation[0]})")
            if normalize_string(relation[5]) not in path:
                recursive_print(relation[5], depth + 1, path)

    recursive_print(class_name)

# Output files, written to the notebook's working directory.
output_hierarchy_file = "class_hierarchy.csv"
class_output_file = "ontology_classes.csv"
relations_output_file = "ontology_relations.csv"

all_data = []
all_relations = []
all_found_class_labels = set()
# Normalized labels seen per ontology file, kept so that the report further
# down can tell which file contains a class without parsing the files again.
found_labels_by_file = {}

# Computed once; used by the loop, the class filter and the report.
normalized_initial_class_names = {normalize_string(name) for name in initial_class_names_to_check}

class_names_to_check = initial_class_names_to_check

# Pass 1 looks up the requested names; pass 2 looks up every other label found
# in pass 1, so all_relations also holds the relations of non-requested
# classes, which print_hierarchy walks.
max_iterations = 2
iteration_count = 0
# NOTE(review): this graph is never populated in this cell, so the description
# lookups that print_hierarchy performs against it find nothing and both
# description columns of the hierarchy CSV are written empty.
g = Graph()  # Initialize the RDF graph here

while class_names_to_check and iteration_count < max_iterations:
    iteration_count += 1
    new_data = []
    new_relations = []
    new_found_class_labels = set()

    for ontology_file in ontology_files:
        file_data, file_relations, found_class_labels = load_and_collect_classes_and_relations(ontology_file, class_names_to_check)
        new_data.extend(file_data)
        new_relations.extend(file_relations)
        new_found_class_labels.update(found_class_labels)
        found_labels_by_file.setdefault(ontology_file, set()).update(found_class_labels)

    all_data.extend(new_data)
    all_relations.extend(new_relations)
    all_found_class_labels.update(new_found_class_labels)

    # Next pass: everything that was found, minus what was asked for initially.
    class_names_to_check = {str(label) for label in new_found_class_labels} - normalized_initial_class_names
    class_names_to_check = [normalize_string(name) for name in class_names_to_check]


# Filter and save class data
filtered_data = [row for row in all_data if normalize_string(row[2]) in normalized_initial_class_names]
with open(class_output_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["File", "Class URI", "Label", "Description"])
    writer.writerows(filtered_data)

# Filter and save class relations
filtered_relations = filter_relations(all_relations, initial_class_names_to_check)
with open(relations_output_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["File", "Subject Class URI", "Subject Label", "Relation", "Object Class URI", "Object Label", "Object Description"])
    writer.writerows(filtered_relations)

print(f"Filtered class data has been saved to {class_output_file}")
print(f"Filtered class relations have been saved to {relations_output_file}")

print("\nInitial class names found in the output:")
for class_name in initial_class_names_to_check:
    normalized_class_name = normalize_string(class_name)
    # A name counts as found when it is a substring of any collected label;
    # a file is listed only when it holds an exactly matching label, so a
    # "found in:" header can be followed by no file at all.
    found = any(normalized_class_name in label for label in all_found_class_labels)
    if found:
        print(f"Class '{class_name}' found in:")
        for ontology_file in ontology_files:
            if normalized_class_name in found_labels_by_file.get(ontology_file, ()):
                print(f"- {ontology_file}")
    else:
        print(f"Class '{class_name}' not found in the output.")

# Save class hierarchy to CSV file
print("\nSaving class hierarchy to CSV file:")
with open(output_hierarchy_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["Class URI", "Class Name", "Class Description", "Relation Type", "Related Class URI", "Related Class Name", "Related Class Description", "File"])
    for class_name in initial_class_names_to_check:
        normalized_class_name = normalize_string(class_name)
        print_hierarchy(normalized_class_name, all_relations, g, writer)
print(f"Class hierarchy has been saved to {output_hierarchy_file}")

Filtered class data has been saved to ontology_classes.csv
Filtered class relations have been saved to ontology_relations.csv

Initial class names found in the output:
Class 'Compression' found in:
- ../Ontologies/materialsmine_converted.ttl
Class 'AmperePerJoule' not found in the output.
Class 'nfdi' not found in the output.
Class 'stress' found in:
- ../Ontologies/materialsmine_converted.ttl

Saving class hierarchy to CSV file:
http://materialsmine.org/ns/Compression Compression is subClassOf http://materialsmine.org/ns/ViscoelasticProperty Viscoelastic Property (from ../Ontologies/materialsmine_converted.ttl)
  http://materialsmine.org/ns/ViscoelasticProperty Viscoelastic Property is subClassOf http://semanticscience.org/resource/Quantity Amount (from ../Ontologies/materialsmine_converted.ttl)
    http://semanticscience.org/resource/Quantity Amount is subClassOf http://semanticscience.org/resource/MeasurementValue measurement value (from ../Ontologies/materialsmine_converted.ttl)
  

In [17]:
import csv
import re
from rdflib import Graph, RDF, RDFS, OWL, SKOS, Namespace, URIRef, Literal

# Define namespaces
# Of the names below, only DCTERMS is referenced in the visible part of this
# cell; the lowercase ones are not used there (the rest of the cell or other
# cells may rely on them - TODO confirm before removing any).
ex = Namespace("http://example.org/ontology/")
sio = Namespace("http://semanticscience.org/resource/")
skos = Namespace("http://www.w3.org/2004/02/skos/core#")
owl = Namespace("http://www.w3.org/2002/07/owl#")
rdfs = Namespace("http://www.w3.org/2000/01/rdf-schema#")
materialsmine = Namespace("http://materialsmine.org/ns/")
bibo = Namespace("http://purl.org/ontology/bibo/")
rdf = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
xsd = Namespace("http://www.w3.org/2001/XMLSchema#")
xml = Namespace("http://www.w3.org/XML/1998/namespace")
foaf = Namespace("http://xmlns.com/foaf/0.1/")
# dcterms and DCTERMS are built from the same base URI.
dcterms = Namespace("http://purl.org/dc/terms/")
isPartOf = dcterms.isPartOf
DCTERMS = Namespace("http://purl.org/dc/terms/")

def normalize_string(s):
    """Build the comparison key for a label.

    The label is lower-cased; runs of underscore, hyphen, plus and whitespace
    characters are dropped; afterwards every literal ``...`` is dropped.
    """
    pieces = re.split(r'[_\-+\s]+', s.lower())
    return ''.join(pieces).replace('...', '')

def get_class_label(g, cls):
    """Return one label of ``cls`` from graph ``g``, or ``None`` if it has none.

    ``skos:altLabel`` values take precedence over ``skos:prefLabel`` values,
    which take precedence over ``rdfs:label`` values.
    """
    for predicate in (SKOS.altLabel, SKOS.prefLabel, RDFS.label):
        for candidate in g.objects(cls, predicate):
            # The first value of the highest-priority predicate wins.
            return candidate
    return None

def get_class_descriptions(g, cls):
    """Return all descriptions of ``cls`` in ``g`` joined by single spaces.

    Values are gathered from ``dcterms:description``, ``skos:definition`` and
    ``rdfs:comment``, in that order.  Returns ``None`` when none exist.
    """
    texts = []
    for predicate in (DCTERMS.description, SKOS.definition, RDFS.comment):
        texts.extend(str(value) for value in g.objects(cls, predicate))
    if not texts:
        return None
    return " ".join(texts)

def load_and_collect_classes_and_relations(file_path, class_names_to_check):
    """Parse one ontology file and collect the classes whose labels match.

    Args:
        file_path: Path to a ``.ttl``, ``.owl`` or ``.xrdf`` file.  The
            extension (compared case-insensitively) selects the parser format.
        class_names_to_check: Iterable of class names.  Names and labels are
            compared after ``normalize_string`` has been applied to both.

    Returns:
        A ``(data, relations, found_class_labels, g)`` tuple:

        * ``data``: one ``[file_path, class URI, label, description]`` row per
          matching label.
        * ``relations``: one ``[file_path, class URI, label, relation name,
          object URI, object label, object description]`` row per
          ``subClassOf`` / ``equivalentClass`` / ``isPartOf`` object of a
          matching class; only objects that are URIs are kept.
        * ``found_class_labels``: normalized labels of every URI-named class
          in the file, whether or not they match.
        * ``g``: the graph parsed from ``file_path``.

        Missing descriptions and labels are stored as the string ``'None'``
        because every cell goes through ``str``.

    Raises:
        ValueError: If the file extension is not one of the supported ones.
    """
    lowered_path = file_path.lower()
    if lowered_path.endswith('.ttl'):
        file_format = 'ttl'
    elif lowered_path.endswith(('.owl', '.xrdf')):
        file_format = 'xml'
    else:
        raise ValueError("Unsupported file format. Only .ttl, .owl and .xrdf files are supported.")

    g = Graph()
    g.parse(file_path, format=file_format)

    normalized_class_names_to_check = {normalize_string(name) for name in class_names_to_check}

    classes = set(g.subjects(RDF.type, OWL.Class)).union(g.subjects(RDF.type, RDFS.Class))

    # Relations to export, in the order their rows are emitted per label.
    relation_predicates = (
        (RDFS.subClassOf, 'subClassOf'),
        (OWL.equivalentClass, 'equivalentClass'),
        (DCTERMS.isPartOf, 'isPartOf'),
    )

    data = []
    relations = []
    found_class_labels = set()
    for cls in classes:
        if not isinstance(cls, URIRef):  # Skip anything that is not a URI
            continue
        labels = list(g.objects(cls, SKOS.altLabel)) + list(g.objects(cls, SKOS.prefLabel)) + list(g.objects(cls, RDFS.label))
        description = get_class_descriptions(g, cls)
        for label in labels:
            normalized_label = normalize_string(label)
            found_class_labels.add(normalized_label)
            if normalized_label not in normalized_class_names_to_check:
                continue
            data.append([file_path, str(cls), str(label), str(description)])
            for predicate, relation_name in relation_predicates:
                for obj in g.objects(cls, predicate):
                    if not isinstance(obj, URIRef):  # Skip non-URI objects
                        continue
                    obj_label = get_class_label(g, obj)
                    obj_description = get_class_descriptions(g, obj)
                    relations.append([file_path, str(cls), str(label), relation_name, str(obj), str(obj_label), str(obj_description)])

    return data, relations, found_class_labels, g

def filter_relations(all_relations, initial_class_names_to_check):
    """Select the relation rows that mention an initially requested class.

    The subject label (index 2) is tested first and the object label
    (index 5) second; a row passes if either normalizes to one of the
    normalized requested names.  Input order is preserved.
    """
    requested = {normalize_string(name) for name in initial_class_names_to_check}

    def mentions_requested(row):
        if normalize_string(row[2]) in requested:
            return True
        return normalize_string(row[5]) in requested

    return [row for row in all_relations if mentions_requested(row)]

def print_hierarchy(class_name, relations, g, writer):
    """Print and export the relation chain that starts at ``class_name``.

    Every relation whose subject label matches ``class_name`` (after
    normalisation) is written to ``writer`` and printed, then the walk
    continues from that relation's object label, indented one level deeper.

    Args:
        class_name: Label of the class to start from.
        relations: Relation rows laid out as
            ``[file, subject URI, subject label, relation type,
            object URI, object label, object description]``.
        g: Graph passed to ``get_class_descriptions`` to look up the subject
            and object descriptions.
        writer: Object with a ``writerow`` method (e.g. ``csv.writer``).

    Labels already on the current path are not followed again, so cyclic
    relations (mutual ``equivalentClass``, self-referential ``subClassOf``,
    or classes sharing a label) terminate instead of raising
    ``RecursionError``. The back-edge itself is still written and printed.
    """
    # Index once by normalised subject label so each recursion step is a
    # dictionary lookup instead of a rescan of the whole relation list.
    # List order within each bucket follows the order of ``relations``.
    relations_by_subject = {}
    for relation in relations:
        relations_by_subject.setdefault(normalize_string(relation[2]), []).append(relation)

    def recursive_print(class_name, depth=0, path=frozenset()):
        key = normalize_string(class_name)
        # Labels visited between the root and this node (inclusive).
        path = path | {key}
        indent = '  ' * depth
        for relation in relations_by_subject.get(key, ()):
            subject_description = get_class_descriptions(g, URIRef(relation[1]))
            object_description = get_class_descriptions(g, URIRef(relation[4]))
            writer.writerow([relation[1], relation[2], subject_description, relation[3], relation[4], relation[5], object_description, relation[0]])
            print(f"{indent}{relation[1]} {relation[2]} is {relation[3]} {relation[4]} {relation[5]} (from {relation[0]})")
            # Only descend into labels not already on this path; shared
            # ancestors reached via different branches are still expanded.
            if normalize_string(relation[5]) not in path:
                recursive_print(relation[5], depth + 1, path)

    recursive_print(class_name)

# Destination files for the three CSV reports produced by this cell.
output_hierarchy_file = "class_hierarchy.csv"
class_output_file = "ontology_classes.csv"
relations_output_file = "ontology_relations.csv"

all_data = []
all_relations = []
all_found_class_labels = set()

# Normalised form of the user-supplied names, computed once and reused by
# the expansion loop and the class-data filter below.
normalized_initial_class_names = {normalize_string(name) for name in initial_class_names_to_check}

# Normalised labels seen in each ontology file during the first pass (the
# pass that searches for the initial names). Used by the report further
# down so the files do not have to be parsed again per class name.
found_labels_by_file = {}

class_names_to_check = initial_class_names_to_check

max_iterations = 2  # pass 1: initial names; pass 2: every other label found
iteration_count = 0
graph = None

while class_names_to_check and iteration_count < max_iterations:
    iteration_count += 1
    new_data = []
    new_relations = []
    new_found_class_labels = set()

    for ontology_file in ontology_files:
        file_data, file_relations, found_class_labels, g = load_and_collect_classes_and_relations(ontology_file, class_names_to_check)
        new_data.extend(file_data)
        new_relations.extend(file_relations)
        new_found_class_labels.update(found_class_labels)
        if iteration_count == 1:
            found_labels_by_file[ontology_file] = {normalize_string(l) for l in found_class_labels}
        # Keep the most recently loaded graph for hierarchy printing.
        # Assigning inside the loop leaves `graph` as None (instead of
        # raising NameError) when `ontology_files` is empty.
        # NOTE(review): with several ontology files, descriptions in the
        # hierarchy export are looked up in the last file's graph only.
        graph = g

    all_data.extend(new_data)
    all_relations.extend(new_relations)
    all_found_class_labels.update(new_found_class_labels)

    # Next pass: every label found so far except the initial names.
    class_names_to_check = {str(label) for label in new_found_class_labels} - normalized_initial_class_names
    class_names_to_check = [normalize_string(name) for name in class_names_to_check]

# Filter and save class data
filtered_data = [row for row in all_data if normalize_string(row[2]) in normalized_initial_class_names]
with open(class_output_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["File", "Class URI", "Label", "Description"])
    writer.writerows(filtered_data)

# Filter and save class relations
filtered_relations = filter_relations(all_relations, initial_class_names_to_check)
with open(relations_output_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["File", "Subject Class URI", "Subject Label", "Relation", "Object Class URI", "Object Label", "Object Description"])
    writer.writerows(filtered_relations)

print(f"Filtered class data has been saved to {class_output_file}")
print(f"Filtered class relations have been saved to {relations_output_file}")

print("\nInitial class names found in the output:")
for class_name in initial_class_names_to_check:
    normalized_class_name = normalize_string(class_name)
    # Exact match on the normalised label, the same criterion used for the
    # per-file listing, so a "found in:" header is never printed for a name
    # that merely occurs inside a longer label.
    if normalized_class_name in all_found_class_labels:
        print(f"Class '{class_name}' found in:")
        for ontology_file in ontology_files:
            if normalized_class_name in found_labels_by_file.get(ontology_file, ()):
                print(f"- {ontology_file}")
    else:
        print(f"Class '{class_name}' not found in the output.")

print("\nSaving class hierarchy to CSV file:")
with open(output_hierarchy_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["Class URI", "Class Name", "Class Description", "Relation Type", "Related Class URI", "Related Class Name", "Related Class Description", "File"])
    for class_name in initial_class_names_to_check:
        normalized_class_name = normalize_string(class_name)
        print_hierarchy(normalized_class_name, all_relations, graph, writer)

print(f"Class hierarchy has been saved to {output_hierarchy_file}")


Filtered class data has been saved to ontology_classes.csv
Filtered class relations have been saved to ontology_relations.csv

Initial class names found in the output:
Class 'Compression' found in:
- ../Ontologies/materialsmine_converted.ttl
Class 'AmperePerJoule' not found in the output.
Class 'nfdi' not found in the output.
Class 'stress' found in:
- ../Ontologies/materialsmine_converted.ttl

Saving class hierarchy to CSV file:
http://materialsmine.org/ns/Compression Compression is subClassOf http://materialsmine.org/ns/ViscoelasticProperty Viscoelastic Property (from ../Ontologies/materialsmine_converted.ttl)
  http://materialsmine.org/ns/ViscoelasticProperty Viscoelastic Property is subClassOf http://semanticscience.org/resource/Quantity Amount (from ../Ontologies/materialsmine_converted.ttl)
    http://semanticscience.org/resource/Quantity Amount is subClassOf http://semanticscience.org/resource/MeasurementValue measurement value (from ../Ontologies/materialsmine_converted.ttl)
  