In [4]:
from rdflib import Graph, RDF, RDFS, OWL

In [5]:
# Start from an empty in-memory RDF graph.
g = Graph()

# Load the ontology from its Turtle serialization.
# Other values for `format` include 'xml', 'nt' and 'json-ld'.
g.parse(
    source="merged_individuals_updated_v2.ttl",
    format="turtle",
)

<Graph identifier=Nf04082e045174241b7521dcf1bace5a3 (<class 'rdflib.graph.Graph'>)>

In [6]:
# The length of the rdflib Graph is reported below as its number of RDF triples.
num_triples = len(g)
print(f"Number of RDF triples: {num_triples}")

Number of RDF triples: 3861


In [7]:
# Subjects explicitly typed as a class, gathered separately for the OWL and
# RDFS vocabularies.
owl_classes = {subject for subject in g.subjects(RDF.type, OWL.Class)}
rdfs_classes = {subject for subject in g.subjects(RDF.type, RDFS.Class)}

# Set union: a resource declared under both vocabularies is counted once.
all_classes = owl_classes | rdfs_classes
num_classes = len(all_classes)

print(f"Number of classes: {num_classes}")

Number of classes: 131


In [8]:
# Subjects explicitly typed as a property, one set per property kind.
rdf_properties = {subject for subject in g.subjects(RDF.type, RDF.Property)}
owl_object_properties = {subject for subject in g.subjects(RDF.type, OWL.ObjectProperty)}
owl_data_properties = {subject for subject in g.subjects(RDF.type, OWL.DatatypeProperty)}
owl_anno_properties = {subject for subject in g.subjects(RDF.type, OWL.AnnotationProperty)}

# Merge the four kinds; a property declared under several kinds is counted once.
all_properties = (
    rdf_properties
    | owl_object_properties
    | owl_data_properties
    | owl_anno_properties
)

num_properties = len(all_properties)
print(f"Number of properties: {num_properties}")

Number of properties: 95


In [9]:
# Rebuild the set of known classes from the OWL and RDFS class sets.
all_classes = owl_classes | rdfs_classes

# An individual is any subject of an rdf:type triple whose object is one of
# the known classes.
individuals = {
    subject
    for subject, _, rdf_type in g.triples((None, RDF.type, None))
    if rdf_type in all_classes
}

num_individuals = len(individuals)
print(f"Number of individuals: {num_individuals}")

Number of individuals: 1008


In [10]:
# Classes and individuals together form the entity set; a resource present in
# both sets appears only once.
all_entities = all_classes | individuals

len(all_entities)

1137

In [11]:
# Predicates that count as an "annotation" for the coverage metric.
# NOTE: rdfs:seeAlso points to a related resource; it is not a textual
# description, and RDFS defines no rdfs:description. It is kept here so the
# metric keeps measuring exactly what this cell measured before. If the
# ontology uses a dedicated description property, append its URI to this tuple.
ANNOTATION_PREDICATES = (RDFS.label, RDFS.comment, RDFS.seeAlso)

# An entity is annotated when it is the subject of at least one triple that
# uses any of the predicates above (None acts as a wildcard for the object).
annotated_entities = {
    entity
    for entity in all_entities
    if any((entity, predicate, None) in g for predicate in ANNOTATION_PREDICATES)
}

num_annotated = len(annotated_entities)
total_entities = len(all_entities)
# Fall back to 0 when there are no entities so the ratio never divides by zero.
coverage = num_annotated / total_entities if total_entities else 0

print(f"Number of annotated entities: {num_annotated}")
print(f"Annotation coverage: {coverage:.2f}")

Number of annotated entities: 184
Annotation coverage: 0.16
