# HI Ontology -- Summary Statistics

In [9]:
!pip install owlready2 pandas prettytable
!pip install rdflib

Collecting rdflib
  Downloading rdflib-7.4.0-py3-none-any.whl.metadata (12 kB)
Downloading rdflib-7.4.0-py3-none-any.whl (569 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m569.0/569.0 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rdflib
Successfully installed rdflib-7.4.0


In [3]:
from owlready2 import *
import pandas as pd
from prettytable import PrettyTable

# Location of the ontology file; adjust it if your copy is named differently.
ontology_path = "hi-ontology.owl"

# Obtain the ontology object for that path first, then load its contents.
onto = get_ontology(ontology_path)
onto.load()
print("Ontology loaded successfully!")


Ontology loaded successfully!


In [5]:
# --- Entity counts ---
# Tally whatever each owlready2 accessor returns for this ontology.
num_classes = sum(1 for _ in onto.classes())

num_obj_properties = sum(1 for _ in onto.object_properties())
num_data_properties = sum(1 for _ in onto.data_properties())
num_annotation_properties = sum(1 for _ in onto.annotation_properties())

num_individuals = sum(1 for _ in onto.individuals())

# --- Triple count ---
# The wildcard pattern matches every (subject, predicate, object) row; the
# number of rows is used as a rough stand-in for an axiom count.
# NOTE(review): the query is issued against `onto.world`, so it presumably
# also covers any other ontology loaded into that world -- confirm.
triples = list(
    onto.world.sparql("""
    SELECT ?a ?b ?c WHERE { ?a ?b ?c }
""")
)
logical_axioms = len(triples)

# --- Superclass entries ---
# Walk every class's `is_a` list once and classify each entry:
#   * a Restriction instance counts as a restriction axiom,
#   * a ThingClass instance (a named class) counts as a subclass axiom.
# The two checks are independent of each other.
restriction_count = 0
subclass_axioms = 0
for cls in onto.classes():
    for parent in cls.is_a:
        if isinstance(parent, Restriction):
            restriction_count += 1
        if isinstance(parent, ThingClass):
            subclass_axioms += 1


In [6]:
# Metric label / value pairs, listed in the order they should be displayed.
ontology_rows = [
    ["Number of Classes", num_classes],
    ["Object Properties", num_obj_properties],
    ["Data Properties", num_data_properties],
    ["Annotation Properties", num_annotation_properties],
    ["Individuals", num_individuals],
    ["Subclass Axioms", subclass_axioms],
    ["Restriction Axioms", restriction_count],
    ["All Triples (Approx.)", logical_axioms],
]

# Build a two-column text table and add one row per metric.
table = PrettyTable()
table.field_names = ["Metric", "Value"]
for row in ontology_rows:
    table.add_row(row)

print(table)


+-----------------------+-------+
|         Metric        | Value |
+-----------------------+-------+
|   Number of Classes   |   17  |
|   Object Properties   |   40  |
|    Data Properties    |   2   |
| Annotation Properties |   0   |
|      Individuals      |   0   |
|    Subclass Axioms    |   18  |
|   Restriction Axioms  |   5   |
| All Triples (Approx.) |  343  |
+-----------------------+-------+


In [7]:
import pandas as pd

# One (label, value) pair per ontology metric, in output order.
ontology_metric_pairs = [
    ("Number of Classes", num_classes),
    ("Object Properties", num_obj_properties),
    ("Data Properties", num_data_properties),
    ("Annotation Properties", num_annotation_properties),
    ("Individuals", num_individuals),
    ("Subclass Axioms", subclass_axioms),
    ("Restriction Axioms", restriction_count),
    ("All Triples (Approx.)", logical_axioms),
]

# Split the pairs into the two columns of the summary frame.
df = pd.DataFrame({
    "Metric": [label for label, _ in ontology_metric_pairs],
    "Value": [value for _, value in ontology_metric_pairs],
})

# Persist the summary (without the positional index) and display it.
df.to_csv("ontology_global_metrics.csv", index=False)
df


Unnamed: 0,Metric,Value
0,Number of Classes,17
1,Object Properties,40
2,Data Properties,2
3,Annotation Properties,0
4,Individuals,0
5,Subclass Axioms,18
6,Restriction Axioms,5
7,All Triples (Approx.),343


# HINT Thesaurus -- Summary Statistics

In [10]:
from rdflib import Graph, Namespace, RDF, SKOS
from prettytable import PrettyTable
import pandas as pd

# Path of the SKOS thesaurus in Turtle syntax; edit it if your file is named differently.
thesaurus_file = "hi-thesaurus.ttl"

# Start from an empty graph and read the Turtle file into it.
g = Graph()
g.parse(source=thesaurus_file, format="turtle")

print("SKOS thesaurus loaded!")

SKOS thesaurus loaded!


In [11]:
SKOS_NS = SKOS


def _count_statements(graph, predicate):
    """Return the number of triples in ``graph`` whose predicate is ``predicate``.

    Subject and object are left unconstrained.
    """
    return sum(1 for _ in graph.triples((None, predicate, None)))


# Resources typed as skos:Concept
concepts = list(g.subjects(RDF.type, SKOS_NS.Concept))
num_concepts = len(concepts)

# Resources typed as skos:Collection (may legitimately be zero)
collections = list(g.subjects(RDF.type, SKOS_NS.Collection))
num_collections = len(collections)

# Hierarchical links
num_broader = _count_statements(g, SKOS_NS.broader)
num_narrower = _count_statements(g, SKOS_NS.narrower)

# Associative links
num_related = _count_statements(g, SKOS_NS.related)

# Lexical labels
num_prefLabels = _count_statements(g, SKOS_NS.prefLabel)
num_altLabels = _count_statements(g, SKOS_NS.altLabel)
num_hiddenLabels = _count_statements(g, SKOS_NS.hiddenLabel)

# Documentation properties
num_definitions = _count_statements(g, SKOS_NS.definition)
num_notes = _count_statements(g, SKOS_NS.note)
num_scopeNotes = _count_statements(g, SKOS_NS.scopeNote)
num_examples = _count_statements(g, SKOS_NS.example)

# Size of the whole graph
total_triples = len(g)


In [12]:
# Thesaurus metric label / value pairs, in display order.
thesaurus_rows = [
    ["Concepts", num_concepts],
    ["Collections", num_collections],
    ["Broader relations", num_broader],
    ["Narrower relations", num_narrower],
    ["Related relations", num_related],
    ["prefLabel", num_prefLabels],
    ["altLabel", num_altLabels],
    ["hiddenLabel", num_hiddenLabels],
    ["Definitions", num_definitions],
    ["Notes", num_notes],
    ["Scope Notes", num_scopeNotes],
    ["Examples", num_examples],
    ["Total triples", total_triples],
]

# Build a two-column text table and add one row per metric.
table = PrettyTable()
table.field_names = ["Metric", "Value"]
for row in thesaurus_rows:
    table.add_row(row)

print(table)


+--------------------+-------+
|       Metric       | Value |
+--------------------+-------+
|      Concepts      |  234  |
|    Collections     |   0   |
| Broader relations  |  218  |
| Narrower relations |   0   |
| Related relations  |   0   |
|     prefLabel      |  235  |
|      altLabel      |   0   |
|    hiddenLabel     |   0   |
|    Definitions     |  228  |
|       Notes        |   0   |
|    Scope Notes     |   0   |
|      Examples      |   0   |
|   Total triples    |  1147 |
+--------------------+-------+


In [13]:
# One (label, value) pair per thesaurus metric, in output order.
thesaurus_metric_pairs = [
    ("Concepts", num_concepts),
    ("Collections", num_collections),
    ("Broader", num_broader),
    ("Narrower", num_narrower),
    ("Related", num_related),
    ("prefLabel", num_prefLabels),
    ("altLabel", num_altLabels),
    ("hiddenLabel", num_hiddenLabels),
    ("Definitions", num_definitions),
    ("Notes", num_notes),
    ("Scope Notes", num_scopeNotes),
    ("Examples", num_examples),
    ("Total triples", total_triples),
]

# Split the pairs into the two columns of the summary frame.
df = pd.DataFrame({
    "Metric": [label for label, _ in thesaurus_metric_pairs],
    "Value": [value for _, value in thesaurus_metric_pairs],
})

# Persist the summary (without the positional index) and display it.
df.to_csv("skos_thesaurus_stats.csv", index=False)
df

Unnamed: 0,Metric,Value
0,Concepts,234
1,Collections,0
2,Broader,218
3,Narrower,0
4,Related,0
5,prefLabel,235
6,altLabel,0
7,hiddenLabel,0
8,Definitions,228
9,Notes,0


# KGs -- Summary Statistics

In [14]:
from rdflib import Graph

# Load the file (fixing missing owl prefix).
# Turtle documents are UTF-8 encoded, so decode explicitly instead of relying
# on the platform's default encoding.
with open("scenarios_kgs.ttl", encoding="utf-8") as f:
    content = f.read()

# Prepend the owl: prefix declaration so the parser can resolve owl: terms.
fixed = "@prefix owl: <http://www.w3.org/2002/07/owl#> .\n" + content

# NOTE: this rebinds `g`, which earlier cells use for the thesaurus graph.
g = Graph()
g.parse(data=fixed, format="turtle")

print("Total triples in KG:", len(g))

# --- Detect all use-case namespaces automatically ---
# Keep every namespace bound in the graph that lives under the scenarios base
# IRI; the set comprehension de-duplicates and sorted() fixes the order.
use_case_prefixes = sorted({
    str(namespace)
    for _, namespace in g.namespaces()
    if str(namespace).startswith("https://w3id.org/hi-ontology/scenarios/")
})

print("\nDetected use cases:")
for ns in use_case_prefixes:
    print(" -", ns)

# --- Count triples per use case ---
def count_use_case_triples(namespace, graph=None):
    """Count the triples whose subject or object starts with ``namespace``.

    A triple is counted once even when both its subject and its object match.
    Predicates are not inspected.

    Parameters
    ----------
    namespace : str
        IRI prefix to match against ``str(subject)`` and ``str(object)``.
    graph : iterable of (subject, predicate, object), optional
        Triples to scan. When omitted, the notebook-level graph ``g`` is used,
        which keeps existing one-argument calls working. Passing the graph
        explicitly avoids depending on which cell last rebound ``g``.

    Returns
    -------
    int
        Number of matching triples.
    """
    # Compare against None explicitly: an empty graph is a valid argument and
    # must not silently fall back to the global one.
    if graph is None:
        graph = g
    return sum(
        1
        for s, _, o in graph
        if str(s).startswith(namespace) or str(o).startswith(namespace)
    )

print("\n=== Use Case Statistics ===")

# Pair every detected namespace with the number of triples that touch it.
results = [(ns, count_use_case_triples(ns)) for ns in use_case_prefixes]

# Report one line per use case.
for ns, count in results:
    print(f"{ns}  →  {count} triples")

# Total of the per-use-case counts, printed for comparison with the graph size.
print("\nCheck: sum of all use-case triples =", sum(count for _, count in results))


Total triples in KG: 998

Detected use cases:
 - https://w3id.org/hi-ontology/scenarios/1/
 - https://w3id.org/hi-ontology/scenarios/2/
 - https://w3id.org/hi-ontology/scenarios/3/
 - https://w3id.org/hi-ontology/scenarios/4/
 - https://w3id.org/hi-ontology/scenarios/5/
 - https://w3id.org/hi-ontology/scenarios/6/
 - https://w3id.org/hi-ontology/scenarios/7/

=== Use Case Statistics ===
https://w3id.org/hi-ontology/scenarios/1/  →  120 triples
https://w3id.org/hi-ontology/scenarios/2/  →  182 triples
https://w3id.org/hi-ontology/scenarios/3/  →  125 triples
https://w3id.org/hi-ontology/scenarios/4/  →  142 triples
https://w3id.org/hi-ontology/scenarios/5/  →  129 triples
https://w3id.org/hi-ontology/scenarios/6/  →  139 triples
https://w3id.org/hi-ontology/scenarios/7/  →  160 triples

Check: sum of all use-case triples = 997
