In [None]:
# Determine the bottom-level (leaf) entities in the UAT vocabulary.
#  Requirements: uat.ttl generated by https://github.com/rmcgranaghan/Helio-KNOW/blob/main/ADS_enrichment/compile_vocabs.ipynb
#  Alternative: this could also be done with the UAT CSV file instead (link valid only in a local Jupyter session: http://localhost:8889/edit/ADS_enrichment/data/UAT.csv)

from rdflib import Graph, Namespace, RDF, RDFS, URIRef
import csv

# Input vocabulary and output CSV locations, hoisted so they are easy to change.
UAT_TTL_PATH = "/Users/ryanmc/Documents/Helio_ECIP/dev/Helio-KNOW/ADS_enrichment/uat.ttl"
LEAF_CSV_PATH = "/Users/ryanmc/Documents/Helio_ECIP/dev/Helio-KNOW/ADS_enrichment/uat_leaf_terms.csv"

# Load the TTL file
g = Graph()
g.parse(UAT_TTL_PATH, format="ttl")

# Common namespaces
SKOS = Namespace("http://www.w3.org/2004/02/skos/core#")

# Step 1: Gather all concepts, their labels, and the hierarchy links.
concepts = set()
labels = {}
# NOTE: the set names describe the *relation the term takes part in*:
#   broader  -> terms that HAVE a broader term (i.e. children)
#   narrower -> terms that HAVE a narrower term (i.e. parents)
broader = set()
narrower = set()
# Rank of the label currently stored per URI (lower is better); used so the
# chosen label does not depend on the graph's arbitrary iteration order.
_label_rank = {}

for s, p, o in g:
    if p in (SKOS.prefLabel, RDFS.label):
        # Prefer skos:prefLabel over rdfs:label, then English over untagged
        # over any other language; ties are broken by the label text itself.
        lang = getattr(o, "language", None)
        if lang and lang.lower().startswith("en"):
            lang_rank = 0
        elif not lang:
            lang_rank = 1
        else:
            lang_rank = 2
        rank = (0 if p == SKOS.prefLabel else 1, lang_rank, str(o))
        key = str(s)
        if key not in _label_rank or rank < _label_rank[key]:
            _label_rank[key] = rank
            labels[key] = str(o)
    elif p in (SKOS.broader, RDFS.subClassOf):
        # "s broader o": s is the child, o is the parent.
        broader.add(str(s))
        narrower.add(str(o))
    elif p == SKOS.narrower:
        # "s narrower o" is the inverse statement: s is the parent, o the child.
        broader.add(str(o))
        narrower.add(str(s))
    elif p == RDF.type and "Concept" in str(o):
        concepts.add(str(s))

# Step 2: Determine bottom-level concepts.
# A leaf term has a parent but is never itself a parent. When the graph has no
# hierarchy links at all, fall back to every typed concept.
leaf_terms = broader - narrower if broader else concepts - narrower

# Step 3: Build the flat rows, sorted by URI so the output file is reproducible.
flat_rows = [
    {"uri": uri, "label": labels.get(uri, "")}
    for uri in sorted(leaf_terms)
]

# Save to CSV
with open(LEAF_CSV_PATH, mode='w', newline='', encoding='utf-8') as f:
    writer = csv.DictWriter(f, fieldnames=["uri", "label"])
    writer.writeheader()
    writer.writerows(flat_rows)

print(f"Wrote {len(flat_rows)} leaf terms to uat_leaf_terms.csv")