In [None]:
import logging
import os
from datetime import datetime
from re import sub
from typing import Dict, List, Optional, Tuple
from urllib.parse import urlparse, urljoin
from urllib.request import urlopen, pathname2url, url2pathname

from deep_translator import GoogleTranslator
from rdflib import BNode, URIRef, Literal, Graph, Namespace
from rdflib.collection import Collection
from rdflib.namespace import RDF, XSD, RDFS, OWL, SKOS, DCTERMS
from rdflib.plugins.sparql import prepareQuery
from rdflib.util import guess_format

# Configure the root logger (getLogger is called without a name) so that
# messages of level DEBUG and above are processed.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

def path2url(path):
    """Return a ``file:`` URL for a local filesystem path.

    The path is made absolute (relative to the current working directory)
    before it is converted to URL form.
    """
    absolute_path = os.path.abspath(path)
    url_path = pathname2url(absolute_path)
    return urljoin('file:', url_path)

# Current working directory at the time this cell runs.
# NOTE(review): the name `dir` shadows the Python builtin and is not used in the visible code.
dir=os.getcwd()
# Namespaces used to build and look up IRIs.
PMDCO = Namespace('https://w3id.org/pmd/co/')
bfo2020_url='http://purl.obolibrary.org/obo/bfo/2020/bfo.owl'
BFO = Namespace(bfo2020_url+"/")    
OBO = Namespace('http://purl.obolibrary.org/obo/')
PROV= Namespace('http://www.w3.org/ns/prov#')
IOFAV = Namespace('https://spec.industrialontologies.org/ontology/core/meta/AnnotationVocabulary/')
# NOTE(review): unlike the namespaces above, this one has no trailing '/' or '#';
# it is not used in the visible code - confirm the intended separator before using it.
PATO = Namespace('http://purl.obolibrary.org/obo/pato/releases/2023-05-18/pato-full.owl')

# Name written into the IAO_0000117 annotation ("PERSON: <editor>") of generated classes.
editor="Thomas Hanke"

# Ontology file to curate and the local PATO file, both resolved to file: URLs
# relative to the current working directory (see path2url).
filename="pmdco-qualities.ttl"
this_ontology_url=path2url(filename)
pato_source="pato-full.owl"
pato_url=path2url(pato_source)
# Snake case - your_term
def snake_case(s):
    """Convert a hyphenated, spaced or CamelCase string to snake_case."""
    # Hyphens act as word separators.
    spaced = s.replace('-', ' ')
    # Insert a space in front of every run of capital letters ...
    spaced = sub('([A-Z]+)', r' \1', spaced)
    # ... and in front of every capitalised word, so acronyms split from the word that follows.
    spaced = sub('([A-Z][a-z]+)', r' \1', spaced)
    words = spaced.split()
    return '_'.join(words).lower()

# Camel case - yourTerm
def lower_camel_case(s):
    """Convert a string separated by spaces, underscores or hyphens to lowerCamelCase.

    Runs of ``_`` or ``-`` are treated as word separators, every word is
    title-cased, the words are joined and the first character is lowered.
    An empty string, or one made only of separators, yields ``""``.
    """
    joined = sub(r"(_|-)+", " ", s).title().replace(" ", "")
    # Slicing instead of indexing: joined[0] would raise IndexError when nothing is left.
    return joined[:1].lower() + joined[1:]

# Pascal case - YourTerm
def upper_camel_case(s):
    """Convert a string separated by spaces, underscores or hyphens to UpperCamelCase."""
    # Runs of '_' or '-' become a single space so they count as word boundaries.
    spaced = sub(r"(_|-)+", " ", s)
    titled = spaced.title()
    # Dropping the spaces glues the title-cased words together.
    return "".join(titled.split(" "))

def parse_graph(url: str, graph: Optional[Graph] = None, format: str = "") -> Graph:
    """Parse an RDF document referenced by a URL into an rdflib graph.

    Args:
        url (str): ``http://``, ``https://`` or ``file:`` URL of the document.
        graph (Graph, optional): Existing graph to parse the data into. When
            omitted, a new empty graph is created for this call.
        format (str): rdflib parser name. When empty, it is guessed from the
            path component of the URL.

    Returns:
        Graph: The graph the data was parsed into, with the prefix ``meta``
        bound to ``url + "/"``. For any other URL scheme nothing is parsed,
        a warning is logged and the graph is returned as it was passed in.
    """
    logging.debug("parsing graph from {}".format(url))
    # A fresh graph per call: a Graph() default in the signature would be
    # created once and silently shared by every call that omits `graph`.
    if graph is None:
        graph = Graph()
    parsed_url = urlparse(url)
    if not format:
        format = guess_format(parsed_url.path)
    if parsed_url.scheme in ("https", "http"):
        # Close the HTTP response as soon as the payload has been read.
        with urlopen(parsed_url.geturl()) as response:
            payload = response.read()
        graph.parse(data=payload, format=format)
    elif parsed_url.scheme == "file":
        # The URL path is percent-encoded (e.g. "%20" for a space); convert
        # it back to a filesystem path before handing it to the parser.
        graph.parse(url2pathname(parsed_url.path), format=format)
    else:
        logging.warning(
            "unsupported url scheme '{}' in {}; nothing was parsed".format(parsed_url.scheme, url)
        )
    graph.bind("meta", Namespace(url + "/"))
    return graph

def add_ontology_header(g):
    """Bind the prefixes used by this ontology on ``g`` and return ``g``."""
    prefix_bindings = (
        ('owl', OWL),
        ('bfo', BFO),
        ('obo', OBO),
        ('skos', SKOS),
        ('dcterms', DCTERMS),
        ('iof-av', IOFAV),
        ('pmdco', PMDCO),
        ('prov', PROV),
    )
    for prefix, namespace in prefix_bindings:
        g.bind(prefix, namespace)
    return g

# Prepared SPARQL query: every ?entity that reaches ?parent through zero or
# more rdfs:subClassOf steps. ?parent is meant to be supplied by the caller
# through initBindings.
# NOTE(review): no initNs is passed, so the `rdfs:` prefix has to be resolvable
# by rdflib's defaults - confirm for the rdflib version in use.
sub_classes = prepareQuery("SELECT ?entity WHERE {?entity rdfs:subClassOf* ?parent}")

# Prepared SPARQL query: every (?entity, ?label) pair connected by rdfs:label.
all_labels = prepareQuery("SELECT ?entity ?label WHERE {?entity rdfs:label ?label}")

def get_all_sub_classes(superclass: URIRef, ontology: Graph, authorization=None) -> List[URIRef]:
    """Collect every class that is a (transitive) subclass of ``superclass``.

    Runs the prepared ``sub_classes`` query with ``?parent`` bound to
    ``superclass``. The query uses the ``rdfs:subClassOf*`` path, which also
    matches zero steps, so the result may contain ``superclass`` itself.

    Args:
        superclass (URIRef): Class whose subclasses are looked up.
        ontology (Graph): Graph that is queried.
        authorization: Not used by this function.

    Returns:
        List[URIRef]: First column of every result row.
    """
    rows = ontology.query(sub_classes, initBindings={"parent": superclass})
    classes = [row[0] for row in rows]
    logging.info("Found following subclasses of {}: {}".format(superclass, classes))
    return classes

def add_morphologic_shape_qualities(g: Graph, pato_graph: Graph):
    """Mirror the PATO shape classes (PATO_0000052 and below) as PMDCO classes.

    The class in ``g`` labelled "Shape"@en is declared equivalent to the PATO
    shape class. For every other class returned by ``get_all_sub_classes``
    that has an rdfs:label in ``pato_graph``, a PMDCO class is added to ``g``
    whose IRI is ``PMDCO + UpperCamelCase(label)``, together with an
    owl:equivalentClass link to the PATO class, the English label, the PATO
    definition (IAO_0000115) as skos:definition when present, and an
    IAO_0000117 annotation naming the editor.

    Args:
        g (Graph): Ontology graph that receives the new classes.
        pato_graph (Graph): Graph holding the PATO ontology.

    Returns:
        Graph: ``g``, modified in place.
    """
    pato_shape_class=OBO.PATO_0000052
    pmd_shape=g.value(predicate=RDFS.label,object=Literal("Shape", lang="en"))
    added=0
    for shape in get_all_sub_classes(pato_shape_class,pato_graph):
        # The root class is not copied, only linked to the existing PMDCO class.
        if shape==pato_shape_class:
            if pmd_shape is None:
                # g.add would be handed None as subject; skip the link instead.
                logging.warning('no class labelled "Shape"@en found, cannot link it to {}'.format(pato_shape_class))
            else:
                g.add((pmd_shape,OWL.equivalentClass,pato_shape_class))
            continue
        label=None
        definition=None
        for _, p, o in pato_graph.triples((shape,None,None)):
            if p==RDFS.label:
                label=str(o)
            elif p==OBO.IAO_0000115:
                definition=o
        # Without a label no IRI can be derived, so the class is not imported.
        if not label:
            continue
        iri=URIRef(PMDCO+upper_camel_case(label))
        g.add((iri,RDF.type,OWL.Class))
        g.add((iri,OWL.equivalentClass,shape))
        g.add((iri,RDFS.label,Literal(label,lang='en')))
        if definition:
            g.add((iri,SKOS.definition,definition))
        g.add((iri,OBO.IAO_0000117,Literal("PERSON: " + editor )))
        # Count only classes that were actually written to g.
        added+=1
    logging.info("added {} shape entities from pato".format(added))
    return g

# Copies the subclass relations of the PATO classes that are equivalent to classes in g
def copy_subclass_relations(g: Graph, pato_graph: Optional[Graph] = None):
    """Copy rdfs:subClassOf relations between PATO classes onto their equivalents in ``g``.

    For every ``s owl:equivalentClass o`` triple in ``g`` whose object is a
    URIRef, the direct superclasses of ``o`` are looked up in the PATO graph.
    Whenever such a superclass itself has an equivalent class in ``g``,
    ``s rdfs:subClassOf <that class>`` is added to ``g``.

    Args:
        g (Graph): Ontology graph, modified in place.
        pato_graph (Graph, optional): Graph holding the PATO ontology. When
            omitted, the module-level ``pato`` graph is used, which therefore
            has to exist at call time.

    Returns:
        Graph: ``g``.
    """
    # Compare with None instead of testing truthiness: an empty graph is falsy.
    source = pato if pato_graph is None else pato_graph
    # Take a snapshot of the matches first because the loop below adds triples to g.
    equivalences = [
        (s, o)
        for s, _, o in g.triples((None, OWL.equivalentClass, None))
        if isinstance(o, URIRef)
    ]
    added = 0
    for pmd_class, pato_class in equivalences:
        for pato_parent in list(source.objects(pato_class, RDFS.subClassOf)):
            pmd_parent = g.value(predicate=OWL.equivalentClass, object=pato_parent, any=False)
            if not pmd_parent:
                continue
            relation = (pmd_class, RDFS.subClassOf, pmd_parent)
            # Count only relations that were not in the graph before.
            if relation not in g:
                g.add(relation)
                added += 1
    logging.info("added {} subclass relations from equivalent pato entities".format(added))
    return g

def translate_labels(g: Graph, language: str='de'):
    """Add machine-translated labels for entities lacking one in ``language``.

    Every entity that has a non-empty English rdfs:label but no rdfs:label
    tagged with ``language`` gets a new label, obtained by translating the
    English one with GoogleTranslator and tagged with ``language``. Each
    entity changed this way is also given the curation status
    IAO_0000114 -> IAO_0000428.

    Args:
        g (Graph): Ontology graph, modified in place.
        language (str): Target language code; used for the translator, for
            the "already has a label" check and for the tag of the new literal.

    Returns:
        Graph: ``g``.
    """
    translator=GoogleTranslator(source='auto', target=language)
    # entity -> {language tag: label}; when an entity has several labels in
    # one language, the one seen last is kept.
    labels_by_entity={}
    for thing, label in g.query(all_labels):
        # getattr: a label object that is not a literal has no language attribute.
        labels_by_entity.setdefault(thing,{})[getattr(label,'language',None)]=label

    for thing, by_lang in labels_by_entity.items():
        english_label=by_lang.get('en')
        # Nothing to translate from, or a label in the target language already exists.
        if not english_label or language in by_lang:
            continue
        translated_label=Literal(translator.translate(english_label),lang=language)
        logging.info('adding [{}] label {} for entity {}'.format(language, translated_label, thing))
        g.add((thing,RDFS.label,translated_label))
        #add curation status - requires discussion
        g.add((thing,OBO.IAO_0000114,OBO.IAO_0000428))
    return g

def entitle_all_labels(g: Graph):
    """Replace every rdfs:label in ``g`` by its title-cased form.

    Each literal label whose text differs from ``str.title()`` of itself is
    removed and re-added in title case with the same language tag. Every
    entity changed this way is also given the curation status
    IAO_0000114 -> IAO_0000428. Label objects that are not literals are
    left untouched.

    Args:
        g (Graph): Ontology graph, modified in place.

    Returns:
        Graph: ``g``.
    """
    # Materialise the query result first: the loop removes and adds triples.
    # Iterating the rows directly (instead of grouping them by language)
    # makes sure every label is handled, also when an entity has several
    # labels in the same language.
    for thing, label in list(g.query(all_labels)):
        if not isinstance(label, Literal):
            continue
        original_text=str(label)
        entitled_label=original_text.title()
        if original_text==entitled_label:
            continue
        logging.info('replacing label [{}] with [{}] on {}'.format(original_text, entitled_label, thing))
        #remove old label
        g.remove((thing,RDFS.label,label))
        #add capitalized one
        g.add((thing,RDFS.label,Literal(entitled_label,lang=label.language)))
        #add curation status - requires discussion
        g.add((thing,OBO.IAO_0000114,OBO.IAO_0000428))
    return g

def add_morphologic_size_qualities(g: Graph, pato_graph: Graph):
    """Mirror the PATO size classes (PATO_0000117 and below) as PMDCO classes.

    The class in ``g`` labelled "Size"@en is declared equivalent to the PATO
    size class. For every other class returned by ``get_all_sub_classes``
    that has an rdfs:label in ``pato_graph``, a PMDCO class is added to ``g``
    whose IRI is ``PMDCO + UpperCamelCase(label)``, together with an
    owl:equivalentClass link to the PATO class, the English label, the PATO
    definition (IAO_0000115) as skos:definition when present, and an
    IAO_0000117 annotation naming the editor.

    Args:
        g (Graph): Ontology graph that receives the new classes.
        pato_graph (Graph): Graph holding the PATO ontology.

    Returns:
        Graph: ``g``, modified in place.
    """
    pato_size_class=OBO.PATO_0000117
    # Root classes whose subclass trees are imported. Narrower roots
    # (PATO_0001708, PATO_0001709, PATO_0001710) could be listed here instead.
    pato_size_classes_toadd=[OBO.PATO_0000117]
    pmd_size=g.value(predicate=RDFS.label,object=Literal("Size", lang="en"))
    candidates=[pato_size_class]
    for size_class in pato_size_classes_toadd:
        candidates.extend(get_all_sub_classes(size_class,pato_graph))
    # The size class is seeded above and can be returned again by the query;
    # drop such repeats (keeping the order) so nothing is handled or counted twice.
    pato_sizes=list(dict.fromkeys(candidates))
    added=0
    for size in pato_sizes:
        # The root class is not copied, only linked to the existing PMDCO class.
        if size==pato_size_class:
            if pmd_size is None:
                # g.add would be handed None as subject; skip the link instead.
                logging.warning('no class labelled "Size"@en found, cannot link it to {}'.format(pato_size_class))
            else:
                g.add((pmd_size,OWL.equivalentClass,pato_size_class))
            continue
        label=None
        definition=None
        for _, p, o in pato_graph.triples((size,None,None)):
            if p==RDFS.label:
                label=str(o)
            elif p==OBO.IAO_0000115:
                definition=o
        # Without a label no IRI can be derived, so the class is not imported.
        if not label:
            continue
        iri=URIRef(PMDCO+upper_camel_case(label))
        g.add((iri,RDF.type,OWL.Class))
        g.add((iri,OWL.equivalentClass,size))
        g.add((iri,RDFS.label,Literal(label,lang='en')))
        if definition:
            g.add((iri,SKOS.definition,definition))
        g.add((iri,OBO.IAO_0000117,Literal("PERSON: " + editor )))
        # Count only classes that were actually written to g.
        added+=1
    logging.info("added {} size entities from pato".format(added))
    return g


In [None]:
# Load the local PATO file into a graph. The result is kept in the global
# `pato`, which the next cell passes on and copy_subclass_relations reads.
pato=parse_graph(pato_url)

In [None]:

# Curation pipeline: load the ontology, enrich it from PATO, then normalise labels.
print(this_ontology_url)
onto=Graph()
onto=parse_graph(this_ontology_url,graph=onto)
# Bind the namespace prefixes used in the serialised output.
onto=add_ontology_header(onto)
# Import the PATO shape and size classes as PMDCO classes.
onto=add_morphologic_shape_qualities(onto,pato_graph=pato)
onto=add_morphologic_size_qualities(onto,pato_graph=pato)
# Rebuild the subclass hierarchy among the imported classes.
onto=copy_subclass_relations(onto)
# Translation runs before title-casing, so the translated labels are title-cased as well.
onto=translate_labels(onto,language='de')
onto=entitle_all_labels(onto)

# Write the result as Turtle to "curated_" + filename.
onto.serialize("curated_"+filename,format='turtle')