In [52]:
from rdflib import Graph, URIRef, BNode
import networkx as nx
from pyscal_rdf.network import Network
import numpy as np

In [5]:
# Load the local RDF/XML copy of the CMSO ontology into an rdflib graph.
g = Graph()
# Alternative source, kept for reference: read the ontology from its PURL instead.
#g.parse("http://purls.helmholtz-metadaten.de/cmso/", format="xml")
g.parse("../pyscal_rdf/data/cmso.owl", format='xml')

<Graph identifier=Nf404f6609b18440595f7166a76a14b67 (<class 'rdflib.graph.Graph'>)>

RDF/OWL terms we need to look up in the ontology graph:

- http://www.w3.org/2000/01/rdf-schema#domain
- http://www.w3.org/2000/01/rdf-schema#range
- http://www.w3.org/2002/07/owl#ObjectProperty
- http://www.w3.org/2002/07/owl#DatatypeProperty
- http://www.w3.org/2002/07/owl#Class
- http://www.w3.org/2002/07/owl#AnnotationProperty

In [53]:
class OntologyNetwork:
    """
    Read an OWL ontology and collect its classes and properties.

    The ontology is parsed with rdflib into ``self.g``. Every class found is
    also added as a node (``node_type='class'``) to the networkx directed
    graph ``self.ng``.

    Parameters
    ----------
    owlfile : str, optional
        Location of an RDF/XML ontology file. When given it is parsed
        immediately; otherwise call `parse` later.
    """

    # Namespace prefixes are kept as plain strings and wrapped in URIRef
    # at the point of use.
    _RDF_NS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
    _RDFS_NS = "http://www.w3.org/2000/01/rdf-schema#"
    _OWL_NS = "http://www.w3.org/2002/07/owl#"

    def __init__(self, owlfile=None):
        # rdflib graph and networkx graph; both are created by `parse`.
        self.g = None
        self.ng = None
        # Subjects declared as owl:ObjectProperty / owl:DatatypeProperty / owl:Class.
        self.object_property_list = None
        self.data_property_list = None
        self.class_list = None
        if owlfile is not None:
            self.parse(owlfile)

    def parse(self, owlfile):
        """
        Parse an RDF/XML ontology file and fill the property and class lists.

        Parameters
        ----------
        owlfile : str
            Location of the ontology, passed to ``rdflib.Graph.parse`` with
            ``format='xml'``.
        """
        self.g = Graph()
        self.ng = nx.DiGraph()
        self.g.parse(owlfile, format='xml')
        self._get_object_property()
        self._get_datatype_property()
        self._get_classes()

    def _subjects_of_type(self, owl_term):
        """Return the subjects declared with ``rdf:type owl:<owl_term>``."""
        # Restrict the predicate to rdf:type: a wildcard predicate would also
        # match unrelated triples that merely have the OWL term as object
        # (for example ``rdfs:range owl:Class``).
        pattern = (None, URIRef(self._RDF_NS + "type"), URIRef(self._OWL_NS + owl_term))
        return [triple[0] for triple in self.g.triples(pattern)]

    def _objects(self, subject, predicate):
        """Return the objects of all ``(subject, predicate, ?)`` triples."""
        return [triple[2] for triple in self.g.triples((subject, URIRef(predicate), None))]

    def _get_object_property(self):
        """Store every declared owl:ObjectProperty in ``object_property_list``."""
        self.object_property_list = self._subjects_of_type("ObjectProperty")

    def _get_datatype_property(self):
        """Store every declared owl:DatatypeProperty in ``data_property_list``."""
        self.data_property_list = self._subjects_of_type("DatatypeProperty")

    def _get_classes(self):
        """
        Store every declared owl:Class in ``class_list`` and add each one to
        ``self.ng`` as a node with ``node_type='class'``.
        """
        classes = self._subjects_of_type("Class")
        for cls in classes:
            self.ng.add_node(cls, node_type='class')
        self.class_list = classes

    def _get_domain_and_range(self):
        """Print the rdfs:domain and rdfs:range of every object property."""
        for prop in self.object_property_list:
            domain = self._objects(prop, self._RDFS_NS + "domain")
            # Named ``range_`` so the builtin ``range`` is not shadowed.
            range_ = self._objects(prop, self._RDFS_NS + "range")
            print("-------------------------")
            print(prop)
            print(domain)
            print(range_)

    def _parse_bnode(self, bnode, class_to_add=None, class_to_delete=None):
        """
        Collect the classes listed under a blank node.

        Parameters
        ----------
        bnode : rdflib term
            Node to inspect.
        class_to_add : list, optional
            Accumulator for the ``rdf:first`` objects that are found. It is
            extended in place; a new list is created when omitted.
        class_to_delete : list, optional
            Accumulator for the nodes that carry an ``rdf:first`` triple. It
            is extended in place; a new list is created when omitted.

        Returns
        -------
        class_to_add, class_to_delete : list, list
            The two accumulators, on every code path.

        Notes
        -----
        There are three possibilities: (1) unionOf: which gives another BNode
        then (2) first: gives one class of the union
        then (3) rest: gives a BNode
            rest BNode -> first -> Class
            rest -> Nil
        Only the first match of each predicate is followed.
        """
        # Fresh lists per call: mutable default arguments would be shared
        # between calls and silently carry over earlier results.
        if class_to_add is None:
            class_to_add = []
        if class_to_delete is None:
            class_to_delete = []

        # (1) a union node points at the head of an RDF list
        union_heads = self._objects(bnode, self._OWL_NS + "unionOf")
        if union_heads:
            self._parse_bnode(union_heads[0], class_to_add, class_to_delete)

        # (2) a list node holds one member in rdf:first ...
        members = self._objects(bnode, self._RDF_NS + "first")
        if members:
            class_to_add.append(members[0])
            class_to_delete.append(bnode)
            # (3) ... and the remainder of the list in rdf:rest (rdf:nil ends it)
            tails = self._objects(bnode, self._RDF_NS + "rest")
            if tails and tails[0] != URIRef(self._RDF_NS + "nil"):
                self._parse_bnode(tails[0], class_to_add, class_to_delete)

        return class_to_add, class_to_delete

    def parse_bnodes(self):
        """
        Resolve all blank-node classes in ``class_list``.

        Returns
        -------
        class_to_add : numpy.ndarray
            Sorted unique members found inside the blank-node classes.
        class_to_delete : numpy.ndarray
            Sorted unique blank nodes that carried an ``rdf:first`` triple.

        Notes
        -----
        ``np.unique`` converts the rdflib terms to a numpy string array, so
        the returned items are strings rather than ``URIRef``/``BNode``.
        """
        class_to_add = []
        class_to_delete = []
        for cls in self.class_list:
            if isinstance(cls, BNode):
                self._parse_bnode(cls, class_to_add, class_to_delete)
        return np.unique(class_to_add), np.unique(class_to_delete)
        

In [54]:
# Build the ontology network from the local CMSO OWL file; passing the path
# makes the constructor parse it straight away.
o = OntologyNetwork("../pyscal_rdf/data/cmso.owl")

In [55]:
# Walk the blank-node entries of the class list and gather the classes listed under them.
o.parse_bnodes()

(array(['http://purls.helmholtz-metadaten.de/cmso/Atom',
        'http://purls.helmholtz-metadaten.de/cmso/Basis',
        'http://purls.helmholtz-metadaten.de/cmso/LatticeParameter',
        'http://purls.helmholtz-metadaten.de/cmso/Length',
        'http://purls.helmholtz-metadaten.de/cmso/SimulationCell',
        'http://purls.helmholtz-metadaten.de/cmso/UnitCell'], dtype='<U57'),
 array(['N250aafdf40244753a02df505dae3ca8a',
        'N33afaa42a8b74d5d9aafcac60027e0fc',
        'N4340c32be14f45e39175f798466b5e44',
        'N579ec6001b06409cb7566c14790fcd9b',
        'N6b4530a20f6648b2ba5b46560276da85',
        'N7a1b6a4a889943abae2a95c419ec3979',
        'N7a573e1b217045f29311c434bb8ca795',
        'N8dbe204392c94de08c1bf4ba3eefb19c',
        'Nbee36b7f12b442f0b8369298248d2f5b',
        'Nd4eb367b4e1b48da88b00cea293610bf',
        'Ndf3fa3ff3eef4230954cd317a4040a08',
        'Neb51e55aba7045f999aaa6aad984712b'], dtype='<U33'))

In [None]:
def parse_bnodes(bnode):
    """
    Unfinished stand-alone blank-node parser.

    This cell was left without a body, which made it a syntax error. The
    working implementation is the `OntologyNetwork.parse_bnodes` method
    defined earlier in this notebook.

    Parameters
    ----------
    bnode : rdflib.BNode
        Blank node that was meant to be parsed.

    Raises
    ------
    NotImplementedError
        Always, because no stand-alone implementation exists.
    """
    raise NotImplementedError(
        "parse_bnodes(bnode) is an unfinished stub; use OntologyNetwork.parse_bnodes()"
    )

In [39]:
# Print every triple whose subject is the given blank node (here: a class
# node that carries owl:unionOf).
# NOTE(review): blank-node ids look specific to one parse of the file, so this
# literal id probably has to be replaced after re-running the notebook.
for s in o.g:
    if s[0] != BNode('Nac2cb41d1f7945998d81e8db5cca8cec'):
        continue
    print(s)

(rdflib.term.BNode('Nac2cb41d1f7945998d81e8db5cca8cec'), rdflib.term.URIRef('http://www.w3.org/2002/07/owl#unionOf'), rdflib.term.BNode('Nbb9ef20d990a4eee9da802828006194c'))
(rdflib.term.BNode('Nac2cb41d1f7945998d81e8db5cca8cec'), rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), rdflib.term.URIRef('http://www.w3.org/2002/07/owl#Class'))


In [40]:
# Print every triple whose subject is the head node of the union list
# (rdf:first / rdf:rest).
# NOTE(review): this id is absent from the parse_bnodes output shown earlier,
# so blank-node ids appear to change between parses; confirm before reuse.
for s in o.g:
    if s[0] != BNode('Nbb9ef20d990a4eee9da802828006194c'):
        continue
    print(s)

(rdflib.term.BNode('Nbb9ef20d990a4eee9da802828006194c'), rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#first'), rdflib.term.URIRef('http://purls.helmholtz-metadaten.de/cmso/SimulationCell'))
(rdflib.term.BNode('Nbb9ef20d990a4eee9da802828006194c'), rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#rest'), rdflib.term.BNode('N28fec1db343247ea85888e9360c561e1'))


In [41]:
# Print every triple whose subject is the next list node, reached via rdf:rest.
# NOTE(review): this id is absent from the parse_bnodes output shown earlier,
# so blank-node ids appear to change between parses; confirm before reuse.
for s in o.g:
    if s[0] != BNode('N28fec1db343247ea85888e9360c561e1'):
        continue
    print(s)

(rdflib.term.BNode('N28fec1db343247ea85888e9360c561e1'), rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#rest'), rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#nil'))
(rdflib.term.BNode('N28fec1db343247ea85888e9360c561e1'), rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#first'), rdflib.term.URIRef('http://purls.helmholtz-metadaten.de/cmso/UnitCell'))
