In [1]:
from rdflib import Graph, URIRef, Literal, util
from rdflib.namespace import RDF, RDFS
import warnings
from pprint import pprint

In [2]:
class UnexpectedRangeDatatype(UserWarning):
     pass

In [3]:
class MultipleRangeDatatypes(UserWarning):
     pass

In [4]:
g = Graph()
g.parse("http://vps516494.ovh.net/Survol/survol/objtypes.py?xid=.&mode=rdf", format='xml')

<Graph identifier=N0ad2a3283adf4b9aa75a88f154c6f6a3 (<class 'rdflib.graph.Graph'>)>

In [5]:
# Get all subjects of type property, i.e. the properties declared for the business domain:
props = list(g.subjects(RDF.type, RDF.Property))
props.sort()
for p in props:
    print(p)

http://www.primhillcomputers.com/survol#DeviceID
http://www.primhillcomputers.com/survol#Domain
http://www.primhillcomputers.com/survol#File
http://www.primhillcomputers.com/survol#Handle
http://www.primhillcomputers.com/survol#Id
http://www.primhillcomputers.com/survol#Name
http://www.primhillcomputers.com/survol#directory


In [6]:
# Classify business domain properties between attributes and relationships:
if len(props) > 0:
    props_dict = {}
    for prop in props:
        objects = list(g.objects(None, prop))
        num_objs = len(objects)
        prop_dict = {'number_of_triples': num_objs}
        if num_objs > 0:
            num_lits = 0
            num_uri = 0
            num_else = 0
            for obj in objects:
                if isinstance(obj, Literal):
                    num_lits += 1
                elif isinstance(obj, URIRef):
                    num_uri += 1
                else:
                    num_else += 1
            if num_else > 0:
                warning.warn("At least one object was neither a URI or a Literal.", UnexpectedRangeDatatype)
            if num_lits > 0 and num_uri >0:
                warnings.warn("At least one object was URI and another a Literal.", MultipleRangeDatatypes)
            if num_lits >= num_uri: 
                datatype_flag = "attribute" # This will assign any different datatype, multiple datatypes (lit > uri), and num_lits == 0 & num_uri == 0 as a graph attribute.
            else:
                datatype_flag = "relationship"
            prop_dict['number_of_literals'] = num_lits
            prop_dict['number_of_uri'] = num_uri
            prop_dict['property_type'] = datatype_flag
            props_dict[prop] = prop_dict
        else:
            props_dict[prop] = prop_dict
        pprint(props_dict)
else:
    print("There are no properties declared for the business domain.")

{rdflib.term.URIRef('http://www.primhillcomputers.com/survol#DeviceID'): {'number_of_triples': 0}}
{rdflib.term.URIRef('http://www.primhillcomputers.com/survol#DeviceID'): {'number_of_triples': 0},
 rdflib.term.URIRef('http://www.primhillcomputers.com/survol#Domain'): {'number_of_triples': 0}}
{rdflib.term.URIRef('http://www.primhillcomputers.com/survol#DeviceID'): {'number_of_triples': 0},
 rdflib.term.URIRef('http://www.primhillcomputers.com/survol#Domain'): {'number_of_triples': 0},
 rdflib.term.URIRef('http://www.primhillcomputers.com/survol#File'): {'number_of_triples': 0}}
{rdflib.term.URIRef('http://www.primhillcomputers.com/survol#DeviceID'): {'number_of_triples': 0},
 rdflib.term.URIRef('http://www.primhillcomputers.com/survol#Domain'): {'number_of_triples': 0},
 rdflib.term.URIRef('http://www.primhillcomputers.com/survol#File'): {'number_of_triples': 0},
 rdflib.term.URIRef('http://www.primhillcomputers.com/survol#Handle'): {'number_of_triples': 0}}
{rdflib.term.URIRef('http:

In [7]:
# Get property classification:
def get_property_classification(property_uri, property_dict):
    if property_dict[property_uri]['number_of_triples'] > 0:
            return(property_dict[property_uri]['property_type'])

In [8]:
# Define standard and business domain node attributes:
standard_attributes = [URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
                       URIRef('http://www.w3.org/2000/01/rdf-schema#label'), 
                       URIRef('http://www.w3.org/2000/01/rdf-schema#comment'),
                       URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#language')]

domain_attributes = []

In [9]:
# Define standard and business domain relationships:
standard_relationships = [URIRef('http://www.w3.org/2000/01/rdf-schema#subClassOf')]

domain_relationships = []

In [10]:
# Retrieve all business domain attributes and relationships:
for prop in props_dict:
    property_type = get_property_classification(prop, props_dict)
    if property_type == 'attribute':
        domain_attributes.append(prop)
    if property_type == 'relationship':
        domain_relationships.append(prop)

print("Domain Attributes: {}".format(domain_attributes))
print("Domain Relationships: {}".format(domain_relationships))

Domain Attributes: []
Domain Relationships: [rdflib.term.URIRef('http://www.primhillcomputers.com/survol#directory')]


In [11]:
# Aggregate all attributes and relationships:
all_attributes = standard_attributes + domain_attributes
all_relationships = standard_relationships + domain_relationships

pprint("All Attributes: {}".format(all_attributes))
pprint("All Relationships: {}".format(all_relationships))

('All Attributes: '
 "[rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), "
 "rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), "
 "rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#comment'), "
 "rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#language')]")
('All Relationships: '
 "[rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#subClassOf'), "
 "rdflib.term.URIRef('http://www.primhillcomputers.com/survol#directory')]")


In [12]:
# Get the datatype for business
def get_property_range(property_uri, g):
    property = URIRef('http://www.w3.org/2000/01/rdf-schema#range')
    range_objects = g.objects(property_uri, property)
    if range_objects is not None:
        range_type = util.first(range_objects)
        return(range_type)
    else:
        print("Property {} does not have a declared range.".format(property_uri))
        return(range_objects) # Return None

In [13]:
# Get all subjects of type class, i.e. the classes declared for the business domain:
classes = list(g.subjects(RDF.type, RDFS.Class))
classes.sort()
#for c in classes:
#    print(c)

In [16]:
nodes_dict ={}
for c in classes:
    print(c)
    node_attributes = []
    for att_uri in all_attributes:
        att_val = g.objects(c, att_uri)
        if att_val is not None:
            try:
                att_name = att_uri.split('#')[-1]
            except:
                att_name = att_uri
        print("Property: {}\tObject Name: {}".format(att_uri, att_name))
        
        att_range = get_property_range(att_uri, g)
        #if att_range
        print("Range: {}".format(att_range))
    #instances = g.subjects(RDF.type, c)
    #print(list(instances))
    #for i in instances:
    #    props = g.predicates(i, None)    
    #    print(list(props))
    #props = g.predicates(c, None)
    #for p in props:   
    #break

http://www.primhillcomputers.com/survol#AC2
Property: http://www.w3.org/1999/02/22-rdf-syntax-ns#type	Object Name: type
Range: None
Property: http://www.w3.org/2000/01/rdf-schema#label	Object Name: label
Range: None
Property: http://www.w3.org/2000/01/rdf-schema#comment	Object Name: comment
Range: None
Property: http://www.w3.org/1999/02/22-rdf-syntax-ns#language	Object Name: language
Range: None
http://www.primhillcomputers.com/survol#Azure
Property: http://www.w3.org/1999/02/22-rdf-syntax-ns#type	Object Name: type
Range: None
Property: http://www.w3.org/2000/01/rdf-schema#label	Object Name: label
Range: None
Property: http://www.w3.org/2000/01/rdf-schema#comment	Object Name: comment
Range: None
Property: http://www.w3.org/1999/02/22-rdf-syntax-ns#language	Object Name: language
Range: None
http://www.primhillcomputers.com/survol#CIM_ComputerSystem
Property: http://www.w3.org/1999/02/22-rdf-syntax-ns#type	Object Name: type
Range: None
Property: http://www.w3.org/2000/01/rdf-schema#labe