In [98]:
from rdflib import Graph
import rdflib

# Class definition

In [92]:
class SHACLtoQuery:
    """Generate a SPARQL SELECT query from the SHACL shape rooted at a node.

    ``get_query`` runs the whole pipeline: it walks the shape graph with small
    SPARQL queries, stringifies the nested result, assigns every shape node a
    short ``node_N`` variable name, emits one triple pattern per ``class`` /
    ``path`` entry and finally drops lines that reference ``?None``.
    """

    # PREFIX declarations shared by the shape-walking queries and the
    # generated query.
    _PREFIX_LINES = (
        "PREFIX s223: <http://data.ashrae.org/standard223#>",
        "PREFIX unit: <http://qudt.org/vocab/unit/>",
        "PREFIX quantitykind: <http://qudt.org/vocab/quantitykind/>",
        "PREFIX qudt: <http://qudt.org/schema/qudt/>",
        "PREFIX sh: <http://www.w3.org/ns/shacl#>",
        "PREFIX owl: <http://www.w3.org/2002/07/owl#>",
        "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>",
        "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>",
        "PREFIX g36: <http://data.ashrae.org/standard223/1.0/extensions/g36#>",
    )
    # Leading newline, the indented PREFIX lines, then one blank line.
    _QUERY_PREAMBLE = "\n" + "".join(
        f"                {line}\n" for line in _PREFIX_LINES
    ) + "\n"

    # One graph pattern per piece of information collected for a shape node.
    # The dict key is also the SPARQL variable the pattern binds.
    _SHAPE_FRAGMENTS = {
        'hasValue' : '?node sh:hasValue ?hasValue .',
        'class': '?node sh:class ?class . ',
        'path': '?node sh:path ?path .',
        'next_node': '?node ( (sh:or|sh:and)/rdf:rest*/rdf:first | sh:property | sh:node/sh:property | sh:qualifiedValueShape ) ?next_node .'
    }

    def __init__(self, graph):
        """Store the graph holding the SHACL shapes to be queried."""
        self.graph = graph

    def get_query(self, top_node):
        """Return the cleaned SPARQL query text for the shape at ``top_node``.

        Every intermediate product is kept on the instance
        (``complex_shacl_dict``, ``node_name_map``, ``mapped_shacl_dict``,
        ``dict_junctions``, ``parent_dict``, ``raw_sparql_query``,
        ``cleaned_sparql_query``) so it can be inspected afterwards.
        """
        self.complex_shacl_dict = self.get_shacl_dict(top_node)

        str_shacl_dict = self.get_str_shacl_dict(self.complex_shacl_dict)

        self.node_name_map = self.get_node_name_map(str_shacl_dict)

        self.mapped_shacl_dict = self.convert_with_map(str_shacl_dict, self.node_name_map)

        self.dict_junctions, self.parent_dict = self.get_dict_junctions(self.mapped_shacl_dict)

        self.raw_sparql_query = self.get_sparql_query(
            self.mapped_shacl_dict, self.dict_junctions, self.parent_dict
        )

        self.cleaned_sparql_query = self.delete_unhandled_paths(self.raw_sparql_query)

        return self.cleaned_sparql_query

    @classmethod
    def _build_shape_query(cls, fragment, node):
        """Return a SELECT query evaluating ``fragment`` with ``?node`` fixed to ``node``."""
        return (
            cls._QUERY_PREAMBLE
            + "                SELECT *\n"
            + "                WHERE {\n"
            + "        "
            + f"\t{fragment}\n"
            + f"\tFILTER(?node = <{node}>)\n"
            + "}"
        )

    def get_shacl_dict(self, node):
        """Walk the shape graph from ``node`` and return a nested dict.

        Each visited node maps to a dict of the bindings found for it (keys are
        the query variables ``node``, ``hasValue``, ``class``, ``path``,
        ``next_node``). Every ``next_node`` reached is walked in turn and stored
        inside its parent's dict under the next node itself.

        Queries run against ``self.graph`` (the graph given to the
        constructor), not against a module-level variable.
        """
        def recursive_query(results, node):
            # One query per fragment, each restricted to the current node.
            for key, fragment in self._SHAPE_FRAGMENTS.items():
                res = self.graph.query(self._build_shape_query(fragment, node))
                variable = rdflib.term.Variable(key)
                # Usually a single binding; sh:or / sh:and / several properties
                # produce more than one.
                for binding in res.bindings:
                    subject = binding.get('node')
                    entry = results.get(subject)
                    if entry is None:
                        # First information seen for this node.
                        results[subject] = {}
                        results[subject].update(binding)
                    elif entry.get(variable) is None:
                        # Node known, but this variable not stored yet.
                        entry.update(binding)
                    elif entry.get(variable) == binding.get(variable):
                        print('values are the same')
                    else:
                        # A second, different value: the first one is kept; a
                        # differing next_node is still walked below.
                        print('values are different')
                    if binding.get('next_node', None):
                        recursive_query(results[subject], binding.get('next_node'))
            return results

        return recursive_query({}, node)

    def get_str_shacl_dict(self, val_dict):
        """Return a copy of ``val_dict`` with every key and leaf value passed through ``str``."""
        return {
            str(key): self.get_str_shacl_dict(value) if isinstance(value, dict) else str(value)
            for key, value in val_dict.items()
        }

    def get_node_name_map(self, val_dict):
        """Map every shape node in ``val_dict`` to a ``node_N`` variable name.

        ``N`` is derived from the position of the node's key in the nested
        dict: the counter is passed by value into each recursion level.
        Different nodes can therefore end up with the same number; when that
        happens the later node (in map insertion order) is renamed to the next
        number above the highest one in use, so each node gets its own name.
        """
        node_name_map = {}

        def assign_names(sub_dict, counter):
            for key, value in sub_dict.items():
                counter += 1
                if isinstance(value, dict):
                    assign_names(value, counter)
                    # The key of a nested dict is the node itself; this name
                    # overrides any provisional one given while recursing.
                    node_name_map[key] = f"node_{counter}"
                elif key in node_name_map:
                    continue
                elif 'node' in key:
                    # 'node' / 'next_node' entries hold a node as their value.
                    node_name_map[value] = f"node_{counter}"

        assign_names(val_dict, 0)

        highest = max(
            (int(name.rsplit('_', 1)[1]) for name in node_name_map.values()),
            default=0,
        )
        claimed = set()
        for node, name in list(node_name_map.items()):
            if name in claimed:
                highest += 1
                name = f"node_{highest}"
                node_name_map[node] = name
            claimed.add(name)
        return node_name_map

    def convert_with_map(self, val_dict, node_name_map):
        """Return ``val_dict`` with nodes replaced by their names from ``node_name_map``.

        Keys of nested dicts are always replaced. Leaf values are replaced only
        under keys containing ``'node'`` (``node`` / ``next_node``), so a
        ``class``, ``path`` or ``hasValue`` IRI that happens to equal a mapped
        node is left untouched.
        """
        converted = {}
        for key, value in val_dict.items():
            if isinstance(value, dict):
                converted[node_name_map[key]] = self.convert_with_map(value, node_name_map)
            elif 'node' in key and value in node_name_map:
                converted[key] = node_name_map[value]
            else:
                converted[key] = value
        return converted

    def get_dict_junctions(self, node_dict):
        """Find nodes that share a parent with at least one sibling node.

        Returns ``(dict_junctions, parents_dict)``: ``dict_junctions`` lists the
        keys of nested dicts that have a sibling nested dict, and
        ``parents_dict`` maps a node to the set of its direct children that
        appear in ``dict_junctions``.
        """
        dict_junctions = []
        parents_dict = {}

        def walk(shape):
            first_child = None   # first nested dict seen, not yet recorded
            seen_child = False
            for key, value in shape.items():
                if not isinstance(value, dict):
                    continue
                if seen_child:
                    # A second child makes the first one a junction as well.
                    if first_child:
                        dict_junctions.append(first_child)
                        first_child = None
                    dict_junctions.append(key)
                else:
                    first_child = key
                    seen_child = True
                walk(value)
                junction_children = set(value.keys()).intersection(dict_junctions)
                if junction_children:
                    parents_dict[key] = junction_children

        walk(node_dict)
        return dict_junctions, parents_dict

    def get_sparql_query(self, node_dict, dict_junctions=None, parent_dict=None):
        """Build the raw SPARQL query text for a name-mapped shape dict.

        Parameters:
            node_dict: nested dict as returned by ``convert_with_map``.
            dict_junctions, parent_dict: results of ``get_dict_junctions``.
                When omitted, the attributes of the same name on the instance
                are used if present; otherwise they are computed from
                ``node_dict``.

        A node with a ``class`` yields ``?node a <class> .``; a node with a
        ``path`` yields ``?subject <path> <hasValue> .`` or
        ``?subject <path> ?next_node.``. The subject is the previously emitted
        node, or for a junction node its parent. A junction with no known
        parent produces a ``?None`` subject, which ``delete_unhandled_paths``
        removes.
        """
        if dict_junctions is None:
            dict_junctions = getattr(self, 'dict_junctions', None)
        if parent_dict is None:
            parent_dict = getattr(self, 'parent_dict', None)
        if dict_junctions is None or parent_dict is None:
            dict_junctions, parent_dict = self.get_dict_junctions(node_dict)

        pattern_lines = []

        def find_parent(node_name):
            # The last matching parent wins; None when there is no match.
            parent = None
            for candidate, children in parent_dict.items():
                if isinstance(children, (set, frozenset, list, tuple)) and node_name in children:
                    parent = candidate
            return parent

        def emit(shape, previous_node):
            node_name = shape.get('node')
            if 'class' in shape:
                pattern_lines.append(f"\t?{node_name} a <{shape.get('class')}> .\n")
            if 'path' in shape:
                if node_name in dict_junctions:
                    # Siblings hang off their common parent, not off whatever
                    # node happened to be emitted last.
                    previous_node = find_parent(node_name)
                if shape.get('hasValue'):
                    target = f"<{shape.get('hasValue')}> ."
                else:
                    target = f"?{shape.get('next_node')}."
                pattern_lines.append(f"\t?{previous_node} <{shape.get('path')}> {target}\n")
            return node_name

        def construct(shape, previous_node):
            previous_node = emit(shape, previous_node)
            for value in shape.values():
                if isinstance(value, dict):
                    previous_node = construct(value, previous_node)
            return previous_node

        construct(node_dict, None)

        return (
            self._QUERY_PREAMBLE
            + "                SELECT *\n"
            + "                    WHERE {\n"
            + "                "
            + "".join(pattern_lines)
            + "}"
        )

    def delete_unhandled_paths(self, query):
        """Return ``query`` without the lines that contain ``?None``.

        Each dropped line is printed with an explanatory message. Every kept
        line is re-emitted with a leading newline.
        """
        kept = []
        for line in query.split('\n'):
            if '?None' in line:
                print(line)
                print('Unhandled line, likely due to complex property path')
            else:
                kept.append(line)
        return ''.join('\n' + line for line in kept)

    def run_query(self, test_graph):
        """Run the query stored by the last ``get_query`` call on ``test_graph``; return its bindings."""
        res = test_graph.query(self.cleaned_sparql_query)
        return res.bindings

# Using the class

In [99]:
# Load the G36 shapes file (Turtle) into a fresh graph.
g = Graph()
g.parse('G36_SP223-v1.0.ttl', format = 'ttl')
# Skolemize the graph; presumably so blank shape nodes become IRIs that can be
# written into SPARQL text (see the genid IRI in the Zone output) — confirm.
g = g.skolemize()
# Query generator working on the skolemized shapes graph.
a = SHACLtoQuery(g)

In [100]:
# Generate and print the SPARQL query for the g36:ElectricHeatingCoil shape.
node = 'http://data.ashrae.org/standard223/1.0/extensions/g36#ElectricHeatingCoil'
complete_query = a.get_query(node)
print(complete_query)



                PREFIX s223: <http://data.ashrae.org/standard223#>
                PREFIX unit: <http://qudt.org/vocab/unit/>
                PREFIX quantitykind: <http://qudt.org/vocab/quantitykind/>
                PREFIX qudt: <http://qudt.org/schema/qudt/>
                PREFIX sh: <http://www.w3.org/ns/shacl#>
                PREFIX owl: <http://www.w3.org/2002/07/owl#>
                PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
                PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
                PREFIX g36: <http://data.ashrae.org/standard223/1.0/extensions/g36#>

                SELECT *
                    WHERE {
                	?node_1 a <http://data.ashrae.org/standard223#ResistanceHeater> .
	?node_1 <http://data.ashrae.org/standard223#hasProperty> ?node_9.
	?node_9 a <http://data.ashrae.org/standard223#QuantifiableActuatableProperty> .
	?node_9 <http://qudt.org/schema/qudt/hasQuantityKind> <http://qudt.org/vocab/quantitykind/DimensionlessRa

In [101]:
# Run the query produced by the most recent a.get_query(...) call against a
# small test model; run_query returns the result bindings.
test_heater_graph = Graph()
test_heater_graph.parse('test.ttl')
a.run_query(test_heater_graph)

[{rdflib.term.Variable('node_9'): rdflib.term.URIRef('urn:ex/p1'), rdflib.term.Variable('node_1'): rdflib.term.URIRef('urn:ex/ahu')}]

In [102]:
# Generate and print the SPARQL query for the g36:VAV shape.
# Note: this replaces the query that a.run_query would execute.
node = 'http://data.ashrae.org/standard223/1.0/extensions/g36#VAV'
complete_query = a.get_query(node)
print(complete_query)

values are different


                PREFIX s223: <http://data.ashrae.org/standard223#>
                PREFIX unit: <http://qudt.org/vocab/unit/>
                PREFIX quantitykind: <http://qudt.org/vocab/quantitykind/>
                PREFIX qudt: <http://qudt.org/schema/qudt/>
                PREFIX sh: <http://www.w3.org/ns/shacl#>
                PREFIX owl: <http://www.w3.org/2002/07/owl#>
                PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
                PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
                PREFIX g36: <http://data.ashrae.org/standard223/1.0/extensions/g36#>

                SELECT *
                    WHERE {
                	?node_1 a <http://data.ashrae.org/standard223#SingleDuctTerminal> .
	?node_1 <http://data.ashrae.org/standard223#contains> ?node_9.
	?node_9 a <http://data.ashrae.org/standard223/1.0/extensions/g36#Damper> .
	?node_1 <http://data.ashrae.org/standard223#hasConnectionPoint> ?node_10.
	?node_10 a <htt

In [103]:
# Generate and print the SPARQL query for the g36:Zone shape.
# As the output shows, a line with a ?None object is reported and dropped.
node = 'http://data.ashrae.org/standard223/1.0/extensions/g36#Zone'
complete_query = a.get_query(node)
print(complete_query)

values are different
values are different
values are different
values are different
values are different
values are different
values are different
values are different
values are different
	?node_15 <https://rdflib.github.io/.well-known/genid/rdflib/nf5f1b5e84e1d405e8a0729f249a4d827b93> ?None.
Unhandled line, likely due to complex property path


                PREFIX s223: <http://data.ashrae.org/standard223#>
                PREFIX unit: <http://qudt.org/vocab/unit/>
                PREFIX quantitykind: <http://qudt.org/vocab/quantitykind/>
                PREFIX qudt: <http://qudt.org/schema/qudt/>
                PREFIX sh: <http://www.w3.org/ns/shacl#>
                PREFIX owl: <http://www.w3.org/2002/07/owl#>
                PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
                PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
                PREFIX g36: <http://data.ashrae.org/standard223/1.0/extensions/g36#>

                SELECT *
                  