In [3]:
from rdflib import Graph, Namespace, URIRef, BNode, Literal
from rdflib.namespace import RDF, SKOS
import json

# Load the RML configuration into a dict. The file maps a file-type label
# (e.g. 'JSON File') to the IRI of the matching ql: reference formulation.
# JSON text is UTF-8, so the encoding is given explicitly instead of relying
# on the platform default.
with open("RML_config.json", "r", encoding="utf-8") as config:
    rml_conf = json.load(config)

# Display the loaded configuration as the cell output.
rml_conf

{'JSON File': 'http://semweb.mmlab.be/ns/ql#JSONPath',
 'XML File': 'http://semweb.mmlab.be/ns/ql#XPath',
 'CSV File': 'http://semweb.mmlab.be/ns/ql#CSV'}

In [4]:
# Parse the Turtle file describing the input data source into inputGraph.
inputGraph = Graph().parse(
    source="input_schema.ttl",
    format="ttl",
)
# Debugging aid -- uncomment to dump every parsed statement:
# import pprint
# for stmt in inputGraph:
#     pprint.pprint(stmt)

In [5]:
# Parse the Turtle file holding the mapping definitions into mappingGraph.
mappingGraph = Graph().parse(
    source="mapping.ttl",
    format="ttl",
)
# Debugging aid -- uncomment to dump every parsed statement:
# import pprint
# for stmt in mappingGraph:
#     pprint.pprint(stmt)

In [6]:
# Namespaces used when reading the inputs and writing the RML output.

# Mapping vocabularies (rr corresponds to: @prefix rr: <http://www.w3.org/ns/r2rml#>.)
rr = Namespace("http://www.w3.org/ns/r2rml#")
rml = Namespace("http://semweb.mmlab.be/ns/rml#")
ql = Namespace("http://semweb.mmlab.be/ns/ql#")

# Vocabularies of the two input graphs.
dss = Namespace("https://www.example.org/dataSourceSchema/")
mapping_schema = Namespace("https://www.example.org/mappingSchema/")

# Base namespace under which the generated mapping concepts are minted.
n = Namespace("http://example.com/ns#")

#create RML graph

def initialise_graph():
    """Create an empty graph with the output prefixes already bound.

    Returns:
        A new ``Graph`` in which the prefixes ``rr``, ``ql``, ``rml``, ``mps``
        and ``dss`` are bound to the module-level namespaces ``rr``, ``ql``,
        ``rml``, ``mapping_schema`` and ``dss`` respectively.
    """
    graph = Graph()
    prefix_bindings = (
        ("rr", rr),
        ("ql", ql),
        ("rml", rml),
        ("mps", mapping_schema),
        ("dss", dss),
    )
    for prefix, namespace in prefix_bindings:
        graph.bind(prefix, namespace)
    return graph

In [7]:
def get_JSON_path(key):
    """Build the dotted path of ``key`` from its label and its ancestors' labels.

    The label of a key is its ``skos:prefLabel`` in ``inputGraph``. A parent is
    a subject that points at ``key`` through ``dss:refersTo``; only the first
    truthy parent yielded by the graph is followed, recursively.

    Args:
        key: Node of ``inputGraph`` whose path is wanted.

    Returns:
        ``"<parent path>.<label>"`` when a parent exists (for example
        ``location.city``), otherwise the label exactly as returned by
        ``inputGraph.value``.
    """
    label = inputGraph.value(key, SKOS.prefLabel)
    candidate_parents = (
        parent for parent in inputGraph.subjects(dss.refersTo, key) if parent
    )
    first_parent = next(candidate_parents, None)
    if first_parent is None:
        # No parent refers to this key, so it is the root of the path.
        return label
    return f"{get_JSON_path(first_parent)}.{label}"

# Quick check of the path construction on the ``city`` key of the data source.
city_key = URIRef("https://www.example.org/dataSource#city")
print(get_JSON_path(city_key))

location.city


In [8]:


def create_pattern(pattern, subjectKey):
    """Fill the ``__self__`` placeholder of ``pattern`` with the subject's path.

    Args:
        pattern: Text that may contain the ``__self__`` placeholder.
        subjectKey: Key whose path (see ``get_JSON_path``) replaces the
            placeholder.

    Returns:
        ``pattern`` with every ``__self__`` occurrence replaced.
    """
    placeholder = "__self__"
    return pattern.replace(placeholder, get_JSON_path(subjectKey))


def get_template(g, subjectKey):
    """Compose the template string for ``subjectKey``.

    Args:
        g: Graph holding the ``hasNamespace`` and ``hasLocalNamePattern``
            statements of the mapping schema for ``subjectKey``.
        subjectKey: Subject whose template is built.

    Returns:
        The namespace value followed by the local-name pattern, with the
        pattern's ``__self__`` placeholder expanded by ``create_pattern``.
    """
    base_iri = g.value(subjectKey, mapping_schema.hasNamespace)
    raw_pattern = g.value(subjectKey, mapping_schema.hasLocalNamePattern)
    return f"{base_iri}{create_pattern(raw_pattern, subjectKey)}"

def add_prediacate_object(g, mappingConcept, predicate, objectValue, isLiteral=True):
    """Attach a predicate-object map to ``mappingConcept`` in graph ``g``.

    Two blank nodes are created: one for the ``rr:predicateObjectMap`` and one
    for its ``rr:objectMap``.

    Args:
        g: Graph that receives the new statements.
        mappingConcept: Node the predicate-object map is attached to.
        predicate: Value wrapped in ``URIRef`` and used as ``rr:predicate``.
        objectValue: Value wrapped in ``Literal`` and used as ``rml:reference``.
        isLiteral: When false, the object map is created but left without an
            ``rml:reference`` statement.
    """
    po_map = BNode()
    g.add((mappingConcept, rr.predicateObjectMap, po_map))
    g.add((po_map, rr.predicate, URIRef(predicate)))

    object_map = BNode()
    g.add((po_map, rr.objectMap, object_map))

    if not isLiteral:
        # Nothing more to describe: the object map stays empty.
        return
    g.add((object_map, rml.reference, Literal(objectValue)))

In [9]:
# Generate one RML mapping file per data source found in inputGraph.

# Read the SPARQL query that selects all source files. The encoding is given
# explicitly so the result does not depend on the platform default.
with open('queries/source.rq', encoding="utf-8") as query_file:
    source_query = query_file.read()
qres = inputGraph.query(source_query)

for row in qres:
    # Each source gets its own graph and its own triples map.
    g = initialise_graph()
    json_name = row.srcName
    mappingConcept = n[f"{json_name}Mapping"]
    g.add((mappingConcept, RDF.type, rr.TriplesMap))

    # Describe the data source.
    srcConcept = row.src
    file_name = row.srcFileName
    iterator = row.iterator
    # rml_conf is keyed by plain strings, so format the query value first.
    file_type = f"{row.fileType}"
    if file_type not in rml_conf:
        # Same exception type as a bare dict lookup, but with enough context
        # to find the offending source and the supported file types.
        raise KeyError(
            f"Unsupported file type {file_type!r} for source {json_name!r}; "
            f"RML_config.json defines: {sorted(rml_conf)}"
        )
    referenceClass = URIRef(rml_conf[file_type])

    # Logical source: iterator, file name and reference formulation.
    bn_source = BNode()
    g.add((mappingConcept, rml.logicalSource, bn_source))
    g.add((bn_source, rml.iterator, Literal(iterator)))
    g.add((bn_source, rml.source, Literal(file_name)))
    g.add((bn_source, rml.referenceFormulation, referenceClass))

    # Subject maps: one per key declared as subjectOf this source.
    # NOTE(review): every subject found adds another rr:subjectMap to the same
    # triples map -- confirm a source never has more than one subject.
    for subjectKey in mappingGraph.subjects(mapping_schema.subjectOf, srcConcept):
        subjectClass = mappingGraph.value(subjectKey, mapping_schema.mapsTo)
        template = get_template(mappingGraph, subjectKey)
        bn_subject = BNode()
        g.add((mappingConcept, rr.subjectMap, bn_subject))
        g.add((bn_subject, rr.template, Literal(template)))
        g.add((bn_subject, rr["class"], URIRef(subjectClass)))

        # Predicate-object maps: one per key declared as objectOf the subject.
        for objectKey in mappingGraph.subjects(mapping_schema.objectOf, subjectKey):
            predicate = mappingGraph.value(objectKey, mapping_schema.mapsTo)
            objectValue = get_JSON_path(objectKey)
            add_prediacate_object(g, mappingConcept, predicate, objectValue)

    # Write the mapping for this source as Turtle.
    g.serialize(format='turtle', base=n, destination=f"{json_name}_rml_generated.ttl")