In [None]:
import yaml
import rdflib
import pandas as pd

In [None]:
# Read the mapping configuration. The encoding is given explicitly because
# open() otherwise falls back to a platform/locale-dependent default, which can
# mis-decode non-ASCII text in the YAML file.
with open("map.yml", "r", encoding="utf-8") as f:
    data = yaml.safe_load(f)

In [None]:
# Create the target graph and register every namespace from the config on it.
# ns_lookup maps each prefix name to an rdflib.Namespace built from its URI.
g = rdflib.Graph()
ns_lookup = {}

for ns in data['namespaces']:
    uri, prefix = ns['uri'], ns['name']
    g.namespace_manager.bind(prefix=prefix, namespace=uri)
    ns_lookup[prefix] = rdflib.Namespace(uri)

In [None]:
def handle_list(row, value, graph, identifier, predicate):
    """Emit one literal triple per entry of ``value``.

    Each entry is handed to ``add_literal`` in list order, together with the
    same row, graph, subject (``identifier``) and ``predicate``.
    """
    for column in value:
        add_literal(row, column, graph, identifier, predicate)


def handle_dict(row, value, graph, identifier, predicate):
    """Dispatch a single-entry mapping node to the handler named by its key.

    ``value`` must hold exactly one entry. Its key selects the handler
    (``'uriref'`` -> ``add_uriref``, ``'multival'`` -> ``add_multival``) and its
    value is forwarded to that handler as the ``value`` argument.

    Raises:
        ValueError: if ``value`` does not contain exactly one entry. (An empty
            mapping previously surfaced as an opaque ``IndexError``.)
        KeyError: if the single key is not a supported node type.
    """
    if len(value) != 1:
        raise ValueError(
            f"Only one sub node is supported (got {len(value)})"
        )
    value_type, payload = next(iter(value.items()))

    route = {
        'uriref': add_uriref,
        'multival': add_multival,
    }
    try:
        route_func = route[value_type]
    except KeyError:
        # Same exception type as before, but say what was wrong and what is allowed.
        raise KeyError(
            f"Unsupported sub node type {value_type!r}; expected one of {sorted(route)}"
        ) from None
    route_func(row, payload, graph, identifier, predicate)
    
    
def add_literal(row, value, graph, identifier, predicate):
    """Add ``(identifier, predicate, Literal(row[value]))`` to ``graph``.

    Nothing is added when ``pd.isna`` reports the cell as missing.
    """
    cell = row[value]
    if pd.isna(cell):
        return
    graph.add((identifier, predicate, rdflib.Literal(cell)))


def add_uriref(row, value, graph, identifier, predicate):
    """Add ``(identifier, predicate, URIRef(row[value]))`` to ``graph``.

    Nothing is added when ``pd.isna`` reports the cell as missing.
    """
    cell = row[value]
    if pd.isna(cell):
        return
    graph.add((identifier, predicate, rdflib.URIRef(cell)))


def add_multival(row, value, graph, identifier, predicate):
    """Split one delimited cell into several objects and add a triple for each.

    ``value`` is a mapping with:
      * ``'field'``     - key of the cell in ``row`` to split,
      * ``'type'``      - ``'literal'`` or ``'uriref'``, choosing the node class,
      * ``'delimiter'`` - optional separator passed to ``str.split``.

    Parts are stripped and empty parts are dropped. A missing cell (per
    ``pd.isna``) adds nothing. When more than one object results, the objects
    are additionally appended to ``graph.collection(identifier)``.
    """
    raw = row[value['field']]
    if pd.isna(raw):
        return

    constructors = {
        'literal': rdflib.Literal,
        'uriref': rdflib.URIRef,
    }
    make_node = constructors[value['type']]

    # With no 'delimiter' key, .get() yields None and str.split(None) splits on
    # runs of whitespace.
    stripped = (piece.strip() for piece in raw.split(value.get('delimiter')))
    rdf_objects = [make_node(piece) for piece in stripped if piece]

    for rdf_object in rdf_objects:
        graph.add((identifier, predicate, rdf_object))

    if len(rdf_objects) > 1:
        # NOTE(review): the collection is rooted at the subject itself, not at a
        # per-predicate node - confirm this is the intended graph shape.
        members = graph.collection(identifier)
        members += rdf_objects


def get_node_type(row, mapping):
    """Return the subject node for ``row``: a URIRef when available, else a blank node.

    ``mapping.get('uriref')`` names the key in ``row`` that holds the subject
    IRI. A fresh ``rdflib.BNode`` is returned when:
      * the mapping has no (or an empty) ``'uriref'`` entry,
      * the row has no such key, or
      * the cell is missing according to ``pd.isna`` (e.g. an empty CSV cell).

    The last two cases used to hand ``None``/NaN directly to ``rdflib.URIRef``;
    they now get the same missing-value guard that ``add_uriref`` applies.
    """
    column = mapping.get('uriref')
    if column:
        cell = row.get(column)
        # pd.isna covers both None (absent key) and NaN (empty cell).
        if not pd.isna(cell):
            return rdflib.URIRef(cell)
    return rdflib.BNode()
    
    
def csv_to_graph(mappings, filename, graph):
    """Read a CSV and add triples for every row according to ``mappings``.

    Args:
        mappings: dict describing one CSV. The optional ``'uriref'`` entry is
            the subject column (used by ``get_node_type``). Every other key is
            ``"<prefix>.<term>"``, resolved to a predicate through
            ``ns_lookup``. Each value is a ``str``, ``list`` or ``dict`` and is
            routed to ``add_literal``, ``handle_list`` or ``handle_dict``
            respectively. A falsy ``mappings`` makes this a no-op.
        filename: anything ``pd.read_csv`` accepts.
        graph: graph object passed through to the handlers.

    Raises:
        KeyError: unknown namespace prefix, or a value whose exact type is not
            ``str``/``list``/``dict``.
        ValueError: a predicate key without a ``.`` separator.
    """
    if not mappings:
        return

    route_by_type = {
        list: handle_list,
        dict: handle_dict,
        str: add_literal,
    }

    # Predicates and handlers do not depend on the row, so resolve them once.
    rules = []
    for key, value in mappings.items():
        # Skip only the subject-column entry. The earlier substring test
        # ('uriref' not in key) also dropped any predicate containing "uriref".
        if key == 'uriref':
            continue
        # Split on the first dot only so the local name may itself contain dots.
        prefix, term = key.split(".", 1)
        rules.append((ns_lookup[prefix][term], value, route_by_type[type(value)]))

    for row in pd.read_csv(filename).to_dict(orient="records"):
        identifier = get_node_type(row, mappings)
        for predicate, value, handler in rules:
            handler(row, value, graph, identifier, predicate)


# Convert every configured CSV: each key under 'mappings' names the file
# data/<key>.csv, and its value is the mapping applied to that file.
for data_file, mappings in data['mappings'].items():
    filename = f"data/{data_file}.csv"
    csv_to_graph(mappings, filename, g)


In [None]:
# Write the populated graph to output/triple.txt using the 'trig' format.
g.serialize(format='trig', destination='output/triple.txt')

In [None]:
# Sanity check: print every triple whose object matches the name below
# (the "i" flag is passed to the SPARQL regex function).
sparql = """SELECT *
       WHERE {
          ?s ?p ?o .
          FILTER(regex(?o, "Hilarius Arelatensis", "i"))
       }"""
query_result = g.query(sparql)

for row in query_result:
    print(row)

In [None]:
from flask import Flask, request
from json import dumps
# Flask application object; the /query route below is registered on it.
app = Flask(__name__)

@app.route('/query', methods=['POST'])
def query_serve_example():
    """Run the SPARQL query in the POST body against ``g`` and return JSON.

    The raw request body is the query text. The result is iterated into a list
    and JSON-encoded. The response is labelled ``application/json`` instead of
    Flask's default ``text/html`` for string returns, so clients do not treat
    graph content as HTML.

    Returns:
        200 with the JSON-encoded result list, or 400 with a JSON error object
        when the body is empty or whitespace only.

    NOTE(security): the body is executed as-is against the module-level graph,
    with no authentication or query restrictions. Keep this to trusted or local
    use.
    """
    sparql_query = request.get_data(as_text=True)
    if not sparql_query.strip():
        return app.response_class(
            dumps({'error': 'empty SPARQL query'}),
            status=400,
            mimetype='application/json',
        )
    query_results = g.query(sparql_query)
    return app.response_class(
        dumps(list(query_results)),
        mimetype='application/json',
    )

# Start the Flask server with app.run()'s defaults, but only when this code is
# executing as the main module.
if __name__ == '__main__':
    app.run()