In [19]:
import yaml
import rdflib
import pandas as pd

In [20]:
with open("map.yml", "r") as f:
    data = yaml.safe_load(f.read())
data

{'namespaces': [{'name': 'viaf', 'uri': 'https://viaf.org/'},
  {'name': 'dll', 'uri': 'https://catalog.digitallatin.org/'},
  {'name': 'madsrdf', 'uri': 'http://www.loc.gov/mads/rdf/v1#'},
  {'name': 'foaf', 'uri': 'http://xmlns.com/foaf/0.1/'},
  {'name': 'rdf', 'uri': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'},
  {'name': 'rdfs', 'uri': 'http://www.w3.org/2000/01/rdf-schema#'}],
 'mappings': {'author-authorities-export': {'uriref': 'DLL Page Link',
   'foaf.name': ['Authorized Name',
    'Author Name Native Language',
    'Author Name Latin',
    'Author Name English',
    'Other Alternative Name Form(s)'],
   'dll.id': 'DLL Identifier',
   'dll.authorizedName': 'Authorized Name',
   'dll.nativeLanguageName': 'Author Name Native Language',
   'dll.latinName': 'Author Name Latin',
   'dll.englishName': 'Author Name English'},
  'dll-works-export': {'uriref': 'DLL Page Link'},
  'dll-works-sorted-by-author-with-dll-number': {'uriref': 'DLL Page Link'},
  'item-records-export': {'u

In [21]:
g = rdflib.Graph()
ns_lookup = {}

for ns in data['namespaces']:
    g.namespace_manager.bind(namespace=ns['uri'], prefix=ns['name'])
    ns_lookup[ns['name']] = rdflib.Namespace(ns['uri'])


In [23]:
def csv_to_graph(mappings, filename, graph):
    if mappings:
        for row in pd.read_csv(filename).to_dict(orient="records"):
            uriref = mappings.get('uriref')
            if uriref:
                identifier = rdflib.URIRef(row.get(uriref))
            else:
                identifier = rdflib.BNode()
            for key, value in ((key, value) for key, value in mappings.items() if 'uriref' not in key):
                ns, ns_attr = key.split(".")
                if type(value) == list:
                    for val in value:
                        if not pd.isna(row[val]):
                            graph.add( (identifier, ns_lookup[ns][ns_attr], rdflib.Literal(row[val])) )
                else:
                    if not pd.isna(row[value]):
                        graph.add( (identifier, ns_lookup[ns][ns_attr], rdflib.Literal(row[value])) )

for data_file in data['mappings']:
    filename = f"data/{data_file}.csv"
    mappings = data['mappings'][data_file]
    csv_to_graph(mappings, filename, g)


In [10]:
g.serialize(destination='output/triple.txt', format='turtle')

In [34]:
qres = g.query(
    """SELECT *
       WHERE {
          ?s ?p ?o .
          FILTER(regex(?o, "Hilarius Arelatensis", "i"))
       }""")

In [35]:
for row in qres:
    print(row)

(rdflib.term.URIRef('http://xmlns.com/foaf/0.1/name'), rdflib.term.URIRef('https://catalog.digitallatin.org/dll-author/A4361'), rdflib.term.Literal('Pseudo-Hilarius Arelatensis/Hilarius Pictaviensis'))
(rdflib.term.URIRef('https://catalog.digitallatin.org/latinName'), rdflib.term.URIRef('https://catalog.digitallatin.org/dll-author/A5137'), rdflib.term.Literal('Hilarius Arelatensis'))
(rdflib.term.URIRef('https://catalog.digitallatin.org/latinName'), rdflib.term.URIRef('https://catalog.digitallatin.org/dll-author/A4361'), rdflib.term.Literal('Pseudo-Hilarius Arelatensis/Hilarius Pictaviensis'))
(rdflib.term.URIRef('http://xmlns.com/foaf/0.1/name'), rdflib.term.URIRef('https://catalog.digitallatin.org/dll-author/A5137'), rdflib.term.Literal('Hilarius Arelatensis'))


In [33]:
from flask import Flask, request
from json import dumps
app = Flask(__name__)

@app.route('/query', methods=['POST'])
def hello_world():
    sparql_query = request.get_data()
    query_results = g.query(sparql_query)
    return dumps(list(query_results))

if __name__ == '__main__':
    app.run()

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
127.0.0.1 - - [17/Dec/2019 16:09:26] "POST /query HTTP/1.1" 200 -
