In [1]:
import yaml
import rdflib
import pandas as pd

In [9]:
# Load the mapping configuration; safe_load accepts the open stream directly.
with open("map.yml") as f:
    data = yaml.safe_load(f)


In [10]:
# Target graph plus a prefix -> Namespace table used to build predicates later.
g = rdflib.Graph()
ns_lookup = {}

for ns in data['namespaces']:
    uri = ns['uri']
    prefix = ns['name']
    # Register the prefix on the graph so serialized output uses it.
    g.namespace_manager.bind(prefix=prefix, namespace=uri)
    ns_lookup[prefix] = rdflib.Namespace(uri)


In [11]:
def csv_to_graph(mappings, filename, graph):
    """Add one subject per CSV row to ``graph`` according to ``mappings``.

    Args:
        mappings: Dict describing how CSV columns map to triples. The optional
            ``'uriref'`` entry names the column whose cell becomes the subject
            IRI. Every other key has the form ``"<prefix>.<local_name>"`` and
            its value is a column name or a list of column names; each
            non-empty cell becomes a literal object of that predicate.
            A falsy ``mappings`` makes the call a no-op (the file is not read).
        filename: Path of the CSV file to read with ``pandas.read_csv``.
        graph: Object with an ``add((subject, predicate, object))`` method;
            it is mutated in place.

    Returns:
        None.

    Raises:
        ValueError: If a predicate key does not contain a ``"."``.
        KeyError: If a mapped column is missing from the CSV, or the prefix
            is not present in the module-level ``ns_lookup`` table.

    Note:
        Predicates are resolved through the module-level ``ns_lookup`` dict,
        so that table must be populated before this function is called.
    """
    if not mappings:
        return

    id_column = mappings.get('uriref')

    # Parse the predicate mappings once instead of once per row. Only the
    # exact 'uriref' key is reserved; a substring test would silently drop
    # predicates whose name merely contains "uriref".
    predicate_specs = []
    for key, value in mappings.items():
        if key == 'uriref':
            continue
        # Split on the first dot only so local names may themselves contain dots.
        ns, ns_attr = key.split(".", 1)
        columns = value if isinstance(value, list) else [value]
        predicate_specs.append((ns, ns_attr, columns))

    for row in pd.read_csv(filename).to_dict(orient="records"):
        raw_id = row.get(id_column) if id_column else None
        if raw_id is None or pd.isna(raw_id):
            # No usable identifier (column absent or empty cell): use a blank
            # node rather than building an IRI out of None/NaN.
            identifier = rdflib.BNode()
        else:
            # Cells parsed as numbers are not strings; coerce before building the IRI.
            identifier = rdflib.URIRef(str(raw_id))

        for ns, ns_attr, columns in predicate_specs:
            for column in columns:
                cell = row[column]
                if pd.isna(cell):
                    continue  # empty CSV cell: emit no triple
                graph.add((identifier, ns_lookup[ns][ns_attr], rdflib.Literal(cell)))

# Each key under 'mappings' names a CSV file in data/; feed every file into the graph.
for data_file, mappings in data['mappings'].items():
    filename = f"data/{data_file}.csv"
    csv_to_graph(mappings, filename, g)


In [12]:
# Write the populated graph to disk as Turtle.
g.serialize(format='turtle', destination='output/triple.txt')