In [39]:
from SPARQLWrapper import SPARQLWrapper, JSON, CONSTRUCT, TURTLE
from rdflib.plugins.sparql import prepareQuery

import rdflib
import pandas as pd

# rdflib does not handle SPARQL CONSTRUCT queries reliably, so the queries can alternatively be run against a GraphDB SPARQL endpoint.
# To use the endpoint, follow these steps:
#
# 1) set the use_endpoint flag below to True
# 2) set up a SPARQL endpoint on the server and replace the sparql_endpoint URL below with its address
# 3) load both the OCED ontology and the resulting RDF file (in this case oced_ontology.ttl and 2013_small.ttl) into that SPARQL endpoint

# Set to True to run the CONSTRUCT queries on the SPARQL endpoint below
# instead of evaluating them in-process with rdflib.
use_endpoint = False
sparql_endpoint = "http://localhost:7200/repositories/ocedo-small"

OCEDO_FILENAME = 'files/oced_ontology.ttl'
# Misspelled legacy name (lowercase "l"); kept as an alias because other cells refer to it.
OCEDO_FIlENAME = OCEDO_FILENAME
INPUT_FILENAME = 'input/2013_small.ttl'

# Derive the output path by swapping only a *trailing* ".ttl" for "-integrated.ttl".
# A plain str.replace() would also rewrite ".ttl" inside directory names and, worse,
# would return the input path unchanged when it has no ".ttl" at all, so the final
# serialization would overwrite the input file.
_TTL_SUFFIX = ".ttl"
_input_stem = (
    INPUT_FILENAME[:-len(_TTL_SUFFIX)]
    if INPUT_FILENAME.endswith(_TTL_SUFFIX)
    else INPUT_FILENAME
)
OUTPUT_FILENAME = _input_stem + "-integrated.ttl"

# CSV files describing how object types map to domain classes and relations.
OBJECT_OBJECT_MAP_FILENAME = 'files/2013_object-object_mapping.csv'
EVENT_OBJECT_MAP_FILENAME = 'files/2013_event-object_mapping.csv'

In [40]:
# OCED namespaces used throughout the notebook
ocedo = rdflib.Namespace('https://w3id.org/ocedo/core#')
ocedd = rdflib.Namespace('https://w3id.org/ocedo/domain#')
ocedr = rdflib.Namespace('https://w3id.org/ocedo/resource/')

# working graph: register the prefixes, then read the OCED ontology into it
constructed_graph = rdflib.Graph()
for prefix, namespace in (('ocedo', ocedo), ('ocedd', ocedd), ('ocedr', ocedr)):
    constructed_graph.bind(prefix, namespace)
constructed_graph.parse(OCEDO_FIlENAME, format="turtle")

# the input data is parsed into a graph of its own and then merged into the working graph
input_graph = rdflib.Graph()
input_graph.parse(INPUT_FILENAME, format="turtle")
constructed_graph += input_graph


In [41]:
# prepare construct query for event_object enhancement
# -- parameters needed: $object_type, $ocedd_class, $ocedd_relation
#
# The $-prefixed names are textual placeholders; they must be substituted before
# the query is executed.
# What the query does: for every ocedo:EventObject node that links an ?event to an
# ?object whose ocedo:object_type is the literal "$object_type", it constructs
#   - ?object a $ocedd_class
#   - ?event $ocedd_relation ?object
# $object_type sits inside a double-quoted string literal, whereas $ocedd_class and
# $ocedd_relation are inserted as bare terms (e.g. prefixed names).
event_object_cq = """
prefix ocedo: <https://w3id.org/ocedo/core#>
prefix ocedd: <https://w3id.org/ocedo/domain#>
prefix ocedr: <https://w3id.org/ocedo/resource/>

CONSTRUCT {
    ?object a $ocedd_class .
    ?event $ocedd_relation ?object .
} 
WHERE {
    ?eo a ocedo:EventObject ;
    	ocedo:eo_event ?event ;
    	ocedo:eo_object ?object ;
    .
    ?object ocedo:object_type "$object_type" .
}
"""

# prepare construct query for object_object enhancement
# -- parameters needed: $object1_class, $object2_class, $object1_type, $object2_type, $ocedd_relation
#
# The $-prefixed names are textual placeholders; they must be substituted before
# the query is executed.
# What the query does: it pairs two objects that are attached to the *same* ?event
# through two ocedo:EventObject nodes, where ?object1 has ocedo:object_type
# "$object1_type" and ?object2 has ocedo:object_type "$object2_type". For every such
# pair it constructs
#   - ?object1 a $object1_class
#   - ?object2 a $object2_class
#   - ?object1 $ocedd_relation ?object2
# NOTE(review): nothing in the pattern requires ?object1 and ?object2 to differ, so
# when both types are equal an object is also paired with itself -- confirm intended.
object_object_cq = """
prefix ocedo: <https://w3id.org/ocedo/core#>
prefix ocedd: <https://w3id.org/ocedo/domain#>
prefix ocedr: <https://w3id.org/ocedo/resource/>

CONSTRUCT {
    ?object1 a $object1_class .
    ?object2 a $object2_class .
    ?object1 $ocedd_relation ?object2 .
} 
WHERE {
    ?eo1 a ocedo:EventObject ;
    	ocedo:eo_event ?event ;
    	ocedo:eo_object ?object1 ;
    .
    ?eo2 a ocedo:EventObject ;
    	ocedo:eo_event ?event ;
    	ocedo:eo_object ?object2 ;
    .
    ?object1 ocedo:object_type "$object1_type" .
    ?object2 ocedo:object_type "$object2_type" .
}
"""

In [42]:

def _escape_sparql_literal(value):
    """Escape ``value`` so it can sit inside a double-quoted SPARQL string literal.

    Args:
        value: Text that will be placed between the double quotes of a literal.

    Returns:
        ``value`` with backslashes, double quotes, line feeds and carriage
        returns replaced by their SPARQL escape sequences.

    Raises:
        TypeError: If ``value`` is not a ``str`` (e.g. a NaN read from an empty CSV cell).
    """
    if not isinstance(value, str):
        raise TypeError(
            f"object type must be a string, got {type(value).__name__}: {value!r}"
        )
    # Backslashes go first; otherwise the backslashes introduced by the
    # following replacements would themselves be doubled.
    return (
        value.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )


def build_event_object_cq(object_type, ocedd_class, ocedd_relation):
    """Build the event-object CONSTRUCT query for one mapping entry.

    Fills the ``$object_type``, ``$ocedd_class`` and ``$ocedd_relation``
    placeholders of the ``event_object_cq`` template.

    Args:
        object_type: Object type value to match; it is inserted into a quoted
            string literal and therefore escaped first.
        ocedd_class: Class term inserted verbatim (e.g. a prefixed name).
        ocedd_relation: Predicate term inserted verbatim.

    Returns:
        The complete query text.

    Raises:
        TypeError: If any argument is not a ``str``.
    """
    cq = event_object_cq
    cq = cq.replace("$object_type", _escape_sparql_literal(object_type))
    cq = cq.replace("$ocedd_class", ocedd_class)
    cq = cq.replace("$ocedd_relation", ocedd_relation)
    return cq

def build_object_object_cq(object1_class, object2_class, object1_type, object2_type, ocedd_relation):
    """Build the object-object CONSTRUCT query for one mapping entry.

    Fills the ``$object1_class``, ``$object2_class``, ``$object1_type``,
    ``$object2_type`` and ``$ocedd_relation`` placeholders of the
    ``object_object_cq`` template.

    Args:
        object1_class: Class term for the first object, inserted verbatim.
        object2_class: Class term for the second object, inserted verbatim.
        object1_type: Object type value of the first object; escaped because
            it is inserted into a quoted string literal.
        object2_type: Object type value of the second object; escaped likewise.
        ocedd_relation: Predicate term linking the first object to the second,
            inserted verbatim.

    Returns:
        The complete query text.

    Raises:
        TypeError: If any argument is not a ``str``.
    """
    cq = object_object_cq
    cq = cq.replace("$object1_class", object1_class)
    cq = cq.replace("$object2_class", object2_class)
    cq = cq.replace("$object1_type", _escape_sparql_literal(object1_type))
    cq = cq.replace("$object2_type", _escape_sparql_literal(object2_type))
    cq = cq.replace("$ocedd_relation", ocedd_relation)
    return cq

def run_construct_query_endpoint(cq):
    """Run a CONSTRUCT query on the remote SPARQL endpoint.

    Args:
        cq: Complete SPARQL CONSTRUCT query text.

    Returns:
        An ``rdflib.Graph`` parsed from the Turtle document the endpoint
        (``sparql_endpoint``) sends back.
    """
    sparql = SPARQLWrapper(sparql_endpoint)
    sparql.setQuery(cq)
    sparql.setReturnFormat(TURTLE)
    # Deliberately no setMethod() call: that setter chooses the HTTP verb
    # (GET/POST). Passing the CONSTRUCT query-type constant to it is rejected
    # with a RuntimeWarning and changes nothing; the query type itself is
    # taken from the query text.
    results = sparql.queryAndConvert()

    result_graph = rdflib.Graph()
    result_graph.parse(data=results, format='ttl')
    return result_graph

def run_construct_query_rdflib(cq):
    """Evaluate a CONSTRUCT query in-process against ``constructed_graph``.

    Args:
        cq: Complete SPARQL CONSTRUCT query text.

    Returns:
        The ``graph`` attribute of the rdflib query result, i.e. the
        constructed triples.
    """
    return constructed_graph.query(prepareQuery(cq)).graph


In [43]:
# scratch graph collecting everything the mapping queries construct
temp_result = rdflib.Graph()

# one CONSTRUCT query per row of the event-object mapping file
df = pd.read_csv(EVENT_OBJECT_MAP_FILENAME)
for index, row in df.iterrows():
    o_type, o_class, o_relation = row["object_type"], row["ocedd_class"], row["ocedd_relation"]
    cq_string = build_event_object_cq(o_type, o_class, o_relation)
    # pick the backend according to the use_endpoint flag
    execute = run_construct_query_endpoint if use_endpoint else run_construct_query_rdflib
    temp_result += execute(cq_string)



In [44]:
# one CONSTRUCT query per row of the object-object mapping file
df = pd.read_csv(OBJECT_OBJECT_MAP_FILENAME)
for index, row in df.iterrows():
    object1_class, object2_class = row["object1_class"], row["object2_class"]
    object1_type, object2_type = row["object1_type"], row["object2_type"]
    ocedd_relation = row["ocedd_relation"]
    cq_string = build_object_object_cq(
        object1_class, object2_class, object1_type, object2_type, ocedd_relation
    )
    # pick the backend according to the use_endpoint flag
    # NOTE: the rdflib backend is really slow for this query; the cause is not known
    execute = run_construct_query_endpoint if use_endpoint else run_construct_query_rdflib
    temp_result += execute(cq_string)

In [45]:
# merge the constructed triples into the working graph and write the result as Turtle
constructed_graph += temp_result
constructed_graph.serialize(
    destination=OUTPUT_FILENAME,
    format='ttl',
)



<Graph identifier=Nb82a470c1ba64e83b0e5f355729bd158 (<class 'rdflib.graph.Graph'>)>