# Provenance Processing

## Setup

In [60]:
import os
from getpass import getpass

# Preferred: in the terminal, `export GCUBE_TOKEN=...` before starting Jupyter.
# Fallback: prompt for the token interactively so that it is never written
# into the notebook source (and therefore never committed or shared).
gcube_token = os.environ.get('GCUBE_TOKEN') or getpass('gCube token: ')

In [61]:
import json
import logging
import requests
import pandas as pd
from prov.serializers.provrdf import ProvRDFSerializer
from prov.dot import prov_to_dot
from rdflib import Graph
from io import BytesIO, StringIO
from lxml import etree as et
from IPython.display import display, Image
from owslib.wps import WebProcessingService, ComplexDataInput, monitorExecution
from rdflib.plugins.sparql.results.csvresults import CSVResultSerializer

# Logging: install the default handler and only let errors through.
logging.basicConfig()
logging.getLogger().setLevel(logging.ERROR)

# Widen DataFrame columns so long IRIs are not cut off when displayed.
pd.set_option('display.max_colwidth', 256)

# WPS client for the DataMiner service; the token travels as a request header.
# NOTE(review): the endpoint below is plain http while `headers` carries the
# gCube token -- confirm whether an https endpoint can be used instead.
dataminer_url = 'http://dataminer-prototypes.d4science.org/wps/WebProcessingService'
headers = {'gcube-token': gcube_token}
wps = WebProcessingService(dataminer_url, headers=headers)

# Shared RDF graph: filled by read(), queried by query().
g = Graph()

def read():
    """Run the PFREADDATA process and load its Turtle output into ``g``.

    Executes the process through the module-level ``wps`` client, waits for
    the execution to finish, looks up the data URL of the result entry whose
    description is "output", and parses that URL as Turtle into the
    module-level graph ``g``.

    Returns:
        None. The triples are added to ``g`` as a side effect.

    Raises:
        IndexError: if the execution produced no outputs, or if the result
            document contains no entry described as "output".
    """
    identifier = 'org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mappedclasses.transducerers.PFREADDATA'
    namespaces = {
        'ogr': 'http://ogr.maptools.org/',
        'gml': 'http://www.opengis.net/gml',
        'd4science': 'http://www.d4science.org',
    }
    # Select the Data text of the Result whose Description is "output".
    output_xpath = (
        '/ogr:FeatureCollection/gml:featureMember/ogr:Result'
        '/d4science:Description[text() = "output"]'
        '/../d4science:Data/text()'
    )

    execution = wps.execute(identifier, inputs=[('place', 'None')], output="non_deterministic_output")
    monitorExecution(execution, sleepSecs=2, download=False)

    # Fail with a readable message instead of a bare "list index out of range".
    if not execution.processOutputs:
        raise IndexError(
            'PFREADDATA execution returned no outputs (status: {})'.format(execution.status))
    output = execution.processOutputs[0]

    doc = et.parse(BytesIO(output.retrieveData(headers=headers)))
    output_urls = doc.xpath(output_xpath, namespaces=namespaces)
    if not output_urls:
        raise IndexError(
            'PFREADDATA result document has no entry described as "output"')

    # Graph.parse adds the triples to `g` in place; its return value is not needed.
    g.parse(output_urls[0], format='turtle')

def query(q):
    """Run the SPARQL query ``q`` on the graph ``g`` and display the result.

    The query result is serialized to CSV in memory, read back into a pandas
    DataFrame, and rendered with ``display``. Nothing is returned.
    """
    csv_buffer = BytesIO()
    CSVResultSerializer(g.query(q)).serialize(csv_buffer)
    csv_text = csv_buffer.getvalue().decode()
    result_frame = pd.read_csv(StringIO(csv_text))
    display(result_frame)

In [62]:
# Execute the PFREADDATA process and parse its Turtle output into the graph `g`.
read()

In [77]:
# For every derived entity, list the entity it was derived from, the label of
# the type of the activity that generated it, and that activity's start time,
# ordered by start time ascending.
# NOTE(review): the query declares no PREFIX for prov:/rdf:/rdfs:; presumably
# these resolve through the namespaces bound on `g` after read() -- confirm.
query("""
SELECT ?entity2 ?entity1 ?activity ?time
WHERE {
  ?entity2 prov:wasDerivedFrom ?entity1 .
  ?entity2 prov:wasGeneratedBy ?a .
  ?a rdf:type [ rdfs:label ?activity ] .
  ?a prov:startedAtTime ?time
}
ORDER BY ASC(?time)
""")

Unnamed: 0,entity2,entity1,activity,time
0,http://avaa.tdata.fi/web/smart/smear/d103491968bedbeaf3a42a583570936d,http://data.d4science.org/dHhsSHFPWmlLUGw0aEZvUVRHOEtETUE1VloyNzVrRFBHbWJQNStIS0N6Yz0-VLT,data visualization,2018-08-14T19:02:00.837769+02:00
1,http://avaa.tdata.fi/web/smart/smear/2c3514176ca67a77a99292cbb4b6a3ae,http://data.d4science.org/WUd0KzRpV3RZelp2UTVoazcrRUNDNXVWYWwxTXNsUzJHbWJQNStIS0N6Yz0-VLT,data visualization,2018-08-14T19:02:38.797378+02:00
2,http://avaa.tdata.fi/web/smart/smear/e25aabd777518fbf87f6949f7a955614,http://avaa.tdata.fi/web/smart/smear/c0764836f4cc7da9c35a163f1ce778d8,arithmetic mean calculation,2018-08-14T19:03:26.216544+02:00
