In [1]:
import re
import requests
from urllib.parse import quote_plus as urlencode
from rdflib import Dataset, Graph, compare
from tqdm import tqdm

In [2]:
# TriG file holding the local named graphs that are compared with the endpoint
inputFile = '../data/graphs/imageRegions.trig'
# SPARQL endpoint (Blazegraph, judging by the URL) that is queried and uploaded to
endpoint = "http://localhost:7776/blazegraph/sparql"

In [3]:
# Dataset that receives the named graphs parsed from inputFile
inputData = Dataset()

In [4]:
# SPARQL ASK query that is evaluated against the remote copy of every changed
# graph: a changed graph is only overwritten when the query answers true.
# A falsy value (e.g. an empty string) makes every changed graph get overwritten.
# The query is true when the graph contains a crmdig:D35_Area that used the
# bso-image-segmentation technique.
updateCondition = """
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
PREFIX crmdig: <http://www.ics.forth.gr/isl/CRMdig/>
ASK { 
  ?s a crmdig:D35_Area ;
    crm:P33_used_specific_technique <https://github.com/swiss-art-research-net/bso-image-segmentation> .
}
"""

In [5]:
# Read the TriG file into the dataset. The return value is bound to a throwaway
# name, presumably so that it is not echoed as cell output.
devnull = inputData.parse(inputFile, format='trig')

In [6]:
# Request Turtle so that the response can be parsed into an rdflib Graph
headers = {'Accept': 'text/turtle'}

# Buckets of (local graph, remote graph) pairs.
# For graphs that do not exist remotely the second element is False.
graphs = dict(new=[], changed=[], unchanged=[])

# Fetches every triple of one named graph; %s is filled with the graph identifier
queryTemplate = "\nCONSTRUCT { ?s ?p ?o } WHERE { GRAPH <%s> { ?s ?p ?o }}\n"

print("Comparing graphs")
for context in tqdm(inputData.contexts()):
    # Graphs whose identifier is not an http(s) URI are left out of the comparison
    if not context.identifier.startswith("http"):
        continue

    response = requests.get(
        endpoint,
        headers=headers,
        params={"query": queryTemplate % context.identifier},
    )
    if not response.ok:
        # The graph ends up in no bucket; only the server message is shown
        print(response.text)
        continue

    remoteGraph = Graph()
    remoteGraph.parse(data=response.text, format='turtle')

    if len(remoteGraph) == 0:
        # Nothing stored under this identifier yet
        bucket, partner = 'new', False
    elif compare.to_isomorphic(context) == compare.to_isomorphic(remoteGraph):
        bucket, partner = 'unchanged', remoteGraph
    else:
        bucket, partner = 'changed', remoteGraph
    graphs[bucket].append((context, partner))

11it [00:00, 71.22it/s]

Comparing graphs





In [7]:
# One summary line per bucket filled by the comparison
for bucket, label in (
    ('new', "new graphs"),
    ('changed', "updated graphs"),
    ('unchanged', "unchanged graphs"),
):
    print(len(graphs[bucket]), label)

0 new graphs
7 updated graphs
2 unchanged graphs


In [8]:
def putGraph(context, endpoint):
    """Replace a named graph on the SPARQL endpoint with the content of ``context``.

    The graph named ``context.identifier`` is dropped on the endpoint and the
    local graph is then uploaded as Turtle into a graph of the same name.

    Args:
        context: rdflib graph to upload; its ``identifier`` names the target graph.
        endpoint: URL of the SPARQL endpoint.

    Returns:
        True when both the drop and the upload were answered with a successful
        HTTP status, False otherwise. Nothing is uploaded when the drop fails.
    """
    # Upload is equivalent to:
    # curl -X POST -H 'Content-Type:text/turtle' --data-binary '@{{.FILE}}' {{.ENDPOINT}}?context-uri={{.GRAPH}}

    # Remove old graph. DROP is a SPARQL Update operation, which the SPARQL 1.1
    # Protocol only accepts via POST in the `update` parameter (not as a GET
    # `query`). SILENT keeps the request from failing for graphs that do not
    # exist yet.
    dropResponse = requests.post(
        endpoint,
        data={"update": "DROP SILENT GRAPH <%s>" % context.identifier},
    )
    if not dropResponse.ok:
        # Uploading now would merge the new triples into the stale graph
        return False

    data = context.serialize(format='turtle')
    # Depending on the rdflib version serialize() returns bytes or str.
    # Always send UTF-8 bytes so the encoding on the wire does not depend on
    # how the HTTP library encodes str bodies.
    if isinstance(data, str):
        data = data.encode('utf8')

    params = {
        "context-uri": context.identifier,
    }
    headers = {"Content-Type" : "text/turtle"}
    r = requests.post(endpoint, params=params, data=data, headers=headers)
    return r.ok

In [9]:
# Graphs that do not exist on the endpoint yet are always uploaded
graphsToUpdate = [d[0] for d in graphs['new']]

# Changed graphs are only overwritten when the remote copy satisfies updateCondition
for graphPair in graphs['changed']:
    if updateCondition:
        # graphPair[1] is the remote copy of the graph. bool() of an rdflib
        # query result is the ASK answer (or "has at least one row" for other
        # query types), so the decision never depends on a value left over
        # from a previous graph and cannot hit an unbound name.
        update = bool(graphPair[1].query(updateCondition))
    else:
        # No condition configured: overwrite every changed graph
        update = True

    if update:
        print("Overwriting", graphPair[0].identifier)
        graphsToUpdate.append(graphPair[0])


Overwriting https://resource.swissartresearch.net/digitalobject/26a329d6-c453-3f34-a682-f0fd1cdaa6f4/container/context
Overwriting https://resource.swissartresearch.net/digitalobject/aa537299-bce4-314d-bdac-979455cc0eef/container/context
Overwriting https://resource.swissartresearch.net/digitalobject/b957bb50-c31c-3f6e-a98f-1fae9bf4341d/container/context
Overwriting https://resource.swissartresearch.net/digitalobject/b184df35-4b8d-3ad5-a307-b1bddff7de6f/container/context
Overwriting https://resource.swissartresearch.net/digitalobject/08d4b5a7-9748-35b3-abb4-f5b420d5ae58/container/context
Overwriting https://resource.swissartresearch.net/digitalobject/7ddc1e30-cf39-3f0e-8e17-dc85695f763d/container/context
Overwriting https://resource.swissartresearch.net/digitalobject/5b29e4fd-7768-3fd1-a387-24f2b66dd0af/container/context


In [10]:
# Upload every selected graph and remember the ones the endpoint rejected,
# so that a failed upload does not go unnoticed
failedUploads = []
for g in tqdm(graphsToUpdate):
    if not putGraph(g, endpoint):
        failedUploads.append(g.identifier)

if failedUploads:
    print(len(failedUploads), "graphs could not be uploaded:")
    for identifier in failedUploads:
        print(" ", identifier)

100%|██████████| 7/7 [00:00<00:00, 35.83it/s]
