In [1]:
# Extend the module search path with the project's local library directory.
# NOTE(review): presumably this is where SmapshotConnector is imported from — confirm.
import sys
sys.path.append('../services/smapshotapi/src/lib/')
# Install python-dotenv using the same interpreter that runs this kernel.
!{sys.executable} -m pip install python-dotenv



In [2]:
from SPARQLWrapper import SPARQLWrapper, JSON
from SmapshotConnector import SmapshotConnector
from tqdm import tqdm
import os
from dotenv import load_dotenv

In [3]:
# Read a .env file (python-dotenv) into the process environment.
# NOTE(review): which .env file is picked up depends on python-dotenv's defaults — confirm.
load_dotenv()

True

In [13]:
# sMapshot API token taken from the environment; None when SMAPSHOT_TOKEN is unset.
smapshotToken = os.environ.get('SMAPSHOT_TOKEN')

In [14]:
def sparqlResultToDict(results):
    """Flatten a SPARQL JSON result into a list of plain dictionaries.

    Args:
        results: Parsed SPARQL JSON result, i.e. a mapping that provides
            ``results["results"]["bindings"]`` (one entry per row) and
            ``results["head"]["vars"]`` (the projected variable names).

    Returns:
        A list with one dict per binding. Each dict has a key for every
        variable in ``head.vars``; the value is the binding's ``"value"``
        entry, or ``None`` when the variable is absent from that row.
    """
    # The variable list is looked up per row (inside the inner comprehension),
    # so it is never touched when there are no bindings at all.
    return [
        {
            name: (binding[name]["value"] if name in binding else None)
            for name in results["head"]["vars"]
        }
        for binding in results["results"]["bindings"]
    ]

In [15]:
# Connector for the sMapshot API (v1 endpoint); the token read from SMAPSHOT_TOKEN is passed along.
smapshot = SmapshotConnector(url="https://smapshot.heig-vd.ch/api/v1", token=smapshotToken)

Retrieve photographers from sMapshot

In [16]:
# Ask sMapshot for its photographers, passing a {"company": "SARI"} argument.
# NOTE(review): presumably this restricts the list to the "SARI" company — confirm against SmapshotConnector.
existingPhotographers = smapshot.listPhotographers({"company": "SARI"})

Retrieve relevant persons from BSO

In [17]:
# SPARQL client for the Blazegraph endpoint on localhost; results are requested
# as JSON so they can be flattened with sparqlResultToDict.
sparql = SPARQLWrapper("http://localhost:8887/blazegraph/sparql")
sparql.setReturnFormat(JSON)

In [18]:
# Persons who carried out a (sub-)creation of an object that was assigned the
# "landscape" classification using the referenced image-classification model.
# Each person comes with one sampled rdfs:label, exposed as ?lastname.
# NOTE(review): ?firstname is projected but never bound in the pattern, so it
# always comes back unbound (None after sparqlResultToDict).
dataQuery = """PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX crmdig: <http://www.ics.forth.gr/isl/CRMdig/>
PREFIX search: <https://platform.swissartresearch.net/search/>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
SELECT DISTINCT ?person (?person_label as ?lastname) ?firstname WHERE {
    ?s a search:Object ;
       crm:P128_carries/crm:P94i_was_created_by/crm:P9_consists_of ?subcreation .
    ?classifier crm:P140_assigned_attribute_to ?s ;
      crm:P141_assigned <https://resource.swissartresearch.net/type/classification/landscape> ;
      crm:P33_used_specific_technique <https://github.com/swiss-art-research-net/bso-image-classification/blob/583c8891df9739723f81dfc1ac127b06e411ec34/models/model.pkl> .
    ?subcreation a crm:E65_Creation ;
      crm:P14_carried_out_by ?person .
    {
      SELECT ?person (SAMPLE(?person_label) as ?person_label) WHERE {
        ?person rdfs:label ?person_label .
      } GROUP BY ?person
    }
}
"""

In [19]:
# Run the persons query and flatten the JSON bindings into a list of dicts
# (keys: the variables projected by dataQuery).
sparql.setQuery(dataQuery)
persons = sparqlResultToDict(sparql.queryAndConvert())

Determine persons not yet on sMapshot

In [20]:
# Index the sMapshot photographers by their 'link' value for direct lookup;
# when several entries share a link, the last one wins.
photographerHash = {entry['link']: entry for entry in existingPhotographers}

In [21]:
# Attach the sMapshot photographer ID to every person whose URI is already
# known there; persons without a match get the sentinel False.
for person in persons:
    alreadyOnSmapshot = person['person'] in photographerHash
    person['smapshotID'] = photographerHash[person['person']]['id'] if alreadyOnSmapshot else False

In [22]:
# Persons that still carry the False sentinel, i.e. had no match on sMapshot.
# Identity is used on purpose: `0 == False` is True in Python, so an equality
# test would misclassify a photographer whose ID is 0 as missing.
personsNotInSmapshot = [d for d in persons if d['smapshotID'] is False]
len(personsNotInSmapshot)

260

Submit missing persons to sMapshot

In [23]:
# Create a photographer on sMapshot for every person that has none yet and
# remember the ID returned in the response.
for person in tqdm(personsNotInSmapshot):
    # Persons that already received an ID (e.g. this cell was interrupted and
    # is being re-run) are skipped so they are not submitted a second time.
    # NOTE(review): presumably addPhotographer creates a new record on every call — confirm.
    if person['smapshotID'] is not False:
        continue
    # Only the label (exposed as 'lastname') and the person URI are sent.
    response = smapshot.addPhotographer(lastname=person['lastname'],
                                        link=person['person'])
    person['smapshotID'] = response['id']

100%|██████████| 260/260 [00:34<00:00,  7.63it/s]


Retrieve the photographers again (the list should now include the newly added ones)

In [24]:
# Refresh the photographer list from sMapshot.
# NOTE(review): unlike the first retrieval, no {"company": "SARI"} argument is passed here — confirm this is intended.
existingPhotographers = smapshot.listPhotographers()

In [25]:
# Rebuild the link -> photographer lookup from the refreshed list
# (for duplicate links the last entry is kept).
photographerHash = {item['link']: item for item in existingPhotographers}

Retrieve relevant images and persons from BSO

In [26]:
# One row per landscape-classified object (?s), with the URIs of all persons who
# carried out its (sub-)creation joined by ";" into ?persons.
# The label sub-select is not projected here; it only requires that each person
# has an rdfs:label.
imagesQuery = """PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX crmdig: <http://www.ics.forth.gr/isl/CRMdig/>
PREFIX search: <https://platform.swissartresearch.net/search/>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
SELECT DISTINCT ?s (GROUP_CONCAT(DISTINCT ?person;SEPARATOR=";") as ?persons) WHERE {
    ?s a search:Object ;
       crm:P128_carries/crm:P94i_was_created_by/crm:P9_consists_of ?subcreation .
    ?classifier crm:P140_assigned_attribute_to ?s ;
      crm:P141_assigned <https://resource.swissartresearch.net/type/classification/landscape> ;
      crm:P33_used_specific_technique <https://github.com/swiss-art-research-net/bso-image-classification/blob/583c8891df9739723f81dfc1ac127b06e411ec34/models/model.pkl> .
    ?subcreation a crm:E65_Creation ;
      crm:P14_carried_out_by ?person .
    {
      SELECT ?person (SAMPLE(?person_label) as ?person_label) WHERE {
        ?person rdfs:label ?person_label .
      } GROUP BY ?person
    }
} GROUP BY ?s
"""

In [27]:
# Run the images query; each resulting dict has the keys 's' (object URI)
# and 'persons' (";"-separated person URIs).
sparql.setQuery(imagesQuery)
images = sparqlResultToDict(sparql.queryAndConvert())

Add smapshot photographers to image entries

In [28]:
# Index the BSO images by URI and attach to each one the sMapshot photographer
# records of its persons. Person URIs unknown to sMapshot are only reported.
imagesHash = {}
for image in images:
    imagesHash[image['s']] = image
    matchedPhotographers = []
    image['smapshotPhotographers'] = matchedPhotographers
    # 'persons' holds the person URIs joined by ";" (see imagesQuery).
    for personUri in image['persons'].split(";"):
        if personUri in photographerHash:
            matchedPhotographers.append(photographerHash[personUri])
        else:
            print("not found", personUri)

Retrieve all images from sMapshot

In [29]:
# Fetch the image list from sMapshot (no filter argument is passed).
smapshotImages = smapshot.listImages()

In [30]:
# Report how many images were retrieved from sMapshot.
print("Retrieved %d images" % len(smapshotImages))

Retrieved 1273 images


Update images with photographer IDs

In [None]:
# Push the photographer IDs collected from BSO to every sMapshot image that
# has a counterpart in imagesHash.
for smapshotImage in tqdm(smapshotImages):
    # The BSO artwork URI is derived from the image's original_id.
    uri = 'https://resource.swissartresearch.net/artwork/' + smapshotImage['original_id']
    imageID = smapshotImage['id']
    try:
        photographerIDs = [int(d['id']) for d in imagesHash[uri]['smapshotPhotographers']]
    except (KeyError, TypeError, ValueError):
        # Image unknown to BSO, or a photographer entry without a usable ID:
        # leave this image untouched. Only these lookup/conversion errors are
        # skipped, so KeyboardInterrupt and unrelated failures still surface.
        continue
    smapshot.setImagePhotographerIDs(imageID, photographerIDs)

 42%|████▏     | 538/1273 [01:25<01:48,  6.79it/s]