# Update Items

### Import Libraries

In [1]:
import glob
import os
import json
import xml.etree.ElementTree as ET
import requests
from urllib import parse
import argparse
import urllib.request
import configparser
from tqdm import tqdm
import time

### Get configuration file

In [2]:
# Load the API credentials and endpoint from the shared configuration file.
config = configparser.ConfigParser()
# ConfigParser.read() skips files it cannot open and returns the list of
# files it actually parsed, so stop here with a clear message instead of a
# confusing NoSectionError on the first lookup below.
if not config.read('../config/api.ini'):
    raise FileNotFoundError("Could not read configuration file '../config/api.ini'")
client_id = config.get('main', 'client_id')
client_secret = config.get('main', 'client_secret')
endpoint = config.get('main', 'endpoint')

### Connect to the API

In [3]:
# Credentials that are passed as query parameters on API requests.
params = dict(key_identity=client_id, key_credential=client_secret)

# Keep the configured endpoint under a second name and echo it.
access_url = endpoint
print("Connected to:    ", access_url)

Connected to:     http://localhost:8080/api/


### Navigate the XML and convert it to JSON

In [4]:
# Characters stripped from (or replaced in) every field value before storing.
_FIELD_CLEANUP = str.maketrans({"\n": None, "\t": " ", '"': None, "'": None})


def fixGettyLink(fieldValue):
    """Rewrite a 'name [..subjectid=N..]' Getty reference as 'name; <TGN page URL>'.

    Values that do not mention "getty", "subjectid=" and "[" (including
    ``None``) are returned unchanged, as are values whose bracketed part does
    not contain "subjectid=".
    """
    text = str(fieldValue)
    if "getty" not in text or 'subjectid=' not in text or '[' not in text:
        return fieldValue

    name = fieldValue.split('[')[0].split(']')[0]
    url = fieldValue.split('[')[1].split(']')[0]
    if 'subjectid=' not in url:
        return fieldValue

    return name + "; http://vocab.getty.edu/page/tgn/" + url.split('subjectid=')[1]


def cleanFieldText(fieldValue):
    """Return the field text without newlines/quotes and with tabs as spaces.

    A missing text (``None``) becomes an empty string. Only a real ``None`` is
    treated as missing, so text that merely contains the letters "None" is
    kept intact.
    """
    if fieldValue is None:
        return ""
    return str(fieldValue).translate(_FIELD_CLEANUP)


# Maps each input file name to {"<group name>_<item name>": [cleaned values]}.
dictionary = {}
fileName = ''

for file in glob.glob("../input/*"):
    root = ET.parse(file).getroot()
    fileName = str(os.path.basename(file))

    for group in root.findall('family/group'):
        groupName = group.attrib['name']

        for value in group.findall('item'):
            matchingName = groupName + "_" + value.attrib['name']
            cleanedText = cleanFieldText(fixGettyLink(value.text))

            # Create the per-file and per-field containers on first use,
            # then collect every occurrence of the field in document order.
            dictionary.setdefault(fileName, {}).setdefault(matchingName, []).append(cleanedText)

#### Structure a JSON-LD item

In [7]:
def structureField(dict_type, property_id, value, lang='', public=True):
    """Wrap a single literal value in a one-element list keyed by ``dict_type``.

    The ``@language`` key is only included when ``lang`` is non-empty.
    """
    # Keys are inserted in a fixed order so the serialized JSON stays stable.
    literal = {"type": "literal", "property_id": property_id, "@value": value}
    if lang:
        literal["@language"] = lang
    literal['is_public'] = public
    return {dict_type: [literal]}

def structureLabel(dict_type, property_id, value, lang='', public=True):
    """Build one value object whose ``type`` is ``dict_type``.

    The ``@language`` key is only included when ``lang`` is non-empty.
    """
    # Keys are inserted in a fixed order so the serialized JSON stays stable.
    label = {"type": dict_type, "property_id": property_id, "@value": value}
    if lang:
        label["@language"] = lang
    label['is_public'] = public
    return label
    
def metadata(dictionaryID, labels, lang=''):
    """Build a property entry from the parsed record of the current item.

    The record is looked up as ``dictionary[identifier]``; both names are
    module-level variables, so ``identifier`` must be set before calling.

    Args:
        dictionaryID: Key under which the generated values are returned.
        labels: Mapping of datatype -> ``[property_id, main field name,
            *additional field names]``. Entries with an empty key are skipped.
        lang: Optional language tag forwarded to ``structureLabel``.

    Returns:
        ``{dictionaryID: [value objects]}``. For every occurrence of the
        first label's main field there is one value object per label; its
        value is the main field followed by that label's additional fields,
        joined with ";". Empty ``labels`` yields ``{dictionaryID: []}``.

    Raises:
        KeyError: if a referenced field is missing from the record.
        IndexError: if a field has fewer occurrences than the first label's
            main field.
    """
    objectEntry = {dictionaryID: []}
    if not labels:
        return objectEntry

    record = dictionary[identifier]

    # The first label's main field decides how many occurrences are emitted;
    # all other fields are expected to have at least as many entries.
    firstKey = next(iter(labels))
    occurrences = len(record[labels[firstKey][1]])

    for i in range(occurrences):
        for key, spec in labels.items():
            if key == "":
                continue

            propertyId, mainField, *additionalFields = spec
            # Each label contributes its own additional fields (not those of
            # the first label) to the combined value.
            parts = [record[mainField][i]]
            parts.extend(record[additionalField][i] for additionalField in additionalFields)

            objectEntry[dictionaryID].append(structureLabel(key, propertyId, ";".join(parts), lang))

    return objectEntry

### Import multiple items

In [10]:
url = endpoint + 'items?page=all'

print(url)
# Read the full item listing and release the connection right away.
with urllib.request.urlopen(url) as listing:
    response = json.loads(listing.read().decode('utf-8'))


def resolveIdentifier(resource):
    """Return the item's identifier value, falling back to its title.

    The title is used when 'crm:P1_is_identified_by' is missing, empty, or
    its first value has no (or an empty) '@value'.
    """
    identified = resource.get('crm:P1_is_identified_by') or [{}]
    return identified[0].get('@value') or resource['o:title'][0]['@value']


def dimensionEntries(sizes, prefix, propertyLabel, propertyID):
    """Build one nested dimension value per available size component.

    ``sizes`` holds up to three components in height, width, depth order;
    components that are not present are simply not emitted.
    """
    entries = []
    for axis, size in zip(('height', 'width', 'depth'), sizes):
        properties = [
            {'label': 'P2 has type', 'value': prefix + ' ' + axis},
            {'label': 'P90 has value', 'value': size},
            {'label': 'P91 has unit', 'value': 'centimeters'},
        ]
        entries.append(structureLabel(propertyLabel, propertyID, properties))
    return entries


pbar = tqdm(response, desc='description')

for i, resource in enumerate(pbar):
    try:
        # `identifier` stays a module-level name because metadata() reads it.
        identifier = resolveIdentifier(resource)
        resourceId = resource['o:id']

        # Only items that have a parsed input record are updated.
        if identifier not in dictionary:
            continue
        record = dictionary[identifier]

        # --------- crm:P1 --------- #

        dictionaryID = 'crm:P1_is_identified_by'
        propertyLabel = 'nesteddatatype#crm:E42_Identifier'
        propertyID = 1899

        properties = [
            {'label': 'P190 has symbolic content', 'value': identifier},
        ]

        # The identifier is stored twice: as a non-public literal and as a
        # nested value.
        cidocIdentifier = {dictionaryID: [
            structureLabel('literal', propertyID, identifier, public=False),
            structureLabel(propertyLabel, propertyID, properties),
        ]}

        # --------- crm:P43 --------- #

        dictionaryID = 'crm:P43_has_dimension'
        propertyLabel = 'nesteddatatype#crm:E54_Dimension'
        propertyID = 1853

        # Sizes are written like "H x B x T" with decimal commas.
        paintingSize = record['Objekt_Gemälde Maße max. (H x B x T in cm)'][0].replace(',', '.').split('x')
        # NOTE(review): the frame size is read from the same field as the
        # painting size, which looks like a copy-paste slip - confirm the
        # name of the frame dimension field and use it here.
        frameSize = record['Objekt_Gemälde Maße max. (H x B x T in cm)'][0].replace(',', '.').split('x')

        cidocDimension = {dictionaryID: (
            dimensionEntries(paintingSize, 'Canvas', propertyLabel, propertyID)
            + dimensionEntries(frameSize, 'Frame', propertyLabel, propertyID)
        )}

        # --------- crm:P45 --------- #

        dictionaryID = 'crm:P45_consists_of'
        propertyID = 1782

        # The source keeps all materials in one semicolon-separated entry.
        # TODO: check whether this can be merged with metadata().
        paintingMaterial = record['Objekt_Gemälde Material'][0].split(';')

        cidocformat = {dictionaryID: [
            structureLabel('literal', propertyID, material, 'de')
            for material in paintingMaterial
            if material != ""
        ]}

        # --------- crm:P102_has_title --------- #

        title = record['Objekttitel_Objekttitel'][0]
        cidoctitle = structureField('crm:P102_has_title', 1897, title, 'de')

        # --------------------------- #

        resourceClass = {"o:resource_class": {"o:id": 592}}

        # Merge all property groups defined above into one patch and apply
        # it on top of the item as returned by the API.
        patch = {**resourceClass, **cidocIdentifier, **cidocDimension, **cidocformat,
                 **cidoctitle}
        resource.update(patch)

        # If properties need to be deleted, do it before dumping the JSON,
        # e.g. del resource['dcterms:alternative']
        postRequest = json.dumps(resource)

        putUrl = endpoint + 'items/' + str(resourceId)
        headers = {"Content-Type": "application/json"}
        r = requests.patch(putUrl, data=postRequest, params=params, headers=headers)
        # Surface rejected updates instead of reporting them as done.
        r.raise_for_status()

        # Short pause between consecutive update requests.
        time.sleep(.25)
        pbar.set_description("ITEM: " + str(identifier) + "| ID: " + str(resourceId))

    except Exception as e:
        # Best effort: report the failing item and continue with the next.
        print("Item " + str(resource.get('o:id')) + " failed: " + repr(e))

http://localhost:8080/api/items?page=all


ITEM: G_017_093| ID: 1327: 100%|██████████| 641/641 [10:14<00:00,  1.04it/s]  


In [None]:
# --------- pina:signature  --------- #

# Build the 'pina:signature' entry with metadata().
# NOTE(review): metadata() reads the module-level `identifier`, so this cell
# works on whichever item the import loop assigned last - confirm that this
# is intended before running it on its own.
dictionaryID = 'pina:signature'

# Each label maps a datatype to [property_id, main field, *additional fields];
# metadata() joins the main and additional field values with ";".
labels = {
     'literal': [1181, 'Signatur_Name' , 'Signatur_Datum', 'Signatur_Anmerkung'],
     #'pinadatatypes:name':[1156,'Signatur_Name',],
     #'pinadatatypes:date':[1156,'Signatur_Datum',],
     #'pinadatatypes:note': [1156,'Signatur_Anmerkung'],
} 

# German ('de') is passed as the language tag for every generated value.
pinaSignature = metadata(dictionaryID, labels, 'de') 