# Update Items

### Import Libraries

In [1]:
import glob
import os
import json
import xml.etree.ElementTree as ET
import requests
from urllib import parse
import urllib.request
import configparser
from tqdm import tqdm
import time

### Get configuration file

In [2]:
# Load the API credentials and endpoint from the shared INI file.
config = configparser.ConfigParser()

# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Check that list so a missing file is reported
# as such instead of surfacing later as a NoSectionError for 'main'.
if not config.read('../config/api.ini'):
    raise FileNotFoundError(
        "Configuration file not found or unreadable: ../config/api.ini"
    )

# All three values are required options of the [main] section.
client_id = config.get('main', 'client_id')
client_secret = config.get('main', 'client_secret')
endpoint = config.get('main', 'endpoint')

### Connect to the API

In [3]:
# Credentials passed as query-string parameters (`params=`) on API requests.
params = dict(key_identity=client_id, key_credential=client_secret)

# Base URL every request URL is built from. Nothing is sent to the server
# here; the message below only reports which endpoint was configured.
access_url = endpoint
print("Connected to:    ", access_url)

Connected to:     http://localhost:8080/api/


### Navigate the XML and convert it to JSON

In [4]:
# Build `dictionary` from every file in ../input:
#     {file name: {"<group name>_<item name>": [cleaned text, ...]}}
# Each <item> found under family/group contributes one cleaned text value.

# Single-pass character clean-up: drop newlines and quotes, turn tabs into spaces.
_CLEANUP_TABLE = str.maketrans({"\n": "", "\t": " ", '"': "", "'": ""})


def fix_getty_link(field_value):
    """Rewrite a bracketed Getty link as ``<name>; <TGN page URL>``.

    The value is rewritten only when it mentions "getty", contains a
    ``[...]`` section, and that section carries a ``subjectid=`` parameter.
    Every other value (including ``None``) is returned unchanged.
    """
    text = str(field_value)
    # Both brackets are required; the text between them is taken as the link.
    if not ("getty" in text and 'subjectid=' in text and '[' in text and ']' in text):
        return field_value

    name = field_value.split('[')[0].split(']')[0]
    url = field_value.split('[')[1].split(']')[0]
    if 'subjectid=' not in url:
        return field_value

    subject_id = url.split('subjectid=')[1]
    return name + "; http://vocab.getty.edu/page/tgn/" + subject_id


def clean_field_text(field_value):
    """Return the text of a field with newlines and quotes removed.

    Tabs become single spaces. A missing value (``None``) yields an empty
    string; real text containing the letters "None" is left intact.
    """
    if field_value is None:
        return ""
    return str(field_value).translate(_CLEANUP_TABLE)


dictionary = {}

for file in glob.glob("../input/*"):
    root = ET.parse(file).getroot()
    fileName = str(os.path.basename(file))

    for group in root.findall('family/group'):
        groupName = group.attrib['name']

        for value in group.findall('item'):
            matchingName = groupName + "_" + value.attrib['name']
            cleanedText = clean_field_text(fix_getty_link(value.text))

            # Create the per-file and per-field containers on first use,
            # then append so repeated fields keep all of their values.
            dictionary.setdefault(fileName, {}).setdefault(matchingName, []).append(cleanedText)

### Upload Images

In [72]:
# Upload the images belonging to every item in `dictionary` as media.
#
# For each source file name: look the item up by its identifier, collect the
# JPEGs whose path contains that identifier, move the main image to the front,
# and POST every image that is not already present on the server.

# Kept because it was defined here originally. It is deliberately NOT sent
# with the multipart upload below: requests must generate the multipart
# Content-Type (including its boundary) itself, and a caller-supplied
# Content-Type header would override it.
headers = {"Content-Type": "application/json"}


def _search_by_identifier(resource, text):
    """Return the decoded JSON reply listing `resource` records matching `text`.

    `resource` is the API path segment ('items' or 'media'); the search is
    made on property 1899 with the 'in' match type. `text` is URL-encoded so
    names containing spaces or reserved characters produce a valid URL.
    """
    url = (access_url + resource + '?property[0][property]=' + str(1899)
           + '&property[0][type]=in&property[0][text]=' + parse.quote(text, safe=''))
    with urllib.request.urlopen(url) as reply:
        return json.loads(reply.read().decode('utf-8'))


def _main_image_index(images, identifier):
    """Return the index of the main image in `images`, or None if there is none.

    An image counts as main when its path contains 'recto-cropped' or
    'recto-sdg-cropped', or when its base name up to the first dot equals
    `identifier`. When several images qualify, the last one wins.
    """
    main_pos = None
    for pos, path in enumerate(images):
        stem = os.path.basename(path).split('.')[0]
        if 'recto-cropped' in path or 'recto-sdg-cropped' in path or identifier == stem:
            main_pos = pos
    return main_pos


# The directory listing does not depend on the item, so read it once.
all_images = glob.glob("../media/mid/*.jpg")

for i in tqdm(dictionary):
    response = _search_by_identifier('items', i)
    if not response:
        # No item carries this identifier; nothing to attach images to.
        tqdm.write("No item found for " + i + "; skipping")
        continue

    identifier = response[0]['crm:P1_is_identified_by'][0]['@value']
    resourceId = response[0]['o:id']

    images = [f for f in all_images if identifier in f]

    # Move the main image to the first position (index 0 is already first).
    mainImagePos = _main_image_index(images, identifier)
    if mainImagePos:
        images.insert(0, images.pop(mainImagePos))

    for pos, image in enumerate(images):
        imageName = os.path.basename(image)

        # Skip images for which a media record with this name already exists.
        if _search_by_identifier('media', imageName):
            continue

        data = {
            "o:ingester": "upload",
            "file_index": pos,
            "o:item": {"o:id": resourceId},
            "o:resource_template": {"o:id": 4},
            "crm:P2_has_type": [
                {
                    "type": "uri",
                    "property_id": 1924,
                    "@id": "http://vocab.getty.edu/aat/300215302",
                    "o:label": "digital images"
                }
            ],
            "crm:P65i_is_shown_by": [
                {
                    "type": "resource",
                    "property_id": 1915,
                    "value_resource_id": resourceId,
                }
            ],
            "crm:P1_is_identified_by": [
                {
                    "type": "literal",
                    "property_id": 1899,
                    "@value": imageName,
                }
            ]
        }

        # "Kartei" images get class 533; every other image currently gets 579.
        # TODO: check whether there are other classes.
        if "Kartei" in imageName:
            data["o:resource_class"] = {"o:id": 533}
        else:
            data["o:resource_class"] = {"o:id": 579}

        # Keep the file open only for the duration of the request.
        with open(image, "rb") as image_file:
            files = [
                ("data", (None, json.dumps(data), "application/json")),
                ("file[" + str(pos) + "]", (image, image_file, "image/jpg")),
            ]
            r = requests.post(endpoint + 'media', params=params, files=files)

        # Report failed uploads without aborting the remaining ones.
        if not r.ok:
            tqdm.write("Upload of " + imageName + " failed: HTTP " + str(r.status_code))

        # Short pause between uploads (presumably to go easy on the server).
        time.sleep(.25)

            

  2%|▏         | 10/641 [00:42<45:07,  4.29s/it]


KeyboardInterrupt: 