# SGV 12 Dates & Locations

A notebook to create a data set of images from the Ernst Brunner collection that contain dates and locations.

In [2]:
import os
import json
from collections import defaultdict

# path to the complete, local, collection data
# The default is the original author's local folder; on another machine set the
# SGV_INPUT_DIR environment variable instead of editing this cell.
input_dir = os.environ.get(
    "SGV_INPUT_DIR",
    "/Users/maxfrischknecht/mfvk_cloud/A_PhD/07_Data/SGV_12N_Metadata_full",
)

In [3]:
# Loop over all JSON files in input_dir and collect, for every record that has
# the keys 'schema:identifier', 'schema:temporal' and 'schema:location',
# a reduced object holding only the identifier, the dates and the locations.
#
# The file names are sorted because os.listdir() returns entries in arbitrary
# order; sorting keeps the order of `result` (and of everything derived from
# it) the same on every run and every machine.
result = []
for file_name in sorted(os.listdir(input_dir)):
    if not file_name.endswith('.json'):
        continue

    # Read explicitly as UTF-8 so decoding does not depend on the platform's
    # default encoding. The file is closed as soon as it has been parsed.
    with open(os.path.join(input_dir, file_name), 'r', encoding='utf-8') as f:
        data = json.load(f)

    # place to save the extracted data of this file
    reduced_data = []

    for obj in data:

        # skip records that lack an id, dates or locations
        if not ('schema:identifier' in obj and 'schema:temporal' in obj and 'schema:location' in obj):
            continue

        reduced_obj = {}

        # save the '@value' of the first identifier entry as id
        reduced_obj['schema:identifier'] = obj['schema:identifier'][0]['@value']

        # collect the '@value' of every date entry that has one
        dates_list = [item['@value'] for item in obj['schema:temporal'] if '@value' in item]
        reduced_obj["schema:temporal"] = dates_list

        # collect the 'display_title' of every location entry that has one
        loc_list = [item['display_title'] for item in obj['schema:location'] if 'display_title' in item]
        reduced_obj["schema:location"] = loc_list

        # save the new object with its properties in a list
        reduced_data.append(reduced_obj)

    # save the list of reduced objects in a global list
    result.extend(reduced_data)

In [4]:
# Sanity check: show one sample record, then the total number of extracted records
print(result[5], len(result), sep='\n')

{'schema:identifier': 'SGV_12N_07511', 'schema:temporal': ['1942-05'], 'schema:location': ['Kanton Uri']}
31069


Aggregate all objects that have the same date and the same location to reduce file size.

In [5]:
# Group the records by their (dates, locations) combination.
# Every group collects the identifiers of all records sharing that combination.
aggregated_result = defaultdict(lambda: {'schema:identifier': []})

for record in result:
    dates = record['schema:temporal']
    locations = record['schema:location']

    # lists are not hashable, so the grouping key is built from tuples
    group = aggregated_result[(tuple(dates), tuple(locations))]
    group['schema:identifier'].append(record['schema:identifier'])
    group['schema:temporal'] = dates
    group['schema:location'] = locations

# Turn the groups into a plain list and show its size plus one sample group
final_result = [*aggregated_result.values()]
print(len(final_result))
print(final_result[6])

2133
{'schema:identifier': ['SGV_12N_07689', 'SGV_12N_07690', 'SGV_12N_07691', 'SGV_12N_07692', 'SGV_12N_07693', 'SGV_12N_07694', 'SGV_12N_09266', 'SGV_12N_09267', 'SGV_12N_09268', 'SGV_12N_09269', 'SGV_12N_09270', 'SGV_12N_09272', 'SGV_12N_09273', 'SGV_12N_09274', 'SGV_12N_09275', 'SGV_12N_09276', 'SGV_12N_09277', 'SGV_12N_09278', 'SGV_12N_09279', 'SGV_12N_09280', 'SGV_12N_09281', 'SGV_12N_09283', 'SGV_12N_09284', 'SGV_12N_09285', 'SGV_12N_09286', 'SGV_12N_09287', 'SGV_12N_09288', 'SGV_12N_09289', 'SGV_12N_09290', 'SGV_12N_09291', 'SGV_12N_09292', 'SGV_12N_09293', 'SGV_12N_09294', 'SGV_12N_09295', 'SGV_12N_09296', 'SGV_12N_09299', 'SGV_12N_09300'], 'schema:temporal': ['1942'], 'schema:location': ['Zürich']}


In [6]:
# Write the aggregated records to disk as JSON indented by 4 spaces.
export_path = './export/sgv-12_dates_locations.json'

# Create the export folder if it does not exist yet; without this, open()
# raises FileNotFoundError on a fresh checkout.
os.makedirs(os.path.dirname(export_path), exist_ok=True)

with open(export_path, 'w', encoding='utf-8') as json_file:
    json.dump(final_result, json_file, indent=4)

print("Data has been exported")

Data has been exported
