# Linked Art - Create Summary Data Files - Person

This notebook will create summary data files for the MOMA alternative data transformed to Linked Art JSON-LD, for use with an exhibition browser.

The JSON data files are in the `./data` directory as follows:

- `./data`
  - `activity`
  - `group`
  - `person`

Where:
- activity -> exhibition event 
- group -> organisation involved in exhibition events
- person -> person involved in exhibition events


# Import libraries

In [2]:
try:
    import json
except:
    %pip install json
    import json

from operator import itemgetter

import os

#  Create JSON file for all *Person* entities
The following code will create a JSON file 
`output/summary/person/persons_all.json`

The JSON file contains summary information about each person entity defined in the `data/person` directory:
- id
- _label
- name
- born
- died
- nationality
- identified_by
- referred_to_by
- equivalent
- total_exhibitions
- exhibitions
- gender

# Define location of input and output files

In [10]:
# Root folder of the Linked Art JSON-LD input files
linked_data_dir = "../../linked_data"

# Input folders: Linked Art files describing persons and exhibition activities
linked_data_filepath_person = f"{linked_data_dir}/person"
linked_data_filepath_activity = f"{linked_data_dir}/activity"

# Output folders for the generated summary files
output_dir = "../../output/summary"
output_person_dir = f"{output_dir}/person"
output_activity_dir = f"{output_dir}/activity"

# Names of the summary files that hold every person / every event
persons_all_file = "persons_all.json"
activity_all_file = "events_all.json"

# Fragments used to assemble the names of the ordered summary files
asc = "asc"
desc = "desc"
born = "born"
died = "died"
name = "name"
persons = "persons"
json_suffix = ".json"

# Ordered summary files are named persons_<field>_<direction>.json
output_persons_born_asc_file = f"{persons}_{born}_{asc}{json_suffix}"
output_persons_born_desc_file = f"{persons}_{born}_{desc}{json_suffix}"

output_persons_died_asc_file = f"{persons}_{died}_{asc}{json_suffix}"
output_persons_died_desc_file = f"{persons}_{died}_{desc}{json_suffix}"

output_persons_name_asc_file = f"{persons}_{name}_{asc}{json_suffix}"
output_persons_name_desc_file = f"{persons}_{name}_{desc}{json_suffix}"

output_persons_total_exhibitions_desc = f"{persons}_total_exhibitions_{desc}{json_suffix}"

# Maximum number of records written to each ordered summary file
summary_number_records = 200

In [6]:


# Summary records for every person file that could be processed.
# NOTE: this rebinds `persons`, which the configuration cell also uses as a
# file-name fragment; the file names are already built by then, so this only
# matters if parts of that cell are re-run selectively.
persons = []

# Every key of a person summary record, with its empty default value
person_template = {
    "id": "",
    "_label": "",
    "name": "",
    "born": "",
    "died": "",
    "nationality": "",
    "identified_by": [],
    "referred_to_by": [],
    "equivalent": [],
    "total_exhibitions": "",
    "exhibitions": [],
    "gender": ""
}


def _new_person_record():
    """Return a fresh record based on person_template.

    dict.copy() is shallow, so the list defaults would be shared by every
    record; each list is copied here so records never alias each other.
    """
    return {
        key: list(value) if isinstance(value, list) else value
        for key, value in person_template.items()
    }


# Names of input files that could not be turned into a summary record
skipped_files = []

# os.listdir() returns entries in arbitrary order; sorting keeps the output
# file reproducible between runs and machines.
for filename in sorted(os.listdir(linked_data_filepath_person)):
    try:
        with open(os.path.join(linked_data_filepath_person, filename), 'r', encoding="utf-8") as json_file:
            data = json.load(json_file)

        this = _new_person_record()

        # id
        this["id"] = data.get("id")
        # _label
        this["_label"] = data.get("_label")

        # name: read from the second identified_by entry
        # NOTE(review): presumably the first entry is an identifier rather
        # than a name - confirm against the Linked Art person files
        this["name"] = data.get("identified_by")[1].get("content")

        # born: keep only the date part of the timestamp
        if "born" in data:
            this["born"] = data.get("born").get("timespan").get("begin_of_the_begin").split("T")[0]

        # died: keep only the date part of the timestamp
        if "died" in data:
            this["died"] = data.get("died").get("timespan").get("end_of_the_end").split("T")[0]

        # nationality: label of the first classified_as entry
        if "classified_as" in data:
            this["nationality"] = data.get("classified_as")[0].get("_label")

        # gender: content of the first referred_to_by entry
        if "referred_to_by" in data:
            this["gender"] = data.get("referred_to_by")[0].get("content")

    except (OSError, ValueError, AttributeError, IndexError, KeyError, TypeError):
        # Best effort, as before: unreadable files (including directories),
        # invalid JSON and records missing an expected property are skipped.
        # Unlike a bare `except`, this no longer swallows KeyboardInterrupt.
        skipped_files.append(filename)
    else:
        persons.append(this)

if skipped_files:
    print("Skipped %d file(s) that could not be summarised: %s"
          % (len(skipped_files), ", ".join(skipped_files)))

# make sure the output folder exists before writing the summary file
os.makedirs(output_person_dir, exist_ok=True)

with open(os.path.join(output_person_dir, persons_all_file), 'w', encoding="utf-8") as file:
    file.write(json.dumps({"persons": persons}, indent=2))

# Add total number of exhibitions 

Add total number of exhibitions per artist to `output/summary/person/persons_all.json`

Method:
- iterate over persons in `persons_all.json`
  - get person_id
  - iterate over exhibitions in `events_all.json`
    - if person_id in exhibition's influenced_by list
      - add exhibition to new list for person
        - update person record in `persons_all.json`
          - append list of exhibitions that person influenced
          - append total number of exhibitions that person influenced 


In [7]:
# Adds `exhibitions` and `total_exhibitions` to every record in
# persons_all.json, based on the `influenced_by` lists in events_all.json.

# get events summary data
with open(os.path.join(output_activity_dir, activity_all_file), 'r', encoding="utf-8") as json_file:
    exdata = json.load(json_file)

# get person summary data; the file is closed again before it is overwritten
with open(os.path.join(output_person_dir, persons_all_file), 'r', encoding="utf-8") as json_file:
    data = json.load(json_file)

# Index built in one pass over the events:
#   upper-cased person id -> exhibition summaries, in events_all.json order.
# This replaces re-scanning (and re-upper-casing) every influenced_by list
# once per person.
exhibitions_by_person = {}
for exhibition in exdata.get("events"):
    exhibition_summary = {
        "id": exhibition.get("id"),
        "_label": exhibition.get("_label"),
        "start": exhibition.get("start"),
    }
    # A set, so an id listed twice for one exhibition is counted once;
    # an exhibition without an influenced_by list simply matches nobody.
    influencer_ids = {
        influencer_id.upper()
        for influencer_id in exhibition.get("influenced_by") or []
    }
    for influencer_id in influencer_ids:
        exhibitions_by_person.setdefault(influencer_id, []).append(dict(exhibition_summary))

# this list will hold updated person records to overwrite persons_all.json
updated_persons_list = []

for person in data["persons"]:
    person_id = person.get("id")

    # ids are compared case-insensitively; a record without a string id
    # cannot match any exhibition and gets an empty list
    lookup_key = person_id.upper() if isinstance(person_id, str) else None
    person_exhibitions = list(exhibitions_by_person.get(lookup_key, []))

    # copy existing person entry and add the new properties
    updated_person = person.copy()
    updated_person["total_exhibitions"] = len(person_exhibitions)
    updated_person["exhibitions"] = person_exhibitions
    updated_persons_list.append(updated_person)

with open(os.path.join(output_person_dir, persons_all_file), 'w', encoding="utf-8") as file:
    file.write(json.dumps({"persons": updated_persons_list}, indent=2))
    
       

# Summary data files for persons ordered by Birth date

The following code will create two JSON data files containing the first 200 person records when ordered by birth date in ascending and descending order
- person summary file - order by birth date - ascending - first 200 records
  - `persons_born_asc.json`
- person summary file - order by birth date - descending - first 200 records
  - `persons_born_desc.json`

In [12]:
# Writes the person records that have a birth date to two files: ordered by
# birth date ascending and descending, each limited to the first
# summary_number_records entries.

selected_field = "born"

# open persons_all.json; it is closed again before the output files are written
with open(os.path.join(output_person_dir, persons_all_file), 'r', encoding="utf-8") as json_file:
    data = json.load(json_file)

# keep only the records with a non-blank value for the selected field
# (a missing or null value is treated like a blank one)
list_persons = [
    entry for entry in data.get("persons")
    if (entry.get(selected_field) or "").strip() != ""
]

# Dates are compared as strings.
# NOTE(review): this matches chronological order only for zero-padded
# YYYY-MM-DD values with four-digit years - confirm for the source data.

# order list by birth date in ascending order
sorted_list_asc = sorted(list_persons, key=itemgetter(selected_field), reverse=False)
# limit the number of records (same setting as the other summary files)
sliced_list_asc = sorted_list_asc[:summary_number_records]
# write to file
with open(os.path.join(output_person_dir, output_persons_born_asc_file), 'w', encoding="utf-8") as file:
    file.write(json.dumps({"persons": sliced_list_asc}, indent=2))

# order list by birth date in descending order
sorted_list_desc = sorted(list_persons, key=itemgetter(selected_field), reverse=True)
# limit the number of records
sliced_list_desc = sorted_list_desc[:summary_number_records]
# write to file
with open(os.path.join(output_person_dir, output_persons_born_desc_file), 'w', encoding="utf-8") as file:
    file.write(json.dumps({"persons": sliced_list_desc}, indent=2))
    

# Summary data files for persons ordered by Death date

The following code will create two JSON data files containing the first 200 person records when ordered by death date in ascending and descending order
- person summary file - order by death date - ascending - first 200 records
  - `persons_died_asc.json`
- person summary file - order by death date - descending - first 200 records
  - `persons_died_desc.json`

In [17]:
# Produces the two death-date summary files (ascending and descending),
# each holding at most summary_number_records person records.

selected_field = 'died'

# read every person summary record
with open(os.path.join(output_person_dir, persons_all_file), 'r') as json_file:
    data = json.load(json_file)

# person records that have a value for death date
list_persons = [
    record for record in data.get("persons")
    if record.get(selected_field).strip() != ""
]

# (reverse flag, output file name) pairs, in the order the files are written
write_plan = (
    (False, output_persons_died_asc_file),
    (True, output_persons_died_desc_file),
)

for descending, target_file in write_plan:
    # order by death date, then keep the limited number of records
    ordered = sorted(list_persons, key=itemgetter(selected_field), reverse=descending)
    limited = ordered[:summary_number_records]
    # write to file
    with open(os.path.join(output_person_dir, target_file), 'w') as file:
        file.write(json.dumps({"persons": limited}, indent=2))

# Summary data files for persons ordered by Name

The following code will create two JSON data files containing the first limited set of person records when ordered by name in ascending and descending order
- person summary file - order by name - ascending - first 200 records
  - `persons_name_asc.json`
- person summary file - order by name - descending - first 200 records
  - `persons_name_desc.json`

In [18]:
# Writes the person records that have a name to two files: ordered by name
# descending and ascending, each limited to summary_number_records entries.

field = 'name'

# open persons_all.json; it is closed again before the output files are written
with open(os.path.join(output_person_dir, persons_all_file), 'r', encoding="utf-8") as json_file:
    data = json.load(json_file)

# Records with a non-blank name. Deliberately not called `list`: rebinding
# that name would hide the builtin for every cell run afterwards.
named_persons = [
    entry for entry in data.get("persons")
    if (entry.get(field) or "").strip() != ""
]

# order list by name DESCENDING (plain string comparison, case-sensitive)
sorted_list_desc = sorted(named_persons, key=itemgetter(field), reverse=True)
# get first limited set of records
sliced_list_desc = sorted_list_desc[:summary_number_records]
# write to file
with open(os.path.join(output_person_dir, output_persons_name_desc_file), 'w', encoding="utf-8") as file:
    file.write(json.dumps({"persons": sliced_list_desc}, indent=2))

# order list by name ASCENDING
sorted_list_asc = sorted(named_persons, key=itemgetter(field), reverse=False)
# get first limited set of records
sliced_list_asc = sorted_list_asc[:summary_number_records]
# write to file
with open(os.path.join(output_person_dir, output_persons_name_asc_file), 'w', encoding="utf-8") as file:
    file.write(json.dumps({"persons": sliced_list_asc}, indent=2))

    

# Summary data files for persons ordered by total number of exhibitions 

The following code will create a JSON data file containing the first limited set of person records when ordered by total number of exhibitions in descending order

- person summary file - order by total number of exhibitions - descending - first 200 records
  - `persons_total_exhibitions_desc.json`

In [19]:
# Writes the person records ordered by total number of exhibitions
# (highest first), limited to summary_number_records entries.

field = 'total_exhibitions'

# open persons_all.json; it is closed again before the output file is written
with open(os.path.join(output_person_dir, persons_all_file), 'r', encoding="utf-8") as json_file:
    data = json.load(json_file)

# Records that have a total. The template default is "" until the totals
# have been added; a missing/null value is skipped too, because it cannot be
# ordered against numbers. Deliberately not called `list`, which would hide
# the builtin for every cell run afterwards.
counted_persons = [
    entry for entry in data.get("persons")
    if entry.get(field) not in ("", None)
]

# order list by total exhibitions, highest first
sorted_list_desc = sorted(counted_persons, key=itemgetter(field), reverse=True)
# limit the number of records (same setting as the other summary files)
sliced_list_desc = sorted_list_desc[:summary_number_records]
# write to file
with open(os.path.join(output_person_dir, output_persons_total_exhibitions_desc), 'w', encoding="utf-8") as file:
    file.write(json.dumps({"persons": sliced_list_desc}, indent=2))