This notebook documents a data analysis of the MOMA alternative data, which has been transformed to Linked Art JSON-LD.

The .json data files are in the data directory as follows:

- data
  - activity
  - group
  - person


Each directory holds one type of record:

- `activity` - exhibition event
- `group` - organisation involved in exhibition events
- `person` - person involved in exhibition events


# Person - data analysis

In [1]:
try:
    import json
except:
    %pip install json
    import json

try:
    import requests
except:
    %pip install requests
    import requests

try:
    import pandas as pd
except:
    %pip install pandas
    import pandas as pd

from operator import itemgetter

try:
    import json
except:
    %pip install json
    import json

# importing datetime
from datetime import datetime

import os

In [2]:
# Build one summary record per Linked Art person file found in data/person/
# and save all of them to api/persons_all.json.

filepath = "data/person/"

persons = []

# Default values for a person summary record. new_person_record() hands out
# copies, so the list-valued defaults are never shared between persons.
person_template = {
    "id": "",
    "label": "",
    "_label":"",
    "name":"",
    "born": "",
    "died": "",
    "nationality": "",
    "identified_by": [],
    "referred_to_by": [],
    "equivalent": [],
    "total_exhibitions": 0,  # a count; set by the exhibition statistics cell
    "exhibitions": [],
    "gender": ""
}


def new_person_record():
    """Return a new summary record holding its own copy of every list default."""
    return {
        key: (list(value) if isinstance(value, list) else value)
        for key, value in person_template.items()
    }


def summarise_person(data):
    """Flatten one parsed person document into a summary record.

    Parameters:
        data: the parsed JSON document of a single person file.

    Returns:
        A dict with the keys of ``person_template``.

    Raises:
        TypeError, IndexError or AttributeError when the document lacks the
        structure read below (for example fewer than two "identified_by"
        entries). The calling loop treats that as "skip this file".
    """
    record = new_person_record()
    record["id"] = data.get("id")
    record["label"] = data.get("_label")
    record["_label"] = data.get("_label")

    # The name is read from the second "identified_by" entry.
    record["name"] = data.get("identified_by")[1].get("content")

    # Keep only the part of each timestamp that precedes the "T".
    if "born" in data:
        record["born"] = data.get("born").get("timespan").get("begin_of_the_begin").split("T")[0]
    if "died" in data:
        record["died"] = data.get("died").get("timespan").get("end_of_the_end").split("T")[0]

    # NOTE(review): assumes the first "classified_as" entry is the nationality
    # and the first "referred_to_by" entry is the gender statement - confirm
    # against the transformed data.
    if "classified_as" in data:
        record["nationality"] = data.get("classified_as")[0].get("_label")
    if "referred_to_by" in data:
        record["gender"] = data.get("referred_to_by")[0].get("content")

    return record


skipped_files = []

# os.listdir() returns names in arbitrary order; sorting keeps the order of
# the records in the output file the same from run to run.
for filename in sorted(os.listdir(filepath)):
    try:
        with open(os.path.join(filepath, filename), 'r', encoding="utf-8") as json_file:
            data = json.load(json_file)
        persons.append(summarise_person(data))
    except Exception as error:
        # Best effort: an unreadable or unexpectedly shaped file is left out of
        # the summary, but it is recorded so the omission is visible.
        skipped_files.append((filename, repr(error)))

if skipped_files:
    print(f"Skipped {len(skipped_files)} file(s) in {filepath} that could not be summarised:")
    for skipped_name, reason in skipped_files[:10]:
        print(f"  {skipped_name}: {reason}")
    if len(skipped_files) > 10:
        print("  ...")

# Create the output directory on a fresh checkout instead of failing.
os.makedirs('api', exist_ok=True)
with open('api/persons_all.json', 'w') as file:
    json.dump({"persons": persons}, file)

# Add statistics to person summary records

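- `total_exhibitions`: the total number of exhibitions in the dataset that the artist has featured in
- `exhibitions`: the id, label and start date of each of those exhibitions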

In [53]:


# Add exhibition statistics to every person summary record:
#   total_exhibitions - number of exhibitions that list the person
#   exhibitions       - id, _label and start of each of those exhibitions
# The result replaces api/persons_all.json.

with open('api/events_all.json', 'r') as exjson_file:
    exdata = json.load(exjson_file)

events = exdata.get('events') or []

# get list of lists of person ids = artists (an event without the key counts
# as having no artists)
all_artists = [event.get('influenced_by') or [] for event in events]

# flatten list of lists to single list
all_ex_list = [item for sublist in all_artists for item in sublist]

# get unique artists
unique_artists_list = set(all_ex_list)

# exhibition ids per artist id, keyed by the id exactly as written in the events
unique_artists_dict = {artist: [] for artist in unique_artists_list}
for event, artists in zip(events, all_artists):
    for artist in artists:
        unique_artists_dict[artist].append(event.get("id"))

# Index the exhibitions by upper-cased artist id. Person ids are matched
# case-insensitively, and one dictionary lookup per person replaces a scan of
# every event for every person.
exhibitions_by_artist = {}
for event, artists in zip(events, all_artists):
    exhibition_summary = {
        "id": event.get("id"),
        "_label": event.get("_label"),
        "start": event.get("start"),
    }
    # A set, so an artist listed more than once in an event is counted once.
    for artist_key in {artist.upper() for artist in artists if isinstance(artist, str)}:
        exhibitions_by_artist.setdefault(artist_key, []).append(exhibition_summary)

with open('api/persons_all.json', 'r') as json_file:
    data = json.load(json_file)

updated_persons_list = []
for person in data["persons"]:
    person_id = person.get("id")
    # A record without a usable id cannot match any exhibition.
    person_key = person_id.upper() if isinstance(person_id, str) else None

    # Copy each summary so no dict object is shared between person records.
    person_exhibitions = [
        dict(exhibition_summary)
        for exhibition_summary in exhibitions_by_artist.get(person_key, [])
    ]

    updated_person = person.copy()
    updated_person["total_exhibitions"] = len(person_exhibitions)
    updated_person["exhibitions"] = person_exhibitions
    updated_persons_list.append(updated_person)

# The input handle is closed by now, so the same path can be rewritten safely.
with open('api/persons_all.json', 'w') as file:
    json.dump({"persons": updated_persons_list}, file)
    
       

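Birth: sort the persons that have a birth date and write the 200 earliest-born to `api/persons_born_asc.json` and the 200 latest-born to `api/persons_born_desc.json`.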

In [55]:
def _export_birth_ranking(records, newest_first, target_path):
    """Sort records on 'born', write the first 200 to target_path.

    Returns the fully sorted list and the 200-record slice that was written.
    """
    ranked = sorted(records, key=lambda record: record['born'], reverse=newest_first)
    head = ranked[:200]
    with open(target_path, 'w') as out_file:
        out_file.write(json.dumps({"persons": head}))
    return ranked, head


with open('api/persons_all.json', 'r') as json_file:
    data = json.load(json_file)

# Only persons whose birth date is not blank take part in the ranking.
list_born_asc = [
    person
    for person in data.get("persons")
    if person.get('born').strip() != ""
]

# Earliest births first.
sorted_list_born_asc, sliced_list_born_asc = _export_birth_ranking(
    list_born_asc, False, 'api/persons_born_asc.json'
)

# Latest births first.
sorted_list_born_desc, sliced_list_born_desc = _export_birth_ranking(
    list_born_asc, True, 'api/persons_born_desc.json'
)
    

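Death: sort the persons that have a death date and write the 200 earliest deaths to `api/persons_died_asc.json` and the 200 latest deaths to `api/persons_died_desc.json`.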

In [54]:
# Rank the persons by death date and export the 200 earliest and the 200
# latest deaths.

field = 'died'

with open('api/persons_all.json', 'r') as json_file:
    data = json.load(json_file)

# Collect the persons with a death date. The result is deliberately not named
# "list": that would replace the builtin for every later cell in the kernel.
persons_with_death_date = []
for entry in data.get("persons") or []:
    # Drop anything after a "T" in the stored value; a missing or null value
    # counts as "no death date" instead of raising.
    died = (entry.get(field) or "").split("T")[0]
    if died.strip() != "":
        # Work on a copy so the records loaded into `data` stay untouched.
        persons_with_death_date.append({**entry, field: died})

# NOTE(review): the dates are compared as strings, which orders them
# correctly only if every value is a zero-padded YYYY-MM-DD date - confirm.
for newest_first, target_path in (
    (False, 'api/persons_died_asc.json'),
    (True, 'api/persons_died_desc.json'),
):
    ranked = sorted(persons_with_death_date, key=itemgetter(field), reverse=newest_first)
    # Only the first 200 records of each ordering are exported.
    with open(target_path, 'w') as file:
        json.dump({"persons": ranked[:200]}, file)

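Name: sort the persons that have a name and write the last 200 in alphabetical order to `api/persons_name_desc.json` and the first 200 to `api/persons_name_asc.json`.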

In [50]:
# Rank the persons by name and export the first 200 of the descending and of
# the ascending order.

field = 'name'

with open('api/persons_all.json', 'r') as json_file:
    data = json.load(json_file)

# Keep the persons with a non-blank name. A missing or null name is treated as
# blank instead of raising on .strip(). The result is deliberately not named
# "list", which would replace the builtin for every later cell in the kernel.
named_persons = [
    entry
    for entry in data.get("persons") or []
    if (entry.get(field) or "").strip() != ""
]

# NOTE(review): names are compared as raw strings, so the order is
# case-sensitive (upper-case letters sort before lower-case ones) - confirm
# that this is the intended ordering.
for descending, target_path in (
    (True, 'api/persons_name_desc.json'),
    (False, 'api/persons_name_asc.json'),
):
    ranked = sorted(named_persons, key=itemgetter(field), reverse=descending)
    # Only the first 200 records of each ordering are exported.
    with open(target_path, 'w') as file:
        json.dump({"persons": ranked[:200]}, file)

    

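Exhibitions: write the 200 persons with the highest number of exhibitions to `api/persons_total_exhibitions_desc.json`.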

In [49]:
# Export the 200 persons with the highest exhibition count.

field = 'total_exhibitions'

with open('api/persons_all.json', 'r') as json_file:
    data = json.load(json_file)

# Keep only records whose total is an actual count. A record that never
# received its statistics does not hold an integer here, and mixing such
# values with integers would make the sort below raise a TypeError. The
# result is deliberately not named "list", which would replace the builtin
# for every later cell in the kernel.
persons_with_totals = [
    entry
    for entry in data.get("persons") or []
    if isinstance(entry.get(field), int)
]

# order list by total exhibitions, highest first
sorted_list_desc = sorted(persons_with_totals, key=itemgetter(field), reverse=True)
# get first 200 records
sliced_list_desc = sorted_list_desc[:200]
# write to file
with open('api/persons_total_exhibitions_desc.json', 'w') as file:
    json.dump({"persons": sliced_list_desc}, file)