## Define default variables
Overwritten by widget values if the notebook is viewed in Mercury

In [53]:
# Default birth year, used unless a Mercury widget value overwrites it (kept as a string).
selected_birthyear = "1935"
print(f"selected_birthyear={selected_birthyear}")

# Default nationality, used unless a Mercury widget value overwrites it.
selected_nationality = "American"
print(f"selected_nationality={selected_nationality}")

selected_birthyear=1935
selected_nationality=American


# Linked Art - Data Shape - Persons in Exhibitions
This notebook shows the *shape* of data for persons associated with exhibitions, included in the selected Linked Art dataset.

## Purpose
Understanding the shape of data helps to identify features of the data that can be presented in a useable, browsable sectioned display.


It is proposed that a characteristic of the data that results in an even distribution of records will correspond to a useable, browsable breakdown of the data that can be presented in a sectioned display.

## How-to

### Visual Studio Code - Jupyter plugin
https://marketplace.visualstudio.com/items?itemName=ms-toolsai.jupyter 

If you have the Jupyter notebook extension, you can run the notebooks in Visual Studio Code editor:
- open and run the `variables.ipynb` notebook
- open and run the `persons.ipynb` notebook

### Jupyter Notebook
https://jupyter.org/ 


To run the Jupyter notebook in the browser, use the following command:
- `jupyter notebook`
- open and run the `variables.ipynb` notebook
- open and run the `persons.ipynb` notebook

### Mercury Web application
https://mljar.com/mercury/ 

To run this notebook as a web app using Mercury, use the following commands in the root directory:

- `mercury run variables.ipynb`
- `mercury run persons.ipynb`

- Go to the URL provided by Mercury, e.g. http://127.0.0.1:8000/ 
- Run the Variables notebook
- Open the Persons notebook, and click the green `Run` button in the left-hand column

## Get stored variables
Get stored variables - see variables.ipynb

In [54]:

%store -r summary_data_dir_activity
%store -r activity_all_file


%store -r summary_data_dir_person
%store -r persons_all_file

%store -r events_nonmoma
%store -r events_moma

%store -r json_suffix

%store -r linked_data_filepath_group

%store -r datavis_venue_exhibitions 

In [55]:
import os
import json

try:
    import plotly.express as px
except:
    %pip install plotly.express
    import plotly.express as px


In [74]:
def create_histogram(data, x_label, y_label, histogram_title, entity_type, selected_field, ignore_list, histogram_type, sort_type):
    """Build a bar chart counting records per value of ``selected_field``.

    Args:
        data: mapping whose ``entity_type`` key holds a list of record dicts.
        x_label: label for the x axis.
        y_label: label for the y axis.
        histogram_title: title shown above the chart.
        entity_type: key in ``data`` naming the list of records to count.
        selected_field: record field whose values are counted.
        ignore_list: bucket labels to leave out of the chart.
        histogram_type: how the raw value is turned into a bucket label:
            "surname" -> upper-cased first character of the first word,
            "born" -> text before the first "-",
            "born_decade" -> first three characters of that text plus "0s";
            any other value uses the field value unchanged.
        sort_type: 0 orders bars by label, 1 orders them by count.

    Returns:
        The figure produced by ``px.bar``.
    """
    tally = {}

    for record in data.get(entity_type):
        # records without the field are not counted
        if selected_field not in record:
            continue

        label = record.get(selected_field)
        if label is None:
            continue

        # derive the bucket label from the raw value
        if histogram_type == "surname":
            label = label.split(" ")[0][:1].upper()
        elif histogram_type == "born":
            label = label.split("-")[0]
        elif histogram_type == "born_decade":
            label = label.split("-")[0][:3] + "0s"

        if label in ignore_list:
            continue

        tally[label] = tally.get(label, 0) + 1

    # order the (label, count) pairs on the element picked by sort_type
    ordered_pairs = sorted(tally.items(), key=lambda pair: pair[sort_type])
    tally = dict(ordered_pairs)

    return px.bar(
        x=tally.keys(),
        y=tally.values(),
        labels={"x": x_label, "y": y_label},
        title=histogram_title,
    )


def create_summary_datafile(data, entity_type, selected_field, ignore_list, histogram_type, sort_type): 

    null = None
    
    property_dict = {}
    number_dict = {}
    # iterate through selected entity type
    for entity in data.get(entity_type):

        if selected_field in entity:
            property = entity.get(selected_field)
            if property == null:
                continue
            if histogram_type == "surname":
                property = property.split(" ")[0][:1].upper()
            if histogram_type == "born":
                property = property.split("-")[0]
            if histogram_type == "born_decade":
                property = property.split("-")[0][:3] + "0s"

            if property in ignore_list:
                continue
 

            if property in number_dict:
                number_dict[property] += 1
            else:
                number_dict[property] = 1

            if property not in property_dict:
                property_dict[property] = []
                
            property_dict[property].append(entity)

    number_dict = dict(sorted(number_dict.items(), key=lambda item: item[sort_type]))

    property_dict = dict(sorted(property_dict.items(), key=lambda item: item[sort_type]))

    return ({"count": number_dict, entity_type : property_dict})


def getNonAmericanData(data, selected_field):
    """Return the person records whose ``selected_field`` is a usable, non-American value.

    A record is kept when it has ``selected_field`` and the value is not None,
    not the empty string, and not one of "Male", "Female" or "American".

    Args:
        data: mapping whose "persons" key holds a list of record dicts.
        selected_field: record field to inspect.

    Returns:
        A new list of the matching records, in their original order.
    """
    excluded_values = ["Male", "Female", "American", ""]

    return [
        record
        for record in data.get("persons")
        if selected_field in record
        and record.get(selected_field) is not None
        and record.get(selected_field) not in excluded_values
    ]

def getSelectedData(data, selected_field, selected_value=""):
    """Return the person records whose ``selected_field`` matches ``selected_value``.

    For the "born" field only the text before the first "-" is compared.
    Records are skipped when the field is absent, None, empty, or one of
    "Male" / "Female".

    Args:
        data: mapping whose "persons" key holds a list of record dicts.
        selected_field: record field to inspect.
        selected_value: value the field must equal; the default "" disables
            this filter so every usable record is returned.

    Returns:
        A new list of the matching records, in their original order.
    """
    ignore_list = ["Male", "Female", ""]
    data_selected = []

    for entity in data.get("persons"):
        if selected_field not in entity:
            continue

        value = entity.get(selected_field)
        # Check for None before any string handling: a null "born" value
        # would otherwise fail on .split().
        if value is None:
            continue

        if selected_field == "born":
            value = value.split("-")[0]

        if value in ignore_list:
            continue
        if selected_value != "" and value != selected_value:
            continue

        data_selected.append(entity)

    return data_selected

In [57]:
## read person data file

# start with an empty mapping; it is replaced by the parsed file contents
data = {}

# summary data file containing all person records
persons_all_path = os.path.join(summary_data_dir_person, persons_all_file)
with open(persons_all_path) as json_file:
    data = json.load(json_file)






# Total number of exhibitions

In [58]:
## person by total number of exhibitions

# which records and which field to chart
entity_type = "persons"
selected_field = "total_exhibitions"
histogram_type = "total_exhibitions"
ignore_list = [""]

# chart labelling
x_label = "Number of Exhibitions"
y_label = "Number of people"
histogram_title = "Total number of exhibitions"

histogram = create_histogram(
    data=data,
    x_label=x_label,
    y_label=y_label,
    histogram_title=histogram_title,
    entity_type=entity_type,
    selected_field=selected_field,
    ignore_list=ignore_list,
    histogram_type=histogram_type,
    sort_type=0,  # order the bars by label rather than by count
)
histogram.show()

# Maximum number of exhibitions per decade for any artist



    




In [59]:
# Find the person with the highest number of exhibitions in any single decade.
max_number_ex_decade = 0

# [count, decade prefix (first three characters of the start year), person id, person name]
max_number_decade = []

entity_type = "persons"

for person in data.get(entity_type):
    person_id = person.get("id")  # named person_id so the built-in id() is not shadowed
    name = person.get("name")

    # count this person's exhibitions per decade, keyed by the decade prefix
    ex_start_decades = {}
    for exhibition in person.get("exhibitions") or []:
        start = exhibition.get("start")
        if not start:
            # an exhibition without a start date cannot be placed in a decade
            continue
        ex_start_decade = start.split("-")[0][:3]
        ex_start_decades[ex_start_decade] = ex_start_decades.get(ex_start_decade, 0) + 1

    if not ex_start_decades:
        continue

    # Busiest decade for this person. max() returns the first maximum, so the
    # items are reversed to keep the most recently added decade on ties.
    decade_key, decade_count = max(
        reversed(list(ex_start_decades.items())), key=lambda pair: pair[1]
    )

    # strictly greater: the first person to reach a given count keeps the record
    if decade_count > max_number_ex_decade:
        max_number_ex_decade = decade_count
        max_number_decade = [decade_count, decade_key, person_id, name]


if max_number_decade:
    person = max_number_decade[3]
    count = max_number_decade[0]
    decade = max_number_decade[1]
    print(f"Person with the most number of exhibitions for any decade is {person} with {count} exhibitions in the {decade}0s.")
else:
    # nothing to report: no person had an exhibition with a start date
    print("No dated exhibitions found for any person.")

Person with the most number of exhibitions for any decade is Picasso Pablo with 54 exhibitions in the 1940s.


## Exhibition organiser:MoMA --> Person --> Birth year

In [64]:
# read data file
data_ex = {}

with open(os.path.join(summary_data_dir_activity, events_moma), 'r') as json_file:
    # get data
    data_ex = json.load(json_file)

# upper-cased ids of every person listed under "influenced_by" on an event
list_person = []
for ex in data_ex.get("events"):
    if "influenced_by" in ex:
        for person in ex.get("influenced_by"):
            list_person.append(person.get("id").upper())
# remove duplicates
list_person = list(dict.fromkeys(list_person))

# A set gives constant-time membership tests; checking against the list
# inside the loop scanned it once per person.
person_ids = set(list_person)

# keep only the person records that appear in at least one of these events
data_selected = [
    person
    for person in data.get("persons")
    if person.get("id").upper() in person_ids
]

x_label = "Birth Year"
y_label = "Number of people"
histogram_title = "Persons Involved in MoMA exhibitions : Birth Year"
entity_type = "persons"

selected_field = "born"
histogram_type = "born"

ignore_list = [""]

# skip the chart when no person matched
if len(data_selected) > 0:
    histogram = create_histogram({"persons":data_selected}, x_label, y_label, histogram_title, entity_type, selected_field, ignore_list,histogram_type,0)
    histogram.show()


## Exhibition organiser:non-MoMA --> Person --> Birth year

In [75]:
## Exhibition organiser:non-MoMA --> Person --> Birth year

# read data file
data_ex = {}

with open(os.path.join(summary_data_dir_activity, events_nonmoma), 'r') as json_file:
    # get data
    data_ex = json.load(json_file)

# upper-cased ids of every person listed under "influenced_by" on an event
list_person = []
for ex in data_ex.get("events"):
    if "influenced_by" in ex:
        for person in ex.get("influenced_by"):
            list_person.append(person.get("id").upper())
# remove duplicates
list_person = list(dict.fromkeys(list_person))

# A set gives constant-time membership tests; checking against the list
# inside the loop scanned it once per person.
person_ids = set(list_person)

# keep only the person records that appear in at least one of these events
data_selected = [
    person
    for person in data.get("persons")
    if person.get("id").upper() in person_ids
]

x_label = "Birth Year"
y_label = "Number of people"
histogram_title = "Persons Involved in non-MoMA exhibitions : Birth Year"
entity_type = "persons"
selected_field = "born"
histogram_type = "born"

ignore_list = [""]


# skip the chart and the summary file when no person matched
if len(data_selected) > 0:

    histogram = create_histogram({"persons":data_selected}, x_label, y_label, histogram_title, entity_type, selected_field, ignore_list,histogram_type,0)
    histogram.show()

    # same grouping as the chart, saved as JSON: counts plus the records per birth year
    summary_datafile = create_summary_datafile({"persons":data_selected}, entity_type, selected_field, ignore_list,histogram_type,0)

    with open(os.path.join(summary_data_dir_activity, 'events_nonmoma_persons_birthyear.json'), 'w') as file:
        file.write(json.dumps(summary_datafile,indent=2))
    