## Define default variables
Overwritten by widget values if the notebook is viewed in Mercury

In [216]:
# Default birth year, used unless a Mercury widget value overwrites it.
# Kept as a string here; the birth-year filter cell converts it with int().
selected_birthyear = "1935"
print(f"selected_birthyear={selected_birthyear}")

# Default nationality, used unless a Mercury widget value overwrites it.
# Compared for equality against each person's "nationality" value.
selected_nationality = "American"
print(f"selected_nationality={selected_nationality}")

selected_birthyear=1935
selected_nationality=American


# Linked Art - Data Shape - Persons in Exhibitions
This notebook shows the *shape* of data for persons associated with exhibitions, included in the selected Linked Art dataset.

## Purpose
Understanding the shape of data helps to identify features of the data that can be presented in a useable, browsable sectioned display.


It is proposed that a characteristic of the data that results in an even distribution of records will correspond to a useable, browsable breakdown of the data that can be presented in a sectioned display.

## How-to

## Visual Studio Code - Jupyter plugin
https://marketplace.visualstudio.com/items?itemName=ms-toolsai.jupyter 

If you have the Jupyter notebook extension, you can run the notebooks in Visual Studio Code editor:
- open and run the `variables.ipynb` notebook
- open and run the `persons.ipynb` notebook

## Jupyter Notebook
https://jupyter.org/ 


To run the Jupyter notebook in the browser, use the following command:
- `jupyter notebook`
- open and run the `variables.ipynb` notebook
- open and run the `persons.ipynb` notebook

### Mercury Web application
https://mljar.com/mercury/ 

To run this notebook as a web app using Mercury, use the following commands in the root directory:

- `mercury run variables.ipynb`
- `mercury run persons.ipynb`

- Go to the URL provided by Mercury, e.g. http://127.0.0.1:8000/ 
- Run the Variables notebook
- Open the Persons notebook, and click the green `Run` button in the left-hand column

## Get stored variables
Get stored variables - see variables.ipynb

In [118]:

# Restore variables previously persisted with %store (see variables.ipynb).
# Each line fails to restore anything if the variable was never stored, so
# variables.ipynb has to be run first.

# NOTE(review): the activity variables are not read by any cell in this notebook.
%store -r summary_data_dir_activity
%store -r activity_all_file


# Directory and file name joined with os.path.join to locate the person summary JSON.
%store -r summary_data_dir_person
%store -r persons_all_file

# NOTE(review): the remaining variables are restored but not read by any cell
# in this notebook - confirm whether they are still needed.
%store -r events_nonmoma

%store -r json_suffix

%store -r linked_data_filepath_group

%store -r datavis_venue_exhibitions 

In [199]:
import os
import json

try:
    import plotly.express as px
except:
    %pip install plotly.express
    import plotly.express as px


In [185]:


def create_histogram(data, x_label, y_label, histogram_title, entity_type, selected_field, ignore_list, histogram_type, sort_type):
    """Build a bar chart of how often each value of a field occurs.

    Args:
        data: dict holding a list of entity dicts under the key ``entity_type``.
        x_label: label shown on the x axis.
        y_label: label shown on the y axis.
        histogram_title: title of the chart.
        entity_type: key in ``data`` whose list of entities is counted.
        selected_field: key read from every entity.
        ignore_list: values (after any transformation below) that are not counted.
        histogram_type: ``"surname"`` reduces the value to the upper-cased first
            character of its first space-separated token; ``"born"`` reduces it
            to the text before the first ``-``; any other value leaves the
            field value unchanged.
        sort_type: index into each ``(label, count)`` pair used as the ascending
            sort key: ``0`` sorts by label, ``1`` sorts by count.

    Returns:
        The figure returned by ``px.bar``.

    Entities that lack ``selected_field`` or hold ``None`` for it are skipped,
    and a missing ``entity_type`` key produces an empty chart instead of an error.
    """
    counts = {}

    # "or []" tolerates a missing or null entity list
    for entity in data.get(entity_type) or []:
        value = entity.get(selected_field)

        # nothing to count for an absent or null field
        if value is None:
            continue

        if histogram_type == "surname":
            value = str(value).split(" ")[0][:1].upper()
        elif histogram_type == "born":
            value = str(value).split("-")[0]

        if value in ignore_list:
            continue

        counts[value] = counts.get(value, 0) + 1

    # sort the (label, count) pairs by label or by count - determined by sort_type
    ordered = dict(sorted(counts.items(), key=lambda item: item[sort_type]))

    # pass concrete lists rather than dict views to the plotting call
    fig = px.bar(
        x=list(ordered.keys()),
        y=list(ordered.values()),
        labels={"x": x_label, "y": y_label},
        title=histogram_title,
    )
    return fig


In [196]:
## read person data file
file_dir = summary_data_dir_person
file_entity = persons_all_file

data = {}

# open summary data file containing all person records
# Decode explicitly as UTF-8 (the JSON interchange encoding) so non-ASCII
# characters in names do not depend on the platform's default locale encoding.
with open(os.path.join(file_dir, file_entity), 'r', encoding='utf-8') as json_file:
    # get data
    data = json.load(json_file)


## Distribution by birth year

In [215]:
## person by birth year

# which records to count, and how the "born" value is reduced to a year
entity_type, selected_field, histogram_type = "persons", "born", "born"
ignore_list = [""]

# chart text
x_label, y_label = "Birth Year", "Number of people"
histogram_title = "Persons Involved in Exhibitions : Birth Year"

# sort_type=0 orders the bars by label (the year)
histogram = create_histogram(
    data=data,
    x_label=x_label,
    y_label=y_label,
    histogram_title=histogram_title,
    entity_type=entity_type,
    selected_field=selected_field,
    ignore_list=ignore_list,
    histogram_type=histogram_type,
    sort_type=0,
)
histogram.show()

## Nationality (excluding empty values)

In [217]:
# which records to count; the raw "nationality" value is used as the bar label
entity_type, selected_field = "persons", "nationality"
# values left out of the chart (empty strings plus "Male"/"Female" entries)
ignore_list = ["","Male", "Female"]

# chart text
x_label, y_label = "Nationality", "Number of people"
histogram_title = "Persons Involved in Exhibitions : Nationality (excluding empty values)"

# sort_type=1 orders the bars by count, smallest first
histogram = create_histogram(
    data=data,
    x_label=x_label,
    y_label=y_label,
    histogram_title=histogram_title,
    entity_type=entity_type,
    selected_field=selected_field,
    ignore_list=ignore_list,
    histogram_type="",
    sort_type=1,
)
histogram.show()


## Nationality (excluding Americans and empty values)

In [218]:
# State every chart setting explicitly so the result does not depend on which
# cell happened to run before this one.
x_label = "Nationality"
y_label = "Number of people"
entity_type = "persons"
selected_field = "nationality"

histogram_title = "Persons Involved in Exhibitions : Nationality (excluding Americans and empty values)"
# same exclusions as the full nationality chart, plus "American"
ignore_list = ["", "Male", "Female", "American"]

# sort_type=1 orders the bars by count, smallest first
histogram = create_histogram(data, x_label, y_label, histogram_title, entity_type, selected_field, ignore_list, "", 1)
histogram.show()

## Distribution by surname

In [None]:
## persons by first letters of surname

# which records to count; "surname" reduces each name to the upper-cased
# first character of its first space-separated token
entity_type, selected_field, histogram_type = "persons", "name", "surname"
ignore_list = [""]

# chart text
x_label, y_label = "Surname", "Number of people"
histogram_title = "Persons Involved in Exhibitions : Surname"

# sort_type=0 orders the bars by label (the letter)
histogram = create_histogram(
    data=data,
    x_label=x_label,
    y_label=y_label,
    histogram_title=histogram_title,
    entity_type=entity_type,
    selected_field=selected_field,
    ignore_list=ignore_list,
    histogram_type=histogram_type,
    sort_type=0,
)
histogram.show()


## Distribution by surname for selected birth year

In [214]:
 
# get data for selected birth year

entity_type = "persons"
selected_field = "born"

# selected_birthyear is a string by default; convert it once, outside the loop
target_year = int(selected_birthyear)

data_selected = []

# iterate through selected entity type ("or []" tolerates a missing list)
for entity in data.get(entity_type) or []:
    born = entity.get(selected_field)

    # skip records with no birth date at all
    if not born:
        continue

    # the year is the text before the first "-"
    year_text = str(born).split("-")[0]
    try:
        born_year = int(year_text)
    except ValueError:
        # a year that is not a plain integer cannot match the selection
        continue

    if born_year == target_year:
        data_selected.append(entity)


x_label = "Surname"
y_label = "Number of people"
histogram_title = f"People distribution by first letter of surname for selected birth year {selected_birthyear}"


selected_field = "name"
histogram_type = "surname"
ignore_list = [""]

# only draw a chart when at least one person matched
if data_selected:

    histogram = create_histogram({"persons": data_selected}, x_label, y_label, histogram_title, entity_type, selected_field, ignore_list, histogram_type, 0)
    histogram.show()

## Distribution by surname for selected nationality

In [222]:
# get data for selected nationality

entity_type = "persons"
selected_field = "nationality"

# values never treated as a nationality, even if selected
# NOTE(review): presumably gender values that ended up in this field - confirm
ignore_list = ["Male", "Female"]

data_selected = []

# iterate through selected entity type ("or []" tolerates a missing list)
for entity in data.get(entity_type) or []:
    nationality = entity.get(selected_field)

    # skip missing, empty and ignored values
    if nationality is None or nationality == "" or nationality in ignore_list:
        continue

    if nationality == selected_nationality:
        data_selected.append(entity)


x_label = "Surname"
y_label = "Number of people"
histogram_title = f"People distribution by first letter of surname for selected nationality: {selected_nationality}"


selected_field = "name"
histogram_type = "surname"
ignore_list = [""]

# only draw a chart when at least one person matched
if data_selected:

    histogram = create_histogram({"persons": data_selected}, x_label, y_label, histogram_title, entity_type, selected_field, ignore_list, histogram_type, 0)
    histogram.show()

## Total number of exhibitions

In [195]:
## person by total number of exhibitions

# which records to count; "total_exhibitions" applies no transformation,
# so the raw field value becomes the bar label
entity_type = "persons"
selected_field = histogram_type = "total_exhibitions"
ignore_list = [""]

# chart text
x_label, y_label = "Number of Exhibitions", "Number of people"
histogram_title = "Total number of exhibitions"

# sort_type=0 orders the bars by label
histogram = create_histogram(
    data=data,
    x_label=x_label,
    y_label=y_label,
    histogram_title=histogram_title,
    entity_type=entity_type,
    selected_field=selected_field,
    ignore_list=ignore_list,
    histogram_type=histogram_type,
    sort_type=0,
)
histogram.show()

## Maximum number of exhibitions per decade for any artist



    




In [246]:
# highest exhibition count found so far for any single person and decade
max_number_ex_decade = 0

# [count, decade prefix, person id, person name] of the current record holder
max_number_decade = []

entity_type = "persons"

for person_record in data.get(entity_type) or []:
    person_id = person_record.get("id")
    person_name = person_record.get("name")

    # count this person's exhibitions per decade; the decade key is the first
    # three characters of the start year (e.g. "194" for the 1940s)
    ex_start_decades = {}
    for exhibition in person_record.get("exhibitions") or []:
        start = exhibition.get("start")
        if not start:
            # without a start date there is no decade to attribute it to
            continue
        ex_start_decade = start.split("-")[0][:3]
        ex_start_decades[ex_start_decade] = ex_start_decades.get(ex_start_decade, 0) + 1

    # busiest decade for this person; ">=" keeps the last-seen decade on ties
    best_decade, best_count = None, 0
    for decade_key, decade_count in ex_start_decades.items():
        if decade_count >= best_count:
            best_decade, best_count = decade_key, decade_count

    # a strictly greater count replaces the record, so the first person to
    # reach a given count keeps it
    if best_decade is not None and best_count > max_number_ex_decade:
        max_number_ex_decade = best_count
        max_number_decade = [best_count, best_decade, person_id, person_name]


if max_number_decade:
    person = max_number_decade[3]
    count = max_number_decade[0]
    decade = max_number_decade[1]
    print(f"Person with the most number of exhibitions for any decade is {person} with {count} exhibitions in the {decade}0s.")
else:
    # nothing to report, e.g. empty data or no exhibition has a start date
    print("No exhibitions with a start date were found.")

    




    
    












Person with the most number of exhibitions for any decade is Picasso Pablo with 54 exhibitions in the 1940s.
