## Define default variables
Overwritten by widget values if the notebook is viewed in Mercury

In [73]:
# Default birth year used when filtering persons. It is a string because it is
# compared with the year portion (text before the first "-") of each "born" value.
selected_birthyear = "1935"
print(f"selected_birthyear={selected_birthyear}")

# Default nationality. Per the notebook text, both defaults are overwritten by
# widget values when the notebook is viewed in Mercury.
selected_nationality = "American"
print(f"selected_nationality={selected_nationality}")

selected_birthyear=1935
selected_nationality=American


# Linked Art - Data Shape - People
This notebook shows the *shape* of data for persons associated with exhibitions, included in the selected Linked Art dataset.

## Purpose
Understanding the shape of data helps to identify features of the data that can be presented in a useable, browsable sectioned display.


It is proposed that a characteristic of the data that results in an even distribution of records will correspond to a useable, browsable breakdown of the data that can be presented in a sectioned display.

## How-to

### Visual Studio Code - Jupyter plugin
https://marketplace.visualstudio.com/items?itemName=ms-toolsai.jupyter 

If you have the Jupyter notebook extension, you can run the notebooks in Visual Studio Code editor:
- open and run the `variables.ipynb` notebook
- open and run the `persons.ipynb` notebook

### Jupyter Notebook
https://jupyter.org/ 


To run the Jupyter notebook in the browser, use the following command:
- `jupyter notebook`
- open and run the `variables.ipynb` notebook
- open and run the `persons.ipynb` notebook

### Mercury Web application
https://mljar.com/mercury/ 

To run this notebook as a web app using Mercury, use the following commands in the root directory:

- `mercury run variables.ipynb`
- `mercury run persons.ipynb`

- Go to the URL provided by Mercury, e.g. http://127.0.0.1:8000/ 
- Run the Variables notebook
- Open the Persons notebook, and click the green `Run` button in the left-hand column

## Get stored variables
Get stored variables - see variables.ipynb

In [74]:

# Restore variables previously persisted with %store (see variables.ipynb).
# Comments are kept on their own lines so they are not passed to the magic.

# NOTE(review): the activity variables are not referenced in the cells visible here.
%store -r summary_data_dir_activity
%store -r activity_all_file


# Directory and file name of the persons summary JSON that is loaded below.
%store -r summary_data_dir_person
%store -r persons_all_file

# NOTE(review): the remaining variables are not referenced in the cells visible
# here; presumably they are used further down or are leftovers. Confirm.
%store -r events_nonmoma

%store -r json_suffix

%store -r linked_data_filepath_group

%store -r datavis_venue_exhibitions 

In [75]:
import os
import json

try:
    import plotly.express as px
except:
    %pip install plotly.express
    import plotly.express as px


In [83]:


def create_histogram(data, x_label, y_label, histogram_title, entity_type, selected_field, ignore_list, histogram_type, sort_type):
    """Build a bar chart counting entities per (derived) value of one field.

    Args:
        data: dict holding a list of entity dicts under the key ``entity_type``.
        x_label: axis label for the x axis (the counted values).
        y_label: axis label for the y axis (the counts).
        histogram_title: title of the figure.
        entity_type: key in ``data`` whose list of entities is counted.
        selected_field: entity key whose value is counted.
        ignore_list: derived values that must not be counted.
        histogram_type: how the raw value is reduced before counting:
            ``"surname"`` uses the upper-cased first character of the text
            before the first space; ``"born"`` uses the text before the first
            ``"-"``; ``"born_decade"`` uses the first three characters of that
            text followed by ``"0s"``; any other value counts the raw value.
        sort_type: ``0`` sorts the bars by label, ``1`` sorts them by count.

    Returns:
        The figure returned by ``px.bar``.
    """
    counts = {}

    # A missing (or None) entity list produces an empty chart instead of a TypeError.
    for entity in data.get(entity_type) or []:
        if selected_field not in entity:
            continue

        value = entity.get(selected_field)
        if value is None:
            continue

        # reduce the raw value to the bucket it is counted under
        if histogram_type == "surname":
            value = value.split(" ")[0][:1].upper()
        elif histogram_type == "born":
            value = value.split("-")[0]
        elif histogram_type == "born_decade":
            value = value.split("-")[0][:3] + "0s"

        if value in ignore_list:
            continue

        counts[value] = counts.get(value, 0) + 1

    # sort by label (index 0) or by frequency (index 1) - determined by sort_type
    ordered = dict(sorted(counts.items(), key=lambda item: item[sort_type]))

    # pass concrete lists rather than dict views to plotly
    fig = px.bar(
        x=list(ordered.keys()),
        y=list(ordered.values()),
        labels={"x": x_label, "y": y_label},
        title=histogram_title,
    )
    return fig



def getNonAmericanData(data, selected_field):
    """Return the persons whose ``selected_field`` value is set and not excluded.

    A person from ``data["persons"]`` is kept when it contains ``selected_field``
    and that value is neither ``None`` nor one of "Male", "Female", "American"
    or the empty string. The kept person dicts are returned in their original
    order.
    """
    excluded_values = ["Male", "Female", "American", ""]

    def _keep(person):
        # persons without the field at all are dropped
        if selected_field not in person:
            return False
        value = person.get(selected_field)
        return value is not None and value not in excluded_values

    return [person for person in data.get("persons") if _keep(person)]

def getSelectedData(data, selected_field, selected_value):
    """Return the persons whose ``selected_field`` matches ``selected_value``.

    Args:
        data: dict holding a list of person dicts under the key ``"persons"``.
        selected_field: person key to filter on. For ``"born"`` only the text
            before the first ``"-"`` is compared.
        selected_value: value the field must equal; an empty string disables
            the equality filter so every person with a usable value is kept.

    Returns:
        List of the matching person dicts, in their original order. Persons
        without the field, with a ``None`` value, or with a value of "Male",
        "Female" or the empty string are always dropped.
    """
    entity_type = "persons"
    ignore_list = ["Male", "Female", ""]
    data_selected = []

    # a missing (or None) persons list yields an empty result
    for entity in data.get(entity_type) or []:
        if selected_field not in entity:
            continue

        value = entity.get(selected_field)
        # Check for None before any string handling: a null "born" value
        # would otherwise raise AttributeError on .split().
        if value is None:
            continue

        if selected_field == "born":
            value = value.split("-")[0]

        if value in ignore_list:
            continue
        if selected_value != "" and value != selected_value:
            continue

        data_selected.append(entity)

    return data_selected


In [77]:
## read person data file
# directory and file name come from the variables restored with %store
file_dir = summary_data_dir_person
file_entity = persons_all_file

# start from an empty dict so a failed load cannot leave stale data from an earlier run
data = {}

# open summary data file containing all person records;
# JSON is read as UTF-8 explicitly instead of relying on the platform default encoding
with open(os.path.join(file_dir, file_entity), 'r', encoding="utf-8") as json_file:
    # get data
    data = json.load(json_file)


## Surname

### First letter of surname

In [78]:
## persons by first letters of surname

# what to count: the "name" field of every entry under "persons", reduced to its first letter
entity_type = "persons"
selected_field = "name"
histogram_type = "surname"
ignore_list = [""]

# how to present it
x_label = "Surname"
y_label = "Number of people"
histogram_title = "All: Surname"

# sort_type=0 orders the bars by label
histogram = create_histogram(
    data=data,
    x_label=x_label,
    y_label=y_label,
    histogram_title=histogram_title,
    entity_type=entity_type,
    selected_field=selected_field,
    ignore_list=ignore_list,
    histogram_type=histogram_type,
    sort_type=0,
)
histogram.show()

## Birth year:selected --> surname

In [79]:
# get data for selected birth year
data_selected = getSelectedData(data, "born", selected_birthyear)

# count the first letter of the "name" field
selected_field = "name"
histogram_type = "surname"
ignore_list = [""]

x_label = "Surname"
y_label = "Number of people"
histogram_title = f"Birth year:{selected_birthyear} --> Surname"

# only draw the chart when at least one person matched;
# entity_type is the value set in the earlier "All: Surname" cell
if data_selected:
    histogram = create_histogram(
        data={"persons": data_selected},
        x_label=x_label,
        y_label=y_label,
        histogram_title=histogram_title,
        entity_type=entity_type,
        selected_field=selected_field,
        ignore_list=ignore_list,
        histogram_type=histogram_type,
        sort_type=0,
    )
    histogram.show()

## Nationality

### Nationality:all --> Surname

In [80]:
# every person that has a usable nationality value (empty selected value = no equality filter)
data_selected = getSelectedData(data, "nationality", "")

# count the first letter of the "name" field
selected_field = "name"
histogram_type = "surname"
ignore_list = [""]

x_label = "Surname"
y_label = "Number of people"
histogram_title = "Nationality:all --> Surname"

# only draw the chart when at least one person matched;
# entity_type is the value set in the earlier "All: Surname" cell
if data_selected:
    histogram = create_histogram(
        data={"persons": data_selected},
        x_label=x_label,
        y_label=y_label,
        histogram_title=histogram_title,
        entity_type=entity_type,
        selected_field=selected_field,
        ignore_list=ignore_list,
        histogram_type=histogram_type,
        sort_type=0,
    )
    histogram.show()

### Nationality:American --> Surname

In [81]:
# get data for selected nationality
# Uses the selected_nationality default from the first cell (which Mercury widgets
# overwrite) instead of a hard-coded "American"; with the default the output is unchanged.
data_selected = getSelectedData(data, "nationality", selected_nationality)

x_label = "Surname"
y_label = "Number of people"
histogram_title = f"Nationality:{selected_nationality} --> Surname"


selected_field = "name"
histogram_type = "surname"
ignore_list = [""]

# only draw the chart when at least one person matched;
# entity_type is the value set in the earlier "All: Surname" cell
if len(data_selected) > 0:

    histogram = create_histogram({"persons": data_selected}, x_label, y_label, histogram_title, entity_type, selected_field, ignore_list, histogram_type, 0)
    histogram.show()

### Nationality:non-American --> Surname

In [82]:


# persons whose nationality is set and is not "American"
data_selected = getNonAmericanData(data, "nationality")

# count the first letter of the "name" field
selected_field = "name"
histogram_type = "surname"
ignore_list = [""]

x_label = "Surname"
y_label = "Number of people"
histogram_title = "Nationality:non-American --> Surname"

# only draw the chart when at least one person matched;
# entity_type is the value set in the earlier "All: Surname" cell
if data_selected:
    histogram = create_histogram(
        data={"persons": data_selected},
        x_label=x_label,
        y_label=y_label,
        histogram_title=histogram_title,
        entity_type=entity_type,
        selected_field=selected_field,
        ignore_list=ignore_list,
        histogram_type=histogram_type,
        sort_type=0,
    )
    histogram.show()