# Create data visualisations - histograms

This notebook creates histogram data visualisations for the Linked Art data provided.

# Import libraries

In [4]:

import json
try:
    import json
except:
    %pip install json
    import json

import os

try:
    from sortedcontainers import SortedDict
except:
    %pip install sortedcontainers
    from sortedcontainers import SortedDict

try:
    import plotly.express as px
except:
    %pip install plotly.express
    import plotly.express as px



# Define file locations

In [28]:
# root directory of the Linked Art source files
linked_data_dir = "../../linked_data"

# Linked Art files, one sub-directory per entity type
linked_data_filepath_person = f"{linked_data_dir}/person"
linked_data_filepath_activity = f"{linked_data_dir}/activity"
linked_data_filepath_group = f"{linked_data_dir}/group"

# summary output directories, one per entity type
output_dir = "../../output/summary"
output_person_dir = f"{output_dir}/person"
output_activity_dir = f"{output_dir}/activity"
output_group_dir = f"{output_dir}/group"

# data visualisation output (note the trailing slash on the venue directory)
output_datavis_dir = "../../output/datavis"
output_datavis_venue_exhibitions = f"{output_datavis_dir}/venue/exhibitions/"

# summary files that list every person / every event
persons_all_file = "persons_all.json"
activity_all_file = "events_all.json"

# building blocks for the sorted summary file names
asc = "asc"
desc = "desc"
born = "born"
died = "died"
name = "name"
persons = "persons"
json_suffix = ".json"

# sorted person summaries: persons_<field>_<order>.json
output_persons_born_asc_file = f"{persons}_{born}_{asc}{json_suffix}"
output_persons_born_desc_file = f"{persons}_{born}_{desc}{json_suffix}"

output_persons_died_asc_file = f"{persons}_{died}_{asc}{json_suffix}"
output_persons_died_desc_file = f"{persons}_{died}_{desc}{json_suffix}"

output_persons_name_asc_file = f"{persons}_{name}_{asc}{json_suffix}"
output_persons_name_desc_file = f"{persons}_{name}_{desc}{json_suffix}"

output_persons_total_exhibitions_desc = f"{persons}_total_exhibitions_{desc}{json_suffix}"

# summary file holding the non-MoMA events
output_events_nonmoma = "events_nonmoma.json"

# number of records to include in summary output files
summary_number_records = 200

# Exhibitions per year

In [6]:
# tally of exhibitions, keyed by start year (the year is kept as a string)
number_ex_by_year = {}

# read the summary file that lists every event
with open(os.path.join(output_activity_dir, activity_all_file), 'r') as json_file:
    data = json.load(json_file)

for event in data.get("events"):
    # the year is whatever precedes the first "-" in the start date
    year = event.get("start").partition("-")[0]
    number_ex_by_year[year] = number_ex_by_year.get(year, 0) + 1

# order the tallies by their year key before plotting
number_ex_by_year = SortedDict(number_ex_by_year)

x_values = number_ex_by_year.keys()
y_values = number_ex_by_year.values()

fig = px.bar(
    x=x_values,
    y=y_values,
    labels={"x": "Year", "y": "Number of exhibitions"},
    title="Number of exhibitions by year",
)
fig.show()

# Exhibitions per year - non-MoMA data

The following histogram shows number of exhibitions per year for non-MoMA events.

In [8]:

# read the summary file that lists the non-MoMA events
with open(os.path.join(output_activity_dir, output_events_nonmoma), 'r') as json_file:
    data = json.load(json_file)

# tally of exhibitions, keyed by start year (the year is kept as a string)
number_ex_by_year = {}
for event in data.get("events"):
    # the year is whatever precedes the first "-" in the start date
    year = event.get("start").partition("-")[0]
    number_ex_by_year[year] = number_ex_by_year.get(year, 0) + 1

# order the tallies by their year key before plotting
number_ex_by_year = SortedDict(number_ex_by_year)

x_values = number_ex_by_year.keys()
y_values = number_ex_by_year.values()

fig = px.bar(
    x=x_values,
    y=y_values,
    labels={"x": "Year", "y": "Number of exhibitions"},
    title="Number of exhibitions by year - non-MoMa data",
)
fig.show()

# Exhibitions per institution 

In [22]:
# number of exhibitions carried out by each group, keyed by the group's label
number_by_group = {}

# labels already looked up, keyed by group reference, so that each group file
# is opened and parsed once instead of once per exhibition
group_labels = {}

with open(os.path.join(output_activity_dir, activity_all_file), 'r') as json_file:
    data = json.load(json_file)

for event in data.get("events"):
    for org in event.get("carried_out_by"):
        # only groups are counted: skip any reference containing "Person"
        if "Person" in org:
            continue

        if org not in group_labels:
            # the group file is named after the last path segment of the reference
            group_filename = org.split("/").pop() + json_suffix
            with open(os.path.join(linked_data_filepath_group, group_filename), 'r') as group_file:
                group_labels[org] = json.load(group_file).get("_label")

        label = group_labels[org]
        number_by_group[label] = number_by_group.get(label, 0) + 1

# sort by number of exhibitions, smallest first
number_by_group = dict(sorted(number_by_group.items(), key=lambda item: item[1]))

x_values = number_by_group.keys()
y_values = number_by_group.values()

fig = px.bar(
    x=x_values,
    y=y_values,
    labels={"x": "Group", "y": "Number of exhibitions"},
    title="Number of exhibitions by group",
)
fig.show()

# Remove 'The Museum of Modern Art' and 'MoMA PS1'
 

In [24]:
# drop the two MoMA entries; pop with a default does nothing when the key is absent
for moma_label in ("The Museum of Modern Art", "MoMA PS1"):
    number_by_group.pop(moma_label, None)

In [27]:
# rebuild the dict with its labels ordered by exhibition count, smallest first
# (the sort is stable, so labels with equal counts keep their current order)
labels_by_count = sorted(number_by_group, key=number_by_group.get)
number_by_group = {label: number_by_group[label] for label in labels_by_count}

x_values = number_by_group.keys()
y_values = number_by_group.values()

fig = px.bar(
    x=x_values,
    y=y_values,
    labels={"x": "Org", "y": "Number of exhibitions"},
    title="Number of exhibitions by organisation (excluding MoMA PS1 and The Museum of Modern Art)",
)
fig.show()


# Number of exhibitions per venue

In [34]:

def venue_to_filename(venue):
    """Return the venue name with spaces, full stops and slashes removed.

    The result is used as the stem of the venue's JSON file name.
    """
    return venue.replace(" ", "").replace(".", "").replace("/", "")


# events grouped by venue, and the number of exhibitions held at each venue
events_venue = {}
number_dict = {}

with open(os.path.join(output_activity_dir, activity_all_file), 'r') as json_file:
    data = json.load(json_file)

for event in data.get("events"):
    venue = event.get("venue")
    number_dict[venue] = number_dict.get(venue, 0) + 1
    events_venue.setdefault(venue, []).append(event)

# order by venue name first, then sort by number: the second sort is stable,
# so venues with the same count stay in name order
number_dict = SortedDict(number_dict)
number_dict = dict(sorted(number_dict.items(), key=lambda item: item[1]))

# make sure the output directory exists before writing into it
os.makedirs(output_datavis_venue_exhibitions, exist_ok=True)

# one JSON file per venue, holding its exhibition count and its events
for venue, venue_events in events_venue.items():
    venue_path = os.path.join(output_datavis_venue_exhibitions, venue_to_filename(venue) + '.json')
    with open(venue_path, 'w') as ex_file:
        json.dump(
            {"meta": {"venue": venue, "exhibition_count": number_dict[venue]}, "events": venue_events},
            ex_file,
            indent=2,
        )

# index of all venues, in the same order as the chart
venue_index = [
    {"name": venue, "filename": venue_to_filename(venue), "number_exhibitions": count}
    for venue, count in number_dict.items()
]

with open(os.path.join(output_datavis_venue_exhibitions, 'index.json'), 'w') as ex_file:
    json.dump({"venues": venue_index}, ex_file)

x_values = number_dict.keys()
y_values = number_dict.values()

fig = px.bar(
    x=x_values,
    y=y_values,
    labels={"x": "Venue", "y": "Number of exhibitions"},
    title="Number of exhibitions by venue",
)
fig.show()

# Remove MoMA and MoMA PS1 from data

In [37]:
# drop the two MoMA venues; pop with a default does nothing when the key is absent
for moma_venue in ("MoMA", "MoMA PS1"):
    number_dict.pop(moma_venue, None)

x_values = number_dict.keys()
y_values = number_dict.values()

# the title states the exclusion so this chart can be told apart from the
# unfiltered venue chart
fig = px.bar(
    x=x_values,
    y=y_values,
    labels={"x": "Venue", "y": "Number of exhibitions"},
    title="Number of exhibitions by venue (excluding MoMA and MoMA PS1)",
)
fig.show()

# Number of Artists per Nationality

In [38]:
# number of people per nationality
number_dict = {}

# values left out of the chart: blanks, the gender labels that appear in the
# nationality field, and "American"
excluded_nationalities = ("", "Male", "Female", "American")

with open(os.path.join(output_person_dir, persons_all_file), 'r') as json_file:
    data = json.load(json_file)

for person in data.get("persons"):
    if "nationality" not in person:
        continue

    nat = person.get("nationality")
    if nat in excluded_nationalities:
        continue

    number_dict[nat] = number_dict.get(nat, 0) + 1

# sort by number of people, smallest first
number_dict = dict(sorted(number_dict.items(), key=lambda item: item[1]))

x_values = number_dict.keys()
y_values = number_dict.values()

fig = px.bar(
    x=x_values,
    y=y_values,
    labels={"x": "Nationality", "y": "Number of people"},
    title="Number of people involved in exhibitions by nationality (excluding Americans and empty values)",
)
fig.show()

# Gender

In [39]:
with open(os.path.join(output_person_dir, persons_all_file), 'r') as json_file:
    data = json.load(json_file)

# number of people per gender
number_dict = {}

for person in data.get("persons"):
    if "nationality" not in person:
        continue

    # NOTE(review): gender is read from the "nationality" field; presumably some
    # records hold "Male"/"Female" there - confirm against the data
    gender = person.get("nationality")
    if gender not in ["Male", "Female"]:
        continue

    number_dict[gender] = number_dict.get(gender, 0) + 1

# sort by number of people, smallest first
number_dict = dict(sorted(number_dict.items(), key=lambda item: item[1]))

x_values = number_dict.keys()
y_values = number_dict.values()

fig = px.bar(
    x=x_values,
    y=y_values,
    labels={"x": "Gender", "y": "Number of people"},
    title="Number of people involved in exhibitions by gender",
)
fig.show()

# Number of exhibitions per year for each organisation

In [40]:
with open(os.path.join(output_activity_dir, activity_all_file), 'r') as json_file:
    data = json.load(json_file)

# nested tally: organisation -> start year (as a string) -> number of exhibitions
number_ex_by_year = {}

for event in data.get("events"):
    org = event.get("org")
    # the year is whatever precedes the first "-" in the start date
    year = event.get("start").partition("-")[0]

    org_years = number_ex_by_year.setdefault(org, {})
    org_years[year] = org_years.get(year, 0) + 1

# one bar chart per organisation, with its years in sorted order
for org, numbers in number_ex_by_year.items():
    numbers = SortedDict(numbers)
    x_values = numbers.keys()
    y_values = numbers.values()

    fig = px.bar(
        x=x_values,
        y=y_values,
        labels={"x": "Year", "y": "Number of exhibitions"},
        title="Number of exhibitions by year - " + org,
    )
    fig.show()
        