# Linked Art - Create Summary Data Files - Activity

This notebook will create summary data files for the MOMA alternative data transformed to Linked Art JSON-LD, for use with an exhibition browser.

The JSON data files are in the `./data` directory as follows:

- `./data`
  - `activity`
  - `group`
  - `person`

Where:
- activity -> exhibition event 
- group -> organisation involved in exhibition events
- person -> person involved in exhibition events


# Import libraries

In [1]:
try:
    import json
except:
    %pip install json
    import json

from operator import itemgetter

import os

#  Create JSON file for all *Activity* entities
The following code will create a JSON file 
`events_all.json`

The JSON file contains summary information about each activity (exhibition) entity defined in the `data/activity` directory:

- id string
- label string
- _label string
- start date
- end date
- location string
- org string
- venue string
- identified_by []
- timespan []
- carried_out_by []
- influenced_by []
- other_exhibitions_same_time []
            

# Define location of input and output files

In [2]:
# Restore variables from IPython's %store storage (presumably saved by an
# earlier notebook in this workflow -- confirm they exist before running).
# Output directories.
%store -r output_activity_dir
%store -r output_person_dir

# Names of the combined summary files.
%store -r activity_all_file
%store -r persons_all_file

%store -r events_nonmoma
# NOTE: json_suffix is restored here but reassigned at the end of this cell.
%store -r json_suffix
%store -r linked_data_filepath_group
%store -r datavis_venue_exhibitions 
# NOTE: output_person_dir was already restored at the top of this cell.
%store -r output_person_dir



# Locations of the Linked Art JSON-LD input files.
%store -r linked_data_dir
%store -r linked_data_filepath_activity
%store -r linked_data_filepath_person
# NOTE: linked_data_filepath_group was already restored above.
%store -r linked_data_filepath_group



persons = "persons"
# Overrides whatever value was restored for json_suffix above.
json_suffix = ".json"


## Create events_all.json 

Create summary file for all exhibitions 

In [3]:


# Build one summary record per activity (exhibition) file and write them all
# to the "events" list of the combined summary file.
events = []

# Field layout of a summary record; the key order here is the key order
# written to the output file.
template = {
    "id": "",
    "label": "",
    "start": "",
    "end": "",
    "location": "",
    "org": "",
    "venue": "",
    "_label": "",
    "coords": "",
    "carried_out_by": [],
    "influenced_by": [],
    "other_exhibitions_same_time": [],
}

# Files that could not be read or that lack a required field. They are
# counted and reported below instead of being dropped without a trace.
skipped_files = []

for filename in os.listdir(linked_data_filepath_activity):
    try:
        with open(os.path.join(linked_data_filepath_activity, filename), 'r') as json_file:
            data = json.load(json_file)

        this = template.copy()

        this["id"] = data.get("id")
        this["_label"] = data.get("_label")
        this["label"] = data.get("_label")

        timespan = data.get("timespan")
        this["start"] = timespan.get("begin_of_the_begin")
        this["end"] = timespan.get("end_of_the_end")

        place = data.get("took_place_at")[0]
        this["location"] = place.get("_label")
        this["coords"] = place.get("defined_by")

        # "org" holds the organiser's id, not its label: later cells compare
        # it with organisation URLs and derive the group file name from it.
        this["org"] = data.get("carried_out_by")[0].get("id")

        # A record without "influenced_by" is kept with an empty list rather
        # than being discarded.
        this["influenced_by"] = [
            {"id": p.get("id"), "name": p.get("_label")}
            for p in (data.get("influenced_by") or [])
        ]
    except (OSError, ValueError, AttributeError, TypeError, IndexError):
        # Unreadable file, invalid JSON, or a missing timespan / place /
        # organiser: skip this file but keep processing the others.
        skipped_files.append(filename)
        continue

    events.append(this)

with open(os.path.join(output_activity_dir, activity_all_file), 'w') as file:
    file.write(json.dumps({"events": events}, indent=2))

if skipped_files:
    print("Skipped", len(skipped_files), "activity file(s) that could not be summarised")


## Find exhibitions happening at the same time and write them to ex_co.json

In [4]:
from dateutil.parser import parse

def is_date(string, fuzzy=False):
    """
    Return whether the string can be interpreted as a date.

    :param string: str, string to check for date
    :param fuzzy: bool, ignore unknown tokens in string if True
    :return: bool, True if the parser accepts the string, False otherwise
    """
    try:
        parse(string, fuzzy=fuzzy)
    except (ValueError, OverflowError, TypeError):
        # ValueError: unrecognised date format.
        # OverflowError: the parsed value is too large to represent.
        # TypeError: the input is not a string or character stream (e.g. None).
        return False
    return True

#   https://stackoverflow.com/questions/9044084/efficient-date-range-overlap-calculation

from datetime import datetime, date
from collections import namedtuple
Range = namedtuple('Range', ['start', 'end'])

# One {event_id: [overlapping event summaries]} entry per event that overlaps
# at least one other event.
co_occurrence_list = []


def _event_range(event):
    """Return the event's date Range, or None when its dates are unusable.

    Only the part before "T" of the "start" and "end" values is used. An event
    is rejected when either value is missing, empty, fails is_date(), or is
    not in YYYY-MM-DD form.
    """
    start_text = event.get("start")
    end_text = event.get("end")
    if start_text is None or end_text is None:
        return None

    start_text = start_text.split("T")[0]
    end_text = end_text.split("T")[0]
    if start_text == "" or end_text == "":
        return None

    if not (is_date(start_text, False) and is_date(end_text, False)):
        return None

    try:
        return Range(
            start=datetime.strptime(start_text, "%Y-%m-%d").date(),
            end=datetime.strptime(end_text, "%Y-%m-%d").date(),
        )
    except ValueError:
        # is_date() accepts more formats than the strict YYYY-MM-DD pattern
        # (for example a year-month value); treat those as unusable too.
        return None


with open(os.path.join(output_activity_dir, activity_all_file), 'r') as file:
    data = json.load(file)

events = data.get("events")

# Parse every event's dates once, instead of once per compared pair.
dated_events = []
for event in events:
    date_range = _event_range(event)
    if date_range is not None:
        dated_events.append((event, date_range))

for event, r1 in dated_events:
    event_id = event.get("id")
    event_list = []

    for event2, r2 in dated_events:
        event_id2 = event2.get("id")
        if event_id == event_id2:
            continue

        # Two inclusive date ranges overlap when the later start is not
        # after the earlier end.
        latest_start = max(r1.start, r2.start)
        earliest_end = min(r1.end, r2.end)
        overlap = max(0, (earliest_end - latest_start).days + 1)

        if overlap > 0:
            event_list.append({
                "id": event_id2,
                "_label": event2.get("_label"),
                "coords": event2.get("coords"),
                "location": event2.get("location"),
                "start": event2.get("start").split("T")[0],
                "end": event2.get("end").split("T")[0],
                "org": event2.get("org"),
            })

    if event_list:
        co_occurrence_list.append({event_id: event_list})

with open(os.path.join(output_activity_dir, "ex_co.json"), 'w') as file:
    file.write(json.dumps(co_occurrence_list, indent=2))

## Create events_all_startdate.json

Create summary file of all exhibitions, sorted by start date

In [5]:
# Re-read the combined summary file and write a copy sorted by start date.
with open(os.path.join(output_activity_dir, activity_all_file), 'r') as file:
    data = json.load(file)

events = data.get("events") or []

# A record's "start" may be null in the summary file; treat it as an empty
# string so such records sort first instead of breaking the comparison.
sorted_events = sorted(events, key=lambda event: event.get("start") or "")

with open(os.path.join(output_activity_dir, "events_all_startdate.json"), 'w') as file:
    file.write(json.dumps({"events": sorted_events}, indent=2))


## Create events_nonmoma.json and events_moma.json

Create two files: `events_nonmoma.json`, containing exhibition summary data for non-MoMA organisations, and `events_moma.json`, containing exhibition summary data for MoMA organisations.

In [6]:
# Split the combined summary into MoMA and non-MoMA exhibitions and write
# each group to its own file, sorted by start date.

# Organiser ids that count as MoMA.
moma_org_ids = (
    "https://www.moma.org/data/Group/inst3",
    "https://www.moma.org/data/Group/inst7",
)

events_nonmoma = []
events_moma = []

with open(os.path.join(output_activity_dir, activity_all_file), 'r') as file:
    data = json.load(file)

events = data.get("events") or []

for event in events:
    if event.get("org") in moma_org_ids:
        events_moma.append(event)
    else:
        events_nonmoma.append(event)


def _start_key(event):
    """Sort key: the start date, with a null start ordered first."""
    return event.get("start") or ""


sorted_events_nonmoma = sorted(events_nonmoma, key=_start_key)
with open(os.path.join(output_activity_dir, "events_nonmoma.json"), 'w') as file:
    file.write(json.dumps({"events": sorted_events_nonmoma}, indent=2))

sorted_events_moma = sorted(events_moma, key=_start_key)
with open(os.path.join(output_activity_dir, "events_moma.json"), 'w') as file:
    file.write(json.dumps({"events": sorted_events_moma}, indent=2))



## Create events_all_startdate.json

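File containing all exhibitions grouped by start year and start month, together with a count of exhibitions per start year. This overwrites the `events_all_startdate.json` file created earlier in this notebook.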

In [7]:
from collections import defaultdict

# Group every activity (exhibition) by start year and start month, count the
# exhibitions per start year, and write both to events_all_startdate.json.

# events[year][month] -> list of summary records
events = defaultdict(dict)

# counter[year] -> number of exhibitions starting in that year
counter = defaultdict(int)

# Field layout of a summary record ("label" is appended after these keys).
template = {
    "id": "",
    "_label": "",
    "start": "",
    "end": "",
    "location": "",
    "org": "",
    "venue": "",
    "carried_out_by": [],
    "influenced_by": [],
}

# Files that could not be read or lack a required field.
skipped_files = []

for filename in os.listdir(linked_data_filepath_activity):
    try:
        with open(os.path.join(linked_data_filepath_activity, filename), 'r') as json_file:
            data = json.load(json_file)

        this = template.copy()

        this["id"] = data.get("id")
        this["_label"] = data.get("_label")
        this["label"] = data.get("_label")

        # Keep only the date part of the timestamps.
        timespan = data.get("timespan")
        this["start"] = timespan.get("begin_of_the_begin").split("T")[0]
        this["end"] = timespan.get("end_of_the_end").split("T")[0]

        this["location"] = data.get("took_place_at")[0].get("_label")

        organiser = data.get("carried_out_by")[0]
        this["org"] = organiser.get("_label")
        this["carried_out_by"] = organiser.get("id")

        start_parts = this["start"].split("-")
        start_year = start_parts[0]
        start_month = start_parts[1]
    except (OSError, ValueError, AttributeError, TypeError, IndexError):
        # Unreadable file, invalid JSON, or a missing / malformed timespan,
        # place or organiser: skip this file but keep processing the others.
        skipped_files.append(filename)
        continue

    events[start_year].setdefault(start_month, []).append(this)
    counter[start_year] += 1

# os.listdir() returns files in arbitrary order, so order the years, the
# months and the records within each month before writing.
ordered_events = {
    year: {
        month: sorted(records, key=itemgetter("start"))
        for month, records in sorted(months.items())
    }
    for year, months in sorted(events.items())
}
ordered_counter = dict(sorted(counter.items()))

with open(os.path.join(output_activity_dir, "events_all_startdate.json"), 'w') as file:
    file.write(json.dumps({"counter": ordered_counter, "events": ordered_events}, indent=2))

if skipped_files:
    print("Skipped", len(skipped_files), "activity file(s) that could not be summarised")


# Create events_nonmoma.json
Create summary file of exhibitions organised by non-MoMA organisations

In [20]:
from collections import defaultdict

# Summarise every activity (exhibition) that was not organised by MoMA and
# write the records, sorted by start date, to events_nonmoma.json.
events = []

# Field layout of a summary record ("label" is appended after these keys).
template = {
    "id": "",
    "_label": "",
    "start": "",
    "end": "",
    "location": "",
    "org": "",
    "venue": "",
    "carried_out_by": [],
    "influenced_by": [],
}

# Organiser ids that count as MoMA.
moma_org_ids = (
    "https://www.moma.org/data/Group/inst3",
    "https://www.moma.org/data/Group/inst7",
)

for filename in os.listdir(linked_data_filepath_activity):
    with open(os.path.join(linked_data_filepath_activity, filename), 'r') as json_file:
        data = json.load(json_file)

    # Read the organiser before building the record. The record dict must not
    # be touched here: it does not exist yet on the first iteration.
    organiser = data.get("carried_out_by")[0]
    org = organiser.get("id")
    if org in moma_org_ids:
        continue

    this = template.copy()

    this["id"] = data.get("id")
    this["_label"] = data.get("_label")
    this["label"] = data.get("_label")

    # Keep only the date part of the timestamps.
    timespan = data.get("timespan")
    this["start"] = timespan.get("begin_of_the_begin").split("T")[0]

    # End date and place are optional in this summary.
    if "end_of_the_end" in timespan:
        this["end"] = timespan.get("end_of_the_end").split("T")[0]

    if "took_place_at" in data:
        this["location"] = data.get("took_place_at")[0].get("_label")

    this["org"] = organiser.get("_label")
    this["carried_out_by"] = org

    events.append(this)

sorted_events = sorted(events, key=itemgetter('start'))
with open(os.path.join(output_activity_dir, "events_nonmoma.json"), 'w') as file:
    file.write(json.dumps(sorted_events, indent=2))


## Create events_moma_startdate.json

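Files containing exhibitions grouped by start year and start month, each with a count of exhibitions per start year: `events_moma_startdate.json` for exhibitions organised by MoMA organisations, and `events_nonmoma_startdate.json` for exhibitions organised by all other organisations.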



In [9]:
from collections import defaultdict

# Group activities (exhibitions) by start year and start month, separately
# for MoMA and non-MoMA organisers, and write one file for each group.

# MoMA: events[year][month] -> records, counter[year] -> count
events = defaultdict(dict)
counter = defaultdict(int)

# Non-MoMA: same structure.
nonmoma_events = defaultdict(dict)
nonmoma_counter = defaultdict(int)

# Field layout of a summary record ("label" is appended after these keys).
template = {
    "id": "",
    "_label": "",
    "start": "",
    "end": "",
    "location": "",
    "org": "",
    "venue": "",
    "carried_out_by": [],
    "influenced_by": [],
}

# Organiser ids that count as MoMA.
moma_org_ids = (
    "https://www.moma.org/data/Group/inst3",
    "https://www.moma.org/data/Group/inst7",
)


def _summarise_activity(data):
    """Return the summary record for one parsed activity document.

    Raises AttributeError, TypeError or IndexError when the timespan, place
    or organiser is missing from the document.
    """
    record = template.copy()

    record["id"] = data.get("id")
    record["_label"] = data.get("_label")
    record["label"] = data.get("_label")

    # Keep only the date part of the timestamps.
    timespan = data.get("timespan")
    record["start"] = timespan.get("begin_of_the_begin").split("T")[0]
    record["end"] = timespan.get("end_of_the_end").split("T")[0]

    record["location"] = data.get("took_place_at")[0].get("_label")

    organiser = data.get("carried_out_by")[0]
    record["org"] = organiser.get("_label")
    record["carried_out_by"] = organiser.get("id")
    return record


def _ordered_by_start(grouped):
    """Return a year -> month -> records dict with every level sorted."""
    return {
        year: {
            month: sorted(records, key=itemgetter("start"))
            for month, records in sorted(months.items())
        }
        for year, months in sorted(grouped.items())
    }


# Files that could not be read or lack a required field.
skipped_files = []

for filename in os.listdir(linked_data_filepath_activity):
    try:
        with open(os.path.join(linked_data_filepath_activity, filename), 'r') as json_file:
            data = json.load(json_file)

        this = _summarise_activity(data)
        start_parts = this["start"].split("-")
        start_year = start_parts[0]
        start_month = start_parts[1]
    except (OSError, ValueError, AttributeError, TypeError, IndexError):
        # Unreadable file, invalid JSON, or a missing / malformed field:
        # skip this file but keep processing the others.
        skipped_files.append(filename)
        continue

    org = this["carried_out_by"]
    if org in moma_org_ids:
        bucket, tally = events, counter
    else:
        bucket, tally = nonmoma_events, nonmoma_counter

    bucket[start_year].setdefault(start_month, []).append(this)
    tally[start_year] += 1

# os.listdir() returns files in arbitrary order, so order the output by
# start year, start month and start date before writing.
with open(os.path.join(output_activity_dir, "events_moma_startdate.json"), 'w') as file:
    file.write(json.dumps(
        {"counter": dict(sorted(counter.items())), "events": _ordered_by_start(events)},
        indent=2,
    ))

with open(os.path.join(output_activity_dir, "events_nonmoma_startdate.json"), 'w') as file:
    file.write(json.dumps(
        {
            "counter": dict(sorted(nonmoma_counter.items())),
            "events": _ordered_by_start(nonmoma_events),
        },
        indent=2,
    ))

if skipped_files:
    print("Skipped", len(skipped_files), "activity file(s) that could not be summarised")


# events ordered by organisation, year, month - moma and non-moma

In [10]:
from collections import defaultdict

# Group the summarised exhibitions by organiser label, start year and start
# month, count the exhibitions per organiser, and write events_all_org.json.
exhibitions = {"counter": [], "events": []}
exhibitions_moma = {"counter": [], "events": []}
exhibitions_nonmoma = {"counter": [], "events": []}

counter = defaultdict(dict)

# organiser label -> number of exhibitions
number_by_org = {}

# organiser label -> start year -> start month -> list of event records
exhibitions_by_org = {}

data_file = "events_all.json"

# organiser id -> label, so each group file is read once rather than once
# per exhibition.
org_labels = {}

with open(os.path.join(output_activity_dir, data_file), 'r') as json_file:
    data = json.load(json_file)

events = data.get("events") or []

# A record's "start" may be null; order those first instead of failing.
sorted_events = sorted(events, key=lambda event: event.get("start") or "")

for event in sorted_events:
    org = event.get("org")
    start_date = event.get("start")

    # Records without an organiser id or a year-month start date cannot be
    # placed in the grouping.
    if not org or not start_date:
        continue
    start_parts = start_date.split("-")
    if len(start_parts) < 2:
        continue
    start_date_year = start_parts[0]
    start_date_month = start_parts[1]

    if org not in org_labels:
        # The group file is named after the last path segment of the id.
        filename = org.split("/").pop() + json_suffix
        with open(os.path.join(linked_data_filepath_group, filename), 'r') as group_file:
            groupdata = json.load(group_file)
        org_labels[org] = groupdata.get("_label")
    label = org_labels[org]

    number_by_org[label] = number_by_org.get(label, 0) + 1

    months = exhibitions_by_org.setdefault(label, {}).setdefault(start_date_year, {})
    months.setdefault(start_date_month, []).append(event)

# sort by name
number_by_org = dict(sorted(number_by_org.items(), key=lambda item: item[0]))

exhibitions["counter"] = number_by_org
exhibitions["events"] = exhibitions_by_org

with open(os.path.join(output_activity_dir, "events_all_org.json"), 'w') as file:
    file.write(json.dumps(exhibitions, indent=2))

# Record a count of 0 when no exhibition carries this organiser label.
exhibitions_moma["counter"].append(exhibitions["counter"].get("The Museum of Modern Art", 0))

