# Collect the announcements for each artist and save them to a file

## Initial steps

In [1]:
import scraping_functions

In [2]:
# Load the cleaned artist lists; each line of a file becomes one list entry.
with open('data/artists_contemporary_cleaned_v1.txt', encoding='utf-8') as contemporary_file:
    artists_contemporary = contemporary_file.read().splitlines()

with open('data/artists_cleaned_v1.txt', encoding='utf-8') as all_artists_file:
    artists = all_artists_file.read().splitlines()

## 1) Scrape announcements

### 1a) Contemporary announcements only (faster):

As a first step, we collect the data for the artists in `artists_contemporary_cleaned_v1.txt`. If we only fetch the following types of exhibitions for them, then we only have to use Selenium twice:
- "Contemporary Art"
- "Data & Information"
- "Installation" 
- "Mixed Media"
- "Posthumanism"
- "Postmodernism"
- "Technology"

(These seem to be most of the contemporary categories on the website; feel free to add more.) With this filter, only two artists have over 30 exhibitions in these categories: Hans Ulrich Obrist (36) and Olafur Eliasson (34). Simple scraping is sufficient for all other artists.<br>
URL for these categories: https://www.e-flux.com/announcements/?c[]=Contemporary%20Art&c[]=Data%20%26%20Information&c[]=Installation&c[]=Mixed%20Media&c[]=Posthumanism&c[]=Postmodernism&c[]=Technology


In [3]:
# Scrape the announcements of every artist in `artists_contemporary`.
# `contemporary=True` is passed through to the project-local helper (presumably it
# restricts the search to the contemporary categories — confirm in scraping_functions).
artist_announcements_dict = scraping_functions.get_announcements_of_artists_scroll_if_needed(
    artists_contemporary,
    contemporary=True,
)

This ran for ~35 minutes for 8200 artists. Only two of them needed scraping via opening the artist page (30+ announcements); the rest were handled via a simple HTML response.<br>
(Runtime depends more on internet connection than processing power.)

Example:

In [15]:
# Show the first entry of the scrape result (the bare last expression is displayed).
artist_announcements_dict[0]

{'name': 'A.K. Burns',
 'announcements': [{'id': 183436,
   'link': '/announcements/183436/final-group-of-project-highlights-announced/',
   'title_artists': None,
   'title': 'Final group of project highlights announced'},
  {'id': 164538,
   'link': '/announcements/164538/next-artist-shortlist-for-the-bmw-art-journey-announced-during-art-basel-in-miami-beach/',
   'title_artists': None,
   'title': 'Next artist shortlist for the BMW Art Journey announced during Art Basel in Miami Beach'},
  {'id': 78527,
   'link': '/announcements/78527/publishing-as-an-artistic-toolbox-1989-2017/',
   'title_artists': None,
   'title': 'Publishing as an Artistic Toolbox: 1989–2017'}]}

Save file:

In [17]:
import json

# Persist the scrape result as indented JSON; ensure_ascii=False writes non-ASCII
# characters as-is instead of \u-escaping them.
with open('data/artist_announcements_dict_contemporary.json', mode='w', encoding='utf-8') as json_out:
    json.dump(artist_announcements_dict, json_out, indent=1, ensure_ascii=False)

### 1b) All announcements:

Problems with announcements date/subtitle: Achille Mbembe

In [3]:
# Scrape the announcements via the scrolling variant of the project-local helper.
# NOTE(review): this passes `artists_contemporary`, not `artists` — confirm that
# this is the intended input for the "all announcements" run.
artist_announcements_dict = scraping_functions.get_announcements_of_artists_scrolling(
    artists_contemporary
)

Problem with amount of subtitles or announcement dates for:Adam Avikainen
Problem with amount of subtitles or announcement dates for:Adam Budak
Problem with amount of subtitles or announcement dates for:Adel Abdessemed
Problem with amount of subtitles or announcement dates for:Adrian Piper
Problem with amount of subtitles or announcement dates for:Aernout Mik
Problem with amount of subtitles or announcement dates for:Agnes Denes
Problem with amount of subtitles or announcement dates for:Agnes Martin
Problem with amount of subtitles or announcement dates for:Agnieszka Kurant
Problem with amount of subtitles or announcement dates for:Alessandro Rabottini
Problem with amount of subtitles or announcement dates for:Alice Neel
Problem with amount of subtitles or announcement dates for:Ali Kazma
Problem with amount of subtitles or announcement dates for:Allan Kaprow
Problem with amount of subtitles or announcement dates for:Allan Sekula
Problem with amount of subtitles or announcement dates f

In [4]:
import json

# Persist the scrape result as indented JSON; ensure_ascii=False writes non-ASCII
# characters as-is instead of \u-escaping them.
with open('data/artist_announcements_dict.json', mode='w', encoding='utf-8') as json_out:
    json.dump(artist_announcements_dict, json_out, indent=1, ensure_ascii=False)

## 2) Create announcement-artist dictionary (the other way around, more useful for us)

### 2a) Contemporary announcements only:

In [60]:
# Reload the per-artist contemporary announcements from the JSON file written in step 1a.
with open('data/artist_announcements_dict_contemporary.json', encoding='utf-8') as json_in:
    artist_announcements_dict_contemporary = json.load(json_in)

# Kept for reference: the result of step 1b can be reloaded the same way.
#with open('data/artist_announcements_dict.json', 'r', encoding='utf-8') as f:
    #artist_announcements_dict = json.load(f)

In [61]:
# Show the first reloaded entry (the bare last expression is displayed).
artist_announcements_dict_contemporary[0]

{'name': 'A.K. Burns',
 'announcements': [{'id': 183436,
   'link': '/announcements/183436/final-group-of-project-highlights-announced/',
   'title_artists': None,
   'title': 'Final group of project highlights announced'},
  {'id': 164538,
   'link': '/announcements/164538/next-artist-shortlist-for-the-bmw-art-journey-announced-during-art-basel-in-miami-beach/',
   'title_artists': None,
   'title': 'Next artist shortlist for the BMW Art Journey announced during Art Basel in Miami Beach'},
  {'id': 78527,
   'link': '/announcements/78527/publishing-as-an-artistic-toolbox-1989-2017/',
   'title_artists': None,
   'title': 'Publishing as an Artistic Toolbox: 1989–2017'}]}

In [62]:
# Flatten the per-artist lists into one list of announcement dicts,
# dropping falsy entries (e.g. None or an empty dict).
announcements_contemporary = [
    entry
    for artist_record in artist_announcements_dict_contemporary
    for entry in artist_record['announcements']
    if entry
]

Remove duplicates (simple way)

In [63]:
# Remove exact duplicates, keeping the first occurrence and the original order.
# Announcement dicts are unhashable, so they are bucketed by their 'id' and compared
# with == only inside the matching bucket. Two equal dicts always carry an equal
# 'id', so this yields the same list as testing every item against the prefix
# `announcements_contemporary[:i]`, without the quadratic number of comparisons
# and slice copies. Entries that share an id but differ in any field are all kept.
_seen_by_id = {}
_unique_announcements = []
for _announcement in announcements_contemporary:
    _bucket = _seen_by_id.setdefault(_announcement.get('id'), [])
    if _announcement not in _bucket:
        _bucket.append(_announcement)
        _unique_announcements.append(_announcement)
announcements_contemporary = _unique_announcements

In [64]:
# Number of announcements left after de-duplication.
len(announcements_contemporary)

3549

In [65]:
import copy

# Re-key the announcements by id. Each value is an independent deep copy of the
# announcement without its 'id' field, plus an empty 'artists' list that is filled
# in afterwards. If an id occurs more than once, the last occurrence wins.
announcements_dict_contemporary = {}
for announcement in announcements_contemporary:
    record = copy.deepcopy(announcement)
    announcement_id = record.pop('id')
    record['artists'] = []
    announcements_dict_contemporary[announcement_id] = record

In [68]:
# Attach each artist's name to every announcement the artist appears in.
# NOTE: this appends to the 'artists' lists, so re-running this cell without first
# rebuilding `announcements_dict_contemporary` adds every name a second time.
for artist in artist_announcements_dict_contemporary:
    for announcement in artist['announcements']:
        # Skip falsy entries, mirroring the `if announcement` filter applied when
        # the flat announcement list was built; indexing them would raise.
        if not announcement:
            continue
        # Named `announcement_id` rather than `id` so the builtin id() is not
        # shadowed for the rest of the notebook session.
        announcement_id = announcement['id']
        announcements_dict_contemporary[announcement_id]['artists'].append(artist['name'])

In [71]:
# Example lookup by announcement id, showing the collected 'artists' list.
announcements_dict_contemporary[164538]

{'link': '/announcements/164538/next-artist-shortlist-for-the-bmw-art-journey-announced-during-art-basel-in-miami-beach/',
 'title_artists': None,
 'title': 'Next artist shortlist for the BMW Art Journey announced during Art Basel in Miami Beach',
 'artists': ['A.K. Burns',
  'BMW',
  'ICA',
  'Jamal Cyrus',
  'LACMA',
  'Mariela Scafati',
  'MOCAD']}

Save file:

In [72]:
# Persist the announcement -> artists mapping as indented JSON; ensure_ascii=False
# writes non-ASCII characters as-is instead of \u-escaping them.
with open('data/announcements_contemporary.json', mode='w', encoding='utf-8') as json_out:
    json.dump(announcements_dict_contemporary, json_out, indent=1, ensure_ascii=False)

### 2b) All announcements:

In [6]:
# Flatten the per-artist lists into one list of announcement dicts,
# dropping falsy entries (e.g. None or an empty dict).
announcements = [
    announcement
    for artist in artist_announcements_dict
    for announcement in artist['announcements']
    if announcement
]

# Remove exact duplicates, keeping the first occurrence and the original order.
# Announcement dicts are unhashable, so they are bucketed by their 'id' and compared
# with == only inside the matching bucket. Two equal dicts always carry an equal
# 'id', so this yields the same list as testing every item against the prefix
# `announcements[:i]`, without the quadratic number of comparisons and slice copies.
# Entries that share an id but differ in any field are all kept.
_seen_by_id = {}
_unique_announcements = []
for _announcement in announcements:
    _bucket = _seen_by_id.setdefault(_announcement.get('id'), [])
    if _announcement not in _bucket:
        _bucket.append(_announcement)
        _unique_announcements.append(_announcement)
announcements = _unique_announcements

In [7]:
# Number of announcements left after de-duplication.
len(announcements)

38105

In [18]:
# Show one flattened announcement entry as an example.
announcements[5]

{'id': 311515,
 'link': '/announcements/311515/light-and-language/',
 'title_artists': None,
 'title': 'Light and Language',
 'subtitle': 'Lismore Castle Arts',
 'announcement_date': 'March 22, 2021'}

In [19]:
# NOTE(review): `itertools` is not used in the visible part of this cell.
import itertools
import copy

# Goal of this cell: when an announcement id has both a complete entry (truthy
# 'announcement_date' and 'subtitle') and incomplete ones, delete the incomplete
# entries from the working copy. The `announcements` list itself is not modified.
announcements_copy = copy.deepcopy(announcements)

# Entries whose date or subtitle is falsy, and their ids (one id per entry, so the
# same id can appear several times).
missing_data_announcement = [announcement for announcement in announcements if not (announcement['announcement_date'] and announcement['subtitle'])]
missing_announcement_ids = [announcement['id'] for announcement in missing_data_announcement]

for announcement1 in announcements:
    # Incomplete entries cannot supersede anything; only complete ones are considered.
    if announcement1 in missing_data_announcement:
        continue
    if announcement1['id'] in missing_announcement_ids:
        # list.remove drops only the first matching id; further copies of the id stay.
        missing_announcement_ids.remove(announcement1['id'])
        # All incomplete entries that share the id of this complete entry.
        id_announcements_missing = [announcement2 for announcement2 in missing_data_announcement if announcement2['id'] == announcement1['id']]
        for announcement2 in id_announcements_missing:
            # Positions in the working copy that compare equal to the incomplete entry.
            announcements_copy_announcement2_indexes = [i for i, announcement in enumerate(announcements_copy) if announcement == announcement2]
            # NOTE(review): the indexes are computed before any deletion, so with more
            # than one match the later indexes would be off by one after the first `del`.
            # Presumably there is at most one match because `announcements` was
            # de-duplicated beforehand — confirm.
            for index in announcements_copy_announcement2_indexes:
                del announcements_copy[index]
            missing_data_announcement.remove(announcement2)
