# Exhibitions

In [1]:
from collections import Counter

import pandas as pd

## Loading data

In [2]:
%%time

# Load the exhibitions table and replace every missing value with an
# empty string in a single chained expression.
exhibitions_df = pd.read_csv('data/out/exhibitions.csv').fillna('')

CPU times: user 796 ms, sys: 212 ms, total: 1.01 s
Wall time: 1.01 s


In [3]:
# (rows, columns) of the loaded table.
exhibitions_df.shape

(72742, 26)

In [4]:
# Preview the first rows of the loaded table.
exhibitions_df.head()

Unnamed: 0,ID,post_type,post_title,place_t,place_r,place_c,start_y,start_m,start_d,end_y,...,xplace_t,xplace_r,xplace_c,xstart_y,xstart_m,xstart_d,xend_y,xend_m,xend_d,xgender
0,117998,exposición,"""Elizabeth Blackwell"", Carmen Oliver",Granada,Andalucía,España,2017,3,1,2017,...,España,España,España,1985.0,1.0,1.0,,,,Femenino
1,117998,exposición,"""Elizabeth Blackwell"", Carmen Oliver",Granada,Andalucía,España,2017,3,1,2017,...,Granada,Andalucía,España,2017.0,1.0,1.0,,,,NO APLICA
2,117998,exposición,"""Elizabeth Blackwell"", Carmen Oliver",Granada,Andalucía,España,2017,3,1,2017,...,Granada,Andalucía,España,,,,,,,NO APLICA
3,117998,exposición,"""Elizabeth Blackwell"", Carmen Oliver",Granada,Andalucía,España,2017,3,1,2017,...,Granada,Andalucía,España,,,,,,,NO APLICA
4,108998,exposición,"""European Masterworks. Paintings from the Coll...",Nashville,Tennessee,Estados Unidos,2001,4,8,2001,...,Francia,Francia,Francia,1840.0,11.0,14.0,1926.0,12.0,5.0,Masculino


## Exploring data

In [5]:
# Columns inspected throughout the exploration: the first three are the
# exhibition's own fields, the 'x'-prefixed ones are the related element's.
important_columns = [
    'ID',
    'post_type',
    'post_title',
    'xkey',
    'xid',
    'xpost_type',
    'xpost_title',
]

### Unique values for important columns

In [6]:
# For each important column print its number of distinct values; when
# there are at most 10 of them, also print the values themselves.
for column_name in important_columns:
    column_values = exhibitions_df[column_name]
    distinct_count = column_values.nunique()
    print(column_name, distinct_count)
    if not distinct_count > 10:
        print(column_values.unique())

ID 9577
post_type 1
['exposición']
post_title 9016
xkey 9
['actor que participa como artista' 'catálogo' 'entidad organizadora'
 'fuente de información' 'coleccionista prestatario de obras'
 'actor comisario' 'entidad patrocinadora' 'exposición de la que depende'
 'empresa que realiza la museografía']
xid 23940
xpost_type 5
['actor' 'catálogo' 'entidad' 'exposición' 'empresa']
xpost_title 23786


### Number of unique exhibitions

In [7]:
# Number of distinct exhibition IDs.
exhibitions_df['ID'].nunique()

9577

### Number of related elements

In [8]:
# Number of distinct related-element ids.
exhibitions_df['xid'].nunique()

23940

### Number of unique elements for each "xpost_type"

In [9]:
# Number of distinct (xid, xpost_type) pairs.
exhibitions_df.drop_duplicates(subset=['xid', 'xpost_type']).shape[0]

23943

In [10]:
# Distinct xids among the distinct (xid, xpost_type) pairs.
exhibitions_df.drop_duplicates(subset=['xid', 'xpost_type'])['xid'].nunique()

23940

In [12]:
# How many different xpost_type values each xid appears with; the top
# entries are the xids shared by more than one type.
exhibitions_df.drop_duplicates(subset=['xid', 'xpost_type'])['xid'].value_counts().head()

-1        2
 28487    2
 743      2
 71677    1
 67707    1
Name: xid, dtype: int64

In [25]:
# One sample row per xpost_type for xid == -1.
exhibitions_df.loc[exhibitions_df['xid'] == -1, important_columns].drop_duplicates(['xid', 'xpost_type'])

Unnamed: 0,ID,post_type,post_title,xkey,xid,xpost_type,xpost_title
9349,101136,exposición,"Barcelona and Modernity: Picasso, Gaudí, Miró,...",exposición de la que depende,-1,exposición,Desconocido
72622,114958,exposición,Youniverse,actor que participa como artista,-1,actor,Desconocido


In [26]:
# One sample row per xpost_type for xid == 28487.
exhibitions_df.loc[exhibitions_df['xid'] == 28487, important_columns].drop_duplicates(['xid', 'xpost_type'])

Unnamed: 0,ID,post_type,post_title,xkey,xid,xpost_type,xpost_title
40319,33188,exposición,"La Ría de Vigo, en imágenes",actor que participa como artista,28487,actor,Desconocido
40320,33188,exposición,"La Ría de Vigo, en imágenes",coleccionista prestatario de obras,28487,entidad,Desconocido


In [27]:
# One sample row per xpost_type for xid == 743.
exhibitions_df.loc[exhibitions_df['xid'] == 743, important_columns].drop_duplicates(['xid', 'xpost_type'])

Unnamed: 0,ID,post_type,post_title,xkey,xid,xpost_type,xpost_title
1261,64681,exposición,10 años después: Post Emergencias en la Colecc...,entidad organizadora,743,entidad,Musac - Museo de Arte Contemporáneo de Castill...
29973,28113,exposición,Fondo A UA CRAG al muro,actor que participa como artista,743,actor,Musac - Museo de Arte Contemporáneo de Castill...


In [51]:
# Is 1 used as a related-element id?
bool(exhibitions_df['xid'].eq(1).any())

False

In [52]:
# Is 2 used as a related-element id?
bool(exhibitions_df['xid'].eq(2).any())

False

In [53]:
# Is 1 used as an exhibition ID?
bool(exhibitions_df['ID'].eq(1).any())

False

In [54]:
# Is 2 used as an exhibition ID?
bool(exhibitions_df['ID'].eq(2).any())

False

In [45]:
# Is 284879 used as an exhibition ID?
bool(exhibitions_df['ID'].eq(284879).any())

False

In [46]:
# Is 7439 used as an exhibition ID?
bool(exhibitions_df['ID'].eq(7439).any())

False

In [43]:
# Print every distinct related-element id that is zero or negative.
non_positive_xids = (xid for xid in exhibitions_df['xid'].unique() if xid <= 0)
for xid in non_positive_xids:
    print(xid)

-1


In [34]:
%%time

# Load the source Excel workbook and replace every missing value with an
# empty string in a single chained expression.
exhibitions_df2 = pd.read_excel('data/src/20180410_EXHIBITIONS.xlsx').fillna('')

CPU times: user 24.6 s, sys: 208 ms, total: 24.8 s
Wall time: 24.8 s


In [35]:
# Rows in the Excel source with xid == -1.
exhibitions_df2.loc[exhibitions_df2['xid'] == -1, important_columns].shape[0]

2

In [36]:
# Rows in the Excel source with xid == 28487.
exhibitions_df2.loc[exhibitions_df2['xid'] == 28487, important_columns].shape[0]

2

In [37]:
# Rows in the Excel source with xid == 743.
exhibitions_df2.loc[exhibitions_df2['xid'] == 743, important_columns].shape[0]

164

In [47]:
# Show the important columns of every Excel-source row with xid == 743.
exhibitions_df2.loc[exhibitions_df2['xid'] == 743, important_columns]

Unnamed: 0,ID,post_type,post_title,xkey,xid,xpost_type,xpost_title
1261,64681,exposición,10 años después: Post Emergencias en la Colecc...,entidad organizadora,743,entidad,Musac - Museo de Arte Contemporáneo de Castill...
1262,64681,exposición,10 años después: Post Emergencias en la Colecc...,fuente de información,743,entidad,Musac - Museo de Arte Contemporáneo de Castill...
3908,78672,exposición,Acortando distancias. Panorama asiático en la ...,entidad organizadora,743,entidad,Musac - Museo de Arte Contemporáneo de Castill...
3909,78672,exposición,Acortando distancias. Panorama asiático en la ...,fuente de información,743,entidad,Musac - Museo de Arte Contemporáneo de Castill...
4186,63894,exposición,"After all, tomorrow is another day. Jorge Pineda",coleccionista prestatario de obras,743,entidad,Musac - Museo de Arte Contemporáneo de Castill...
4362,78442,exposición,Akram Zaatari. El molesto asunto,entidad organizadora,743,entidad,Musac - Museo de Arte Contemporáneo de Castill...
4363,78442,exposición,Akram Zaatari. El molesto asunto,fuente de información,743,entidad,Musac - Museo de Arte Contemporáneo de Castill...
4727,71579,exposición,"All Yesterday’s Parties. Andy Warhol, música y...",entidad organizadora,743,entidad,Musac - Museo de Arte Contemporáneo de Castill...
4731,70829,exposición,"All Yesterday's Parties. Andy Warhol, música y...",entidad organizadora,743,entidad,Musac - Museo de Arte Contemporáneo de Castill...
5191,78417,exposición,Amikejo. Iratxe Jaio y Klaas van Gorkum,entidad organizadora,743,entidad,Musac - Museo de Arte Contemporáneo de Castill...


In [12]:
# Count related elements per xpost_type. Each xid is counted once, under
# the xpost_type of its first row (drop_duplicates keeps the first match).
exhibitions_df.drop_duplicates(subset='xid')['xpost_type'].value_counts()

actor         20106
entidad        2918
catálogo        681
exposición      222
empresa          13
Name: xpost_type, dtype: int64

### Total number of relationships

In [9]:
# Number of rows per relationship kind ('xkey').
exhibitions_df['xkey'].value_counts()

actor que participa como artista      44420
entidad organizadora                  11361
fuente de información                  8969
actor comisario                        3723
entidad patrocinadora                  2019
coleccionista prestatario de obras     1083
catálogo                                712
exposición de la que depende            423
empresa que realiza la museografía       32
Name: xkey, dtype: int64

### Number of unique actors and entities

In [11]:
# Distinct xids among rows whose related element is an 'actor'.
exhibitions_df.loc[exhibitions_df['xpost_type'] == 'actor', 'xid'].nunique()

20108

In [11]:
# Distinct xids among rows whose related element is an 'actor'.
# NOTE(review): this cell repeats the preceding cell's computation.
exhibitions_df.loc[exhibitions_df['xpost_type'] == 'actor', 'xid'].nunique()

20108

In [13]:
# Distinct xids among rows whose related element is an 'entidad'.
exhibitions_df.loc[exhibitions_df['xpost_type'] == 'entidad', 'xid'].nunique()

2919

### Same values for 'place_t', 'place_r', 'place_c' and for 'xplace_t', 'xplace_r', 'xplace_c'

In [29]:
# Count exhibitions (one row per 'ID') whose town, region and country
# columns all hold the same value.
#   a -> exhibitions where place_t == place_r == place_c
#   b -> exhibitions where they are not all equal
#   c -> total exhibitions examined
# The comparison is vectorised instead of looping with iterrows(); the
# chained equality `t == r == c` is expressed as `(t == r) & (r == c)`.
unique_exhibitions = exhibitions_df.drop_duplicates('ID')
same_place = (
    (unique_exhibitions['place_t'] == unique_exhibitions['place_r'])
    & (unique_exhibitions['place_r'] == unique_exhibitions['place_c'])
)
a = int(same_place.sum())
c = len(unique_exhibitions)
b = c - a

In [30]:
# Exhibitions whose 'place_t', 'place_r' and 'place_c' are all equal.
a

2

In [31]:
# Exhibitions whose 'place_t', 'place_r' and 'place_c' are not all equal.
b

9575

In [32]:
# Total number of exhibitions examined (one row per 'ID').
c

9577

In [33]:
# Split related elements (one row per 'xid') by whether their three place
# columns hold the same value, recording each element's 'xpost_type'.
#   a / incorrect_places -> xplace_t == xplace_r == xplace_c
#   b / correct_places   -> the three values are not all equal
#   c                    -> total related elements examined
# The comparison is vectorised instead of looping with iterrows(). Row
# order is preserved in both lists, so Counter(...).most_common() breaks
# ties exactly as the row-by-row loop would.
related_elements = exhibitions_df.drop_duplicates('xid')
same_xplace = (
    (related_elements['xplace_t'] == related_elements['xplace_r'])
    & (related_elements['xplace_r'] == related_elements['xplace_c'])
)
incorrect_places = related_elements.loc[same_xplace, 'xpost_type'].tolist()
correct_places = related_elements.loc[~same_xplace, 'xpost_type'].tolist()
a = len(incorrect_places)
b = len(correct_places)
c = len(related_elements)

In [34]:
# Total number of related elements examined (one row per 'xid').
c

23940

In [35]:
# Related elements whose 'xplace_t', 'xplace_r' and 'xplace_c' are all equal.
a

20186

In [36]:
# 'xpost_type' breakdown of the elements whose three xplace values are equal.
Counter(incorrect_places).most_common()

[('actor', 20106), ('catálogo', 45), ('entidad', 31), ('exposición', 4)]

In [37]:
# Related elements whose 'xplace_t', 'xplace_r' and 'xplace_c' are not all equal.
b

3754

In [38]:
# 'xpost_type' breakdown of the elements whose three xplace values are not all equal.
Counter(correct_places).most_common()

[('entidad', 2887), ('catálogo', 636), ('exposición', 218), ('empresa', 13)]

In [41]:
a = b = c = 0
exhibition_ids = exhibitions_df.ID.unique()
for x in exhibitions_df[exhibitions_df.xpost_type == 'exposición'].xid.unique():
    if x in exhibition_ids:
        a += 1
    else:
        b += 1
    c += 1

In [42]:
# Related 'exposición' xids that also appear as an exhibition 'ID'.
a

219

In [43]:
# Related 'exposición' xids that do not appear as an exhibition 'ID'.
b

3

In [44]:
# Total distinct related 'exposición' xids checked.
c

222

## Saving data

In [45]:
# Write the table back to CSV without the index column.
# NOTE(review): this is the same path the notebook loaded from
# ('data/out/exhibitions.csv'), so the input file is overwritten — confirm
# this is intended.
exhibitions_df.to_csv('data/out/exhibitions.csv', index=False)