# Exhibitions

In [1]:
from collections import Counter

import pandas as pd

## Loading data

In [2]:
%%time

# Read the exported exhibitions table and replace every missing value with
# an empty string, so no NaN is left in the frame.
exhibitions_df = pd.read_csv('data/out/exhibitions.csv').fillna('')

CPU times: user 840 ms, sys: 136 ms, total: 976 ms
Wall time: 972 ms


In [3]:
# (number of rows, number of columns) of the loaded frame.
exhibitions_df.shape

(72740, 26)

In [4]:
# Preview the first rows of the loaded frame.
exhibitions_df.head()

Unnamed: 0,ID,post_type,post_title,place_t,place_r,place_c,start_y,start_m,start_d,end_y,...,xplace_t,xplace_r,xplace_c,xstart_y,xstart_m,xstart_d,xend_y,xend_m,xend_d,xgender
0,117998,exposición,"""Elizabeth Blackwell"", Carmen Oliver",Granada,Andalucía,España,2017,3,1,2017,...,España,España,España,1985.0,1.0,1.0,,,,Femenino
1,117998,exposición,"""Elizabeth Blackwell"", Carmen Oliver",Granada,Andalucía,España,2017,3,1,2017,...,Granada,Andalucía,España,2017.0,1.0,1.0,,,,NO APLICA
2,117998,exposición,"""Elizabeth Blackwell"", Carmen Oliver",Granada,Andalucía,España,2017,3,1,2017,...,Granada,Andalucía,España,,,,,,,NO APLICA
3,117998,exposición,"""Elizabeth Blackwell"", Carmen Oliver",Granada,Andalucía,España,2017,3,1,2017,...,Granada,Andalucía,España,,,,,,,NO APLICA
4,108998,exposición,"""European Masterworks. Paintings from the Coll...",Nashville,Tennessee,Estados Unidos,2001,4,8,2001,...,Francia,Francia,Francia,1840.0,11.0,14.0,1926.0,12.0,5.0,Masculino


## Exploring data

In [5]:
# Columns inspected in the exploration below: the un-prefixed ones and their
# 'x'-prefixed counterparts.
important_columns = [
    'ID',
    'post_type',
    'post_title',
    'xkey',
    'xid',
    'xpost_type',
    'xpost_title',
]

### Unique values for important columns

In [6]:
# For every important column, print its name and its number of distinct
# values; when there are at most 10 of them, print the values themselves too.
for column_name in important_columns:
    column = exhibitions_df[column_name]
    distinct_count = column.nunique()
    print(column_name, distinct_count)
    if distinct_count > 10:
        # Too many distinct values to list.
        continue
    print(column.unique())

ID 9577
post_type 1
['exposición']
post_title 9016
xkey 9
['actor que participa como artista' 'catálogo' 'entidad organizadora'
 'fuente de información' 'coleccionista prestatario de obras'
 'actor comisario' 'entidad patrocinadora' 'exposición de la que depende'
 'empresa que realiza la museografía']
xid 23941
xpost_type 5
['actor' 'catálogo' 'entidad' 'exposición' 'empresa']
xpost_title 23786


### Number of unique exhibitions

In [7]:
# Distinct values in the 'ID' column.
exhibitions_df['ID'].nunique()

9577

### Number of unique related elements

In [8]:
# Distinct values in the 'xid' column.
exhibitions_df['xid'].nunique()

23941

### Number of unique elements for each type

In [9]:
# Keep the first row seen for each 'xid', then count how many of those rows
# fall under each 'xpost_type'.
(
    exhibitions_df
    .drop_duplicates(subset='xid')['xpost_type']
    .value_counts()
)

actor         20107
entidad        2918
catálogo        681
exposición      222
empresa          13
Name: xpost_type, dtype: int64

### Number of relationships of each type

In [10]:
# Number of rows for each value of 'xkey'.
exhibitions_df['xkey'].value_counts()

actor que participa como artista      44418
entidad organizadora                  11361
fuente de información                  8969
actor comisario                        3723
entidad patrocinadora                  2019
coleccionista prestatario de obras     1083
catálogo                                712
exposición de la que depende            423
empresa que realiza la museografía       32
Name: xkey, dtype: int64

### Types of relationships

In [11]:
# Every distinct ('xkey', 'xpost_type') pair present in the data, ordered by
# 'xkey'.
(
    exhibitions_df
    .loc[:, ['xkey', 'xpost_type']]
    .drop_duplicates()
    .sort_values(by='xkey')
)

Unnamed: 0,xkey,xpost_type
13,actor comisario,actor
0,actor que participa como artista,actor
1,catálogo,catálogo
9,coleccionista prestatario de obras,entidad
2313,coleccionista prestatario de obras,actor
1761,empresa que realiza la museografía,empresa
2,entidad organizadora,entidad
16,entidad patrocinadora,entidad
32,exposición de la que depende,exposición
3,fuente de información,entidad


### Number of unique actors and genders

In [12]:
# Distinct 'xid' values among the rows whose 'xpost_type' is 'actor'.
exhibitions_df.loc[exhibitions_df['xpost_type'] == 'actor', 'xid'].nunique()

20107

In [13]:
# Restrict to 'actor' rows, keep the first row for each 'xid', and count the
# resulting rows per 'xgender' value.
(
    exhibitions_df
    .loc[exhibitions_df['xpost_type'] == 'actor']
    .drop_duplicates(subset='xid')['xgender']
    .value_counts()
)

Masculino       13482
Femenino         5582
No declarado     1043
Name: xgender, dtype: int64

### Same values for '(x)place_t', '(x)place_r', and '(x)place_c'

#### Same values for 'place_t', 'place_r', and 'place_c'

In [14]:
# Compare the three place columns of every exhibition (first row kept for
# each unique 'ID').
#   a -> rows where place_t == place_r == place_c
#   b -> rows where at least one of the three values differs
#   c -> total number of rows inspected (a + b)
# The comparison is done column-wise instead of with a Python-level
# `iterrows()` loop; the resulting counts are the same.
exhibition_rows = exhibitions_df.drop_duplicates('ID')
same_place = (
    (exhibition_rows['place_t'] == exhibition_rows['place_r'])
    & (exhibition_rows['place_r'] == exhibition_rows['place_c'])
)
# int() keeps the counters plain Python ints, as the original loop produced.
a = int(same_place.sum())
c = len(exhibition_rows)
b = c - a

In [15]:
# Unique exhibitions whose place_t, place_r and place_c are all equal.
a

2

In [16]:
# Unique exhibitions where place_t, place_r and place_c are not all equal.
b

9575

In [17]:
# Total number of unique exhibitions inspected.
c

9577

#### Same values for 'xplace_t', 'xplace_r', and 'xplace_c'

In [18]:
# Compare the three 'x'-prefixed place columns of every related element
# (first row kept for each unique 'xid').
#   a / incorrect_places -> rows where xplace_t == xplace_r == xplace_c;
#                           the list holds the 'xpost_type' of each such row
#   b / correct_places   -> rows where at least one of the three differs;
#                           the list holds the 'xpost_type' of each such row
#   c                    -> total number of rows inspected (a + b)
# Boolean masks replace the Python-level `iterrows()` loop. Both lists keep
# the frame's row order, so counting them later gives the same result.
related_rows = exhibitions_df.drop_duplicates('xid')
same_xplace = (
    (related_rows['xplace_t'] == related_rows['xplace_r'])
    & (related_rows['xplace_r'] == related_rows['xplace_c'])
)
incorrect_places = related_rows.loc[same_xplace, 'xpost_type'].tolist()
correct_places = related_rows.loc[~same_xplace, 'xpost_type'].tolist()
a = len(incorrect_places)
b = len(correct_places)
c = len(related_rows)

In [19]:
# Total number of unique related elements ('xid') inspected.
c

23941

In [20]:
# Unique related elements whose xplace_t, xplace_r and xplace_c are all equal.
a

20187

In [21]:
# Breakdown by 'xpost_type' of the elements whose three xplace values are
# equal, most frequent first.
Counter(incorrect_places).most_common()

[('actor', 20107), ('catálogo', 45), ('entidad', 31), ('exposición', 4)]

In [22]:
# Unique related elements where xplace_t, xplace_r and xplace_c are not all
# equal.
b

3754

In [23]:
# Breakdown by 'xpost_type' of the elements whose three xplace values are not
# all equal, most frequent first.
Counter(correct_places).most_common()

[('entidad', 2887), ('catálogo', 636), ('exposición', 218), ('empresa', 13)]

### Self-contained exhibitions

In [24]:
# Check how many related elements of type 'exposición' are themselves present
# as exhibitions in the 'ID' column.
#   a -> related exhibition ids that also appear in 'ID'
#   b -> related exhibition ids that do not appear in 'ID'
#   c -> total number of distinct related exhibition ids (a + b)
exhibition_ids = exhibitions_df.ID.unique()
# Membership is tested against a set: `x in <ndarray>` rescans the whole
# array for every id.
known_exhibition_ids = set(exhibition_ids)
related_exhibition_ids = exhibitions_df[exhibitions_df.xpost_type == 'exposición'].xid.unique()
a = sum(1 for x in related_exhibition_ids if x in known_exhibition_ids)
c = len(related_exhibition_ids)
b = c - a

In [25]:
# Related 'exposición' ids that also appear in the 'ID' column.
a

219

In [26]:
# Related 'exposición' ids that do not appear in the 'ID' column.
b

3

In [27]:
# Total number of distinct related 'exposición' ids checked.
c

222