# Exhibitions

In [1]:
import networkx as nx
import pandas as pd

## Loading data

In [2]:
%%time

exhibitions_df = pd.read_csv('data/out/exhibitions.csv')
exhibitions_df.fillna('', inplace=True)

CPU times: user 840 ms, sys: 184 ms, total: 1.02 s
Wall time: 1.02 s


In [3]:
exhibitions_df.shape

(72740, 26)

In [4]:
exhibitions_df.head()

Unnamed: 0,ID,post_type,post_title,place_t,place_r,place_c,start_y,start_m,start_d,end_y,...,xplace_t,xplace_r,xplace_c,xstart_y,xstart_m,xstart_d,xend_y,xend_m,xend_d,xgender
0,117998,exposición,"""Elizabeth Blackwell"", Carmen Oliver",Granada,Andalucía,España,2017,3,1,2017,...,España,España,España,1985.0,1.0,1.0,,,,Femenino
1,117998,exposición,"""Elizabeth Blackwell"", Carmen Oliver",Granada,Andalucía,España,2017,3,1,2017,...,Granada,Andalucía,España,2017.0,1.0,1.0,,,,NO APLICA
2,117998,exposición,"""Elizabeth Blackwell"", Carmen Oliver",Granada,Andalucía,España,2017,3,1,2017,...,Granada,Andalucía,España,,,,,,,NO APLICA
3,117998,exposición,"""Elizabeth Blackwell"", Carmen Oliver",Granada,Andalucía,España,2017,3,1,2017,...,Granada,Andalucía,España,,,,,,,NO APLICA
4,108998,exposición,"""European Masterworks. Paintings from the Coll...",Nashville,Tennessee,Estados Unidos,2001,4,8,2001,...,Francia,Francia,Francia,1840.0,11.0,14.0,1926.0,12.0,5.0,Masculino


## Exploring values by type

In [5]:
for t in exhibitions_df.xpost_type.unique():
    print(t)
    print(exhibitions_df[exhibitions_df.xpost_type == t].xpost_title.value_counts()[:5])
    print()

actor
[Sin determinar]                 1272
Pablo Ruiz Picasso               1147
Colectivo sin determinar          172
Joan Miró                         126
Francisco de Goya y Lucientes     118
Name: xpost_title, dtype: int64

catálogo
Desconocido                       6
El Paisaje nórdico en el Prado    5
Arqueología preventiva            4
Heroínas                          2
Fábulas                           2
Name: xpost_title, dtype: int64

entidad
Diputación de Málaga                    522
Sala Aires                              460
Museo Nacional del Prado                370
Centro Andaluz de la Fotografía. CAF    341
Institut Valencià d'Art Modern          339
Name: xpost_title, dtype: int64

exposición
“Generación del 27 y su época”. Mollina                 17
Los Carteles de Miró. Nerja                             11
Huelva. La luz dentro del tiempo                         8
Modern masters from the Cleveland Museum of Art          8
"Málaga y Cervantes. La espada y la pluma

In [6]:
exhibitions_df[exhibitions_df.xpost_title == '[Sin determinar]'].xid.unique()

array([55293])

In [7]:
exhibitions_df[exhibitions_df.xpost_title == 'Colectivo sin determinar'].xid.unique()

array([23812])

## Processing data

In [8]:
def extract_e_info(row):
    return {
        'id': row.ID,
        'type': row.post_type,
        'title': row.post_title,
        'place_t': row.place_t,
        'place_r': row.place_r,
        'place_c': row.place_c,
        'start_y': row.start_y,
        'start_m': row.start_m,
        'start_d': row.start_d,
        'end_y': row.end_y,
        'end_m': row.end_m,
        'end_d': row.end_d,
    }

def extract_x_info(row):
    return {
        'id': row.xid,
        'type': row.xpost_type,
        'title': row.xpost_title,
        'place_t': row.xplace_t,
        'place_r': row.xplace_r,
        'place_c': row.xplace_c,
        'start_y': row.xstart_y,
        'start_m': row.xstart_m,
        'start_d': row.xstart_d,
        'end_y': row.xend_y,
        'end_m': row.xend_m,
        'end_d': row.xend_d,
        'gender': row.xgender,
    }

In [9]:
%%time

g = nx.DiGraph()
for i, row in exhibitions_df.iterrows():
    e = extract_e_info(row)
    x = extract_x_info(row)
    g.add_node(e['id'], **e)
    g.add_node(x['id'], **x)
    g.add_edge(e['id'], x['id'], label=row.xkey)

CPU times: user 41.7 s, sys: 72 ms, total: 41.8 s
Wall time: 41.8 s


In [10]:
g.remove_node(55293) # Removing actor '[Sin determinar]'
g.remove_node(23812) # Removing actor 'Colectivo sin determinar'
for n, data in list(g.nodes(data=True)): # Removing nodes of type 'catálogo' and 'empresa'
    if data['type'] == 'catálogo' or data['type'] == 'empresa':
        g.remove_node(n)

In [11]:
g.number_of_nodes()

32603

In [12]:
g.number_of_edges()

62869

In [13]:
g.number_of_selfloops()

8

In [14]:
nx.write_gexf(g, 'data/out/exhibitions.gexf')