In [1]:
import json

import networkx as nx

# Building graph

In [107]:
# Directed multigraph: the later cells add several differently labelled
# edges, which may connect the same pair of nodes.
graph = nx.MultiDiGraph()

## Nodes

In [108]:
def _joined_terms(post, taxonomy):
    """Return the names of the post's terms in `taxonomy`, joined with '; '."""
    return '; '.join(t['name'] for t in post['terms'] if t['taxonomy'] == taxonomy)


def _first_meta(post, key):
    """Return the first value stored under `key` in the post's meta.

    Returns '' when the key is absent or holds no values. Membership test
    plus indexing is used on purpose: it is the only access pattern the rest
    of the notebook relies on for `meta`.
    """
    meta = post['meta']
    if key not in meta or not meta[key]:
        return ''
    return meta[key][0]


def _last_meta_part(post, key):
    """Return the last '; '-separated part of the first value under `key`.

    The notebook stores that trailing part as the node's nationality.
    Returns '' when the key is absent.
    """
    return _first_meta(post, key).split('; ')[-1]


# One node per post, keyed by the stringified post ID. The attribute set
# depends on the post type.
for c in ['ent', 'per', 'pub', 'emp', 'exp']:
    with open('data/' + c + '-posts.json') as f:
        js = json.load(f)
    # The first element of every file is skipped, as before
    # (presumably a header record -- TODO confirm).
    for e in js[1:]:
        ide = str(e['ID'])
        typ = e['post_type']
        # Start from the common attributes so that a post of an unexpected
        # type still gets a node of its own instead of inheriting the
        # attributes of the previously processed post.
        data = {'id': ide, 'type': typ}
        if typ == 'entity':
            data.update({
                'name': e['post_title'],
                'keyword': _joined_terms(e, 'tax_keyword'),
                'typology': _joined_terms(e, 'tax_typology'),
                'ownership': _joined_terms(e, 'tax_ownership'),
                'nationality': _last_meta_part(e, '_cp__ent_town'),
            })
        elif typ == 'person':
            data.update({
                'name': e['post_title'],
                'activity': _joined_terms(e, 'tax_activity'),
                'chronology': '',
                # Guard and read the same key: the previous code tested for
                # '_cp__ent_town' before reading '_cp__peo_country'.
                'nationality': _last_meta_part(e, '_cp__peo_country'),
            })
        elif typ == 'book':
            data.update({
                'title': e['post_title'],
                'publisher': _joined_terms(e, 'tax_publisher'),
                'date': _first_meta(e, '_cp__boo_publishing_date'),
            })
        elif typ == 'company':
            data.update({
                'name': e['post_title'],
                'activity': _joined_terms(e, 'tax_isic4_category'),
                # Guarded like every other type, so a company without a
                # headquarters entry no longer raises KeyError.
                'nationality': _last_meta_part(e, '_cp__com_company_headquarter_place'),
            })
        elif typ == 'exhibition':
            data.update({
                # Use this post's own title (previously a stale `name` left
                # over from an earlier post was stored here).
                'title': e['post_title'],
                'movement': _joined_terms(e, 'tax_movement'),
                'period': _joined_terms(e, 'tax_period'),
                'topic': _joined_terms(e, 'tax_topic'),
                'artwork_type': _joined_terms(e, 'tax_artwork_type'),
                'typology': _joined_terms(e, 'tax_exhibition_type'),
                'nationality': _last_meta_part(e, '_cp__exh_exhibition_town'),
            })
        # Keyword expansion is accepted by every networkx release, whereas a
        # positional attribute dict is rejected from networkx 2.0 onwards.
        graph.add_node(ide, **data)

## Relationships

In [109]:
# Largest numeric node id currently in the graph, kept as a string like the
# node keys themselves; increase_id() counts up from here.
highest_id = str(max(map(int, graph)))

In [110]:
def increase_id():
    """Advance the module-level ``highest_id`` counter by one.

    The counter is stored as a string, so it is converted to an integer,
    incremented, and written back as a string. Nothing is returned.
    """
    global highest_id
    bumped = int(highest_id) + 1
    highest_id = str(bumped)

In [111]:
def get_id_and_name(s):
    """Split a relation reference into an ``(id, name)`` pair.

    A reference shaped like ``'<id>: <name>'`` is split on the first
    ``': '`` only, so a name that itself contains ``': '`` is returned whole
    instead of being cut at its second separator.

    A reference without the separator carries no id: a fresh one is minted
    with ``increase_id()`` and the whole string is used as the name.

    Returns:
        tuple: ``(id, name)``, both strings.
    """
    ide, separator, name = s.partition(': ')
    if not separator:
        increase_id()
        ide, name = highest_id, s
    return (ide, name)

In [112]:
# Relations to extract, per post type, as
# (meta key, type of the related node, post_is_source).
# The meta key doubles as the edge label. `post_is_source` is True when the
# edge runs from the post to the related node; otherwise it runs from the
# related node to the post.
_RELATIONS = {
    'person': [
        ('_cp__peo_entity_relation', 'entity', True),
    ],
    'book': [
        ('_cp__boo_paper_author', 'person', False),
        ('_cp__boo_sponsorship', 'entity', False),
    ],
    'exhibition': [
        ('_cp__exh_info_source', 'entity', False),
        ('_cp__exh_supporter_entity', 'entity', False),
        ('_cp__exh_funding_entity', 'entity', False),
        ('_cp__exh_artwork_author', 'person', False),
        ('_cp__exh_curator', 'person', False),
        ('_cp__exh_art_collector', 'person', False),
        ('_cp__exh_museography', 'company', False),
        ('_cp__exh_catalog', 'book', False),
    ],
}

# Attribute names of a placeholder node, per node type, matching the
# attributes given to nodes built from real posts. The first field receives
# the name taken from the reference; the others are left empty.
_PLACEHOLDER_FIELDS = {
    'entity': ('name', 'keyword', 'typology', 'ownership', 'nationality'),
    'person': ('name', 'activity', 'chronology', 'nationality'),
    'company': ('name', 'activity', 'nationality'),
    'book': ('title', 'publisher', 'date'),
}


def _ensure_node(node_id, node_type, label):
    """Add a placeholder node for a referenced item that has no post of its own.

    Does nothing when `node_id` is already in the graph.
    """
    if node_id in graph:
        return
    fields = _PLACEHOLDER_FIELDS[node_type]
    data = {'id': node_id, 'type': node_type}
    data.update((field, '') for field in fields)
    data[fields[0]] = label
    graph.add_node(node_id, **data)


for c in ['ent', 'per', 'pub', 'emp', 'exp']:
    with open('data/' + c + '-posts.json') as f:
        js = json.load(f)
    # The first element of every file is skipped, as in the node-building
    # cell (presumably a header record -- TODO confirm).
    for e in js[1:]:
        # Node keys are strings, so the post ID must be stringified before it
        # is used as an edge endpoint.
        post_id = str(e['ID'])
        for meta_key, related_type, post_is_source in _RELATIONS.get(e['post_type'], ()):
            # Membership test plus indexing, as in the rest of the notebook.
            if meta_key not in e['meta']:
                continue
            for reference in e['meta'][meta_key]:
                # Keep the related id in its own variable: the post id is
                # still needed as the other end of the edge.
                related_id, related_name = get_id_and_name(reference)
                _ensure_node(related_id, related_type, related_name)
                if post_is_source:
                    graph.add_edge(post_id, related_id, label=meta_key)
                else:
                    graph.add_edge(related_id, post_id, label=meta_key)

# Exporting to Sylva

In [14]:
# Mappings
#
# Post type used in the graph -> Spanish class name
# (presumably the type label used on the Sylva side -- TODO confirm).
classes = dict(
    entity='Entidad',
    person='Persona',
    book='Publicación',
    company='Empresa',
    exhibition='Exposición',
)

Propiedades y relaciones de cada clase

In [None]:
# TODO: copiar código para generar schema y CSVs