In [1]:
import json
import pickle

import networkx as nx

In [2]:
def _load_pickle(path):
    """Return the object stored in the pickle file at `path`."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Lookup tables used to build the network; each one is indexed by id in the build loop.
relevant_properties_dict = _load_pickle('pickle/star-trek_relevant_properties_dict.pickle')
relevant_types_dict = _load_pickle('pickle/star-trek_relevant_types_dict.pickle')
relevant_instances_dict = _load_pickle('pickle/star-trek_relevant_instances_dict.pickle')
all_instances_dict = _load_pickle('pickle/star-trek_all_instances_dict.pickle')

In [3]:
# Properties and qualifiers
#
# If no qualifiers
#
# Case 1) value: string
# Actions:
# Property type: attribute
# Property name: property name
# Property value: value
# Property attributes: none
# More than one value: JSON-serialized list, e.g. '["v1", "v2", "v3"]'
#
# Case 2) value: object
# Actions:
# Property type: relationship
# Property name: property name
# Property value: value
# Property attributes: none
# More than one value: for each value, a relationship between the node and the value
#
# If qualifiers
#
# Case a) value: string, qualifier: string
# Actions:
# Property type: attribute
# Property name: property name
# Property value: '{"value": v, "qualifiers": {"q1": v1, "q2": v2}}'
# Property attributes: none
# More than one value: list of dictionaries serialized
#
# Case b) value: string, qualifier: object
# Actions:
# Property type: relationship
# Property name: qualifier name
# Property value: qualifier value
# Property attributes: {property name: value}
# More than one value: for each qualifier, a relationship between the node and the qualifier value
#
# Case c) value: object, qualifier: string
# Actions:
# Property type: relationship
# Property name: property name
# Property value: value
# Property attributes: {q1: v1, q2: v2}
# More than one value: for each value, a relationship between the node and the value
#
# Case d) value: object, qualifier: object
# Actions:
# Property type: relationship
# Property name: property name
# Property value: value
# Property attributes: none
# More than one value: for each value, a relationship between the node and the value
# For each qualifier:
# Property type: relationship
# Property name: qualifier name
# Property value: qualifier value
# Property attributes: none
# More than one qualifier value: for each value, a relationship between the node and the qualifier value

In [4]:
# Directed graph whose nodes are instances.
# NOTE(review): the following cell rebinds `network` to a fresh DiGraph before
# populating it, so this cell looks redundant.
network = nx.DiGraph() # network of instances

In [5]:
%%time

# Build the instance network.
#
# Every relevant instance becomes a node. For each property statement:
#   * a value that is itself a relevant instance becomes an edge (cases 2, c, d);
#   * any other value becomes a node attribute (cases 1, a), with objects outside
#     the network replaced by their name.
# Qualifiers pointing at relevant instances become edges of their own (cases b, d);
# the remaining qualifiers are stored as data on the edge (case c) or next to the
# attribute value (case a).
#
# Attributes are passed to add_edge/add_node as keyword arguments (`**attrs`):
# that form is accepted by NetworkX 1.x as well as later releases, whereas a
# positional attribute dict only works on 1.x.
network = nx.DiGraph()  # network of instances


def _is_network_object(value, value_type):
    """Return True when the value is an object that is also a node of the network."""
    return value_type == 'object' and value in relevant_instances_dict


def _as_literal(value, value_type):
    """Return the value as plain data: objects outside the network become their name."""
    if value_type == 'object' and value not in relevant_instances_dict:
        return all_instances_dict[value]['name']
    return value


for ide, instance in relevant_instances_dict.items():
    base = {
        'id': ide,
        'type': relevant_types_dict[instance['type']]['name'],
        'name': instance['name'],
        'wikilink': instance['wikilink'],
        'description': instance['description'],
    }
    attributes = {}
    for prop, prop_insts in instance['properties'].items():
        p_name = relevant_properties_dict[prop]['name']
        p_attr_values = []
        for prop_inst in prop_insts:  # possible cases: 1, 2, a, b, c, d
            p_value = prop_inst['value']
            p_type = prop_inst['type']
            qualifiers = prop_inst['qualifiers']
            if _is_network_object(p_value, p_type):  # possible cases: 2, c, d
                network.add_edge(ide, p_value, **{'label': p_name})  # case 2
                if qualifiers:  # possible cases: c, d
                    for q, q_insts in qualifiers.items():
                        q_name = relevant_properties_dict[q]['name']
                        for q_inst in q_insts:
                            q_value = q_inst['value']
                            q_type = q_inst['type']
                            if _is_network_object(q_value, q_type):
                                network.add_edge(ide, q_value, **{'label': q_name})  # case d
                            else:
                                # case c: every non-network qualifier (plain strings
                                # included) is recorded on the property edge. Adding
                                # the same edge again merges the new data into it; a
                                # repeated qualifier name keeps only its last value.
                                q_value = _as_literal(q_value, q_type)
                                network.add_edge(ide, p_value, **{'label': p_name, q_name: q_value})
            else:  # possible cases: 1, a, b
                p_value = _as_literal(p_value, p_type)
                q_dict = {}
                if qualifiers:  # possible cases: a, b
                    for q, q_insts in qualifiers.items():
                        q_name = relevant_properties_dict[q]['name']
                        for q_inst in q_insts:
                            q_value = q_inst['value']
                            q_type = q_inst['type']
                            if _is_network_object(q_value, q_type):
                                network.add_edge(ide, q_value, **{'label': q_name, p_name: p_value})  # case b
                            else:
                                # A key is created only when there is something to
                                # store, so q_dict stays empty when every qualifier
                                # turned into an edge.
                                q_dict.setdefault(q_name, []).append(_as_literal(q_value, q_type))
                if q_dict:
                    p_attr_values.append({'value': p_value, 'qualifiers': q_dict})  # case a
                else:
                    p_attr_values.append(p_value)  # case 1
        if p_attr_values:
            # A single value is stored as-is; several values are JSON-serialized.
            # NOTE(review): a lone case-a value is therefore stored as a dict, not
            # as a JSON string -- confirm that is what downstream code expects.
            if len(p_attr_values) == 1:
                attributes[p_name] = p_attr_values[0]
            else:
                attributes[p_name] = json.dumps(p_attr_values)
    # Property attributes are applied after the base fields, so a property whose
    # name equals a base key replaces that base field.
    info = dict(base)
    info.update(attributes)
    network.add_node(ide, **info)

CPU times: user 65.9 ms, sys: 7.9 ms, total: 73.8 ms
Wall time: 74.2 ms


In [6]:
# Number of relevant instances; the build loop adds one node per entry.
len(relevant_instances_dict)

1281

In [7]:
# Node count of the graph.
network.number_of_nodes()

1281

In [8]:
# Edge count of the graph.
network.number_of_edges()

4613

In [9]:
# Peek at the first two nodes with their attributes. The result of nodes() is
# materialized with list() before slicing, so this does not depend on nodes()
# returning a list (it does so only on NetworkX 1.x).
list(network.nodes(data=True))[:2]

[('Q7601028',
  {'description': '',
   'id': 'Q7601028',
   'name': 'Star Trek: Year Four',
   'type': 'limited series',
   'wikilink': 'Star Trek: Year Four'}),
 ('Q7840052',
  {'OCLC control number': '63083895',
   'description': 'Star Trek: The Original Series novel',
   'id': 'Q7840052',
   'name': 'Triangle',
   'type': 'book',
   'wikilink': 'Triangle (novel)'})]

In [10]:
# Peek at the first two edges with their attributes. The result of edges() is
# materialized with list() before slicing, so this does not depend on edges()
# returning a list (it does so only on NetworkX 1.x).
list(network.edges(data=True))[:2]

[('Q7840052', 'Q1077', {'label': 'series'}),
 ('Q7840052', 'Q1860', {'label': 'original language of work'})]

In [11]:
# Persist the populated graph to disk.
with open('pickle/star-trek_network_graph.pickle', 'wb') as graph_file:
    pickle.dump(network, graph_file)