In [1]:
import json
import pickle

import networkx as nx

In [2]:
with open('pickle/star-trek_relevant_properties_dict.pickle', 'rb') as f:
    relevant_properties_dict = pickle.load(f)

with open('pickle/star-trek_relevant_types_dict.pickle', 'rb') as f:
    relevant_types_dict = pickle.load(f)

with open('pickle/star-trek_relevant_instances_dict.pickle', 'rb') as f:
    relevant_instances_dict = pickle.load(f)

with open('pickle/star-trek_all_instances_dict.pickle', 'rb') as f:
    all_instances_dict = pickle.load(f)

In [3]:
# Properties and qualifiers
#
# If no qualifiers
#
# Case 1) value: string
# Actions:
# Property type: attribute
# Property name: property name
# Property value: value
# Property attributes: none
# More than one value: list serialized: '["v1", "v2", "v3"]'
#
# Case 2) value: object
# Actions:
# Property type: relationship
# Property name: property name
# Property value: value
# Property attributes: none
# More than one value: for each value, a relationship between the node and the value
#
# If qualifiers
#
# Case a) value: string, qualifier: string
# Actions:
# Property type: attribute
# Property name: property name
# Property value: '{"value": v, "qualifiers": {"q1": v1, "q2": v2}}'
# Property attributes: none
# More than one value: list of dictionaries serialized
#
# Case b) value: string, qualifier: object
# Actions:
# Property type: relationship
# Property name: qualifier name
# Property value: qualifier value
# Property attributes: {property name: value}
# More than one value: for each qualifier, a relationship between the node and the qualifier value
#
# Case c) value: object, qualifier: string
# Actions:
# Property type: relationship
# Property name: property name
# Property value: value
# Property attributes: {q1: v1, q2: v2}
# More than one value: for each value, a relationship between the node and the value
#
# Case d) value: object, qualifier: object
# Actions:
# Property type: relationship
# Property name: property name
# Property value: value
# Property attributes: none
# More than one value: for each value, a relationship between the node and the value
# For each qualifier:
# Property type: relationship
# Property name: qualifier name
# Property value: qualifier value
# Property attributes: none
# More than one qualifier value: for each value, a relationship between the node and the qualifier value

In [4]:
relevant_instances_dict['Q25366']

{'description': 'Star Trek: The Original Series episode',
 'name': 'Where No Man Has Gone Before',
 'properties': {'P155': [{'qualifiers': {},
    'type': 'object',
    'value': 'Q20909'},
   {'qualifiers': {}, 'type': 'object', 'value': 'Q2659593'}],
  'P156': [{'qualifiers': {}, 'type': 'object', 'value': 'Q1187077'},
   {'qualifiers': {}, 'type': 'object', 'value': 'Q2984478'}],
  'P179': [{'qualifiers': {}, 'type': 'object', 'value': 'Q1077'}],
  'P345': [{'qualifiers': {}, 'type': 'string', 'value': 'tt0061027'}],
  'P361': [{'qualifiers': {}, 'type': 'object', 'value': 'Q747598'}],
  'P364': [{'qualifiers': {}, 'type': 'object', 'value': 'Q1860'}],
  'P495': [{'qualifiers': {}, 'type': 'object', 'value': 'Q30'}],
  'P577': [{'qualifiers': {'P17': [{'type': 'object', 'value': 'Q30'}]},
    'type': 'date',
    'value': '1966-09-22'},
   {'qualifiers': {'P17': [{'type': 'object', 'value': 'Q183'}]},
    'type': 'date',
    'value': '1973-10-06'},
   {'qualifiers': {'P17': [{'type': 

In [21]:
network = nx.DiGraph() # network of instances

In [32]:
%%time

network = nx.DiGraph() # network of instances
for ide in relevant_instances_dict:
    if ide == 'Q7562328':
        print(ide)
    instance = relevant_instances_dict[ide]
    base = {
        'id': ide,
        'type': (instance['type'], relevant_types_dict[instance['type']]['name']),
        'name': instance['name'],
        'wikilink': instance['wikilink'],
        'description': instance['description'],
    }
    attributes = {}
    for prop in instance['properties']:
        if ide == 'Q7562328':
            print(prop, relevant_properties_dict[prop]['name'])
        p_name = relevant_properties_dict[prop]['name']
        p_attr_values = []
        for prop_inst in instance['properties'][prop]: # possible cases: 1, 2, a, b, c, d
            p_value = prop_inst['value']
            p_type = prop_inst['type']
            if p_type == 'object' and p_value in relevant_instances_dict: # possible cases: 2, c, d
                network.add_edge(ide, p_value, {'label': p_name}) # case 2
                qualifiers = prop_inst['qualifiers']
                if qualifiers: # possible cases: c, d
                    for q in qualifiers:
                        q_name = relevant_properties_dict[q]['name']
                        for q_inst in qualifiers[q]:
                            q_value = q_inst['value']
                            q_type = q_inst['type']
                            if q_type == 'object' and q_value in relevant_instances_dict:
                                network.add_edge(ide, q_value, {'label': q_name}) # case d
                            else:
                                if q_type == 'object' and q_value not in relevant_instances_dict:
                                    q_value = all_instances_dict[q_value]['name']
                                    network.add_edge(ide, p_value, {'label': p_name, q_name: q_value}) # case c
            else: # possible cases: 1, a, b
                if p_type == 'object' and p_value not in relevant_instances_dict:
                    p_value = all_instances_dict[p_value]['name']
                qualifiers = prop_inst['qualifiers']
                if not qualifiers:
                    p_attr_values.append(p_value) # case 1
                else: # possible cases: 1, a, b
                    q_dict = {}
                    for q in qualifiers:
                        q_name = relevant_properties_dict[q]['name']
                        q_dict[q_name] = []
                        for q_inst in qualifiers[q]:
                            q_value = q_inst['value']
                            q_type = q_inst['type']
                            if q_type == 'object' and q_value in relevant_instances_dict:
                                network.add_edge(ide, q_value, {'label': q_name, p_name: p_value}) # case b
                            else:
                                if q_type == 'object' and q_value not in relevant_instances_dict:
                                    q_value = all_instances_dict[q_value]['name']
                                q_dict[q_name].append(q_value)
                    if not q_dict:
                        p_attr_values.append(p_value) # case 1
                    else:
                        p_attr_values.append({'value': p_value, 'qualifiers': q_dict}) # case a
        if p_attr_values:
            if len(p_attr_values) == 1:
                attr_value = p_attr_values[0]
            else:
                attr_value = json.dumps(p_attr_values)
            attributes[p_name] = attr_value
    info = {}
    info.update(base)
    info.update(attributes)
    network.add_node(ide, info)

Q7562328
P155 follows
P495 country of origin
P361 part of
P179 series
P156 followed by
P345 IMDb identifier
P364 original language of work
CPU times: user 32.9 ms, sys: 0 ns, total: 32.9 ms
Wall time: 33 ms


In [23]:
len(relevant_instances_dict)

1000

In [24]:
len(network)

1000

In [29]:
network['Q8036508']

{}

In [27]:
network.edges(data=True)

[('Q7562328', 'Q108774', {'label': 'series'}),
 ('Q7562328', 'Q1860', {'label': 'original language of work'}),
 ('Q7562328', 'Q1092502', {'label': 'follows'}),
 ('Q7562328', 'Q3468897', {'label': 'part of'}),
 ('Q7562328', 'Q4857904', {'label': 'followed by'}),
 ('Q5281321', 'Q3468983', {'label': 'part of'}),
 ('Q5281321', 'Q1860', {'label': 'original language of work'}),
 ('Q5281321', 'Q7735700', {'label': 'followed by'}),
 ('Q5281321', 'Q16290', {'label': 'series'}),
 ('Q5281321', 'Q16744119', {'label': 'follows'}),
 ('Q3496615', 'Q1860', {'label': 'original language of work'}),
 ('Q3496615', 'Q5422685', {'label': 'followed by'}),
 ('Q3496615', 'Q3469077', {'label': 'part of'}),
 ('Q3496615', 'Q7784320', {'label': 'follows'}),
 ('Q3496615', 'Q16290', {'label': 'series'}),
 ('Q4880330', 'Q1860', {'label': 'original language of work'}),
 ('Q4880330', 'Q3468788', {'label': 'part of'}),
 ('Q4880330', 'Q156329', {'label': 'series'}),
 ('Q4880330', 'Q5438625', {'label': 'follows'}),
 ('Q48

In [25]:
network.nodes(data=True)

[('Q8036508',
  {'description': '',
   'id': 'Q8036508',
   'name': 'World Without End',
   'type': ('Q571', 'book'),
   'wikilink': 'World Without End (Haldeman novel)'}),
 ('Q7562328',
  {'IMDb identifier': 'tt0708641',
   'country of origin': 'United States of America',
   'description': '',
   'id': 'Q7562328',
   'name': 'Sons of Mogh',
   'type': ('Q1983062', 'episode'),
   'wikilink': 'Sons of Mogh'}),
 ('Q5281321',
  {'IMDb identifier': 'tt0708703',
   'country of origin': 'United States of America',
   'description': 'Star Trek',
   'id': 'Q5281321',
   'name': 'Disaster',
   'type': ('Q1983062', 'episode'),
   'wikilink': 'Disaster (Star Trek: The Next Generation)'}),
 ('Q48263',
  {'description': 'video game console developed by and produced for Microsoft',
   'id': 'Q48263',
   'name': 'Xbox 360',
   'type': ('Q8076', 'video game console'),
   'wikilink': 'Xbox 360'}),
 ('Q208189',
  {'description': 'strategy video game subgenre',
   'id': 'Q208189',
   'name': 'real-time s