In [1]:
import json
import pickle

import networkx as nx

In [2]:
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    # NOTE: pickle can execute arbitrary code on load; only use files produced by this project.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Dictionaries loaded from the pickle files below (presumably written by an earlier step of the pipeline).
relevant_properties_dict = _load_pickle('pickle/star-trek_relevant_properties_dict.pickle')
relevant_types_dict = _load_pickle('pickle/star-trek_relevant_types_dict.pickle')
relevant_instances_dict = _load_pickle('pickle/star-trek_relevant_instances_dict.pickle')
all_instances_dict = _load_pickle('pickle/star-trek_all_instances_dict.pickle')

In [3]:
# Properties and qualifiers
#
# If no qualifiers
#
# Case 1) value: string
# Actions:
# Property type: attribute
# Property name: property name
# Property value: value
# Property attributes: none
# More than one value: the values are serialized as a JSON list: '["v1", "v2", "v3"]'
#
# Case 2) value: object
# Actions:
# Property type: relationship
# Property name: property name
# Property value: value
# Property attributes: none
# More than one value: for each value, a relationship between the node and the value
#
# If qualifiers
#
# Case a) value: string, qualifier: string
# Actions:
# Property type: attribute
# Property name: property name
# Property value: '{"value": v, "qualifiers": {"q1": v1, "q2": v2}}'
# Property attributes: none
# More than one value: a JSON-serialized list of such dictionaries
#
# Case b) value: string, qualifier: object
# Actions:
# Property type: relationship
# Property name: qualifier name
# Property value: qualifier value
# Property attributes: {property name: value}
# More than one value: for each qualifier, a relationship between the node and the qualifier value
#
# Case c) value: object, qualifier: string
# Actions:
# Property type: relationship
# Property name: property name
# Property value: value
# Property attributes: {q1: v1, q2: v2}
# More than one value: for each value, a relationship between the node and the value
#
# Case d) value: object, qualifier: object
# Actions:
# Property type: relationship
# Property name: property name
# Property value: value
# Property attributes: none
# More than one value: for each value, a relationship between the node and the value
# For each qualifier:
# Property type: relationship
# Property name: qualifier name
# Property value: qualifier value
# Property attributes: none
# More than one qualifier value: for each value, a relationship between the node and the qualifier value

In [4]:
# Empty directed graph; the build cell further down assigns a fresh DiGraph to this name before filling it.
network = nx.DiGraph() # network of instances

In [5]:
def new_node_info(ide):
    """Build the node attributes for an instance looked up in ``all_instances_dict``.

    Every attribute is stored as a ``(value, value_type)`` pair. The node type is
    the placeholder ``'no type'``.

    Raises ``KeyError`` if ``ide`` (or one of the expected fields) is missing
    from ``all_instances_dict``.
    """
    record = all_instances_dict[ide]
    info = {'id': (ide, None), 'type': ('no type', None)}
    for field in ('name', 'wikilink', 'description'):
        info[field] = (record[field], 'string')
    return info

In [6]:
%%time

def _iter_qualifiers(qualifiers):
    """Yield ``(name, value, type)`` for every qualifier statement.

    ``qualifiers`` maps a property id to a list of ``{'value': ..., 'type': ...}``
    dicts. A falsy value (no qualifiers) yields nothing.
    """
    for q in qualifiers or ():
        q_name = relevant_properties_dict[q]['name']
        for q_inst in qualifiers[q]:
            yield q_name, q_inst['value'], q_inst['type']


def _link(graph, source, target, attrs):
    """Add (or update) the edge ``source -> target`` carrying ``attrs``.

    A target that is not in ``relevant_instances_dict`` first gets a stub node
    built by ``new_node_info``. Relevant targets receive their full attributes
    when the main loop reaches them.
    """
    if target not in relevant_instances_dict:
        graph.add_node(target, **new_node_info(target))
    # Attributes go in as keywords: the positional attr-dict argument only
    # exists in networkx 1.x, while keywords work on every version.
    graph.add_edge(source, target, **attrs)


network = nx.DiGraph() # network of instances (re-created here so the cell can be re-run safely)
for ide, instance in relevant_instances_dict.items():
    base = {
        'id': (ide, None),
        'type': (relevant_types_dict[instance['type']]['name'], None),
        'name': (instance['name'], 'string'),
        'wikilink': (instance['wikilink'], 'string'),
        'description': (instance['description'], 'string'),
    }
    attributes = {}
    for prop, statements in instance['properties'].items():
        p_name = relevant_properties_dict[prop]['name']
        p_attr_values = []
        for prop_inst in statements: # possible cases: 1, 2, a, b, c, d
            p_value = prop_inst['value']
            p_type = prop_inst['type']
            qualifiers = prop_inst['qualifiers']
            if p_type == 'object': # possible cases: 2, c, d
                _link(network, ide, p_value, {'label': (p_name, None)}) # case 2
                for q_name, q_value, q_type in _iter_qualifiers(qualifiers):
                    if q_type == 'object':
                        _link(network, ide, q_value, {'label': (q_name, None)}) # case d
                    else:
                        # Update the existing edge with the literal qualifier.
                        network.add_edge(ide, p_value,
                                         **{'label': (p_name, None), q_name: (q_value, q_type)}) # case c
            else: # possible cases: 1, a, b
                # Only literal qualifiers are collected here. Object qualifiers
                # become edges, so a statement whose qualifiers are all objects
                # falls back to a plain (case 1) attribute value.
                q_dict = {}
                for q_name, q_value, q_type in _iter_qualifiers(qualifiers):
                    if q_type == 'object':
                        _link(network, ide, q_value,
                              {'label': (q_name, None), p_name: (p_value, p_type)}) # case b
                    else:
                        q_dict.setdefault(q_name, []).append((q_value, q_type))
                if q_dict:
                    p_attr_values.append(({'value': (p_value, p_type), 'qualifiers': q_dict}, 'string')) # case a
                else:
                    p_attr_values.append((p_value, p_type)) # case 1
        if len(p_attr_values) == 1:
            value, value_type = p_attr_values[0]
            if value_type == 'string' and isinstance(value, dict):
                # A lone case-a record is tagged 'string', so serialize it the
                # same way the multi-value branch below does.
                value = json.dumps(value)
            attributes[p_name] = (value, value_type)
        elif p_attr_values:
            attributes[p_name] = (json.dumps(p_attr_values), 'string')
    # Property attributes are applied after the base fields, so a property that
    # shares a name with a base field overrides it.
    info = {}
    info.update(base)
    info.update(attributes)
    network.add_node(ide, **info)

CPU times: user 79.3 ms, sys: 0 ns, total: 79.3 ms
Wall time: 79.4 ms


In [7]:
# Number of relevant instances; the build loop adds each of them as a node.
len(relevant_instances_dict)

1009

In [8]:
# Total node count: relevant instances plus the stub nodes created for other referenced instances.
network.number_of_nodes()

1087

In [9]:
# Total number of directed edges in the graph.
network.number_of_edges()

4720

In [10]:
# Peek at two nodes. The result is turned into a list before slicing because
# networkx >= 2.0 returns a view that cannot be sliced (1.x already returns a list).
list(network.nodes(data=True))[:2]

[('Q3506645',
  {'description': ('episode of Star Trek: The Next Generation', 'string'),
   'id': ('Q3506645', None),
   'name': ('Peak Performance', 'string'),
   'type': ('episode', None),
   'wikilink': ('Peak Performance (Star Trek: The Next Generation)',
    'string')}),
 ('Q7769334',
  {'description': ('Star Trek: The Animated Series episode', 'string'),
   'id': ('Q7769334', None),
   'name': ('The Time Trap', 'string'),
   'type': ('episode', None),
   'wikilink': ('The Time Trap', 'string')})]

In [11]:
# Peek at two edges. The result is turned into a list before slicing because
# networkx >= 2.0 returns a view that cannot be sliced (1.x already returns a list).
list(network.edges(data=True))[:2]

[('Q3506645', 'Q30', {'label': ('country of origin', None)}),
 ('Q3506645', 'Q604559', {'label': ('part of', None)})]

In [12]:
# Show the edges that carry extra attributes besides their 'label'.
for source, target, attrs in network.edges(data=True):
    if len(attrs) > 1:
        print((source, target, attrs))

('Q25366', 'Q183', {'publication date': ('1973-10-06', 'date'), 'label': ('country', None)})
('Q25366', 'Q30', {'publication date': ('1966-09-22', 'date'), 'label': ('country', None)})
('Q25366', 'Q142', {'publication date': ('1986-05-07', 'date'), 'label': ('country', None)})
('Q7601031', 'Q48263', {'label': ('platform', None), 'Metacritic ID': ('xbox-360/star-trek-the-video-game', 'string')})
('Q7601031', 'Q16338', {'label': ('platform', None), 'Metacritic ID': ('pc/star-trek-the-video-game', 'string')})
('Q7601031', 'Q10683', {'label': ('platform', None), 'Metacritic ID': ('playstation-3/star-trek-the-video-game', 'string')})


In [13]:
# Save the finished graph to a pickle file.
with open('pickle/star-trek_network_graph.pickle', 'wb') as graph_file:
    pickle.dump(network, graph_file)