In [1]:
import json
import pickle

import networkx as nx
import requests

In [2]:
# Sanity check: a flat dict is serialized as a JSON object string.
json.dumps({'a':1})

'{"a": 1}'

In [3]:
# A tuple is serialized as a JSON array; the nested dict becomes an object.
json.dumps(('a', {'b':2}))

'["a", {"b": 2}]'

In [4]:
# Value-plus-qualifiers shape used for serialized property values.
# The recorded output lists the keys in a different order than this literal;
# pass sort_keys=True to json.dumps when a stable key order is needed.
json.dumps({"value": 0, "qualifiers": {"q1": 1, "q2": 2}})

'{"qualifiers": {"q2": 2, "q1": 1}, "value": 0}'

In [23]:
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    # SECURITY: unpickling can execute arbitrary code, so only load files that
    # were produced by a trusted run of this project.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# NOTE(review): the helper functions defined further down also read
# `only_relevant_types_dict` and `all_instances_and_types_dict`, which are not
# loaded here -- confirm where they are supposed to come from.
only_relevant_properties_dict = _load_pickle('pickle/star-trek_only_relevant_properties_dict.pickle')
wikidata_types_to_sylva_classes = _load_pickle('pickle/star-trek_wikidata_types_to_sylva_classes.pickle')
only_relevant_instances_dict = _load_pickle('pickle/star-trek_only_relevant_instances_dict.pickle')
hierarchy = _load_pickle('pickle/star-trek_hierarchy_graph.pickle')

In [6]:
# Directed graph that the later cells add instance relationships to.
graph = nx.DiGraph() # network

In [7]:
# for ide in only_relevant_instances_dict:
#     assert(only_relevant_instances_dict[ide]['type'] in only_relevant_types_dict)
#     for prop in only_relevant_instances_dict[ide]['properties']:
#         assert(prop in only_relevant_properties_dict)

In [26]:
# Scratch graph for checking how networkx stores node and edge attributes.
g = nx.DiGraph()
# Attributes are passed as keywords: a positional attribute dict is only
# accepted by networkx 1.x, while keywords work on 1.x and 2.x alike.
g.add_node(1, a=2)
g.add_edge(1,2)

In [27]:
# Re-adding an existing node with new attributes (keyword form, which works
# on networkx 1.x and 2.x; the positional dict form is 1.x only).
g.add_node(1, b=3)

In [24]:
# Adding the same edge twice with different keyword attributes; the recorded
# g.edges(data=True) output shows both merged as {'x': 1, 'y': 2}.
g.add_edge(3,4, x=1)
g.add_edge(3,4, y=2)

In [28]:
# List the nodes together with their attribute dicts.
g.nodes(data=True)

[(1, {'a': 2, 'b': 3}), (2, {})]

In [10]:
# Attach an attribute to node 2, which already exists as an edge endpoint
# (keyword form works on networkx 1.x and 2.x; the positional dict is 1.x only).
g.add_node(2, d=1)

In [11]:
# Inspect the nodes again after attaching attributes to node 2.
g.nodes(data=True)

[(1, {'a': 2}), (2, {'d': 1})]

In [25]:
# List the edges together with their attribute dicts.
g.edges(data=True)

[(1, 2, {}), (3, 4, {'x': 1, 'y': 2})]

In [13]:
# Update node 1's attribute dict in place.
# NOTE(review): `G.node` is the networkx 1.x spelling; newer networkx releases
# expose the same mapping as `G.nodes[n]` -- confirm the installed version.
g.node[1].update({'b':3})

In [22]:
def get_direct_superclasses(c):
    """Return the neighbours of ``c`` linked by an ``'is_subclass_of'`` edge.

    Reads the module-level ``hierarchy`` graph: every neighbour ``x`` of ``c``
    whose edge data ``hierarchy[c][x]['type']`` equals ``'is_subclass_of'`` is
    kept, in the order ``hierarchy.neighbors(c)`` yields them.
    """
    return [
        parent
        for parent in hierarchy.neighbors(c)
        if hierarchy[c][parent]['type'] == 'is_subclass_of'
    ]

def get_superclasses(c):
    """Return every superclass chain that starts at ``c``.

    Each chain is a list beginning with ``c`` and following direct
    superclasses (as given by ``get_direct_superclasses``) until a class with
    no direct superclass is reached.  A class without superclasses yields the
    single chain ``[c]``.
    """
    parents = get_direct_superclasses(c)
    if not parents:
        return [[c]]
    chains = []
    for parent in parents:
        for tail in get_superclasses(parent):
            chains.append([c] + tail)
    return chains

def is_subclass_of(c1, c2):
    """Return True when ``c2`` appears in any superclass chain of ``c1``.

    The chains come from ``get_superclasses(c1)`` and start with ``c1``
    itself, so ``is_subclass_of(c, c)`` is True.
    """
    for chain in get_superclasses(c1):
        if c2 in chain:
            return True
    return False

def get_closest_class(c, cs):
    """Return the member of ``cs`` with the shortest path from ``c`` in ``hierarchy``.

    Parameters
    ----------
    c
        Start node in the module-level ``hierarchy`` graph.
    cs
        Candidate nodes: a dict (its keys are used) or any other iterable.

    Returns
    -------
    The candidate with the smallest
    ``nx.shortest_path_length(hierarchy, c, candidate)``.  Candidates with no
    path from ``c`` rank after every reachable candidate.  Ties (including the
    case where nothing is reachable) go to the candidate that comes first in
    ``cs``.

    Raises
    ------
    ValueError
        If ``cs`` is empty.
    """
    candidates = list(cs)
    if not candidates:
        raise ValueError('get_closest_class() needs at least one candidate class')

    def distance(candidate):
        try:
            # WARNING: if several classes are equally close, only the first is returned.
            return nx.shortest_path_length(hierarchy, c, candidate)
        except nx.exception.NetworkXNoPath:
            # Infinity (rather than a large finite number) guarantees that an
            # unreachable class can never beat a reachable one.
            return float('inf')

    # min() keeps the first candidate among equal distances.
    return min(candidates, key=distance)

def get_closest_relevant_class(c):
    """Return the key of ``only_relevant_types_dict`` that is closest to ``c``.

    Thin wrapper: delegates to ``get_closest_class`` with the module-level
    ``only_relevant_types_dict`` as the candidate collection.
    """
    # NOTE(review): `only_relevant_types_dict` is not defined in the cells
    # visible here -- confirm it is loaded before this is called.
    return get_closest_class(c, only_relevant_types_dict)

def is_subclass_of_any_relevant_class(c):
    """Return True when ``c`` is a subclass of at least one relevant class.

    The relevant classes are the keys of the module-level
    ``only_relevant_types_dict``; each one is tested with ``is_subclass_of``
    and the search stops at the first match.
    """
    return any(
        is_subclass_of(c, relevant_class)
        for relevant_class in only_relevant_types_dict
    )

def get_relevant_type(ide):
    """Return the relevant class that best describes instance ``ide``.

    Looks up the types of ``ide`` in the module-level
    ``all_instances_and_types_dict``.  Every type that is a subclass of some
    relevant class (``is_subclass_of_any_relevant_class``) is mapped to its
    closest relevant class (``get_closest_relevant_class``).

    Returns
    -------
    ``None`` when ``ide`` is unknown or none of its types is relevant,
    otherwise one of the candidate relevant classes.

    WARNING: if there is more than one candidate, only one of them is
    returned.  The one whose ``str()`` sorts first is chosen, so the result is
    the same on every run.
    """
    if ide not in all_instances_and_types_dict:
        return None
    # The stored collection of types is only iterated, never modified.
    relevant_candidates = {
        get_closest_relevant_class(t)
        for t in all_instances_and_types_dict[ide]
        if is_subclass_of_any_relevant_class(t)
    }
    if not relevant_candidates:
        return None
    return min(relevant_candidates, key=str)

In [14]:
# Properties and qualifiers
#
# If no qualifiers
#
# Case 1) value: string
# Actions:
# Property type: attribute
# Property name: property name
# Property value: value
# Property attributes: none
# More than one value: list serialized: '["v1", "v2", "v3"]'
#
# Case 2) value: object
# Actions:
# Property type: relationship
# Property name: property name
# Property value: value
# Property attributes: none
# More than one value: for each value, a relationship between the node and the value
#
# If qualifiers
#
# Case a) qualifier: string, value: string
# Actions:
# Property type: attribute
# Property name: property name
# Property value: '{"value": v, "qualifiers": {"q1": v1, "q2": v2}}'
# Property attributes: none
# More than one value: list of dictionaries serialized
#
# Case b) qualifier: object, value: string
# Actions:
# Property type: relationship
# Property name: qualifier name
# Property value: qualifier value
# Property attributes: {property name: value}
# More than one value: for each qualifier, a relationship between the node and the qualifier value
#
# Case c) qualifier: string, value: object
# Actions:
# Property type: relationship
# Property name: property name
# Property value: value
# Property attributes: {q1: v1, q2: v2}
# More than one value: for each value, a relationship between the node and the value
#
# Case d) qualifier: object, value: object
# Actions:
# Property type: relationship
# Property name: property name
# Property value: value
# Property attributes: none
# More than one value: for each value, a relationship between the node and the value
# For each qualifier:
# Property type: relationship
# Property name: qualifier name
# Property value: qualifier value
# Property attributes: none
# More than one qualifier value: for each value, a relationship between the node and the qualifier value

In [18]:
def _fetch_entity(ide):
    """Download the Wikidata record for ``ide`` through the ``wbgetentities`` API.

    Returns the ``response['entities'][ide]`` dict of the JSON reply.  Raises
    ``requests.HTTPError`` for an HTTP error status and ``KeyError`` when the
    reply has no entry for ``ide``.
    """
    base_url = 'https://www.wikidata.org/w/api.php'
    payload = {'action': 'wbgetentities', 'ids': ide, 'languages': 'en', 'format': 'json'}
    # A timeout keeps one stalled request from hanging the whole notebook run.
    r = requests.get(base_url, params=payload, timeout=30)
    r.raise_for_status()
    return r.json()['entities'][ide]


def _entity_text(d, section, key, field):
    """Return ``d[section][key][field]``, or ``None`` when any level is missing."""
    entry = d.get(section, {}).get(key)
    if not entry:
        return None
    return entry.get(field)


def get_name(ide):
    """Return the English label of entity ``ide``, or ``ide`` itself if it has none."""
    name = _entity_text(_fetch_entity(ide), 'labels', 'en', 'value')
    return ide if name is None else name


def get_info(ide, typ=None):
    """Fetch entity ``ide`` from Wikidata and return its base record.

    Parameters
    ----------
    ide : str
        Wikidata entity id, e.g. ``'Q25366'``.
    typ : optional
        Value stored under ``'type'`` in the record; ``None`` when omitted.

    Returns
    -------
    dict
        ``{'id', 'type', 'name', 'wikilink', 'description', 'properties'}``.
        ``name`` is the English label, ``wikilink`` the English Wikipedia page
        title and ``description`` the English description; each is ``''`` when
        the entity lacks it.  ``properties`` starts as an empty dict.

    Raises
    ------
    requests.HTTPError, KeyError
        See ``_fetch_entity``.
    """
    d = _fetch_entity(ide)
    return {
        'id': ide,
        'type': typ,
        'name': _entity_text(d, 'labels', 'en', 'value') or '',
        'wikilink': _entity_text(d, 'sitelinks', 'enwiki', 'title') or '',
        'description': _entity_text(d, 'descriptions', 'en', 'value') or '',
        'properties': {},
    }

In [17]:
{'Q25366': {'description': 'Star Trek: The Original Series episode',
  'id': 'Q25366',
  'name': 'Where No Man Has Gone Before',
  'properties': {'P155': [{'qualifiers': {},
     'type': 'object',
     'value': 'Q20909'},
    {'qualifiers': {}, 'type': 'object', 'value': 'Q2659593'}],
   'P156': [{'qualifiers': {}, 'type': 'object', 'value': 'Q1187077'},
    {'qualifiers': {}, 'type': 'object', 'value': 'Q2984478'}],
   'P179': [{'qualifiers': {}, 'type': 'object', 'value': 'Q1077'}],
   'P345': [{'qualifiers': {}, 'type': 'string', 'value': 'tt0061027'}],
   'P361': [{'qualifiers': {}, 'type': 'object', 'value': 'Q747598'}],
   'P364': [{'qualifiers': {}, 'type': 'object', 'value': 'Q1860'}],
   'P495': [{'qualifiers': {}, 'type': 'object', 'value': 'Q30'}],
   'P577': [{'qualifiers': {'P17': [{'type': 'object', 'value': 'Q30'}]},
     'type': 'date',
     'value': '1966-09-22'},
    {'qualifiers': {'P17': [{'type': 'object', 'value': 'Q183'}]},
     'type': 'date',
     'value': '1973-10-06'},
    {'qualifiers': {'P17': [{'type': 'object', 'value': 'Q142'}]},
     'type': 'date',
     'value': '1986-05-07'}],
   'P646': [{'qualifiers': {}, 'type': 'string', 'value': '/m/023x20'}]},
  'type': 'Q1983062',
  'wikilink': 'Where No Man Has Gone Before'}}

{'Q25366': {'description': 'Star Trek: The Original Series episode',
  'id': 'Q25366',
  'name': 'Where No Man Has Gone Before',
  'properties': {'P155': [{'qualifiers': {},
     'type': 'object',
     'value': 'Q20909'},
    {'qualifiers': {}, 'type': 'object', 'value': 'Q2659593'}],
   'P156': [{'qualifiers': {}, 'type': 'object', 'value': 'Q1187077'},
    {'qualifiers': {}, 'type': 'object', 'value': 'Q2984478'}],
   'P179': [{'qualifiers': {}, 'type': 'object', 'value': 'Q1077'}],
   'P345': [{'qualifiers': {}, 'type': 'string', 'value': 'tt0061027'}],
   'P361': [{'qualifiers': {}, 'type': 'object', 'value': 'Q747598'}],
   'P364': [{'qualifiers': {}, 'type': 'object', 'value': 'Q1860'}],
   'P495': [{'qualifiers': {}, 'type': 'object', 'value': 'Q30'}],
   'P577': [{'qualifiers': {'P17': [{'type': 'object', 'value': 'Q30'}]},
     'type': 'date',
     'value': '1966-09-22'},
    {'qualifiers': {'P17': [{'type': 'object', 'value': 'Q183'}]},
     'type': 'date',
     'value': '197

In [None]:
%%time

# Walk every relevant instance and translate its properties into node
# attributes (string values) and graph edges (object-valued qualifiers).
for ide in only_relevant_instances_dict:
    info = only_relevant_instances_dict[ide]
    # Core node fields; the Wikidata type is translated through
    # wikidata_types_to_sylva_classes.
    base = {
        'id': info['id'],
        'type': wikidata_types_to_sylva_classes[info['type']],
        'name': info['name'],
        'wikilink': info['wikilink'],
        'description': info['description'],
    }
    attributes = {}
    for prop in info['properties']:
        prop_name = get_name(prop)
        prop_attr_values = []
        for prop_inst in info['properties'][prop]:  # possible cases: 1, 2, a, b, c, d
            prop_value = prop_inst['value']
            prop_type = prop_inst['type']
            prop_qualifiers = prop_inst['qualifiers']
            if prop_type != 'string':
                # TODO: cases 2, c and d (non-string values) are not implemented yet.
                continue
            if not prop_qualifiers:  # case 1
                prop_attr_values.append(prop_value)
                continue
            # possible cases: a, b
            value_dict = {'value': prop_value, 'qualifiers': {}}
            for q in prop_qualifiers:
                q_name = get_name(q)
                q_insts = prop_qualifiers[q]
                # The sample record shown earlier in the notebook stores each
                # qualifier as a list of {'type', 'value'} dicts; a bare dict
                # is accepted as well.
                if isinstance(q_insts, dict):
                    q_insts = [q_insts]
                for q_inst in q_insts:
                    q_value = q_inst['value']
                    q_type = q_inst['type']
                    if q_type == 'string':  # case a
                        # NOTE(review): several values for the same qualifier
                        # overwrite each other here -- confirm that is acceptable.
                        value_dict['qualifiers'][q_name] = q_value
                    elif q_type == 'object':  # possible cases: a, b
                        if q_value in only_relevant_instances_dict:  # case b
                            # Keyword attributes work on networkx 1.x and 2.x.
                            graph.add_edge(ide, q_value, **{prop_name: prop_value})
                        else:  # possible cases: a, b
                            # TODO: decide what to do with the relevant type of
                            # a qualifier value that is not a relevant instance.
                            typ = get_relevant_type(q_value)
            prop_attr_values.append(value_dict)
        if prop_attr_values:
            # A single value is stored as-is; several values are serialized
            # as a JSON list.
            if len(prop_attr_values) == 1:
                attr_value = prop_attr_values[0]
            else:
                attr_value = json.dumps(prop_attr_values)
            attributes[prop_name] = attr_value
    # TODO: `base` and `attributes` are built but not yet written to `graph`.


# Read the filtered Wikidata dump (one JSON entity per line) and add an edge
# for every claim whose value is another entity.
total = 0  # lines read
count = 0  # edges added
error = 0  # lines that could not be parsed or lacked an expected key
with open('wikidata-20150907-star_trek-relevant-rels-and-types.json', encoding='utf-8') as f:
    for line in f:
        total += 1
        try:
            # Entity lines end with a separating comma; strip only that comma
            # so a final line without one still parses.
            cleaned = line.strip().rstrip(',')
            d = json.loads(cleaned)
            id1 = d['id']
            for rel in d['claims']:
                for rel_inst in d['claims'][rel]:
                    datavalue = rel_inst['mainsnak']['datavalue']
                    if datavalue['type'] != 'wikibase-entityid':
                        # Only entity-valued claims become relationships.
                        continue
                    id2 = 'Q' + str(datavalue['value']['numeric-id'])
                    # The relation id is stored in the edge's 'type' attribute,
                    # the same key the hierarchy graph uses.
                    # NOTE(review): a DiGraph keeps one edge per node pair, so a
                    # later relation between the same pair overwrites the earlier
                    # one -- use nx.MultiDiGraph if all of them must be kept.
                    graph.add_edge(id1, id2, type=rel)
                    count += 1
        except (ValueError, KeyError, TypeError):
            # Malformed JSON (e.g. the '[' / ']' wrapper lines) or a record
            # without the expected keys; the rest of that line is skipped.
            error += 1
print('Total:', total)
print('Count:', count)
print('Error:', error)