In [1]:
import json
import pickle

import networkx as nx
import requests

In [2]:
# Restore the relationship mapping from its pickle file.
# NOTE(review): pickle.load can execute arbitrary code; presumably this file
# was produced locally by an earlier step -- confirm before loading a copy
# obtained from anywhere else.
with open('pickle/star-trek_only_relevant_relationships_dict.pickle', 'rb') as pickle_file:
    only_relevant_relationships_dict = pickle.load(pickle_file)

In [3]:
# Display the loaded mapping (property id -> English label) for inspection.
only_relevant_relationships_dict

{'P103': 'native language',
 'P1074': 'fictional analog of',
 'P1080': 'from fictional universe',
 'P1235': 'ISFDB series ID',
 'P1237': 'Box Office Mojo film ID',
 'P1258': 'Rotten Tomatoes identifier',
 'P1265': 'AlloCiné movie ID',
 'P135': 'movement',
 'P136': 'genre',
 'P138': 'named after',
 'P1407': 'MusicBrainz series ID',
 'P1412': 'languages spoken or published',
 'P1434': 'describes the fictional universe',
 'P144': 'based on',
 'P1441': 'present in work',
 'P1445': 'fictional universe described in',
 'P1476': 'title',
 'P155': 'follows',
 'P156': 'followed by',
 'P1562': 'AllMovie movie ID',
 'P1651': 'YouTube video identifier',
 'P17': 'country',
 'P1712': 'Metacritic ID',
 'P179': 'series',
 'P180': 'depicts',
 'P1804': 'Danish National Filmography ID',
 'P1811': 'list of episodes',
 'P1813': 'short name',
 'P1970': 'MovieMeter movie ID',
 'P2002': 'Twitter username',
 'P2003': 'Instagram username',
 'P2013': 'Facebook ID',
 'P212': 'ISBN-13',
 'P243': 'OCLC control numbe

In [4]:
# Module-level state shared with add_to_hierarchy():
#   processed -- entity ids that have already been handled and need no re-fetch
#   hierarchy -- directed graph that receives the class nodes and the
#                'subclass' / 'superclass' edges between them
processed = set()
hierarchy = nx.DiGraph()

In [5]:
def add_to_hierarchy(c):
    """Add entity ``c`` and all of its transitive superclasses to ``hierarchy``.

    Every entity reached is looked up through the Wikidata ``wbgetentities``
    API. Each P279 claim that carries a value becomes two directed edges:
    ``entity -> super`` with ``type='subclass'`` and ``super -> entity`` with
    ``type='superclass'``.

    The traversal is iterative with a per-call ``visited`` set, so cycles in
    the P279 chain terminate and deep chains cannot exhaust the recursion
    limit. Each entity is fetched at most once per call.

    Args:
        c: Wikidata entity id string, e.g. ``'Q5'``.

    Side effects:
        Mutates the module-level ``hierarchy`` graph and ``processed`` set.
        ``processed`` is only updated once the whole traversal has succeeded,
        so a call that fails part-way leaves its entities eligible for a
        later retry (edges added before the failure stay in the graph).

    Raises:
        requests.RequestException: on network failure, timeout or an HTTP
            error status.
        KeyError: if the API response does not contain the requested entity.
    """
    if c in processed:
        return

    base_url = 'https://www.wikidata.org/w/api.php'
    visited = set()   # entities expanded during this call
    pending = [c]     # work list of entities still to expand

    while pending:
        current = pending.pop()
        if current in processed or current in visited:
            continue
        # Mark before expanding so a cyclic P279 chain cannot loop forever.
        visited.add(current)
        hierarchy.add_node(current)

        payload = {'action': 'wbgetentities', 'ids': current, 'languages': 'en', 'format': 'json'}
        # Timeout keeps one stalled request from hanging the whole run.
        r = requests.get(base_url, params=payload, timeout=30)
        r.raise_for_status()
        d = r.json()['entities'][current]

        # `or {}` also covers an empty claims container that is not a dict.
        claims = d.get('claims') or {}
        for rel_inst in claims.get('P279', []):
            datavalue = rel_inst['mainsnak'].get('datavalue')
            if datavalue is None:
                # Snak without a concrete value: nothing to link to.
                continue
            super_c = 'Q' + str(datavalue['value']['numeric-id'])
            hierarchy.add_node(super_c)
            # Keyword attributes work on networkx 1.x and 2.x+; a positional
            # attribute dict is rejected by 2.x.
            hierarchy.add_edge(current, super_c, type='subclass')
            hierarchy.add_edge(super_c, current, type='superclass')
            pending.append(super_c)

    # Commit only after the full traversal succeeded.
    processed.update(visited)

In [6]:
%%time
# Walk the dump line by line (one JSON document per line) and feed every
# entity-valued claim of a relevant relationship into add_to_hierarchy().
#   total -- lines read
#   count -- lines handled without an exception
#   error -- lines that raised (unparsable JSON, failed lookup, ...)
total = 0
count = 0
error = 0
with open('data/wikidata-20150907-star_trek.json') as f:
    for line in f:
        total += 1
        try:
            # Remove only a trailing comma separator; slicing off the last
            # character unconditionally would corrupt a line that has none.
            # Lines that are not a JSON document on their own (presumably the
            # enclosing '[' / ']' of the dump) still fail to parse and are
            # counted as errors below.
            cleaned = line.strip().rstrip(',')
            d = json.loads(cleaned)
            # `or {}` also covers an empty claims container that is not a dict.
            claims = d.get('claims') or {}
            for rel in claims:
                if rel not in only_relevant_relationships_dict:
                    continue
                for rel_inst in claims[rel]:
                    datavalue = rel_inst['mainsnak'].get('datavalue')
                    if datavalue is None:
                        # Snak without a value: skip it instead of letting a
                        # KeyError discard the entity's remaining claims.
                        continue
                    type_rel = datavalue['type']
                    if type_rel == 'wikibase-entityid':
                        typ = 'Q' + str(datavalue['value']['numeric-id'])
                        add_to_hierarchy(typ)
            count += 1
        except Exception as e:
            # Deliberately best-effort: report the problem, keep going.
            print(e)
            error += 1
print('Total:', total)
print('Count:', count)
print('Error:', error)

Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Total: 1630
Count: 1628
Error: 2
CPU times: user 30.6 s, sys: 799 ms, total: 31.4 s
Wall time: 5min 46s


In [7]:
# Persist the finished hierarchy graph as a pickle file.
with open('pickle/star-trek_hierarchy_graph.pickle', 'wb') as pickle_file:
    pickle.dump(hierarchy, pickle_file)