In [1]:
import json
import pickle

import requests

In [2]:
def _load_pickle(path):
    """Return the object stored in the pickle file at `path`."""
    # NOTE: pickle can execute arbitrary code on load; only use files produced
    # by this project's own earlier steps.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Lookup tables read from the project's pickle directory.
all_instances_and_types_dict = _load_pickle('pickle/star-trek_all_instances_and_types_dict.pickle')
only_relevant_relationships_dict = _load_pickle('pickle/star-trek_only_relevant_relationships_dict.pickle')
only_relevant_types_dict = _load_pickle('pickle/star-trek_only_relevant_types_dict.pickle')

In [3]:
# Accumulates type ids reached through relevant relationships that are not yet
# listed among the relevant types.
new_types_set = set()

In [4]:
%%time
total = 0
count = 0
error = 0
with open('data/wikidata-20150907-star_trek.json') as f:
    for line in f:
        try:
            total += 1
            cleaned = line.strip()[:-1]
            d = json.loads(cleaned)
            for rel in d['claims']:
                if rel in only_relevant_relationships_dict:
                    for rel_inst in d['claims'][rel]:
                        type_rel = rel_inst['mainsnak']['datavalue']['type']
                        if type_rel == 'wikibase-entityid':
                            ide = 'Q' + str(rel_inst['mainsnak']['datavalue']['value']['numeric-id'])
                            if ide in all_instances_and_types_dict:
                                types = all_instances_and_types_dict[ide]
                                for typ in types:
                                    if typ not in only_relevant_types_dict:
                                        new_types_set.add(typ)
                                
            count += 1
        except Exception as e:
            error += 1
print('Total:', total)
print('Count:', count)
print('Error:', error)

Total: 1630
Count: 1628
Error: 2
CPU times: user 211 ms, sys: 9.29 ms, total: 220 ms
Wall time: 203 ms


In [5]:
# Number of distinct types collected by the scan that are not in only_relevant_types_dict.
len(new_types_set)

12

In [6]:
# A list is needed because the ids are sliced into fixed-size chunks later on.
new_types_list = [type_id for type_id in new_types_set]

In [7]:
# Maps each new type id to its English label ('' when no label is available).
new_types_dict = dict()

In [8]:
# Resolve the English label of every id in new_types_list through the Wikidata
# wbgetentities API and store it in new_types_dict ('' when there is none).
base_url = 'https://www.wikidata.org/w/api.php'
limit = 50  # ids sent per request (wbgetentities caps the batch size for regular clients)
request_timeout = 30  # seconds; without a timeout a stalled connection hangs the cell forever

chunks = [new_types_list[x:x+limit] for x in range(0, len(new_types_list), limit)]
for chunk in chunks:
    type_ids = '|'.join(chunk)
    payload = {'action': 'wbgetentities', 'ids': type_ids, 'languages': 'en', 'format': 'json'}
    r = requests.get(base_url, params=payload, timeout=request_timeout)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    r.raise_for_status()
    response = r.json()
    if 'entities' not in response:
        # The API reports problems in the JSON body; show them rather than
        # failing with an opaque KeyError on 'entities'.
        raise RuntimeError('wbgetentities returned no entities: {}'.format(response.get('error', response)))
    entities = response['entities']
    for ide in entities:
        try:
            new_types_dict[ide] = entities[ide]['labels']['en']['value']
        except (KeyError, TypeError):
            # Entity is missing or has no English label (a label container
            # that is not a mapping raises TypeError): keep an empty label.
            new_types_dict[ide] = ''

In [9]:
# Number of ids for which an entry (a label or '') was stored.
len(new_types_dict)

12

In [10]:
# Persist the id -> label mapping next to the other pickled lookup tables.
with open('pickle/star-trek_new_types_dict.pickle', 'wb') as out_file:
    pickle.dump(new_types_dict, out_file)