In [1]:
import json
import pickle

import networkx as nx
import requests

In [2]:
with open('pickle/star-trek_only_relevant_relationships_dict.pickle', 'rb') as f:
    only_relevant_relationships_dict = pickle.load(f)

with open('pickle/star-trek_only_relevant_types_dict.pickle', 'rb') as f:
    only_relevant_types_dict = pickle.load(f)

with open('pickle/star-trek_all_instances_and_types_dict.pickle', 'rb') as f:
    all_instances_and_types_dict = pickle.load(f)

with open('pickle/star-trek_hierarchy_graph.pickle', 'rb') as f:
    hierarchy = pickle.load(f)

In [3]:
relevant_instances = {} # all relevant information for instances of relevant types
new_instances = set() # new instances that are target of relevant relationships

In [20]:
def get_direct_superclasses(c):
    all_neighbors = hierarchy.neighbors(c)
    direct_superclasses = list(filter(lambda x: hierarchy[c][x]['type'] == 'is_subclass_of', all_neighbors))
    return direct_superclasses

def get_superclasses(c):
    direct_superclasses = get_direct_superclasses(c)
    if not direct_superclasses:
        superclasses = [[c]]
    else:
        superclasses = [[c] + spclsss for c2 in direct_superclasses for spclsss in get_superclasses(c2)]
    return superclasses

def is_subclass_of(c1, c2):
    return any(map(lambda x: c2 in x, get_superclasses(c1)))

def get_closest_class(c, cs):
    try:
        lengths = [nx.shortest_path_length(hierarchy, c, t) for t in cs]
        min_length = min(lengths)
        count = lengths.count(min_length)
        closest = cs[cs.index(min_length)]
        if count > 1:
            print('MORE THAN ONE CLOSEST CLASS')
        print('YES')
    except nx.exception.NetworkXNoPath as e:
        print(e)
        closest = None
    return closest

def get_closest_relevant_class(c):
    return get_closest_class(c, only_relevant_types_dict)

def is_subclass_of_any_relevant_class(c):
    return any(map(lambda x: is_subclass_of(c, x), only_relevant_types_dict))

def get_relevant_type(ide):
    relevant_type = None
    types = all_instances_and_types_dict[ide]
    if len(types) == 1:
        t = types.pop(); types.add(t)
        if is_subclass_of_any_relevant_class(t):
            relevant_type = get_closest_relevant_class(t)
            print('>>>',relevant_type)
    else:
        relevant_candidates = set()
        for t in types:
            if is_subclass_of_any_relevant_class(t):
                relevant_type = get_closest_relevant_class(t)
                relevant_candidates.add(relevant_type)
        if relevant_candidates:
            relevant_type = relevant_candidates.pop(); relevant_candidates.add(relevant_type)
            if len(relevant_candidates) > 1:
                print('MORE THAN ONE RELEVANT CANDIDATE:', ide, types, relevant_candidates)
    return relevant_type

In [21]:
def add_new_instance(ide):
    base_url = 'https://www.wikidata.org/w/api.php'
    payload = {'action': 'wbgetentities', 'ids': ide, 'languages': 'en', 'format': 'json'}
    r = requests.get(base_url, params=payload)
    response = r.json()
    d = response['entities'][ide]
    ide = d['id']
    typ = get_relevant_type(ide)
    if typ:
        if 'en' in d['labels']:
            name = d['labels']['en']['value']
        else:
            name = ''
        if 'enwiki' in d['sitelinks']:
            wikilink = d['sitelinks']['enwiki']['title']
        else:
            wikilink = ''
        if 'en' in d['descriptions']:
            description = d['descriptions']['en']['value']
        else:
            description = ''
        info = {
            'id': ide,
            'type': typ,
            'name': name,
            'wikilink': wikilink,
            'description': description,
            'relationships': {},
            'attributes': {},
        }
        relevant_instances[ide] = info

In [22]:
# ide = 'Q5014725'
# base_url = 'https://www.wikidata.org/w/api.php'
# payload = {'action': 'wbgetentities', 'ids': ide, 'languages': 'en', 'format': 'json'}
# r = requests.get(base_url, params=payload)
# print(r.url)
# response = r.json()
# response['entities'][ide]

In [23]:
%%time
total = 0
count = 0
error = 0
relevant_instances = {}
new_instances = set()
with open('data/wikidata-20150907-star_trek.json') as f:
    for line in f:
#         try:
        if not line.startswith('[') and not line.startswith(']'):
            total += 1
            cleaned = line.strip()[:-1]
            d = json.loads(cleaned) # keys: ['type', 'labels', 'descriptions', 'claims', 'aliases', 'sitelinks', 'id']
            ide = d['id']
            typ = get_relevant_type(ide)
            if typ:
                if 'en' in d['labels']:
                    name = d['labels']['en']['value']
                else:
                    name = ''
                if 'enwiki' in d['sitelinks']:
                    wikilink = d['sitelinks']['enwiki']['title']
                else:
                    wikilink = ''
                if 'en' in d['descriptions']:
                    description = d['descriptions']['en']['value']
                else:
                    description = ''
                info = {
                    'id': ide,
                    'type': typ,
                    'name': name,
                    'wikilink': wikilink,
                    'description': description,
                    'relationships': {},
                    'attributes': {},
                }
                for rel in d['claims']:
                    if rel in only_relevant_relationships_dict and rel != 'P31' and rel != 'P279':
                        values = set()
                        for rel_inst in d['claims'][rel]:
                            type_rel = rel_inst['mainsnak']['datavalue']['type']
                            if type_rel == 'wikibase-entityid':
                                value = 'Q' + str(rel_inst['mainsnak']['datavalue']['value']['numeric-id'])
                                if value in all_instances_and_types_dict:
                                    values.add(value)
                                else:
                                    if value not in new_instances:
                                        add_new_instance(value)
                                        new_instances.add(value)
                            elif type_rel == 'string':
                                value = str(rel_inst['mainsnak']['datavalue']['value'])
                                values.add(value)
                            elif type_rel == 'time':
                                value = str(rel_inst['mainsnak']['datavalue']['value']['time'][1:11])
                                values.add(value)
                            else:
                                print('UNKOWN RELATIONSHIP TYPE')
                        if type_rel == 'wikibase-entityid':
                            info['relationships'][rel] = values
                        else:
                            info['attributes'][rel] = values
                relevant_instances[ide] = info
                count += 1
#         except Exception as e:
#             error += 1
print('Total:', total)
print('Count:', count)
print('Error:', error)

No path between Q1137046 and Q196600.
>>> None
No path between Q1137046 and Q33215.
No path between Q1137046 and Q2623733.
No path between Q1137046 and Q278379.
No path between Q1137046 and Q1983062.
>>> None
No path between Q1137046 and Q1983062.
>>> None
No path between Q1137046 and Q1983062.
>>> None
No path between Q1137046 and Q11424.
>>> None
No path between Q1137046 and Q5398426.
>>> None
No path between Q1137046 and Q11424.
>>> None
No path between Q1137046 and Q1983062.
>>> None
No path between Q1137046 and Q11424.
>>> None
No path between Q1137046 and Q1983062.
>>> None
No path between Q1137046 and Q1983062.
>>> None
No path between Q1137046 and Q3464665.
>>> None
No path between Q1137046 and Q3464665.
>>> None
No path between Q1137046 and Q11424.
>>> None
No path between Q35127 and Q1137046.
>>> None
No path between Q1137046 and Q1983062.
>>> None
No path between Q1137046 and Q3252662.
No path between Q35127 and Q1137046.
No path between Q1137046 and Q1983062.
>>> None
No pa

In [12]:
relevant_instances

{}

In [10]:
%%time
total = 0
count = 0
error = 0
with open('data/wikidata-20150907-star_trek.json') as f:
    for line in f:
        try:
            total += 1
            cleaned = line.strip()[:-1]
            d = json.loads(cleaned)
            for rel in d['claims']:
                if rel in only_relevant_relationships_dict:
                    for rel_inst in d['claims'][rel]:
                        type_rel = rel_inst['mainsnak']['datavalue']['type']
                        if type_rel == 'wikibase-entityid':
                            ide = 'Q' + str(rel_inst['mainsnak']['datavalue']['value']['numeric-id'])
                            if ide in all_instances_and_types_dict:
                                types = all_instances_and_types_dict[ide]
                                for typ in types:
                                    if typ not in only_relevant_types_dict:
                                        print(ide, typ)
                                
            count += 1
        except Exception as e:
            error += 1
print('Total:', total)
print('Count:', count)
print('Error:', error)

Q16276 Q15773347
Q16276 Q15632617
Q16276 Q15773317
Q10134 Q2623733
Total: 1630
Count: 1628
Error: 2
CPU times: user 209 ms, sys: 4.16 ms, total: 213 ms
Wall time: 198 ms


In [14]:
# relevant_instances

In [13]:
{"type":"item","id":"Q796793","labels":{"he":{"language":"he","value":"All Good Things..."},"ko":{"language":"ko","value":"USS \uc5d4\ud130\ud504\ub77c\uc774\uc988"},"en":{"language":"en","value":"All Good Things..."},"es":{"language":"es","value":"Todas las cosas buenas"},"ro":{"language":"ro","value":"All Good Things..."},"sv":{"language":"sv","value":"All Good Things..."},"it":{"language":"it","value":"Ieri, oggi, domani"},"de":{"language":"de","value":"Gestern, heute, morgen"}},"descriptions":{"it":{"language":"it","value":"episodio di Star Trek: The Next Generation"},"de":{"language":"de","value":"Star-Trek-Episode"}},"aliases":{"es":[{"language":"es","value":"Todas Las Buenas Cosas"}]},"claims":{"P31":[{"mainsnak":{"snaktype":"value","property":"P31","datavalue":{"value":{"entity-type":"item","numeric-id":1137046},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","id":"Q796793$99e9f75c-468c-9a9d-3db3-30efe4c3d087","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P31","datavalue":{"value":{"entity-type":"item","numeric-id":3252662},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","id":"Q796793$b3efb0b3-4caf-8d82-e4db-9b6feddede74","rank":"normal"}],"P646":[{"mainsnak":{"snaktype":"value","property":"P646","datavalue":{"value":"\/m\/031_yd","type":"string"},"datatype":"string"},"type":"statement","id":"Q796793$F4A416DE-5C06-4C67-A447-25E8B13BD56B","rank":"normal","references":[{"hash":"af38848ab5d9d9325cffd93a5ec656cc6ca889ed","snaks":{"P248":[{"snaktype":"value","property":"P248","datavalue":{"value":{"entity-type":"item","numeric-id":15241312},"type":"wikibase-entityid"},"datatype":"wikibase-item"}],"P577":[{"snaktype":"value","property":"P577","datavalue":{"value":{"time":"+2013-10-28T00:00:00Z","timezone":0,"before":0,"after":0,"precision":11,"calendarmodel":"http:\/\/www.wikidata.org\/entity\/Q1985727"},"type":"time"},"datatype":"time"}]},"snaks-order":["P248","P577"]}]}],"P179":[{"mainsnak":{"snaktype":"value","property":"P179","datavalue":{"value":{"entity-type":"item","numeric-id":16290},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","id":"Q796793$FA692EAC-D45E-491B-8702-4D266E02A1D9","rank":"normal","references":[{"hash":"7eb64cf9621d34c54fd4bd040ed4b61a88c4a1a0","snaks":{"P143":[{"snaktype":"value","property":"P143","datavalue":{"value":{"entity-type":"item","numeric-id":328},"type":"wikibase-entityid"},"datatype":"wikibase-item"}]},"snaks-order":["P143"]}]}],"P57":[{"mainsnak":{"snaktype":"value","property":"P57","datavalue":{"value":{"entity-type":"item","numeric-id":178387},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","id":"Q796793$383B334F-8FAD-40EF-992F-B3ED0E67BB4E","rank":"normal","references":[{"hash":"7eb64cf9621d34c54fd4bd040ed4b61a88c4a1a0","snaks":{"P143":[{"snaktype":"value","property":"P143","datavalue":{"value":{"entity-type":"item","numeric-id":328},"type":"wikibase-entityid"},"datatype":"wikibase-item"}]},"snaks-order":["P143"]}]}],"P58":[{"mainsnak":{"snaktype":"value","property":"P58","datavalue":{"value":{"entity-type":"item","numeric-id":540510},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","id":"Q796793$3535D034-FE7B-48BC-AE30-72AA72B31D84","rank":"normal","references":[{"hash":"7eb64cf9621d34c54fd4bd040ed4b61a88c4a1a0","snaks":{"P143":[{"snaktype":"value","property":"P143","datavalue":{"value":{"entity-type":"item","numeric-id":328},"type":"wikibase-entityid"},"datatype":"wikibase-item"}]},"snaks-order":["P143"]}]},{"mainsnak":{"snaktype":"value","property":"P58","datavalue":{"value":{"entity-type":"item","numeric-id":549942},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","id":"Q796793$12360894-FC79-47EA-9239-7922B0EABE48","rank":"normal","references":[{"hash":"7eb64cf9621d34c54fd4bd040ed4b61a88c4a1a0","snaks":{"P143":[{"snaktype":"value","property":"P143","datavalue":{"value":{"entity-type":"item","numeric-id":328},"type":"wikibase-entityid"},"datatype":"wikibase-item"}]},"snaks-order":["P143"]}]}],"P155":[{"mainsnak":{"snaktype":"value","property":"P155","datavalue":{"value":{"entity-type":"item","numeric-id":7239746},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","id":"Q796793$FD671369-068E-416A-9347-E05A47A199B7","rank":"normal","references":[{"hash":"7eb64cf9621d34c54fd4bd040ed4b61a88c4a1a0","snaks":{"P143":[{"snaktype":"value","property":"P143","datavalue":{"value":{"entity-type":"item","numeric-id":328},"type":"wikibase-entityid"},"datatype":"wikibase-item"}]},"snaks-order":["P143"]}]}],"P361":[{"mainsnak":{"snaktype":"value","property":"P361","datavalue":{"value":{"entity-type":"item","numeric-id":3469077},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","id":"Q796793$A6157804-CE98-47EF-892C-60D66F09799A","rank":"normal"}],"P364":[{"mainsnak":{"snaktype":"value","property":"P364","datavalue":{"value":{"entity-type":"item","numeric-id":1860},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","id":"Q796793$50E553F6-5758-4F49-9940-2F14934DDED0","rank":"normal"}],"P495":[{"mainsnak":{"snaktype":"value","property":"P495","datavalue":{"value":{"entity-type":"item","numeric-id":30},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","id":"Q796793$2133B3B9-CFF8-4990-997C-945706A9AD0E","rank":"normal"}],"P161":[{"mainsnak":{"snaktype":"value","property":"P161","datavalue":{"value":{"entity-type":"item","numeric-id":16296},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","qualifiers":{"P453":[{"snaktype":"value","property":"P453","hash":"f467190d5b044e6d3f27c383d2e5cf20e17f780e","datavalue":{"value":{"entity-type":"item","numeric-id":16276},"type":"wikibase-entityid"},"datatype":"wikibase-item"}]},"qualifiers-order":["P453"],"id":"Q796793$6FC75F5F-69A0-472C-84F9-D2D7D0785DC1","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P161","datavalue":{"value":{"entity-type":"item","numeric-id":346595},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","qualifiers":{"P453":[{"snaktype":"value","property":"P453","hash":"8db507abe8231d078093e9404ca92bf0f4d41462","datavalue":{"value":{"entity-type":"item","numeric-id":752877},"type":"wikibase-entityid"},"datatype":"wikibase-item"}]},"qualifiers-order":["P453"],"id":"Q796793$A625424C-5BC1-467B-8EE8-7E41A0E5DFA0","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P161","datavalue":{"value":{"entity-type":"item","numeric-id":311453},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","qualifiers":{"P453":[{"snaktype":"value","property":"P453","hash":"d22209f45821ceebcf2c8fdc1a844066b2ca6ccb","datavalue":{"value":{"entity-type":"item","numeric-id":22983},"type":"wikibase-entityid"},"datatype":"wikibase-item"}]},"qualifiers-order":["P453"],"id":"Q796793$0AA41FE1-D960-4957-9CF3-2B0B67D5225D","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P161","datavalue":{"value":{"entity-type":"item","numeric-id":232945},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","qualifiers":{"P453":[{"snaktype":"value","property":"P453","hash":"02dc85b367844eab1c309d55a97b53df0dbf2ea8","datavalue":{"value":{"entity-type":"item","numeric-id":625731},"type":"wikibase-entityid"},"datatype":"wikibase-item"}]},"qualifiers-order":["P453"],"id":"Q796793$9AB7EBF5-8776-4BE5-90D2-9F616800D7FE","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P161","datavalue":{"value":{"entity-type":"item","numeric-id":312570},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","qualifiers":{"P453":[{"snaktype":"value","property":"P453","hash":"21a427f5689877da782cbad30f06667073d8d562","datavalue":{"value":{"entity-type":"item","numeric-id":384569},"type":"wikibase-entityid"},"datatype":"wikibase-item"}]},"qualifiers-order":["P453"],"id":"Q796793$B3D21A4B-1576-4072-8483-8CF087E4ECD8","rank":"normal"}],"P577":[{"mainsnak":{"snaktype":"value","property":"P577","datavalue":{"value":{"time":"+1994-05-23T00:00:00Z","timezone":0,"before":0,"after":0,"precision":11,"calendarmodel":"http:\/\/www.wikidata.org\/entity\/Q1985727"},"type":"time"},"datatype":"time"},"type":"statement","id":"Q796793$7afe5406-4e06-80a9-bc5b-f47ffc63c7cd","rank":"normal"}]},"sitelinks":{"hewiki":{"site":"hewiki","title":"All Good Things...","badges":[]},"enwiki":{"site":"enwiki","title":"All Good Things... (Star Trek: The Next Generation)","badges":[]},"eswiki":{"site":"eswiki","title":"Todas las cosas buenas (Star Trek: La nueva generaci\u00f3n)","badges":[]},"rowiki":{"site":"rowiki","title":"All Good Things...","badges":[]},"svwiki":{"site":"svwiki","title":"All Good Things... (Star Trek: The Next Generation)","badges":[]},"dewiki":{"site":"dewiki","title":"Gestern, heute, morgen","badges":[]}}}

{'aliases': {'es': [{'language': 'es', 'value': 'Todas Las Buenas Cosas'}]},
 'claims': {'P155': [{'id': 'Q796793$FD671369-068E-416A-9347-E05A47A199B7',
    'mainsnak': {'datatype': 'wikibase-item',
     'datavalue': {'type': 'wikibase-entityid',
      'value': {'entity-type': 'item', 'numeric-id': 7239746}},
     'property': 'P155',
     'snaktype': 'value'},
    'rank': 'normal',
    'references': [{'hash': '7eb64cf9621d34c54fd4bd040ed4b61a88c4a1a0',
      'snaks': {'P143': [{'datatype': 'wikibase-item',
         'datavalue': {'type': 'wikibase-entityid',
          'value': {'entity-type': 'item', 'numeric-id': 328}},
         'property': 'P143',
         'snaktype': 'value'}]},
      'snaks-order': ['P143']}],
    'type': 'statement'}],
  'P161': [{'id': 'Q796793$6FC75F5F-69A0-472C-84F9-D2D7D0785DC1',
    'mainsnak': {'datatype': 'wikibase-item',
     'datavalue': {'type': 'wikibase-entityid',
      'value': {'entity-type': 'item', 'numeric-id': 16296}},
     'property': 'P161',
   

In [381]:
for ide in all_instances_and_types_dict:
    typ = get_relevant_type(ide)
    if typ:
        

0
1
[['Q866']]
2
[['Q278379', 'Q33215', 'Q34770', 'Q17376908', 'Q11028', 'Q853614', 'Q7184903', 'Q488383', 'Q35120'], ['Q278379', 'Q33215', 'Q34770', 'Q17376908', 'Q7184903', 'Q488383', 'Q35120'], ['Q278379', 'Q33215', 'Q17537576', 'Q15621286', 'Q386724', 'Q16686448', 'Q488383', 'Q35120']]
3
[['Q150', 'Q37351', 'Q19814', 'Q19860', 'Q771861']]
4
[['Q14637321', 'Q18670171', 'Q6619802', 'Q14897293', 'Q17537576', 'Q15621286', 'Q386724', 'Q16686448', 'Q488383', 'Q35120']]
5
[['Q14637321', 'Q18670171', 'Q6619802', 'Q14897293', 'Q17537576', 'Q15621286', 'Q386724', 'Q16686448', 'Q488383', 'Q35120']]
6
[['Q20922']]
7
[['Q15632617', 'Q95074', 'Q14897293', 'Q17537576', 'Q15621286', 'Q386724', 'Q16686448', 'Q488383', 'Q35120'], ['Q15632617', 'Q95074', 'Q15619164', 'Q830077', 'Q35120'], ['Q15632617', 'Q95074', 'Q18336849'], ['Q15632617', 'Q6619679', 'Q95074', 'Q14897293', 'Q17537576', 'Q15621286', 'Q386724', 'Q16686448', 'Q488383', 'Q35120'], ['Q15632617', 'Q6619679', 'Q95074', 'Q15619164', 'Q83007

KeyboardInterrupt: 

In [208]:


def is_subclass(c1, c2):
    return get_superclasses(c1).search(c2)

def get_all_types(ide):
    types = set()
    with open('data/wikidata-20150907-star_trek.json') as f:
        for line in f:
            try:
                cleaned = line.strip()[:-1]
                d = json.loads(cleaned)
                if ide == d['id']:
                    for rel_inst in d['claims']['P31']:
                        typ = 'Q' + str(rel_inst['mainsnak']['datavalue']['value']['numeric-id'])
                        types.add(typ)
                    break
            except Exception as e:
                pass
    return types

def get_type(ide):
    types = get_all_types(ide)
    if len(types) == 1:
        typ = types.pop(); types.add(typ)
    else:
        superclasses_tree_list = [get_superclasses(t) for t in types]
        common_classes = 
            
        
        
        types_aux = set(types)
        for x in types:
            for y in types:
                if x != y and is_subclass(x, y):
                    try:
                        types_aux.remove(y)
                    except KeyError as e:
                        pass
        if len(types_aux) == 0:
            typ = None
        elif len(types_aux) == 1:
            typ = types_aux.pop(); types_aux.add(typ)
        else:
            print('MARRON')
            print(ide)
            typ = None
    return typ

In [209]:
# %%time
# Check if the types of the target of the relevant relationships are included in the relevant types
total = 0
count = 0
error = 0
not_included_types = set()
with open('data/wikidata-20150907-star_trek.json') as f:
    for line in f:
#         try:
        if not line.startswith('[') and not line.startswith(']'):
            total += 1
            cleaned = line.strip()[:-1]
            d = json.loads(cleaned)
            for rel in d['claims']:
                if rel in only_relevant_relationships_dict:
                    for rel_inst in d['claims'][rel]:
                        type_rel = rel_inst['mainsnak']['datavalue']['type']
                        if type_rel == 'wikibase-entityid':
                            val = rel_inst['mainsnak']['datavalue']['value']['numeric-id']
                            id2 = 'Q' + str(val)
                            get_type(id2)
                            get_superclass(id2)
            count += 1
#         except Exception as e:
#             print(e)
#             error += 1
        print(total)
print('Total:', total)
print('Count:', count)
print('Error:', error)

0
MAS DE UNA SUPERCLASE Q196600
1
MAS DE UNA SUPERCLASE Q2623733


KeyboardInterrupt: 

In [None]:
s={1,2,3}

In [42]:
%%time
base_url = 'https://www.wikidata.org/w/api.php'
limit = 50

# chunks = [all_relationships_list[x:x+limit] for x in range(0, len(all_relationships_list), limit)]
# for chunk in chunks:
# relationship_ids = '|'.join(chunk)
payload = {'action': 'wbgetentities', 'ids': 'Q1137046', 'languages': 'en', 'format': 'json'}
r = requests.get(base_url, params=payload)
response = r.json()
# entities = response['entities']
# for ide in entities:
#     all_relationships_dict[ide] = entities[ide]['labels']['en']['value']

CPU times: user 15.4 ms, sys: 7.96 ms, total: 23.3 ms
Wall time: 266 ms


In [46]:
{'id': 'Q1137046$484f9602-478e-27de-ced5-7cd2c33d07ea',
    'mainsnak': {'datatype': 'wikibase-item',
     'datavalue': {'type': 'wikibase-entityid',
      'value': {'entity-type': 'item', 'numeric-id': 1983062}},
     'property': 'P279',
     'snaktype': 'value'},
    'rank': 'normal',
    'type': 'statement'}

{'id': 'Q1137046$484f9602-478e-27de-ced5-7cd2c33d07ea',
 'mainsnak': {'datatype': 'wikibase-item',
  'datavalue': {'type': 'wikibase-entityid',
   'value': {'entity-type': 'item', 'numeric-id': 1983062}},
  'property': 'P279',
  'snaktype': 'value'},
 'rank': 'normal',
 'type': 'statement'}

In [None]:
# Double check
total = 0
count = 0
error = 0
with open('wikidata-20150907-star_trek-relevant-rels.json') as f:
    for line in f:
        try:
            total += 1
            cleaned = line.strip()[:-1]
            d = json.loads(cleaned)
            count += 1
        except Exception as e:
            error += 1
print('Total:', total)
print('Count:', count)
print('Error:', error)

In [None]:
# # Report
# import csv
# 
# total = 0
# count = 0
# error = 0
# with open('wikidata-20150907-star_trek-relevant-rels.json') as f1,open('star_trek-all_types.csv', 'w') as f2:
#     csv_writer = csv.writer(
#         f2,
#         delimiter=',',
#         quotechar='"',
#         quoting=csv.QUOTE_MINIMAL
#     )
#     for line in f1:
#         ide = None
#         title = None
#         type_id = None
#         type_title = None
#         try:
#             total += 1
#             cleaned = line.strip()[:-1]
#             d = json.loads(cleaned)
#             ide = d['id']
#             if 'en' in d['labels']:
#                 title = d['labels']['en']['value']
#             if 'P31' in d['claims']:
#                 type_id = 'Q' + str(d['claims']['P31'][0]['mainsnak']['datavalue']['value']['numeric-id'])
#                 if type_id in all_related_types_dict:
#                     type_title = all_related_types_dict[type_id]
#             csv_writer.writerow([ide, title, type_id, type_title])
#             count += 1
#         except Exception as e:
#             error += 1
# print('Total:', total)
# print('Count:', count)
# print('Error:', error)