In [1]:
from qwikidata.entity import WikidataItem, WikidataLexeme, WikidataProperty
from qwikidata.linked_data_interface import get_entity_dict_from_api

# Fetch three sample Wikidata entities (an item, a property and a lexeme) with
# get_entity_dict_from_api and wrap each returned dict in the matching
# qwikidata entity class.

# create an item representing "Douglas Adams"
Q_DOUGLAS_ADAMS = "Q42"
q42_dict = get_entity_dict_from_api(Q_DOUGLAS_ADAMS)
q42 = WikidataItem(q42_dict)

# create a property representing "subclass of"
P_SUBCLASS_OF = "P279"
p279_dict = get_entity_dict_from_api(P_SUBCLASS_OF)
p279 = WikidataProperty(p279_dict)

# create a lexeme representing "bank"
L_BANK = "L3354"
l3354_dict = get_entity_dict_from_api(L_BANK)
l3354 = WikidataLexeme(l3354_dict)


In [211]:
from SPARQLWrapper import SPARQLWrapper, JSON
from qwikidata.sparql import (get_subclasses_of_item, return_sparql_query_results)
import requests
import time

def select_query_service(item, pre):
    """Build the SPARQL text that walks ``pre`` transitively from ``item``.

    The query binds ``?item`` to every entity reachable from ``wd:<item>``
    over zero or more ``wdt:<pre>`` hops, and ``?superItem`` to each direct
    ``wdt:P279`` target of that entity. English labels are requested through
    the ``wikibase:label`` service.

    Args:
        item: Wikidata entity id (string), e.g. "Q146".
        pre: Wikidata property id (string), e.g. "P279".

    Returns:
        The query as a string.
    """
    # Everything up to the point where the entity id is spliced in.
    head = """
    PREFIX bd: <http://www.bigdata.com/rdf#>
    PREFIX wd: <http://www.wikidata.org/entity/>
    PREFIX wdt: <http://www.wikidata.org/prop/direct/>
    PREFIX wikibase: <http://wikiba.se/ontology#>
    
    SELECT ?item ?itemLabel ?superItem ?superItemLabel
    { 
      wd:"""
    # Everything after the property id; the leading "*" makes the path transitive.
    tail = """* ?item.
      ?item wdt:P279 ?superItem.
      SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
    }
    """
    return "".join([head, item, " wdt:", pre, tail])
def select_query(item, pre):
    """Build the SPARQL text for the direct ``pre`` values of ``item``.

    The query binds ``?item`` to every object of ``wd:<item> wdt:<pre>`` and
    ``?label`` to its English ``rdfs:label``.

    Args:
        item: Wikidata entity id (string), e.g. "Q42".
        pre: Wikidata property id (string), e.g. "P31".

    Returns:
        The query as a string.
    """
    # Prologue, projection and the start of the triple pattern.
    head = """
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX wd: <http://www.wikidata.org/entity/>
    PREFIX wdt: <http://www.wikidata.org/prop/direct/>
    
    SELECT ?item ?label
    WHERE 
    {
        wd:"""
    # Rest of the pattern: label lookup restricted to English.
    tail = """ ?item.
        ?item rdfs:label ?label.
        FILTER (lang(?label) = "en")
    }
    """
    return "".join([head, item, " wdt:", pre, tail])

def map_dbp_wikid(dbpediaConcept):
    """Look up the Wikidata IRI linked to a DBpedia resource via ``owl:sameAs``.

    Queries the public DBpedia SPARQL endpoint for every ``?obj`` connected to
    ``dbpediaConcept`` by ``owl:sameAs`` in either direction and keeps only the
    objects whose IRI contains ``http://www.wikidata.org``.

    Args:
        dbpediaConcept: Full IRI of the DBpedia resource (it is placed between
            ``<`` and ``>`` in the query).

    Returns:
        The IRI string of the single matching object.

    Raises:
        Exception: if the endpoint returns no match, or more than one match.
    """
    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    sparql.setQuery("""
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX dbpedia: <http://dbpedia.org/resource/>
    PREFIX owl: <http://www.w3.org/2002/07/owl#>
    SELECT ?obj
    WHERE 
    { 
        <"""+ dbpediaConcept +"""> (owl:sameAs|^owl:sameAs) ?obj
        FILTER(CONTAINS(str(?obj), \"http://www.wikidata.org\")) 
    }
    """)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()["results"]["bindings"]

    # Exactly one mapping is required; the message carried by the exception is
    # the only report (the former print calls after each raise never ran).
    if len(results) == 0:
        raise Exception("No results in dbpedia found for " + dbpediaConcept)
    if len(results) > 1:
        raise Exception("Multiple results in dbpedia found for " + dbpediaConcept)
    return results[0]["obj"]["value"]

def get_items_by_pre(nodeId, predicate):
    """Return the bindings of ``select_query(nodeId, predicate)`` from Wikidata.

    Args:
        nodeId: Wikidata entity id used as the subject.
        predicate: Wikidata property id used as the predicate.

    Returns:
        The ``results.bindings`` list of the JSON response.
    """
    endpoint = SPARQLWrapper("https://query.wikidata.org/sparql")
    query_text = select_query(nodeId, predicate)
    endpoint.setQuery(query_text)
    endpoint.setReturnFormat(JSON)
    # Show the configured wrapper before the request is sent.
    print(endpoint)
    response = endpoint.query().convert()
    return response["results"]["bindings"]

def wikidata_sparql_request(nodeId, predicate, counter=0):
    """Run the ``select_query_service`` query on Wikidata and return its bindings.

    Any non-200 response is retried after a one-second pause until the retry
    counter reaches 10.

    Args:
        nodeId: Wikidata entity id the traversal starts from, e.g. "Q146".
        predicate: Wikidata property id that is followed, e.g. "P279".
        counter: Number of retries already used; callers normally leave it at 0.

    Returns:
        The ``results.bindings`` list of the JSON response.

    Raises:
        Exception: if the endpoint still answers with a non-200 status once
            the retries are exhausted. The message contains the last status code.
    """
    url = 'https://query.wikidata.org/sparql'
    max_retries = 10
    # The query text is the same for every attempt, so build it only once.
    params = {'query': select_query_service(nodeId, predicate), 'format': 'json'}

    attempt = counter
    while True:
        res = requests.get(url, params=params)
        # Compare by value: `is 200` tests object identity, which only works by
        # accident of integer caching and triggers a SyntaxWarning on 3.8+.
        if res.status_code == 200:
            return res.json()["results"]["bindings"]
        if attempt >= max_retries:
            # NOTE(review): a persistent 403 may mean the service is rejecting
            # this client (e.g. missing User-Agent or rate limiting) - confirm.
            raise Exception("Status code is " + str(res.status_code))
        time.sleep(1)
        attempt += 1

def get_all_subclasses(nodeId):
    """Return the query bindings obtained by following P279 from ``nodeId``."""
    bindings = wikidata_sparql_request(nodeId, "P279")
    return bindings
def get_all_instances_of(nodeId):
    """Return the query bindings obtained by following P31 from ``nodeId``."""
    bindings = wikidata_sparql_request(nodeId, "P31")
    return bindings
    
def convert_dbp_wikid_ids(dbp_concepts):
    """Map each DBpedia concept IRI to the last path segment of its Wikidata IRI.

    Args:
        dbp_concepts: Iterable of DBpedia resource IRIs.

    Returns:
        A list with one id per input concept, in input order.
    """
    wikidata_ids = []
    for concept in dbp_concepts:
        wikidata_iri = map_dbp_wikid(concept)
        # Keep only what follows the final "/" of the IRI.
        wikidata_ids.append(wikidata_iri.rsplit("/", 1)[-1])
    return wikidata_ids

In [202]:
class Node:
    """A graph vertex: an identifier plus the keys of its outgoing neighbours."""

    def __init__(self, node_id):
        # Identifier of this vertex.
        self.id = node_id
        # Keys of adjacent vertices, in insertion order (duplicates allowed).
        self.neighbours = []

    def getId(self):
        """Return the identifier given at construction."""
        return self.id

    def add_neighbour(self, nkey):
        """Append ``nkey`` to this vertex's neighbour list."""
        self.neighbours.append(nkey)

# Directed Knowledge Graph
class K_Graph:
    """Directed graph over Wikidata ids, built from subclass query results.

    ``graph`` maps an id to its ``Node``; an edge ``a -> b`` is stored as ``b``
    in the neighbour list of node ``a``.
    """

    def __init__(self, nodes):
        """Create one isolated node per id in ``nodes`` (the seed concepts)."""
        self.graph = {n: Node(n) for n in nodes}
        self.concepts = nodes
        # Set once build_graph has processed every seed concept.
        self.is_build = False

    def get_graph(self):
        """Return the id -> Node mapping."""
        return self.graph

    def build_edge(self, key1, key2):
        """Add the directed edge ``key1 -> key2``, creating missing nodes.

        Adding an edge that already exists is a no-op. The same edge is
        reported by the queries of several concepts that share ancestors, and
        again when build_graph is rerun after a failed attempt, so appending
        blindly would fill the neighbour lists with duplicates.
        """
        for key in (key1, key2):
            if key not in self.graph:
                self.graph[key] = Node(key)
        source = self.graph[key1]
        if key2 not in source.neighbours:
            source.add_neighbour(key2)

    def build_graph(self):
        """Query the subclass hierarchy of every seed concept and add its edges.

        Each result row contributes the edge ``item -> superItem``, using the
        last path segment of both IRIs as node ids. If a request raises, the
        edges added so far are kept and ``is_build`` stays False, so a later
        call starts over from the first concept.

        Returns:
            True, both after a successful build and when already built.
        """
        if self.is_build:
            return True

        for n in self.concepts:
            # Progress output: one line per concept being queried.
            print(n)
            for s in get_all_subclasses(n):
                self.build_edge(s["item"]["value"].split("/")[-1], s["superItem"]["value"].split("/")[-1])
        self.is_build = True
        return True

In [4]:
import json
import numpy as np
# Load the exported graph; only the "@graph" entries are used.
with open('ac1-export-complete.json', 'r') as f:
    ac1_dict = json.load(f)["@graph"]

# Keep the entries whose "concept" field is present and is a list.
ac1_dict = [entry for entry in ac1_dict
            if "concept" in entry and type(entry["concept"]) is list]

# Text of every kept entry.
content = [entry["content"] for entry in ac1_dict]

# Linked concept references, grouped per entry.
concept_per_idea = [[c["linkedConcept"] for c in entry["concept"]]
                    for entry in ac1_dict]

# Distinct linked concepts across all entries.
concepts = np.array(list(set(link for links in concept_per_idea for link in links)))
print(len(concepts))

199


In [5]:
# Translate every linked concept into a Wikidata id; convert_dbp_wikid_ids
# performs one DBpedia lookup per concept.
wiki_concepts = convert_dbp_wikid_ids(concepts)

In [204]:
# Try the subclass query on a single entity (Q146) and display the raw bindings.
n = Node("Q146")
get_all_subclasses(n.getId())

[{'item': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q146'},
  'superItem': {'type': 'uri',
   'value': 'http://www.wikidata.org/entity/Q39201'},
  'itemLabel': {'xml:lang': 'en', 'type': 'literal', 'value': 'house cat'},
  'superItemLabel': {'xml:lang': 'en', 'type': 'literal', 'value': 'pet'}},
 {'item': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q146'},
  'superItem': {'type': 'uri',
   'value': 'http://www.wikidata.org/entity/Q57814795'},
  'itemLabel': {'xml:lang': 'en', 'type': 'literal', 'value': 'house cat'},
  'superItemLabel': {'xml:lang': 'en',
   'type': 'literal',
   'value': 'domesticated mammal'}},
 {'item': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q729'},
  'superItem': {'type': 'uri',
   'value': 'http://www.wikidata.org/entity/Q7239'},
  'itemLabel': {'xml:lang': 'en', 'type': 'literal', 'value': 'animal'},
  'superItemLabel': {'xml:lang': 'en',
   'type': 'literal',
   'value': 'organism'}},
 {'item': {'type': 'uri', 'value

In [184]:
# Count the items whose P31 value is Q5 or is connected to Q5 by a chain of
# P279 links.
# NOTE(review): the query declares no PREFIX for wd:/wdt:, so it presumably
# relies on the endpoint predefining them - confirm.
sparql_query = """
SELECT (COUNT(?item) AS ?count)
WHERE 
{
    ?item wdt:P31/wdt:P279* wd:Q5.
}
"""
res = return_sparql_query_results(sparql_query)

In [185]:
# Display the raw response of the count query.
res


{'head': {'vars': ['count']},
 'results': {'bindings': [{'count': {'datatype': 'http://www.w3.org/2001/XMLSchema#integer',
     'type': 'literal',
     'value': '5185433'}}]}}

In [212]:
# Build the graph for all mapped concepts; this issues one Wikidata request per
# concept. The recorded run of this cell stopped with
# "Exception: Status code is 403", which leaves the graph only partially built.
graph = K_Graph(wiki_concepts)
graph.build_graph()


Q3695508
Q1428941
Q23387
Q899581
Q9893
Q1295013
Q597512
Q110079
Q623149
Q2095
Q128234
Q702492
Q193837
Q11016
Q35473
Q2731419
Q3947
Q43006
Q218152
Q656656
Q795052
Q6505497
Q287
Q23797
Q28823
Q3396184


Exception: Status code is 403

In [213]:
# Number of nodes currently in the graph: the seed concepts plus every id
# added while building edges.
print(len(graph.get_graph()))

362


In [145]:
import requests
# Send the subclass query for `n` directly with requests and keep the bindings.
url = 'https://query.wikidata.org/sparql'
response = requests.get(url, params={'query': select_query_service(n.getId(), "P279"), 'format': 'json'})
# Fail with the HTTP error itself rather than a JSON decoding error when the
# endpoint rejects the request.
response.raise_for_status()
data = response.json()["results"]["bindings"]
