In [1]:
from qwikidata.entity import WikidataItem, WikidataLexeme, WikidataProperty
from qwikidata.linked_data_interface import get_entity_dict_from_api

# create an item representing "Douglas Adams"
Q_DOUGLAS_ADAMS = "Q42"
q42_dict = get_entity_dict_from_api(Q_DOUGLAS_ADAMS)
q42 = WikidataItem(q42_dict)

# create a property representing "subclass of"
P_SUBCLASS_OF = "P279"
p279_dict = get_entity_dict_from_api(P_SUBCLASS_OF)
p279 = WikidataProperty(p279_dict)

# create a lexeme representing "bank"
L_BANK = "L3354"
l3354_dict = get_entity_dict_from_api(L_BANK)
l3354 = WikidataLexeme(l3354_dict)


In [27]:
from SPARQLWrapper import SPARQLWrapper, JSON
from qwikidata.sparql import (get_subclasses_of_item,
                              return_sparql_query_results)

def select_query_service(item, pre):
    return """
    PREFIX bd: <http://www.bigdata.com/rdf#>
    PREFIX wd: <http://www.wikidata.org/entity/>
    PREFIX wdt: <http://www.wikidata.org/prop/direct/>
    PREFIX wikibase: <http://wikiba.se/ontology#>
    
    SELECT ?type ?typeLabel ?superType ?superTypeLabel
    { 
      wd:"""+ item +""" wdt:"""+ pre +"""* ?type.
      ?type wdt:P279 ?superType.
      SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
    }
    """
def select_query(item, pre):
    return """
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX wd: <http://www.wikidata.org/entity/>
    PREFIX wdt: <http://www.wikidata.org/prop/direct/>
    
    SELECT ?item ?label
    WHERE 
    {
        wd:"""+ item +""" wdt:"""+ pre +""" ?item.
        ?item rdfs:label ?label.
        FILTER (lang(?label) = "en")
    }
    """

def map_dbp_wikid(dbpediaConcept):
    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    sparql.setQuery("""
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX dbpedia: <http://dbpedia.org/resource/>
    PREFIX owl: <http://www.w3.org/2002/07/owl#>
    SELECT ?obj
    WHERE 
    { 
        <"""+ dbpediaConcept +"""> (owl:sameAs|^owl:sameAs) ?obj
        FILTER(CONTAINS(str(?obj), \"http://www.wikidata.org\")) 
    }
    """)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()["results"]["bindings"]
    
    if len(results)==0:
        raise Exception("No results in dbpedia found for " + dbpediaConcept)
        print("No results in dbpedia found for " + dbpediaConcept)
    if len(results)>1:
        raise Exception("Multiple results in dbpedia found for " + dbpediaConcept)
        print("Multiple results in dbpedia found for " + dbpediaConcept)
    return results[0]["obj"]["value"]

def get_items_by_pre(node, predicate):
    sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
    sparql.setQuery(select_query(node.getId(), predicate))
    sparql.setReturnFormat(JSON)
    print(sparql)
    results = sparql.query().convert()["results"]["bindings"]
    return results

def get_subclass(node): return get_items_by_pre(node,"P279")
def get_instanceof(node): return get_items_by_pre(node, "P31")
    
def convert_dbp_wikid_ids(dbp_concepts):
    return [map_dbp_wikid(c).split("/")[-1] for c in dbp_concepts]

In [3]:
class Node:
    def __init__(self,node_id):
        self.id = node_id
        self.neighbours = []
    def getId(self):
        return self.id
    def add_neighbour(self, n):
        self.neighbours += [n]
    
class K_Graph:
    def __init__(self, nodes):
        self.graph_nodes = dict(zip(nodes, [Node(n) for n in nodes]))
    def get_graph_nodes(self):
        return self.graph_nodes
    def build_edge(self, n1, n2):
        n1.add_neighbour(n2)
    def build_graph(self):
        for k,n in self.graph_nodes.items():
            sub_ids = [s["item"]["value"].split("/")[-1] for s in get_subclass(n)]
            for s in sub_ids:
                if s not in self.graph_nodes:
                    self.graph_nodes[s] = Node(s)
                self.build_edge(n, self.graph_nodes[s])

In [4]:
import json
import numpy as np
with open('ac1-export-complete.json', 'r') as f:
    ac1_dict = json.load(f)["@graph"]
    ac1_dict = list(filter(lambda i: "concept" in i and type(i["concept"]) is list, ac1_dict))
    content = list(map(lambda idea: idea["content"], ac1_dict));
    concept_per_idea = list(map(lambda idea: [c["linkedConcept"] for c in idea["concept"]], ac1_dict));
    concepts = np.array(list(set([y for x in concept_per_idea for y in x])))
    print(len(concepts))

199


In [5]:
wiki_concepts = convert_dbp_wikid_ids(concepts)

In [68]:
n = Node("Q146")
get_subclass(n)

<SPARQLWrapper.Wrapper.SPARQLWrapper object at 0x0000000119005C50>
{"_defaultGraph" : None,
"_defaultReturnFormat" : 'xml',
"agent" : 'sparqlwrapper 1.8.4 (rdflib.github.io/sparqlwrapper)',
"customHttpHeaders" : {},
"endpoint" : 'https://query.wikidata.org/sparql',
"http_auth" : 'BASIC',
"method" : 'GET',
"onlyConneg" : False,
"parameters" : {},
"passwd" : None,
"queryString" : '\n    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n    PREFIX wd: <http://www.wikidata.org/entity/>\n    PREFIX wdt: <http://www.wikidata.org/prop/direct/>\n    \n    SELECT ?item ?label\n    WHERE \n    {\n        wd:Q146 wdt:P279 ?item.\n        ?item rdfs:label ?label.\n        FILTER (lang(?label) = "en")\n    }\n    ',
"queryType" : 'SELECT',
"requestMethod" : 'urlencoded',
"returnFormat" : 'json',
"timeout" : None,
"updateEndpoint" : 'https://query.wikidata.org/sparql',
"user" : None}


HTTPError: HTTP Error 403: Forbidden

In [66]:
sparql_query = """
SELECT (COUNT(?item) AS ?count)
WHERE 
{
    ?item wdt:P31/wdt:P279* wd:Q5.
}
"""
res = return_sparql_query_results(sparql_query)

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [None]:
graph = K_Graph(wiki_concepts)
graph.build_graph()

In [61]:
import requests
query = '''

SELECT ?president ?cause ?dob ?dod WHERE {
    ?pid wdt:P39 wd:Q11696 .
    ?pid wdt:P509 ?cid .
    ?pid wdt:P569 ?dob .
    ?pid wdt:P570 ?dod .

    OPTIONAL {
        ?pid rdfs:label ?president filter (lang(?president) = "en") .
    }
    OPTIONAL {
        ?cid rdfs:label ?cause filter (lang(?cause) = "en") .
    }
}'''

n = Node("Q146")
url = 'https://query.wikidata.org/sparql'
data = requests.get(url, params={'query': query, 'format': 'json'})


In [62]:
print(data)

<Response [200]>
