In [1]:
from SPARQLWrapper import SPARQLWrapper, JSON, RDFXML, XML
from rdflib import Graph
import pprint, time, sys

In [2]:
# CONSTRUCT query that copies every (mention, itsrdf:taIdentRef, entity)
# triple out of the local store unchanged.
mention_query = """
    PREFIX base: <http://www.kde.cs.tsukuba.ac.jp/~aso/w3c-email/>
    PREFIX schema: <https://schema.org/>
    PREFIX email: <http://www.w3.org/2000/10/swap/pim/email#>
    PREFIX wd: <http://www.wikidata.org/entity/>
    PREFIX itsrdf: <https://www.w3.org/2005/11/its/rdf#>
    PREFIX olia: <http://purl.org/olia/olia.owl#>
    PREFIX nif: <http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#>
    PREFIX nerd: <http://nerd.eurecom.fr/ontology#>
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
    PREFIX owl: <http://www.w3.org/2002/07/owl#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    
    CONSTRUCT{
        ?mention itsrdf:taIdentRef ?wikidataEntity.
    }
    WHERE{
        ?mention itsrdf:taIdentRef ?wikidataEntity.
    }
    """

# Client for the local SPARQL endpoint.
sparql = SPARQLWrapper("http://localhost:3030/w3c-email-q17/sparql")
sparql.setQuery(mention_query)
sparql.setReturnFormat(XML)

# The converted result is iterated as (s, p, o) triples by the cells below.
local_response = sparql.query()
results = local_response.convert()

In [4]:
# pprint.pprint(results.serialize(format='turtle').decode())

In [3]:
from rdflib import Graph, Literal, RDF, URIRef, BNode, Namespace
from rdflib.namespace import CSVW, DC, DCAT, DCTERMS, DOAP, FOAF, ODRL2, ORG, OWL, \
                           PROF, PROV, RDF, RDFS, SDO, SH, SKOS, SOSA, SSN, TIME, \
                           VOID, XMLNS, XSD
# Project base IRI (bound to the empty prefix on the graph) and the e-mail vocabulary.
BASE = Namespace("http://www.kde.cs.tsukuba.ac.jp/~aso/w3c-email/")
EMAIL = Namespace("http://www.w3.org/2000/10/swap/pim/email#")

# Wikidata: entity IRIs and direct-property IRIs (wdt:P31, wdt:P279, ...).
WD = Namespace("http://www.wikidata.org/entity/")
WDT = Namespace("http://www.wikidata.org/prop/direct/")

# Annotation vocabularies; ITSRDF supplies taIdentRef / taClassRef.
ITSRDF = Namespace("https://www.w3.org/2005/11/its/rdf#")
NIF = Namespace("http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#")
OLIA = Namespace("http://purl.org/olia/olia.owl#")
NERD = Namespace("http://nerd.eurecom.fr/ontology#")

In [4]:
# RDFグラフ
g = Graph()
g.bind('',BASE)
g.bind('schema',SDO)
g.bind('foaf',FOAF)
g.bind('owl',OWL)
g.bind('rdf',RDF)
g.bind('rdfs',RDFS)
g.bind('olia',OLIA)
g.bind('nerd',NERD)
g.bind('nif',NIF)
g.bind('itsrdf',ITSRDF)
g.bind('wd',WD)
g.bind('wdt',WDT)

In [5]:
# Copy every triple of the local CONSTRUCT result into the working graph.
for subj, pred, obj in results:
    g.add((subj, pred, obj))

In [6]:
# Distinct object IRIs of the itsrdf:taIdentRef triples, as plain strings;
# these are the entities looked up on the remote endpoint later on.
# Sorted so the list order (and therefore the numbered progress log of the
# lookup loop) is stable across kernel restarts: the iteration order of a
# bare set of strings changes with Python's per-process hash randomisation.
wikidataEntity = sorted({str(obj) for _, _, obj in results})

In [7]:
# Number of distinct entity IRIs collected above (one remote query is issued per entry).
len(wikidataEntity)

595

In [8]:
def get_results(endpoint_url, query):
    """Send ``query`` to the SPARQL endpoint at ``endpoint_url``.

    The request identifies itself as ``WDQS-example Python/<major>.<minor>``
    (taken from the running interpreter) and asks for the XML return format.

    Returns whatever ``SPARQLWrapper``'s ``convert()`` produces for the
    response; callers in this notebook iterate it as triples.
    """
    major, minor = sys.version_info[0], sys.version_info[1]
    agent_string = "WDQS-example Python/{}.{}".format(major, minor)
    # TODO adjust user agent; see https://w.wiki/CX6
    client = SPARQLWrapper(endpoint_url, agent=agent_string)
    client.setQuery(query)
    client.setReturnFormat(XML)
    response = client.query()
    return response.convert()

In [9]:
endpoint_url = "https://query.wikidata.org/sparql"

# Per-entity CONSTRUCT query. For the entity IRI substituted for %(entity)s it
# returns its wdt:P31 and wdt:P279 values, its English label, and the English
# labels of those P31/P279 values. Every part is OPTIONAL so an entity missing
# one of them still yields the others. Prefixes are declared explicitly so the
# query does not rely on the endpoint predefining them.
CLASS_INFO_QUERY = """
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
CONSTRUCT {
    <%(entity)s> wdt:P31 ?class1 ;
                 wdt:P279 ?class2 ;
                 rdfs:label ?label .
    ?class1 rdfs:label ?classlabel1 .
    ?class2 rdfs:label ?classlabel2 .
}
WHERE {
    OPTIONAL { <%(entity)s> wdt:P31 ?class1 .
               ?class1 rdfs:label ?classlabel1 .
               FILTER(lang(?classlabel1) = 'en') }
    OPTIONAL { <%(entity)s> wdt:P279 ?class2 .
               ?class2 rdfs:label ?classlabel2 .
               FILTER(lang(?classlabel2) = 'en') }
    OPTIONAL { <%(entity)s> rdfs:label ?label .
               FILTER(lang(?label) = 'en') }
}
"""

start = time.time()

for i, entity in enumerate(wikidataEntity):
    query = CLASS_INFO_QUERY % {'entity': entity}
    # Keep the per-entity graph under its own name: rebinding `results` here
    # would clobber the local CONSTRUCT result that earlier cells read, so
    # re-running those cells after this one would silently use the wrong graph.
    entity_graph = get_results(endpoint_url, query)
    for triple in entity_graph:
        g.add(triple)
    # Progress log: entity index and seconds elapsed since the loop started.
    print('{}:{}'.format(i, time.time()-start))
print(time.time()-start)

0:1.9232871532440186
1:3.3031771183013916
2:4.354772090911865
3:5.32398533821106
4:6.9437172412872314
5:8.069536209106445
6:9.29788613319397
7:10.462874174118042
8:11.244425058364868
9:12.780203104019165
10:13.802605152130127
11:15.157036066055298
12:15.852317333221436
13:16.77046823501587
14:17.7975492477417
15:18.923272132873535
16:20.051069974899292
17:21.177611112594604
18:22.304403066635132
19:23.4289653301239
20:24.454434156417847
21:25.78464412689209
22:26.90981411933899
23:28.038397073745728
24:29.369742155075073
25:29.984577178955078
26:31.009058237075806
27:31.610685110092163
28:32.33882927894592
29:33.343512296676636
30:34.48995018005371
31:35.7180962562561
32:36.68778324127197
33:37.76559019088745
34:38.58245515823364
35:39.71193027496338
36:40.941198110580444
37:41.96583127975464
38:42.57695412635803
39:43.64278507232666
40:44.7173330783844
41:45.344290256500244
42:46.36764907836914
43:47.49660539627075
44:48.61920118331909
45:49.3379180431366
46:50.15897607803345
47:51.08

In [10]:
ITSRDF = Namespace("https://www.w3.org/2005/11/its/rdf#")

# Derive itsrdf:taClassRef for every mention from the wdt:P31 and wdt:P279
# values of its linked entity.
# UNION is required here: with two OPTIONAL clauses sharing ?class, once the
# P31 clause has bound ?class the P279 clause can only match that same value,
# so the P279 values of any entity that also has a P31 value were dropped.
# Entities with neither property produce no triple, exactly as before.
class_ref_query = """
    CONSTRUCT { ?spot itsrdf:taClassRef ?class . }
    WHERE {
        ?spot itsrdf:taIdentRef ?entity .
        { ?entity wdt:P31 ?class . }
        UNION
        { ?entity wdt:P279 ?class . }
    }
"""
qres = g.query(class_ref_query, initNs={'itsrdf': ITSRDF, 'wdt': WDT})

In [11]:
# Merge the triples produced by the CONSTRUCT query back into the main graph.
for subj, pred, obj in qres.graph:
    g.add((subj, pred, obj))

In [15]:
#print(g.serialize(format="turtle").decode("utf-8"))

In [12]:
# Write the graph to a file in Turtle format.
# NOTE(review): absolute, machine-specific path — consider making it configurable.
output_path = '/Users/taroaso/myprojects/OpenIE/trec/output/17/class_info.ttl'
g.serialize(format='turtle', destination=output_path)