In [1]:
import pandas as pd
import json
from SPARQLWrapper import SPARQLWrapper, JSON

In [2]:
def get_sparql_dataframe(service, query):
    """
    Run a SPARQL query and return its result bindings as a Pandas data frame.

    Parameters
    ----------
    service : str
        URL of the SPARQL endpoint to send the query to.
    query : str
        SPARQL query text; the endpoint is asked for a JSON response.

    Returns
    -------
    pandas.DataFrame
        One column per variable listed in the response's ``head.vars`` (in
        that order) and one row per entry in ``results.bindings``. A variable
        that is missing from a binding (e.g. an unmatched OPTIONAL) becomes
        ``None`` in that row. Only each binding's ``value`` is kept; type,
        datatype and language information is dropped.

    Raises
    ------
    KeyError
        If the JSON payload has no ``head.vars`` or ``results.bindings``.
    """
    sparql = SPARQLWrapper(service)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    result = sparql.query()

    # Parse the raw response body, and release the underlying HTTP handle
    # whether or not parsing succeeds.
    try:
        payload = json.load(result.response)
    finally:
        result.response.close()

    cols = payload['head']['vars']

    # `.get(col, {})` tolerates variables absent from a binding, which then
    # yield None instead of raising.
    rows = [
        [binding.get(col, {}).get('value') for col in cols]
        for binding in payload['results']['bindings']
    ]

    return pd.DataFrame(rows, columns=cols)

In [3]:
# SPARQL endpoint of the Wikidata Query Service; passed as `service` to
# get_sparql_dataframe.
wds = "https://query.wikidata.org/sparql"

In [4]:
# SPARQL query: every Wikidata item that has a wdt:P496 (ORCID iD) statement,
# together with its English label (filled in by the wikibase:label service)
# and, when one exists, its English schema:description.
# Every prefix used in the query is declared explicitly so that it does not
# depend on the endpoint predefining any of them.
rq = """
PREFIX bd: <http://www.bigdata.com/rdf#>
PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX schema: <http://schema.org/>

SELECT DISTINCT
  ?item
  ?itemLabel
  ?orcid
  ?description
WHERE {
  ?item wdt:P496 ?orcid .
  OPTIONAL {
    ?item schema:description ?description .
    FILTER (lang(?description) = "en")
  }
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "en" .
  }
}
"""

In [5]:
# Send the query to the endpoint and keep the tabular result for the cells below.
df = get_sparql_dataframe(service=wds, query=rq)

In [6]:
# Preview the first rows of the query result.
df.head()

Unnamed: 0,item,itemLabel,orcid,description
0,http://www.wikidata.org/entity/Q74301,Hans Joachim Schellnhuber,0000-0001-7453-4935,German physicist
1,http://www.wikidata.org/entity/Q825669,Bernhard Nauck,0000-0003-3156-328X,German sociologist
2,http://www.wikidata.org/entity/Q1265435,Duncan McCargo,0000-0002-4352-5734,British academic
3,http://www.wikidata.org/entity/Q503265,Andres Jäschke,0000-0002-4625-2655,German chemist and professor
4,http://www.wikidata.org/entity/Q474194,Amory Lovins,0000-0002-6362-3526,American physicist


In [7]:
# Per-column summary: non-null count, number of distinct values, the most
# frequent value and how often it occurs.
df.describe()

Unnamed: 0,item,itemLabel,orcid,description
count,95465,95465,95465,6177
unique,95438,94620,95391,2074
top,http://www.wikidata.org/entity/Q41353653,Wei Wang,0000-0003-0766-4590,researcher
freq,2,9,3,3037


In [8]:
# Ten most common description strings; value_counts() orders by descending frequency.
df['description'].value_counts().head(10)

researcher                     3037
German physicist                 29
American computer scientist      28
German computer scientist        28
computer scientist               24
American mathematician           20
American chemist                 20
British computer scientist       18
biologist                        18
German economist                 16
Name: description, dtype: int64