# Exploring BNF/WikiData SPARQL data

https://www.w3.org/2009/Talks/0615-qbe/
https://www.ibm.com/developerworks/library/j-sparql/
https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples
https://github.com/bobdc/misc/blob/master/JupyterSPARQL/JupyterSPARQLFun.ipynb

In [50]:
import requests
import pandas as pd
import json
import SPARQLWrapper

In [51]:
# https://lawlesst.github.io/notebook/sparql-dataframe.html
def get_sparql_dataframe(endpoint, query):
    """
    Run a SPARQL query and return its result bindings as a Pandas data frame.

    Parameters:
        endpoint: URL of the SPARQL endpoint to query.
        query: SPARQL query text.

    Returns:
        A DataFrame with one column per variable listed under
        ``head.vars`` of the JSON response and one row per binding.
        A variable missing from a binding yields ``None`` in that cell.
    """
    client = SPARQLWrapper.SPARQLWrapper(endpoint)
    client.setQuery(query)
    client.setReturnFormat(SPARQLWrapper.JSON)

    # Decode the JSON body straight from the underlying HTTP response.
    payload = json.load(client.query().response)
    columns = payload['head']['vars']

    # Keep only the 'value' of each bound term, in column order.
    records = [
        [binding.get(name, {}).get('value') for name in columns]
        for binding in payload['results']['bindings']
    ]

    return pd.DataFrame(records, columns=columns)

In [52]:
# Common SPARQL PREFIX declarations gathered into a single string.
# NOTE(review): none of the visible cells reference `prefixes`, and the
# `mrtg:` entry points at a placeholder IRI (http://xxx.org/XXX/) — confirm
# whether this block is still needed.
prefixes = """
PREFIX mus: <http://data.doremus.org/ontology#>
PREFIX ecrm: <http://erlangen-crm.org/current/>
PREFIX efrbroo: <http://erlangen-crm.org/efrbroo/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX dcmitype: <http://purl.org/dc/dcmitype/>
PREFIX dwc: <http://rs.tdwg.org/dwc/terms/>
PREFIX tdwg: <http://rs.tdwg.org/dwc/terms/#>
PREFIX mrtg: <http://xxx.org/XXX/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX txn:   <http://lod.taxonconcept.org/ontology/txn.owl#>
PREFIX dsw: <http://purl.org/dsw/>
PREFIX mbank: <http://www.morphbank.net/schema/morphbank#>
"""

## WikiData

In [53]:
# Wikidata SPARQL endpoint URL used by the Wikidata cells below
# (`endpoint` is rebound later for data.bnf.fr).
endpoint = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql'

In [54]:
# Select every entity that holds position wd:Q11696 (wdt:P39) and has
# wdt:P509, wdt:P569 and wdt:P570 values; the English labels of the entity
# and of its P509 value are optional.
# Judging by the variable names and the recorded output, these are US
# presidents with their cause of death, date of birth and date of death.
query = """
PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?president ?cause ?dob ?dod WHERE {
    ?pid wdt:P39 wd:Q11696 .
    ?pid wdt:P509 ?cid .
    ?pid wdt:P569 ?dob .
    ?pid wdt:P570 ?dod .

    OPTIONAL {
        ?pid rdfs:label ?president filter (lang(?president) = "en") .
    }
    OPTIONAL {
        ?cid rdfs:label ?cause filter (lang(?cause) = "en") .
    }
}
"""

In [55]:
# Send the query as a plain HTTP GET and display the raw SPARQL JSON result.
requests.get(endpoint, params={'query': query, 'format': 'json'}).json()

{'head': {'vars': ['president', 'cause', 'dob', 'dod']},
 'results': {'bindings': [{'cause': {'xml:lang': 'en',
     'type': 'literal',
     'value': 'influenza'},
    'president': {'xml:lang': 'en',
     'type': 'literal',
     'value': 'Benjamin Harrison'},
    'dod': {'datatype': 'http://www.w3.org/2001/XMLSchema#dateTime',
     'type': 'literal',
     'value': '1901-03-13T00:00:00Z'},
    'dob': {'datatype': 'http://www.w3.org/2001/XMLSchema#dateTime',
     'type': 'literal',
     'value': '1833-08-20T00:00:00Z'}},
   {'cause': {'xml:lang': 'en',
     'type': 'literal',
     'value': "Alzheimer's disease"},
    'president': {'xml:lang': 'en',
     'type': 'literal',
     'value': 'Ronald Reagan'},
    'dod': {'datatype': 'http://www.w3.org/2001/XMLSchema#dateTime',
     'type': 'literal',
     'value': '2004-06-05T00:00:00Z'},
    'dob': {'datatype': 'http://www.w3.org/2001/XMLSchema#dateTime',
     'type': 'literal',
     'value': '1911-02-06T00:00:00Z'}},
   {'cause': {'xml:lang'

In [56]:
# Run the current `query` through the helper and show the first rows as a DataFrame.
get_sparql_dataframe(endpoint, query).head()

Unnamed: 0,president,cause,dob,dod
0,Benjamin Harrison,influenza,1833-08-20T00:00:00Z,1901-03-13T00:00:00Z
1,Ronald Reagan,Alzheimer's disease,1911-02-06T00:00:00Z,2004-06-05T00:00:00Z
2,Zachary Taylor,cholera,1784-11-24T00:00:00Z,1850-07-09T00:00:00Z
3,James K. Polk,cholera,1795-11-02T00:00:00Z,1849-06-15T00:00:00Z
4,Calvin Coolidge,myocardial infarction,1872-07-04T00:00:00Z,1933-01-05T00:00:00Z


## Data BNF

http://data.bnf.fr/fr/opendata

In [57]:
# Point `endpoint` at the data.bnf.fr SPARQL service (replaces the Wikidata URL).
endpoint = 'http://data.bnf.fr/sparql'

In [125]:
# Look up the resource carrying ISNI "0000000108935378" together with its
# owl:sameAs links to Wikidata and MusicBrainz.
# NOTE(review): no PREFIX lines are declared (isni:, owl:, bio:, foaf:); the
# query presumably relies on prefixes predefined by the endpoint — confirm.
# NOTE(review): name, birth and death share one OPTIONAL group, so they are
# bound all together or not at all; the recorded output has all three empty.
query = """
SELECT ?artist ?name ?bdate ?ddate ?wdurl ?mburl
WHERE {
    ?artist isni:identifierValid "0000000108935378" .
    ?artist owl:sameAs ?wdurl .
    FILTER (regex (?wdurl, "wikidata.org"))
    ?artist owl:sameAs ?mburl .
    FILTER (regex (?mburl, "musicbrainz.org")) .
    OPTIONAL {
        ?artist bio:birth ?bdate .
        ?artist bio:death ?ddate .
        ?artist foaf:name ?name
    }
}
"""
get_sparql_dataframe(endpoint, query).head()

Unnamed: 0,artist,name,bdate,ddate,wdurl,mburl
0,http://data.bnf.fr/ark:/12148/cb13894801b,,,,http://wikidata.org/entity/Q319732,https://musicbrainz.org/artist/88b4ad33-63ba-4...


In [97]:
# List the distinct predicate/value pairs attached to one data.bnf.fr resource.
query = """
SELECT DISTINCT ?predicate ?val
WHERE {
    <http://data.bnf.fr/ark:/12148/cb13894801b> ?predicate ?val
}
"""
get_sparql_dataframe(endpoint, query).head()

Unnamed: 0,predicate,val
0,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://www.w3.org/2004/02/skos/core#Concept
1,http://purl.org/dc/terms/created,1989-06-01
2,http://purl.org/dc/terms/modified,2016-04-18
3,http://www.w3.org/2000/01/rdf-schema#seeAlso,http://catalogue.bnf.fr/ark:/12148/cb13894801b
4,http://www.w3.org/2002/07/owl#sameAs,http://wikidata.org/entity/Q319732


In [119]:
# Find the resource matching one exact foaf:name literal and return its names.
# ?isni is selected, but its triple pattern is commented out inside the query
# text, so that column comes back unbound (empty in the recorded output).
query = """
SELECT ?artist ?name ?isni
WHERE {
    ?artist foaf:name "Emilʹ Grigorʹevič Gilelʹs" ;
            foaf:name ?name .
    #?artist isni:identifierValid ?isni
}
"""
get_sparql_dataframe(endpoint, query).head()

Unnamed: 0,artist,name,isni
0,http://data.bnf.fr/ark:/12148/cb13894801b#about,Emilʹ Grigorʹevič Gilelʹs,


In [None]:
# Example request URL for the data.bnf.fr SPARQL endpoint (editions of a work, with optional date, title, publisher and Gallica link); kept as a comment because a bare URL is not valid Python:
# http://data.bnf.fr/sparql?default-graph-uri=&query=PREFIX+foaf%3A+%3Chttp%3A%2F%2Fxmlns.com%2Ffoaf%2F0.1%2F%3E%0D%0APREFIX+rdarelationships%3A+%3Chttp%3A%2F%2Frdvocab.info%2FRDARelationshipsWEMI%2F%3E%0D%0APREFIX+dcterms%3A+%3Chttp%3A%2F%2Fpurl.org%2Fdc%2Fterms%2F%3E%0D%0ASELECT+DISTINCT+%3Fedition+%3Ftitre+%3Fdate+%3Fediteur+%3FURLGallica%0D%0AWHERE+{%0D%0A%3Chttp%3A%2F%2Fdata.bnf.fr%2Fark%3A%2F12148%2Fcb12258414j%3E+foaf%3Afocus+%3Foeuvre.%0D%0A%3Fedition+rdarelationships%3AworkManifested+%3Foeuvre.%0D%0AOPTIONAL+{%0D%0A%3Fedition+dcterms%3Adate+%3Fdate.%0D%0A++}%0D%0AOPTIONAL+{%0D%0A%3Fedition+dcterms%3Atitle+%3Ftitre.+%0D%0A++}%0D%0AOPTIONAL+{%0D%0A%3Fedition+dcterms%3Apublisher+%3Fediteur.%0D%0A++}%0D%0AOPTIONAL+{%0D%0A%3Fedition+rdarelationships%3AelectronicReproduction+%3FURLGallica.%0D%0A++}%0D%0A}&format=application%2Fjson&timeout=0&should-sponge=&debug=on

In [None]:
# List name, gender, birth date and death date (nationality optional) for up
# to ten resources on the current endpoint.
# The literal must open with exactly three quotes: a fourth quote becomes the
# first character of the query text and makes the SPARQL invalid.
# NOTE(review): no PREFIX lines are declared (foaf:, rdagroup2elements:); the
# query presumably relies on prefixes predefined by the endpoint — confirm.
query = """
SELECT DISTINCT ?name ?gender ?nat ?bday ?dday
WHERE {
    ?mbartist foaf:name ?name ;
              foaf:gender ?gender ;
              rdagroup2elements:dateOfBirth ?bday ;
              rdagroup2elements:dateOfDeath ?dday .
OPTIONAL
  {
    ?mbartist foaf:nationality ?nat
  }
}
LIMIT 10
"""
get_sparql_dataframe(endpoint, query).head()

In [132]:
# Up to ten resources that have foaf:birthday, bio:birth and bio:death
# values, ordered by ?jour, with foaf:name optional.
# NOTE(review): the recorded output contains partial values such as "..-.."
# and "18..", so these fields are not uniformly parseable as dates.
query = """SELECT ?auteur ?jour ?date1 ?date2 ?nom
WHERE {
  ?auteur  foaf:birthday ?jour.
  ?auteur bio:birth ?date1.
  ?auteur bio:death ?date2.
  OPTIONAL {
    ?auteur foaf:name ?nom.
  }
} 	
ORDER BY (?jour)
LIMIT 10
"""
get_sparql_dataframe(endpoint, query).head()

Unnamed: 0,auteur,jour,date1,date2,nom
0,http://data.bnf.fr/ark:/12148/cb14656286b#about,-1-8.,18..--1-8.,18..,Michel Pourchet
1,http://data.bnf.fr/ark:/12148/cb16930386j#about,..-..,1890,1959-10,Walter Rossi
2,http://data.bnf.fr/ark:/12148/cb16933183p#about,..-..,18..,1913-04-17,Jacques Aumont-Thiéville
3,http://data.bnf.fr/ark:/12148/cb12407363j#about,..-..,1625,1702,Diacinto Maria Marmi
4,http://data.bnf.fr/ark:/12148/cb13534973q#about,..-..,1760,1793-03-31,Louis Bruneau


In [133]:
# Fetch name, first/last year, owl:sameAs link, page, depiction and gender
# for a single person IRI.
# NOTE(review): the recorded output has no rows. Every pattern here is
# required, and the subject ends in "#foaf:Person" whereas an earlier result
# in this notebook shows the same ark with "#about" — confirm which IRI the
# endpoint actually uses.
query = """
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX bnf-onto: <http://data.bnf.fr/ontology/bnf-onto/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
SELECT DISTINCT ?name ?year ?endyear ?url ?wikidata ?gallica ?gender
WHERE {
    <http://data.bnf.fr/ark:/12148/cb13894801b#foaf:Person> foaf:name ?name ;
        bnf-onto:firstYear ?year ;
        bnf-onto:lastYear ?endyear ;
        owl:sameAs ?url ;
        foaf:page ?wikidata ;
        foaf:depiction ?gallica ;
        foaf:gender ?gender .
}
"""
get_sparql_dataframe(endpoint, query).head()

Unnamed: 0,name,year,endyear,url,wikidata,gallica,gender
