# Notebook requêtant wikidata en SPARQL

Imports

In [47]:
from SPARQLWrapper import SPARQLWrapper, JSON
import numpy as np
import pandas as pd
import folium
from folium.plugins import MarkerCluster

### 1) Récupération de tous les rois de France

In [48]:
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
sparql.setQuery("""
SELECT ?item ?itemLabel ?dateOfBirth ?placeOfBirthLabel ?dateOfDeath ?placeOfDeathLabel ?mannerOfDeathLabel ?placeOfBurialLabel ?fatherLabel ?motherLabel ?spouseLabel ?conflictLabel
WHERE
{
{?item wdt:P31 wd:Q5 .} 
{?item wdt:P39 wd:Q18384454 .} UNION {?item wdt:P39 wd:Q22923081 .} UNION {?item wdt:P39 wd:Q3439798 .} UNION {?item wdt:P39 wd:Q15135541 .}
{?item wdt:P569 ?dateOfBirth} UNION {MINUS {?item wdt:P569 ?dateOfBirth}}
{?item wdt:P19 ?placeOfBirth} UNION {MINUS {?item wdt:P19 ?placeOfBirth}}
{?item wdt:P570 ?dateOfDeath} UNION {MINUS {?item wdt:P570 ?dateOfDeath}}
{?item wdt:P19 ?placeOfDeath} UNION {MINUS {?item wdt:P19 ?placeOfDeath}}
{?item wdt:P509 ?mannerOfDeath} UNION {MINUS {?item wdt:P509 ?mannerOfDeath}}
{?item wdt:P119 ?placeOfBurial} UNION {MINUS {?item wdt:P119 ?placeOfBurial}}
{?item wdt:P22 ?father} UNION {MINUS {?item wdt:P22 ?father}}
{?item wdt:P25 ?mother} UNION {MINUS {?item wdt:P25 ?mother}}
{?item wdt:P26 ?spouse} UNION {MINUS {?item wdt:P26 ?spouse}}
{?item wdt:P607 ?conflict} UNION {MINUS {?item wdt:P607 ?conflict}}
  
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],fr" }
}
""")

sparql.setReturnFormat(JSON)
results = sparql.query().convert()

In [49]:
results

{'head': {'vars': ['item',
   'itemLabel',
   'dateOfBirth',
   'placeOfBirthLabel',
   'dateOfDeath',
   'placeOfDeathLabel',
   'mannerOfDeathLabel',
   'placeOfBurialLabel',
   'fatherLabel',
   'motherLabel',
   'spouseLabel',
   'conflictLabel']},
 'results': {'bindings': [{'item': {'type': 'uri',
     'value': 'http://www.wikidata.org/entity/Q73806'},
    'dateOfBirth': {'datatype': 'http://www.w3.org/2001/XMLSchema#dateTime',
     'type': 'literal',
     'value': '0844-01-01T00:00:00Z'},
    'dateOfDeath': {'datatype': 'http://www.w3.org/2001/XMLSchema#dateTime',
     'type': 'literal',
     'value': '0887-01-15T00:00:00Z'},
    'itemLabel': {'xml:lang': 'fr',
     'type': 'literal',
     'value': 'Boson de Provence'},
    'placeOfBirthLabel': {'type': 'literal', 'value': 't1546630289'},
    'placeOfDeathLabel': {'type': 'literal', 'value': 't1546630289'},
    'mannerOfDeathLabel': {'xml:lang': 'fr',
     'type': 'literal',
     'value': 'maladie'},
    'fatherLabel': {'xml:lang

In [50]:
rois = []
for r in results['results']['bindings']:
    roi = {}
    if("itemLabel" in r):
        roi['nom'] = r['itemLabel']['value']
    if("dateOfBirth" in r):
        roi['dateOfBirth'] = r['dateOfBirth']['value']
    if("placeOfBirthLabel" in r):
        roi['placeOfBirthLabel'] = r['placeOfBirthLabel']['value']
    if("dateOfDeath" in r):
        roi['dateOfDeath'] = r['dateOfDeath']['value']
    if("placeOfDeathLabel" in r):
        roi['placeOfDeathLabel'] = r['placeOfDeathLabel']['value']
    if("mannerOfDeathLabel" in r):
        roi['mannerOfDeathLabel'] = r['mannerOfDeathLabel']['value']
    if("placeOfBurialLabel" in r):
        roi['placeOfBurialLabel'] = r['placeOfBurialLabel']['value']
    if("fatherLabel" in r):
        roi['fatherLabel'] = r['fatherLabel']['value']
    if("motherLabel" in r):
        roi['motherLabel'] = r['motherLabel']['value']
    if("spouseLabel" in r):
        roi['spouseLabel'] = r['spouseLabel']['value']
    if("conflictLabel" in r):
        roi['conflictLabel'] = r['conflictLabel']['value']
    rois.append(roi)

In [51]:
df = pd.DataFrame(rois)
df

Unnamed: 0,nom,dateOfBirth,placeOfBirthLabel,dateOfDeath,placeOfDeathLabel,mannerOfDeathLabel,fatherLabel,motherLabel,spouseLabel,placeOfBurialLabel,conflictLabel
0,Boson de Provence,0844-01-01T00:00:00Z,t1546630289,0887-01-15T00:00:00Z,t1546630289,maladie,Bivin,Q61717067,Ermengarde,,
1,Pépin le Bref,0714-01-01T00:00:00Z,Liège,0768-09-28T00:00:00Z,Liège,,Charles Martel,Rotrude,Bertrade de Laon,basilique Saint-Denis,
2,Pépin le Bref,0715-01-01T00:00:00Z,Liège,0768-09-28T00:00:00Z,Liège,,Charles Martel,Rotrude,Bertrade de Laon,basilique Saint-Denis,
3,Clovis Ier,0466-01-01T00:00:00Z,Tournai,0511-11-29T00:00:00Z,Tournai,,Childéric Ier,Basine de Thuringe,princesse franque,abbaye Sainte-Geneviève de Paris,
4,Clovis Ier,0466-01-01T00:00:00Z,Tournai,0511-11-29T00:00:00Z,Tournai,,Childéric Ier,Basine de Thuringe,Clotilde,abbaye Sainte-Geneviève de Paris,
5,Thierry IV,0712-01-01T00:00:00Z,t349901652,0737-01-01T00:00:00Z,t349901652,,Dagobert III,,,basilique Saint-Denis,
6,Childebert IV,0678-01-01T00:00:00Z,t1772877138,0711-04-18T00:00:00Z,t1772877138,,Thierry III,Clotilde dite Doda,,,
7,Chilpéric II,0673-01-01T00:00:00Z,t1771688088,0721-02-17T00:00:00Z,t1771688088,,Childéric II,Bilichilde,,Noyon,
8,Chilpéric II,0670-01-01T00:00:00Z,t1771688088,0721-02-17T00:00:00Z,t1771688088,,Childéric II,Bilichilde,,Noyon,
9,Dagobert III,0698-01-01T00:00:00Z,t1555106032,0715-01-01T00:00:00Z,t1555106032,,Childebert IV,,,,


In [52]:
df = df.drop_duplicates()

In [53]:
df.count()

nom                   269
dateOfBirth           266
placeOfBirthLabel     249
dateOfDeath           265
placeOfDeathLabel     249
mannerOfDeathLabel    87 
fatherLabel           230
motherLabel           207
spouseLabel           201
placeOfBurialLabel    162
conflictLabel         27 
dtype: int64

In [54]:
pd.options.display.max_rows = 999
dfs = df
dfs = dfs.dropna(subset = ["dateOfBirth"])
dfs.sort_values(by=['dateOfBirth'])


Unnamed: 0,nom,dateOfBirth,placeOfBirthLabel,dateOfDeath,placeOfDeathLabel,mannerOfDeathLabel,fatherLabel,motherLabel,spouseLabel,placeOfBurialLabel,conflictLabel
141,Mérogaise,0300-01-01T00:00:00Z,,0306-01-01T00:00:00Z,,,,,,,
169,Ascaric,0300-01-01T00:00:00Z,,0306-01-01T00:00:00Z,,,,,,,
74,Clodion le Chevelu,0393-01-01T00:00:00Z,t1560948388,0448-01-01T00:00:00Z,t1560948388,,Théodomir,,,,
155,Mallobaud,0400-01-01T00:00:00Z,,0400-01-01T00:00:00Z,,,,,,,
127,Marcomir,0400-01-01T00:00:00Z,,0392-01-01T00:00:00Z,,,,,,,
162,Sunnon,0400-01-01T00:00:00Z,,0400-01-01T00:00:00Z,,,,,,,
168,Théodomir,0400-01-01T00:00:00Z,,,,,Richomer,,,,
149,Gennobaud,0400-01-01T00:00:00Z,,0388-01-01T00:00:00Z,,,,,,,
263,Mérovée,0415-01-01T00:00:00Z,t1673002659,0457-01-01T00:00:00Z,t1673002659,,Clodion le Chevelu,,,,
58,Childéric Ier,0436-01-01T00:00:00Z,t1773883112,0481-01-01T00:00:00Z,t1773883112,,Mérovée,,Basine de Thuringe,Tournai,


### 4) Save in csv

In [110]:
df.to_csv("monuments-paris.csv")