# Notebook requêtant wikidata en SPARQL

Imports

In [25]:
from SPARQLWrapper import SPARQLWrapper, JSON
import numpy as np
import pandas as pd
import folium
from folium.plugins import MarkerCluster

### 1) Récupération de tous les rois de France

In [26]:
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
sparql.setQuery("""
SELECT ?item ?itemLabel (GROUP_CONCAT(DISTINCT ?spouseLabel;separator=", ") AS ?spouses) (GROUP_CONCAT(DISTINCT ?mannerOfDeathLabel;separator=", ") AS ?mannersOfDeath) 
?dateOfBirth ?placeOfBirthLabel ?dateOfDeath ?placeOfDeathLabel ?placeOfBurialLabel ?fatherLabel ?motherLabel ?conflictLabel ?startTime ?endTime

WHERE
{
{?item wdt:P31 wd:Q5 .} 
{?item wdt:P39 wd:Q18384454 .} UNION {?item wdt:P39 wd:Q22923081} UNION {?item wdt:P39 wd:Q3439798}
{?item wdt:P569 ?dateOfBirth} UNION {MINUS {?item wdt:P569 ?dateOfBirth}}
{?item wdt:P19 ?placeOfBirth} UNION {MINUS {?item wdt:P19 ?placeOfBirth}}
{?item wdt:P570 ?dateOfDeath} UNION {MINUS {?item wdt:P570 ?dateOfDeath}}
{?item wdt:P19 ?placeOfDeath} UNION {MINUS {?item wdt:P19 ?placeOfDeath}}
{?item wdt:P509 ?mannerOfDeath} UNION {MINUS {?item wdt:P509 ?mannerOfDeath}}
{?item wdt:P119 ?placeOfBurial} UNION {MINUS {?item wdt:P119 ?placeOfBurial}}
{?item wdt:P22 ?father} UNION {MINUS {?item wdt:P22 ?father}}
{?item wdt:P25 ?mother} UNION {MINUS {?item wdt:P25 ?mother}}
{?item wdt:P26 ?spouse} UNION {MINUS {?item wdt:P26 ?spouse}}
{?item wdt:P607 ?conflict} UNION {MINUS {?item wdt:P607 ?conflict}}
?item p:P39 ?positionHeld.
{?positionHeld pq:P580 ?startTime.} UNION {MINUS {?positionHeld pq:P580 ?startTime.}}
{?positionHeld pq:P582 ?endTime.} UNION {MINUS {?positionHeld pq:P582 ?endTime.}}
  
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],fr" .
                         ?item rdfs:label ?itemLabel .
                         ?placeOfBirth rdfs:label ?placeOfBirthLabel .
                         ?placeOfDeath rdfs:label ?placeOfDeathLabel .
                         ?mannerOfDeath rdfs:label ?mannerOfDeathLabel .
                         ?placeOfBurial rdfs:label ?placeOfBurialLabel .
                         ?father rdfs:label ?fatherLabel .
                         ?mother rdfs:label ?motherLabel .
                         ?spouse rdfs:label ?spouseLabel .}
}GROUP BY ?item ?itemLabel ?dateOfBirth ?placeOfBirthLabel ?dateOfDeath ?placeOfDeathLabel ?placeOfBurialLabel ?fatherLabel ?motherLabel ?conflictLabel ?startTime ?endTime

""")

sparql.setReturnFormat(JSON)
results = sparql.query().convert()

In [27]:
results

{'head': {'vars': ['item',
   'itemLabel',
   'spouses',
   'mannersOfDeath',
   'dateOfBirth',
   'placeOfBirthLabel',
   'dateOfDeath',
   'placeOfDeathLabel',
   'placeOfBurialLabel',
   'fatherLabel',
   'motherLabel',
   'conflictLabel',
   'startTime',
   'endTime']},
 'results': {'bindings': [{'item': {'type': 'uri',
     'value': 'http://www.wikidata.org/entity/Q309934'},
    'itemLabel': {'xml:lang': 'fr', 'type': 'literal', 'value': 'Clovis IV'},
    'dateOfBirth': {'datatype': 'http://www.w3.org/2001/XMLSchema#dateTime',
     'type': 'literal',
     'value': '0677-01-01T00:00:00Z'},
    'placeOfBirthLabel': {'type': 'literal', 'value': 't1716563649'},
    'dateOfDeath': {'datatype': 'http://www.w3.org/2001/XMLSchema#dateTime',
     'type': 'literal',
     'value': '0695-01-01T00:00:00Z'},
    'placeOfDeathLabel': {'type': 'literal', 'value': 't1716563649'},
    'fatherLabel': {'xml:lang': 'fr',
     'type': 'literal',
     'value': 'Thierry III'},
    'motherLabel': {'xml:la

In [28]:
rois = []
for r in results['results']['bindings']:
    roi = {}
    if("itemLabel" in r):
        roi['nom'] = r['itemLabel']['value']
    if("dateOfBirth" in r):
        roi['dateOfBirth'] = r['dateOfBirth']['value']
    if("placeOfBirthLabel" in r):
        roi['placeOfBirthLabel'] = r['placeOfBirthLabel']['value']
    if("dateOfDeath" in r):
        roi['dateOfDeath'] = r['dateOfDeath']['value']
    if("placeOfDeathLabel" in r):
        roi['placeOfDeathLabel'] = r['placeOfDeathLabel']['value']
    if("mannerOfDeathLabel" in r):
        roi['mannerOfDeathLabel'] = r['mannerOfDeathLabel']['value']
    if("placeOfBurialLabel" in r):
        roi['placeOfBurialLabel'] = r['placeOfBurialLabel']['value']
    if("fatherLabel" in r):
        roi['fatherLabel'] = r['fatherLabel']['value']
    if("motherLabel" in r):
        roi['motherLabel'] = r['motherLabel']['value']
    if("spouses" in r):
        roi['spouses'] = r['spouses']['value']
    if("conflictLabel" in r):
        roi['conflictLabel'] = r['conflictLabel']['value']
    if("conflictLabel" in r):
        roi['conflictLabel'] = r['conflictLabel']['value']
    if("conflictLabel" in r):
        roi['conflictLabel'] = r['conflictLabel']['value']
    rois.append(roi)

In [37]:
df = pd.DataFrame(rois)
df.count()

nom                   170
dateOfBirth           167
placeOfBirthLabel     149
dateOfDeath           166
placeOfDeathLabel     149
fatherLabel           154
motherLabel           136
placeOfBurialLabel    110
dtype: int64

In [30]:
df = df.drop_duplicates()

In [31]:
df.count()

nom                   139
dateOfBirth           136
placeOfBirthLabel     119
dateOfDeath           135
placeOfDeathLabel     119
fatherLabel           124
motherLabel           106
placeOfBurialLabel     80
dtype: int64

In [32]:
pd.options.display.max_rows = 999
df.sort_values(by=['dateOfBirth'])


Unnamed: 0,nom,dateOfBirth,placeOfBirthLabel,dateOfDeath,placeOfDeathLabel,fatherLabel,motherLabel,placeOfBurialLabel
118,Mérogaise,0300-01-01T00:00:00Z,,0306-01-01T00:00:00Z,,,,
95,Ascaric,0300-01-01T00:00:00Z,,0306-01-01T00:00:00Z,,,,
101,Clodion le Chevelu,0393-01-01T00:00:00Z,t1560140345,0448-01-01T00:00:00Z,t1560140345,Théodomir,,
1,Marcomir,0400-01-01T00:00:00Z,,0392-01-01T00:00:00Z,,,,
2,Mallobaud,0400-01-01T00:00:00Z,,0400-01-01T00:00:00Z,,,,
153,Gennobaud,0400-01-01T00:00:00Z,,0388-01-01T00:00:00Z,,,,
166,Sunnon,0400-01-01T00:00:00Z,,0400-01-01T00:00:00Z,,,,
94,Théodomir,0400-01-01T00:00:00Z,,,,Richomer,,
144,Mérovée,0415-01-01T00:00:00Z,t1675385804,0457-01-01T00:00:00Z,t1675385804,Clodion le Chevelu,,
155,Childéric Ier,0436-01-01T00:00:00Z,t1776591244,0481-01-01T00:00:00Z,t1776591244,Mérovée,,Tournai


In [33]:
df = df.groupby(['nom']).min()

In [34]:
df.sort_values(by=['dateOfBirth'])

Unnamed: 0_level_0,dateOfBirth,placeOfBirthLabel,dateOfDeath,placeOfDeathLabel,fatherLabel,motherLabel,placeOfBurialLabel
nom,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Ascaric,0300-01-01T00:00:00Z,,0306-01-01T00:00:00Z,,,,
Mérogaise,0300-01-01T00:00:00Z,,0306-01-01T00:00:00Z,,,,
Clodion le Chevelu,0393-01-01T00:00:00Z,t1560140345,0448-01-01T00:00:00Z,t1560140345,Théodomir,,
Théodomir,0400-01-01T00:00:00Z,,,,Richomer,,
Mallobaud,0400-01-01T00:00:00Z,,0400-01-01T00:00:00Z,,,,
Marcomir,0400-01-01T00:00:00Z,,0392-01-01T00:00:00Z,,,,
Gennobaud,0400-01-01T00:00:00Z,,0388-01-01T00:00:00Z,,,,
Sunnon,0400-01-01T00:00:00Z,,0400-01-01T00:00:00Z,,,,
Mérovée,0415-01-01T00:00:00Z,t1675385804,0457-01-01T00:00:00Z,t1675385804,Clodion le Chevelu,,
Childéric Ier,0436-01-01T00:00:00Z,t1776591244,0481-01-01T00:00:00Z,t1776591244,Mérovée,,Tournai


### 4) Save in csv

In [35]:
df.to_csv("rois-france.csv")