# Notebook requêtant wikidata en SPARQL

Imports

In [1]:
from SPARQLWrapper import SPARQLWrapper, JSON
import numpy as np
import pandas as pd
import folium
from folium.plugins import MarkerCluster

### 1) Récupération de tous les rois de France

In [2]:
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
sparql.setQuery("""
SELECT ?item ?itemLabel (GROUP_CONCAT(DISTINCT ?spouseLabel;separator=", ") AS ?spouses) (GROUP_CONCAT(DISTINCT ?mannerOfDeathLabel;separator=", ") AS ?mannersOfDeath) 
?dateOfBirth ?placeOfBirthLabel ?dateOfDeath ?placeOfDeathLabel ?placeOfBurialLabel ?fatherLabel ?motherLabel ?conflictLabel ?startTime ?endTime

WHERE
{
{?item wdt:P31 wd:Q5 .} 
{?item wdt:P39 wd:Q18384454 .} UNION {?item wdt:P39 wd:Q22923081} UNION {?item wdt:P39 wd:Q3439798}
{?item wdt:P569 ?dateOfBirth} UNION {MINUS {?item wdt:P569 ?dateOfBirth}}
{?item wdt:P19 ?placeOfBirth} UNION {MINUS {?item wdt:P19 ?placeOfBirth}}
{?item wdt:P570 ?dateOfDeath} UNION {MINUS {?item wdt:P570 ?dateOfDeath}}
{?item wdt:P19 ?placeOfDeath} UNION {MINUS {?item wdt:P19 ?placeOfDeath}}
{?item wdt:P509 ?mannerOfDeath} UNION {MINUS {?item wdt:P509 ?mannerOfDeath}}
{?item wdt:P119 ?placeOfBurial} UNION {MINUS {?item wdt:P119 ?placeOfBurial}}
{?item wdt:P22 ?father} UNION {MINUS {?item wdt:P22 ?father}}
{?item wdt:P25 ?mother} UNION {MINUS {?item wdt:P25 ?mother}}
{?item wdt:P26 ?spouse} UNION {MINUS {?item wdt:P26 ?spouse}}
{?item wdt:P607 ?conflict} UNION {MINUS {?item wdt:P607 ?conflict}}
?item p:P39 ?positionHeld.
{?positionHeld pq:P580 ?startTime.} UNION {MINUS {?positionHeld pq:P580 ?startTime.}}
{?positionHeld pq:P582 ?endTime.} UNION {MINUS {?positionHeld pq:P582 ?endTime.}}
  
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],fr" .
                         ?item rdfs:label ?itemLabel .
                         ?placeOfBirth rdfs:label ?placeOfBirthLabel .
                         ?placeOfDeath rdfs:label ?placeOfDeathLabel .
                         ?mannerOfDeath rdfs:label ?mannerOfDeathLabel .
                         ?placeOfBurial rdfs:label ?placeOfBurialLabel .
                         ?father rdfs:label ?fatherLabel .
                         ?mother rdfs:label ?motherLabel .
                         ?spouse rdfs:label ?spouseLabel .}
}GROUP BY ?item ?itemLabel ?dateOfBirth ?placeOfBirthLabel ?dateOfDeath ?placeOfDeathLabel ?placeOfBurialLabel ?fatherLabel ?motherLabel ?conflictLabel ?startTime ?endTime

""")

sparql.setReturnFormat(JSON)
results = sparql.query().convert()

In [3]:
results

{'head': {'vars': ['item',
   'itemLabel',
   'spouses',
   'mannersOfDeath',
   'dateOfBirth',
   'placeOfBirthLabel',
   'dateOfDeath',
   'placeOfDeathLabel',
   'placeOfBurialLabel',
   'fatherLabel',
   'motherLabel',
   'conflictLabel',
   'startTime',
   'endTime']},
 'results': {'bindings': [{'item': {'type': 'uri',
     'value': 'http://www.wikidata.org/entity/Q134259'},
    'itemLabel': {'xml:lang': 'fr',
     'type': 'literal',
     'value': 'Louis VII de France'},
    'dateOfBirth': {'datatype': 'http://www.w3.org/2001/XMLSchema#dateTime',
     'type': 'literal',
     'value': '1120-01-01T00:00:00Z'},
    'placeOfBirthLabel': {'xml:lang': 'fr',
     'type': 'literal',
     'value': 'Paris'},
    'dateOfDeath': {'datatype': 'http://www.w3.org/2001/XMLSchema#dateTime',
     'type': 'literal',
     'value': '1180-09-25T00:00:00Z'},
    'placeOfDeathLabel': {'xml:lang': 'fr',
     'type': 'literal',
     'value': 'Paris'},
    'placeOfBurialLabel': {'xml:lang': 'fr',
     'type

In [4]:
rois = []
for r in results['results']['bindings']:
    roi = {}
    if("itemLabel" in r):
        roi['nom'] = r['itemLabel']['value']
    if("dateOfBirth" in r):
        roi['dateOfBirth'] = r['dateOfBirth']['value']
    if("placeOfBirthLabel" in r):
        roi['placeOfBirthLabel'] = r['placeOfBirthLabel']['value']
    if("dateOfDeath" in r):
        roi['dateOfDeath'] = r['dateOfDeath']['value']
    if("placeOfDeathLabel" in r):
        roi['placeOfDeathLabel'] = r['placeOfDeathLabel']['value']
    if("mannerOfDeathLabel" in r):
        roi['mannerOfDeathLabel'] = r['mannerOfDeathLabel']['value']
    if("placeOfBurialLabel" in r):
        roi['placeOfBurialLabel'] = r['placeOfBurialLabel']['value']
    if("fatherLabel" in r):
        roi['fatherLabel'] = r['fatherLabel']['value']
    if("motherLabel" in r):
        roi['motherLabel'] = r['motherLabel']['value']
    if("spouses" in r):
        roi['spouses'] = r['spouses']['value']
    if("conflictLabel" in r):
        roi['conflictLabel'] = r['conflictLabel']['value']
    if("startTime" in r):
        roi['startTime'] = r['startTime']['value']
    if("endTime" in r):
        roi['endTime'] = r['endTime']['value']
    rois.append(roi)

In [14]:
df = pd.DataFrame(rois)
df.count()

nom                   170
dateOfBirth           167
placeOfBirthLabel     149
dateOfDeath           166
placeOfDeathLabel     149
placeOfBurialLabel    110
fatherLabel           154
motherLabel           136
spouses               170
startTime              80
endTime                80
dtype: int64

In [15]:
df = df.drop_duplicates()

In [16]:
df.count()

nom                   170
dateOfBirth           167
placeOfBirthLabel     149
dateOfDeath           166
placeOfDeathLabel     149
placeOfBurialLabel    110
fatherLabel           154
motherLabel           136
spouses               170
startTime              80
endTime                80
dtype: int64

In [17]:
pd.options.display.max_rows = 999

In [19]:
df = df.drop_duplicates(subset=["nom"])

In [21]:
df.count()

nom                   120
dateOfBirth           117
placeOfBirthLabel     102
dateOfDeath           116
placeOfDeathLabel     102
placeOfBurialLabel     67
fatherLabel           106
motherLabel            89
spouses               120
startTime              50
endTime                50
dtype: int64

In [22]:
df.sort_values(by=['startTime'])

Unnamed: 0,nom,dateOfBirth,placeOfBirthLabel,dateOfDeath,placeOfDeathLabel,placeOfBurialLabel,fatherLabel,motherLabel,spouses,startTime,endTime
34,Mallobaud,0400-01-01T00:00:00Z,,0400-01-01T00:00:00Z,,,,,,0378-01-01T00:00:00Z,0378-01-01T00:00:00Z
33,Marcomir,0400-01-01T00:00:00Z,,0392-01-01T00:00:00Z,,,,,,0380-01-01T00:00:00Z,0400-01-01T00:00:00Z
29,Clovis Ier,0466-01-01T00:00:00Z,Tournai,0511-11-29T00:00:00Z,Tournai,abbaye Sainte-Geneviève de Paris,Childéric Ier,Basine de Thuringe,"princesse franque, Clotilde",0481-01-01T00:00:00Z,0511-11-29T00:00:00Z
50,Childéric II,0655-01-01T00:00:00Z,t1589541665,0675-01-01T00:00:00Z,t1589541665,abbaye de Saint-Germain-des-Prés,Clovis II,Bathilde,Bilichilde,0673-01-01T00:00:00Z,0675-01-01T00:00:00Z
57,Thierry III,0657-01-01T00:00:00Z,t1551909395,0691-01-01T00:00:00Z,t1551909395,Arras,Clovis II,Bathilde,Clotilde dite Doda,0675-01-01T00:00:00Z,0679-01-01T00:00:00Z
52,Dagobert II,0652-01-01T00:00:00Z,t1554304759,0679-12-26T00:00:00Z,t1554304759,,Sigebert III,Chimnechilde,,0676-01-01T00:00:00Z,0679-12-26T00:00:00Z
32,Clovis IV,0677-01-01T00:00:00Z,t1716563649,0695-01-01T00:00:00Z,t1716563649,,Thierry III,Clotilde dite Doda,,0691-01-01T00:00:00Z,0695-01-01T00:00:00Z
2,Childebert IV,0678-01-01T00:00:00Z,t1775585407,0711-04-18T00:00:00Z,t1775585407,,Thierry III,Clotilde dite Doda,,0695-01-01T00:00:00Z,0711-01-01T00:00:00Z
20,Dagobert III,0698-01-01T00:00:00Z,t1554304902,0715-01-01T00:00:00Z,t1554304902,,Childebert IV,,,0711-01-01T00:00:00Z,0715-01-01T00:00:00Z
53,Clotaire IV,0685-01-01T00:00:00Z,t1421563132,0719-01-01T00:00:00Z,t1421563132,,Childebert IV,,,0717-01-01T00:00:00Z,0719-01-01T00:00:00Z


In [47]:
df2 = df[df['startTime'].notna() & df['endTime'].notna()]

In [48]:
df2.shape

(50, 11)

In [26]:
from datetime import datetime

In [51]:
df2['startYear'] = df2['startTime'].map(lambda x: datetime.strptime(x, "%Y-%m-%dT%H:%M:%S%z").year)
df2['endYear'] = df2['endTime'].map(lambda x: datetime.strptime(x, "%Y-%m-%dT%H:%M:%S%z").year)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [52]:
df2

Unnamed: 0,nom,dateOfBirth,placeOfBirthLabel,dateOfDeath,placeOfDeathLabel,placeOfBurialLabel,fatherLabel,motherLabel,spouses,startTime,endTime,startYear,endYear
0,Louis VII de France,1120-01-01T00:00:00Z,Paris,1180-09-25T00:00:00Z,Paris,basilique Saint-Denis,Louis VI de France,Adélaïde de Savoie,"Constance de Castille, Adèle de Champagne, Ali...",1137-08-08T00:00:00Z,1180-09-25T00:00:00Z,1137,1180
1,Louis VIII de France,1187-09-12T00:00:00Z,Paris,1226-11-15T00:00:00Z,Paris,basilique Saint-Denis,Philippe II Auguste,Isabelle de Hainaut,Blanche de Castille,1223-07-21T00:00:00Z,1226-11-15T00:00:00Z,1223,1226
2,Childebert IV,0678-01-01T00:00:00Z,t1775585407,0711-04-18T00:00:00Z,t1775585407,,Thierry III,Clotilde dite Doda,,0695-01-01T00:00:00Z,0711-01-01T00:00:00Z,695,711
3,Charles VII de France,1403-03-03T00:00:00Z,Paris,1461-07-31T00:00:00Z,Paris,basilique Saint-Denis,Charles VI,Isabeau de Bavière,Marie d'Anjou,1422-11-08T00:00:00Z,1461-08-09T00:00:00Z,1422,1461
4,Thierry IV,0712-01-01T00:00:00Z,t349901652,0737-01-01T00:00:00Z,t349901652,basilique Saint-Denis,Dagobert III,,,0721-01-01T00:00:00Z,0737-01-01T00:00:00Z,721,737
5,Hugues Capet,0940-01-01T00:00:00Z,Dourdan,0996-10-29T00:00:00Z,Dourdan,basilique Saint-Denis,Hugues le Grand,Hedwige de Saxe,Adélaïde d'Aquitaine,0987-06-08T00:00:00Z,0996-10-29T00:00:00Z,987,996
6,Louis VI de France,1081-12-07T00:00:00Z,Paris,1137-08-08T00:00:00Z,Paris,basilique Saint-Denis,Philippe Ier de France,Berthe de Hollande,"Lucienne de Rochefort, Adélaïde de Savoie",1108-08-06T00:00:00Z,1137-08-08T00:00:00Z,1108,1137
7,Chilpéric II,0673-01-01T00:00:00Z,t1774396202,0721-02-17T00:00:00Z,t1774396202,Noyon,Childéric II,Bilichilde,,0719-01-01T00:00:00Z,0721-01-01T00:00:00Z,719,721
11,Henri II,1519-04-10T00:00:00Z,Saint-Germain-en-Laye,1559-07-20T00:00:00Z,Saint-Germain-en-Laye,basilique Saint-Denis,François Ier,Claude de France,Catherine de Médicis,1547-04-10T00:00:00Z,1559-07-20T00:00:00Z,1547,1559
12,Charles VIII,1470-07-09T00:00:00Z,Amboise,1498-04-16T00:00:00Z,Amboise,basilique Saint-Denis,Louis XI,Charlotte de Savoie,Anne de Bretagne,1483-09-17T00:00:00Z,1498-04-25T00:00:00Z,1483,1498


In [57]:
df2.dtypes

nom                   object
dateOfBirth           object
placeOfBirthLabel     object
dateOfDeath           object
placeOfDeathLabel     object
placeOfBurialLabel    object
fatherLabel           object
motherLabel           object
spouses               object
startTime             object
endTime               object
startYear              int64
endYear                int64
dtype: object

In [55]:
def which_king(year):
    df_king = df2[df2["startYear"] <= year]
    return df2[df2["startYear"] <= year & df2["endYear"] > year]

In [56]:
toto = which_king(1222)
toto

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [63]:
dft = df2[df2["startYear"] <= 1222 & df2["endYear"] > 1222]

In [64]:
dft

Unnamed: 0,nom,dateOfBirth,placeOfBirthLabel,dateOfDeath,placeOfDeathLabel,placeOfBurialLabel,fatherLabel,motherLabel,spouses,startTime,endTime,startYear,endYear
1,Louis VIII de France,1187-09-12T00:00:00Z,Paris,1226-11-15T00:00:00Z,Paris,basilique Saint-Denis,Philippe II Auguste,Isabelle de Hainaut,Blanche de Castille,1223-07-21T00:00:00Z,1226-11-15T00:00:00Z,1223,1226
3,Charles VII de France,1403-03-03T00:00:00Z,Paris,1461-07-31T00:00:00Z,Paris,basilique Saint-Denis,Charles VI,Isabeau de Bavière,Marie d'Anjou,1422-11-08T00:00:00Z,1461-08-09T00:00:00Z,1422,1461
11,Henri II,1519-04-10T00:00:00Z,Saint-Germain-en-Laye,1559-07-20T00:00:00Z,Saint-Germain-en-Laye,basilique Saint-Denis,François Ier,Claude de France,Catherine de Médicis,1547-04-10T00:00:00Z,1559-07-20T00:00:00Z,1547,1559
12,Charles VIII,1470-07-09T00:00:00Z,Amboise,1498-04-16T00:00:00Z,Amboise,basilique Saint-Denis,Louis XI,Charlotte de Savoie,Anne de Bretagne,1483-09-17T00:00:00Z,1498-04-25T00:00:00Z,1483,1498
13,Charles VI,1368-12-11T00:00:00Z,Paris,1422-10-30T00:00:00Z,Paris,basilique Saint-Denis,Charles V de France,Jeanne de Bourbon,Isabeau de Bavière,1380-09-24T00:00:00Z,1422-10-30T00:00:00Z,1380,1422
15,Charles V de France,1338-01-29T00:00:00Z,Vincennes,1380-09-24T00:00:00Z,Vincennes,basilique Saint-Denis,Jean II de France,Bonne de Luxembourg,Jeanne de Bourbon,1364-04-16T00:00:00Z,1380-09-24T00:00:00Z,1364,1380
16,Philippe VI de France,1293-11-24T00:00:00Z,Fontainebleau,1350-08-30T00:00:00Z,Fontainebleau,basilique Saint-Denis,Charles de Valois,Marguerite d'Anjou,"Blanche de Navarre, Jeanne de Bourgogne",1328-04-09T00:00:00Z,1350-08-30T00:00:00Z,1328,1350
17,Charles IV de France,1294-06-25T00:00:00Z,Creil,1328-02-09T00:00:00Z,Creil,basilique Saint-Denis,Philippe IV de France,Jeanne Ire de Navarre,"Blanche de Bourgogne, Marie de Luxembourg, Jea...",1322-01-11T00:00:00Z,1328-02-09T00:00:00Z,1322,1328
19,Charles IX,1550-07-07T00:00:00Z,Saint-Germain-en-Laye,1574-06-09T00:00:00Z,Saint-Germain-en-Laye,basilique Saint-Denis,Henri II,Catherine de Médicis,Élisabeth d'Autriche,1560-12-25T00:00:00Z,1574-06-19T00:00:00Z,1560,1574
21,Philippe IV de France,1268-01-01T00:00:00Z,Fontainebleau,1314-12-07T00:00:00Z,Fontainebleau,basilique Saint-Denis,Philippe III de France,Isabelle d'Aragon,Jeanne Ire de Navarre,1285-10-12T00:00:00Z,1314-12-07T00:00:00Z,1285,1314


### 2) Save in csv

In [23]:
df.to_csv("rois-france.csv")