# Notebook requêtant wikidata en SPARQL

Imports

In [1]:
from SPARQLWrapper import SPARQLWrapper, JSON
import numpy as np
import pandas as pd
import folium
from folium.plugins import MarkerCluster

### 1) Récupération de toutes les entités administratives de Paris

In [2]:
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
sparql.setQuery("""
SELECT ?item ?itemLabel
WHERE
{
?item wdt:P131 wd:Q90 . #Tous les 'located in the administrative territorial entity' de Paris
?item wdt:P31 wd:Q702842 . #Toutes les instances de 'municipal arrondissement'

SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }
}
""")

sparql.setReturnFormat(JSON)
results = sparql.query().convert()

Stock résultats

In [3]:
list_arr = []
list_entity = []
for result in results['results']['bindings']:
    #stock de tous les noms d'arrdt
    list_arr = np.append(list_arr, result['itemLabel']['value'])
    #strock de tous les IDs des arrdt
    entity = result['item']['value']
    entity = entity.replace("http://www.wikidata.org/entity/", "")
    list_entity = np.append(list_entity, entity)
    #Print
    print(result['itemLabel']['value'] + " - " + entity)

1st arrondissement of Paris - Q161741
10th arrondissement of Paris - Q163948
11th arrondissement of Paris - Q169293
12th arrondissement of Paris - Q171689
13th arrondissement of Paris - Q175129
14th arrondissement of Paris - Q187153
15th arrondissement of Paris - Q191066
16th arrondissement of Paris - Q194420
17th arrondissement of Paris - Q197297
18th arrondissement of Paris - Q200126
19th arrondissement of Paris - Q204622
2nd arrondissement of Paris - Q209549
20th arrondissement of Paris - Q210720
3rd arrondissement of Paris - Q223140
4th arrondissement of Paris - Q230127
5th arrondissement of Paris - Q238723
6th arrondissement of Paris - Q245546
7th arrondissement of Paris - Q259463
8th arrondissement of Paris - Q270230
9th arrondissement of Paris - Q275118


In [4]:
list_entity

array(['Q161741', 'Q163948', 'Q169293', 'Q171689', 'Q175129', 'Q187153',
       'Q191066', 'Q194420', 'Q197297', 'Q200126', 'Q204622', 'Q209549',
       'Q210720', 'Q223140', 'Q230127', 'Q238723', 'Q245546', 'Q259463',
       'Q270230', 'Q275118'], dtype='<U32')

Préparation de la requête SPARQL

In [5]:
str = ""
for r in list_entity:
    if str == "":
        str = str+"{?item wdt:P131 wd:"+r+" .}"
    else:
        str = str+" UNION {?item wdt:P131 wd:"+r+" .}"
str

'{?item wdt:P131 wd:Q161741 .} UNION {?item wdt:P131 wd:Q163948 .} UNION {?item wdt:P131 wd:Q169293 .} UNION {?item wdt:P131 wd:Q171689 .} UNION {?item wdt:P131 wd:Q175129 .} UNION {?item wdt:P131 wd:Q187153 .} UNION {?item wdt:P131 wd:Q191066 .} UNION {?item wdt:P131 wd:Q194420 .} UNION {?item wdt:P131 wd:Q197297 .} UNION {?item wdt:P131 wd:Q200126 .} UNION {?item wdt:P131 wd:Q204622 .} UNION {?item wdt:P131 wd:Q209549 .} UNION {?item wdt:P131 wd:Q210720 .} UNION {?item wdt:P131 wd:Q223140 .} UNION {?item wdt:P131 wd:Q230127 .} UNION {?item wdt:P131 wd:Q238723 .} UNION {?item wdt:P131 wd:Q245546 .} UNION {?item wdt:P131 wd:Q259463 .} UNION {?item wdt:P131 wd:Q270230 .} UNION {?item wdt:P131 wd:Q275118 .}'

### 2) Requête tous les monuments historiques de Paris

In [7]:
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
sparql.setQuery("""
SELECT ?item ?itemLabel ?lon ?lat ?inception ?height
WHERE
{
{?item wdt:P1435 wd:Q10387684 .} UNION {?item wdt:P1435 wd:Q10387575}
"""+str+""" 
 ?item p:P625 ?coordinate.
 ?coordinate ps:P625 ?coord.
 ?coordinate psv:P625 ?coordinate_node.
 ?coordinate_node wikibase:geoLongitude ?lon.
 ?coordinate_node wikibase:geoLatitude ?lat.
 {?item wdt:P571 ?inception .} UNION {MINUS { ?item wdt:P571 ?inception . }}
 {?item wdt:P2048 ?height .} UNION {MINUS { ?item wdt:P2048 ?height . }}

  
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],fr" }
}
""")
sparql.setReturnFormat(JSON)
results = sparql.query().convert()


KeyboardInterrupt: 

Stock tous les monuments dans un tableau

In [None]:
results

In [None]:
monuments = []
for r in results['results']['bindings']:
    monument = {}
    monument['nom'] = r['itemLabel']['value']
    monument['lon'] = r['lon']['value']
    monument['lat'] = r['lat']['value']
    #print(r["inception"]["value"])
    if("inception" in r):
        monument['inception'] = r["inception"]["value"]
    if("height" in r):
        monument['height'] = r["height"]["value"]
    monuments.append(monument)

In [8]:
df = pd.DataFrame(monuments)
df

NameError: name 'monuments' is not defined

In [105]:
df = df.drop_duplicates()

In [106]:
df.count()

nom          1847
lon          1847
lat          1847
inception     257
height         16
dtype: int64

### 3) Display on a map

In [107]:
m = folium.Map(location=[48.866667,2.333333], tiles="OpenStreetMap", zoom_start=12)

In [108]:
mc = MarkerCluster()

In [111]:
for index, row in df.iterrows():
    mc.add_child(folium.Marker(location=[row['lat'],row['lon']], popup='<i>'+row['nom']+'</i>',tooltip=row['nom']))

m.add_child(mc)
m

### 4) Save in csv

In [110]:
df.to_csv("monuments-paris.csv")

In [9]:
df = pd.read_csv("monuments-paris.csv")

In [10]:
df

Unnamed: 0.1,Unnamed: 0,nom,lon,lat,inception,height
0,0,gare de Paris-Austerlitz,2.365833,48.842222,,
1,1,théâtre des Bouffes du Nord,2.358800,48.884000,,
2,2,Crimée,2.376944,48.891667,,
3,3,cimetière de Montmartre,2.330278,48.887778,1825-01-01T00:00:00Z,
4,7,gare de Paris-Saint-Lazare,2.324444,48.876944,,
...,...,...,...,...,...,...
1842,2388,"immeuble, 21 rue Danielle-Casanova",2.331806,48.867833,,
1843,2389,couvent de la Merci à Paris,2.356700,48.860300,1727-01-01T00:00:00Z,
1844,2390,galerie Argentine,2.283100,48.868300,,
1845,2391,maison des étudiants,2.348449,48.851799,1905-01-01T00:00:00Z,


In [13]:
df_date = df[pd.notnull(df["inception"])]

In [15]:
pd.options.display.max_rows = 999

In [16]:
df_date

Unnamed: 0.1,Unnamed: 0,nom,lon,lat,inception,height
3,3,cimetière de Montmartre,2.330278,48.887778,1825-01-01T00:00:00Z,
5,8,gare de Paris-Lyon,2.373611,48.844722,1847-01-01T00:00:00Z,
6,12,lycée Charlemagne,2.360833,48.854444,1802-01-01T00:00:00Z,
15,27,Bréguet - Sabin,2.37054,48.85675,1906-01-01T00:00:00Z,
18,33,gare de Denfert-Rochereau,2.332778,48.833333,1846-06-07T00:00:00Z,
20,43,Bataclan,2.370833,48.863056,1865-01-01T00:00:00Z,
21,44,jardin des Tuileries,2.326111,48.863889,1564-01-01T00:00:00Z,
22,45,Petit Palais,2.314553,48.866033,1902-01-01T00:00:00Z,
26,52,musée national des Arts asiatiques - Guimet,2.2939,48.8653,1889-01-01T00:00:00Z,
32,61,Olympia,2.328333,48.870278,1893-01-01T00:00:00Z,


In [18]:
from datetime import datetime

In [20]:
df_date.dtypes

Unnamed: 0      int64
nom            object
lon           float64
lat           float64
inception      object
height        float64
dtype: object

In [22]:
df_date['constructionYear'] = df_date['inception'].map(lambda x: x[:4])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [23]:
df_date

Unnamed: 0.1,Unnamed: 0,nom,lon,lat,inception,height,constructionYear
3,3,cimetière de Montmartre,2.330278,48.887778,1825-01-01T00:00:00Z,,1825
5,8,gare de Paris-Lyon,2.373611,48.844722,1847-01-01T00:00:00Z,,1847
6,12,lycée Charlemagne,2.360833,48.854444,1802-01-01T00:00:00Z,,1802
15,27,Bréguet - Sabin,2.37054,48.85675,1906-01-01T00:00:00Z,,1906
18,33,gare de Denfert-Rochereau,2.332778,48.833333,1846-06-07T00:00:00Z,,1846
20,43,Bataclan,2.370833,48.863056,1865-01-01T00:00:00Z,,1865
21,44,jardin des Tuileries,2.326111,48.863889,1564-01-01T00:00:00Z,,1564
22,45,Petit Palais,2.314553,48.866033,1902-01-01T00:00:00Z,,1902
26,52,musée national des Arts asiatiques - Guimet,2.2939,48.8653,1889-01-01T00:00:00Z,,1889
32,61,Olympia,2.328333,48.870278,1893-01-01T00:00:00Z,,1893


In [26]:
df_date.dtypes

Unnamed: 0            int64
nom                  object
lon                 float64
lat                 float64
inception            object
height              float64
constructionYear     object
dtype: object

In [29]:
df_date = df_date.astype({'constructionYear': 'int32'})

In [30]:
df_date.dtypes

Unnamed: 0            int64
nom                  object
lon                 float64
lat                 float64
inception            object
height              float64
constructionYear      int32
dtype: object

In [31]:
df_date = df_date[df_date["constructionYear"] > 987]

In [34]:
df_date

Unnamed: 0.1,Unnamed: 0,nom,lon,lat,inception,height,constructionYear
3,3,cimetière de Montmartre,2.330278,48.887778,1825-01-01T00:00:00Z,,1825
5,8,gare de Paris-Lyon,2.373611,48.844722,1847-01-01T00:00:00Z,,1847
6,12,lycée Charlemagne,2.360833,48.854444,1802-01-01T00:00:00Z,,1802
15,27,Bréguet - Sabin,2.37054,48.85675,1906-01-01T00:00:00Z,,1906
18,33,gare de Denfert-Rochereau,2.332778,48.833333,1846-06-07T00:00:00Z,,1846
20,43,Bataclan,2.370833,48.863056,1865-01-01T00:00:00Z,,1865
21,44,jardin des Tuileries,2.326111,48.863889,1564-01-01T00:00:00Z,,1564
22,45,Petit Palais,2.314553,48.866033,1902-01-01T00:00:00Z,,1902
26,52,musée national des Arts asiatiques - Guimet,2.2939,48.8653,1889-01-01T00:00:00Z,,1889
32,61,Olympia,2.328333,48.870278,1893-01-01T00:00:00Z,,1893
