Get World geritage in Wikidata and check OSM
* [This Notebook](https://github.com/salgo60/Gravstensinventeringen-Wikidata/blob/main/Notebook/OSM%20Wikidata_World_Heritage.ipynb)
* EDIT groups [rel](https://editgroups.toolforge.org/b/OR/f3c26448592/) [way]()



In [1]:
from datetime import datetime
start_time  = datetime.now()
print("Last run: ", start_time)

Last run:  2023-02-15 13:27:27.774163


In [2]:
import pandas as pd
#
# pip install sparqlwrapper
# https://rdflib.github.io/sparqlwrapper/

import sys,json
from SPARQLWrapper import SPARQLWrapper, JSON

endpoint_url = "https://query.wikidata.org/sparql"
 
# https://w.wiki/6Ldv
queryWD = """#title World Heritage with no OSM
SELECT distinct ?item (REPLACE(STR(?item), ".*Q", "Q") AS ?itemid) ?itemLabel  WHERE {
  #?item wdt:P6104 wd:Q115206846.
  ?item wdt:P757 ?wh.
 # ?item wdt:P17/wdt:P361 wd:Q46
  minus {?item wdt:P10689 ?OSM}
  minus {?item wdt:P402 ?OSMrel}

  SERVICE wikibase:label { bd:serviceParam wikibase:language "sv,en". }
}
"""


def get_sparql_dataframe(endpoint_url, query):
    """
    Helper function to convert SPARQL results into a Pandas data frame.
    """
    user_agent = "salgo60/%s.%s" % (sys.version_info[0], sys.version_info[1])
 
    sparql = SPARQLWrapper(endpoint_url, agent=user_agent)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    result = sparql.query()

    processed_results = json.load(result.response)
    cols = processed_results['head']['vars']

    out = []
    for row in processed_results['results']['bindings']:
        item = []
        for c in cols:
            item.append(row.get(c, {}).get('value'))
        out.append(item)

    return pd.DataFrame(out, columns=cols)

WD = get_sparql_dataframe(endpoint_url, queryWD)    
WD.shape

(6957, 3)

In [3]:
WD.head()

Unnamed: 0,item,itemid,itemLabel
0,http://www.wikidata.org/entity/Q72895,Q72895,Parker i kanadensiska Klippiga bergen
1,http://www.wikidata.org/entity/Q93643,Q93643,Hidden Christian Sites in the Nagasaki Region
2,http://www.wikidata.org/entity/Q106540,Q106540,Ruwenzori
3,http://www.wikidata.org/entity/Q112738,Q112738,Castle of Xabier
4,http://www.wikidata.org/entity/Q120314,Q120314,Wutaishan


In [4]:
import urllib3, json
from tqdm import tqdm
http = urllib3.PoolManager()

listWD = []
for WD, row in tqdm(WD.iterrows(), total=WD.shape[0]):
    #print(row["itemid"] ) 
    url = "https://osm.wikidata.link/tagged/api/item/" + row["itemid"] 
    
    new_item = dict()
    new_item['wikidata'] = row["itemid"] 
#    new_item['coord'] = row["coord"] 
    try:
        r = http.request('GET', url) 
        data = json.loads(r.data.decode('utf-8'))
    except:
        print (r.status, url)
#    print (r.status)
    try:
        #print ("OSM ", data["osm"], "Type: ", type(data["osm"]))
        #print ("ID: ", data["osm"][0]["id"] , "\tType: ", data["osm"][0]["type"])
        osmid = data["osm"][0]["id"] 
        osmType = data["osm"][0]["type"]
        new_item['osmid'] = osmid 
        new_item['type'] = osmType
        listWD.append(new_item)
        #print(listWD)

    except:
        #print ("error")
        pass
print (len(listWD))

100%|██████████| 6957/6957 [12:50<00:00,  9.03it/s]  

2257





In [5]:
listWD

[{'wikidata': 'Q106540', 'osmid': 1869749, 'type': 'relation'},
 {'wikidata': 'Q112738', 'osmid': 96311715, 'type': 'way'},
 {'wikidata': 'Q120314', 'osmid': 1697356333, 'type': 'node'},
 {'wikidata': 'Q131377', 'osmid': 5329720522, 'type': 'node'},
 {'wikidata': 'Q151545', 'osmid': 2272817622, 'type': 'node'},
 {'wikidata': 'Q155443', 'osmid': 206772, 'type': 'relation'},
 {'wikidata': 'Q156316', 'osmid': 7074, 'type': 'relation'},
 {'wikidata': 'Q167231', 'osmid': 9078076, 'type': 'relation'},
 {'wikidata': 'Q171857', 'osmid': 310318567, 'type': 'way'},
 {'wikidata': 'Q176792', 'osmid': 159049311, 'type': 'way'},
 {'wikidata': 'Q106636', 'osmid': 32678002, 'type': 'node'},
 {'wikidata': 'Q172613', 'osmid': 558067645, 'type': 'way'},
 {'wikidata': 'Q71279', 'osmid': 6436433, 'type': 'relation'},
 {'wikidata': 'Q75459', 'osmid': 145690044, 'type': 'way'},
 {'wikidata': 'Q122026', 'osmid': 570667299, 'type': 'node'},
 {'wikidata': 'Q155188', 'osmid': 3881907, 'type': 'relation'},
 {'wik

In [6]:
OSMtot = pd.DataFrame(listWD,
                  columns=['wikidata','osmid','type'])
OSMtot.shape

(2257, 3)

In [7]:
pd.set_option('max_colwidth', 400)
OSMtot.head(10)

Unnamed: 0,wikidata,osmid,type
0,Q106540,1869749,relation
1,Q112738,96311715,way
2,Q120314,1697356333,node
3,Q131377,5329720522,node
4,Q151545,2272817622,node
5,Q155443,206772,relation
6,Q156316,7074,relation
7,Q167231,9078076,relation
8,Q171857,310318567,way
9,Q176792,159049311,way


In [8]:
OSMtot.to_csv("WD_OSM_WH.csv")

* [WD_OSM_WH.csv](https://github.com/salgo60/Gravstensinventeringen-Wikidata/blob/main/Notebook/WD_OSM_WH.csv)

In [9]:
end = datetime.now()
print("Ended: ", end) 
print('Time elapsed (hh:mm:ss.ms) {}'.format(datetime.now() - start_time))

Ended:  2023-02-15 13:40:26.580131
Time elapsed (hh:mm:ss.ms) 0:12:58.807619
