Get all Swedish churches from Wikidata and check which of them are linked to OpenStreetMap (OSM) objects
* This Notebook


In [1]:
from datetime import datetime
# Record when this run started; the final cell subtracts this value to report
# the total elapsed time of the notebook.
start_time  = datetime.now()
print("Last run: ", start_time)

In [2]:
import pandas as pd
#
# pip install sparqlwrapper
# https://rdflib.github.io/sparqlwrapper/

import sys,json
from SPARQLWrapper import SPARQLWrapper, JSON

# SPARQL endpoint that the church query below is sent to.
endpoint_url = "https://query.wikidata.org/sparql"
 
# Short link to this query: https://w.wiki/3S35
# For every matching item the query returns:
#   ?churchid    - the item URI with everything up to the last "Q" stripped,
#                  i.e. the bare Q-id
#   ?churchLabel - the label, Swedish first with English as fallback
#   ?OSM/?OSMrel - two optional OSM identifiers (empty when not set)
# Rows are ordered by label.
# NOTE(review): presumably Q16970 = church building, P17/Q34 = country Sweden,
# P10689 = OSM way id and P402 = OSM relation id -- confirm against Wikidata.
queryChurch = """SELECT (REPLACE(STR(?church), ".*Q", "Q") AS ?churchid) ?churchLabel ?OSM ?OSMrel WHERE {
  ?church wdt:P31/wdt:P279* wd:Q16970.
  ?church wdt:P17 wd:Q34.
  OPTIONAL {?church wdt:P10689 ?OSM}
  OPTIONAL {?church wdt:P402 ?OSMrel}


  SERVICE wikibase:label { bd:serviceParam wikibase:language "sv,en". }
}
ORDER BY (?churchLabel)"""


def get_sparql_dataframe(endpoint_url, query):
    """
    Run a SPARQL query and return its result bindings as a Pandas data frame.

    Parameters
    ----------
    endpoint_url : str
        URL of the SPARQL endpoint the query is sent to.
    query : str
        SPARQL query text.

    Returns
    -------
    pandas.DataFrame
        One column per variable listed under ``head.vars`` of the JSON
        response and one row per entry in ``results.bindings``. A variable
        that is absent from a binding yields ``None`` in that cell.
    """
    # Identify the client with a user agent that carries the Python version.
    major, minor = sys.version_info[0], sys.version_info[1]
    client = SPARQLWrapper(endpoint_url, agent="salgo60/%s.%s" % (major, minor))
    client.setQuery(query)
    client.setReturnFormat(JSON)

    # Parse the raw HTTP response body as JSON.
    payload = json.load(client.query().response)
    variables = payload['head']['vars']

    # Flatten every binding to its plain values, in column order.
    records = [
        [binding.get(name, {}).get('value') for name in variables]
        for binding in payload['results']['bindings']
    ]
    return pd.DataFrame(records, columns=variables)

# Run the church query against the endpoint and keep the result as a data frame
# with one row per returned binding; the trailing expression displays its
# (rows, columns) shape as the cell output.
WDSweChurch = get_sparql_dataframe(endpoint_url, queryChurch)    
WDSweChurch.shape

(4331, 4)

In [3]:
WDSweChurch.head()

Unnamed: 0,churchid,churchLabel,OSM,OSMrel
0,Q12001497,Q12001497,,
1,Q1573660,Church ruins of Agnestad,,
2,Q106831114,Pingstkyrkan Ulricehamn,,
3,Q11735120,Saint Mary's Chapel (Linköping Cathedral),,
4,Q29893517,"""G:a kyrka"",Kyrkmon",,


In [4]:
import urllib3, json
from tqdm import tqdm
http = urllib3.PoolManager()

# For every church found in Wikidata, ask osm.wikidata.link which OSM objects
# are tagged with that Wikidata id and keep the first match.
# listChurch ends up with one dict per church that has at least one tagged
# OSM object: {'wikidata': <Q-id>, 'osmid': <OSM id>, 'type': <OSM element type>}.
listChurch = []
for WD, row in tqdm(WDSweChurch.iterrows(), total=WDSweChurch.shape[0]):
    url = "https://osm.wikidata.link/tagged/api/item/" + row["churchid"]

    # Step 1: fetch. On a transport error there is no response object at all,
    # so report the exception itself and move on to the next church.
    try:
        r = http.request('GET', url)
    except urllib3.exceptions.HTTPError as err:
        print ("request failed:", err, url)
        continue

    # Step 2: decode. ValueError covers both invalid UTF-8 (UnicodeDecodeError)
    # and invalid JSON (json.JSONDecodeError). Skipping here is essential:
    # falling through would reuse `data` from the previous iteration and
    # attribute another church's OSM object to this Wikidata id.
    try:
        data = json.loads(r.data.decode('utf-8'))
    except ValueError:
        print (r.status, url)
        continue

    # Step 3: extract the first tagged OSM object. A missing "osm" key, an
    # empty list or an unexpected payload shape all mean "no match" and are
    # skipped silently, as before.
    try:
        first_match = data["osm"][0]
        new_item = {
            'wikidata': row["churchid"],
            'osmid': first_match["id"],
            'type': first_match["type"],
        }
    except (KeyError, IndexError, TypeError):
        continue

    listChurch.append(new_item)
print (len(listChurch))

100%|██████████| 4331/4331 [06:21<00:00, 11.36it/s]

2371





In [8]:
#listChurch

In [7]:
# Turn the collected match records into a data frame with a fixed column order;
# the trailing expression displays its (rows, columns) shape as the cell output.
OSMtot = pd.DataFrame(listChurch,
                  columns=['wikidata','osmid','type'])
OSMtot.shape

(2371, 3)

In [9]:
# Allow up to 400 characters per cell when rendering data frames.
# The fully qualified option key is used on purpose: the short form
# 'max_colwidth' only works through pandas' substring matching of option names
# and would start failing if another option containing that text were added.
pd.set_option('display.max_colwidth', 400)
# Preview the first ten matches.
OSMtot.head(10)

Unnamed: 0,wikidata,osmid,type
0,Q10398038,166372147,way
1,Q10399592,427761718,way
2,Q10400870,839880967,way
3,Q4682106,106745873,way
4,Q360572,42038493,way
5,Q10401444,1375024,relation
6,Q93797701,7782172690,node
7,Q31899783,323215588,way
8,Q10403698,176185391,way
9,Q10404021,840512525,way


In [10]:
# Persist the matches to WDOSMChurch.csv (path relative to the working
# directory). The index is not disabled, so it is written as well, as a first
# column without a header name.
OSMtot.to_csv("WDOSMChurch.csv")

In [11]:
# Capture the end time once and derive the duration from that same value, so
# the printed end time and the printed elapsed time are consistent.
end = datetime.now()
print("Ended: ", end)
print('Time elapsed (hh:mm:ss.ms) {}'.format(end - start_time))

Ended:  2023-01-07 02:13:16.609709
Time elapsed (hh:mm:ss.ms) 0:10:28.375369
