Get all Wikidata objects and check whether each one is connected to an OSM object, using an [API](https://osm.wikidata.link/tagged).

* this [Notebook](https://github.com/salgo60/ProjectOutdoorGyms/blob/main/Jupyter/OSM_Wikidata.ipynb)

* API [Wikidata to OpenStreetMap](https://osm.wikidata.link/tagged)
  * eg. [https://osm.wikidata.link/tagged/api/item/Q106708773](https://osm.wikidata.link/tagged/api/item/Q106708773)

* Another tool [osm.wikidata.link](https://osm.wikidata.link/search)
  

TODO: 
* 


In [1]:
from datetime import datetime
# Remember when this run began; the final cell subtracts it to report elapsed time.
start_time = datetime.now()
print("Last run: ", start_time)

Last run:  2023-08-05 00:31:18.644475


In [2]:
import pandas as pd


In [3]:
#
# pip install sparqlwrapper
# https://rdflib.github.io/sparqlwrapper/

import sys,json
from SPARQLWrapper import SPARQLWrapper, JSON

# SPARQL endpoint the query below is sent to.
endpoint_url = "https://query.wikidata.org/sparql"

# Query returning two columns per item: ?site (the entity URI) and ?qid (the URI
# with everything up to the last "Q" stripped, i.e. the bare QID).
#   * keeps items reachable via wdt:P31/wdt:P279* from wd:Q3914
#     (the CSV file names used later in this notebook suggest this is the
#     "school" class -- TODO confirm) that also have a wdt:P625 value;
#   * MINUS drops every item that already carries any of P10689, P402 or P11693
#     (bound to ?OSMid / ?OSMrelid / ?OSMnode, so presumably the OSM identifier
#     properties -- verify on Wikidata);
#   * the two commented-out triples inside the query are optional country /
#     continent filters.
# NOTE(review): the recorded run returned exactly 200000 rows, which equals the
# LIMIT, so the result set is probably truncated -- confirm whether that is intended.
queryWD = """SELECT DISTINCT (REPLACE(STR(?site), ".*Q", "Q") AS ?qid) ?site WHERE {
  #?site wdt:P17 wd:Q33. 
  #?site wdt:P17/wdt:P30 wd:Q46. #Europe
  ?site wdt:P31/wdt:P279* wd:Q3914.
  ?site wdt:P625 ?coordinates.
   minus {
    { ?site wdt:P10689 ?OSMid. }
    UNION
    { ?site wdt:P402 ?OSMrelid. }
    UNION 
    { ?site wdt:P11693 ?OSMnode. }
  }
} limit 200000"""


def get_sparql_dataframe(endpoint_url, query):
    """
    Run a SPARQL query and return its result bindings as a Pandas data frame.

    Parameters:
        endpoint_url: URL of the SPARQL endpoint to send the query to.
        query: SPARQL query text.

    Returns:
        A DataFrame with one column per variable listed in the response head
        and one row per binding. A variable that is absent from a binding
        yields None in that cell.
    """
    # Identify the client to the endpoint, including the running Python version.
    agent_string = "salgo60/%s.%s" % (sys.version_info[0], sys.version_info[1])

    client = SPARQLWrapper(endpoint_url, agent=agent_string)
    client.setQuery(query)
    client.setReturnFormat(JSON)

    # Decode the raw JSON response body ourselves.
    payload = json.load(client.query().response)
    column_names = payload['head']['vars']

    # Each binding maps variable name -> {"value": ...}; flatten to plain rows.
    records = [
        [binding.get(name, {}).get('value') for name in column_names]
        for binding in payload['results']['bindings']
    ]

    return pd.DataFrame(records, columns=column_names)

# Fetch the Wikidata items and tag every row with its origin ("WD").
WDo = get_sparql_dataframe(endpoint_url, queryWD).assign(Source="WD")
WDo.shape

(200000, 3)

In [4]:
# Preview the first rows of the query result (qid, site URI, Source tag).
WDo.head()

Unnamed: 0,qid,site,Source
0,Q318880,http://www.wikidata.org/entity/Q318880,WD
1,Q12654912,http://www.wikidata.org/entity/Q12654912,WD
2,Q12659978,http://www.wikidata.org/entity/Q12659978,WD
3,Q12661362,http://www.wikidata.org/entity/Q12661362,WD
4,Q12677508,http://www.wikidata.org/entity/Q12677508,WD


In [5]:
import urllib3, json
from tqdm import tqdm
# Shared connection pool used for every lookup below.
http = urllib3.PoolManager()

# Base URL of the per-item lookup API; the QID is appended to it.
OSM_ITEM_API = "https://osm.wikidata.link/tagged/api/item/"


def _fetch_osm_match(pool, qid):
    """
    Look up one Wikidata item in the osm.wikidata.link "tagged" API.

    Parameters:
        pool: urllib3 pool manager used to send the request.
        qid: Wikidata identifier as a string, e.g. "Q106708773".

    Returns:
        (osmid, osmtype, ok). osmid/osmtype come from the first entry of the
        response's "osm" list, or are both "" when there is no such entry.
        ok is False when the request itself failed or the body was not JSON,
        so callers can tell "lookup failed" apart from "no OSM match".
    """
    url = OSM_ITEM_API + qid

    try:
        response = pool.request('GET', url)
    except urllib3.exceptions.HTTPError as err:
        # No response object exists here, so there is no status to report.
        print("request failed", url, err)
        return "", "", False

    try:
        payload = json.loads(response.data.decode('utf-8'))
    except ValueError:
        # Covers both undecodable bytes and non-JSON bodies (e.g. a 502 page).
        print(response.status, url)
        return "", "", False

    try:
        first_match = payload["osm"][0]
        return first_match["id"], first_match["type"], True
    except (KeyError, IndexError, TypeError):
        # Valid answer, but no OSM object is tagged with this item.
        return "", "", True


listWDo = []      # one dict per Wikidata item: wikidata / osmid / type
failedWDo = []    # QIDs whose lookup failed; these still get blank osmid/type

for qid in tqdm(WDo["qid"], total=WDo.shape[0]):
    # Every item is resolved from its own response only; nothing is carried
    # over from the previous iteration, so a failed lookup can never inherit
    # the previous item's OSM id.
    osmid, osmtype, ok = _fetch_osm_match(http, qid)
    if not ok:
        failedWDo.append(qid)

    listWDo.append({'wikidata': qid, 'osmid': osmid, 'type': osmtype})

print (len(listWDo))
if failedWDo:
    # Failed lookups are counted as "no OSM match" downstream; report how many.
    print(len(failedWDo), "lookups failed")

 22%|██▏       | 43184/200000 [2:33:00<556:55:58, 12.79s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6811795


 22%|██▏       | 43185/200000 [2:34:00<1028:43:08, 23.62s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6811796


 22%|██▏       | 43186/200000 [2:35:00<1421:14:13, 32.63s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6813655


 22%|██▏       | 43187/200000 [2:36:00<1732:57:54, 39.78s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6813653


 22%|██▏       | 43188/200000 [2:37:01<1974:01:34, 45.32s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6813656


 22%|██▏       | 43189/200000 [2:38:01<2154:02:00, 49.45s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6813798


 22%|██▏       | 43190/200000 [2:39:01<2286:17:24, 52.49s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6813902


 22%|██▏       | 43191/200000 [2:40:01<2383:27:45, 54.72s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6814114


 22%|██▏       | 43192/200000 [2:41:01<2452:11:27, 56.30s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6815375


 22%|██▏       | 43193/200000 [2:42:01<2502:25:25, 57.45s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6815451


 22%|██▏       | 43194/200000 [2:43:02<2538:02:38, 58.27s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6815454


 22%|██▏       | 43195/200000 [2:44:02<2561:52:12, 58.82s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6815455


 22%|██▏       | 43196/200000 [2:45:02<2578:38:04, 59.20s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6815452


 22%|██▏       | 43197/200000 [2:46:02<2591:46:01, 59.50s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6815458


 22%|██▏       | 43198/200000 [2:47:02<2599:47:46, 59.69s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6815456


 22%|██▏       | 43199/200000 [2:48:02<2605:27:58, 59.82s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6815457


 22%|██▏       | 43200/200000 [2:49:02<2609:26:44, 59.91s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6815806


 22%|██▏       | 43201/200000 [2:50:02<2611:46:24, 59.96s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6816419


 22%|██▏       | 43202/200000 [2:51:03<2614:40:18, 60.03s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6816426


 22%|██▏       | 43203/200000 [2:52:03<2617:01:10, 60.09s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6816470


 22%|██▏       | 43204/200000 [2:53:03<2617:29:35, 60.10s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6816731


 22%|██▏       | 43205/200000 [2:54:03<2618:46:54, 60.13s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6816741


 22%|██▏       | 43206/200000 [2:55:03<2618:43:30, 60.13s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6817135


 22%|██▏       | 43207/200000 [2:56:03<2618:41:08, 60.13s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6817189


 22%|██▏       | 43208/200000 [2:57:04<2619:26:00, 60.14s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6817198


 22%|██▏       | 43209/200000 [2:58:04<2620:18:10, 60.16s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6817270


 22%|██▏       | 43210/200000 [2:59:04<2619:47:05, 60.15s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6817279


 22%|██▏       | 43211/200000 [3:00:04<2620:20:22, 60.17s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6817583


 22%|██▏       | 43212/200000 [3:01:04<2620:56:10, 60.18s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6817584


 22%|██▏       | 43213/200000 [3:02:04<2620:13:25, 60.16s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6817597


 22%|██▏       | 43214/200000 [3:03:05<2619:29:38, 60.15s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6818181


 22%|██▏       | 43215/200000 [3:04:05<2619:10:16, 60.14s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6818192


 22%|██▏       | 43216/200000 [3:05:05<2618:55:05, 60.13s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6818254


 22%|██▏       | 43217/200000 [3:06:05<2619:22:37, 60.15s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6818275


 22%|██▏       | 43218/200000 [3:07:05<2618:52:52, 60.13s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6818278


 22%|██▏       | 43219/200000 [3:08:05<2618:42:24, 60.13s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6818287


 22%|██▏       | 43220/200000 [3:09:05<2619:35:36, 60.15s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6818727


 22%|██▏       | 43221/200000 [3:10:06<2620:21:29, 60.17s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6818746


 22%|██▏       | 43222/200000 [3:11:06<2619:33:37, 60.15s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6842039


 22%|██▏       | 43223/200000 [3:12:06<2620:19:55, 60.17s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6842094


 22%|██▏       | 43224/200000 [3:13:06<2620:53:46, 60.18s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6842095


 22%|██▏       | 43225/200000 [3:14:06<2621:14:23, 60.19s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6842093


 22%|██▏       | 43226/200000 [3:15:07<2620:20:54, 60.17s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6842098


 22%|██▏       | 43227/200000 [3:16:07<2620:39:42, 60.18s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6842099


 22%|██▏       | 43228/200000 [3:17:07<2619:54:58, 60.16s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6842100


 22%|██▏       | 43229/200000 [3:18:07<2620:23:23, 60.17s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6842124


 22%|██▏       | 43230/200000 [3:19:07<2619:43:37, 60.16s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6842307


 22%|██▏       | 43231/200000 [3:20:07<2620:13:32, 60.17s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6842350


 22%|██▏       | 43232/200000 [3:21:07<2619:36:04, 60.16s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6842348


 22%|██▏       | 43233/200000 [3:22:08<2620:07:00, 60.17s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6842352


 22%|██▏       | 43234/200000 [3:23:08<2620:40:32, 60.18s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6842382


 22%|██▏       | 43235/200000 [3:24:08<2619:32:42, 60.16s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6842460


 22%|██▏       | 43236/200000 [3:25:08<2618:33:29, 60.13s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6842586


 22%|██▏       | 43237/200000 [3:26:08<2618:45:46, 60.14s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6842584


 22%|██▏       | 43238/200000 [3:27:08<2618:24:43, 60.13s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6842948


 22%|██▏       | 43239/200000 [3:28:09<2619:23:03, 60.15s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6843135


 22%|██▏       | 43240/200000 [3:29:09<2618:47:20, 60.14s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6843286


 22%|██▏       | 43241/200000 [3:30:09<2619:42:22, 60.16s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6843285


 22%|██▏       | 43242/200000 [3:31:09<2620:20:41, 60.18s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6843352


 22%|██▏       | 43243/200000 [3:32:09<2619:26:44, 60.16s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6843428


 22%|██▏       | 43244/200000 [3:33:09<2620:08:29, 60.17s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6843495


 22%|██▏       | 43245/200000 [3:34:09<2619:17:14, 60.15s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6843995


 22%|██▏       | 43246/200000 [3:35:10<2618:51:10, 60.14s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6843999


 22%|██▏       | 43247/200000 [3:36:10<2619:33:48, 60.16s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6844500


 22%|██▏       | 43248/200000 [3:37:10<2620:12:03, 60.18s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6850554


 22%|██▏       | 43249/200000 [3:38:10<2619:18:54, 60.16s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6850555


 22%|██▏       | 43250/200000 [3:39:10<2619:07:08, 60.15s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6850981


 22%|██▏       | 43251/200000 [3:40:10<2617:59:41, 60.13s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6851470


 22%|██▏       | 43252/200000 [3:41:10<2617:16:12, 60.11s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6851726


 22%|██▏       | 43253/200000 [3:42:11<2617:50:47, 60.12s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6851729


 22%|██▏       | 43254/200000 [3:43:11<2618:58:48, 60.15s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6851865


 22%|██▏       | 43255/200000 [3:44:11<2618:25:43, 60.14s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6858079


 22%|██▏       | 43256/200000 [3:45:11<2618:14:19, 60.13s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6858280


 22%|██▏       | 43257/200000 [3:46:11<2618:02:36, 60.13s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6858403


 22%|██▏       | 43258/200000 [3:47:11<2618:46:16, 60.15s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6858434


 22%|██▏       | 43259/200000 [3:48:12<2619:34:48, 60.17s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6858511


 22%|██▏       | 43260/200000 [3:49:12<2619:00:53, 60.15s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6858517


 22%|██▏       | 43261/200000 [3:50:12<2618:21:15, 60.14s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6858619


 22%|██▏       | 43262/200000 [3:51:12<2617:50:13, 60.13s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6858622


 22%|██▏       | 43263/200000 [3:52:12<2617:36:20, 60.12s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6858620


 22%|██▏       | 43264/200000 [3:53:12<2617:35:42, 60.12s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6858649


 22%|██▏       | 43265/200000 [3:54:12<2618:33:15, 60.14s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6858661


 22%|██▏       | 43266/200000 [3:55:12<2618:09:01, 60.14s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6858721


 22%|██▏       | 43267/200000 [3:56:13<2619:03:08, 60.16s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6858791


 22%|██▏       | 43268/200000 [3:57:13<2618:36:47, 60.15s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6858858


 22%|██▏       | 43269/200000 [3:58:13<2618:38:44, 60.15s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6858862


 22%|██▏       | 43270/200000 [3:59:13<2618:43:03, 60.15s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6859105


 22%|██▏       | 43271/200000 [4:00:13<2618:21:33, 60.14s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6859118


 22%|██▏       | 43272/200000 [4:01:13<2618:05:30, 60.14s/it]

502 https://osm.wikidata.link/tagged/api/item/Q6859143


 22%|██▏       | 43273/200000 [4:02:13<2618:40:36, 60.15s/it]

502 https://osm.wikidata.link/tagged/api/item/Q7054743


 22%|██▏       | 43274/200000 [4:03:14<2618:06:26, 60.14s/it]

502 https://osm.wikidata.link/tagged/api/item/Q7054741


 22%|██▏       | 43275/200000 [4:04:14<2617:51:51, 60.13s/it]

502 https://osm.wikidata.link/tagged/api/item/Q7054744


 22%|██▏       | 43276/200000 [4:05:14<2618:44:55, 60.15s/it]

502 https://osm.wikidata.link/tagged/api/item/Q7054801


 22%|██▏       | 43277/200000 [4:06:14<2619:29:46, 60.17s/it]

502 https://osm.wikidata.link/tagged/api/item/Q7054830


 22%|██▏       | 43278/200000 [4:07:14<2618:41:08, 60.15s/it]

502 https://osm.wikidata.link/tagged/api/item/Q7054833


 22%|██▏       | 43279/200000 [4:08:14<2618:16:03, 60.14s/it]

502 https://osm.wikidata.link/tagged/api/item/Q7054837


 22%|██▏       | 43280/200000 [4:09:16<2632:20:28, 60.47s/it]

502 https://osm.wikidata.link/tagged/api/item/Q7054855


 22%|██▏       | 43281/200000 [4:10:16<2628:59:24, 60.39s/it]

502 https://osm.wikidata.link/tagged/api/item/Q7054882


 22%|██▏       | 43282/200000 [4:11:16<2626:38:01, 60.34s/it]

502 https://osm.wikidata.link/tagged/api/item/Q7054886


 22%|██▏       | 43283/200000 [4:12:16<2624:58:58, 60.30s/it]

502 https://osm.wikidata.link/tagged/api/item/Q7054895


100%|██████████| 200000/200000 [11:54:39<00:00,  4.66it/s]   

200000





In [6]:
# Tabulate the per-item lookup results with a fixed column order.
OSMtot = pd.DataFrame(listWDo, columns=['wikidata', 'type', 'osmid'])
OSMtot.shape


(200000, 3)

In [7]:
# Widen the column display limit so long cell values are not truncated.
# Use the fully qualified option name: the abbreviated 'max_colwidth' only works
# through pattern matching and can stop resolving if pandas adds a similarly
# named option.
pd.set_option('display.max_colwidth', 400)
OSMtot.head(10)

Unnamed: 0,wikidata,type,osmid
0,Q318880,,
1,Q12654912,,
2,Q12659978,,
3,Q12661362,,
4,Q12677508,,
5,Q12680306,,
6,Q16470479,,
7,Q16476066,,
8,Q16476363,,
9,Q16476441,,


In [8]:
# Count the items for which the lookup produced no OSM id (stored as "").
has_no_osm = OSMtot['osmid'] == ""
OSMtot[has_no_osm].shape

(163634, 3)

In [9]:
# Keep only the items whose osmid is the empty string, i.e. no OSM match recorded.
osmid_is_blank = OSMtot['osmid'] == ""
OSMEmpty = OSMtot[osmid_is_blank]

In [10]:
# Show (rows, columns) of the unmatched subset.
OSMEmpty.shape

(163634, 3)

In [11]:
# Write the items without an OSM match to a CSV file in the working directory.
OSMEmpty.to_csv("WD - OSM school 3 missing.csv")

# Preview the first unmatched rows.
OSMEmpty.head()

Unnamed: 0,wikidata,type,osmid
0,Q318880,,
1,Q12654912,,
2,Q12659978,,
3,Q12661362,,
4,Q12677508,,


In [12]:
# Items for which the lookup returned an OSM id; save them and preview the first rows.
osmid_is_set = OSMtot['osmid'] != ""
OSMConnected = OSMtot[osmid_is_set]
OSMConnected.to_csv("WD - OSM_school 3.csv")
OSMConnected.head()

Unnamed: 0,wikidata,type,osmid
231,Q7380723,node,354242485
284,Q2895377,node,1586863947
286,Q2898551,node,2572336843
487,Q5904530,way,130535965
489,Q5913254,way,10045363


In [13]:
# One-line run summary: run date, number of Wikidata items queried, and number of
# items with a blank osmid. The Swedish label "ej OSM kopplade" means
# "not linked to OSM".
print("*", start_time.strftime("%Y%m%d"),"WD objects", WDo.shape[0], "ej OSM kopplade",OSMEmpty.shape[0]) 


* 20230805 WD objects 200000 ej OSM kopplade 163634


Generate a Markdown table row, e.g.:
| 20210526     | 2802 | 2050 |1147 | 254 | 213| 84|


In [14]:
# Emit one Markdown table row: run date | items queried | distinct items without an OSM id.
run_date = start_time.strftime("%Y%m%d")
queried_count = WDo.shape[0]
unlinked_count = OSMEmpty.wikidata.nunique()
print("|", run_date, "|", queried_count, "|", unlinked_count, "|")


| 20230805 | 200000 | 163634 |


In [15]:
# Capture the finish time once and derive both printed lines from it, so the
# "Ended" timestamp and the elapsed duration describe the same instant.
end = datetime.now()
print("Ended: ", end)
print('Time elapsed (hh:mm:ss.ms) {}'.format(end - start_time))

Ended:  2023-08-05 12:26:22.830326
Time elapsed (hh:mm:ss.ms) 11:55:04.186381
