Get all Wikidata objects and check whether each one is connected to an OSM object, using an [API](https://osm.wikidata.link/tagged)

* this [Notebook](https://github.com/salgo60/ProjectOutdoorGyms/blob/main/Jupyter/OSM_Wikidata.ipynb)

* API [Wikidata to OpenStreetMap](https://osm.wikidata.link/tagged)
  * e.g. [https://osm.wikidata.link/tagged/api/item/Q106708773](https://osm.wikidata.link/tagged/api/item/Q106708773)

* Another tool [osm.wikidata.link](https://osm.wikidata.link/search)
  

TODO: 
* 


In [1]:
from datetime import datetime
# Remember when this run began; the closing cell subtracts it to report elapsed time.
start_time = datetime.now()
print("Last run: ", start_time)

Last run:  2023-08-07 11:50:20.498426


In [2]:
import pandas as pd


In [3]:
#
# pip install sparqlwrapper
# https://rdflib.github.io/sparqlwrapper/

import sys,json
from SPARQLWrapper import SPARQLWrapper, JSON

# SPARQL endpoint the query below is sent to.
endpoint_url = "https://query.wikidata.org/sparql"
 
# Query returning two columns per item:
#   ?site - the entity URI
#   ?qid  - the same URI with everything up to the last "Q" stripped (e.g. "Q116817")
# Selected items are instances of Q28872924 (or of any subclass of it) that have
# a coordinate statement (P625). The MINUS block removes items that already carry
# any of P10689, P402 or P11693 - presumably the OSM way / relation / node id
# properties, judging by the variable names; confirm against Wikidata.
# Lines starting with "#" inside the string are disabled SPARQL filters (country / continent).
queryWD = """SELECT DISTINCT (REPLACE(STR(?site), ".*Q", "Q") AS ?qid) ?site WHERE {
  #?site wdt:P17 wd:Q34. 
  ?site wdt:P31/wdt:P279* wd:Q28872924.
  ?site wdt:P625 ?coordinates.
#  minus{?site wdt:P17/wdt:P30 wd:Q46} #Europe
   minus {
    { ?site wdt:P10689 ?OSMid. }
    UNION
    { ?site wdt:P402 ?OSMrelid. }
    UNION 
    { ?site wdt:P11693 ?OSMnode. }
  }
} """


def get_sparql_dataframe(endpoint_url, query):
    """
    Run a SPARQL query and return its result bindings as a Pandas data frame.

    Columns follow the order of the variables listed in the response head.
    A variable that is unbound in a given row produces a missing value in
    that cell. All bound values are returned as the raw 'value' strings.
    """
    major, minor = sys.version_info[0], sys.version_info[1]
    client = SPARQLWrapper(endpoint_url, agent="salgo60/%s.%s" % (major, minor))
    client.setQuery(query)
    client.setReturnFormat(JSON)

    # Parse the raw HTTP response body ourselves instead of using the wrapper's converter.
    payload = json.load(client.query().response)
    columns = payload['head']['vars']

    records = [
        [binding.get(name, {}).get('value') for name in columns]
        for binding in payload['results']['bindings']
    ]
    return pd.DataFrame(records, columns=columns)

# Run the Wikidata query and tag every returned row with its origin ("WD").
WDo = get_sparql_dataframe(endpoint_url, queryWD).assign(Source="WD")
WDo.shape

(14042, 3)

In [4]:
# Preview the first five query results.
WDo.head(5)

Unnamed: 0,qid,site,Source
0,Q116817,http://www.wikidata.org/entity/Q116817,WD
1,Q118394,http://www.wikidata.org/entity/Q118394,WD
2,Q118518,http://www.wikidata.org/entity/Q118518,WD
3,Q118525,http://www.wikidata.org/entity/Q118525,WD
4,Q118537,http://www.wikidata.org/entity/Q118537,WD


In [5]:
import urllib3, json
from tqdm import tqdm
http = urllib3.PoolManager()


def _first_osm_match(qid):
    """
    Ask the osm.wikidata.link "tagged" API which OSM object references `qid`.

    Returns a tuple (osmid, osmtype) taken from the first entry of the
    response's "osm" list. Returns ("", "") when the request fails, the body
    is not valid JSON, or the response lists no OSM object. The result never
    depends on a previous call, so a failed lookup cannot inherit another
    item's OSM id.
    """
    url = "https://osm.wikidata.link/tagged/api/item/" + qid
    try:
        r = http.request('GET', url)
    except urllib3.exceptions.HTTPError as err:
        # No response object exists here, so report the transport error itself.
        print(err, url)
        return "", ""
    try:
        data = json.loads(r.data.decode('utf-8'))
    except ValueError:
        # Covers both undecodable bytes and malformed JSON (e.g. an HTML error page).
        print(r.status, url)
        return "", ""
    try:
        first = data["osm"][0]
        return first["id"], first["type"]
    except (KeyError, IndexError, TypeError):
        # Missing "osm" key, empty list, or unexpected payload shape: no match.
        return "", ""


# One lookup per Wikidata item; the API is called sequentially, so this is slow
# for large result sets (the progress bar shows the remaining time).
listWDo = []
for qid in tqdm(WDo["qid"], total=WDo.shape[0]):
    osmid, osmtype = _first_osm_match(qid)
    listWDo.append({'wikidata': qid, 'osmid': osmid, 'type': osmtype})
print(len(listWDo))

100%|██████████| 14042/14042 [1:09:40<00:00,  3.36it/s]

14042





In [6]:
# Collect the per-item lookup records into a frame with a fixed column order.
OSMtot = pd.DataFrame(data=listWDo, columns=['wikidata', 'type', 'osmid'])
OSMtot.shape


(14042, 3)

In [7]:
# Raise the per-column display width so long cell values are not truncated, then preview.
pd.set_option('display.max_colwidth', 400)
OSMtot.head(n=10)

Unnamed: 0,wikidata,type,osmid
0,Q116817,,
1,Q118394,,
2,Q118518,way,807128582.0
3,Q118525,,
4,Q118537,,
5,Q118544,,
6,Q118873,,
7,Q119167,,
8,Q119177,,
9,Q119196,way,1182803970.0


In [8]:
# Size of the subset whose lookup produced no OSM id (stored as an empty string).
OSMtot.loc[OSMtot['osmid'].eq("")].shape

(13712, 3)

In [9]:
# Items without an OSM match: the lookup stored an empty string as their osmid.
OSMEmpty = OSMtot.loc[OSMtot['osmid'].eq("")]

In [10]:
# (rows, columns) of the unmatched subset.
OSMEmpty.shape

(13712, 3)

In [11]:
# Write the unmatched items to CSV (the frame index is written as the first column).
OSMEmpty.to_csv(path_or_buf="WD - OSM WD 1 missing.csv")

# Preview the first five unmatched items.
OSMEmpty.head(5)

Unnamed: 0,wikidata,type,osmid
0,Q116817,,
1,Q118394,,
3,Q118525,,
4,Q118537,,
5,Q118544,,


In [12]:
# Items for which the lookup returned an OSM object (non-empty osmid).
OSMConnected = OSMtot.loc[OSMtot['osmid'].ne("")]
OSMConnected.to_csv(path_or_buf="WD - OSM_WD1.csv")
OSMConnected.head(5)

Unnamed: 0,wikidata,type,osmid
2,Q118518,way,807128582
9,Q119196,way,1182803970
35,Q386154,relation,2910707
38,Q430569,relation,8500184
40,Q465816,way,1042413994


In [13]:
# One-line run summary: run date, number of Wikidata items queried, and number
# of items without an OSM match ("ej OSM kopplade" is Swedish for "not OSM-connected").
print(
    "*",
    start_time.strftime("%Y%m%d"),
    "WD objects",
    len(WDo),
    "ej OSM kopplade",
    len(OSMEmpty),
)


* 20230807 WD objects 14042 ej OSM kopplade 13712


Generate a Markdown table row, e.g.:
| 20210526     | 2802 | 2050 |1147 | 254 | 213| 84|


In [14]:
# Emit the run as a Markdown table row: date | items queried | distinct unmatched items.
print(f"| {start_time:%Y%m%d} | {WDo.shape[0]} | {OSMEmpty['wikidata'].nunique()} |")


| 20230807 | 14042 | 13712 |


In [15]:
# Take a single end timestamp so the printed end time and the elapsed duration agree.
# Previously the duration came from a second datetime.now() call and drifted
# slightly from the printed "Ended" value.
end = datetime.now()
print("Ended: ", end)
print('Time elapsed (hh:mm:ss.ms) {}'.format(end - start_time))

Ended:  2023-08-07 13:00:04.411434
Time elapsed (hh:mm:ss.ms) 1:09:43.913511
