# Automatic check whether OSM fountains with a Wikidata reference have coordinates in Wikidata (Luzern)
The following script takes the Wikidata Q-IDs obtained from OSM and checks whether those fountains have coordinates in Wikidata.
If not, the OSM coordinates are added to Wikidata with a reference to the OSM license; see
https://github.com/water-fountains/import2wikidata/issues/15.

In [1]:
#TODO import most functions from ../../generic

## Initialize environment

In [2]:
# Timestamp helpers: `dt` is the datetime class and `dtFmt` the compact
# yymmdd_HHMMSS pattern used for the start-time print below and for the
# name of the generated output file.
from datetime import datetime as dt
dtFmt = "%y%m%d_%H%M%S"
print (dt.now().strftime(dtFmt))
import pandas as pd
import io
import numpy as np
from urllib.request import urlopen
import json
# NOTE(review): wildcard import; none of its names appear to be used in the
# visible cells - consider removing it once that is confirmed.
from math import *
from platform import python_version
# Record the interpreter version alongside the outputs for reproducibility.
print("Python v "+python_version())
# Related gastrodon issue: https://github.com/paulhoule/gastrodon/issues/7
from gastrodon import RemoteEndpoint,QName,ttl,URIRef,inline
from matplotlib import pyplot

# Region label; it becomes part of the generated QuickStatements file name.
nameOfRegion='Luzern'

191125_074641
Python v 3.6.5


In [3]:
# Namespace prefixes, written as Turtle @prefix declarations, that are handed
# to the endpoint below. The lookup query in this notebook uses wd:, p:, psv:
# and wikibase:.
# NOTE(review): `inline(...).graph` presumably parses the Turtle text into an
# RDF graph carrying these namespace bindings - confirm against gastrodon docs.
prefixes=inline("""
   @prefix wd: <http://www.wikidata.org/entity/> .
   @prefix wdt: <http://www.wikidata.org/prop/direct/> .
   @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
   @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
   @prefix p: <http://www.wikidata.org/prop/> .
   @prefix psv: <http://www.wikidata.org/prop/statement/value/> .
   @prefix wikibase: <http://wikiba.se/ontology#> .
""").graph
# SPARQL endpoint object used for every Wikidata query in this notebook.
endpoint=RemoteEndpoint(
   # Alternative URL, currently not used: "https://query.wikidata.org/sparql"
    "https://query.wikidata.org/bigdata/namespace/wdq/sparql"
   ,prefixes=prefixes
)

## Load data

In [4]:
# Load the OSM fountains that carry a Wikidata reference. The file is expected
# to provide the columns lat, lon and wikidata (the Q-ID), as shown in the
# preview below.
# NOTE(review): the file name is hard-coded with region and timestamp -
# presumably written by an earlier export step; verify before re-running.
df = pd.read_csv("osmFountainsWithWikidataReference_Luzern_191125_074112.csv")

In [5]:
# Preview the first rows of the loaded OSM data.
df.head()

Unnamed: 0,lat,lon,wikidata
0,47.052607,8.307654,Q62599999
1,47.051825,8.303022,Q29785697


In [7]:
# Number of rows loaded from the CSV file.
len(df)

2

## Identify already existing fountains
### Query fountains from Wikidata

In [6]:
def coordsFunction(row):
    """Look up the Wikidata coordinate (P625) of one fountain.

    Parameters
    ----------
    row : mapping with a 'wikidata' entry (a DataFrame row or a dict)
        The 'wikidata' value is the Q-ID inserted into the SPARQL query.

    Returns
    -------
    The value in the first column of the first result row (the query
    selects ?lat first), or the string 'noCoord' when the query returns
    no rows.

    Side effects: prints the Q-ID and sends one query through the
    module-level `endpoint`.
    """
    qid = row['wikidata']
    # The query text is kept verbatim; %s is the only placeholder.
    sparqlTemplate = """SELECT ?lat ?lon
    WHERE
    {{
      wd:%s p:P625 ?statement.
    ?statement psv:P625 ?node.
    ?node wikibase:geoLatitude ?lat.
    ?node wikibase:geoLongitude ?lon.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    }}"""
    sparql = sparqlTemplate % qid
    # Progress output: one line per queried item.
    print(qid)
    answer = endpoint.select(sparql)
    matches = pd.DataFrame(answer)
    if not len(matches.values):
        # The item has no coordinate statement in Wikidata.
        return 'noCoord'
    firstRow = matches.values[0]
    return firstRow[0]


In [7]:
# Run the Wikidata lookup once per row of df (axis=1 passes each row to
# coordsFunction). Each row triggers one query against the endpoint.
result = df.apply(coordsFunction,axis=1)

Q62599999
Q29785697


In [8]:
# Preview the per-row lookup results returned by coordsFunction.
result.head()

0    47.052607
1    47.051816
dtype: float64

In [9]:
# Put the lookup result next to the OSM data. With axis=1 and
# ignore_index=True the column names are replaced by positions, so the
# combined frame is addressed as 0=lat, 1=lon, 2=wikidata, 3=lookup result
# (see the preview below); later cells rely on these positions.
query_result = pd.concat([df,result],ignore_index=True, axis=1)

In [10]:
# Report the number of rows and the total cell count (rows x columns) of the combined table.
print("\n\nTotal number of rows incl. duplicates "+str(len(query_result))+" size "+str(query_result.size))



Total number of rows incl. duplicates 2 size 8


In [11]:
# Preview the combined table (OSM data plus lookup result).
query_result.head()

Unnamed: 0,0,1,2,3
0,47.052607,8.307654,Q62599999,47.052607
1,47.051825,8.303022,Q29785697,47.051816


## Create Quickstatement commands from data
### Helper functions to format content according to Quickstatements v1 syntax

In [12]:
def process_coordinates(x, y):
    """Format a coordinate pair as a Quickstatements v1 coordinate value.

    The arguments are written in swapped order, ``y`` first and ``x``
    second, each with eight decimals: ``@<y>/<x>``.

    Parameters
    ----------
    x : number, written after the slash
    y : number, written before the slash

    Returns
    -------
    str, for example ``'@47.05260700/8.30765400'``
    """
    template = '@{1:1.8f}/{0:1.8f}'
    return template.format(x, y)


def createline(lines, item, prop, value, extra, qualifiers=[]):
    """Append one Quickstatements v1 command to ``lines``.

    Parameters
    ----------
    lines : list of str
        Command accumulator; it is extended in place.
    item, prop, value :
        The three tab-separated leading fields of the command.
    extra : str
        Text appended verbatim after the qualifiers (e.g. a source
        reference that starts with a tab).
    qualifiers : sequence of dict, optional
        Each dict supplies 'prop' and 'value'; every pair is appended as
        two further tab-separated fields.

    Returns
    -------
    The same ``lines`` list. Nothing is appended when ``value`` is the
    empty string or the literal two-character string '""'.
    """
    # Guard clause: empty values produce no command.
    if value == '' or value == '""':
        return lines

    pieces = ['{}\t{}\t{}'.format(item, prop, value)]
    if len(qualifiers) > 0:
        # One "\tprop\tvalue" pair per qualifier, in the given order.
        pieces.extend('\t{}\t{}'.format(q['prop'], q['value']) for q in qualifiers)
    pieces.append(extra)
    pieces.append('\n')
    lines.append(''.join(pieces))
    return lines

### Create statements, taking care not to overwrite existing data

In [13]:
# Source reference appended to every generated statement. S248 is the
# Quickstatements source form of P248; Q1224853 is presumably the item for the
# OSM license mentioned in the introduction - TODO confirm.
statedId = "\tS248\tQ1224853"
# initialize command storage list
lines = []
i=0   # position of the current row (1-based)
j=0   # number of fountains that lack coordinates in Wikidata
for index, row in query_result.iterrows():
    i+=1

    # Columns are positional: 0=lat, 1=lon, 2=Q-ID, 3=lookup result.
    # Rows that already have coordinates in Wikidata are skipped silently,
    # so existing data is never overwritten.
    if row[3] != 'noCoord':
        continue

    j+=1
    print(str(j)+":\torphan coordinates will be added for https://www.wikidata.org/wiki/"+row[2]+" in pos "+str(i))
    #TODO how to add ' target="_blank" '
    #TODO how to make only the Q-# visible, but the whole link anyway
    # process_coordinates receives (lon, lat) and emits "@lat/lon".
    coordinateValue = process_coordinates(row[1], row[0])
    lines = createline(lines, row[2], 'P625', coordinateValue, statedId)
        

# Write commands to file

In [14]:
# The file name carries the region and the current timestamp, so every run
# writes a new file instead of overwriting an earlier one.
quickStatFileName = "quickstatement_commands_locOrphan_fountain_"+nameOfRegion+"_"+dt.now().strftime(dtFmt)+".txt"
with io.open(quickStatFileName, "w", encoding='utf8') as f:
    # Every entry of `lines` already ends with a newline.
    f.write("".join(lines))
print("wrote '"+quickStatFileName+"' with "+str(len(lines))+" lines")

wrote 'quickstatement_commands_locOrphan_fountain_Luzern_191125_074711.txt' with 0 lines


# Import into Wikidata
- Go to https://tools.wmflabs.org/wikidata-todo/quick_statements.php.
- Authenticate yourself with your Wikidata account.
- Copy and paste the contents of quickstatement_commands*.txt into the blank field, and run the commands

Caveats
- Do not interrupt the import; it may take some time.
- If you restart it, you may produce duplicates. If you think you have to restart, wait half an hour first.

see ../20191030_1600_import.png

...
58. Processing Q72935495 (Q72935495 Lde "Brunnen (Seelöwe-Planschbecken )")
59. Processing Q72935495 (Q72935495 P137 Q27229237)

All done!.

In [15]:
# it may well take half an hour until it works and becomes visible on https://query.wikidata.org/

In [61]:
#TODO Take the opposite approach: instead of querying osm first, do sparql to get the fountain instances without coordinates