In [1]:
%pip install SPARQLWrapper
%pip install fuzzywuzzy
%pip install geopy
from SPARQLWrapper import SPARQLWrapper, JSON
from fuzzywuzzy import fuzz
from geopy.distance import geodesic
import math

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.




In [31]:
def query_wikidata_coordinates(latitude : float, longitude : float, label : str, data=None):
    """Pick the Wikidata city that best corresponds to a named point.

    Candidates are items that are instances of ``wd:Q515`` (or of one of its
    subclasses), carry an English label, and lie inside the
    ``wikibase:around`` search area centred on the point (radius ``"50"``).

    Selection rule:

    1. Each candidate label is scored against ``label`` with the mean of
       ``fuzz.ratio`` and ``fuzz.partial_ratio``. If any score exceeds 80,
       the best-scoring candidate is returned (the earliest one on ties).
    2. Otherwise the candidate geodesically closest to the point is returned.

    Args:
        latitude: Latitude of the point, in decimal degrees.
        longitude: Longitude of the point, in decimal degrees.
        label: City name to compare candidate labels against.
        data: Optional pre-fetched SPARQL JSON result with ``cityLabel`` and
            ``location`` bindings. When given, no network request is made.

    Returns:
        The SPARQL binding (a dict) of the selected city, or ``None`` when
        there are no candidates at all.
    """
    if data is None:
        print("Querying Wikidata......")
        query_wikidata = f"""
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>
        PREFIX wd: <http://www.wikidata.org/entity/>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX bd: <http://www.bigdata.com/rdf#>
        PREFIX wikibase: <http://wikiba.se/ontology#>
        PREFIX geo: <http://www.opengis.net/ont/geosparql#>

        SELECT DISTINCT ?city ?cityLabel ?location
        WHERE {{
        SERVICE wikibase:around {{
            ?city wdt:P625 ?location .
            bd:serviceParam wikibase:center "Point({longitude} {latitude})"^^geo:wktLiteral .
            bd:serviceParam wikibase:radius "50" .
        }}
        ?city wdt:P31/wdt:P279* wd:Q515 .
        ?city rdfs:label ?cityLabel .
        FILTER(LANG(?cityLabel) = "en") .

        }} LIMIT 1000

        """
        sparql_wdata = SPARQLWrapper("https://query.wikidata.org/sparql")
        sparql_wdata.setQuery(query_wikidata)
        sparql_wdata.setReturnFormat(JSON)
        results = sparql_wdata.query().convert()
    else:
        results = data

    candidates = results["results"]["bindings"]
    if not candidates:
        # Without this guard the distance fallback below would have nothing
        # to choose from; the previous implementation then hit an unbound
        # local variable and raised UnboundLocalError.
        return None

    matched_cities = []
    for r in candidates:
        city_label = r["cityLabel"]["value"]
        ratio = (fuzz.ratio(city_label, label) + fuzz.partial_ratio(city_label, label)) / 2
        if ratio > 80:
            matched_cities.append((ratio, r))

    if matched_cities:
        # max() keeps the first of several equally good candidates.
        return max(matched_cities, key=lambda scored: scored[0])[1]

    def distance_km(r):
        # The location value is WKT text "Point(<longitude> <latitude>)".
        parts = r["location"]["value"].split("(")[1].split(")")[0].split(" ")
        return geodesic((latitude, longitude), (float(parts[1]), float(parts[0]))).kilometers

    # No label is similar enough: fall back to the nearest candidate.
    return min(candidates, key=distance_km)

def get_wikidata_results(latitude : float, longitude : float):
    """Fetch the raw Wikidata city candidates around a point.

    Runs a ``wikibase:around`` search (radius ``"50"``) centred on the given
    coordinates and keeps items that are instances of ``wd:Q515`` or of one of
    its subclasses and that have an English label.

    Args:
        latitude: Latitude of the search centre, in decimal degrees.
        longitude: Longitude of the search centre, in decimal degrees.

    Returns:
        The converted SPARQL JSON response, with ``city``, ``cityLabel`` and
        ``location`` bindings.
    """
    sparql_text = f"""
    PREFIX wdt: <http://www.wikidata.org/prop/direct/>
    PREFIX wd: <http://www.wikidata.org/entity/>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX bd: <http://www.bigdata.com/rdf#>
    PREFIX wikibase: <http://wikiba.se/ontology#>
    PREFIX geo: <http://www.opengis.net/ont/geosparql#>

    SELECT DISTINCT ?city ?cityLabel ?location
    WHERE {{
    SERVICE wikibase:around {{
        ?city wdt:P625 ?location .
        bd:serviceParam wikibase:center "Point({longitude} {latitude})"^^geo:wktLiteral .
        bd:serviceParam wikibase:radius "50" . 
    }}
    ?city wdt:P31/wdt:P279* wd:Q515 . 
    ?city rdfs:label ?cityLabel .
    FILTER(LANG(?cityLabel) = "en") .

    }} LIMIT 1000

    """

    # Configure the client for the public Wikidata endpoint, then run the query.
    wikidata_client = SPARQLWrapper("https://query.wikidata.org/sparql")
    wikidata_client.setQuery(sparql_text)
    wikidata_client.setReturnFormat(JSON)
    return wikidata_client.query().convert()

def get_dbpedia_results(latitude : float, longitude : float):
    """Fetch the raw DBpedia city candidates around a point.

    Selects ``dbo:City`` resources with an English label whose
    ``geo:lat``/``geo:long`` point satisfies ``bif:st_intersects`` with the
    query point at precision ``50``.

    The result includes ``?lat`` and ``?long`` so that it can be handed to
    ``query_dbpedia_coordinates(..., data=...)``: that function's
    nearest-city fallback reads ``r["lat"]`` and ``r["long"]`` from every
    binding and fails with ``KeyError`` when they are missing.

    Args:
        latitude: Latitude of the search centre, in decimal degrees.
        longitude: Longitude of the search centre, in decimal degrees.

    Returns:
        The converted SPARQL JSON response, with ``city``, ``cityLabel``,
        ``lat`` and ``long`` bindings.
    """
    query = f"""
    PREFIX dbo: <http://dbpedia.org/ontology/>
    PREFIX dbr: <http://dbpedia.org/resource/>
    PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

    SELECT DISTINCT ?city ?cityLabel ?lat ?long
    WHERE {{
      ?city a dbo:City .
      ?city rdfs:label ?cityLabel .
      ?city geo:lat ?lat .
      ?city geo:long ?long .
      FILTER (LANG(?cityLabel) = "en")
      FILTER (
        bif:st_intersects (
          bif:st_point (?long, ?lat),
          bif:st_point ({longitude}, {latitude}),
          50
        )
      )
    }}
    LIMIT 1000


    """

    sparql_dbpedia = SPARQLWrapper("http://dbpedia.org/sparql")
    sparql_dbpedia.setQuery(query)
    sparql_dbpedia.setReturnFormat(JSON)

    return sparql_dbpedia.query().convert()

def query_dbpedia_coordinates(latitude : float, longitude : float, label : str, data=None):
    """Pick the DBpedia city that best corresponds to a named point.

    Candidates are ``dbo:City`` resources with an English label whose
    ``geo:lat``/``geo:long`` point satisfies ``bif:st_intersects`` with the
    query point at precision ``50``.

    Selection rule:

    1. Each candidate label is scored against ``label`` with the mean of
       ``fuzz.ratio`` and ``fuzz.partial_ratio``. If any score exceeds 80,
       the best-scoring candidate is returned (the earliest one on ties).
    2. Otherwise the candidate geodesically closest to the point is returned.

    Args:
        latitude: Latitude of the point, in decimal degrees.
        longitude: Longitude of the point, in decimal degrees.
        label: City name to compare candidate labels against.
        data: Optional pre-fetched SPARQL JSON result with ``cityLabel``,
            ``lat`` and ``long`` bindings. When given, no network request is
            made.

    Returns:
        The SPARQL binding (a dict) of the selected city, or ``None`` when
        there are no candidates at all.
    """
    if data is None:
        print("Querying DBpedia......")
        query = f"""
        PREFIX dbo: <http://dbpedia.org/ontology/>
        PREFIX dbr: <http://dbpedia.org/resource/>
        PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

        SELECT DISTINCT ?city ?cityLabel ?lat ?long
        WHERE {{
          ?city a dbo:City .
          ?city rdfs:label ?cityLabel .
          ?city geo:lat ?lat .
          ?city geo:long ?long .
          FILTER (LANG(?cityLabel) = "en")
          FILTER (
            bif:st_intersects (
              bif:st_point (?long, ?lat),
              bif:st_point ({longitude}, {latitude}),
              50
            )
          )
        }}
        LIMIT 1000


        """

        sparql_dbpedia = SPARQLWrapper("http://dbpedia.org/sparql")
        sparql_dbpedia.setQuery(query)
        sparql_dbpedia.setReturnFormat(JSON)
        results = sparql_dbpedia.query().convert()
    else:
        results = data

    candidates = results["results"]["bindings"]
    if not candidates:
        # Without this guard the distance fallback below would have nothing
        # to choose from; the previous implementation then hit an unbound
        # local variable and raised UnboundLocalError.
        return None

    matched_cities = []
    for r in candidates:
        city_label = r["cityLabel"]["value"]
        ratio = (fuzz.ratio(city_label, label) + fuzz.partial_ratio(city_label, label)) / 2
        if ratio > 80:
            matched_cities.append((ratio, r))

    if matched_cities:
        # max() keeps the first of several equally good candidates.
        return max(matched_cities, key=lambda scored: scored[0])[1]

    def distance_km(r):
        coords = (float(r["lat"]["value"]), float(r["long"]["value"]))
        return geodesic((latitude, longitude), coords).kilometers

    # No label is similar enough: fall back to the nearest candidate.
    return min(candidates, key=distance_km)


def query_graphDB(endpoint="http://193.2.205.14:7200/repositories/EnergyGraph"):
    """Fetch every place's coordinates with its containing city from GraphDB.

    For each ``schema:Place`` that is ``schema:containedInPlace`` a
    ``schema:City``, the query returns the city's ``schema:name``, the city
    resource itself and the place's ``schema:longitude``/``schema:latitude``.

    Args:
        endpoint: URL of the repository's SPARQL endpoint. Defaults to the
            previously hard-coded repository; pass e.g.
            ``"http://localhost:7200/repositories/test"`` to query a local
            one instead.

    Returns:
        The converted SPARQL JSON response, with ``cityName``, ``longitude``,
        ``latitude`` and ``City`` bindings.
    """
    query_my_data = """
    PREFIX schema: <https://schema.org/>
    PREFIX wdtn: <http://www.wikidata.org/prop/direct-normalized/>
    PREFIX data: <http://mydata.example.org/>
    PREFIX owl: <http://www.w3.org/2002/07/owl#>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX wdt: <http://www.wikidata.org/prop/direct/>
    PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX voc: <http://vocabulary.example.org/>
    PREFIX wd: <http://www.wikidata.org/entity/>
    SELECT DISTINCT ?cityName ?longitude ?latitude ?City
    WHERE {
    ?s rdf:type schema:Place .
    ?s schema:containedInPlace ?City .
    ?City rdf:type schema:City .
    ?City schema:name ?cityName .
    ?s schema:longitude ?longitude .
    ?s schema:latitude ?latitude .
    }
    """

    sparql_graphdb = SPARQLWrapper(endpoint)
    sparql_graphdb.setQuery(query_my_data)
    sparql_graphdb.setReturnFormat(JSON)

    return sparql_graphdb.query().convert()

In [None]:

# Ad-hoc run of the GraphDB lookup against a local repository.
# The query selects, for every schema:Place contained in a schema:City, the
# city's name and resource plus the place's longitude and latitude.
query_my_data = """
PREFIX schema: <https://schema.org/>
PREFIX wdtn: <http://www.wikidata.org/prop/direct-normalized/>
PREFIX data: <http://mydata.example.org/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX voc: <http://vocabulary.example.org/>
PREFIX wd: <http://www.wikidata.org/entity/>
SELECT DISTINCT ?cityName ?longitude ?latitude ?City
WHERE {
?s rdf:type schema:Place .
?s schema:containedInPlace ?City .
?City rdf:type schema:City .
?City schema:name ?cityName .
?s schema:longitude ?longitude .
?s schema:latitude ?latitude . 
}
"""
# Endpoint URL pattern: "http://localhost:7200/repositories/yourRepository"

sparlq_graphdb = SPARQLWrapper("http://localhost:7200/repositories/test")
sparlq_graphdb.setQuery(query_my_data)
sparlq_graphdb.setReturnFormat(JSON)
# Executes the query and keeps the converted JSON response for later cells.
results_Graphdb = sparlq_graphdb.query().convert()


In [6]:
# Pre-fetch the Wikidata candidates for every GraphDB row, keyed by city name,
# so that later matching can run without querying Wikidata again.
results = query_graphDB()
data = {}
matches= []
for c in results["results"]["bindings"]:
    label = c["cityName"]["value"]
    longitude = float(c["longitude"]["value"])
    latitude = float(c["latitude"]["value"])
    # NOTE(review): rows that share a cityName each trigger a new query and
    # overwrite the previous entry, so the last row's result wins.
    data[label] = get_wikidata_results(latitude, longitude)



In [33]:
# Re-run the GraphDB query and display the raw SPARQL JSON response.
results = query_graphDB()
results

{'head': {'vars': ['cityName', 'longitude', 'latitude', 'City']},
 'results': {'bindings': [{'cityName': {'type': 'literal',
     'value': 'City of Edinburgh'},
    'longitude': {'datatype': 'http://www.w3.org/2001/XMLSchema#double',
     'type': 'literal',
     'value': '-3.1883E0'},
    'latitude': {'datatype': 'http://www.w3.org/2001/XMLSchema#double',
     'type': 'literal',
     'value': '5.59533E1'},
    'City': {'type': 'uri',
     'value': 'http://mydata.example.org/public-locations-City/City%20of%20Edinburgh'}},
   {'cityName': {'type': 'literal', 'value': 'City of Edinburgh'},
    'longitude': {'datatype': 'http://www.w3.org/2001/XMLSchema#double',
     'type': 'literal',
     'value': '-3.1883E0'},
    'latitude': {'datatype': 'http://www.w3.org/2001/XMLSchema#double',
     'type': 'literal',
     'value': '5.59533E1'},
    'City': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q23436'}},
   {'cityName': {'type': 'literal', 'value': 'City of Edinburgh'},
    'longi

In [35]:
# Sanity check: show the type of the pre-fetched candidate cache.
type(data)

dict

In [20]:
# For every GraphDB row, look up the best DBpedia and Wikidata counterpart of
# the city and collect (GraphDB binding, external binding) pairs in `matches`.
results = query_graphDB()
matches= []
for c in results["results"]["bindings"]:
    label = c["cityName"]["value"]
    # Debug filter (disabled): uncomment to restrict the run to one city.
    # if label != "Montreal":
    #     continue
    longitude = float(c["longitude"]["value"])
    latitude = float(c["latitude"]["value"])
    print(label, longitude, latitude)
    # Both lookups are called without `data`, so each one queries its endpoint.
    result_dbpedia = (c,query_dbpedia_coordinates(latitude, longitude, label))
    result_wikidata = (c,query_wikidata_coordinates(latitude, longitude, label))
    matches.append(result_dbpedia)
    matches.append(result_wikidata)
matches

Abbotsford -122.35 49.083333
Querying DBpedia......
Querying Wikidata......
Charnwood -1.2097 52.7709
Querying DBpedia......
Querying Wikidata......
Montreal -73.561668 45.508888
Querying DBpedia......
Querying Wikidata......
London -0.076544 51.464462
Querying DBpedia......
Querying Wikidata......
Boston -71.064709 42.360338
Querying DBpedia......
Querying Wikidata......
City of Edinburgh -3.1883 55.9533
Querying DBpedia......
Querying Wikidata......


[({'cityName': {'type': 'literal', 'value': 'Abbotsford'},
   'longitude': {'datatype': 'http://www.w3.org/2001/XMLSchema#double',
    'type': 'literal',
    'value': '-1.2235E2'},
   'latitude': {'datatype': 'http://www.w3.org/2001/XMLSchema#double',
    'type': 'literal',
    'value': '4.9083333E1'},
   'City': {'type': 'uri',
    'value': 'http://mydata.example.org/public-locations-City/Abbotsford'}},
  {'city': {'type': 'uri',
    'value': 'http://dbpedia.org/resource/Abbotsford,_British_Columbia'},
   'cityLabel': {'type': 'literal',
    'xml:lang': 'en',
    'value': 'Abbotsford, British Columbia'},
   'lat': {'type': 'typed-literal',
    'datatype': 'http://www.w3.org/2001/XMLSchema#float',
    'value': '49.05'},
   'long': {'type': 'typed-literal',
    'datatype': 'http://www.w3.org/2001/XMLSchema#float',
    'value': '-122.317'}}),
 ({'cityName': {'type': 'literal', 'value': 'Abbotsford'},
   'longitude': {'datatype': 'http://www.w3.org/2001/XMLSchema#double',
    'type': 'lit

In [21]:
# Serialise every (GraphDB city, external city) pair in `matches` as one
# owl:sameAs statement in N-Triples syntax.
triples = []
for m in matches:
    if m[1] is None:
        # The lookup produced no counterpart for this city; nothing to assert.
        continue
    subject_uri = m[0]["City"]["value"]
    object_uri = m[1]["city"]["value"]
    if subject_uri == object_uri:
        # The GraphDB city may already be the external resource itself;
        # "X sameAs X" carries no information, so leave it out.
        continue
    s = "<" + subject_uri + "> " + "<http://www.w3.org/2002/07/owl#sameAs> " + "<" + object_uri + "> .\n"
    triples.append(s)

In [22]:
# N-Triples documents are UTF-8; set the encoding explicitly so that IRIs with
# non-ASCII characters are written correctly whatever the platform default is.
with open("matches.nt", "w", encoding="utf-8") as f:
    f.writelines(triples)

# DBpedia

In [11]:
# Sample point used to inspect the raw DBpedia response.
latitude = 45.5128
longitude = -73.56
def fetch_dbpedia_cities_near(latitude, longitude):
    """Return the raw DBpedia city candidates around a point.

    This helper deliberately has its own name. It used to be defined as
    ``query_dbpedia_coordinates``, which replaced the four-argument matcher of
    that name defined earlier in the notebook and made later calls such as
    ``query_dbpedia_coordinates(latitude, longitude, label, data=...)`` fail
    with ``TypeError`` when the notebook is run from top to bottom.

    Args:
        latitude: Latitude of the search centre, in decimal degrees.
        longitude: Longitude of the search centre, in decimal degrees.

    Returns:
        The converted SPARQL JSON response, with ``city``, ``cityLabel``,
        ``lat`` and ``long`` bindings.
    """
    query = f"""
    PREFIX dbo: <http://dbpedia.org/ontology/>
    PREFIX dbr: <http://dbpedia.org/resource/>
    PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

    SELECT DISTINCT ?city ?cityLabel  ?lat ?long
    WHERE {{
      ?city a dbo:City .
      ?city rdfs:label ?cityLabel .
      ?city geo:lat ?lat .
      ?city geo:long ?long .
      FILTER (LANG(?cityLabel) = "en")
      FILTER (
        bif:st_intersects (
          bif:st_point (?long, ?lat),
          bif:st_point ({longitude}, {latitude}),
          50
        )
      )
    }}
    LIMIT 1000


    """

    sparql_dbpedia = SPARQLWrapper("http://dbpedia.org/sparql")

    sparql_dbpedia.setQuery(query)

    sparql_dbpedia.setReturnFormat(JSON)

    results = sparql_dbpedia.query().convert()
    return results


results = fetch_dbpedia_cities_near(latitude, longitude)
results

{'head': {'link': [], 'vars': ['city', 'cityLabel', 'lat', 'long']},
 'results': {'distinct': False,
  'ordered': True,
  'bindings': [{'city': {'type': 'uri',
     'value': 'http://dbpedia.org/resource/Carignan,_Quebec'},
    'cityLabel': {'type': 'literal',
     'xml:lang': 'en',
     'value': 'Carignan, Quebec'},
    'lat': {'type': 'typed-literal',
     'datatype': 'http://www.w3.org/2001/XMLSchema#float',
     'value': '45.45'},
    'long': {'type': 'typed-literal',
     'datatype': 'http://www.w3.org/2001/XMLSchema#float',
     'value': '-73.3'}},
   {'city': {'type': 'uri', 'value': 'http://dbpedia.org/resource/Rosemère'},
    'cityLabel': {'type': 'literal', 'xml:lang': 'en', 'value': 'Rosemère'},
    'lat': {'type': 'typed-literal',
     'datatype': 'http://www.w3.org/2001/XMLSchema#float',
     'value': '45.6369'},
    'long': {'type': 'typed-literal',
     'datatype': 'http://www.w3.org/2001/XMLSchema#float',
     'value': '-73.8'}},
   {'city': {'type': 'uri',
     'value':

In [None]:
# Match every GraphDB city against the candidates pre-fetched into `data`,
# without querying the external endpoint again.
results = query_graphDB()
matches= []
for c in results["results"]["bindings"]:
    label = c["cityName"]["value"]
    longitude = float(c["longitude"]["value"])
    latitude = float(c["latitude"]["value"])
    print(label, longitude, latitude)
    # `data[label]` was filled by get_wikidata_results, so its bindings carry
    # a `location` value and no `lat`/`long`. The Wikidata matcher is the one
    # that understands them; the DBpedia matcher's distance fallback would
    # raise KeyError on this data.
    result_wikidata = (c,query_wikidata_coordinates(latitude, longitude, label, data=data[label]))
    matches.append(result_wikidata)
results

In [10]:
# Show the label of the first binding in `results`; requires `results` to hold
# a response that has a `cityLabel` variable.
results["results"]["bindings"][0]["cityLabel"]["value"]

'Carignan, Quebec'

# LinkedGeoData

In [27]:
# Montreal coordinates
latitude = 45.5128
longitude = -73.56

# Same spatial lookup as the DBpedia query, but against LinkedGeoData:
# lgdo:City resources with an English label whose geo:lat/geo:long point
# satisfies bif:st_intersects with the point above at precision 50.
query = f"""
PREFIX lgdo: <http://linkedgeodata.org/ontology/>
PREFIX geom: <http://geovocab.org/geometry#>
PREFIX ogc: <http://www.opengis.net/ont/geosparql#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>

SELECT DISTINCT ?city ?cityLabel ?lat ?long
WHERE {{
  ?city a lgdo:City ;
        rdfs:label ?cityLabel ;
        geo:lat ?lat ;
        geo:long ?long .
  FILTER (LANG(?cityLabel) = "en")
  FILTER (
    bif:st_intersects (
      bif:st_point(?long, ?lat),
      bif:st_point({longitude}, {latitude}),
      50
    )
  )
}}
LIMIT 1000



"""

sparql_linkedgeodata = SPARQLWrapper("http://linkedgeodata.org/sparql")
sparql_linkedgeodata.setQuery(query)
sparql_linkedgeodata.setReturnFormat(JSON)
# Execute the query and convert the response to a Python dict.
results = sparql_linkedgeodata.query().convert()

results

{'head': {'link': [], 'vars': ['city', 'cityLabel', 'lat', 'long']},
 'results': {'distinct': False,
  'ordered': True,
  'bindings': [{'city': {'type': 'uri',
     'value': 'http://linkedgeodata.org/triplify/node299790200'},
    'cityLabel': {'type': 'literal', 'xml:lang': 'en', 'value': 'Montreal'},
    'lat': {'type': 'typed-literal',
     'datatype': 'http://www.w3.org/2001/XMLSchema#double',
     'value': '45.5088'},
    'long': {'type': 'typed-literal',
     'datatype': 'http://www.w3.org/2001/XMLSchema#double',
     'value': '-73.554'}}]}}