In [2]:
%pip install SPARQLWrapper
%pip install fuzzywuzzy
%pip install geopy
from SPARQLWrapper import SPARQLWrapper, JSON
from fuzzywuzzy import fuzz
from geopy.distance import geodesic
import math

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.




In [3]:
def query_wikidata_coordinates(latitude : float, longitude : float, label : str, data=None):
    """
    Pick the Wikidata city near a point that best matches ``label``.

    Each candidate is scored with the mean of fuzzywuzzy's ``ratio`` and
    ``partial_ratio`` between its ``cityLabel`` and ``label``. The best score
    strictly above 80 wins; if no candidate clears that bar, the candidate
    geographically closest to ``(latitude, longitude)`` is returned instead.

    Parameters
    ----------
    latitude, longitude : float
        Reference point in decimal degrees.
    label : str
        City name to compare against each candidate label.
    data : dict, optional
        SPARQL JSON result in the shape returned by ``get_wikidata_results``.
        When given, Wikidata is not queried.

    Returns
    -------
    dict or None
        The chosen binding (``city``, ``cityLabel``, ``location``), or ``None``
        when the result set holds no bindings at all.
    """
    if data is None:
        print("Querying Wikidata......")
        # Reuse the shared helper rather than carrying a second copy of the SPARQL text.
        results = get_wikidata_results(latitude, longitude)
    else:
        results = data

    bindings = results["results"]["bindings"]
    if not bindings:
        # An empty result set used to reach `return closest_city` with the name unbound.
        return None

    def score(binding):
        city_label = binding["cityLabel"]["value"]
        return (fuzz.ratio(city_label, label) + fuzz.partial_ratio(city_label, label)) / 2

    def distance_km(binding):
        # The literal is "Point(<lon> <lat>)" while geodesic expects (lat, lon).
        parts = binding["location"]["value"].split("(")[1].split(")")[0].split(" ")
        return geodesic((latitude, longitude), (float(parts[1]), float(parts[0]))).kilometers

    scored = [(score(binding), binding) for binding in bindings]
    matched_cities = [pair for pair in scored if pair[0] > 80]

    if not matched_cities:
        # No label is similar enough: fall back to the nearest candidate (first one wins ties).
        return min(bindings, key=distance_km)

    # max() keeps the first of equally scored candidates, like the stable descending sort it replaces.
    return max(matched_cities, key=lambda pair: pair[0])[1]

def get_wikidata_results(latitude : float, longitude : float):
    """
    Ask the Wikidata query service for cities around a point.

    The query uses the ``wikibase:around`` service with a radius of "50"
    centred on ``Point(longitude latitude)``, keeps items that are an instance
    of (a subclass of) ``wd:Q515`` and returns their English labels, capped at
    1000 rows.

    Returns the parsed SPARQL JSON response; each binding carries ``city``,
    ``cityLabel`` and ``location``.
    """
    around_query = f"""
    PREFIX wdt: <http://www.wikidata.org/prop/direct/>
    PREFIX wd: <http://www.wikidata.org/entity/>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX bd: <http://www.bigdata.com/rdf#>
    PREFIX wikibase: <http://wikiba.se/ontology#>
    PREFIX geo: <http://www.opengis.net/ont/geosparql#>

    SELECT DISTINCT ?city ?cityLabel ?location
    WHERE {{
    SERVICE wikibase:around {{
        ?city wdt:P625 ?location .
        bd:serviceParam wikibase:center "Point({longitude} {latitude})"^^geo:wktLiteral .
        bd:serviceParam wikibase:radius "50" . 
    }}
    ?city wdt:P31/wdt:P279* wd:Q515 . 
    ?city rdfs:label ?cityLabel .
    FILTER(LANG(?cityLabel) = "en") .

    }} LIMIT 1000

    """

    # Configure the client, then run the query and hand back the decoded JSON.
    wikidata_client = SPARQLWrapper("https://query.wikidata.org/sparql")
    wikidata_client.setReturnFormat(JSON)
    wikidata_client.setQuery(around_query)
    return wikidata_client.query().convert()

def get_dbpedia_results(latitude : float, longitude : float):
    """
    Fetch DBpedia cities near a point.

    Selects ``dbo:City`` resources with an English label whose
    ``geo:lat``/``geo:long`` point intersects the given point with a precision
    argument of 50 (``bif:st_intersects``), capped at 1000 rows.

    Returns the parsed SPARQL JSON response. Each binding carries ``city``,
    ``cityLabel``, ``lat`` and ``long``. The coordinates are included so the
    result can be passed as ``data`` to ``query_dbpedia_coordinates``, whose
    nearest-city fallback reads ``lat`` and ``long``.
    """
    query = f"""
    PREFIX dbo: <http://dbpedia.org/ontology/>
    PREFIX dbr: <http://dbpedia.org/resource/>
    PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

    SELECT DISTINCT ?city ?cityLabel ?lat ?long
    WHERE {{
      ?city a dbo:City .
      ?city rdfs:label ?cityLabel .
      ?city geo:lat ?lat .
      ?city geo:long ?long .
      FILTER (LANG(?cityLabel) = "en")
      FILTER (
        bif:st_intersects (
          bif:st_point (?long, ?lat),
          bif:st_point ({longitude}, {latitude}),
          50
        )
      )
    }}
    LIMIT 1000


    """

    sparql_dbpedia = SPARQLWrapper("http://dbpedia.org/sparql")
    sparql_dbpedia.setQuery(query)
    sparql_dbpedia.setReturnFormat(JSON)

    return sparql_dbpedia.query().convert()

def query_dbpedia_coordinates(latitude : float, longitude : float, label : str, data=None):
    """
    Pick the DBpedia city near a point that best matches ``label``.

    Each candidate is scored with the mean of fuzzywuzzy's ``ratio`` and
    ``partial_ratio`` between its ``cityLabel`` and ``label``. The best score
    strictly above 80 wins; otherwise the candidate closest to
    ``(latitude, longitude)`` is returned.

    Parameters
    ----------
    latitude, longitude : float
        Reference point in decimal degrees.
    label : str
        City name to compare against each candidate label.
    data : dict, optional
        Previously fetched SPARQL JSON result whose bindings carry
        ``cityLabel``, ``lat`` and ``long``. When given, DBpedia is not queried.

    Returns
    -------
    dict or None
        The chosen binding, or ``None`` when the result set holds no bindings.
    """
    if data is None:
        print("Querying DBpedia......")
        query = f"""
        PREFIX dbo: <http://dbpedia.org/ontology/>
        PREFIX dbr: <http://dbpedia.org/resource/>
        PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

        SELECT DISTINCT ?city ?cityLabel ?lat ?long
        WHERE {{
          ?city a dbo:City .
          ?city rdfs:label ?cityLabel .
          ?city geo:lat ?lat .
          ?city geo:long ?long .
          FILTER (LANG(?cityLabel) = "en")
          FILTER (
            bif:st_intersects (
              bif:st_point (?long, ?lat),
              bif:st_point ({longitude}, {latitude}),
              50
            )
          )
        }}
        LIMIT 1000


        """

        sparql_dbpedia = SPARQLWrapper("http://dbpedia.org/sparql")
        sparql_dbpedia.setQuery(query)
        sparql_dbpedia.setReturnFormat(JSON)
        results = sparql_dbpedia.query().convert()
    else:
        results = data

    bindings = results["results"]["bindings"]
    if not bindings:
        # An empty result set used to reach `return closest_city` with the name unbound.
        return None

    def score(binding):
        city_label = binding["cityLabel"]["value"]
        return (fuzz.ratio(city_label, label) + fuzz.partial_ratio(city_label, label)) / 2

    def distance_km(binding):
        candidate = (float(binding["lat"]["value"]), float(binding["long"]["value"]))
        return geodesic((latitude, longitude), candidate).kilometers

    scored = [(score(binding), binding) for binding in bindings]
    matched_cities = [pair for pair in scored if pair[0] > 80]

    if not matched_cities:
        # No label is similar enough: fall back to the nearest candidate (first one wins ties).
        return min(bindings, key=distance_km)

    # max() keeps the first of equally scored candidates, like the stable descending sort it replaces.
    return max(matched_cities, key=lambda pair: pair[0])[1]


def query_graphDB(endpoint : str = "http://193.2.205.14:7200/repositories/EnergyGraph"):
    """
    Query a GraphDB repository for places, their coordinates and their city.

    Parameters
    ----------
    endpoint : str, optional
        SPARQL endpoint of the repository. Defaults to the EnergyGraph
        repository that was previously hard-coded, so existing zero-argument
        calls behave as before. A local test repository would be, for example,
        "http://localhost:7200/repositories/test".

    Returns
    -------
    dict
        Parsed SPARQL JSON result; each binding carries ``cityName``,
        ``longitude``, ``latitude`` and ``City``.
    """
    query_my_data = """
    PREFIX schema: <https://schema.org/>
    PREFIX wdtn: <http://www.wikidata.org/prop/direct-normalized/>
    PREFIX data: <http://mydata.example.org/>
    PREFIX owl: <http://www.w3.org/2002/07/owl#>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX wdt: <http://www.wikidata.org/prop/direct/>
    PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX voc: <http://vocabulary.example.org/>
    PREFIX wd: <http://www.wikidata.org/entity/>
    SELECT DISTINCT ?cityName ?longitude ?latitude ?City
    WHERE {
    ?s rdf:type schema:Place .
    ?s schema:containedInPlace ?City .
    ?City rdf:type schema:City .
    ?City schema:name ?cityName .
    ?s schema:longitude ?longitude .
    ?s schema:latitude ?latitude . 
    }
    """

    sparql_graphdb = SPARQLWrapper(endpoint)
    sparql_graphdb.setQuery(query_my_data)
    sparql_graphdb.setReturnFormat(JSON)

    return sparql_graphdb.query().convert()

In [49]:
def query_graphdb_countries():
    """
    Map country names to their URIs in the EnergyGraph_mixed repository.

    Every resource typed ``schema:Country`` is returned. The dictionary key is
    the last path segment of the URI with ``%20`` turned back into spaces; the
    value is the full URI.
    """
    graph_client = SPARQLWrapper("http://193.2.205.14:7200/repositories/EnergyGraph_mixed")
    graph_client.setReturnFormat(JSON)
    graph_client.setQuery("""
    PREFIX voc: <http://vocabulary.example.org/>
    PREFIX saref: <https://saref.etsi.org/core/>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX schema: <https://schema.org/>
    SELECT DISTINCT ?country WHERE {
    ?country rdf:type schema:Country . 
    
    }
    """)

    rows = graph_client.query().convert()["results"]["bindings"]
    return {
        row["country"]["value"].split("/")[-1].replace("%20", " "): row["country"]["value"]
        for row in rows
    }



def query_dbpedia_countries(country : str):
    """
    Look up the DBpedia resource URI of a country by its English label.

    Parameters
    ----------
    country : str
        Exact English ``rdfs:label`` of the country, e.g. "Canada".

    Returns
    -------
    str
        URI of the first ``dbo:Country`` carrying that label.

    Raises
    ------
    IndexError
        If DBpedia returns no matching country.
    """
    # Escape backslashes and double quotes so the name cannot end the SPARQL string literal early.
    safe_label = country.replace("\\", "\\\\").replace('"', '\\"')
    query = f"""
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX dbo: <http://dbpedia.org/ontology/>
    PREFIX dbp: <http://dbpedia.org/property/>

    SELECT ?country WHERE {{
    ?country a dbo:Country ;
            rdfs:label "{safe_label}"@en .
    }}
    LIMIT 1
    """
    sparql_dbpedia = SPARQLWrapper("http://dbpedia.org/sparql")
    sparql_dbpedia.setQuery(query)
    sparql_dbpedia.setReturnFormat(JSON)
    results = sparql_dbpedia.query().convert()

    bindings = results["results"]["bindings"]
    if not bindings:
        # Same exception type as the bare [0] lookup raised, now saying what was missing.
        raise IndexError(f"DBpedia returned no dbo:Country labelled {country!r}")
    return bindings[0]["country"]["value"]

# Quick check of the DBpedia lookup with a known country label.
res = query_dbpedia_countries("Canada")

In [50]:
res

'http://dbpedia.org/resource/Canada'

In [36]:

def query_graphDB_cities(endpoint : str):
    """
    Query the energy knowledge graph at ``endpoint`` for cities and coordinates.

    Prints one line per returned row (city name, longitude, latitude) and
    returns the parsed SPARQL JSON result, whose bindings carry ``cityName``,
    ``longitude``, ``latitude`` and ``City``.
    """
    city_query = """
    PREFIX schema: <https://schema.org/>
    PREFIX wdtn: <http://www.wikidata.org/prop/direct-normalized/>
    PREFIX data: <http://mydata.example.org/>
    PREFIX owl: <http://www.w3.org/2002/07/owl#>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX wdt: <http://www.wikidata.org/prop/direct/>
    PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX voc: <http://vocabulary.example.org/>
    PREFIX wd: <http://www.wikidata.org/entity/>
    SELECT DISTINCT ?cityName ?longitude ?latitude ?City
    WHERE {
    ?s rdf:type schema:Place .
    ?s schema:containedInPlace ?City .
    ?City rdf:type schema:City .
    ?City schema:name ?cityName .
    ?s schema:longitude ?longitude .
    ?s schema:latitude ?latitude . 
    }
    """

    graph_client = SPARQLWrapper(endpoint)
    graph_client.setQuery(city_query)
    graph_client.setReturnFormat(JSON)
    graph_results = graph_client.query().convert()

    # Echo each row so the caller can eyeball what the graph holds.
    for row in graph_results["results"]["bindings"]:
        name, lon, lat = (row[key]["value"] for key in ("cityName", "longitude", "latitude"))
        print(name, lon, lat)

    return graph_results

# query_graphDB_cities("http://193.2.205.14:7200/repositories/EnergyGraph_mixed")

def query_wikidata_countries(country: str):
    """
    Look up the Wikidata entity URI of a country by its English label.

    The query selects items that are ``wdt:P31 wd:Q6256`` and whose English
    ``rdfs:label`` equals ``country``. It returns the ``country`` value of the
    first binding, so an empty result raises ``IndexError``.
    """
    label_query = f"""
    SELECT ?country WHERE {{
    ?country wdt:P31 wd:Q6256; # instance of a country
            rdfs:label "{country}"@en. # country name in English
    }}

    """

    wikidata_client = SPARQLWrapper("https://query.wikidata.org/sparql")
    wikidata_client.setReturnFormat(JSON)
    wikidata_client.setQuery(label_query)

    first_row = wikidata_client.query().convert()["results"]["bindings"][0]
    return first_row["country"]["value"]
    

def query_graphdb_countries():
    """
    Map country names to their URIs in the EnergyGraph_mixed repository.

    Every resource typed ``schema:Country`` is returned. The dictionary key is
    the percent-decoded last path segment of the URI, with "United States"
    renamed to "United States of America" (the name used for the Wikidata
    lookup in this notebook). The value is the full URI.
    """
    # Local import: the notebook's import cell does not bring in urllib.
    from urllib.parse import unquote

    query_countries = """
    PREFIX voc: <http://vocabulary.example.org/>
    PREFIX saref: <https://saref.etsi.org/core/>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX schema: <https://schema.org/>
    SELECT DISTINCT ?country WHERE {
    ?country rdf:type schema:Country . 
    
    }
    """
    sparql_graphdb = SPARQLWrapper("http://193.2.205.14:7200/repositories/EnergyGraph_mixed")
    sparql_graphdb.setQuery(query_countries)
    sparql_graphdb.setReturnFormat(JSON)

    results = sparql_graphdb.query().convert()
    uris = {}
    for row in results["results"]["bindings"]:
        country_uri = row["country"]["value"]
        # unquote() decodes every percent-escape, not only "%20".
        name = unquote(country_uri.split("/")[-1])
        # special case for the US
        if name == "United States":
            name = "United States of America"
        uris[name] = country_uri
    return uris



# Pair every country URI of the energy graph with the Wikidata entity that
# carries the same English label.
matches = []
result_Graphdb_countries = query_graphdb_countries()
for c, graphdb_uri in result_Graphdb_countries.items():
    print(c)
    result_wikidata = (graphdb_uri, query_wikidata_countries(c))
    matches.append(result_wikidata)




Canada
United Kingdom
South Korea
Switzerland
India
Germany
Greece
Netherlands
Poland
Portugal
France
United States of America
Uruguay


In [37]:
matches

[('http://mydata.example.org/public-locations-country/Canada',
  'http://www.wikidata.org/entity/Q16'),
 ('http://mydata.example.org/public-locations-country/United%20Kingdom',
  'http://www.wikidata.org/entity/Q145'),
 ('http://mydata.example.org/public-locations-country/South%20Korea',
  'http://www.wikidata.org/entity/Q884'),
 ('http://mydata.example.org/public-locations-country/Switzerland',
  'http://www.wikidata.org/entity/Q39'),
 ('http://mydata.example.org/public-locations-country/India',
  'http://www.wikidata.org/entity/Q668'),
 ('http://mydata.example.org/public-locations-country/Germany',
  'http://www.wikidata.org/entity/Q183'),
 ('http://mydata.example.org/public-locations-country/Greece',
  'http://www.wikidata.org/entity/Q41'),
 ('http://mydata.example.org/public-locations-country/Netherlands',
  'http://www.wikidata.org/entity/Q55'),
 ('http://mydata.example.org/public-locations-country/Poland',
  'http://www.wikidata.org/entity/Q36'),
 ('http://mydata.example.org/publ

In [6]:
# Cache the nearby-city candidates from Wikidata for every city of the graph,
# keyed by city name, so later matching can run without querying again.
results = query_graphDB()
data = {}
matches= []
# The graph returns the same city/coordinate pair once per linked ?City URI,
# so remember the answer per coordinate and query Wikidata only once for each.
wikidata_by_point = {}
for c in results["results"]["bindings"]:
    label = c["cityName"]["value"]
    longitude = float(c["longitude"]["value"])
    latitude = float(c["latitude"]["value"])
    point = (latitude, longitude)
    if point not in wikidata_by_point:
        wikidata_by_point[point] = get_wikidata_results(latitude, longitude)
    # As before, the last row seen for a label decides its cached result.
    data[label] = wikidata_by_point[point]



In [33]:
# Re-run the city query and display the raw SPARQL JSON for inspection.
results = query_graphDB()
results

{'head': {'vars': ['cityName', 'longitude', 'latitude', 'City']},
 'results': {'bindings': [{'cityName': {'type': 'literal',
     'value': 'City of Edinburgh'},
    'longitude': {'datatype': 'http://www.w3.org/2001/XMLSchema#double',
     'type': 'literal',
     'value': '-3.1883E0'},
    'latitude': {'datatype': 'http://www.w3.org/2001/XMLSchema#double',
     'type': 'literal',
     'value': '5.59533E1'},
    'City': {'type': 'uri',
     'value': 'http://mydata.example.org/public-locations-City/City%20of%20Edinburgh'}},
   {'cityName': {'type': 'literal', 'value': 'City of Edinburgh'},
    'longitude': {'datatype': 'http://www.w3.org/2001/XMLSchema#double',
     'type': 'literal',
     'value': '-3.1883E0'},
    'latitude': {'datatype': 'http://www.w3.org/2001/XMLSchema#double',
     'type': 'literal',
     'value': '5.59533E1'},
    'City': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q23436'}},
   {'cityName': {'type': 'literal', 'value': 'City of Edinburgh'},
    'longi

In [14]:
def query_wikidata_countries(country: str):
    """
    Resolve an English country name to its Wikidata entity URI.

    Only items stated as ``wdt:P31 wd:Q6256`` with an English ``rdfs:label``
    equal to ``country`` are considered. The URI of the first binding is
    returned; when there is none, the ``[0]`` lookup raises ``IndexError``.
    """
    sparql_text = f"""
    SELECT ?country WHERE {{
    ?country wdt:P31 wd:Q6256; # instance of a country
            rdfs:label "{country}"@en. # country name in English
    }}

    """

    client = SPARQLWrapper("https://query.wikidata.org/sparql")
    client.setReturnFormat(JSON)
    client.setQuery(sparql_text)

    response = client.query().convert()
    return response["results"]["bindings"][0]["country"]["value"]
# Quick check of the Wikidata lookup with a known country label.
results = query_wikidata_countries("Canada")

In [16]:
def query_graphDB_cities(endpoint : str):
    """
    Query the energy knowledge graph for cities and cache Wikidata candidates.

    Runs the city/coordinate query against ``endpoint``. For every returned row
    it stores ``get_wikidata_results(latitude, longitude)`` in the module-level
    ``data`` dict under the row's city name, so ``data`` must already exist.
    Returns the parsed SPARQL JSON result of the graph query.
    """
    city_query = """
    PREFIX schema: <https://schema.org/>
    PREFIX wdtn: <http://www.wikidata.org/prop/direct-normalized/>
    PREFIX data: <http://mydata.example.org/>
    PREFIX owl: <http://www.w3.org/2002/07/owl#>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX wdt: <http://www.wikidata.org/prop/direct/>
    PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX voc: <http://vocabulary.example.org/>
    PREFIX wd: <http://www.wikidata.org/entity/>
    SELECT DISTINCT ?cityName ?longitude ?latitude ?City
    WHERE {
    ?s rdf:type schema:Place .
    ?s schema:containedInPlace ?City .
    ?City rdf:type schema:City .
    ?City schema:name ?cityName .
    ?s schema:longitude ?longitude .
    ?s schema:latitude ?latitude . 
    }
    """

    graph_client = SPARQLWrapper(endpoint)
    graph_client.setQuery(city_query)
    graph_client.setReturnFormat(JSON)
    graph_results = graph_client.query().convert()

    # Fill the shared cache: one Wikidata candidate set per city name.
    for row in graph_results["results"]["bindings"]:
        city_name = row["cityName"]["value"]
        lon = float(row["longitude"]["value"])
        lat = float(row["latitude"]["value"])
        data[city_name] = get_wikidata_results(lat, lon)

    return graph_results

# Run against the mixed repository; this also fills `data` as a side effect.
query_graphDB_cities("http://193.2.205.14:7200/repositories/EnergyGraph_mixed")

{'head': {'vars': ['cityName', 'longitude', 'latitude', 'City']},
 'results': {'bindings': [{'cityName': {'type': 'literal',
     'value': 'Montevideo'},
    'longitude': {'datatype': 'http://www.w3.org/2001/XMLSchema#float',
     'type': 'literal',
     'value': '-56.16453170776367'},
    'latitude': {'datatype': 'http://www.w3.org/2001/XMLSchema#float',
     'type': 'literal',
     'value': '-34.9011116027832'},
    'City': {'type': 'uri',
     'value': 'http://mydata.example.org/public-locations-City/Montevideo'}},
   {'cityName': {'type': 'literal', 'value': 'Montevideo'},
    'longitude': {'datatype': 'http://www.w3.org/2001/XMLSchema#float',
     'type': 'literal',
     'value': '-56.16453170776367'},
    'latitude': {'datatype': 'http://www.w3.org/2001/XMLSchema#float',
     'type': 'literal',
     'value': '-34.9011116027832'},
    'City': {'type': 'uri',
     'value': 'http://dbpedia.org/resource/Montevideo'}},
   {'cityName': {'type': 'literal', 'value': 'Montevideo'},
    'l

In [53]:
def query_wikidata_coordinates(latitude : float, longitude : float, label : str, data=None):
    """
    Pick the Wikidata city near a point that best matches ``label``.

    Candidates are scored with fuzzywuzzy's ``partial_ratio`` between their
    ``cityLabel`` and ``label``. The best score strictly above 80 wins; if no
    candidate clears that bar, the candidate closest to
    ``(latitude, longitude)`` is returned instead.

    Pass the result of an earlier ``get_wikidata_results`` call as ``data`` to
    avoid querying Wikidata again.

    Returns
    -------
    dict or None
        The chosen binding (``city``, ``cityLabel``, ``location``), or ``None``
        when the result set holds no bindings at all.
    """
    if data is None:
        print("Querying Wikidata......")
        results = get_wikidata_results(latitude, longitude)
    else:
        results = data

    bindings = results["results"]["bindings"]
    if not bindings:
        # An empty result set used to reach `return closest_city` with the name unbound.
        return None

    def distance_km(binding):
        # The literal is "Point(<lon> <lat>)" while geodesic expects (lat, lon).
        parts = binding["location"]["value"].split("(")[1].split(")")[0].split(" ")
        return geodesic((latitude, longitude), (float(parts[1]), float(parts[0]))).kilometers

    # Keep the candidates whose label similarity is above 80.
    scored = [(fuzz.partial_ratio(binding["cityLabel"]["value"], label), binding) for binding in bindings]
    matched_cities = [pair for pair in scored if pair[0] > 80]

    if not matched_cities:
        # No label is similar enough: fall back to the nearest candidate (first one wins ties).
        return min(bindings, key=distance_km)

    # max() keeps the first of equally scored candidates, like the stable descending sort it replaces.
    return max(matched_cities, key=lambda pair: pair[0])[1]



def get_wikidata_results(latitude : float, longitude : float):
    """
    Return the Wikidata cities within a 50 km radius of the given coordinates.

    Uses the ``wikibase:around`` service centred on
    ``Point(longitude latitude)``, restricted to instances of (a subclass of)
    ``wd:Q515`` with an English label, capped at 1000 rows. The parsed SPARQL
    JSON response is returned; bindings carry ``city``, ``cityLabel`` and
    ``location``.
    """
    nearby_cities_query = f"""
    PREFIX wdt: <http://www.wikidata.org/prop/direct/>
    PREFIX wd: <http://www.wikidata.org/entity/>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX bd: <http://www.bigdata.com/rdf#>
    PREFIX wikibase: <http://wikiba.se/ontology#>
    PREFIX geo: <http://www.opengis.net/ont/geosparql#>

    SELECT DISTINCT ?city ?cityLabel ?location
    WHERE {{
    SERVICE wikibase:around {{
        ?city wdt:P625 ?location .
        bd:serviceParam wikibase:center "Point({longitude} {latitude})"^^geo:wktLiteral .
        bd:serviceParam wikibase:radius "50" . 
    }}
    ?city wdt:P31/wdt:P279* wd:Q515 . 
    ?city rdfs:label ?cityLabel .
    FILTER(LANG(?cityLabel) = "en") .

    }} LIMIT 1000

    """

    client = SPARQLWrapper("https://query.wikidata.org/sparql")
    client.setReturnFormat(JSON)
    client.setQuery(nearby_cities_query)
    return client.query().convert()

# Example: match "Montreal" against the Wikidata cities around its coordinates.
res =query_wikidata_coordinates(45.508888, -73.561668, "Montreal")

Querying Wikidata......


In [56]:
res["city"]["value"]

'http://www.wikidata.org/entity/Q340'

In [7]:
def query_wikidata_countries(city):
    """
    Query Wikidata for the country (``wdt:P17``) of a city.

    Parameters
    ----------
    city : dict or str
        Either a SPARQL result binding whose ``city`` value is the Wikidata
        entity URI (as returned by ``query_wikidata_coordinates``), or the
        entity URI / id itself as a string, e.g. "Q340".

    Returns
    -------
    str
        Entity URI of the country in the first result row.

    Raises
    ------
    IndexError
        If Wikidata returns no country for the city.
    """
    # The parameter was annotated `str` but indexed as a binding; accept both.
    entity_uri = city if isinstance(city, str) else city["city"]["value"]
    # Keep only the trailing id ("Q340") of ".../entity/Q340".
    entity_id = entity_uri.split("/")[-1]

    country_query = f"""PREFIX wdt: <http://www.wikidata.org/prop/direct/>
    PREFIX wd: <http://www.wikidata.org/entity/>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX bd: <http://www.bigdata.com/rdf#>

    SELECT ?city ?cityLabel ?country ?countryLabel
    WHERE {{
    wd:{entity_id} rdfs:label ?cityLabel ;  # City (Montreal)
            wdt:P17 ?country .         # Country of the city
    ?country rdfs:label ?countryLabel .
    
    FILTER(LANG(?cityLabel) = "en" && LANG(?countryLabel) = "en").
    }}
    """

    sparql_wdata = SPARQLWrapper("https://query.wikidata.org/sparql")
    sparql_wdata.setQuery(country_query)
    sparql_wdata.setReturnFormat(JSON)
    results = sparql_wdata.query().convert()

    bindings = results["results"]["bindings"]
    if not bindings:
        # Same exception type as the bare [0] lookup raised, now saying what was missing.
        raise IndexError(f"Wikidata returned no country (P17) for {entity_id}")
    return bindings[0]["country"]["value"]

# Match every city of the energy graph against DBpedia and Wikidata, and look
# up the Wikidata country of each Wikidata match.
results = query_graphDB()
matches= []
# Country URIs go in their own list: `matches` must hold only (graph binding,
# external binding) pairs, because the owl:sameAs triple builder indexes both
# elements of every entry as bindings.
country_matches = []
for c in results["results"]["bindings"]:
    label = c["cityName"]["value"]
    longitude = float(c["longitude"]["value"])
    latitude = float(c["latitude"]["value"])
    print(label, longitude, latitude)
    wdata = query_wikidata_coordinates(latitude, longitude, label)
    dbpedia_match = query_dbpedia_coordinates(latitude, longitude, label)
    # Skip a source that produced no candidate instead of storing an unusable pair.
    if dbpedia_match is not None:
        matches.append((c, dbpedia_match))
    if wdata is not None:
        matches.append((c, wdata))
        country_matches.append(query_wikidata_countries(wdata))

London -0.07654400169849396 51.46446228027344
Querying Wikidata......
http://www.wikidata.org/entity/Q145


In [21]:
# Serialise every (graph binding, external binding) pair as one owl:sameAs
# N-Triples line: <graph city URI> owl:sameAs <external city URI> .
triples = [
    f'<{m[0]["City"]["value"]}> <http://www.w3.org/2002/07/owl#sameAs> <{m[1]["city"]["value"]}> .\n'
    for m in matches
]

In [22]:
# Persist the owl:sameAs links; every entry already ends with a newline.
with open("matches.nt", "w") as nt_file:
    nt_file.write("".join(triples))

# DBpedia

In [11]:
latitude = 45.5128
longitude = -73.56
def query_dbpedia_coordinates(latitude, longitude):
    """
    Return the raw DBpedia result set of cities near a point.

    Selects ``dbo:City`` resources with an English label whose
    ``geo:lat``/``geo:long`` point intersects the given point with a precision
    argument of 50 (``bif:st_intersects``), capped at 1000 rows. Bindings carry
    ``city``, ``cityLabel``, ``lat`` and ``long``.

    NOTE(review): this two-argument definition replaces the earlier
    four-argument ``query_dbpedia_coordinates(latitude, longitude, label, data)``
    once the cell is run.
    """
    nearby_cities_query = f"""
    PREFIX dbo: <http://dbpedia.org/ontology/>
    PREFIX dbr: <http://dbpedia.org/resource/>
    PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

    SELECT DISTINCT ?city ?cityLabel  ?lat ?long
    WHERE {{
      ?city a dbo:City .
      ?city rdfs:label ?cityLabel .
      ?city geo:lat ?lat .
      ?city geo:long ?long .
      FILTER (LANG(?cityLabel) = "en")
      FILTER (
        bif:st_intersects (
          bif:st_point (?long, ?lat),
          bif:st_point ({longitude}, {latitude}),
          50
        )
      )
    }}
    LIMIT 1000


    """

    dbpedia_client = SPARQLWrapper("http://dbpedia.org/sparql")
    dbpedia_client.setReturnFormat(JSON)
    dbpedia_client.setQuery(nearby_cities_query)
    return dbpedia_client.query().convert()


# Fetch the DBpedia cities around the coordinates set at the top of this cell and show them.
results = query_dbpedia_coordinates(latitude, longitude)
results

{'head': {'link': [], 'vars': ['city', 'cityLabel', 'lat', 'long']},
 'results': {'distinct': False,
  'ordered': True,
  'bindings': [{'city': {'type': 'uri',
     'value': 'http://dbpedia.org/resource/Carignan,_Quebec'},
    'cityLabel': {'type': 'literal',
     'xml:lang': 'en',
     'value': 'Carignan, Quebec'},
    'lat': {'type': 'typed-literal',
     'datatype': 'http://www.w3.org/2001/XMLSchema#float',
     'value': '45.45'},
    'long': {'type': 'typed-literal',
     'datatype': 'http://www.w3.org/2001/XMLSchema#float',
     'value': '-73.3'}},
   {'city': {'type': 'uri', 'value': 'http://dbpedia.org/resource/Rosemère'},
    'cityLabel': {'type': 'literal', 'xml:lang': 'en', 'value': 'Rosemère'},
    'lat': {'type': 'typed-literal',
     'datatype': 'http://www.w3.org/2001/XMLSchema#float',
     'value': '45.6369'},
    'long': {'type': 'typed-literal',
     'datatype': 'http://www.w3.org/2001/XMLSchema#float',
     'value': '-73.8'}},
   {'city': {'type': 'uri',
     'value':

In [None]:
# Match every city of the graph using the candidate sets cached in `data`,
# so no external endpoint is queried again.
results = query_graphDB()
matches= []
for c in results["results"]["bindings"]:
    label = c["cityName"]["value"]
    longitude = float(c["longitude"]["value"])
    latitude = float(c["latitude"]["value"])
    print(label, longitude, latitude)
    # `data[label]` holds what get_wikidata_results returned, so it has to go
    # through the Wikidata matcher. The DBpedia matcher's nearest-city fallback
    # reads `lat`/`long` keys that Wikidata bindings do not have.
    result_wikidata = (c, query_wikidata_coordinates(latitude, longitude, label, data=data[label]))
    matches.append(result_wikidata)
results

In [10]:
results["results"]["bindings"][0]["cityLabel"]["value"]

'Carignan, Quebec'

# Linked geodata

In [27]:
# Montreal coordinates
latitude = 45.5128
longitude = -73.56

# Look for lgdo:City resources with an English label whose point intersects the
# coordinates above with a precision argument of 50, capped at 1000 rows.
query = f"""
PREFIX lgdo: <http://linkedgeodata.org/ontology/>
PREFIX geom: <http://geovocab.org/geometry#>
PREFIX ogc: <http://www.opengis.net/ont/geosparql#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>

SELECT DISTINCT ?city ?cityLabel ?lat ?long
WHERE {{
  ?city a lgdo:City ;
        rdfs:label ?cityLabel ;
        geo:lat ?lat ;
        geo:long ?long .
  FILTER (LANG(?cityLabel) = "en")
  FILTER (
    bif:st_intersects (
      bif:st_point(?long, ?lat),
      bif:st_point({longitude}, {latitude}),
      50
    )
  )
}}
LIMIT 1000



"""

# Run the query against the LinkedGeoData endpoint and decode the JSON response.
sparql_linkedgeodata = SPARQLWrapper("http://linkedgeodata.org/sparql")
sparql_linkedgeodata.setQuery(query)
sparql_linkedgeodata.setReturnFormat(JSON)
results = sparql_linkedgeodata.query().convert()

# Display the raw result set.
results

{'head': {'link': [], 'vars': ['city', 'cityLabel', 'lat', 'long']},
 'results': {'distinct': False,
  'ordered': True,
  'bindings': [{'city': {'type': 'uri',
     'value': 'http://linkedgeodata.org/triplify/node299790200'},
    'cityLabel': {'type': 'literal', 'xml:lang': 'en', 'value': 'Montreal'},
    'lat': {'type': 'typed-literal',
     'datatype': 'http://www.w3.org/2001/XMLSchema#double',
     'value': '45.5088'},
    'long': {'type': 'typed-literal',
     'datatype': 'http://www.w3.org/2001/XMLSchema#double',
     'value': '-73.554'}}]}}

{'head': {'vars': ['city', 'cityLabel', 'country', 'countryLabel']},
 'results': {'bindings': [{'country': {'type': 'uri',
     'value': 'http://www.wikidata.org/entity/Q77'},
    'countryLabel': {'xml:lang': 'en', 'type': 'literal', 'value': 'Uruguay'},
    'cityLabel': {'xml:lang': 'en',
     'type': 'literal',
     'value': 'Montevideo'}}]}}

In [5]:
# List every schema:City in the mixed repository whose URI starts with the
# Wikidata entity namespace.
graphDB_query = """
PREFIX voc: <http://vocabulary.example.org/>
PREFIX saref: <https://saref.etsi.org/core/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX schema: <https://schema.org/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?city WHERE {
  ?city rdf:type schema:City .
  FILTER(STRSTARTS(STR(?city), "http://www.wikidata.org/entity/"))
} 
"""
# Run the query and keep the decoded JSON in `results` for the following cells.
sparql_graphdb = SPARQLWrapper("http://193.2.205.14:7200/repositories/EnergyGraph_mixed")
sparql_graphdb.setQuery(graphDB_query)
sparql_graphdb.setReturnFormat(JSON)
results = sparql_graphdb.query().convert()
results

{'head': {'vars': ['city']},
 'results': {'bindings': [{'city': {'type': 'uri',
     'value': 'http://www.wikidata.org/entity/Q84'}},
   {'city': {'type': 'uri',
     'value': 'http://www.wikidata.org/entity/Q271730'}},
   {'city': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q340'}},
   {'city': {'type': 'uri',
     'value': 'http://www.wikidata.org/entity/Q17645993'}},
   {'city': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q100'}},
   {'city': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q23436'}},
   {'city': {'type': 'uri',
     'value': 'http://www.wikidata.org/entity/Q1335'}}]}}

In [7]:
# Print the trailing id of every city URI (the part after the last "/").
for r in results["results"]["bindings"]:
    print(r["city"]["value"].split("/")[-1])

Q84
Q271730
Q340
Q17645993
Q100
Q23436
Q1335


In [8]:

# Wikidata id of the entity whose country (wdt:P17) is looked up. The name is
# kept for compatibility; the query comments describe the entity as a city.
country_id = "Q1335"
# Stored under its own name so the string does not overwrite the
# query_wikidata_countries() function defined in earlier cells.
country_of_city_query = f"""PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX bd: <http://www.bigdata.com/rdf#>

SELECT ?city ?cityLabel ?country ?countryLabel
WHERE {{
  wd:{country_id} rdfs:label ?cityLabel ;  # City (Montreal)
          wdt:P17 ?country .         # Country of the city
  ?country rdfs:label ?countryLabel .
  
  FILTER(LANG(?cityLabel) = "en" && LANG(?countryLabel) = "en").
}}
"""

sparql_wdata = SPARQLWrapper("https://query.wikidata.org/sparql")

# Send the query built in this cell; `query` is a leftover global from the
# LinkedGeoData cell and is not a Wikidata query.
sparql_wdata.setQuery(country_of_city_query)

sparql_wdata.setReturnFormat(JSON)

results = sparql_wdata.query().convert()

results




{'head': {'vars': ['city', 'cityLabel', 'country', 'countryLabel']},
 'results': {'bindings': [{'country': {'type': 'uri',
     'value': 'http://www.wikidata.org/entity/Q77'},
    'countryLabel': {'xml:lang': 'en', 'type': 'literal', 'value': 'Uruguay'},
    'cityLabel': {'xml:lang': 'en',
     'type': 'literal',
     'value': 'Montevideo'}}]}}

In [12]:
results["results"]["bindings"][0]["country"]["value"]

'http://www.wikidata.org/entity/Q77'