In [1]:
import requests

# Define the SPARQL endpoint and query.
# The query selects every item ?road that has P16 = Q387692 and a P3096
# value, together with the item's label.
# NOTE(review): Q387692 is presumably the UK road network, judging by the
# labels that come back (M25, A1, ...) - confirm on Wikidata.
url = 'https://query.wikidata.org/sparql'
query = """
SELECT ?road ?roadLabel ?kmlTemplate
WHERE {
  ?road wdt:P16 wd:Q387692;   
        wdt:P3096 ?kmlTemplate.  # P3096 is the property for KML files
  
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
"""

# Make the request.
# - Accept asks the endpoint for SPARQL results as JSON.
# - Wikimedia's User-Agent policy asks scripts to identify themselves; add
#   contact details to this string before running the query heavily.
# - The timeout stops the cell from hanging forever if the endpoint stalls.
headers = {
    'Accept': 'application/sparql-results+json',
    'User-Agent': 'uk-roads-kml-notebook/0.1 (Jupyter notebook; python-requests)',
}
response = requests.get(url, headers=headers, params={'query': query}, timeout=60)

# Check if the request was successful
if response.status_code != 200:
    raise Exception(f"Query failed with status code {response.status_code}")

# Convert the response to JSON
results = response.json()

# Extract the bindings (one dict per result row) from the results
bindings = results['results']['bindings']

# Collect (road label, KML item URI) pairs. The ?kmlTemplate values are
# Wikidata entity URIs (http://www.wikidata.org/entity/Q...), not direct
# links to KML files; they are resolved to Wikipedia pages in a later step.
kml_templates = [
    (item['roadLabel']['value'], item['kmlTemplate']['value'])
    for item in bindings
]

# Print the road label and the URI of its KML item
for road, template in kml_templates:
    print(f"{road}: {template}")

M25 motorway: http://www.wikidata.org/entity/Q26326887
M1 motorway: http://www.wikidata.org/entity/Q26330001
M67 motorway: http://www.wikidata.org/entity/Q26329958
A1 road: http://www.wikidata.org/entity/Q26330216
A38(M) motorway: http://www.wikidata.org/entity/Q26330136
A91 road: http://www.wikidata.org/entity/Q26360850
M6 Toll: http://www.wikidata.org/entity/Q26330022
M80 motorway: http://www.wikidata.org/entity/Q26330113
M18 motorway: http://www.wikidata.org/entity/Q26330034
M90 motorway: http://www.wikidata.org/entity/Q26330116
M27 motorway: http://www.wikidata.org/entity/Q26330048
A823(M) motorway: http://www.wikidata.org/entity/Q26342825
M20 motorway: http://www.wikidata.org/entity/Q26330039
M50 motorway: http://www.wikidata.org/entity/Q26330072
M898 motorway: http://www.wikidata.org/entity/Q26342761
A2 road: http://www.wikidata.org/entity/Q26330219
M2 motorway: http://www.wikidata.org/entity/Q26330005
M48 motorway: http://www.wikidata.org/entity/Q26330066
M42 motorway: http://ww

In [22]:
import json

# Process each KML template to extract Wikipedia links
def get_wiki_link(url, timeout=30):
    """Return the English Wikipedia URL linked from a Wikidata entity.

    Parameters
    ----------
    url : str
        Wikidata entity URI whose last path segment is the entity ID,
        e.g. ``http://www.wikidata.org/entity/Q26326887``.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up.

    Returns
    -------
    str or None
        The ``enwiki`` sitelink URL of the entity, or ``None`` when the
        request does not return HTTP 200 or the entity has no ``enwiki``
        sitelink in the returned JSON.
    """
    # Derive the entity ID from the argument itself. Reading it from a global
    # loop variable would tie this function to the caller's variable names.
    entity_name = url.rstrip("/").rsplit("/", 1)[-1]

    # Fetch the entity data; the body is expected to be the entity's JSON.
    kml_response = requests.get(url, timeout=timeout)
    if kml_response.status_code != 200:
        return None

    kml_content = kml_response.json()

    # Walk entities -> <id> -> sitelinks -> enwiki -> url, tolerating any
    # missing level so an entity without an English article yields None.
    sitelinks = (
        kml_content.get('entities', {})
        .get(entity_name, {})
        .get('sitelinks', {})
    )
    return sitelinks.get('enwiki', {}).get('url')

In [21]:

from bs4 import BeautifulSoup
import requests
 
def extract_kml(url, output_dir='../data/roads', timeout=30):
    """Save the text of a wiki page's parser-output div to a ``.kml`` file.

    The page at ``url`` is downloaded and parsed as HTML. The text of the
    first ``div`` whose class attribute is
    ``"mw-content-ltr mw-parser-output"`` is written to
    ``<output_dir>/<road_name>.kml``, where ``road_name`` is the last path
    segment of ``url``.

    Parameters
    ----------
    url : str
        Page URL, e.g.
        ``https://en.wikipedia.org/wiki/Template:Attached_KML/M25_motorway``.
    output_dir : str, optional
        Directory that receives the file; created if it does not exist.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up.

    Returns
    -------
    str or None
        Path of the file written, or ``None`` when nothing was written
        (non-200 response, div not found, or div has no text).
    """
    import os  # local import so the function does not rely on other cells

    # call get method to request that page
    page = requests.get(url, timeout=timeout)
    if page.status_code != 200:
        return None

    # with the help of beautifulSoup and html parser create soup
    soup = BeautifulSoup(page.content, "html.parser")

    # find() returns None when no such div exists, so check before using .text
    mydivs = soup.find("div", {"class": "mw-content-ltr mw-parser-output"})
    if mydivs is None or not mydivs.text:
        return None

    # Use the last path segment as the file stem instead of a fixed index,
    # which would raise IndexError on URLs with fewer segments.
    road_name = url.rstrip("/").rsplit("/", 1)[-1]

    # Make sure the target directory exists before opening the file.
    os.makedirs(output_dir, exist_ok=True)
    out_path = os.path.join(output_dir, f'{road_name}.kml')
    with open(out_path, 'w', encoding='utf-8') as file:
        file.write(mydivs.text)

    return out_path


In [24]:
# Resolve each road's KML item to its English Wikipedia page and save the
# page text. The loop stays best-effort (one bad road must not abort the
# batch), but failures are reported instead of silently discarded, and only
# Exception is caught so a kernel interrupt still stops the run.
for road, template in kml_templates:
    try:
        wiki_url = get_wiki_link(template)
        if wiki_url is None:
            # No link available for this road: nothing to download.
            print(f"Skipping {road}: no English Wikipedia link found for {template}")
            continue
        extract_kml(wiki_url)
    except Exception as exc:
        print(f"Failed to process {road} ({template}): {exc!r}")