In [10]:
import requests

In [None]:
# Notebook parameters are supplied through Databricks widgets. Each widget is
# declared with an empty default value and then read back.

# Destination path the downloaded PBF file is written to.
# Surrounding whitespace is stripped so that an accidentally padded value does
# not become part of the file name.
dbutils.widgets.text("pbfPath", "", "Path to PBF file")
pbfPath = dbutils.widgets.get("pbfPath").strip()

# Region to look up in the dataset index.
# The value is stripped and lower-cased because the lookup further down
# compares it against lower-cased dataset ids and names; a trailing space
# would otherwise make every comparison fail.
dbutils.widgets.text("region", "", "Region")
region = dbutils.widgets.get("region").strip().lower()

In [27]:

# URL of the Geofabrik dataset index (JSON document).
mirror_index_url = "https://download.geofabrik.de/index-v1-nogeom.json"

# Seconds to wait for the server before giving up. Without an explicit timeout
# `requests` waits indefinitely, so a stalled connection would hang the
# notebook instead of failing.
index_timeout_seconds = 30

# Fetch the dataset index
print("Fetching Geofabrik dataset index...")
response = requests.get(mirror_index_url, timeout=index_timeout_seconds)
if response.status_code != 200:
    raise ConnectionError(f"Failed to fetch dataset index (status code {response.status_code})")

# Parse the JSON data: every entry under "features" carries its metadata in a
# "properties" mapping, which is the only part used by the following cells.
features = response.json()["features"]
datasets = [feature["properties"] for feature in features]

In [None]:

# An empty region is a substring of every string, so the filter below would
# accept all datasets and silently pick the first index entry. Reject it
# explicitly instead.
if not region:
    raise ValueError("No region specified.")

# All datasets whose id or name contains the requested region
# (case-insensitive). Missing keys are treated as empty strings rather than
# raising KeyError.
filtered_datasets = [
    ds for ds in datasets
    if region in ds.get("id", "").lower() or region in ds.get("name", "").lower()
]

if not filtered_datasets:
    raise ValueError(f"Region '{region}' not found in available datasets.")

# Prefer a dataset whose id or name equals the region exactly. A plain
# substring search can hit an unrelated, longer entry first (for example
# "india" is also a substring of "indiana"), so the first substring match is
# only used when there is no exact match.
exact_matches = [
    ds for ds in filtered_datasets
    if region in (ds.get("id", "").lower(), ds.get("name", "").lower())
]

# Extract the dataset information
selected_dataset = (exact_matches or filtered_datasets)[0]
download_url = selected_dataset.get("urls", {}).get("pbf")
if not download_url:
    raise ValueError(f"No PBF file available for region '{region}'.")

In [None]:
# Download the file

# Fail early with a clear message: without this check an empty path is only
# detected by open(), after the HTTP connection has already been opened.
if not pbfPath:
    raise ValueError("No output path specified for the PBF file.")

print(f"Downloading OSM data for region '{region}' from {download_url}...")

# timeout=(connect, read): the read timeout limits the wait between received
# bytes, not the total download time, so large files still complete while a
# stalled connection is aborted instead of hanging the notebook.
with requests.get(download_url, stream=True, timeout=(10, 60)) as r:
    r.raise_for_status()
    with open(pbfPath, 'wb') as f:
        # Stream in 1 MiB pieces so the whole file is never held in memory.
        for chunk in r.iter_content(chunk_size=1024 * 1024):
            if chunk:  # skip empty chunks
                f.write(chunk)

print(f"Download complete! File saved at: {pbfPath}")