In [1]:
# default_exp datasets/geofabrik

In [1]:
# hide
# no_test
![ -e /content ] && pip install -Uqq git+https://github.com/thinkingmachines/geowrangler.git

# Datasets Geofabrik
> List the regions in the Geofabrik index and download their shapefile archives

In [41]:
import os
import shutil
from functools import lru_cache
from pathlib import Path
from urllib.parse import urlparse

# exporti
import requests

In [51]:
# exporti
@lru_cache(maxsize=None)
def load_geofabrik_data() -> dict:
    """Fetch the geofabrik index (the variant without geometries) as parsed JSON.

    The result is memoized by `lru_cache`, so the index is requested at most
    once per process and every caller receives the same object; treat it as
    read-only. A call that raises is not cached, so a later call retries.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(
        "https://download.geofabrik.de/index-v1-nogeom.json", timeout=30
    )
    # Fail loudly here instead of trying to parse an error page as JSON.
    response.raise_for_status()
    return response.json()

In [61]:
# export
def list_geofabrik_regions() -> dict:
    """Map each geofabrik region id to its shapefile download URL.

    Regions whose index entry has no (or an empty) "shp" URL are left out.
    """
    shp_url_by_region = {}
    for feature in load_geofabrik_data()["features"]:
        properties = feature["properties"]
        shp_url = properties["urls"].get("shp")
        if shp_url:
            shp_url_by_region[properties["id"]] = shp_url
    return shp_url_by_region

In [60]:
# export
def download_geofabrik_region(region: str, directory: str = "data/") -> Path:
    """Download the shapefile archive of a geofabrik region into a directory.

    Args:
        region: Region id, i.e. one of the keys returned by
            `list_geofabrik_regions()`.
        directory: Existing directory that receives the file; it is not
            created here.

    Returns:
        Path of the downloaded file: `directory` joined with the file name
        taken from the download URL.

    Raises:
        ValueError: If `directory` is not an existing directory, or if
            `region` is not a known region id.
        requests.HTTPError: If the server answers with an error status.
    """
    if not os.path.isdir(directory):
        raise ValueError(f"{directory} is not a directory")
    region_urls = list_geofabrik_regions()
    if region not in region_urls:
        raise ValueError(
            f"{region} not found in geofrabik. Run list_geofabrik_regions() to learn more about available areas"
        )
    url = region_urls[region]
    target = Path(directory) / os.path.basename(urlparse(url).path)
    # The context manager releases the streamed connection even when the
    # download fails; the timeout keeps a stalled connection from hanging.
    with requests.get(url, stream=True, timeout=60) as response:
        # Check the status before opening the target so an HTTP error page is
        # never saved under the archive's name.
        response.raise_for_status()
        with open(target, "wb") as out_file:
            shutil.copyfileobj(response.raw, out_file)
    return target

In [63]:
# hide
# no_test
from nbdev.export import notebook2script

# Run nbdev's notebook-to-script conversion on this notebook (presumably the
# cells flagged `# export` / `# exporti` end up in the library module — confirm).
notebook2script("05_datasets_geofabrik.ipynb")

Converted 05_data_download.ipynb.
