## CASSDA - Data Download

Contains code to download the following data
- Wildlife crossings from OSM
- Streets from OSM
- High-Resolution satellite images from discomap https://discomap.eea.europa.eu/wiki/ (Cloudless 2m/px satellite data)

#### Deps

In [1]:
import os
import sys

sys.path.append("../src")

import geopandas as gpd
import requests as r
from owslib.wms import WebMapService
from shapely.geometry import LineString, MultiPolygon, Point, Polygon

from modelling.helpers import polygon_to_yolo_segmentation
from osm.queries import get_street_query, get_wildlife_crossing_query
from wms.get_wms_crops import get_crop_bboxes

#### Global Config

In [2]:
# Country codes handed to the OSM query builders; both OSM export loops iterate over this list.
# NOTE(review): presumably ISO 3166-1 alpha-2 codes — confirm against osm.queries.
countries_to_process = ["CH"]
# Root folder for downloaded WMS crops; one sub-folder per source geometry file is created below it.
image_output_dir = "../data/satellite_images/"
# Folder that receives the GeoJSON exports of the OSM queries.
geodata_output_dir = "../data/geodata/"

In [3]:
# these typically do not need to be changed
# Overpass endpoint; the OSM queries are sent to it as the `data` request parameter.
OVERPASS_API_URL = "https://overpass-api.de/api/interpreter"
# WMS endpoint of the satellite imagery; handed to owslib's WebMapService.
WMS_URL = "https://image.discomap.eea.europa.eu/arcgis/services/GioLand/VHR_2021_LAEA/ImageServer/WMSServer/?request=GetCapabilities&service=WMS"
CRS = "EPSG:3035"  # ETRS89 / ETRS-LAEA
PIXEL_RESOLUTION_M = 2  # 2 meters per pixel

### Wildlife Crossing

#### Export Polygons from OSM

In [None]:
# config for wildlife crossing query
# Forwarded to get_wildlife_crossing_query(); when True the exported file name
# additionally gets the suffix "_bridges_only".
bridges_only = True  # True: bridges only; False: tunnels and other crossing types as well

Builds the query according to the parameters defined above, sends it to the Overpass API and,
on success, stores the result in <geodata_output_dir>

In [8]:
# Export wildlife crossings from OSM (Overpass API) to one GeoJSON file per country.
# Each OSM element becomes one feature whose properties are the element's tags.
bridges_string = "_bridges_only" if bridges_only else ""
crossing_query_timeout = 240  # seconds the Overpass server may spend on the query

for country in countries_to_process:
    print(f"Processing country: {country}")

    # Download wildlife crossings
    crossing_query = get_wildlife_crossing_query(
        country, timeout=crossing_query_timeout, bridges_only=bridges_only
    )
    # requests never times out by default; give the server its full query
    # budget plus some slack so a dead connection cannot hang the notebook.
    response = r.get(
        OVERPASS_API_URL,
        params={"data": crossing_query},
        timeout=(30, crossing_query_timeout + 60),  # (connect, read) seconds
    )
    response.raise_for_status()
    data = response.json()

    features = []
    for el in data["elements"]:
        tags = el.get("tags", {})

        # Node → Point
        if el["type"] == "node":
            geometry = Point(el["lon"], el["lat"])
            features.append({"id": el["id"], "geometry": geometry, **tags})

        # Way → LineString or Polygon
        elif el["type"] == "way" and "geometry" in el:
            coords = [(pt["lon"], pt["lat"]) for pt in el["geometry"]]
            if len(coords) >= 3 and coords[0] == coords[-1]:
                geometry = Polygon(coords)  # Closed → Polygon
            else:
                geometry = LineString(coords)  # Open → LineString
            features.append({"id": el["id"], "geometry": geometry, **tags})

        # Relation → MultiPolygon (if members have geometry)
        # NOTE(review): every member way with >= 3 points is turned into its own
        # polygon, regardless of member role or whether the way is closed —
        # confirm this is what the downstream steps expect.
        elif el["type"] == "relation" and "members" in el:
            polygons = []
            for member in el["members"]:
                if member["type"] == "way" and "geometry" in member:
                    coords = [(pt["lon"], pt["lat"]) for pt in member["geometry"]]
                    if len(coords) >= 3:
                        polygons.append(Polygon(coords))
            if polygons:
                geometry = MultiPolygon(polygons) if len(polygons) > 1 else polygons[0]
                features.append({"id": el["id"], "geometry": geometry, **tags})

    # --- Create GeoDataFrame ---
    if not features:
        # Nothing to export: skip this country instead of falling through to an
        # undefined `gdf` (or one left over from a previous iteration/cell),
        # which would crash or write another country's data under this name.
        print("No wildlife crossings found.")
        continue

    print(f"Found {len(features)} wildlife crossings.")
    gdf = gpd.GeoDataFrame(features, geometry="geometry", crs="EPSG:4326")

    # --- Reproject to EPSG:3035 (ETRS89 / LAEA Europe) ---
    gdf = gdf.to_crs(epsg=3035)
    print("Reprojected to EPSG:3035.")

    os.makedirs(geodata_output_dir, exist_ok=True)

    # Save the GeoDataFrame to a file
    output_file = os.path.join(
        geodata_output_dir,
        f"{country.lower()}_wildlife_crossings_osm{bridges_string}.geojson",
    )
    gdf.to_file(output_file, driver="GeoJSON")
    print(f"Saved GeoDataFrame to {output_file}")

Processing country: CH
Found 18 wildlife crossings.
Reprojected to EPSG:3035.
Saved GeoDataFrame to ../data/geodata/ch_wildlife_crossings_osm_bridges_only.geojson


### Download WMS Images

#### Config

In [6]:
# config for satellite image download
pixel_size = 2  # meter per pixel
buffer_size = 640  # meter buffer around each point, square bbox
# The requested image is (2 * buffer_size / pixel_size) pixels per side,
# i.e. 640 x 640 px with the values above.

# Vector files to process; each one gets its own output sub-folder named after
# the file (without extension) plus the buffer size.
source_geometry_files = ["../data/geodata/combined_wildlife_poly_bridges_filtered_no_ch.gpkg"]

In [7]:
# Connect to the imagery WMS (protocol version 1.3.0) and pick the first
# layer the service advertises; its name is reused for every GetMap request.
wms = WebMapService(WMS_URL, version="1.3.0")
available_layers = list(wms.contents)
layer_name = available_layers[0]
print(layer_name)

VHR_2021_LAEA


#### Execution

In [8]:
# Download one WMS crop and write one YOLO segmentation label per source geometry.
# Output layout per source file: <image_output_dir>/<file stem>_<buffer_size>/{images,labels}/

# Request PNG so the bytes match the ".png" file names written below and so that
# `transparent=True` is meaningful (JPEG has no alpha channel).
img_format = "image/png"
# for every point create a square around it and request a WMS crop centered on the point
img_size = (
    int(buffer_size * 2 / pixel_size),
    int(buffer_size * 2 / pixel_size),
)  # width, height in pixels

for source_file in source_geometry_files:
    print(f"Processing source geometry file: {os.path.basename(source_file)}")
    gdf = gpd.read_file(source_file)

    # Keep all polygons, plus points that are tagged as bridges.
    # (`&` binds tighter than `|`; the named masks make that grouping explicit.)
    is_polygon = gdf.geom_type == "Polygon"
    is_bridge_point = (gdf.geom_type == "Point") & (gdf["bridge"] == "yes")
    gdf = gdf[is_polygon | is_bridge_point]
    print(f"Number of geometries to process: {len(gdf)}")

    # Reproject once and use the SAME frame for centroids and labels: the crop
    # bboxes are in EPSG:3035, so the label geometries must be as well.
    gdf = gdf.to_crs(epsg=3035)
    gdf_centroids = gdf.centroid
    areas_of_interest = get_crop_bboxes(gdf_centroids, buffer_size)

    output_subdir = os.path.join(
        image_output_dir,
        os.path.splitext(os.path.basename(source_file))[0] + "_" + str(buffer_size),
    )
    os.makedirs(os.path.join(output_subdir, "images"), exist_ok=True)
    os.makedirs(os.path.join(output_subdir, "labels"), exist_ok=True)

    # NOTE(review): assumes get_crop_bboxes returns exactly one bbox per
    # centroid, in input order, so that `idx` addresses the matching geometry.
    for idx, bbox in enumerate(areas_of_interest):
        # Request WMS image
        img = wms.getmap(
            layers=[layer_name],
            srs="EPSG:3035",
            bbox=bbox,
            size=img_size,
            format=img_format,
            transparent=True,
        )

        # Save image to file
        img_data = img.read()
        img_filename = os.path.join(output_subdir, "images", f"crossing_{idx}.png")
        with open(img_filename, "wb") as img_file:
            img_file.write(img_data)

        # save polygon label to file in yolo format
        label_filename = os.path.join(output_subdir, "labels", f"crossing_{idx}.txt")
        polygon = gdf.geometry.iloc[idx]

        # Convert polygon to YOLO segmentation format
        yolo_line = polygon_to_yolo_segmentation(polygon, bbox, class_id=0)

        if yolo_line:
            with open(label_filename, "w") as label_file:
                label_file.write(yolo_line + "\n")
            print(f"Saved WMS crop and label for crossing {idx} to {img_filename}")
        else:
            # The image stays on disk without a label file in this case.
            print(f"Warning: Could not convert geometry {idx} to YOLO format")

Processing source geometry file: combined_wildlife_poly_bridges_filtered_no_ch.gpkg
Number of geometries to process: 469
Saved WMS crop and label for crossing 0 to ../data/satellite_images/combined_wildlife_poly_bridges_filtered_no_ch_640\images\crossing_0.png
Saved WMS crop and label for crossing 1 to ../data/satellite_images/combined_wildlife_poly_bridges_filtered_no_ch_640\images\crossing_1.png
Saved WMS crop and label for crossing 2 to ../data/satellite_images/combined_wildlife_poly_bridges_filtered_no_ch_640\images\crossing_2.png
Saved WMS crop and label for crossing 3 to ../data/satellite_images/combined_wildlife_poly_bridges_filtered_no_ch_640\images\crossing_3.png
Saved WMS crop and label for crossing 4 to ../data/satellite_images/combined_wildlife_poly_bridges_filtered_no_ch_640\images\crossing_4.png
Saved WMS crop and label for crossing 5 to ../data/satellite_images/combined_wildlife_poly_bridges_filtered_no_ch_640\images\crossing_5.png
Saved WMS crop and label for crossing 6

### Road

#### Export LineStrings from OSM

In [22]:
# config for street query
# Forwarded to get_street_query() and embedded in the exported file name.
street_type_depth = 1  # how detailed the street network should be (1-12)
# Forwarded to get_street_query() as its `timeout` argument.
overpass_timeout = 180  # timeout for overpass query in seconds

In [23]:
# Export the street network from OSM (Overpass API) to one GeoJSON file per country.
# Only open ways are kept; each becomes a LineString whose properties are the way's tags.
for country in countries_to_process:
    print(f"Processing country for streets: {country}")

    # Download street data
    street_query = get_street_query(
        extent_country=country,
        street_type_depth=street_type_depth,
        timeout=overpass_timeout,
    )
    # requests never times out by default; give the server its full query
    # budget plus some slack so a dead connection cannot hang the notebook.
    response = r.get(
        OVERPASS_API_URL,
        params={"data": street_query},
        timeout=(30, overpass_timeout + 60),  # (connect, read) seconds
    )
    response.raise_for_status()
    data = response.json()

    features = []
    for el in data["elements"]:
        if el["type"] == "way" and "geometry" in el:
            coords = [(pt["lon"], pt["lat"]) for pt in el["geometry"]]
            # Only open ways → LineString
            if len(coords) >= 2 and coords[0] != coords[-1]:
                features.append(
                    {
                        "id": el["id"],
                        "geometry": LineString(coords),
                        **el.get("tags", {}),
                    }
                )

    # --- Build GeoDataFrame ---
    if not features:
        print("No LineString features found!")
        # Empty result: declare the geometry column and tag the frame with the
        # target CRS directly, because the reprojection step below is skipped
        # for empty frames and the export should still carry EPSG:3035.
        gdf = gpd.GeoDataFrame(
            columns=["id", "geometry"], geometry="geometry", crs="EPSG:3035"
        )
    else:
        gdf = gpd.GeoDataFrame(features, geometry="geometry", crs="EPSG:4326")
        print(f"Found {len(gdf)} LineString features.")

    # --- Prepare output folder ---
    output_dir = os.path.abspath(geodata_output_dir)
    os.makedirs(output_dir, exist_ok=True)

    # --- Reproject to EPSG:3035 (ETRS89 / LAEA Europe) ---
    if not gdf.empty:
        gdf = gdf.to_crs(epsg=3035)
        print("Reprojected to EPSG:3035.")

    # --- Save original GeoJSON ---
    output_file = os.path.join(
        output_dir, f"{country}_highway_depth_{street_type_depth}.geojson"
    )
    gdf.to_file(output_file, driver="GeoJSON")
    print(f"Data saved to {output_file}")

Processing country for streets: CH
Found 14700 LineString features.
Reprojected to EPSG:3035.


INFO:pyogrio._io:Created 14,700 records


Data saved to c:\code_ibre\cassda_zertifikatsarbeit\data\geodata\CH_highway_depth_1.geojson
