# Data Preparation

**Author:** Florian Klaver

In this notebook, the datasets are prepared for the Multi-Criteria Analysis (MCA).

The following steps are performed: 
1. Create binary mask from Cantonal boundary.
2. Calculate Slope from the DEM and clip to boundary.
3. Rasterize Forest areas (Cover) and clip to boundary.
4. Rasterize Water bodies (Mask) and clip to boundary.
5. Filter and rasterize heavy-traffic roads, calculate the Euclidean distance, and clip to boundary.
6. Filter and rasterize settlement areas, calculate the Euclidean distance, and clip to boundary.
7. Extract and rasterize Prey potential (Alpine pastures) and clip to boundary.

All outputs are saved as 10m aligned GeoTIFFs in the 'output' directory.

## Setup

In [13]:
import os
import geopandas as gpd
import rasterio
from rasterio import features
from scipy.ndimage import distance_transform_edt
from osgeo import gdal
import numpy as np
from pathlib import Path

In [14]:
# --- PATH CONFIGURATION ---
# Locate the folder that holds this code. `__file__` is defined when the code
# runs as a regular .py file; inside a Jupyter kernel it is undefined, so the
# current working directory is used instead.
try:
    script_dir = Path(__file__).parent
except NameError:
    script_dir = Path.cwd()

# The project root is one level above the script folder; inputs and outputs
# live in sibling folders of that root.
PROJECT_ROOT = script_dir.parent
DATA_DIR = PROJECT_ROOT.joinpath('data')
OUTPUT_DIR = PROJECT_ROOT.joinpath('output')

# Input Files
# NOTE: the DEM is read from the output folder, not from the data folder.
DEM_PATH = OUTPUT_DIR.joinpath("dem_10m_graubuenden.tif")
BOUNDARIES_PATH = DATA_DIR.joinpath("swissBOUNDARIES3D_1_5_LV95_LN02.gpkg")
TLM_PATH = DATA_DIR.joinpath("SWISSTLM3D_2025.gpkg")
AREAL_PATH = DATA_DIR.joinpath("arealstatistik_2056.gpkg")

# Output Files
SLOPE_PATH = OUTPUT_DIR.joinpath("slope_10m_graubuenden.tif")
FOREST_RASTER_PATH = OUTPUT_DIR.joinpath("forest_10m_graubuenden.tif")
ROADS_DIST_PATH = OUTPUT_DIR.joinpath("distance_roads_10m.tif")
SETTLEMENT_DIST_PATH = OUTPUT_DIR.joinpath("distance_settlements_10m.tif")
WATER_MASK_PATH = OUTPUT_DIR.joinpath("water_mask_10m.tif")
PREY_RASTER_PATH = OUTPUT_DIR.joinpath("prey_10m.tif")

# Create the output folder (and any missing parents) up front so that later
# writes cannot fail on a missing directory.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

## Process

In [15]:
# --- HELPER FUNCTIONS ---

def safe_delete(path):
    """
    Delete the file at `path` if it exists; do nothing when it is absent.

    Args:
        path: Location of the file, as a string or path-like object.

    Raises:
        PermissionError: If the operating system refuses the deletion, which
            typically means another program (e.g., QGIS) holds the file open.
            The original OS error is attached as the exception's cause.

    Any other OS-level failure is reported on stdout and suppressed, so the
    caller can still try to overwrite the file.
    """
    path = Path(path)
    if not path.exists():
        return

    try:
        os.remove(path)
    except PermissionError as err:
        print(f"CRITICAL ERROR: Could not delete {path.name}. File is locked.")
        print("Action required: Close QGIS or any other software using this file.")
        # Chain explicitly so the underlying errno/message stays attached.
        raise PermissionError(f"File locked: {path}") from err
    except OSError as err:
        # os.remove only raises OSError subclasses; keep this best-effort.
        print(f"Error deleting {path.name}: {err}")

def get_graubuenden_boundary():
    """
    Load the outline of Canton Graubünden and return it as one merged geometry.

    The canton layer is read from BOUNDARIES_PATH and filtered by the first
    attribute column that is present: 'NAME' or 'name' (matched against the
    German, Italian and Romansh canton names) or 'kantonsnummer' (value 18).
    If none of these columns exists, all features of the layer are used and a
    warning is printed.

    The result is used to spatially mask vector data loading and to build the
    raster clip mask.

    Returns:
        The union of the selected geometries, or None when the layer cannot
        be read or the selection is empty. The reason is printed in both cases.
    """
    canton_names = ['Graubünden', 'Grigioni', 'Grischun']

    print("-> Loading Cantonal Boundary (Graubünden)...")
    try:
        gdf = gpd.read_file(BOUNDARIES_PATH, layer='TLM_KANTONSGEBIET')

        if 'NAME' in gdf.columns:
            gdf_gr = gdf[gdf['NAME'].isin(canton_names)]
        elif 'name' in gdf.columns:
            gdf_gr = gdf[gdf['name'].isin(canton_names)]
        elif 'kantonsnummer' in gdf.columns:
            gdf_gr = gdf[gdf['kantonsnummer'] == 18]
        else:
            # Unknown schema: keep the previous fallback, but make it visible
            # because the resulting outline then covers every feature.
            print("   WARNING: No canton name/number column found; using ALL features of the layer.")
            gdf_gr = gdf

        # An empty selection would yield an empty geometry and only fail
        # later, far away from the real cause.
        if gdf_gr.empty:
            print("Error loading boundaries: no feature matched Canton Graubünden.")
            return None

        # Dissolve geometry. `union_all` is preferred when available;
        # `unary_union` is the fallback for older geopandas versions.
        geometries = gdf_gr.geometry
        if hasattr(geometries, 'union_all'):
            return geometries.union_all()
        return geometries.unary_union

    except Exception as e:
        # Boundary of the notebook: report and let the caller stop on None.
        print(f"Error loading boundaries: {e}")
        return None
    
def create_boundary_mask(reference_path, geometry):
    """
    Burn `geometry` into a grid that matches the reference raster.

    Args:
        reference_path: Raster whose shape and affine transform define the grid.
        geometry: Geometry to burn into the grid.

    Returns:
        A uint8 array with the reference raster's shape: 1 where the geometry
        was burned (inside), 0 everywhere else (outside). It is used to clip
        all other rasters.
    """
    # Only the grid definition is needed; no pixel data is read.
    with rasterio.open(reference_path) as ref:
        grid_shape, grid_transform = ref.shape, ref.transform

    return features.rasterize(
        [(geometry, 1)],
        out_shape=grid_shape,
        transform=grid_transform,
        fill=0,
        default_value=1,
        dtype=rasterio.uint8,
    )

def simple_rasterize(gdf, reference_path, output_path, clip_mask=None):
    """
    Write a binary presence raster (1 = feature, 0 = none) for `gdf`.

    Args:
        gdf: GeoDataFrame whose geometries are burned into the grid.
        reference_path: Raster that supplies grid shape, transform and the
            base metadata of the output file.
        output_path: Destination GeoTIFF; an existing file is deleted first.
        clip_mask: Optional array multiplied element-wise with the result,
            so cells where the mask is 0 become 0.

    The output is a single uint8 band with NoData set to 0.
    """
    output_path = Path(output_path)
    safe_delete(output_path)

    with rasterio.open(reference_path) as ref:
        profile = ref.meta.copy()
        profile.update(dtype=rasterio.uint8, count=1, nodata=0)
        grid_shape = ref.shape
        grid_transform = ref.transform

    feature_count = len(gdf)
    print(f"   ... Rasterizing {feature_count} features...")

    if feature_count == 0:
        # Nothing to burn: produce an all-zero grid of the right shape.
        burned = np.zeros(grid_shape, dtype=rasterio.uint8)
    else:
        burned = features.rasterize(
            ((shape_geom, 1) for shape_geom in gdf.geometry),
            out_shape=grid_shape,
            transform=grid_transform,
            fill=0,
            default_value=1,
            dtype=rasterio.uint8,
        )

    # APPLY CLIPPING
    if clip_mask is not None:
        burned = burned * clip_mask

    with rasterio.open(output_path, 'w', **profile) as dst:
        dst.write(burned, 1)
    print(f"   Saved clipped raster: {output_path.name}")

def rasterize_and_distance(gdf, reference_path, output_path, burn_value=1, clip_mask=None):
    """
    Write a raster holding the Euclidean distance to the nearest feature.

    Args:
        gdf: GeoDataFrame whose geometries mark the distance sources.
        reference_path: Raster that supplies grid shape, transform, resolution
            and the base metadata of the output file.
        output_path: Destination GeoTIFF; an existing file is deleted first.
        burn_value: Value written into cells covered by a feature.
        clip_mask: Optional array; cells where it equals 0 are set to the
            NoData value -9999 in the result.

    Distances are cell counts multiplied by the first component of the
    reference raster's resolution, so square pixels are assumed.
    The output is a single float32 band with NoData set to -9999.
    """
    output_path = Path(output_path)
    safe_delete(output_path)

    with rasterio.open(reference_path) as ref:
        profile = ref.meta.copy()
        grid_shape = ref.shape
        grid_transform = ref.transform
        cell_size = ref.res[0]

    print("   ... Rasterizing & Calculating Distance...")
    if len(gdf) == 0:
        burned = np.zeros(grid_shape, dtype=rasterio.uint8)
    else:
        burned = features.rasterize(
            ((shape_geom, burn_value) for shape_geom in gdf.geometry),
            out_shape=grid_shape,
            transform=grid_transform,
            default_value=burn_value,
            dtype=rasterio.uint8,
        )

    # The transform measures the distance of non-zero cells to the nearest
    # zero cell, so the grid is inverted: features become 0, background True.
    # It runs on the FULL extent so features just outside the clip area still
    # influence distances inside it.
    background = burned == 0
    dist_meters = distance_transform_edt(background) * cell_size

    # APPLY CLIPPING TO RESULT
    if clip_mask is not None:
        outside = clip_mask == 0
        dist_meters[outside] = -9999

    profile.update(dtype=rasterio.float32, count=1, nodata=-9999)
    with rasterio.open(output_path, 'w', **profile) as dst:
        dst.write(dist_meters.astype(rasterio.float32), 1)
    print(f"   Saved clipped distance raster: {output_path.name}")

In [16]:
# ==========================================
# MAIN EXECUTION
# ==========================================

# Stop early when the reference DEM is missing: every later step uses it as
# the grid template. SystemExit is raised directly instead of calling
# `exit()`, which is a helper the `site` module installs for interactive
# shells and is not meant for use in programs. A non-zero status marks the
# run as failed.
if not DEM_PATH.exists():
    print(f"Critical Error: DEM not found at {DEM_PATH}.")
    raise SystemExit(1)

# PREPARE MASTER MASK
gr_geometry = get_graubuenden_boundary()
if gr_geometry is None:
    # get_graubuenden_boundary() has already printed the reason.
    raise SystemExit(1)

print("\n--- 0. CREATING MASTER CLIP MASK ---")
# Binary grid aligned with the DEM (1 = inside the canton, 0 = outside);
# reused by every step below to clip its output.
boundary_mask = create_boundary_mask(DEM_PATH, gr_geometry)
print("Master mask created.")

# 1. CLIP DEM 
print("\n--- 1. CLIPPING SOURCE DEM ---")
# This ensures that any script using the DEM as a reference (like MCA)
# will automatically ignore pixels outside the canton.
# NOTE: "r+" opens the existing file for update, so the DEM at DEM_PATH is
# overwritten in place; this step keeps no copy of the unclipped values.
with rasterio.open(DEM_PATH, "r+") as src:
    data = src.read(1)  # band 1 as an in-memory array
    # Apply mask to DEM data: cells where the boundary mask is 0 become -9999
    data[boundary_mask == 0] = -9999
    src.write(data, 1)
    # Declare -9999 as the dataset's NoData value, matching the cells set above
    src.nodata = -9999
print("DEM file updated with hard clipping.")

# 2. SLOPE
print("\n--- 2. SLOPE CALCULATION & CLIPPING ---")
# Derive slope from the DEM at DEM_PATH (already clipped by the step above)
# and write it to SLOPE_PATH. slopeFormat="degree" requests slope in degrees;
# computeEdges=True asks GDAL to also compute values at raster edges and next
# to NoData cells (see the gdaldem documentation).
gdal.DEMProcessing(
    str(SLOPE_PATH), str(DEM_PATH), "slope", 
    options=gdal.DEMProcessingOptions(computeEdges=True, slopeFormat="degree")
)
# Re-open the freshly written slope raster for update and apply the same
# boundary clipping as for the DEM: cells outside the mask become -9999.
with rasterio.open(SLOPE_PATH, "r+") as src:
    data = src.read(1)
    data[boundary_mask == 0] = -9999
    src.write(data, 1)
    # Declare -9999 as the dataset's NoData value, matching the cells set above
    src.nodata = -9999
print("Slope calculated and clipped.")

# 3. FOREST
print("\n--- 3. FOREST ---")
# Load the land-cover layer once, limited to features that intersect the
# canton geometry; the forest and water steps both select from it.
gdf_land = gpd.read_file(TLM_PATH, layer='tlm_bb_bodenbedeckung', mask=gr_geometry)
forest_types = ['Wald', 'Gebueschwald', 'Wald offen', 'Gehoelzflaeche']
is_forest = gdf_land['objektart'].isin(forest_types)
gdf_forest = gdf_land.loc[is_forest]
simple_rasterize(gdf_forest, DEM_PATH, FOREST_RASTER_PATH, clip_mask=boundary_mask)

# 4. WATER
print("\n--- 4. WATER ---")
# Only the 'Stehende Gewaesser' object type is used for the water mask.
is_standing_water = gdf_land['objektart'].eq('Stehende Gewaesser')
gdf_water = gdf_land.loc[is_standing_water]
simple_rasterize(gdf_water, DEM_PATH, WATER_MASK_PATH, clip_mask=boundary_mask)

# 5. ROADS
print("\n--- 5. ROADS ---")
gdf_roads = gpd.read_file(TLM_PATH, layer='tlm_strassen_strasse', mask=gr_geometry)
# Object types treated as heavy-traffic roads.
heavy_types = [
    'Autobahn', 'Autostrasse', '10m Strasse', '8m Strasse', '6m Strasse',
    'Hauptstrasse', 'Verbindungsstrasse', 'Einfahrt', 'Ausfahrt', 'Zufahrt',
]
is_heavy = gdf_roads['objektart'].isin(heavy_types)
# Tunnels and underpasses are excluded, presumably because only roads at the
# surface should count as a disturbance (TODO confirm).
is_underground = gdf_roads['kunstbaute'].isin(['Tunnel', 'Unterfuehrung'])
gdf_surf = gdf_roads[is_heavy & ~is_underground]
if not gdf_surf.empty:
    rasterize_and_distance(gdf_surf, DEM_PATH, ROADS_DIST_PATH, clip_mask=boundary_mask)
else:
    # Make the skip visible: without this, a raster left over from an earlier
    # run could be picked up downstream unnoticed.
    print(f"   WARNING: No matching road features found; {ROADS_DIST_PATH.name} was NOT written.")

# 6. SETTLEMENTS
print("\n--- 6. SETTLEMENTS ---")
# Column of the land-use statistics holding the class code used for filtering.
AS_COLUMN = 'AS18_72'
gdf_areal = gpd.read_file(AREAL_PATH, layer='arealstatistik_all', mask=gr_geometry)
if AS_COLUMN in gdf_areal.columns:
    # Class codes 1-13 are the ones treated as settlement area here.
    settlement_codes = list(range(1, 14))
    gdf_settle = gdf_areal[gdf_areal[AS_COLUMN].isin(settlement_codes)]
    if not gdf_settle.empty:
        # Grow every selected feature by 150 CRS units before rasterizing.
        gdf_settle_poly = gpd.GeoDataFrame(geometry=gdf_settle.buffer(150))
        rasterize_and_distance(gdf_settle_poly, DEM_PATH, SETTLEMENT_DIST_PATH, clip_mask=boundary_mask)
    else:
        # Make the skip visible: a raster left over from an earlier run could
        # otherwise be picked up downstream unnoticed.
        print(f"   WARNING: No settlement features found; {SETTLEMENT_DIST_PATH.name} was NOT written.")
else:
    print(f"   WARNING: Column '{AS_COLUMN}' not found in {AREAL_PATH.name}; {SETTLEMENT_DIST_PATH.name} was NOT written.")

# 7. PREY
print("\n--- 7. PREY ---")
# Reuses the land-use statistics table and class-code column loaded in the
# settlements step.
if AS_COLUMN in gdf_areal.columns:
    # 43, 44: Home pastures (near settlements, important food source)
    # 45-49: Alpine pastures (Classic summer grazing)
    # 65: Unproductive grass/shrubs (Wild game habitat: Chamois/Ibex)
    target_codes = [43, 44, 45, 46, 47, 48, 49, 65]
    print(f"   Filtering codes (Expanded): {target_codes}")

    gdf_prey = gdf_areal[gdf_areal[AS_COLUMN].isin(target_codes)]
    if not gdf_prey.empty:
        simple_rasterize(gdf_prey, DEM_PATH, PREY_RASTER_PATH, clip_mask=boundary_mask)
    else:
        # Make the skip visible: a raster left over from an earlier run could
        # otherwise be picked up downstream unnoticed.
        print(f"   WARNING: No prey features found; {PREY_RASTER_PATH.name} was NOT written.")
else:
    print(f"   WARNING: Column '{AS_COLUMN}' not found in {AREAL_PATH.name}; {PREY_RASTER_PATH.name} was NOT written.")

print("\n--- PREPROCESSING COMPLETED  ---")

-> Loading Cantonal Boundary (Graubünden)...

--- 0. CREATING MASTER CLIP MASK ---
Master mask created.

--- 1. CLIPPING SOURCE DEM ---
DEM file updated with hard clipping.

--- 2. SLOPE CALCULATION & CLIPPING ---
Slope calculated and clipped.

--- 3. FOREST ---
   ... Rasterizing 58825 features...
   Saved clipped raster: forest_10m_graubuenden.tif

--- 4. WATER ---
   ... Rasterizing 3593 features...
   Saved clipped raster: water_mask_10m.tif

--- 5. ROADS ---
   ... Rasterizing & Calculating Distance...
   Saved clipped distance raster: distance_roads_10m.tif

--- 6. SETTLEMENTS ---
   ... Rasterizing & Calculating Distance...
   Saved clipped distance raster: distance_settlements_10m.tif

--- 7. PREY ---
   Filtering codes (Expanded): [43, 44, 45, 46, 47, 48, 49, 65]
   ... Rasterizing 243757 features...
   Saved clipped raster: prey_10m.tif

--- PREPROCESSING COMPLETED  ---
