In [1]:
import rasterio
from rasterio.warp import calculate_default_transform, reproject, Resampling
from rasterio.enums import Resampling
import numpy as np

# Input paths
chm_2019_path = 'data/raster/lidar_products/46114f1_2019_qspatial_chm.tif'
chm_2020_path = 'data/raster/lidar_products/46114f1_2020_ravalli_chm.tif'

# Reproject and aggregate each CHM
for chm_path in [chm_2019_path, chm_2020_path]:
    print(f"Processing: {chm_path}")
    # Read source
    with rasterio.open(chm_path) as src:
        print(f"  Source CRS: {src.crs}")
        print(f"  Source resolution: {src.res}")
        print(f"  Source bounds: {src.bounds}")
        # First reproject to target CRS at original resolution
        transform, width, height = calculate_default_transform(
            src.crs, 'EPSG:6514', src.width, src.height, 
            *src.bounds
        )
        print(f"  Target CRS: EPSG:6514")
        print(f"  Target transform: {transform}")
        print(f"  Target width: {width}")
        print(f"  Target height: {height}")
        
        # Reproject data
        data = np.zeros((height, width), dtype=np.float32)
        nodata_mask = np.ones((height, width), dtype=bool)  # Initialize nodata mask
        reproject(
            source=rasterio.band(src, 1),
            destination=data,
            src_transform=src.transform,
            src_crs=src.crs,
            dst_transform=transform,
            dst_crs='EPSG:6514',
            resampling=Resampling.bilinear,
            dst_nodata=np.nan  # Use NaN to track nodata values
        )
        nodata_mask = np.isnan(data)  # Update mask based on NaN values
        print("  Reprojection complete.")

        canopy_height_threshold = 1.0
        # Convert to binary mask of values > 0.5, preserving nodata
        binary = np.where(nodata_mask, np.nan, (data > canopy_height_threshold).astype(np.float32))
        print("  Converted to binary mask.")
        
        # Calculate proportion in 30m blocks
        scale_factor = 30 / src.res[0]  # assuming square pixels
        block_size = int(scale_factor)
        print(f"  Block size: {block_size} pixels")
        
        # Calculate new dimensions, handling non-divisible sizes
        new_height = height // block_size
        new_width = width // block_size
        print(f"  New height (blocks): {new_height}")
        print(f"  New width (blocks): {new_width}")
        
        # Trim both binary and nodata mask
        trimmed_binary = binary[:new_height * block_size, :new_width * block_size]
        trimmed_nodata = nodata_mask[:new_height * block_size, :new_width * block_size]

        # Reshape both arrays and compute means
        aggregated_data = trimmed_binary.reshape(
            (new_height, block_size, new_width, block_size)
        ).mean(axis=(1,3))

        # Calculate proportion of nodata in each block
        nodata_proportion = trimmed_nodata.reshape(
            (new_height, block_size, new_width, block_size)
        ).mean(axis=(1,3))

        # Set blocks with majority nodata (>50%) to nodata value
        aggregated_mask = (nodata_proportion > 0.5)
        rescaled_data = np.round(aggregated_data * 100).clip(0, 100).astype(np.uint8)
        rescaled_data[aggregated_mask] = 255  # Apply nodata value

        # Write output
        if '2019' in chm_path:
            output_path = 'data/raster/lidar_products/2019_lidar_cover.tif'
        else:
            output_path = 'data/raster/lidar_products/2020_lidar_cover.tif'
        with rasterio.open(output_path, 'w',
                           driver='GTiff',
                           height=new_height,
                           width=new_width,
                           count=1,
                           dtype=np.uint8,
                           crs=src.crs,
                           transform=transform * transform.scale(block_size, block_size),
                           nodata=255) as dst:
            dst.write(rescaled_data, 1)
        print("  Write complete.")


Processing: data/raster/lidar_products/46114f1_2019_qspatial_chm.tif
  Source CRS: EPSG:6514
  Source resolution: (0.9144000000087544, 0.9144000000087544)
  Source bounds: BoundingBox(left=246941.9496002429, bottom=276082.96320049936, right=256548.63600033487, top=288227.10960061563)
  Target CRS: EPSG:6514
  Target transform: | 0.91, 0.00, 246941.95|
| 0.00,-0.91, 288227.11|
| 0.00, 0.00, 1.00|
  Target width: 10506
  Target height: 13281
  Reprojection complete.
  Converted to binary mask.
  Block size: 32 pixels
  New height (blocks): 415
  New width (blocks): 328


  rescaled_data = np.round(aggregated_data * 100).clip(0, 100).astype(np.uint8)


  Write complete.
Processing: data/raster/lidar_products/46114f1_2020_ravalli_chm.tif
  Source CRS: EPSG:6514
  Source resolution: (1.0, 1.0)
  Source bounds: BoundingBox(left=246200.0, bottom=273807.0, right=256005.0, top=279298.0)
  Target CRS: EPSG:6514
  Target transform: | 1.00, 0.00, 246200.00|
| 0.00,-1.00, 279298.00|
| 0.00, 0.00, 1.00|
  Target width: 9805
  Target height: 5491
  Reprojection complete.
  Converted to binary mask.
  Block size: 30 pixels
  New height (blocks): 183
  New width (blocks): 326
  Write complete.


  rescaled_data = np.round(aggregated_data * 100).clip(0, 100).astype(np.uint8)
