# Summarize CHM results from DINOv3 - create area summary by height bins from `002m` CHMs

#### Workflow steps: 
1.  `cavm_buffer_above_shrubs.ipynb` --> creates the AOI needed for mosaic
2.  `mosaic_chm_dinvo3.ipynb` --> creates a tiles mosaic of CHM data with masking within AOI
3.  `summarize_chm_dinov3_create.ipynb` --> computes height class summary table
4.  `summarize_chm_dinov3_plot.ipynb` --> uses height class smry to plot bar/donut plot
5.  `map_chm_dinov3.ipynb` --> map corresponding to data plotted from summary

Paul Montesano  
Feb 2026

In [1]:
import rasterio
import numpy as np
from multiprocessing import Pool
import pandas as pd
import glob

In [2]:
import re

### Function to parallel process across CHM tifs

In [3]:
def compute_height_class_areas(filepath, height_bins=None, SCALE_FACTOR=None):
    """
    Compute area for each height class in a single raster.
    
    Valid data is determined by band 0 (flag band) == 0.
    
    Parameters:
    -----------
    filepath : str
        Path to GeoTIFF (4-band: value, year, doy, flag)
    height_bins : list of tuples
        Height ranges, e.g., [(0.25, 5), (5, 10), (10, 20)]
    SCALE_FACTOR : float
        Factor to scale the input raster pixels (applied to band 1)
    
    Returns:
    --------
    dict with areas per height class
    """
    if height_bins is None:
        height_bins = [(0, 0.25), (0.25, 5), (5, 10), (10, 20), (20, 50)]
    
    try:
        with rasterio.open(filepath) as src:
            # Read band 1 (raster values) and band 4 (flag)
            data = src.read(1)  # Band 1: raster values
            flag_band = src.read(4)  # Band 4: nodata flags
            
            # Apply scale factor to data if provided
            if SCALE_FACTOR is not None:
                data = data * SCALE_FACTOR
            
            transform = src.transform
            
            # Get pixel area in m²
            pixel_width = abs(transform[0])   # meters
            pixel_height = abs(transform[4])  # meters
            pixel_area_m2 = pixel_width * pixel_height
            pixel_area_km2 = pixel_area_m2 / 1e6
            
            # Total tile area (all pixels)
            total_pixels = data.shape[0] * data.shape[1]
            total_area_km2 = total_pixels * pixel_area_km2
            
            # Valid data mask: flag band == 0 (valid data)
            valid_mask = (flag_band == 0)
            # AOI where we'd like data; needed for for % area analyzed calculations: flags 0, -1, -2 (inside AOI)
            aoi_mask = np.isin(flag_band, [0, -1, -2])
            
            print(f"Processing: {os.path.basename(filepath)}")
            print(f"  Total pixels: {total_pixels:,}")
            print(f"  Valid pixels (flag==0): {np.sum(valid_mask):,} ({100*np.sum(valid_mask)/total_pixels:.1f}%)")
            
            # Calculate area for each height class
            results = {'file': os.path.basename(filepath)}
            
            for min_h, max_h in height_bins:
                class_name = f'{min_h}-{max_h}m'
                # Only consider pixels that are valid (flag==0) AND in height range
                mask = (data >= min_h) & (data < max_h) & valid_mask
                n_pixels = np.sum(mask)
                area_km2 = n_pixels * pixel_area_km2
                results[f'area_{class_name}'] = area_km2
                results[f'n_pixels_{class_name}'] = n_pixels
            
            # Summary metrics
            results['total_area_km2'] = total_area_km2
            results['total_area_aoi_km2'] = np.sum(aoi_mask) * pixel_area_km2
            results['total_valid_area_km2'] = np.sum(valid_mask) * pixel_area_km2
            results['n_valid_pixels'] = np.sum(valid_mask)
            results['n_total_pixels'] = total_pixels
            results['pct_valid'] = 100 * np.sum(valid_mask) / total_pixels if total_pixels > 0 else 0
            
            return results
            
    except Exception as e:
        print(f"Error processing {filepath}: {e}")
        return None

### List CHM tifs

#### TODO: 
+ this should be run on the MOSAIC tile versions of the CHM to avoid double counting
+ water mask would be nice to avoid noise introduced by height values in water (clip mosaic tiles to coastline? Mel)
+ do for a specific geographic domain? (eg all tiles that intersect 100km CAVM buffer?)

In [4]:
NAME_STEM = 'cavm100km_alaska_chm_002m'

MAINDIR = '/explore/nobackup/projects/above/misc/ABoVE_Shrubs'
INDIR =          f'{MAINDIR}/chm/2026_chm/4.3.2.5'
DIR_PAPER_FIGS = f'{MAINDIR}/paper_figs'

In [5]:
DO_INDIV_CHM = False # True: work on original 

In [54]:
if DO_INDIV_CHM:
    files_chm = glob.glob(f'{INDIR}/002m/*-sr-02m.chm.tif')
    
    # Filter for July (07) and August (08) in YYYYMMDD format
    # Pattern matches: WV02_20230715_*, WV03_20190828_*, etc.
    files_chm_jujlyaug = [f for f in files_chm if re.search(r'_\d{4}(07|08)\d{2}_', f)]
    
    print(f"Found {len(files_chm_jujlyaug)} files from July and August (out of {len(files_chm)} total)")
else:
    # Use mosaics
    MOSAIC_SUBDIR = '002m_mos_2018_212_deltayr8_months789_50m' # most images / most area / largest temporal window
    MOSAIC_SUBDIR = '002m_mos_2018_212_deltayr8_months78_50m' 
    MOSAIC_SUBDIR = '002m_mos_2018_212_deltayr8_months78_10m'
    # MOSAIC_SUBDIR = '002m_mos_2018_212_deltayr5_months78_50m' # fewest images / smaller temporal window
    # MOSAIC_SUBDIR = '002m_mos_2018_212_deltayr5_months78_10m' # finest res realistic for this step? fewest images / smaller temporal window
    # MOSAIC_SUBDIR = '002m_mos_2018_212_deltayr5_months78_75m' # coarser res - same as previous
 
    
    files_chm = glob.glob(f'{INDIR}/{MOSAIC_SUBDIR}/*.tif')
    print(f"Found {len(files_chm)} tiles from a mosaic: {MOSAIC_SUBDIR}")

Found 103 tiles from a mosaic: 002m_mos_2018_212_deltayr8_months78_10m


In [55]:
#files_chm_for_summary = files_chm_jujlyaug[0:]

In [56]:
files_chm_for_summary = files_chm

In [57]:
height_bins = [(0,0.25), (0.25, 1.0), (1.0,2.0), (2.0,3.0), (3.0, 5), (5, 10), (10, 20), (20, 50)]
height_bins = [(0,0.25), (0.25, 1.0), (1.0,2.0), (2.0,3.0), (3.0, 4.0), (4.0, 5.0), (5, 10), (10, 20), (20, 50)]

# Updated breaks - useful for Alaska vegetation classification
height_bins = [(0,0.35), (0.35, 1.0), (1.0,1.5), (1.5,3.0), (3.0, 4.0), (4.0, 5.0), (5, 10), (10, 20), (20, 50)]

In [58]:
%%time
from joblib import Parallel, delayed

# Much more Jupyter-friendly
results = Parallel(n_jobs=35, verbose=10, backend='threading')(
    delayed(compute_height_class_areas)(f, height_bins, SCALE_FACTOR=0.1) 
    for f in files_chm_for_summary
)

area_stats_df = pd.DataFrame([r for r in results if r is not None])

[Parallel(n_jobs=35)]: Using backend ThreadingBackend with 35 concurrent workers.


Processing: mosaic_2018_DOY212_deltayr08_months78_R0006C0002.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 19,478,855 (24.0%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0007C0000.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 3,230,037 (4.0%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0004C0003.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 6,921,957 (8.5%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0001C0001.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 2,442,383 (3.0%)


[Parallel(n_jobs=35)]: Done   2 tasks      | elapsed:   29.3s


Processing: mosaic_2018_DOY212_deltayr08_months78_R0007C0002.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 4,876,695 (6.0%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0001C0003.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 52,809,369 (65.2%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0007C0001.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 8,348,904 (10.3%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0003C0002.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 17,646,734 (21.8%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0002C0005.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 15,815,521 (19.5%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0005C0002.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 12,146,444 (15.0%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0004C0002.tif
  Total pixels: 81,000,000
Processing: mosaic_2018_DOY212_deltayr08_months78_R0002C0004.tif
  Total pixels: 81,000,0

[Parallel(n_jobs=35)]: Done  15 tasks      | elapsed:   47.9s


Processing: mosaic_2018_DOY212_deltayr08_months78_R0005C0003.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 13,051,837 (16.1%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0004C0000.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 21,519,987 (26.6%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0002C0000.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 13,645,363 (16.8%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0004C0001.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 14,046,163 (17.3%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0002C0001.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 34,658,797 (42.8%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0009C0004.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 27,094,995 (33.5%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0006C0000.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 6,507,626 (8.0%)
Processing: mosaic_2018_DOY212_deltayr08_mo

[Parallel(n_jobs=35)]: Done  28 tasks      | elapsed:   58.5s


Processing: mosaic_2018_DOY212_deltayr08_months78_R0009C0006.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 18,908,682 (23.3%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0014C0012.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 857,318 (1.1%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0010C0008.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 37,073,247 (45.8%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0014C0011.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 11,245,024 (13.9%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0011C0006.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 33,746,792 (41.7%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0003C0004.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 37,057,418 (45.7%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0009C0002.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 10,914,817 (13.5%)
Processing: mosaic_2018_DOY212_deltayr08_mont

[Parallel(n_jobs=35)]: Done  45 out of 103 | elapsed:  1.5min remaining:  1.9min


Processing: mosaic_2018_DOY212_deltayr08_months78_R0012C0007.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 31,809,937 (39.3%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0008C0000.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 47,267,642 (58.4%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0010C0001.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 2,895,619 (3.6%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0011C0010.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 29,872,219 (36.9%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0010C0004.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 33,547,519 (41.4%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0013C0001.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 813,992 (1.0%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0009C0000.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 6,724,109 (8.3%)
Processing: mosaic_2018_DOY212_deltayr08_months78

[Parallel(n_jobs=35)]: Done  56 out of 103 | elapsed:  1.7min remaining:  1.5min


Processing: mosaic_2018_DOY212_deltayr08_months78_R0011C0009.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 2,671,997 (3.3%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0009C0008.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 3,353,120 (4.1%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0008C0004.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 24,580,905 (30.3%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0009C0007.tif
  Total pixels: 81,000,000
Processing: mosaic_2018_DOY212_deltayr08_months78_R0010C0003.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 2,278,352 (2.8%)
  Valid pixels (flag==0): 27,983,290 (34.5%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0012C0001.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 35,451,101 (43.8%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0012C0010.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 12,754,095 (15.7%)
Processing: mosaic_2018_DOY212_deltayr08_months

[Parallel(n_jobs=35)]: Done  67 out of 103 | elapsed:  2.0min remaining:  1.1min


Processing: mosaic_2018_DOY212_deltayr08_months78_R0011C0004.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 34,861,447 (43.0%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0007C0003.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 30,661,180 (37.9%)


[Parallel(n_jobs=35)]: Done  78 out of 103 | elapsed:  2.1min remaining:   40.7s


Processing: mosaic_2018_DOY212_deltayr08_months78_R0011C0012.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 7,447,143 (9.2%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0012C0002.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 72,266,102 (89.2%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0012C0011.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 48,534,208 (59.9%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0008C0001.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 51,105,023 (63.1%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0011C0002.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 63,264,259 (78.1%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0012C0005.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 7,848,902 (9.7%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0013C0008.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 778,359 (1.0%)
Processing: mosaic_2018_DOY212_deltayr08_months78

[Parallel(n_jobs=35)]: Done  89 out of 103 | elapsed:  2.3min remaining:   21.6s


Processing: mosaic_2018_DOY212_deltayr08_months78_R0011C0008.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 50,971,499 (62.9%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0012C0012.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 7,716,351 (9.5%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0013C0010.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 24,632,353 (30.4%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0012C0003.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 77,331,850 (95.5%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0013C0011.tif
  Total pixels: 81,000,000
Processing: mosaic_2018_DOY212_deltayr08_months78_R0011C0011.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 55,492,052 (68.5%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0013C0003.tif
  Total pixels: 81,000,000
  Valid pixels (flag==0): 54,628,629 (67.4%)
Processing: mosaic_2018_DOY212_deltayr08_months78_R0013C0012.tif
  Total pixels: 81,000,

[Parallel(n_jobs=35)]: Done 100 out of 103 | elapsed:  2.4min remaining:    4.3s


CPU times: user 8min 3s, sys: 1min 45s, total: 9min 49s
Wall time: 2min 22s


[Parallel(n_jobs=35)]: Done 103 out of 103 | elapsed:  2.4min finished


In [59]:
# Save detailed results
area_stats_df.to_csv(f'{DIR_PAPER_FIGS}/{NAME_STEM}_height_class_area_{MOSAIC_SUBDIR}.csv', index=False)

In [60]:
area_stats_df.head()

Unnamed: 0,file,area_0-0.35m,n_pixels_0-0.35m,area_0.35-1.0m,n_pixels_0.35-1.0m,area_1.0-1.5m,n_pixels_1.0-1.5m,area_1.5-3.0m,n_pixels_1.5-3.0m,area_3.0-4.0m,...,area_10-20m,n_pixels_10-20m,area_20-50m,n_pixels_20-50m,total_area_km2,total_area_aoi_km2,total_valid_area_km2,n_valid_pixels,n_total_pixels,pct_valid
0,mosaic_2018_DOY212_deltayr08_months78_R0002C00...,918.4537,9184537,275.2121,2752121,135.0896,1350896,165.4374,1654374,33.8663,...,2.1338,21338,0.0,0,8100.0,2293.6794,1581.5521,15815521,81000000,19.525335
1,mosaic_2018_DOY212_deltayr08_months78_R0007C00...,504.3112,5043112,108.5918,1085918,54.0973,540973,133.6037,1336037,26.5387,...,0.0081,81,0.0,0,8100.0,1230.7015,834.8904,8348904,81000000,10.307289
2,mosaic_2018_DOY212_deltayr08_months78_R0001C00...,223.5942,2235942,16.3597,163597,2.2706,22706,1.4329,14329,0.1923,...,0.0167,167,0.0,0,8100.0,772.7761,244.2383,2442383,81000000,3.015288
3,mosaic_2018_DOY212_deltayr08_months78_R0004C00...,643.9302,6439302,115.8161,1158161,38.8588,388588,70.3419,703419,24.0641,...,12.4752,124752,0.0,0,8100.0,8100.0,990.7821,9907821,81000000,12.231878
4,mosaic_2018_DOY212_deltayr08_months78_R0000C00...,115.2566,1152566,17.3704,173704,4.088,40880,4.615,46150,1.1746,...,0.018,180,0.0,0,8100.0,1527.9342,144.4919,1444919,81000000,1.783851
