In [1]:
# List the shared Aim2 code directory; it holds `__functions.py`, which the
# setup cell below puts on sys.path and star-imports.
!ls ../../Aim2/code/Python

00fire-perimeters-EVT.ipynb
00fire-perimeters-TreeMap.ipynb
01ics-geolocate.ipynb
02afd-nasa-firms.ipynb
03a_viirs_earthaccess-swath.ipynb
03b_viirs_earthaccess-swath-geolocate.ipynb
03c_viirs-afd-processing.ipynb
04a_TreeMap_FIA-Prep.ipynb
04b_TreeMap_FIA-FORTYPCD.ipynb
04b_TreeMap_FIA-TreeTable.ipynb
04b_TreeMap_FIA-TreeTable-V2.ipynb
04d_GEE-CBI_Summary.ipynb
04d_GEE-GridMet.ipynb
04d_GEE-MOD15-LAI.ipynb
04d_GEE-S2MSI.ipynb
04d_GEE-Topo.ipynb
04d_GEE-TreeMap.ipynb
05a_TreeMap_Forest-Composition.ipynb
05b_FRP-CBIbc.ipynb
_aspen-cover_comparison.ipynb
earthaccess
_figures.ipynb
fired
_fire-summary.ipynb
__functions.py
_geoloc-sample-dist.ipynb
_gridstats_lc_compare.ipynb
__pycache__
_west-fire-season-length.ipynb


In [16]:
"""
10-meter aspen patch metrics
author: maxwell.cook@colorado.edu
"""

import os, sys, time
import concurrent.futures
import multiprocessing as mp
from multiprocessing import Pool, cpu_count

import geopandas as gpd
import numpy as np
import pandas as pd
import pylandstats as pls
from shapely.geometry import box
from tqdm.notebook import tqdm

# Custom functions: make the shared Aim2 helper module importable.
# The entry is relative, so it is resolved against the kernel's working directory.
_functions_dir = '../../Aim2/code/Python'
if _functions_dir not in sys.path:  # do not stack duplicate entries when the cell is re-run
    sys.path.append(_functions_dir)
# NOTE(review): the star import hides where names come from. Later cells call
# `compute_band_stats` and `rxr`, neither of which is imported by name in this
# notebook, so they presumably arrive through this line — consider explicit imports.
from __functions import *

maindir = '/home/jovyan/'
datadir = os.path.join(maindir,'data-store/data/iplant/home/maco4303/data')
# Fail early, with a readable message, when the data store is not available
if not os.path.isdir(datadir):
    raise FileNotFoundError(f"Data directory not found: {datadir}")

proj = 'EPSG:5070' # albers

print("Ready to go !")

Ready to go !


In [3]:
# Show the kernel's working directory; the relative sys.path entry added in the
# setup cell is resolved against this location.
os.getcwd()

'/home/jovyan/data-store/aspen-fire/Aim3/code'

In [4]:
# List the top-level folders available in the data directory.
os.listdir(datadir)

['HYR-SENSE', 'JFSP', 'OPP']

In [5]:
# Read the future-fire grid (GeoPackage) and preview the first rows.
# Each row carries a `grid_id`, trend attributes, ecoregion names and a polygon geometry.
fp = os.path.join(datadir,'JFSP/Aim3/future_fire_grid_trend.gpkg')
future_fire = gpd.read_file(fp)
future_fire.head()

Unnamed: 0,grid_id,trend_area,trend_count,p_area,p_count,NA_L3NAME,US_L4NAME,US_L4CODE,geometry
0,159230.0,190.629216,0.019221,3.590523e-11,1.436235e-13,Southern Rockies,Foothill Shrublands,21d,"POLYGON ((-861518.632 2246765.246, -858285.087..."
1,159231.0,190.629216,0.019221,3.590523e-11,1.436235e-13,Southern Rockies,Foothill Shrublands,21d,"POLYGON ((-858285.087 2246403.307, -855051.389..."
2,159232.0,190.629216,0.019221,3.590523e-11,1.436235e-13,Southern Rockies,Foothill Shrublands,21d,"POLYGON ((-855051.389 2246042.730, -851817.539..."
3,159233.0,301.28274,0.026862,1.926234e-07,1.67749e-09,Southern Rockies,Crystalline Mid-Elevation Forests,21c,"POLYGON ((-851817.539 2245683.513, -848583.540..."
4,159234.0,411.936265,0.034504,3.85211e-07,3.354837e-09,Southern Rockies,Crystalline Mid-Elevation Forests,21c,"POLYGON ((-848583.540 2245325.656, -845349.389..."


In [6]:
# Check for duplicated grid IDs and remove them (the first occurrence is kept),
# so every grid cell is represented by exactly one row in the cells that follow.
n = future_fire.duplicated(subset=['grid_id']).sum()
if n > 0:
    print(f"\nThere are [{n}] duplicate rows.\n")
    # Re-running the cell is safe: after the drop no duplicates remain.
    future_fire = future_fire.drop_duplicates(subset=['grid_id'], keep='first')
else:
    print("\nNo duplicates at this stage.\n")


No duplicates at this stage.



In [7]:
# Load the 10-m aspen map (classification).
aspen10_fp = os.path.join(datadir,'JFSP/Aim3/s2aspen_distribution_10m_y2019_CookEtAl.tif')
# No masking is requested here: `open_rasterio` has no `mask` argument (the
# documented switch is `masked`), so the values keep their on-disk integer
# dtype and the fill value remains an ordinary pixel value recorded in `_FillValue`.
# NOTE(review): pass `masked=True` only if NaN-masked floats are really wanted;
# that changes the dtype seen by every downstream cell — confirm before switching.
aspen10 = rxr.open_rasterio(aspen10_fp, cache=False).squeeze()
print(f"\n{aspen10}\n")
print(aspen10.rio.crs)


<xarray.DataArray (y: 95433, x: 64151)>
[6122122383 values with dtype=uint8]
Coordinates:
    band         int64 1
  * x            (x) float64 -1.217e+06 -1.217e+06 ... -5.76e+05 -5.76e+05
  * y            (y) float64 2.309e+06 2.309e+06 ... 1.355e+06 1.355e+06
    spatial_ref  int64 0
Attributes:
    AREA_OR_POINT:  Area
    _FillValue:     255
    scale_factor:   1.0
    add_offset:     0.0

EPSG:5070


In [8]:
t0 = time.time()

# Zonal statistics of the aspen classification within every grid cell
aspen10_grids = compute_band_stats(
    geoms=future_fire,
    image_da=aspen10,
    id_col='grid_id',
    attr='aspen10',
    ztype='categorical',
)

# Retain only the rows that describe the aspen class (value 1)
aspen10_grids = aspen10_grids.loc[aspen10_grids['aspen10'].eq(1)]

# Quick look at the result
print(aspen10_grids.head())

# Report the wall-clock time in minutes
t1 = (time.time() - t0) / 60
print(f"\nTotal elapsed time: {t1:.2f} minutes.\n")
print("\n~~~~~~~~~~\n")

    grid_id  aspen10  count  total_pixels  pct_cover
1  159230.0        1      8        146335   0.005467
3  159231.0        1     23        146342   0.015717
5  159232.0        1    764        146346   0.522050
7  159233.0        1    100        146336   0.068336
9  159234.0        1    369        146336   0.252159

Total elapsed time: 0.69 minutes.


~~~~~~~~~~



In [9]:
# Summary statistics of percent aspen cover across the grid cells kept above.
aspen10_grids['pct_cover'].describe()

count    9212.000000
mean        6.760243
std        11.574808
min         0.000617
25%         0.204016
50%         1.432681
75%         7.450748
max        80.162397
Name: pct_cover, dtype: float64

In [10]:
# Subset the spatial grid to the cells that contain aspen pixels.
# This is a filter on grid_id; the zonal-stat columns are not joined on.
ff_aspen_grids = future_fire.loc[
    future_fire['grid_id'].isin(aspen10_grids['grid_id'].unique())
]
print(len(ff_aspen_grids))

9212


In [11]:
# Number of logical CPUs reported by the OS; the parallel cell sizes its worker pool from this.
os.cpu_count()

128

In [18]:
# Inspect the columns that will be carried into the per-grid records.
ff_aspen_grids.columns

Index(['grid_id', 'trend_area', 'trend_count', 'p_area', 'p_count',
       'NA_L3NAME', 'US_L4NAME', 'US_L4CODE', 'geometry'],
      dtype='object')

In [20]:
# Start the wall-clock timer for the patch analysis
t0 = time.time()

# Patch analysis: class-level landscape metrics computed for every grid cell
cls_metrics = [
    'number_of_patches',
    'patch_density',
    'largest_patch_index',
]

# Function to process a single gridcell
def process_grid(grid_dict):
    """Compute class-level patch metrics for a single grid cell.

    Parameters
    ----------
    grid_dict : dict
        Record for one grid cell. Must provide ``"grid_id"`` and a Shapely
        ``"geometry"`` expressed in the CRS of ``future_fire``.

    Returns
    -------
    pandas.DataFrame or None
        Result of ``compute_class_metrics_df`` for the metrics listed in
        ``cls_metrics``, with the former index (the class value) kept as a
        column and a ``grid_id`` column added. ``None`` when the cell holds no
        non-zero data pixels, or when processing fails (the error is printed).

    Notes
    -----
    Reads the module-level ``aspen10``, ``future_fire`` and ``cls_metrics``.
    """
    try:
        # Extract required fields from the record
        grid_id = grid_dict["grid_id"]
        geometry = grid_dict["geometry"]

        # Axis-aligned bounding box of the cell
        bounds = geometry.bounds
        bbox = box(*bounds)

        # Window the raster to the cell extent first. Clipping the full-size
        # raster directly makes every task mask (and read) the whole array,
        # which is what kept the pool from ever finishing a cell.
        window = aspen10.rio.clip_box(*bounds, crs=future_fire.crs)
        arr = window.rio.clip([bbox], future_fire.crs, drop=True).values

        # Send nodata to 0 so it is neither counted as data nor reported as a
        # class. The raster is read unmasked, so its fill value shows up as a
        # regular integer; NaN is handled as well in case a masked (float)
        # array is supplied instead.
        fill = aspen10.rio.nodata
        if fill is not None and not np.isnan(fill):
            arr = np.where(arr == fill, 0, arr)
        if np.issubdtype(arr.dtype, np.floating):
            arr = np.where(np.isnan(arr), 0, arr)

        # Nothing but background/nodata in this cell
        if not arr.any():
            return None

        # Generate the landscape metrics (10 m x 10 m pixels).
        # NOTE(review): this relies on pylandstats treating 0 as nodata by
        # default, which also means area-normalised metrics are relative to the
        # non-zero pixels only — confirm for the installed version and intent.
        ls = pls.Landscape(arr, res=(10, 10))
        patches = ls.compute_class_metrics_df(metrics=cls_metrics)

        # Keep the class label as a column: the caller concatenates with
        # ignore_index=True, which would otherwise discard it.
        patches = patches.reset_index()
        patches["grid_id"] = grid_id  # Attach grid ID for reference
        return patches

    except Exception as e:
        # Best effort: report and skip the cell. `.get` keeps the handler itself
        # from raising when the record has no grid_id.
        print(f"Error processing grid {grid_dict.get('grid_id')}: {e}")
        return None

# Parallel processing with progress tracking
if __name__ == "__main__":
    # process_grid only reads the ID and the geometry; leaving the other
    # attribute columns behind keeps the payload pickled for each task small.
    grid_list = ff_aspen_grids[["grid_id", "geometry"]].to_dict(orient="records")

    # Leave two cores free, never request more workers than there are tasks,
    # and cope with os.cpu_count() returning None.
    num_workers = max(1, min((os.cpu_count() or 1) - 2, len(grid_list)))

    # One slot per input cell, so the merged table follows the input order
    # no matter which worker finishes first.
    slots = [None] * len(grid_list)

    # NOTE(review): the workers read module-level objects (the raster, the grid
    # CRS, the metric list); this presumably relies on the fork start method —
    # confirm on the target platform.
    with concurrent.futures.ProcessPoolExecutor(max_workers=num_workers) as executor:
        futures = {executor.submit(process_grid, grid): i for i, grid in enumerate(grid_list)}

        try:
            # Track progress with tqdm
            for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures), desc="Processing Grids"):
                slots[futures[future]] = future.result()
        except KeyboardInterrupt:
            # Drop everything still queued so the interrupt returns promptly
            # instead of waiting for the whole backlog to run.
            for pending in futures:
                pending.cancel()
            raise

    # Cells that were empty or failed come back as None
    results = [res for res in slots if res is not None]

    # Merge all results into a single DataFrame
    if results:
        patch_metrics_df = pd.concat(results, ignore_index=True)
    else:
        patch_metrics_df = pd.DataFrame()  # Handle empty case

    t1 = (time.time() - t0) / 60
    print(f"\nTotal elapsed time: {t1:.2f} minutes.\n")

Processing Grids:   0%|          | 0/9212 [00:00<?, ?it/s]

KeyboardInterrupt: 