In [49]:
"""
Identify wildfire events with >= 1% pre-fire aspen forest cover across western U.S. ecoregions
Landcover data: LANDFIRE Existing Vegetation Type (EVT) ca. 2016
Author: maxwell.cook@colorado.edu
"""

import os, time, glob, gc
import numpy as np
import pandas as pd
import geopandas as gpd
import rioxarray as rxr
import rasterio as rio
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import multiprocessing as mp
import dask

from shapely.geometry import box
from shapely.geometry import Polygon, MultiPolygon
from datetime import datetime
from rasterstats import zonal_stats

import warnings
warnings.filterwarnings("ignore") # suppresses annoying geopandas warning

# Target CRS used when reprojecting the vector layers below
proj = 'EPSG:5070'

# Local (non-CyVerse) paths, kept commented out for reference
# maindir = '/Users/max/Library/CloudStorage/OneDrive-Personal/mcook/'
# projdir = os.path.join(maindir, 'aspen-fire/Aim2/')

# CyVerse paths: `projdir` is the project folder inside the mounted data-store
maindir = '/home/jovyan/' # cyverse
projdir = os.path.join(maindir,'data-store/data/iplant/home/maco4303/data/JFSP')

print("Ready to go !")

Ready to go !


In [2]:
# Folder that receives the EVT summary tables written later in the notebook.
# results_dir = os.path.join(projdir,'data/tabular/mod/EVT/')
results_dir = os.path.join(projdir,'results/') # cyverse
# exist_ok=True keeps the cell re-runnable and avoids the check-then-create race
os.makedirs(results_dir, exist_ok=True)
print(f"Saving EVT summary tables to {results_dir}")

Saving EVT summary tables to /home/jovyan/data-store/data/iplant/home/maco4303/data/JFSP/results/


In [47]:
def compute_band_stats(geoms, image_da, id_col):
    """
    Compute categorical zonal statistics (pixel count and percent cover per
    raster class) for every geometry in `geoms`.

    Parameters
    ----------
    geoms : GeoDataFrame-like
        Must contain `id_col` and a 'geometry' column. Presumably in the same
        CRS as `image_da`; this is not checked here.
    image_da : DataArray-like exposing the rioxarray `.rio` accessor
        Single-band raster. `.values` is read, so the full array is
        materialised in memory before sampling.
    id_col : str
        Name of the column that identifies each geometry.

    Returns
    -------
    pandas.DataFrame
        One row per (geometry, raster class) with the columns
        [id_col, 'evt', 'count', 'total_pixels', 'pct_cover'].
        Geometries that cover no valid pixels contribute no rows; if nothing
        was sampled at all an empty frame with those columns is returned.

    Notes
    -----
    NOTE(review): the original docstring said this is handed to
    multiprocessing workers; the visible callers invoke it directly.
    """
    out_cols = [id_col, 'evt', 'count', 'total_pixels', 'pct_cover']

    affine = image_da.rio.transform()
    nodataval = image_da.rio.nodata
    arr = image_da.values

    stats = zonal_stats(
        vectors=geoms[[id_col, 'geometry']],
        raster=arr,
        affine=affine,
        nodata=nodataval,
        categorical=True,
        all_touched=True,
        geojson_out=True
    )

    # Flatten each feature's properties into (id, class, count) records.
    # Every property other than the id is a raster class -> pixel count pair;
    # a feature with no class entries simply yields no records instead of the
    # NaN row that broke the previous explode-based reshaping.
    records = [
        (feature['properties'].get(id_col), int(evt), count)
        for feature in stats
        for evt, count in feature['properties'].items()
        if evt != id_col
    ]

    del arr, stats # clean up
    gc.collect()

    if not records:
        return pd.DataFrame(columns=out_cols)

    props = pd.DataFrame(records, columns=[id_col, 'evt', 'count'])

    # Total sampled pixels per geometry, then each class's share of that total
    props['total_pixels'] = props.groupby(id_col)['count'].transform('sum')
    props['pct_cover'] = (props['count'] / props['total_pixels']) * 100

    return props


def create_bounds(gdf, buffer=None):
    """
    Replace each feature's geometry with its bounding box.

    When `buffer` is given, every box is additionally passed through
    `.buffer(buffer)` (distance in the units of the layer's CRS). The input
    frame is not modified; a copy carrying the new geometries is returned.
    """
    envelopes = gdf.geometry.apply(lambda shape: box(*shape.bounds))
    if buffer is not None:
        envelopes = envelopes.buffer(buffer)

    def _as_polygonal(shape):
        # Polygons are passed through Polygon(); anything else is wrapped
        if shape.geom_type == 'Polygon':
            return Polygon(shape)
        return MultiPolygon([shape])

    # Work on a copy so the caller's geodataframe keeps its geometries
    result = gdf.copy()
    result.geometry = envelopes.geometry.apply(_as_polygonal)
    return result
    

print("Functions loaded !")

Functions loaded !


In [4]:
"""
Copy the data-store to 'local' directory
This enables quick access to data files in CyVerse
"""
import shutil

# Set a destination path (this is a 'local' and temporary path)
dest = '/home/jovyan/data-store/aspen-fire/data' 
# makedirs (not mkdir) so a missing parent folder is created as well, and
# exist_ok=True so the cell can be re-run without a prior existence check
os.makedirs(dest, exist_ok=True)
    
# Using 'shutil' package, copy all the files over
shutil.copytree(projdir, dest, dirs_exist_ok=True)

'/home/jovyan/data-store/aspen-fire/data'

In [5]:
# Load the land cover data

In [6]:
# LANDFIRE Existing Vegetation Type (ca. 2016) raster
# evt_fp = os.path.join(maindir,'data/landcover/LANDFIRE/LF2016_EVT_200_CONUS/Tif/LC16_EVT_200.tif')
evt_fp = os.path.join(dest,'LC16_EVT_200.tif') # cyverse

# Open with masking and chunking requested, then drop length-1 dimensions
evt_da = rxr.open_rasterio(evt_fp, masked=True, cache=False, chunks='auto')
evt_da = evt_da.squeeze()

# Pull out the basic raster metadata for a quick sanity check
shp = evt_da.shape
gt = evt_da.spatial_ref.GeoTransform
wkt = evt_da.rio.crs
nd = evt_da.rio.nodata
print(
    f"Shape: {shp}; \n"
    f"GeoTransform: {gt}; \n"
    f"WKT: {wkt}; \n"
    f"NoData Value: {nd}; \n"
    f"Data Type: {evt_da[0].dtype}")
gc.collect()

Shape: (97283, 154207); 
GeoTransform: -2362425.0 30.0 0.0 3177435.0 0.0 -30.0; 
WKT: EPSG:5070; 
NoData Value: nan; 
Data Type: float32


57

In [7]:
# Load FIRED perimeters

In [8]:
# FIRED daily perimeters (2012-2023), read and reprojected to `proj` in one step
# daily_fp = os.path.join(maindir,'aspen-fire/Aim2/data/spatial/raw/FIRED/fired-daily_west_2012_to_2023.gpkg')
daily_fp = os.path.join(dest,'fired-daily_west_2012_to_2023.gpkg') # cyverse
daily = gpd.read_file(daily_fp).to_crs(proj) # ensure albers projection
daily.columns

Index(['did', 'id', 'date', 'ig_date', 'ig_day', 'ig_month', 'ig_year',
       'last_date', 'event_day', 'event_dur', 'pixels', 'tot_pix', 'dy_ar_km2',
       'tot_ar_km2', 'fsr_px_dy', 'fsr_km2_dy', 'mx_grw_px', 'mn_grw_px',
       'mu_grw_px', 'mx_grw_km2', 'mn_grw_km2', 'mu_grw_km2', 'mx_grw_dte',
       'x', 'y', 'ig_utm_x', 'ig_utm_y', 'lc_code', 'lc_mode', 'lc_name',
       'lc_desc', 'lc_type', 'eco_mode', 'eco_name', 'eco_type', 'geometry'],
      dtype='object')

In [9]:
# Buffer the daily perimeters outward by a fixed distance.
# NOTE(review): despite its name, `n_pixels` holds a distance in meters. The
# message below divides by 500, so one "pixel" is treated as 500 m - presumably
# the FIRED grid cell rather than the 30 m EVT cell. Confirm.
n_pixels = 500  # meters
daily_ = daily.copy() # make a copy of the original data
daily_['geometry'] = daily_['geometry'].buffer(n_pixels)
print(f"Buffered by {n_pixels/500} pixel(s).")

Buffered by 1.0 pixel(s).


In [10]:
# Crop the image data to the fire bounds

In [11]:
# Crop the raster to the extent of the buffered fire perimeters, padded on
# every side so pixels along the outer edge of the fires are kept.
# The pad must be SUBTRACTED from the minimum corner and ADDED to the maximum
# corner; adding it to all four values only shifts the window north-east and
# trims the western and southern edges.
crop_pad = 1000  # same units as the layer CRS (meters for the buffers above)
bounds = daily_.total_bounds  # (minx, miny, maxx, maxy)
evt_da_crop = evt_da.rio.clip_box(
    minx=bounds[0]-crop_pad, 
    miny=bounds[1]-crop_pad, 
    maxx=bounds[2]+crop_pad, 
    maxy=bounds[3]+crop_pad
)
del evt_da, bounds
gc.collect()

66

In [12]:
# Calculate the daily land cover (EVT) proportions for daily perimeters

In [13]:
# Sample EVT classes within every buffered daily perimeter and time the run
started = time.time()

sampled = compute_band_stats(daily_, evt_da_crop, 'did')

t1 = (time.time() - started) / 60
print(f"Total elapsed time: {t1:.2f} minutes.")
print("\n~~~~~~~~~~\n")

Total elapsed time: 2.26 minutes.

~~~~~~~~~~



In [14]:
# Preview: one row per (daily perimeter, EVT class) with counts and percent cover
sampled.head()

Unnamed: 0,did,evt,count,total_pixels,pct_cover
0,0000802601a74f5edd520234febfaeff,7011,3,14864,0.020183
1,0000802601a74f5edd520234febfaeff,7049,1,14864,0.006728
2,0000802601a74f5edd520234febfaeff,7050,1,14864,0.006728
3,0000802601a74f5edd520234febfaeff,7080,1446,14864,9.728202
4,0000802601a74f5edd520234febfaeff,7106,147,14864,0.988967


In [15]:
# Force a garbage-collection pass before loading more data
gc.collect() # clear any unused memory

0

In [16]:
# Load the lookup info to get landcover type

In [22]:
# EVT attribute table: maps raster VALUE codes to vegetation names and classes.
# The path gets its own name so `lookup` only ever refers to the DataFrame.
# lookup = os.path.join(maindir,'data/landcover/LANDFIRE/LF2016_EVT_200_CONUS/CSV_Data/LF16_EVT_200.csv')
lookup_fp = os.path.join(dest, 'LF16_EVT_200.csv') # cyverse
lookup = pd.read_csv(lookup_fp)
print(lookup.columns)

Index(['VALUE', 'EVT_NAME', 'LFRDB', 'EVT_FUEL', 'EVT_FUEL_N', 'EVT_LF',
       'EVT_PHYS', 'EVT_GP', 'EVT_GP_N', 'SAF_SRM', 'EVT_ORDER', 'EVT_CLASS',
       'EVT_SBCLS', 'R', 'G', 'B', 'RED', 'GREEN', 'BLUE'],
      dtype='object')


In [18]:
# Keep only the attribute columns needed downstream
lookup = lookup.loc[:, ['VALUE','EVT_NAME','EVT_PHYS','EVT_GP_N','EVT_CLASS']]
# Left join so every sampled row is kept, matching raster code to lookup VALUE
props_df = pd.merge(sampled, lookup, how='left', left_on='evt', right_on='VALUE')
props_df.head()

0

In [19]:
# Retrieve the FIRED event id for each daily perimeter from the original dataframe.
# Any 'id' column left over from a previous run of this cell is dropped first,
# so re-running does not produce id_x / id_y and break the lookups below.
props_df = (
    props_df
    .drop(columns='id', errors='ignore')
    .merge(daily[['id','did']], left_on='did', right_on='did', how='left')
)
print(f"There are [{len(props_df['id'].unique())}] unique fires in the sampled data.\n\t[{len(props_df['did'].unique())}] individual daily perimeters.")

There are [18529] unique fires in the sampled data.
	[63264] individual daily perimeters.


In [23]:
# Inspect the column names of the merged daily table
props_df.columns

Index(['id', 'did', 'evt', 'count', 'total_pixels', 'pct_cover', 'EVT_NAME',
       'EVT_PHYS', 'EVT_GP_N', 'EVT_CLASS'],
      dtype='object')

In [21]:
# Put the identifiers first and keep only the listed columns
# (the lookup join key 'VALUE' is not in the list, so it is dropped here)
props_df = props_df[['id', 'did', 'evt', 'count', 'total_pixels', 'pct_cover', 
                     'EVT_NAME', 'EVT_PHYS', 'EVT_GP_N', 'EVT_CLASS']]

In [24]:
# Save the daily-perimeter EVT summary to the results folder.
# NOTE: to_csv is called without index=False, so the row index is written as
# an extra leading column.
out_fp = os.path.join(results_dir, 'fired-daily_west_2012_to_2023-EVT.csv')
props_df.to_csv(out_fp)
print(f"Saved to {out_fp}")

In [26]:
# Drop the daily-perimeter objects before the event-level pass.
# (`daily_` and `evt_da_crop` are not deleted here.)
del sampled, props_df, daily
gc.collect()

In [None]:
# Calculate event-level EVT proportions

In [28]:
# FIRED event perimeters (2012-2023), read and reprojected to `proj` in one step
# events = os.path.join(maindir,'aspen-fire/Aim2/data/spatial/raw/FIRED/fired-events_west_2012_to_2023.gpkg')
events_fp = os.path.join(dest, 'fired-events_west_2012_to_2023.gpkg') # cyverse
events = gpd.read_file(events_fp).to_crs(proj) # ensure albers projection
events.columns

Index(['id', 'ig_date', 'ig_day', 'ig_month', 'ig_year', 'last_date',
       'event_dur', 'tot_pix', 'tot_ar_km2', 'fsr_px_dy', 'fsr_km2_dy',
       'mx_grw_px', 'mn_grw_px', 'mu_grw_px', 'mx_grw_km2', 'mn_grw_km2',
       'mu_grw_km2', 'mx_grw_dte', 'x', 'y', 'ig_utm_x', 'ig_utm_y', 'lc_code',
       'lc_mode', 'lc_name', 'lc_desc', 'lc_type', 'eco_mode', 'eco_name',
       'eco_type', 'tot_perim', 'na_l3name', 'geometry'],
      dtype='object')

In [29]:
# Buffer the event perimeters outward by a fixed distance.
# NOTE(review): despite its name, `n_pixels` holds a distance in meters. The
# message below divides by 500, so one "pixel" is treated as 500 m - confirm
# this is the intended pixel size.
n_pixels = 500  # meters
events_ = events.copy() # make a copy of the original data
events_['geometry'] = events_['geometry'].buffer(n_pixels)
print(f"Buffered by {n_pixels/500} pixel(s).")

Buffered by 1.0 pixel(s).


In [34]:
# Sample EVT classes within every buffered event perimeter and time the run
started = time.time()

sampled = compute_band_stats(events_, evt_da_crop, 'id')

t1 = (time.time() - started) / 60
print(f"Total elapsed time: {t1:.2f} minutes.")
print("\n~~~~~~~~~~\n")

Total elapsed time: 0.73 minutes.

~~~~~~~~~~



In [35]:
# Keep only the attribute columns needed for the join
lookup = lookup[['VALUE','EVT_NAME','EVT_PHYS','EVT_GP_N','EVT_CLASS']]
# Left join: attach the EVT attributes to each (event, class) row
props_df = sampled.merge(lookup, left_on='evt', right_on='VALUE', how='left')
props_df.head()

Unnamed: 0,id,evt,count,total_pixels,pct_cover,VALUE,EVT_NAME,EVT_PHYS,EVT_GP_N,EVT_CLASS
0,5,7008,7,32955,0.021241,7008,North Pacific Oak Woodland,Hardwood,Western Oak Woodland and Savanna,Open tree canopy
1,5,7035,61,32955,0.185101,7035,North Pacific Dry Douglas-fir-(Madrone) Forest...,Conifer,Douglas-fir Forest and Woodland,Closed tree canopy
2,5,7036,863,32955,2.618723,7036,North Pacific Seasonal Sitka Spruce Forest,Conifer,Sitka Spruce Forest,Closed tree canopy
3,5,7037,4023,32955,12.207556,7037,North Pacific Maritime Dry-Mesic Douglas-fir-W...,Conifer,Douglas-fir-Western Hemlock Forest and Woodland,Closed tree canopy
4,5,7039,21524,32955,65.313306,7039,North Pacific Maritime Mesic-Wet Douglas-fir-W...,Conifer,Douglas-fir-Western Hemlock Forest and Woodland,Closed tree canopy


In [36]:
# Inspect the column names of the merged event table
props_df.columns

Index(['id', 'evt', 'count', 'total_pixels', 'pct_cover', 'VALUE', 'EVT_NAME',
       'EVT_PHYS', 'EVT_GP_N', 'EVT_CLASS'],
      dtype='object')

In [38]:
# Keep only the listed columns (the lookup join key 'VALUE' is dropped)
props_df = props_df[['id', 'evt', 'count', 'total_pixels', 'pct_cover', 
                     'EVT_NAME', 'EVT_PHYS', 'EVT_GP_N', 'EVT_CLASS']]

In [39]:
# Save the event-level EVT summary to the results folder.
# NOTE: to_csv is called without index=False, so the row index is written as
# an extra leading column.
out_fp = os.path.join(results_dir, 'fired-events_west_2012_to_2023-EVT.csv')
props_df.to_csv(out_fp)
print(f"Saved to {out_fp}")

Saved to /home/jovyan/data-store/data/iplant/home/maco4303/data/JFSP/results/fired-events_west_2012_to_2023-EVT.csv


In [40]:
# Release the event-level tables; `events` and `lookup` are kept for the next section
del sampled, props_df # clean up
gc.collect()

1431

In [None]:
# Compare with a more broad scale approach (event summary within 3km bounds)

In [51]:
# Create new geometry: each event's bounding box, buffered by 3000 (3 km, in CRS units)
bounds = create_bounds(events, buffer=3000)
bounds = bounds[['id','geometry']] # keep it simple
# NOTE: set_crs only assigns the CRS label (allow_override=True replaces any
# existing one); it does not reproject. `events` was reprojected to `proj`
# when it was loaded.
bounds = bounds.set_crs(proj, allow_override=True) # ensure correct crs

In [52]:
# Sample EVT classes within every buffered event bounding box and time the run
started = time.time()

sampled = compute_band_stats(bounds, evt_da_crop, 'id')

t1 = (time.time() - started) / 60
print(f"Total elapsed time: {t1:.2f} minutes.")
print("\n~~~~~~~~~~\n")

Total elapsed time: 1.27 minutes.

~~~~~~~~~~



In [53]:
# Preview: one row per (event bounding box, EVT class) with counts and percent cover
sampled.head()

Unnamed: 0,id,evt,count,total_pixels,pct_cover
0,5,7008,139,153586,0.090503
1,5,7035,334,153586,0.217468
2,5,7036,5453,153586,3.550454
3,5,7037,19118,153586,12.447749
4,5,7039,83327,153586,54.254294


In [54]:
# Subset the codes we want to join, join back to the dataframe
lookup = lookup[['VALUE','EVT_NAME','EVT_PHYS','EVT_GP_N','EVT_CLASS']]
# Merge back to the data
# NOTE(review): unlike the daily and event tables, no column selection follows,
# so the join key 'VALUE' stays in this table and in the CSV written from it.
props_df_3k = sampled.merge(lookup, left_on='evt', right_on='VALUE', how='left')
props_df_3k.head()

Unnamed: 0,id,evt,count,total_pixels,pct_cover,VALUE,EVT_NAME,EVT_PHYS,EVT_GP_N,EVT_CLASS
0,5,7008,139,153586,0.090503,7008,North Pacific Oak Woodland,Hardwood,Western Oak Woodland and Savanna,Open tree canopy
1,5,7035,334,153586,0.217468,7035,North Pacific Dry Douglas-fir-(Madrone) Forest...,Conifer,Douglas-fir Forest and Woodland,Closed tree canopy
2,5,7036,5453,153586,3.550454,7036,North Pacific Seasonal Sitka Spruce Forest,Conifer,Sitka Spruce Forest,Closed tree canopy
3,5,7037,19118,153586,12.447749,7037,North Pacific Maritime Dry-Mesic Douglas-fir-W...,Conifer,Douglas-fir-Western Hemlock Forest and Woodland,Closed tree canopy
4,5,7039,83327,153586,54.254294,7039,North Pacific Maritime Mesic-Wet Douglas-fir-W...,Conifer,Douglas-fir-Western Hemlock Forest and Woodland,Closed tree canopy


In [55]:
# Save the 3 km bounding-box EVT summary to the results folder.
# NOTE: to_csv is called without index=False, so the row index is written as
# an extra leading column.
out_fp = os.path.join(results_dir, 'fired-events_west_2012_to_2023-EVT-3km.csv')
props_df_3k.to_csv(out_fp)
print(f"Saved to {out_fp}")

In [56]:
# Final cleanup of the lookup table and the 3 km results
del lookup, sampled, props_df_3k
gc.collect()

0