In [None]:
"""
Data preparation for wildfire events (2016-2024)
Author: maxwell.cook@colorado.edu
"""

import os, sys

# Custom functions
# Make the project's `code/` folder (relative to the current working directory) importable.
sys.path.append(os.path.join(os.getcwd(),'code/'))
# NOTE(review): names used below without an explicit import in this notebook
# (gpd, pd, gc, rxr, time, plt, matplotlib, compute_band_stats) presumably come
# from this wildcard import -- confirm against code/__functions.py.
from __functions import *

# Target CRS used whenever layers are reprojected further down.
proj = 'EPSG:5070'

# NOTE(review): absolute, machine-specific paths; other users must edit these.
maindir = '/Users/max/Library/CloudStorage/OneDrive-Personal/mcook/'
projdir = os.path.join(maindir, 'aspen-fire/Aim2/')

print("Ready to go !")

In [None]:
# Load western US ecoregions (Level III) and standardize the column names
fp = os.path.join(projdir,'data/spatial/raw/boundaries/na_cec_eco_l3_west.gpkg')
ecol3 = gpd.read_file(fp)
ecol3 = ecol3[['NA_L3NAME','NA_L3CODE','geometry']].rename(
    columns={'NA_L3NAME': 'na_l3name', 'NA_L3CODE': 'na_l3code'}
)
# filter to the Southern Rocky Mountains
srm = ecol3[ecol3['na_l3name'] == 'Southern Rockies']
# `ecol3` is deliberately NOT deleted: the centroid joins and the maps further
# down read the full ecoregion layer, and deleting it here raises a NameError
# on a fresh "Restart & Run All".
srm

### National Interagency Fire Center (NIFC) Interagency Fire Perimeters

Initially, we will use the NIFC fire perimeters to gather events in the Southern Rockies from 2017-2023.

In [None]:
# Load the NIFC perimeters, subset to SRM
fp = os.path.join(maindir,'data/nifc/mod/NIFC_AllPerimeters_to2023.gpkg')
nifc = gpd.read_file(fp)
# Clip with the SRM *geometry only*. Carrying the ecoregion attributes through
# this overlay would leave `na_l3name` on the perimeters, and the later centroid
# overlay against the ecoregions would then return suffixed columns
# (`na_l3name_1` / `na_l3name_2`) instead of `na_l3name`.
srm_geom = srm[['geometry']]
if srm_geom.crs != nifc.crs:
    srm_geom = srm_geom.to_crs(nifc.crs)  # overlay expects both layers in one CRS
nifc = gpd.overlay(nifc, srm_geom, how='intersection')

# Filter for fires since 2017
nifc['FIRE_YEAR'] = nifc['FIRE_YEAR'].astype(int) # force to integer
# 9999 is excluded explicitly (presumably an "unknown year" placeholder -- confirm).
# `.copy()` so the geometry repair below writes to an independent frame, not a slice.
nifc = nifc[(nifc['FIRE_YEAR'] >= 2017) & (nifc['FIRE_YEAR'] != 9999)].copy()
print(f"There are [{len(nifc)}] NIFC perimeters in the SRM.\n{nifc.columns}\n")
print(f"\tFire years: {nifc['FIRE_YEAR'].min()}-{nifc['FIRE_YEAR'].max()}")

# fix the geometries if needed
# Count the invalid geometries BEFORE repairing them so the message reports
# how many were fixed rather than how many remain invalid afterwards.
n_invalid = int((~nifc.geometry.is_valid).sum())
if n_invalid > 0:
    nifc['geometry'] = nifc.geometry.buffer(0)
    print(f"\n\tFixed [{n_invalid}] geometries.")

In [None]:
# Drop small fires: retain perimeters whose GIS_ACRES is 10 or more
nifc = nifc.loc[nifc['GIS_ACRES'].ge(10)]
print(f"[{nifc.shape[0]}] fires >= 10 acres.")

In [None]:
# How many records repeat an IRWINID seen earlier in the table?
# (most likely the same fire mapped with different methods)
n_duplicates = nifc['IRWINID'].duplicated().sum()
pct_duplicates = round(n_duplicates / len(nifc) * 100, 2)
print(f"{n_duplicates} [{pct_duplicates}%] events have a duplicated IRWINID.")

In [None]:
# Inspect the distinct perimeter categories and mapping methods; the
# 'FEATURE_CA' values are what the duplicate-resolution step keys on.
print(nifc['FEATURE_CA'].unique())
print("\n")
print(nifc['MAP_METHOD'].unique())

In [None]:
# Resolve duplicate fire perimeters
# Prefer the record flagged as the final wildfire perimeter
def handle_duplicate_fires(group):
    """
    Pick the single record to keep from a group of rows sharing an IRWINID.

    Parameters
    ----------
    group : DataFrame with a 'FEATURE_CA' column (one row per candidate record).

    Returns
    -------
    One row of `group` (a Series):
        1) the first row whose 'FEATURE_CA' is 'Wildfire Final Fire Perimeter';
        2) otherwise, the first row of the group.
    """
    is_final = group['FEATURE_CA'] == 'Wildfire Final Fire Perimeter'
    candidates = group[is_final] if is_final.any() else group
    return candidates.iloc[0]

# isolate the duplicate records
# Only rows that actually carry an IRWINID can duplicate one another. pandas'
# `duplicated()` treats missing values as equal, while `groupby()` drops missing
# keys by default -- without the `notna()` guard every fire lacking an IRWINID
# would be flagged as a duplicate and then silently lost.
is_duplicate = nifc['IRWINID'].notna() & nifc['IRWINID'].duplicated(keep=False)
duplicates = nifc[is_duplicate]  # Keep all duplicates
# apply our function to the duplicate records
# The helper returns one row per group; its `.name` is that row's index label.
# Selecting by label (rather than rebuilding a frame from Series) keeps the
# geometry dtype and CRS intact. Assumes the index of `nifc` is unique.
keep_labels = [
    handle_duplicate_fires(group).name
    for _, group in duplicates.groupby('IRWINID')
]
resolved = duplicates.loc[keep_labels]
# combine the resolved duplicates with the non-duplicates
non_duplicates = nifc[~is_duplicate]  # keep only non-duplicates
nifc = pd.concat([non_duplicates, resolved], ignore_index=True) # merge back (clean data)

# check on the results
print(f"Number of unique duplicates: {len(duplicates['IRWINID'].unique())}/{len(duplicates)}")
print(f"Number of remaining fires after removing duplicates: {len(resolved)}")
print(f"\nThere are {len(nifc)} unique fire perimeters after resolving duplicates.")

del duplicates, resolved, non_duplicates
gc.collect() # clean up

In [None]:
# Tag each NIFC fire with the US EPA Level III ecoregion containing its centroid
centroids = nifc.copy()  # work on a copy so the perimeters themselves stay untouched
centroids.geometry = centroids.geometry.centroid  # points are what get joined
if centroids.crs != ecol3.crs:
    print("CRS mismatch, fixing it !")
    # bring both layers into the project CRS before overlaying
    centroids, ecol3 = centroids.to_crs(proj), ecol3.to_crs(proj)
# Point-in-polygon overlay: keeps centroids that fall inside an ecoregion
nifc_west = gpd.overlay(centroids, ecol3, how='intersection')
print(f"\nThere are {len(nifc_west)} fire events across western U.S. ecoregions in CONUS (2017-2023).\n")

In [None]:
# Swap the centroid points back to the fire perimeters (polygons):
# keep only the ID + ecoregion name from the overlay and re-attach every
# attribute (including geometry) from `nifc`.
nifc_west = pd.merge(
    nifc_west[['OBJECTID','na_l3name']], nifc,
    left_on='OBJECTID', right_on='OBJECTID', how="left"
)
nifc_west = gpd.GeoDataFrame(nifc_west).rename(columns={'OBJECTID': 'NIFC_Fire_ID'})
print(f"Geometry set to fire perimeters: {nifc_west.geom_type.unique()}")
print(f"Data columns:\n{nifc_west.columns}")

In [None]:
# save this file out.
out_fp = os.path.join(projdir,'data/spatial/mod/NIFC/nifc-interagency_2017_to_2023_west.gpkg')
# Create the output folder if it does not exist yet so the write cannot fail on a fresh checkout
os.makedirs(os.path.dirname(out_fp), exist_ok=True)
nifc_west.to_file(out_fp)
print(f"Saved file to: {out_fp}")

### Fire Events Delineation (FIRED)

In [None]:
# Load the FIRED events and keep ignition years 2017 through 2023 (inclusive)
fp = os.path.join(maindir,'FIRED/data/spatial/raw/events/conus_ak/fired_conus_ak_2000_to_2024_events.gpkg')
fired = gpd.read_file(fp)
fired = fired[fired['ig_year'].between(2017, 2023)]
print(f"There are [{len(fired)}] FIRED perimeters in CONUS (2017-2023).\n{fired.columns}\n")

In [None]:
# filter to retain fires >= 10 acres (same threshold as NIFC)
# `assign` returns a new frame, so the column is not written into a filtered slice.
fired = fired.assign(tot_ar_acres=fired['tot_ar_km2'] * 247.105381)  # km^2 -> acres
fired = fired[fired['tot_ar_acres'] >= 10]
# The message now reports the threshold that is actually applied (10, not 100).
print(f"There are [{len(fired)}] FIRED perimeters >=10 acres.")

In [None]:
# Tag each FIRED event with the US EPA Level III ecoregion containing its centroid
centroids = fired.copy()  # work on a copy so the perimeters themselves stay untouched
centroids.geometry = centroids.geometry.centroid  # points are what get joined
if centroids.crs != ecol3.crs:
    print("CRS mismatch, fixing it !")
    # bring both layers into the project CRS before overlaying
    centroids, ecol3 = centroids.to_crs(proj), ecol3.to_crs(proj)

# Point-in-polygon overlay: keeps centroids that fall inside an ecoregion
fired_west = gpd.overlay(centroids, ecol3, how='intersection')
print(f"\nThere are {len(fired_west)} FIRED events across western U.S. ecoregions in CONUS (2017-2023).\n")

In [None]:
# Swap the centroid points back to the FIRED perimeters (polygons):
# keep only the ID + ecoregion name from the overlay and re-attach every
# attribute (including geometry) from `fired`.
fired_west = pd.merge(
    fired_west[['id','na_l3name']], fired,
    left_on='id', right_on='id', how="left"
)
fired_west = gpd.GeoDataFrame(fired_west).rename(columns={'id': 'FIRED_Fire_ID'})
print(f"Geometry set to fire perimeters: {fired_west.geom_type.unique()}")
print(f"Data columns:\n{fired_west.columns}")
# the unfiltered-by-ecoregion frame is no longer needed
del fired
gc.collect()

In [None]:
# save this file out.
out_fp = os.path.join(projdir,'data/spatial/mod/FIRED/fired_events_2017_to_2023_west.gpkg')
# Create the output folder if it does not exist yet so the write cannot fail on a fresh checkout
os.makedirs(os.path.dirname(out_fp), exist_ok=True)
fired_west.to_file(out_fp)
print(f"Saved file to: {out_fp}")

### Monitoring Trends in Burn Severity (MTBS)

In [None]:
# Load the MTBS burned-area perimeters and list the available attribute columns
fp = os.path.join(maindir,'data/mtbs/mtbs_perimeter_data/mtbs_perims_DD.shp')
mtbs = gpd.read_file(fp)
mtbs.columns

In [None]:
# subset to 2017-2023
# Unparseable ignition dates become NaT (errors='coerce'); their year is NaN and
# they are dropped by the range filter below.
mtbs['Ig_Date'] = pd.to_datetime(mtbs['Ig_Date'], errors='coerce')
mtbs['Ig_Year'] = mtbs['Ig_Date'].dt.year
# Bound the window on both sides so the subset matches the 2017-2023 period
# stated in the message and used for the FIRED events.
mtbs = mtbs[mtbs['Ig_Year'].between(2017, 2023)]
print(f"There are [{len(mtbs)}] MTBS perimeters in CONUS (2017-2023).\n{mtbs.columns}\n")

In [None]:
# Tag each MTBS fire with the US EPA Level III ecoregion containing its centroid
centroids = mtbs.copy()  # work on a copy so the perimeters themselves stay untouched
centroids.geometry = centroids.geometry.centroid  # points are what get joined
if centroids.crs != ecol3.crs:
    print("CRS mismatch, fixing it !")
    # bring both layers into the project CRS before overlaying
    centroids, ecol3 = centroids.to_crs(proj), ecol3.to_crs(proj)

# Point-in-polygon overlay: keeps centroids that fall inside an ecoregion
mtbs_west = gpd.overlay(centroids, ecol3, how='intersection')
print(f"\nThere are {len(mtbs_west)} MTBS events across western U.S. ecoregions (2017-2023).\n")

In [None]:
# Swap the centroid points back to the MTBS perimeters (polygons):
# keep only the ID + ecoregion name from the overlay and re-attach every
# attribute (including geometry) from `mtbs`.
mtbs_west = pd.merge(
    mtbs_west[['Event_ID','na_l3name']], mtbs,
    left_on='Event_ID', right_on='Event_ID', how="left"
)
mtbs_west = gpd.GeoDataFrame(mtbs_west).rename(columns={'Event_ID': 'MTBS_Fire_ID'})
print(f"Geometry set to fire perimeters: {mtbs_west.geom_type.unique()}")
print(f"Data columns:\n{mtbs_west.columns}")
# `mtbs` itself is not deleted here
gc.collect()

In [None]:
# Inspect the distinct assessment types and incident types in the MTBS subset
print(mtbs_west['Asmnt_Type'].unique())
print(mtbs_west['Incid_Type'].unique())

In [None]:
# save this file out.
out_fp = os.path.join(projdir,'data/spatial/mod/MTBS/mtbs_perims_2017_to_2023_west.gpkg')
# Create the output folder if it does not exist yet so the write cannot fail on a fresh checkout
os.makedirs(os.path.dirname(out_fp), exist_ok=True)
mtbs_west.to_file(out_fp)
print(f"Saved file to: {out_fp}")

In [None]:
# Build a census of fire events for the Southern Rockies by pooling the three
# sources. NIFC is the reference layer; MTBS and FIRED are reprojected to its CRS.
nifc = nifc_west.loc[nifc_west['na_l3name'] == 'Southern Rockies']
mtbs = mtbs_west.loc[mtbs_west['na_l3name'] == 'Southern Rockies'].to_crs(nifc.crs)
fired = fired_west.loc[fired_west['na_l3name'] == 'Southern Rockies'].to_crs(nifc.crs)

# function to calculate spatial overlap
def fire_overlap(row, source):
    """
    Largest share of `row`'s area covered by any single feature in `source`.

    Parameters
    ----------
    row : a record exposing a `.geometry` attribute (e.g. one row passed by
        `GeoDataFrame.apply(..., axis=1)`).
    source : GeoDataFrame of perimeters to compare against; assumed to be in
        the same CRS as `row` -- the function does not reproject.

    Returns
    -------
    0 when nothing in `source` intersects `row` (or `row` has no usable area),
    otherwise the maximum of intersection_area / row_area over the intersecting
    features. Overlaps are evaluated per feature, not against their union.
    """
    geom = row.geometry
    # A missing, empty or zero-area geometry cannot overlap anything; returning
    # early also avoids dividing by zero below.
    if geom is None or geom.is_empty or geom.area == 0:
        return 0

    overlap = source[source.intersects(geom)]
    if overlap.empty:
        return 0

    # Area shared with each intersecting feature, as a fraction of this fire
    overlap_ar = overlap.geometry.intersection(geom).area
    overlap_pct = (overlap_ar / geom.area).max()

    return overlap_pct

print(f"\n[{len(nifc)}] NIFC events.")
# apply the function to the MTBS perimeters
# MTBS fires overlapping a NIFC perimeter by 30% or more count as already represented.
mtbs['nifc_overlap'] = mtbs.apply(fire_overlap, axis=1, args=(nifc,))
mtbs_ = mtbs[mtbs['nifc_overlap'] < 0.30]
# Share of MTBS fires matched to NIFC. The original expression evaluated
# `len(mtbs) - (len(mtbs_) / len(mtbs))` because of operator precedence and
# never scaled to percent.
n_matched = len(mtbs) - len(mtbs_)
pct_matched = round(n_matched / len(mtbs) * 100) if len(mtbs) else 0
print(f"\n\t{n_matched} fires [{pct_matched}%] MTBS agreement")

# apply to FIRED events
fired['nifc_overlap'] = fired.apply(fire_overlap, axis=1, args=(nifc,))
# `.copy()` so the column added below is written to an independent frame, not a slice
fired_ = fired[fired['nifc_overlap'] < 0.30].copy()
print(f"\n\t{len(fired_)}/{len(fired)} unique FIRED events.")

# FIRED, not overlapping MTBS
fired_['mtbs_overlap'] = fired_.apply(fire_overlap, axis=1, args=(mtbs_,))
fired_unq = fired_[fired_['mtbs_overlap'] < 0.30]
print(f"\n\t{len(fired_unq)} unique FIRED events (not overlapping MTBS).")

In [None]:
# Stack the three sources into the fire "census": all NIFC events, MTBS events
# not matched to NIFC, and FIRED events matched to neither.
census = gpd.GeoDataFrame(pd.concat([nifc, mtbs_, fired_unq], ignore_index=True))
print(f"Fire census: {len(census)} events.\n")

# Write the un-harmonized table out as a lookup (all source columns retained)
out_fp = os.path.join(projdir,'data/tabular/mod/srm_fire_census_2017_to_2023_lookup.csv')
census.to_csv(out_fp)
print(f"Saved file to: {out_fp}")

In [None]:
# Harmonize IDs, names, dates and sizes across the three sources.
# Rename any remaining 'Event_ID' column to 'MTBS_Fire_ID' (modifies `census` in place).
census.rename(columns={
    'Event_ID': 'MTBS_Fire_ID',
}, inplace=True)

# create a column for the data source
# The first non-null source ID decides the label, checked in the order NIFC, MTBS, FIRED.
census['Source'] = census[['NIFC_Fire_ID', 'MTBS_Fire_ID', 'FIRED_Fire_ID']].apply(
    lambda row: 'NIFC' if pd.notnull(row['NIFC_Fire_ID']) else
                'MTBS' if pd.notnull(row['MTBS_Fire_ID']) else
                'FIRED' if pd.notnull(row['FIRED_Fire_ID']) else None,
    axis=1
)

# create a new Fire ID
# The ID is simply the row position in the concatenated census.
census['Fire_ID'] = census.index.astype(int)
# Coalesce the source-specific columns: `bfill(axis=1).iloc[:, 0]` takes, per row,
# the first non-null value among the listed columns (in the order given).
census['Final_Acres'] = census[['GIS_ACRES', 'BurnBndAc', 'tot_ar_acres']].bfill(axis=1).iloc[:, 0]
# Only MTBS ('Ig_Date') and FIRED ('ig_date') columns are consulted for the start date.
census['Start_Date'] = census[['Ig_Date', 'ig_date']].bfill(axis=1).iloc[:, 0]
census['Start_Date'] = pd.to_datetime(census['Start_Date'], errors='coerce')
census['Start_Date'] = census['Start_Date'].dt.strftime('%Y-%m-%d')
census['Fire_Year'] = census[['FIRE_YEAR', 'Ig_Year', 'ig_year']].bfill(axis=1).iloc[:, 0]
# NOTE(review): the integer cast raises if any row has no year in all three columns.
census['Fire_Year'] = census['Fire_Year'].astype(int)
census['Fire_Name'] = census[['INCIDENT', 'Incid_Name']].bfill(axis=1).iloc[:, 0]
census['Fire_Type'] = census[['FEATURE_CA', 'Incid_Type']].bfill(axis=1).iloc[:, 0]
census['Fire_Type'] = census['Fire_Type'].fillna('Unknown')

# standardize the fire names
census['Fire_Name'] = census['Fire_Name'].str.upper()
# Keep only the harmonized columns (the source-specific columns are left behind)
census_srm = census[['Fire_ID','Source','Fire_Name','Fire_Year','Start_Date',
                     'Final_Acres','Fire_Type','na_l3name','geometry']]
census_srm.head()

In [None]:
# Distinct values of the harmonized Fire_Type column
census_srm['Fire_Type'].unique()

In [None]:
# merge the calf canyon/hermit's peak polygons.
calf_hermit = census_srm[census_srm['Fire_Name'].isin(['CALF CANYON', 'HERMITS PEAK'])]
# Put the largest record first: dissolve() keeps the first row's attribute
# values per group. The sort must be assigned back -- `sort_values` returns a
# new frame, so calling it without assignment has no effect.
calf_hermit = calf_hermit.sort_values(by=['Final_Acres'], ascending=False)
calf_hermit = calf_hermit.dissolve(by='Fire_Year', as_index=False)  # Use 'Fire_Year' or another common column
calf_hermit['Fire_Name'] = 'CALF CANYON-HERMITS PEAK'
# square metres -> acres; assumes the layer's CRS uses metre units -- TODO confirm
calf_hermit['Final_Acres'] = calf_hermit.geometry.area * 0.000247105

# merge back in
census_srm = census_srm[~census_srm['Fire_Name'].isin(['CALF CANYON', 'HERMITS PEAK'])]
census_srm = pd.concat([census_srm, calf_hermit], ignore_index=True)

calf_hermit

In [None]:
# Event count after combining the Calf Canyon / Hermits Peak records
len(census_srm)

In [None]:
# save this file out.
out_fp = os.path.join(projdir,'data/spatial/mod/srm_fire_census_2017_to_2023.gpkg')
# Create the output folder if it does not exist yet so the write cannot fail on a fresh checkout
os.makedirs(os.path.dirname(out_fp), exist_ok=True)
census_srm.to_file(out_fp)
print(f"Saved file to: {out_fp}")

### Calculate the percent forested from the USFS TreeMap (ca. 2016)

In [None]:
# Open the TreeMap (ca. 2016) forest type raster lazily (chunked, masked)
fp = os.path.join(maindir,'data/landcover/USFS/RDS_TreeMap/TreeMap2016_FORTYPCD/TreeMap2016_FORTYPCD.tif')
treemap_da = rxr.open_rasterio(fp, masked=True, cache=False, chunks='auto').squeeze()
# Collect the raster metadata one item at a time
shp = treemap_da.shape
gt = treemap_da.spatial_ref.GeoTransform
wkt = treemap_da.rio.crs
nd = treemap_da.rio.nodata
print(
    f"Shape: {shp}; \n"
    f"GeoTransform: {gt}; \n"
    f"WKT: {wkt}; \n"
    f"NoData Value: {nd}; \n"
    f"Data Type: {treemap_da[0].dtype}")
gc.collect() # clean up

In [None]:
# Crop the raster by the fire bounds (with small buffer)
# total_bounds is (minx, miny, maxx, maxy). To pad the box outward the buffer
# is SUBTRACTED from the minimums and ADDED to the maximums; adding it to all
# four values shifts the box and cuts off fires on the west/south edge.
# NOTE(review): assumes `census_srm` and the raster share a CRS with metre units -- confirm.
bounds = census_srm.total_bounds
treemap_da_crop = treemap_da.rio.clip_box(
    minx=bounds[0]-1000, # 1000 meters for a small buffer
    miny=bounds[1]-1000,
    maxx=bounds[2]+1000,
    maxy=bounds[3]+1000
)
del treemap_da, bounds
gc.collect()

In [None]:
# Pad the wildfire perimeters slightly to account for edge effects.
# The original perimeters in `census_srm` are left unchanged.
buffer = 90  # meters
census_srm_ = census_srm.assign(geometry=census_srm['geometry'].buffer(buffer))
print(f"Buffered fire perimeters by {buffer} meters.")

In [None]:
# Columns available on the buffered perimeters
census_srm_.columns

In [None]:
# Calculate zonal stats for the wildfire perimeters.
t0 = time.time()  # wall-clock start

# see __functions.py
# Summarizes the cropped TreeMap raster within each buffered perimeter, keyed by
# 'Fire_ID'. NOTE(review): later cells read 'FORTYPCD', 'pct_cover' and
# 'total_pixels' from the result -- confirm the exact output schema in __functions.py.
sampled = compute_band_stats(census_srm_, treemap_da_crop, 'Fire_ID', attr='FORTYPCD')

t1 = (time.time() - t0) / 60  # elapsed minutes
print(f"Total elapsed time: {t1:.2f} minutes.")
print("\n~~~~~~~~~~\n")

In [None]:
# Peek at the first rows of the zonal statistics
sampled.head(3)

In [None]:
# The cropped raster is no longer needed once the zonal statistics exist
del treemap_da_crop
gc.collect()  # clean up

In [None]:
# Translate forest type codes into species names
# 1) read the lookup table and keep only the codes present in the samples
fp = os.path.join(projdir,'data/tabular/mod/treemap_fortypcd_species_mapping.csv')
species_df = pd.read_csv(fp)[['FORTYPCD','SpeciesName']]
observed_codes = sampled['FORTYPCD'].unique()
species_df = species_df[species_df['FORTYPCD'].isin(observed_codes)]
print(species_df.columns)

# 2) attach the species name to each sampled record, then
# 3) attach the (buffered) fire attributes and geometry by Fire_ID
fortypcd = gpd.GeoDataFrame(
    sampled
    .merge(species_df, on='FORTYPCD', how='left')
    .merge(census_srm_, on='Fire_ID', how='left')
)
fortypcd.head()

In [None]:
# Keep the records whose forest type is aspen (no minimum-cover threshold is
# applied here) and relabel the cover column accordingly
is_aspen = fortypcd['SpeciesName'] == 'Aspen'
aspen_fires = (
    fortypcd
    .loc[is_aspen, ['Fire_ID','pct_cover','total_pixels']]
    .rename(columns={'pct_cover': 'pct_aspen'})
)
print(f"There are {len(aspen_fires)} fires with some aspen component.")

In [None]:
# Columns carried forward for the aspen join
aspen_fires.columns

In [None]:
# Join aspen percent to the spatial data.
# NOTE(review): the left table is `census_srm_`, i.e. the perimeters buffered by
# 90 m, so the exported geometries are the buffered ones -- confirm this is
# intended rather than the unbuffered `census_srm`.
census_srm_p = pd.merge(census_srm_, aspen_fires, on='Fire_ID', how='left')
# Fires without an aspen record get 0 rather than a missing value
census_srm_p['pct_aspen'] = census_srm_p['pct_aspen'].fillna(0)
census_srm_p = census_srm_p.to_crs(proj)  # ensure the correct projection before exporting
census_srm_p.columns

In [None]:
# Row count after the left join (should match the census if Fire_ID is unique in aspen_fires)
len(census_srm_p)

In [None]:
# save this file out.
# NOTE(review): this is the same path the census was written to earlier in the
# notebook, so that file is replaced by this version (with `pct_aspen`).
out_fp = os.path.join(projdir,'data/spatial/mod/srm_fire_census_2017_to_2023.gpkg')
# Create the output folder if it does not exist yet so the write cannot fail on a fresh checkout
os.makedirs(os.path.dirname(out_fp), exist_ok=True)
census_srm_p.to_file(out_fp)
print(f"Saved file to: {out_fp}")

In [None]:
# Map of aspen fires

In [None]:
### Load the state boundaries
# NOTE(review): `states` is loaded and reprojected but not drawn on this map.
states = gpd.read_file(os.path.join(maindir,'data/boundaries/political/TIGER/tl19_us_states_west_nad83.gpkg'))
states = states.to_crs(4326)

ecol3 = ecol3.to_crs(4326)
srm = ecol3[ecol3['na_l3name'] == 'Southern Rockies']

# Generate centroids
# Centroids are taken in the projected CRS of `census_srm_p` and only then
# converted to lon/lat: centroids computed on geographic coordinates are
# inaccurate and make geopandas emit a warning.
centroid = census_srm_p.copy()
centroid['geometry'] = centroid.geometry.centroid
centroid = centroid.to_crs(4326)
# NOTE(review): 'size' is computed but the plot below sizes markers by
# 'pct_aspen' directly -- confirm which scaling is intended.
centroid['size'] = centroid['pct_aspen'] * 10  # Adjust the scaling factor as necessary

# Make a spatial map of the centroids now
fig, ax = plt.subplots(figsize=(6, 6))

srm.plot(ax=ax, edgecolor='black', linewidth=0.4, color='none')
centroid.plot(
    ax=ax, markersize=centroid['pct_aspen'], 
    column='pct_aspen', cmap='viridis', 
    legend=False, alpha=0.6)

# Manually add colorbar
# The color scale spans the observed range of aspen percent
norm = matplotlib.colors.Normalize(vmin=centroid['pct_aspen'].min(), vmax=centroid['pct_aspen'].max())
sm = plt.cm.ScalarMappable(cmap='viridis', norm=norm)
sm.set_array([])  # Only needed for ScalarMappable

# Adjust the size of the colorbar using the `shrink` parameter
cbar = fig.colorbar(sm, ax=ax, shrink=0.7)
cbar.set_label('Aspen Percent')

plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.tight_layout()

# Save the map as a PNG
# NOTE(review): the file name says "2012to2023" while the census covers 2017-2023.
plt.savefig(os.path.join(projdir, 'figures/Figure1_Aspen-Fires_NIFC_2012to2023.png'), dpi=300, bbox_inches='tight')

plt.show()

In [None]:
# Summarize the forest-type records per US EPA Level III ecoregion
# NOTE(review): 'fire_count' counts non-null `pct_cover` rows; if `fortypcd`
# holds one row per fire AND forest type this is not a count of unique fires -- verify.
df = (
    fortypcd
    .groupby('na_l3name')
    .agg(
        pct_cover_mean=pd.NamedAgg(column='pct_cover', aggfunc='mean'),   # mean percent cover
        fire_count=pd.NamedAgg(column='pct_cover', aggfunc='count'),      # number of records
    )
    .reset_index()
)
df

In [None]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm

# Bars are ordered from the most to the fewest records
df = df.sort_values('fire_count', ascending=False)

# Bar color encodes the mean percent cover, scaled to its observed range
norm = plt.Normalize(df['pct_cover_mean'].min(), df['pct_cover_mean'].max())
cmap = cm.viridis
bar_colors = cmap(norm(df['pct_cover_mean']))

# Bar chart: one bar per ecoregion, height = record count
fig, ax = plt.subplots(figsize=(10, 5))
bars = ax.bar(df['na_l3name'], df['fire_count'], color=bar_colors)

# Colorbar describing the bar colors
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
cbar = fig.colorbar(sm, ax=ax)
cbar.set_label('Mean Percent Cover (%)', rotation=270, labelpad=15)

# Axis labels, title and rotated tick labels
ax.set_xlabel('Ecoregion')
ax.set_ylabel('Fire Count')
ax.set_title('Fire Count and Mean Percent Cover by Ecoregion')
ax.set_xticks(range(len(df)))
ax.set_xticklabels(df['na_l3name'], rotation=45, ha='right')

# Render
plt.tight_layout()
plt.show()

In [None]:
# Attribute columns on the ecoregion layer
ecol3.columns

In [None]:
# Southern Rockies

centroid_srm = centroid[centroid['na_l3name'] == 'Southern Rockies']
ecol3_srm = ecol3[ecol3['na_l3name'] == 'Southern Rockies']

# plot it
fig, ax = plt.subplots(figsize=(6, 6))

ecol3_srm.plot(ax=ax, edgecolor='black', linewidth=0.8, color='none')
# The aspen cover column on `centroid` is 'pct_aspen' (it was renamed from
# 'pct_cover' before the join), so 'pct_cover' would raise a KeyError here.
centroid_srm.plot(
    ax=ax, markersize=centroid_srm['pct_aspen'], 
    column='pct_aspen', cmap='viridis', 
    legend=False, alpha=0.6)

# Manually add colorbar
# The color scale spans the observed range of aspen percent
norm = matplotlib.colors.Normalize(vmin=centroid_srm['pct_aspen'].min(), vmax=centroid_srm['pct_aspen'].max())
sm = plt.cm.ScalarMappable(cmap='viridis', norm=norm)
sm.set_array([])  # Only needed for ScalarMappable

# Adjust the size of the colorbar using the `shrink` parameter
cbar = fig.colorbar(sm, ax=ax, shrink=0.7)
cbar.set_label('Aspen Percent')

plt.xlabel('Longitude')
plt.ylabel('Latitude')


# Save the map as a PNG
plt.savefig(os.path.join(projdir, 'figures/Figure1_Aspen-Fires_NIFC-SRM.png'), dpi=300, bbox_inches='tight')

plt.show()

In [None]:
# Final garbage-collection pass
gc.collect()