In [1]:
"""
Identify wildfire events with >= 1% pre-fire aspen forest cover across western U.S. ecoregions

Data sources
    - Wildfire perimeters: NIFC Interagency Final Wildfire Perimeters
    - Landcover: LANDFIRE Existing Vegetation Type (EVT) ca. 2016
    
Author: maxwell.cook@colorado.edu
"""

import os, sys, time, gc
import geopandas as gpd
import rioxarray as rxr
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

# Custom functions
sys.path.append(os.path.join(os.getcwd(),'code/'))
from __functions import *

proj = 'EPSG:5070'

maindir = '/Users/max/Library/CloudStorage/OneDrive-Personal/mcook/'
projdir = os.path.join(maindir, 'aspen-fire/Aim2/')

print("Ready to go !")

Ready to go !


In [None]:
# load the fire data (NIFC) and handle duplicate entries

In [2]:
# Load the NIFC perimeters
fp = os.path.join(projdir,'data/spatial/raw/NIFC/nifc-interagency_2012_to_2023.gpkg')
events = gpd.read_file(fp)
print(f"There are [{len(events)}] NIFC perimeters.\n{events.columns}")

There are [34595] NIFC perimeters.
Index(['OBJECTID', 'MAP_METHOD', 'DATE_CUR', 'COMMENTS', 'GEO_ID', 'IRWINID',
       'UNQE_FIRE_', 'FIRE_YEAR', 'LOCAL_NUM', 'INCIDENT', 'UNIT_ID',
       'POO_RESP_I', 'FEATURE_CA', 'GIS_ACRES', 'USER_NAME', 'SOURCE',
       'AGENCY', 'FIRE_YEAR_', 'Shape__Are', 'Shape__Len', 'FORID', 'OTHERID',
       'geometry'],
      dtype='object')


In [3]:
# tidy the columns.
events = events[['OBJECTID','MAP_METHOD','GEO_ID','IRWINID','FIRE_YEAR',
                 'INCIDENT','FEATURE_CA','GIS_ACRES','geometry']]
events[['INCIDENT','FIRE_YEAR','GIS_ACRES']].head()

Unnamed: 0,INCIDENT,FIRE_YEAR,GIS_ACRES
0,COAL CREEK,2012,97.68
1,LONG DRAW,2016,2142.83
2,DEVIL CREEK,2018,20.46
3,COLT,2021,30.42
4,577,2019,89.31


In [4]:
# Check on the burned area distribution
events['GIS_ACRES'].describe()

count    3.459500e+04
mean     5.686211e+04
std      9.776046e+05
min      0.000000e+00
25%      4.720000e+00
50%      3.301000e+01
75%      2.833400e+02
max      1.314430e+08
Name: GIS_ACRES, dtype: float64

In [5]:
# Keep fires greater than at least 1 acres (excludes potential map errors)
events = events[events['GIS_ACRES'] >= 10]
print(f"There are [{len(events)}] fires >= 1 acres.")

# Check for duplicates (different mapping methods)
n_dup = events.duplicated(subset='IRWINID').sum()
print(f"There are {n_dup} [{round(n_dup/len(events)*100, 2)} %] duplicate IRWINIDs.")

There are [23737] fires >= 1 acres.
There are 11333 [47.74 %] duplicate IRWINIDs.


In [6]:
# Resolve duplicate fire perimeters
def resolve_duplicates(group):
    """ If there are duplicates, keep the record labeled 'Final Wildfire Perimeter'. 
    If no such record exists, keep one of the records (first occurrence). """
    if 'Wildfire Final Fire Perimeter' in group['FEATURE_CA'].values:
        return group[group['FEATURE_CA'] == 'Wildfire Final Fire Perimeter'].iloc[0]  # Keep 'Final' perimeter
    else:
        return group.iloc[0]  # If no 'Final' perimeter, keep the first record

# Isolate duplicate records
dups = events[events['IRWINID'].duplicated(keep=False)]  # Keep all duplicates
print(f"Number of unique duplicates: {len(dups['IRWINID'].unique())}/{len(dups)}")

# Apply the function to groups of duplicates
resolved_df = dups.groupby('IRWINID').apply(resolve_duplicates).reset_index(drop=True)
print(f"Number of remaining fires after removing duplicates: {len(resolved_df)}")

# Combine the resolved duplicates with the non-duplicates
non_dups = events[~events['IRWINID'].duplicated(keep=False)]  # keep only non-duplicates
events_c = pd.concat([non_dups, resolved_df], ignore_index=True) # merge back (clean data)

print(f"\nThere are {len(events_c)} unique fire perimeters after resolving duplicates.")

del dups, resolved_df, non_dups, events
gc.collect() # clean up

Number of unique duplicates: 416/11749
Number of remaining fires after removing duplicates: 415

There are 12403 unique fire perimeters after resolving duplicates.


0

In [None]:
# Join to western U.S. ecoregions

In [7]:
fp = os.path.join(projdir,'data/spatial/raw/boundaries/na_cec_eco_l3_west.gpkg')
ecol3 = gpd.read_file(fp)
ecol3 = ecol3[['NA_L3NAME','NA_L3CODE','geometry']].rename(columns={'NA_L3NAME': 'na_l3name', 'NA_L3CODE': 'na_l3code'})
ecol3.columns

Index(['na_l3name', 'na_l3code', 'geometry'], dtype='object')

In [8]:
# Identify the US EPA Level III ecoregion based on centroid of fire perimeter
centroids = events_c.copy() # make a copy of the fire data
centroids.geometry = centroids.geometry.centroid # use the centroid for joining to ecoregions
if not centroids.crs == ecol3.crs:
    print("CRS mismatch, fixing it !")
    centroids = centroids.to_crs(proj)
    ecol3 = ecol3.to_crs(proj)

# Perform the overlay analysis
events_west = gpd.overlay(centroids, ecol3, how='intersection') # find overlaps

print(f"There are {len(events_west)} fire events across western U.S. ecoregions in CONUS (2012-2023).")
print(f"Data columns:\n{events_west.columns}")

There are 8538 fire events across western U.S. ecoregions in CONUS (2012-2023).
Data columns:
Index(['OBJECTID', 'MAP_METHOD', 'GEO_ID', 'IRWINID', 'FIRE_YEAR', 'INCIDENT',
       'FEATURE_CA', 'GIS_ACRES', 'na_l3name', 'na_l3code', 'geometry'],
      dtype='object')


In [10]:
# Replace the centroid geometry with the fire perimeter
events_west = gpd.GeoDataFrame(
    pd.merge(events_west[['OBJECTID','na_l3name']], events_c, left_on='OBJECTID', right_on='OBJECTID', how="left")
)
print(f"Geometry set to fire perimeters: {events_west.geom_type.unique()}")

Geometry set to fire perimeters: ['MultiPolygon']


In [11]:
# save this file out.
out_fp = os.path.join(projdir,'data/spatial/mod/NIFC/nifc-interagency_2012_to_2023_west.gpkg')
events_west.to_file(out_fp)
print(f"Saved file to: {out_fp}")

Saved file to: /Users/max/Library/CloudStorage/OneDrive-Personal/mcook/aspen-fire/Aim2/data/spatial/mod/NIFC/nifc-interagency_2012_to_2023_west.gpkg


In [12]:
events_west.columns

Index(['OBJECTID', 'na_l3name', 'MAP_METHOD', 'GEO_ID', 'IRWINID', 'FIRE_YEAR',
       'INCIDENT', 'FEATURE_CA', 'GIS_ACRES', 'geometry'],
      dtype='object')

In [None]:
# Calculate zonal statistics from the LANDFIRE EVT

In [13]:
# Load the TreeMap (ca. 2016)
fp = os.path.join(maindir,'data/landcover/USFS/RDS_TreeMap/TreeMap2016_FORTYPCD/TreeMap2016_FORTYPCD.tif')
treemap_da = rxr.open_rasterio(fp, masked=True, cache=False, chunks='auto').squeeze()
# Grab some raster metadata
shp, gt, wkt, nd = treemap_da.shape, treemap_da.spatial_ref.GeoTransform, treemap_da.rio.crs, treemap_da.rio.nodata
print(
    f"Shape: {shp}; \n"
    f"GeoTransform: {gt}; \n"
    f"WKT: {wkt}; \n"
    f"NoData Value: {nd}; \n"
    f"Data Type: {treemap_da[0].dtype}")
gc.collect() # clean up

Shape: (97383, 154221); 
GeoTransform: -2362845.0 30.0 0.0 3180555.0 0.0 -30.0; 
WKT: EPSG:5070; 
NoData Value: nan; 
Data Type: float32


208

In [14]:
# Crop the raster by the fire bounds (with small buffer)
bounds = events_west.total_bounds
treemap_da_crop = treemap_da.rio.clip_box(
    minx=bounds[0]+1000, # +1000 meters for a small buffer
    miny=bounds[1]+1000, 
    maxx=bounds[2]+1000, 
    maxy=bounds[3]+1000
)
del treemap_da, bounds
gc.collect()

54

In [15]:
# Add a small buffer to wildfire perimeters
# Accounting for edge effects
buffer = 90  # meters
events_west = events_west.copy() # make a copy of the original data
events_west['geometry'] = events_west['geometry'].buffer(buffer)
print(f"Buffered fire perimeters by {buffer} meters.")

Buffered fire perimeters by 90 meters.


In [16]:
events_west.columns

Index(['OBJECTID', 'na_l3name', 'MAP_METHOD', 'GEO_ID', 'IRWINID', 'FIRE_YEAR',
       'INCIDENT', 'FEATURE_CA', 'GIS_ACRES', 'geometry'],
      dtype='object')

In [None]:
# Calculate zonal stats for the wildfire perimeters.

In [17]:
t0 = time.time()

# see __functions.py
sampled = compute_band_stats(events_west, treemap_da_crop, 'OBJECTID', attr='FORTYPCD')

t1 = (time.time() - t0) / 60
print(f"Total elapsed time: {t1:.2f} minutes.")
print("\n~~~~~~~~~~\n")

Total elapsed time: 1.52 minutes.

~~~~~~~~~~



In [19]:
sampled.head(3)

Unnamed: 0,OBJECTID,FORTYPCD,count,total_pixels,pct_cover
0,5,182,22.0,6874.0,0.320047
1,5,184,2.0,6874.0,0.029095
2,5,185,8.0,6874.0,0.116381


In [20]:
del treemap_da_crop
gc.collect()  # clean up

750

In [None]:
# Load the lookup table and gather LC info

In [21]:
# load the species lookup table
fp = os.path.join(projdir,'data/tabular/mod/treemap_fortypcd_species_mapping.csv')
species_df = pd.read_csv(fp)[['FORTYPCD','SpeciesName']]
species_df = species_df[species_df['FORTYPCD'].isin(sampled['FORTYPCD'].unique())]
print(species_df.columns)

# merge with fire summary to get forest species name
fortypcd = sampled.merge(species_df, on='FORTYPCD', how='left')
fortypcd.head(3)

Index(['FORTYPCD', 'SpeciesName'], dtype='object')


Unnamed: 0,OBJECTID,FORTYPCD,count,total_pixels,pct_cover,SpeciesName
0,5,182,22.0,6874.0,0.320047,Rocky Mountain juniper
1,5,184,2.0,6874.0,0.029095,Juniper woodland
2,5,185,8.0,6874.0,0.116381,Pinyon / juniper woodland


In [None]:
# Identify fires with >= 1% aspen forest cover

In [22]:
aspen_fires = fortypcd[fortypcd['SpeciesName'] == 'Aspen']
print(f"There are {len(aspen_fires)} fires with some aspen component.")

# filter fires where "pct_aspen" >= 1%
aspen_fires = aspen_fires[aspen_fires['pct_cover'] >= 1] # retain fires with >= X %
aspen_fires = aspen_fires[['OBJECTID','pct_cover']] # subset columns
print(f"\nThere are [{len(aspen_fires)}] fire events with > 1% aspen cover across western U.S. ecoregions (2012-2023).\n")

There are 1707 fires with some aspen component.

There are [1061] fire events with > 1% aspen cover across western U.S. ecoregions (2012-2023).



In [23]:
# Join aspen percent to the spatial data.
props_gdf = pd.merge(events_west, aspen_fires, on='OBJECTID', how='inner').rename(columns={'pct_cover': 'aspen_pct'})
props_gdf = props_gdf.to_crs(proj)  # ensure the correct projection before exporting
props_gdf.columns

Index(['OBJECTID', 'na_l3name', 'MAP_METHOD', 'GEO_ID', 'IRWINID', 'FIRE_YEAR',
       'INCIDENT', 'FEATURE_CA', 'GIS_ACRES', 'geometry', 'aspen_pct'],
      dtype='object')

In [24]:
# Check for remaining duplicates
print(f"There are {props_gdf.duplicated(subset='IRWINID').sum()} duplicate IRWINIDs.")

There are 0 duplicate IRWINIDs.


In [29]:
# Check for remaining duplicates on year+fire name
props_gdf['YEAR_NAME'] = props_gdf['FIRE_YEAR'].astype(str) + props_gdf['INCIDENT'].astype(str) + props_gdf['na_l3name'].astype(str)
print(f"There are {props_gdf.duplicated(subset='YEAR_NAME').sum()} duplicate Fire Year/Fire Name combinations.")

There are 5 duplicate Fire Year/Fire Name combinations.


In [30]:
duplicates = props_gdf[props_gdf['YEAR_NAME'].duplicated(keep=False)]  # Keep all duplicates
duplicates = duplicates.sort_values(by=['INCIDENT'])
duplicates[['OBJECTID','FIRE_YEAR','INCIDENT','YEAR_NAME','GIS_ACRES','MAP_METHOD','aspen_pct']].head(len(duplicates))

Unnamed: 0,OBJECTID,FIRE_YEAR,INCIDENT,YEAR_NAME,GIS_ACRES,MAP_METHOD,aspen_pct
123,7753,2016,Berry,2016BerryMiddle Rockies,20824.79,Infrared Interpretation,3.108192
948,97825,2016,Berry,2016BerryMiddle Rockies,20841.12,Autonomous GPS,3.108192
622,25950,2017,Boundary,2017BoundaryArizona/New Mexico Mountains,17817.96,Infrared Interpretation,3.581105
677,29666,2017,Boundary,2017BoundaryArizona/New Mexico Mountains,17804.88,Other Agency Digital,3.584981
522,24555,2019,Crib,2019CribNorthwestern Great Plains,32.96,Unknown,12.5
882,72938,2019,Crib,2019CribNorthwestern Great Plains,32.96,GPS-Unknown,12.5
857,70102,2018,Ryan,2018RyanSouthern Rockies,28585.43,Mixed Methods,14.97682
1021,41932,2018,Ryan,2018RyanSouthern Rockies,28243.65,Infrared Interpretation,14.855188
444,22817,2022,Snyelmn,2022SnyelmnCanadian Rockies,345.0,Other,2.669633
900,79577,2022,Snyelmn,2022SnyelmnCanadian Rockies,245.91,Other,2.669633


In [37]:
# Handle remaining duplicate fire boundaries (keep the largest)
# 'Lost Spring' above is not actually a duplicate
duplicates = duplicates.sort_values(by=['GIS_ACRES'], ascending=False)
duplicates[['OBJECTID','FIRE_YEAR','INCIDENT','GIS_ACRES']].head(len(duplicates))

resolved_df = duplicates.drop_duplicates(subset='INCIDENT', keep='first')
print(resolved_df[['OBJECTID','FIRE_YEAR','INCIDENT','GIS_ACRES','aspen_pct']].head())

# Merge back with the original dataframe
non_dups = props_gdf[~props_gdf['YEAR_NAME'].duplicated(keep=False)]  # keep only non-duplicates
props_gdf_ = pd.concat([non_dups, resolved_df], ignore_index=True) # merge back
print(f"There are {len(props_gdf_)} after resolving the last duplicates.")

Unnamed: 0,OBJECTID,FIRE_YEAR,INCIDENT,GIS_ACRES
857,70102,2018,Ryan,28585.43
1021,41932,2018,Ryan,28243.65
948,97825,2016,Berry,20841.12
123,7753,2016,Berry,20824.79
622,25950,2017,Boundary,17817.96
677,29666,2017,Boundary,17804.88
444,22817,2022,Snyelmn,345.0
900,79577,2022,Snyelmn,245.91
522,24555,2019,Crib,32.96
882,72938,2019,Crib,32.96


In [None]:
# Save this file out
out_fp = os.path.join(projdir,'data/spatial/mod/NIFC/nifc-interagency_2012_to_2023-TreeMap_aspen.gpkg')
props_gdf_.to_file(out_fp)
print(f"Saved file to {out_fp}")

In [None]:
# Map of aspen fires

In [None]:
### Load the state boundaries
states = gpd.read_file(os.path.join(maindir,'data/boundaries/political/TIGER/tl19_us_states_west_nad83.gpkg'))
states = states.to_crs(4326)

ecol3 = ecol3.to_crs(4326)

# Generate centroids
centroid = props_gdf.copy()
centroid = centroid.to_crs(4326)
centroid['geometry'] = centroid.geometry.centroid
centroid['size'] = centroid['aspen_pct'] * 10  # Adjust the scaling factor as necessary

# Make a spatial map of the centroids now
fig, ax = plt.subplots(figsize=(6, 6))

ecol3.plot(ax=ax, edgecolor='black', linewidth=0.4, color='none')
centroid.plot(
    ax=ax, markersize=centroid['aspen_pct'], 
    column='aspen_pct', cmap='viridis', 
    legend=False, alpha=0.6)

# Manually add colorbar
norm = matplotlib.colors.Normalize(vmin=centroid['aspen_pct'].min(), vmax=centroid['aspen_pct'].max())
sm = plt.cm.ScalarMappable(cmap='viridis', norm=norm)
sm.set_array([])  # Only needed for ScalarMappable

# Adjust the size of the colorbar using the `shrink` parameter
cbar = fig.colorbar(sm, ax=ax, shrink=0.7)
cbar.set_label('Aspen Percent')

plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.tight_layout()

# Save the map as a PNG
plt.savefig(os.path.join(projdir, 'figures/Figure1_Aspen-Fires_NIFC_2012to2023.png'), dpi=300, bbox_inches='tight')

plt.show()

In [None]:
props_gdf_.columns

In [None]:
# Plot the count of fires across US EPA Level III ecoregions
df = props_gdf_.groupby('na_l3name').agg(
    pct_cover_mean=('pct_cover', 'mean'),  # Mean percent cover
    fire_count=('pct_cover', 'count')      # Count of fires
).reset_index()
df

In [None]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm

df = df.sort_values('fire_count', ascending=False)

norm = plt.Normalize(df['pct_cover_mean'].min(), df['pct_cover_mean'].max())
cmap = cm.viridis  # Choose a colormap

# Create the bar chart
fig, ax = plt.subplots(figsize=(10, 5))
bars = ax.bar(
    df['na_l3name'], 
    df['fire_count'], 
    color=cmap(norm(df['pct_cover_mean']))
)

# Add colorbar for mean percent cover
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
cbar = plt.colorbar(sm, ax=ax)
cbar.set_label('Mean Percent Cover (%)', rotation=270, labelpad=15)

# Add labels and title
ax.set_xlabel('Ecoregion')
ax.set_ylabel('Fire Count')
ax.set_title('Fire Count and Mean Percent Cover by Ecoregion')
ax.set_xticks(range(len(df['na_l3name'])))
ax.set_xticklabels(df['na_l3name'], rotation=45, ha='right')

# Show the plot
plt.tight_layout()
plt.show()

In [None]:
ecol3.columns

In [None]:
# Southern Rockies

centroid_srm = centroid[centroid['na_l3name'] == 'Southern Rockies']
ecol3_srm = ecol3[ecol3['na_l3name'] == 'Southern Rockies']

# plot it
fig, ax = plt.subplots(figsize=(6, 6))

ecol3_srm.plot(ax=ax, edgecolor='black', linewidth=0.8, color='none')
centroid_srm.plot(
    ax=ax, markersize=centroid_srm['pct_cover'], 
    column='pct_cover', cmap='viridis', 
    legend=False, alpha=0.6)

# Manually add colorbar
norm = matplotlib.colors.Normalize(vmin=centroid_srm['pct_cover'].min(), vmax=centroid_srm['pct_cover'].max())
sm = plt.cm.ScalarMappable(cmap='viridis', norm=norm)
sm.set_array([])  # Only needed for ScalarMappable

# Adjust the size of the colorbar using the `shrink` parameter
cbar = fig.colorbar(sm, ax=ax, shrink=0.7)
cbar.set_label('Aspen Percent')

plt.xlabel('Longitude')
plt.ylabel('Latitude')


# Save the map as a PNG
plt.savefig(os.path.join(projdir, 'figures/Figure1_Aspen-Fires_NIFC-SRM.png'), dpi=300, bbox_inches='tight')

plt.show()

In [None]:
gc.collect()