In [1]:
"""
Read VIIRS active fire geolocated fire pixels output from XXviirs_access-swath.ipynb
Author: maxwell.cook@colorado.edu
"""

# Import packages
import sys, os
import xarray as xr
import pyproj
import geopandas as gpd
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sns

from netCDF4 import Dataset # to open NetCDF
from datetime import datetime
from datetime import timedelta
from matplotlib import pyplot as plt
from affine import Affine
from osgeo import gdal, gdal_array, gdalconst, osr
from rasterio.transform import from_bounds
from scipy.spatial import cKDTree

# Custom functions
# NOTE(review): `pd`, `np`, `box` and `Polygon` are used further down but are never
# imported explicitly in this notebook -- presumably they arrive through this star
# import; confirm, or import them explicitly.
sys.path.append(os.path.join(os.getcwd(),'code/'))
from __functions import *

# Projection information
geog = 'EPSG:4326'  # Geographic coordinates (WGS 84 latitude/longitude)
prj = 'EPSG:5070'  # Projected coordinate system: NAD83 / Conus Albers (units of meters)

# File path information
# NOTE(review): absolute, user-specific path -- the notebook only runs where this directory exists.
maindir = '/Users/max/Library/CloudStorage/OneDrive-Personal/mcook/'
projdir = os.path.join(maindir, 'aspen-fire/Aim2/')
# VIIRS data directories (raw and modified)
dataraw = os.path.join(projdir,'data/spatial/raw/VIIRS/')
datamod = os.path.join(projdir,'data/spatial/mod/VIIRS/')

# Signal that the setup cell finished
print("Ready !")

Ready !


In [2]:
# Load and tidy the fire perimeter data

In [3]:
# Load the NIFC fire perimeters (2018-2023 aspen subset, per the file name)
fp = os.path.join(projdir,'data/spatial/mod/NIFC/nifc-ics_2018_to_2023-aspen.gpkg')
fires = gpd.read_file(fp)

# subset to Southern Rockies; copy explicitly so the column edits below operate on an
# independent frame instead of a filtered slice (avoids SettingWithCopy warnings)
fires = fires[fires['na_l3name'] == 'Southern Rockies'].copy()

# tidy the fire id and name columns (returns a new frame rather than renaming in place)
fires = fires.rename(columns={'NIFC_ID': 'Fire_ID', 'NIFC_NAME': 'Fire_Name'})

# tidy the date columns and make sure the acreage is numeric
fires['DISCOVERY_DATE'] = pd.to_datetime(fires['DISCOVERY_DATE'])
fires['WF_CESSATION_DATE'] = pd.to_datetime(fires['WF_CESSATION_DATE'])
fires['NIFC_ACRES'] = fires['NIFC_ACRES'].astype(float)

print(f"There are {len(fires)} with > 1% aspen cover in the Southern Rockies (2018-2023)")

There are 67 with > 1% aspen cover in the Southern Rockies (2018-2023)


In [4]:
# Expand every fire perimeter outward by a fixed distance.
# NOTE: `fires` is overwritten, so re-running this cell buffers the already-buffered
# polygons a second time.
buffer_dist = 3000  # meters
buffered_geoms = fires['geometry'].buffer(buffer_dist)
fires = fires.copy() # make a copy of the original data
fires['geometry'] = buffered_geoms
print(f"Buffered fire perimeters by {buffer_dist} meters.")

Buffered fire perimeters by 3000 meters.


In [5]:
# Create spatial points from lat/lon

In [6]:
# Read the active fire detection table and discard the unnamed index columns
# that were written out with the CSV.
fp = os.path.join(datamod, 'vnp14img_geo_aspen-fires-srm_2018to2023.csv')
afds = pd.read_csv(fp).reset_index(drop=True)
named_cols = [col for col in afds.columns if not col.startswith('Unnamed:')]
afds = afds[named_cols]
len(afds)

81574

In [7]:
# get a summary of fire detection confidence
afds['confidence'].value_counts()

confidence
n    67638
h     8219
l     5717
Name: count, dtype: int64

In [8]:
from shapely.geometry import Point

# Convert to spatial points using the pixel centroid.
# points_from_xy builds all geometries in one vectorised call, and the geometry is
# attached to the new GeoDataFrame only, so the plain `afds` table is left untouched.
# The CRS values come from the `geog` / `prj` constants defined in the setup cell.
afds_ll = gpd.GeoDataFrame(
    afds,
    geometry=gpd.points_from_xy(afds['longitude'], afds['latitude']),
    crs=geog,
)
afds_ll = afds_ll.to_crs(prj).reset_index(drop=True)
afds_ll['afdID'] = afds_ll.index # add a unique ID
print(afds_ll.head())

# save this file out.
out_fp = os.path.join(datamod,'vnp14img_geo_srm_pix_latlon.gpkg')
afds_ll.to_file(out_fp)
print(f"\nSaved spatial points to: {out_fp}")

   longitude   latitude  fire_mask confidence daynight  acq_date  acq_time  \
0 -104.60649  38.234726          8          n    Night  6/1/2018       824   
1 -104.60698  38.232952          8          n    Night  6/1/2018       824   
2 -104.95000  36.581818          8          n    Night  6/1/2018       824   
3 -104.94370  36.581154          8          n    Night  6/1/2018       824   
4 -104.99421  36.582436          8          n    Night  6/1/2018       824   

                                    granule_id                  geo_id  \
0  VNP14IMG.A2018152.0824.002.2024080110710.nc  VNP03IMG.A2018152.0824   
1  VNP14IMG.A2018152.0824.002.2024080110710.nc  VNP03IMG.A2018152.0824   
2  VNP14IMG.A2018152.0824.002.2024080110710.nc  VNP03IMG.A2018152.0824   
3  VNP14IMG.A2018152.0824.002.2024080110710.nc  VNP03IMG.A2018152.0824   
4  VNP14IMG.A2018152.0824.002.2024080110710.nc  VNP03IMG.A2018152.0824   

        frp        iot4        iot5  sample  along_scan  along_track  \
0  1.722382  3

In [9]:
# Attach fire attributes to every detection that falls within a (buffered) perimeter.
perimeter_cols = ['Fire_ID','Fire_Name','START_YEAR','DISCOVERY_DATE','WF_CESSATION_DATE','geometry']
fires_ = fires[perimeter_cols]
afds_ll_ = (
    gpd.sjoin(afds_ll, fires_, how='inner', predicate='within')
    .drop(columns=['index_right'])
)

# A detection inside more than one perimeter appears once per matching fire;
# flag every row whose afdID occurs more than once.
is_duplicate = afds_ll_.duplicated(subset='afdID', keep=False)
dups = afds_ll_[is_duplicate]
print(f"[{len(dups)}/{len(afds_ll)} duplicate obs.")

dups[['afdID','Fire_Name','START_YEAR','acq_date']].head()

[4774/81574 duplicate obs.


Unnamed: 0,afdID,Fire_Name,START_YEAR,acq_date
1993,1993,MULLEN,2020,6/11/2018
1993,1993,BADGER CREEK,2018,6/11/2018
1994,1994,MULLEN,2020,6/11/2018
1994,1994,BADGER CREEK,2018,6/11/2018
1995,1995,MULLEN,2020,6/11/2018


In [10]:
# Resolve detections matched to several fires by keeping only those acquired
# between 14 days before discovery and 14 days after cessation of the matched fire.
window = timedelta(days=14)

afds_ll_f = afds_ll_.copy()
acq_dates = pd.to_datetime(afds_ll_f['acq_date'])
afds_ll_f['acq_date'] = acq_dates
afds_ll_f['acq_month'] = acq_dates.dt.month.astype(int)
afds_ll_f['acq_year'] = acq_dates.dt.year.astype(int)

earliest = afds_ll_f['DISCOVERY_DATE'] - window
latest = afds_ll_f['WF_CESSATION_DATE'] + window
in_window = (afds_ll_f['acq_date'] >= earliest) & (afds_ll_f['acq_date'] <= latest)
afds_ll_f = afds_ll_f[in_window]

# re-check for detections still assigned to more than one fire
still_duplicated = afds_ll_f.duplicated(subset='afdID', keep=False)
dups = afds_ll_f[still_duplicated]
print(f"[{len(dups)}/{len(afds_ll_f)} duplicate obs.")

[0/64196 duplicate obs.


In [11]:
afds_ll_f.columns

Index(['longitude', 'latitude', 'fire_mask', 'confidence', 'daynight',
       'acq_date', 'acq_time', 'granule_id', 'geo_id', 'frp', 'iot4', 'iot5',
       'sample', 'along_scan', 'along_track', 'scan_angle', 'pix_area',
       'geometry', 'afdID', 'Fire_ID', 'Fire_Name', 'START_YEAR',
       'DISCOVERY_DATE', 'WF_CESSATION_DATE', 'acq_month', 'acq_year'],
      dtype='object')

In [12]:
# grab a count of observations for each fire
counts = afds_ll_f.groupby(['Fire_ID']).size().reset_index(name='obs_count')
# Attach the per-fire count with a group-wise transform instead of a merge: assigning the
# column overwrites any existing `obs_count`, so the cell can be re-run safely (a second
# merge would create obs_count_x / obs_count_y and the lookup below would fail).
# The index is reset so the frame keeps a fresh 0..n-1 index, as a merge would produce.
afds_ll_f = afds_ll_f.assign(
    obs_count=afds_ll_f.groupby('Fire_ID')['Fire_ID'].transform('size')
).reset_index(drop=True)
afds_ll_f['obs_count'].describe()

count    64196.000000
mean      6748.049037
std       4680.280469
min          1.000000
25%       2261.000000
50%       9858.000000
75%       9922.000000
max      12959.000000
Name: obs_count, dtype: float64

In [13]:
# Keep only detections belonging to fires with at least `n_obs` observations.
n_obs = 10
has_enough_obs = afds_ll_f['obs_count'] >= n_obs
afds_ll_f_ = afds_ll_f[has_enough_obs]
n_fires_kept = len(afds_ll_f_['Fire_ID'].unique())
print(f"There are {n_fires_kept} fires with >= {n_obs} obs.")

There are 50 fires with >= 10 obs.


In [26]:
# Keep only the (buffered) perimeters of fires that passed the observation-count filter.
# NOTE: this rebinds `fires_`, which earlier held the column subset used for the spatial join.
fires_ = fires[fires['Fire_ID'].isin(afds_ll_f_['Fire_ID'].unique())]
len(fires_)

50

In [14]:
# save this file out.
out_fp = os.path.join(datamod,'vnp14img_geo_srm_pix_latlon_aspenfires.gpkg')
afds_ll_f_.to_file(out_fp)
print(f"Saved spatial points to: {out_fp}")

Saved spatial points to: /Users/max/Library/CloudStorage/OneDrive-Personal/mcook/aspen-fire/Aim2/data/spatial/mod/VIIRS/vnp14img_geo_srm_pix_latlon_aspenfires.gpkg


In [15]:
# Create the ground area of pixels

In [16]:
# Define the pixel buffer function for the given width and height
def pixel_area(point, width, height):
    """Return a rectangle of the requested size centred on ``point``.

    Parameters
    ----------
    point : object exposing ``.x`` and ``.y``
        Centre of the rectangle.
    width : number
        Full extent along x, in the same units as the point coordinates.
    height : number
        Full extent along y, in the same units as the point coordinates.

    Returns
    -------
    The value of ``box(minx, miny, maxx, maxy)`` for the centred rectangle.
    """
    dx, dy = width / 2, height / 2
    lower_left = (point.x - dx, point.y - dy)
    upper_right = (point.x + dx, point.y + dy)
    return box(*lower_left, *upper_right)

afds_ll_pix = afds_ll_f_.copy()

# Replace each point with its rectangular pixel footprint. along_scan / along_track are
# multiplied by 1000 (presumably km -> m, matching the projected CRS units).
afds_ll_pix["geometry"] = [
    pixel_area(center, scan * 1000, track * 1000)
    for center, scan, track in zip(
        afds_ll_pix["geometry"], afds_ll_pix["along_scan"], afds_ll_pix["along_track"]
    )
]

# fresh positional index, reused as a unique observation ID
afds_ll_pix = afds_ll_pix.reset_index(drop=True)
afds_ll_pix['obs_id'] = afds_ll_pix.index

afds_ll_pix.head() # check the results

Unnamed: 0,longitude,latitude,fire_mask,confidence,daynight,acq_date,acq_time,granule_id,geo_id,frp,...,afdID,Fire_ID,Fire_Name,START_YEAR,DISCOVERY_DATE,WF_CESSATION_DATE,acq_month,acq_year,obs_count,obs_id
0,-107.8099,37.458965,8,n,Day,2018-06-01,1948,VNP14IMG.A2018152.1948.002.2024080110709.nc,VNP03IMG.A2018152.1948,7.603679,...,468,14,416,2018,2018-06-01 11:02:00,2018-07-03 18:00:00,6,2018,3109,0
1,-107.81094,37.462734,9,h,Day,2018-06-01,1948,VNP14IMG.A2018152.1948.002.2024080110709.nc,VNP03IMG.A2018152.1948,48.685238,...,469,14,416,2018,2018-06-01 11:02:00,2018-07-03 18:00:00,6,2018,3109,1
2,-107.815796,37.46177,7,l,Day,2018-06-01,1948,VNP14IMG.A2018152.1948.002.2024080110709.nc,VNP03IMG.A2018152.1948,48.685238,...,470,14,416,2018,2018-06-01 11:02:00,2018-07-03 18:00:00,6,2018,3109,2
3,-107.82067,37.460793,8,n,Day,2018-06-01,1948,VNP14IMG.A2018152.1948.002.2024080110709.nc,VNP03IMG.A2018152.1948,25.084015,...,471,14,416,2018,2018-06-01 11:02:00,2018-07-03 18:00:00,6,2018,3109,3
4,-107.81684,37.465534,9,h,Day,2018-06-01,1948,VNP14IMG.A2018152.1948.002.2024080110709.nc,VNP03IMG.A2018152.1948,48.685238,...,472,14,416,2018,2018-06-01 11:02:00,2018-07-03 18:00:00,6,2018,3109,4


In [17]:
# save this file out.
out_fp = os.path.join(datamod,'vnp14img_geo_srm_pix_area_aspenfires.gpkg')
afds_ll_pix.to_file(out_fp)
print(f"Saved to {out_fp}\n")

Saved to /Users/max/Library/CloudStorage/OneDrive-Personal/mcook/aspen-fire/Aim2/data/spatial/mod/VIIRS/vnp14img_geo_srm_pix_area_aspenfires.gpkg



In [18]:
len(afds_ll_pix)

64143

In [19]:
afds_ll_pix.columns

Index(['longitude', 'latitude', 'fire_mask', 'confidence', 'daynight',
       'acq_date', 'acq_time', 'granule_id', 'geo_id', 'frp', 'iot4', 'iot5',
       'sample', 'along_scan', 'along_track', 'scan_angle', 'pix_area',
       'geometry', 'afdID', 'Fire_ID', 'Fire_Name', 'START_YEAR',
       'DISCOVERY_DATE', 'WF_CESSATION_DATE', 'acq_month', 'acq_year',
       'obs_count', 'obs_id'],
      dtype='object')

In [20]:
# Combine the acquisition date and HHMM time into a timezone-aware local datetime.
# convert_datetime is defined outside this notebook (presumably in __functions). The
# displayed output maps 1948 -> 13:48-06:00, which suggests the input time is treated
# as UTC -- TODO confirm against the helper.
afds_ll_pix['acq_datetime'] = afds_ll_pix.apply(
    lambda row: convert_datetime(row['acq_date'], row['acq_time'], zone='America/Denver'), 
    axis=1
)
afds_ll_pix[['acq_date','acq_time','acq_datetime','daynight']].head()

Unnamed: 0,acq_date,acq_time,acq_datetime,daynight
0,2018-06-01,1948,2018-06-01 13:48:00-06:00,Day
1,2018-06-01,1948,2018-06-01 13:48:00-06:00,Day
2,2018-06-01,1948,2018-06-01 13:48:00-06:00,Day
3,2018-06-01,1948,2018-06-01 13:48:00-06:00,Day
4,2018-06-01,1948,2018-06-01 13:48:00-06:00,Day


In [21]:
# Check the range of local acquisition times for 'Day' observations.
# The times are pulled straight from the selection instead of being written into a
# filtered slice, which avoids pandas' SettingWithCopy warning.
# NOTE: the zone is 'America/Denver', so summer values carry the daylight-saving offset
# (-06:00 in the output above) even though the printed label says MST.
day_times = afds_ll_pix.loc[afds_ll_pix['daynight'] == 'Day', 'acq_datetime'].dt.time

print(f"Minimum MST datetime for 'Day': {day_times.min()}")
print(f"Maximum MST datetime for 'Day': {day_times.max()}")
del day_times

Minimum MST datetime for 'Day': 12:42:00
Maximum MST datetime for 'Day': 15:12:00


In [22]:
# Check the range of local acquisition times for 'Night' observations.
# The times are pulled straight from the selection instead of being written into a
# filtered slice, which avoids pandas' SettingWithCopy warning.
# NOTE: the zone is 'America/Denver', so summer values carry the daylight-saving offset
# even though the printed label says MST.
night_times = afds_ll_pix.loc[afds_ll_pix['daynight'] == 'Night', 'acq_datetime'].dt.time

print(f"Minimum MST datetime for 'Night': {night_times.min()}")
print(f"Maximum MST datetime for 'Night': {night_times.max()}")
del night_times

Minimum MST datetime for 'Night': 01:06:00
Maximum MST datetime for 'Night': 04:18:00


In [23]:
# Create a regularized grid for SRM

In [24]:
# Load the level-3 ecoregions and keep the Southern Rockies polygon(s)
fp = os.path.join(projdir, 'data/spatial/raw/boundaries/na_cec_eco_l3_west.gpkg')
ecol3 = gpd.read_file(fp)
is_srm = ecol3['NA_L3NAME'] == 'Southern Rockies'
srm = ecol3[is_srm]
print(srm.columns)

Index(['NA_L3CODE', 'NA_L3NAME', 'NA_L2CODE', 'NA_L2NAME', 'NA_L1CODE',
       'NA_L1NAME', 'NA_L3KEY', 'NA_L2KEY', 'NA_L1KEY', 'Shape_Leng',
       'Shape_Area', 'geometry'],
      dtype='object')


In [27]:
def regular_grid(extent, res=0.0039, crs_out='EPSG:5070', regions=None):
    """
    Build a lattice of square polygon cells covering a bounding extent.

    Cell corners are generated directly from ``extent`` and ``res`` and the resulting
    GeoDataFrame is simply labelled with ``crs_out`` -- the grid itself is never
    reprojected, so ``extent`` and ``res`` must already be in the units of ``crs_out``.

    Parameters
    ----------
    extent : sequence of four numbers
        ``(min_x, max_x, min_y, max_y)`` of the area to cover.
    res : float
        Edge length of a cell, in ``crs_out`` units. Must be greater than zero.
        NOTE(review): the default (0.0039) looks like a size in degrees while the
        default ``crs_out`` is a projected CRS; pass both explicitly.
    crs_out : str
        CRS assigned to the grid.
    regions : GeoDataFrame, optional
        When given, it is reprojected to the grid CRS if the two differ and the grid is
        intersected with it via ``gpd.overlay(how="intersection")``. The output then
        holds the intersection geometries (cells may be clipped to the region outlines)
        together with the region attributes.

    Returns
    -------
    GeoDataFrame
        One row per grid cell, or per cell/region intersection when ``regions`` is given.

    Raises
    ------
    ValueError
        If ``res`` is not greater than zero, or ``extent`` does not unpack into four values.
    """
    # a non-positive (or NaN) step would silently produce an empty grid
    if not res > 0:
        raise ValueError(f"res must be greater than zero, got {res!r}")

    # retrieve bounding coordinates
    min_lon, max_lon, min_lat, max_lat = extent
    # lower-left corner coordinates of every cell column / row (crs_out units)
    x_coords = np.arange(min_lon, max_lon, res)
    y_coords = np.arange(min_lat, max_lat, res)

    # generate the grid cells, column by column
    cells = [
        Polygon([(x, y), (x + res, y), (x + res, y + res), (x, y + res)])
        for x in x_coords for y in y_coords
    ]

    # label the cells with the output CRS (coordinates are used as-is)
    grid = gpd.GeoDataFrame({'geometry': cells}, crs=crs_out)

    if regions is not None:
        if regions.crs != grid.crs:
            regions = regions.to_crs(grid.crs)
        # intersect the grid with the regions; only overlapping parts are kept
        grid = gpd.overlay(grid, regions, how="intersection")

    return grid

# Get the SRM bounding extent from get_coords (defined outside this notebook) with
# crs='EPSG:5070'; the printed values are projected coordinates, not lat/lon.
# NOTE(review): `buffer=3000` presumably pads the extent by 3 km -- confirm in the helper.
coords, extent = get_coords(srm, buffer=3000, crs='EPSG:5070')
print(f"Bounding extent for the SRM: {extent}")

# Generate the grid with 375-unit cells in EPSG:5070 (i.e. 375 m, not 0.0039 degrees)
# and intersect it with the retained fire perimeters.
grid = regular_grid(extent, res=375, crs_out='EPSG:5070', regions=fires_)

# save this out.
# NOTE: the file name still says "0039deg" although the grid is built in meters.
out_fp = os.path.join(projdir, 'data/spatial/mod/srm_fires_grid_0039deg.gpkg')
grid.to_file(out_fp, driver="GPKG")
print(f"Grid saved to: {out_fp}")

Bounding extent for the SRM: [-1184484.4171543047, -689479.4244799014, 1397166.4988387332, 2246127.1529145916]
Grid saved to: /Users/max/Library/CloudStorage/OneDrive-Personal/mcook/aspen-fire/Aim2/data/spatial/mod/srm_fires_grid_0039deg.gpkg


In [None]:
# Handle duplicate observations for AFDs

In [64]:
# Duplicate rule for this step: same fire, same acquisition datetime, and a spatial
# overlap above the threshold.

drop_obs = set()  # afdIDs flagged for removal
overlap_threshold = 0.30  # minimum intersection-area / pixel-area ratio (a fraction, i.e. 30%)

# group the observations by fire and acquisition datetime
group_keys = ['Fire_ID', 'acq_datetime']
dt_groups = afds_ll_pix.groupby(group_keys)
n_groups = len(dt_groups)
print(f"Number of unique (Fire_ID, acq_datetime) groups: {n_groups}")

def process_group(group):
    """Flag overlapping observations within one group and return their afdIDs.

    Rows are visited in order. For each row that is not yet flagged, candidate rows are
    looked up through the group's spatial index; a candidate counts as a duplicate when
    the intersection area divided by the current row's area exceeds the module-level
    ``overlap_threshold``. Of such a pair, the candidate is flagged when its ``frp`` is
    strictly lower; otherwise the current row is flagged and its scan stops.

    Parameters
    ----------
    group : GeoDataFrame
        Observations to compare; needs ``afdID``, ``frp`` and geometry columns.

    Returns
    -------
    set
        afdIDs of the observations to drop.
    """
    flagged = set()

    # spatial index used to shortlist candidates by bounding box
    tree = group.sindex

    for _, current in group.iterrows():
        current_id = current['afdID']
        if current_id in flagged:
            # already marked while processing an earlier row
            continue

        # positional indices of rows whose bounds intersect the current footprint
        candidate_positions = list(tree.intersection(current.geometry.bounds))
        candidates = group.iloc[candidate_positions]

        for _, other in candidates.iterrows():
            other_id = other['afdID']
            # ignore the row itself and anything already flagged
            if other_id == current_id or other_id in flagged:
                continue

            # share of the current footprint covered by the candidate
            shared_area = current.geometry.intersection(other.geometry).area
            coverage = shared_area / current.geometry.area
            if not (coverage > overlap_threshold):
                continue

            if other['frp'] < current['frp']:
                # candidate is the weaker detection; keep scanning for more
                flagged.add(other_id)
                continue

            # current row is not the stronger one: flag it and stop comparing it
            flagged.add(current_id)
            break

    return flagged

# Process observations grouped by Fire_ID and acq_datetime.
# The (Fire_ID, acq_datetime) key is kept in a single throwaway name; unpacking it into a
# variable called `datetime` would overwrite the `datetime` class imported at the top.
for _group_key, group in dt_groups:
    if len(group) > 1:  # Only process groups with potential duplicates
        drop_obs.update(process_group(group))

# apply to the AFD ground area data
print(f"Identified a total of [{len(drop_obs)}/{len(afds_ll_pix)}] duplicate observations.")
afds_ll_pix_c1 = afds_ll_pix[~afds_ll_pix['afdID'].isin(drop_obs)] # drop the duplicate obs.

Number of unique (Fire_ID, acq_datetime) groups: 1692
Identified a total of [8211/64143] duplicate observations.


In [65]:
# save this file out.
out_fp = os.path.join(datamod, 'vnp14img_geo_srm_pix_area_aspenfires_nodup.gpkg')
afds_ll_pix_c1.to_file(out_fp)
print(f"Saved to:{out_fp}")

Saved to:/Users/max/Library/CloudStorage/OneDrive-Personal/mcook/aspen-fire/Aim2/data/spatial/mod/VIIRS/vnp14img_geo_srm_pix_area_aspenfires_nodup.gpkg


In [None]:
# NOTE(review): unexecuted cell that looks like an earlier, brute-force version of the
# grouped duplicate search above. It compares every observation with all others sharing
# the same acq_datetime (not restricted to the same Fire_ID), and it only adds afdIDs to
# `drop_obs` -- nothing here applies the result. Confirm whether it can be removed.
for idx, obs in afds_ll_pix.iterrows():
    afdID = obs['afdID'] # grab the current ID
    # identify other observations of the same datetime
    date_matches = afds_ll_pix[
        (afds_ll_pix['acq_datetime'] == obs['acq_datetime']) &
        (afds_ll_pix.afdID != afdID)
    ]
    if len(date_matches) > 0:
        for match_idx, match_obs in date_matches.iterrows():
            match_afdID = match_obs['afdID']
            # Share of the current footprint covered by the matching footprint
            overlap_area = obs.geometry.intersection(match_obs.geometry).area
            ratio = overlap_area / obs.geometry.area
            # Check for spatial overlap above the global threshold
            if ratio > overlap_threshold:
                # Keep the one with higher FRP, mark lower FRP for removal
                if match_obs['frp'] < obs['frp']:
                    drop_obs.add(match_afdID)
    else:
        # no other observation shares this datetime; nothing to compare
        continue


In [None]:
print(afds_ll_pix_c1['frp'].isna().sum())

In [None]:
# Case 2: Greater than 50% overlap, same day, different time.

In [None]:
# Case 2: later detections of the same date and day/night class that overlap an earlier
# observation by more than 50% and have a lower FRP are dropped.
# Column names and day/night labels follow this notebook's detection table
# ('acq_date', 'daynight' with values 'Day'/'Night', 'acq_datetime', 'frp').
time_threshold = timedelta(hours=2) # compare detections acquired up to 2 hours later

# reset_index() (without drop) keeps the previous index as an 'index' column and gives
# the rows unique labels, which are what `drop_obs` stores below
afds_ll_pix_c1 = afds_ll_pix_c1.reset_index()

drop_obs = set()
for acq_date, group in afds_ll_pix_c1.groupby('acq_date'):
    for dn in ['Day', 'Night']:
        dn_group = group[group['daynight'] == dn].sort_values('acq_datetime')
        for idx, obs in dn_group.iterrows():
            # strictly later observations inside the time window
            time_matches = dn_group[
                (dn_group['acq_datetime'] > obs['acq_datetime']) &
                (dn_group['acq_datetime'] <= obs['acq_datetime'] + time_threshold) &
                (dn_group.index != idx)  # Exclude itself
            ]
            for match_idx, match_obs in time_matches.iterrows():
                # Share of the current footprint covered by the later footprint
                overlap_area = obs.geometry.intersection(match_obs.geometry).area
                overlap_ratio = overlap_area / obs.geometry.area

                # Check for >50% spatial overlap
                if overlap_ratio > 0.50:
                    # Retain the highest FRP observation: only a weaker later match is dropped
                    if match_obs['frp'] < obs['frp']:
                        drop_obs.add(match_idx)

# build the Case 2 table from the Case 1 table (it is not defined before this point)
afds_ll_pix_c2 = afds_ll_pix_c1.drop(index=list(drop_obs)).reset_index(drop=True)
print(f"Identified a total of [{len(drop_obs)}/{len(afds_ll_pix_c1)}] overlapping observations within 2 hours, separated by day/night.")

In [None]:
print(afds_srm_c2['FRP'].isna().sum())

In [None]:
# Case 3: Classify first day of burn and secondary

In [None]:
# Label every observation as 'primary', 'secondary' or 'flare-up' based on earlier
# overlapping observations of the same day/night class.
# NOTE(review): `afds_srm_c2` is not defined anywhere in the visible notebook, and the
# upper-case columns used here (ACQ_DATETIME, DAYNIGHT, FRP) differ from the lower-case
# columns of the tables built above -- this cell appears to target an older table; confirm.
afds_srm_c2['detection'] = 'primary' # default label; overwritten below where overlaps are found
afds_srm_c3 = afds_srm_c2.sort_values(['ACQ_DATETIME']).reset_index(drop=True)
afds_srm_c3 = afds_srm_c3.reset_index(drop=True)
# assumes an 'index' column is present (e.g. left behind by an earlier reset_index()) -- TODO confirm
afds_srm_c3.drop(columns=['index'], inplace=True)

# Self-join: every pair of intersecting footprints, with _left/_right column suffixes
overlap_gdf = gpd.sjoin(
    afds_srm_c3, 
    afds_srm_c3, 
    how="inner", 
    predicate="intersects", 
    lsuffix="left", 
    rsuffix="right")

# Iterate over each observation in afds_srm_c3
for idx, obs in afds_srm_c3.iterrows():
    # Filter to previous overlapping observations only
    # NOTE(review): "previous" is decided by a smaller afdID, not by ACQ_DATETIME;
    # this presumes afdID increases with acquisition time -- confirm.
    overlapping_obs = overlap_gdf[
        (overlap_gdf['afdID_left'] == obs['afdID']) &  # Current observation
        (overlap_gdf['afdID_right'] < obs['afdID']) &  # Previous observations only
        (overlap_gdf['DAYNIGHT_right'] == obs['DAYNIGHT'])  # Same day/night cycle
    ].copy()

    # Calculate overlap ratio for each overlapping observation
    # (intersection area relative to the current observation's footprint)
    overlap_ratios = [
        obs.geometry.intersection(afds_srm_c3.loc[afds_srm_c3['afdID'] == row['afdID_right']].geometry.iloc[0]).area
        / obs.geometry.area
        for _, row in overlapping_obs.iterrows()
    ]

    # Filter to those with significant overlap (e.g., >50%)
    overlapping_obs['overlap_ratio'] = overlap_ratios
    overlapping_obs = overlapping_obs[overlapping_obs['overlap_ratio'] > 0.5]

    # If there are overlapping observations, proceed to classify
    if not overlapping_obs.empty:
        max_frp = overlapping_obs['FRP_right'].max()  # Max FRP from overlapping observations

        # Classify based on FRP comparison: stronger than every earlier overlap -> flare-up
        if obs['FRP'] > max_frp:
            afds_srm_c3.at[idx, 'detection'] = 'flare-up'
        else:
            afds_srm_c3.at[idx, 'detection'] = 'secondary'

# Save the classified observations
out_fp = os.path.join(projdir, 'data/spatial/mod/VIIRS/afd_aspen-fires_Case3.gpkg')
afds_srm_c3.to_file(out_fp)
print(f"Saved to: {out_fp}")

In [None]:
print(afds_srm_c3['FRP'].isna().sum())

In [None]:
afds_srm_c3['detection'].unique()

In [None]:
# Box plot of log(FRP) for each detection label.
# NOTE(review): the 'log_FRP' column is not created anywhere in the visible notebook.
fig, ax = plt.subplots(figsize=(5.5, 3))

sns.boxplot(data=afds_srm_c3, x='detection', y='log_FRP', palette="inferno", ax=ax)
ax.set_title("Distribution of log(FRP) by Detection Type")
ax.set_xlabel("Detection Type")
ax.set_ylabel("log(FRP)")

fig_path = os.path.join(projdir,'figures/FigureX_Primary_Secondary-FRP.png')
fig.savefig(fig_path, dpi=150, bbox_inches='tight')

plt.show()

In [None]:
# Count the observations in each detection class (primary / secondary / flare-up)
burn_class_counts = afds_srm_c3['detection'].value_counts()

# Bar chart of the class counts
fig, ax = plt.subplots(figsize=(5, 3))
sns.barplot(x=burn_class_counts.index, y=burn_class_counts.values, palette="inferno", ax=ax)
ax.set_title("Primary, Secondary, and Flare-up Obs.")
ax.set_xlabel("Burn Class")
ax.set_ylabel("Count")

fig_path = os.path.join(projdir,'figures/FigureX_Primary_Secondary-Counts.png')
fig.savefig(fig_path, dpi=150, bbox_inches='tight')

plt.show()

In [None]:
len(afds_srm_c3)

In [None]:
# Join to CBI, TreeMap, etc

In [None]:
# Read the TreeMap summary table (from GEE reductions) and discard the stray index column
fp = os.path.join(projdir,'data/tabular/mod/AFD/combined-afd_aspen-fires_TreeMap.csv')
treemap = pd.read_csv(fp).drop(columns=["Unnamed: 0"])
n_treemap_ids = len(treemap['afdID'].unique())
print(n_treemap_ids)
treemap.head()

In [None]:
# "Forested" observations: those whose summed pct_cover across all rows exceeds 50
pct_cover_sum = treemap.groupby('afdID')['pct_cover'].sum()
afdIDs_gt50 = pct_cover_sum.loc[lambda total: total > 50].index

# keep only the rows that belong to those afdIDs
is_forested = treemap['afdID'].isin(afdIDs_gt50)
treemap_ = treemap[is_forested]
print(f"Number of afdIDs with pct_cover > 50%: {len(afdIDs_gt50)}")

In [None]:
# Dominant forest type = the row with the largest pct_cover within each afdID
dominant_rows = treemap_.groupby('afdID')['pct_cover'].idxmax()
dtree = treemap_.loc[dominant_rows]
print(len(dtree))
dtree.head()

In [None]:
dtree_frp = dtree.merge(afds_srm_c3, on='afdID', how='inner')
print(dtree_frp.columns)
len(dtree_frp)

In [None]:
print(dtree_frp['FRP'].isna().sum())

In [None]:
### Plot FRP by dominant tree species and detection class
# Daytime observations only, matching the figure title (the filter is no longer
# overwritten by a second assignment of the unfiltered table).
df = dtree_frp[dtree_frp['DAYNIGHT'] == 'D']

# Order the species by median FRP, highest first.
# The list is not named `sorted`, so the builtin of that name stays usable.
med = df.groupby('species_name')['FRP'].median().sort_values(ascending=False)
species_order = med.index.tolist()

plt.figure(figsize=(8, 8)) 

sns.boxplot(
    data=df,
    x='FRP', 
    y='species_name', 
    hue='detection',
    order=species_order,  
    palette='coolwarm', 
    dodge=True
)

# FRP is drawn on a log axis
plt.xscale('log')

plt.title('Daytime FRP by Detection Class and Species')
plt.xlabel('Fire Radiative Power (FRP)')
plt.ylabel('Dominant Tree Species')

plt.tight_layout()

plt.savefig(os.path.join(projdir,'figures/FigureX_DTree-FRP-Detections.png'), dpi=300, bbox_inches='tight')

plt.show()

In [None]:
# Daytime observations whose dominant species is aspen
aspen_df = dtree_frp[dtree_frp['species_name'] == 'Aspen']
aspen_df = aspen_df[aspen_df['DAYNIGHT'] == 'D']

# Create the scatter plot: FRP on x, live basal area (BALIVE) on y
plt.figure(figsize=(6, 4))
sns.scatterplot(data=aspen_df, x='FRP', y='BALIVE', hue='detection', style='detection', s=50, palette="viridis")
# axis labels follow the plotted variables (x = FRP, y = BALIVE)
plt.xlabel('FRP')
plt.ylabel('Aspen Live Basal Area')
plt.legend(title='Detection Type')

plt.savefig(os.path.join(projdir,'figures/FigureX_DTree-Aspen-FRP_scatter.png'), dpi=300, bbox_inches='tight')

plt.show()

In [None]:
# Read the CBI summary table and list its columns
cbi_fp = os.path.join(projdir,'data/tabular/mod/AFD/combined-afd_aspen-fires_cbi.csv')
cbi = pd.read_csv(cbi_fp)
cbi.columns

In [None]:
cbi = cbi[['afdID','CBI_mean', 'CBI_bc_mean', 'rbr_mean']]
cbi.head()

In [None]:
dtree_frp_cbi = dtree_frp.merge(cbi, on='afdID', how='inner')
print(len(dtree_frp_cbi))
dtree_frp_cbi.head()

In [None]:
dtree_frp_cbi = dtree_frp_cbi.drop_duplicates(subset='afdID', keep='first')
len(dtree_frp_cbi)

In [None]:
print(dtree_frp_cbi['afdID'].isna().sum())
print(dtree_frp_cbi['CBI_mean'].isna().sum())
print(dtree_frp_cbi['species_name'].isna().sum())
print(dtree_frp_cbi['FRP'].isna().sum())

In [None]:
# Calculate the Pearson correlation by forest type

In [None]:
# NOTE(review): the second assignment replaces the first, so `df` ends up holding ALL
# daytime observations and the 'primary' filter has no effect. If both conditions are
# intended they need to be combined into one selection -- confirm.
df = dtree_frp_cbi[dtree_frp_cbi['detection'] == 'primary'] #primary obs.
df = dtree_frp_cbi[dtree_frp_cbi['DAYNIGHT'] == 'D'] #daytime

In [None]:
from scipy.stats import pearsonr

# Pearson correlation between FRP and bias-corrected CBI for each dominant species.
correlations = {}
for tree_type, group in df.groupby('species_name'):
    # use only rows where both values are present; a missing value would turn the
    # whole correlation into NaN
    pairs = group[['FRP', 'CBI_bc_mean']].dropna()
    # pearsonr needs at least two observations; skip smaller groups instead of failing
    if len(pairs) < 2:
        print(f"Tree Type: {tree_type} - skipped ({len(pairs)} complete obs.)")
        continue
    corr, p_value = pearsonr(pairs['FRP'], pairs['CBI_bc_mean'])
    correlations[tree_type] = (corr, p_value)
    print(f"Tree Type: {tree_type} - Pearson correlation: {corr:.2f}, p-value: {p_value:.4f}")

In [None]:
correlation_df = pd.DataFrame(correlations, index=['correlation', 'p_value']).T
print("\nCorrelation by Tree Type:\n")
correlation_df = correlation_df.sort_values('correlation')
correlation_df.to_csv(os.path.join(projdir, 'data/tabular/mod/afds_aspen-fires_frp-cbi_pearson.csv'))
correlation_df.head(15)

In [None]:
# One regression panel per species: FRP (x) against bias-corrected CBI (y)
lm_options = dict(
    x='FRP', y='CBI_bc_mean', hue='species_name', col='species_name',
    col_wrap=3, height=2, aspect=1.5,
    scatter_kws={'alpha':0.4}, line_kws={'color':'black'}, sharey=True,
)
g = sns.lmplot(data=df, **lm_options)

# Tidy every panel: strip the "species_name = " prefix from the title and fix the y-range
for panel in g.axes.flat:
    panel.set_title(panel.get_title().replace('species_name = ', ''))
    panel.set_ylim(0, 4)  # Set the y-axis limit from 0 to 4

fig_path = os.path.join(projdir,'figures/FigureX_DTree-FRP-CBI_bc.png')
plt.savefig(fig_path, dpi=300, bbox_inches='tight')

plt.show()