In [None]:
"""
"""

import os, time, glob, gc
import numpy as np
import pandas as pd
import geopandas as gpd
import rioxarray as rxr
import rasterio as rio
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import multiprocessing as mp

from shapely.geometry import box
from datetime import datetime
from rasterstats import zonal_stats

import warnings
warnings.filterwarnings("ignore") # suppresses annoying geopandas warning

# CRS that every layer in this notebook is reprojected to before analysis/export
proj = 'EPSG:5070'

# Root data directory. Defaults to the original local OneDrive location; set the
# ASPEN_FIRE_MAINDIR environment variable to run the notebook on another machine
# without editing this cell.
maindir = os.environ.get(
    'ASPEN_FIRE_MAINDIR',
    '/Users/max/Library/CloudStorage/OneDrive-Personal/mcook/')
projdir = os.path.join(maindir, 'aspen-fire/Aim2/')

print("Ready to go !")


## Wrangle the MODIS 1km and VIIRS 375m Active Fire Detections (AFD)

We downloaded the archived AFD for three products from NASA FIRMS (https://firms.modaps.eosdis.nasa.gov/download/), covering 2018-2023 in the western US: MODIS Collection 6.1 (1km), VIIRS on the Suomi National Polar-Orbiting Partnership satellite (VIIRS S-NPP 375m), and VIIRS on NOAA-20 (VIIRS NOAA-20 375m).

The VIIRS observations are split into archive (2018-2022) and "NRT" (2022-2023). These files need to be merged prior to performing the tidying.

To start with, we will create a tidy database of VIIRS observations for both the S-NPP and NOAA-20 vintages. Then, we will look at creating a combined dataset.

In [None]:
t0 = time.time()

# Set up the file paths

modis = os.path.join(projdir,'data/spatial/raw/NASA-FIRMS/DL_FIRE_M-C61_476781/')
snpp = os.path.join(projdir,'data/spatial/raw/NASA-FIRMS/DL_FIRE_SV-C2_476784/')
noaa = os.path.join(projdir,'data/spatial/raw/NASA-FIRMS/DL_FIRE_J1V-C2_476782/')

# Map each product key to its download directory.
# (Deliberately not named `dict`: that would shadow the builtin for the rest of
# the kernel session and break any later call to dict(...).)
afd_dirs = {
    "MOD61": modis,
    "SNPP": snpp,
    "NOAA-20": noaa
}

# Minimum number of detections a fire must have to be kept (i.e. drop fires
# that matched only a single detection).
min_obs_per_fire = 2

# Buffer fire perimeters for the extraction.
# NOTE(review): `fired_aspen` is not created in this notebook section; it must
# already exist in the kernel before this cell runs.
fire_buffer = fired_aspen.to_crs(proj)[['fired_id','ig_date','last_date','geometry']].copy()
fire_buffer['geometry'] = fire_buffer.geometry.buffer(1000)  # 1km buffer (assumes proj units are metres)

# Process each of the archive data products
gdfs = {} # dictionary to store the geo data frames
for key, path in afd_dirs.items():
    print(f'Processing: {key}')
    # Read in the archive vector data
    vect = glob.glob(path+"*.shp")
    print([os.path.basename(v) for v in vect])
    if not vect:
        # Fail with a clear message instead of an IndexError on vect[0]
        raise FileNotFoundError(f"No shapefiles found for {key} in {path}")
    if len(vect) > 1:
        print("~Merging archive and NRT data.")
        archive = gpd.read_file([v for v in vect if "archive" in v][0]).to_crs(proj)
        nrt = gpd.read_file([v for v in vect if "nrt" in v][0]).to_crs(proj)
        # Concatenate the archive and NRT
        afd = pd.concat([archive, nrt], ignore_index=True)
        del archive, nrt
    else:
        afd = gpd.read_file(vect[0]).to_crs(proj)

    # Add some attribute information
    afd['VID'] = afd.index # unique ID column

    # Remove low-confidence observations.
    # NOTE(review): this only drops rows whose CONFIDENCE equals the string 'l';
    # a product that encodes confidence differently passes through unfiltered — confirm.
    try:
        afd = afd[afd['CONFIDENCE'] != 'l']
    except KeyError as e:
        print(f"KeyError: {e}")

    del vect

    #################################################
    # Extract AFDs within wildfire data (aspen fires)

    # Keep only detections that fall within a buffered fire perimeter
    afd_aspen = gpd.sjoin(afd, fire_buffer, how='inner', predicate='within')
    print(afd_aspen.columns)

    del afd

    ####################################################
    # Perform temporal filtering to remove false-positive matches

    # First, create date columns
    afd_aspen['ACQ_DATE'] = pd.to_datetime(afd_aspen['ACQ_DATE'])
    afd_aspen['ACQ_MONTH'] = afd_aspen['ACQ_DATE'].dt.month.astype(int)
    afd_aspen['ACQ_YEAR'] = afd_aspen['ACQ_DATE'].dt.year.astype(int)
    afd_aspen['ig_date'] = pd.to_datetime(afd_aspen['ig_date'])
    afd_aspen['last_date'] = pd.to_datetime(afd_aspen['last_date'])

    # Filter on the ignition -> last-date window at month resolution.
    # Year and month are combined into a single running month index so the
    # comparison stays correct when a fire spans a year boundary; testing year
    # and month independently would reject every detection of a Dec -> Jan fire.
    acq_ym = afd_aspen['ACQ_YEAR'] * 12 + afd_aspen['ACQ_MONTH']
    ig_ym = afd_aspen['ig_date'].dt.year * 12 + afd_aspen['ig_date'].dt.month
    last_ym = afd_aspen['last_date'].dt.year * 12 + afd_aspen['last_date'].dt.month
    afd_aspen_f = afd_aspen[(acq_ym >= ig_ym) & (acq_ym <= last_ym)]

    # Keep unique rows (a detection inside several buffers keeps its first match)
    afd_aspen_f = afd_aspen_f.drop_duplicates(subset='VID', keep='first')

    del afd_aspen, acq_ym, ig_ym, last_ym

    #############################################
    # Remove fires with fewer than `min_obs_per_fire` observations

    # Get a count per fire
    afd_counts = afd_aspen_f.groupby('fired_id').size().reset_index(name='counts')

    # Unique FIRED ids of the fires that meet the minimum observation count
    ids = afd_counts.loc[afd_counts["counts"] >= min_obs_per_fire, 'fired_id'].unique().tolist()

    # Filter the datasets based on these FIRED ids
    afd_aspen_f = afd_aspen_f[afd_aspen_f['fired_id'].isin(ids)]
    fired_aspen_f = fired_aspen[fired_aspen['fired_id'].isin(ids)]

    # Note: the min/max below are computed from the per-fire counts *before* the
    # minimum-observation filter is applied.
    print(f"Minimum obs./fire: {afd_counts['counts'].min()}; \nMaximum obs./fire: {afd_counts['counts'].max()}")
    print(f"Number of fires after filtering: {len(fired_aspen_f)}")
    print(f"Number of obs. after filtering: {len(afd_aspen_f)}")

    del afd_counts, ids

    #################################################
    # Plot the distribution of observations over time
    plt.figure(figsize=(6, 3))
    afd_aspen_f['ACQ_DATE'].hist(bins=100)
    plt.title(f'{key} - AFDs (2018-2023)')
    plt.xlabel('Date')
    plt.ylabel('Number of Observations')
    plt.tight_layout()
    plt.show()

    ##############################
    # Append to the new dictionary
    gdfs[key] = afd_aspen_f

print(f"Total elapsed time: {round((time.time() - t0))} seconds.")

In [None]:
# Write each filtered product to its own GeoPackage, in its current state
for product_key, file_name in (
    ('MOD61', 'mod61_archive_afd_aspen.gpkg'),
    ('SNPP', 'snpp_archive_afd_aspen.gpkg'),
    ('NOAA-20', 'noaa20_archive_afd_aspen.gpkg'),
):
    gdfs[product_key].to_file(os.path.join(projdir, 'data/spatial/mod/AFD/' + file_name))

## Handling acquisition time-of-day and spatially overlapping observations

The VIIRS S-NPP AFD have many overlapping observations during a single fire event. In some cases, the overlapping points are on the same day and time but with different FRP values. In these cases, it may be best to dissolve them into a single observation with a summary FRP value (see Case 1 below).

From here on we can work with just the SNPP because it has the most consistency across our time period (NOAA-20 started in 2020). Later we can investigate the combination of the three datasets or at least make a comparison.

In [None]:
# Pull the S-NPP detections out of the per-product dictionary and keep only
# the rows with a positive FRP value (drops FRP == 0)
snpp_aspen = gdfs['SNPP'].loc[lambda frame: frame['FRP'] > 0]
snpp_aspen.head()


In [None]:
# Summary statistics of FRP for the retained S-NPP detections
print(snpp_aspen.loc[:, 'FRP'].describe())

In [None]:
# Summary of the acquisition-time column for the retained S-NPP detections
print(snpp_aspen.loc[:, 'ACQ_TIME'].describe())

In [None]:
# Move the old index into a column named 'index_' and drop the 'index_right'
# column that the earlier spatial join left behind
snpp_aspen = (
    snpp_aspen
    .reset_index()
    .rename(columns={'index': 'index_'})
    .drop(columns=['index_right'])
)
print(snpp_aspen.columns)

In [None]:
# Create a datetime object as a new column (in UTC)

import pytz

# Function to convert ACQ_DATE and ACQ_TIME to a datetime object in UTC
def convert_to_datetime(acq_date, acq_time):
    """Combine an acquisition date and an HHMM time into a UTC-aware datetime.

    Parameters
    ----------
    acq_date : object with a ``strftime`` method (e.g. datetime, pandas Timestamp)
        Acquisition date; only its year, month, and day are used.
    acq_time : str or int
        Acquisition time as HHMM. Values shorter than four digits
        (e.g. ``'5'``, ``'930'``, or the integer ``930``) are left-padded with zeros.

    Returns
    -------
    datetime.datetime
        Timezone-aware datetime whose tzinfo is UTC.

    Raises
    ------
    ValueError
        If ``acq_time`` is not 1-4 digits or is not a valid clock time.
    """
    # Function-scope import: only `datetime` is imported at the top of the file
    from datetime import timezone

    # str() lets integer-typed time columns through; zfill replaces the manual padding
    hhmm = str(acq_time).strip()
    if not hhmm.isdigit() or len(hhmm) > 4:
        raise ValueError(f"ACQ_TIME must be 1-4 digits in HHMM form, got {acq_time!r}")
    hhmm = hhmm.zfill(4)

    acq_date_str = acq_date.strftime('%Y-%m-%d')
    dt = datetime.strptime(acq_date_str + hhmm, '%Y-%m-%d%H%M')
    # The parsed value is naive; tag it as UTC without shifting the clock time
    return dt.replace(tzinfo=timezone.utc)

# Build the ACQ_DATETIME column row by row from the date and time columns
acq_datetimes = snpp_aspen[['ACQ_DATE', 'ACQ_TIME']].apply(
    lambda rec: convert_to_datetime(rec['ACQ_DATE'], rec['ACQ_TIME']),
    axis=1,
)
snpp_aspen.loc[:, 'ACQ_DATETIME'] = acq_datetimes

# Preview the new timezone-aware datetime column
print(snpp_aspen['ACQ_DATETIME'].head())

#### Case 1: Same day/time observations with different FRP values (spatially overlapping)

In this case, we have overlapping observations which have the same datetime but (sometimes) different FRP values. To handle this, we can group observations by datetime and perform a dissolve, summarizing FRP for each group (the cells below compute the 90th-percentile FRP per group rather than the mean).

In [None]:
# Self-join the S-NPP detections to pair every observation with each
# observation it intersects (including itself); shared column names get
# '_left' / '_right' suffixes
overlap = snpp_aspen.sjoin(
    snpp_aspen,
    predicate='intersects',
    lsuffix='left',
    rsuffix='right',
)
print(overlap.columns)

In [None]:
# Number each distinct (left datetime, right datetime) combination and store
# that number as the group ID `_VID_`.
# NOTE(review): the ID depends only on the datetime pair, not on which
# observations intersect. Every observation intersects itself, so all
# observations sharing one datetime appear to receive the same `_VID_`,
# wherever they are; pairs with different left/right datetimes also get their
# own IDs. Confirm this matches the intended "intersecting observations with
# the same DATETIME" grouping.
datetime_pair = ['ACQ_DATETIME_left', 'ACQ_DATETIME_right']
overlap['_VID_'] = overlap.groupby(datetime_pair).ngroup()
print(overlap[datetime_pair + ['_VID_']].head())

In [None]:
# Calculate the 90th percentile of FRP among the observations in each group

# Join the new _VID_ back to the original dataframe using VID
snpp_aspen_ = snpp_aspen.merge(
    overlap[['VID_left', '_VID_']].drop_duplicates(), 
    left_on='VID', right_on='VID_left', how='left').reset_index(drop=True)

# Calculate the 90th percentile FRP for each _VID_
def pct90(group):
    """Return a copy of `group` with an FRP_90 column holding the 90th percentile of its FRP.

    Retained so that any later cell calling pct90 still works; the main
    computation below uses groupby().transform() instead.
    """
    return group.assign(FRP_90=np.percentile(group['FRP'], 90))

# transform() computes one value per group and broadcasts it back onto that
# group's rows. Unlike groupby().apply() with a function that mutates each
# group, it keeps the `_VID_` column on every pandas version and does not
# trigger the "apply operated on the grouping columns" deprecation.
snpp_aspen_['FRP_90'] = (
    snpp_aspen_.groupby('_VID_')['FRP'].transform(lambda frp: np.percentile(frp, 90))
)

# Stable sort by group so rows are laid out group by group (original order kept
# within each group), matching what the previous apply()-based version produced
snpp_aspen_ = snpp_aspen_.sort_values('_VID_', kind='mergesort').reset_index(drop=True)
snpp_aspen_[['_VID_','VID','ACQ_DATETIME','LATITUDE','LONGITUDE','FRP','FRP_90','VERSION']].head()

In [None]:
# Merge the geometries of each `_VID_` group into one feature. Non-geometry
# columns (including FRP_90) take the first row's value in each group, which
# is dissolve's default aggregation and is spelled out here.
snpp_aspen_dis = snpp_aspen_.dissolve(by='_VID_', aggfunc='first')
snpp_aspen_dis = snpp_aspen_dis.reset_index()

snpp_aspen_dis.columns


In [None]:
# Save out a version of these data

# Buffering a point by distance d with square caps (cap_style=3) produces a
# square with sides of 2*d. To approximate a 375 m pixel the buffer distance
# must therefore be half the pixel width; buffering by 375 gave 750 m squares.
viirs_half_pixel = 375 / 2

# Save the VIIRS observations (points)
snpp_aspen_dis = snpp_aspen_dis.to_crs(proj)
snpp_aspen_dis.to_file(os.path.join(projdir,'data/spatial/mod/VIIRS/viirs_snpp_pt_fired_events_west_aspen.gpkg'))

# Create the buffered VIIRS obs. from the reprojected points, so the buffer
# distance is measured in the units of `proj` (assumed metres — confirm)
snpp_aspen_plot = snpp_aspen_dis.copy()
snpp_aspen_plot['geometry'] = snpp_aspen_plot.geometry.buffer(viirs_half_pixel, cap_style=3)  # 375m x 375m square

# Save the VIIRS observations (plots)
snpp_aspen_plot.to_file(os.path.join(projdir,'data/spatial/mod/VIIRS/viirs_snpp_plot_fired_events_west_aspen.gpkg'))

## Tidy the FRP data: remove null values, check on the obs./fire, and check on date matches

Some observations may not be joined correctly (e.g., spatial overlap but the wrong ignition year). We may also have some fires with too few observations.

In [None]:
# Re-count the dissolved observations per fire and report the extremes
obs_per_fire = snpp_aspen_dis.groupby('fired_id').size()
viirs_counts = obs_per_fire.reset_index(name='counts')
del obs_per_fire
print(viirs_counts['counts'].min())
print(viirs_counts['counts'].max())

In [None]:
# Make a map of the fire with the most observations

# Sort the VIIRS counts
viirs_counts = viirs_counts.sort_values('counts', ascending=False).reset_index(drop=True)

# Take the first row (the maximum)
max_obs = viirs_counts.iloc[0]['fired_id']
print(max_obs)

# Filter the fire perimeter and VIIRS obs. (daytime detections only)
perim = fired_aspen[fired_aspen['fired_id'] == max_obs]
obs = snpp_aspen_dis[snpp_aspen_dis['fired_id'] == max_obs]
obs = obs.copy()
obs['FRP_log'] = np.log1p(obs['FRP'])
obs = obs[obs['DAYNIGHT'] == 'D']
print(len(obs))

# Create the map
fig, ax = plt.subplots(figsize=(4, 5.5))
# Plot VIIRS points. The colour scale shows log1p(FRP), so the colourbar is
# labelled as a log-transformed quantity rather than raw FRP.
obs.plot(column='FRP_log', ax=ax, legend=True,
         legend_kwds={'label': "log(1 + Fire Radiative Power)", 'orientation': "horizontal"},
         cmap='magma', markersize=1, alpha=0.7)
# Plot the fire perimeter
perim.plot(ax=ax, color='none', edgecolor='black', linewidth=1, label='Fire Perimeter')
plt.tight_layout()
ax.grid(True)

# Save the map as a PNG.
# NOTE(review): the file name is fixed even though the fire is chosen
# dynamically as the one with the most observations — confirm they correspond.
plt.savefig(os.path.join(maindir,'aspen-fire/Aim2/figures/FigX_MullenFire_FRP.png'), dpi=300, bbox_inches='tight')

plt.show()

In [None]:
# Report the length of both plotted columns
for column_name in ('pct_aspen', 'FRP'):
    print(len(frp_aspen_f[column_name]))

# Scatterplot of FRP against percent aspen (fire perimeter)
fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(frp_aspen_f['pct_aspen'], frp_aspen_f['FRP'], alpha=0.5)  # semi-transparent markers

# Axis labels
ax.set_ylabel('Fire Radiative Power (FRP)')
ax.set_xlabel('Aspen %')

plt.show()

## Join VIIRS observations to daily FIRED perimeters

We want to summarize VIIRS observations on a daily basis and then associate them with the correct daily polygon from FIRED. The initial step is to group observations by day.

## Create the VIIRS observation buffer (375 m pixels)

The archive VIIRS data are distributed as shapefiles of points, each representing the pixel center of a VIIRS observation. In order to assess characteristics within the VIIRS observations, we want to create a square buffer around each centroid to approximate the nominal 375 m VIIRS pixel footprint.

In [None]:
# Create the buffered VIIRS obs.
# Buffering a point by distance d with square caps (cap_style=3) produces a
# square with sides of 2*d, so a 375 m pixel needs a buffer distance of half
# the pixel width; buffering by 375 gave 750 m squares.
# NOTE(review): assumes frp_aspen_f is in a CRS with metre units — confirm.
viirs_half_pixel = 375 / 2
frp_aspen_plot = frp_aspen_f.copy()
frp_aspen_plot['geometry'] = frp_aspen_plot.geometry.buffer(viirs_half_pixel, cap_style=3)  # 375m x 375m square

print(len(fired_aspen))

# Let's plot one fire using the FRP column to color the "plots"

# Filter the fire perimeter and VIIRS obs.
perim = fired_aspen[fired_aspen['fired_id'] == "42306"]  # Williams Fork Fire "45811.0"
obs = frp_aspen_plot[frp_aspen_plot['fired_id'] == "42306"]
obs = obs.copy()
obs['FRP_log'] = np.log1p(obs['FRP'])
obs = obs[obs['DAYNIGHT'] == 'D']  # plot only daytime observations
print(len(obs))

# Create the map
fig, ax = plt.subplots(figsize=(4, 5.5))
# Plot VIIRS "plots". The colour scale shows log1p(FRP), so the colourbar is
# labelled as a log-transformed quantity rather than raw FRP.
obs.plot(column='FRP_log', ax=ax, legend=True,
         legend_kwds={'label': "log(1 + Fire Radiative Power)"},
         cmap='magma', markersize=1, alpha=0.7)
# Plot the fire perimeter
perim.plot(ax=ax, color='none', edgecolor='black', linewidth=1, label='Fire Perimeter')
plt.tight_layout()
ax.grid(True)
plt.show()

In [None]:
# Replace each fire perimeter with its centroid point
centroid = fired_aspen.copy()
centroid['geometry'] = centroid.geometry.centroid

# Map of the centroids over the state outlines
fig, ax = plt.subplots(figsize=(6, 6))

states.plot(ax=ax, edgecolor='black', linewidth=1, color='none')

# NOTE(review): 'size' is computed here but never used — the markers below are
# sized by the raw 'pct_aspen' values. Confirm which scaling is intended.
centroid['size'] = centroid['pct_aspen'] * 10  # Adjust the scaling factor as necessary

# Centroids, coloured and sized by percent aspen
centroid_style = {
    'markersize': centroid['pct_aspen'],
    'column': 'pct_aspen',
    'cmap': 'viridis',
    'legend': True,
    'alpha': 0.6,
    'legend_kwds': {'label': "Aspen Percent"},
}
centroid.plot(ax=ax, **centroid_style)

# Optional: original fire perimeters for context
fired_aspen.plot(ax=ax, color='none', edgecolor='gray', linewidth=0.5)

ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.grid(True)

del centroid, centroid_style

# Save the map as a PNG
plt.savefig(os.path.join(maindir,'aspen-fire/Aim2/figures/Fig1_aspen_fires.png'), dpi=300, bbox_inches='tight')

plt.show()

## Tidy and save out the necessary files

Now that we have a tidy dataframe for both wildfires with >=5% pre-fire aspen cover and their associated nominal or high confidence VIIRS observations, we can save these files out for further processing. 

Some of the processing will occur in GEE, so for these files we want to save a simplified SHP with only the required attribute information (they will be joined back to the full data after processing).

In [None]:
# Re-count the buffered observations per fire and report the extremes
obs_per_fire = frp_aspen_plot.groupby('fired_id').size()
viirs_counts = obs_per_fire.reset_index(name='counts')
del obs_per_fire
print(viirs_counts['counts'].min())
print(viirs_counts['counts'].max())

In [None]:
# Subset the daily polygons to the aspen fires and export them.
# NOTE(review): `daily` is overwritten in place, so re-running this cell works
# on the already-filtered frame.

# IDs of the aspen fires
ids = fired_aspen['fired_id'].unique()

# Compare IDs as strings, then keep only the daily polygons of those fires
daily['id'] = daily['id'].astype(str)
is_aspen_fire = daily['id'].isin(ids)
daily = daily[is_aspen_fire]
del is_aspen_fire
print(len(daily['id'].unique()))

# Reproject to the project CRS and save the daily wildfire perimeters
daily = daily.to_crs(proj)
daily_out_path = os.path.join(maindir,'aspen-fire/Aim2/data/spatial/mod/FIRED/fired_daily_west_aspen.gpkg')
daily.to_file(daily_out_path)
del daily_out_path

In [None]:
# Save the wildfire perimeters
fired_aspen = fired_aspen.to_crs(proj)  # ensure the correct projection before exporting
fired_aspen.to_file(os.path.join(maindir,'aspen-fire/Aim2/data/spatial/mod/FIRED/fired_events_west_aspen.gpkg'))

# Save the VIIRS observations (points)
frp_aspen_f = frp_aspen_f.to_crs(proj)
frp_aspen_f.to_file(os.path.join(maindir,'aspen-fire/Aim2/data/spatial/mod/VIIRS/viirs_obs_fired_events_west_aspen.gpkg'))

# Save the VIIRS observations (plots)
frp_aspen_plot = frp_aspen_plot.to_crs(proj)
frp_aspen_plot.to_file(os.path.join(maindir,'aspen-fire/Aim2/data/spatial/mod/VIIRS/viirs_plots_fired_events_west_aspen.gpkg'))

# Tidy the files for GEE imports

# FIRED perimeters (1km buffer)
# NOTE(review): `fired_aspen_1km` is not created in this notebook section; it
# must already exist in the kernel.
print(fired_aspen_1km.columns)
# .copy() so the column assignments below modify an independent frame rather
# than a slice of fired_aspen_1km
fired_aspen_gee = fired_aspen_1km[['fired_id','ig_date','ig_year','last_date','mx_grw_dte','geometry']].copy()
# Dates are written as strings for the shapefile export
fired_aspen_gee['ig_date'] = fired_aspen_gee['ig_date'].astype(str)
# Convert last_date from its own values (previously it was overwritten with ig_date)
fired_aspen_gee['last_date'] = fired_aspen_gee['last_date'].astype(str)
fired_aspen_gee.to_file(os.path.join(maindir,'aspen-fire/Aim2/data/spatial/mod/GEE/fired_events_west_aspen.shp'))

# VIIRS "plots"
print(frp_aspen_plot.columns)
frp_aspen_gee = frp_aspen_plot[['fired_id','VID','ACQ_DATE','DAYNIGHT','geometry']].copy()
frp_aspen_gee['ACQ_DATE'] = frp_aspen_gee['ACQ_DATE'].astype(str)
frp_aspen_gee.to_file(os.path.join(maindir,'aspen-fire/Aim2/data/spatial/mod/GEE/viirs_plots_fired_events_west_aspen.shp'))

print("Success!")

In [None]:
# Run a garbage-collection pass now that the intermediate frames have been written out
gc.collect()