In [1]:
"""
Package imports and environment variables
"""

# Import modules and env vars

import os, time, glob
import earthaccess
import pandas as pd
import geopandas as gpd
import rioxarray as rxr
import rasterio as rio
import numpy as np
import math
import gc
import contextlib

from netCDF4 import Dataset
from shapely import geometry
from datetime import timedelta
from datetime import datetime
from shapely.geometry import box
from shapely.geometry.polygon import orient
from matplotlib import pyplot as plt
from rasterio.transform import from_origin
from rasterio.features import rasterize

# Project root directory. The path below is machine-specific, so it can be
# overridden by setting the ASPEN_FIRE_DIR environment variable before the
# kernel starts; when the variable is unset the original location is used.
maindir = os.environ.get(
    'ASPEN_FIRE_DIR',
    '/Users/max/Library/CloudStorage/OneDrive-Personal/mcook/aspen-fire'
)

# Projection information
geog_crs = 'EPSG:4326'  # Geographic projection
proj_crs = 'EPSG:5070'  # Projected coordinate system

print("Success!")

Success!


In [None]:
class Download_Extract_VIIRS_ActiveFires:
    """
    Search, download and extract VIIRS active fire detections for one fire.

    Typical use: build the object for a single fire perimeter, call
    `_search_request()`, pass the result to `download_results()`, then call
    `create_fire_gdf()` to read the downloaded NetCDF granules into a
    GeoDataFrame of fire pixel locations.
    """

    def __init__(self, geom, start_date, last_date, out_dir, id_col,
                 geog_crs='EPSG:4326', proj_crs='EPSG:5070', 
                 buffer=None, short_names=None, delete_ds=None):
        """
        Args:
            geom: GeoDataFrame (point or polygon) describing the fire;
            start_date: the first date in the search request;
            last_date: the final date for the search results;
            out_dir: output directory where results are downloaded and processed
                (created if it does not exist);
            id_col: the column in the GeoDataFrame which contains the unique ID
                (the value of the first row is used);
            geog_crs: geographic CRS used for the search bounds and the
                granule lat/lon filter, defaults to WGS84;
            proj_crs: projected CRS used for buffering, area and the output
                points, defaults to EPSG:5070;
            buffer: optional buffer distance applied to the geometry before
                computing the extraction bounds. It is applied in `proj_crs`
                units (metres for the default EPSG:5070);
            short_names: optional list of product short names to search;
            delete_ds: if truthy, each NetCDF granule is deleted after it has
                been read by `create_fire_gdf`.
        """

        self.gdf = geom.copy()

        self.geog_crs = geog_crs
        self.proj_crs = proj_crs

        # Bounding box of the geometry in geographic coordinates, used for the
        # earthaccess polygon search.
        self.bounds = geom.to_crs(geog_crs).unary_union.envelope
        self.coords = list(self.bounds.exterior.coords)

        self.date_range = (start_date, last_date)
        self.out_dir = out_dir
        self.id = self.gdf[id_col].iloc[0]  # grab the fire ID
        self.buffer = buffer
        if self.buffer:
            # Buffer in the projected CRS so the distance is in that CRS's
            # linear units rather than in whatever units `geom` arrived in.
            buffered = geom.to_crs(proj_crs).geometry.buffer(buffer)
            buffered_bounds = buffered.to_crs(geog_crs).unary_union.envelope
            self.coords_buffer = list(buffered_bounds.exterior.coords)
        else:
            # Always define the attribute so create_fire_gdf works without a buffer.
            self.coords_buffer = self.coords

        os.makedirs(self.out_dir, exist_ok=True)

        if short_names is not None:
            self.short_names = short_names
        else:
            self.short_names = ['VNP14IMG', 'VJ114IMG']

        # Geolocation short_names:
        self.geoloc = ['VNP03IMG', 'VJ103IMG']

        self.delete_ds = bool(delete_ds)

    @staticmethod
    def _bbox_from_coords(coords):
        """ Return (minx, miny, maxx, maxy) for a sequence of (x, y[, z]) coordinates """
        xs = [c[0] for c in coords]
        ys = [c[1] for c in coords]
        return min(xs), min(ys), max(xs), max(ys)

    def _get_area(self):
        """
        Return the total area (km2) of the input geometries, measured in `proj_crs`
        """
        total_area_sqm = self.gdf.to_crs(self.proj_crs).geometry.area.sum()
        return total_area_sqm / 1e6

    def _search_request(self):
        """
        Search earthaccess for every short name in `self.short_names`.

        Returns a dictionary {short_name: search results} containing only the
        short names that returned at least one granule, or None when nothing
        was found for any short name. Search errors are reported and skipped
        so that one failing product does not abort the others.
        """

        print(f'Fire ID: {self.id}')

        search_dict = {}  # to store the search results

        for short_name in self.short_names:
            try:
                # Search for products matching our short names
                result = earthaccess.search_data(
                    short_name=short_name,
                    polygon=self.coords,
                    temporal=self.date_range,
                    count=1000,
                )
            except Exception as e:
                print(f"Skipping polygon ID {self.id}: {short_name} ({e})")
                continue

            if not result:
                print(f"Skipping polygon ID {self.id}: {short_name} (no data found)")
                continue

            search_dict[short_name] = result

        # None signals "no valid search results" to the caller
        return search_dict or None

    def download_results(self, result_dict):
        """
        Download earthaccess search results to `<out_dir>/FIRED_<id>/<short_name>/`.

        Does nothing when `result_dict` is None or empty.
        """
        if not result_dict:
            return

        for key, result in result_dict.items():
            # Set the output directory as the ID and short_name
            fd = os.path.join(self.out_dir, f'FIRED_{self.id}/{key}/')
            # Download the search results; stdout is redirected to the null
            # device for the duration of the call to hide progress output.
            with open(os.devnull, 'w') as f, contextlib.redirect_stdout(f):
                earthaccess.download(result, local_path=fd)

    def create_fire_gdf(self):
        """
        Build a GeoDataFrame of active fire detections from the NetCDF granules
        downloaded for this fire.

        Only files below `<out_dir>/FIRED_<id>/` are read, so granules that were
        downloaded for other fires are never attributed to this one. Granules
        without an `FP_latitude` variable (for example geolocation products)
        are skipped. Detections are filtered to the (buffered) bounding box.

        Returns: GeoDataFrame of fire pixels in `proj_crs`.
        Raises: ValueError when no usable granule is found.
        """

        # List of downloaded .nc files for this fire only
        fire_dir = os.path.join(self.out_dir, f'FIRED_{self.id}')
        nc_files = sorted(list_files(fire_dir, "*.nc", recursive=True))

        minx, miny, maxx, maxy = self._bbox_from_coords(self.coords_buffer)

        out_fire_dfs = []  # to store the dataframes for each nc file
        for nc_file in nc_files:

            # The context manager guarantees the file handle is released
            # before the optional os.remove below.
            with Dataset(nc_file, 'r', format='NETCDF4') as ds:

                if 'FP_latitude' not in ds.variables:
                    # Not an active fire granule; nothing to extract.
                    continue

                # Grab some NetCDF attributes
                day_night_flag = ds.getncattr('DayNightFlag')
                short_name = ds.getncattr('ShortName')
                platform = ds.getncattr('PlatformShortName')
                version = ds.getncattr('VersionID')
                start_time_str = ds.getncattr('PGE_StartTime')
                acq_datetime = datetime.strptime(start_time_str, '%Y-%m-%d %H:%M:%S.%f')
                julian_day = acq_datetime.timetuple().tm_yday  # day of year

                # Lat/lons of the fire detections, filtered to the fire bounds
                flats = np.array(ds.variables['FP_latitude'][:])
                flons = np.array(ds.variables['FP_longitude'][:])
                fll = np.logical_and.reduce(
                    (flons >= minx, flons <= maxx, flats >= miny, flats <= maxy))

                # Extract fire pixel information
                df = pd.DataFrame({
                    'fired_id': self.id,
                    'acq_datetime': acq_datetime,
                    'acq_julian_day': julian_day,
                    'day_night': day_night_flag,
                    'short_name': short_name,
                    'platform': platform,
                    'version': version,
                    'latitude': flats[fll],
                    'longitude': flons[fll],
                    'frp': np.array(ds.variables['FP_power'][:])[fll],
                    'fp_rad13': np.array(ds.variables['FP_Rad13'][:])[fll],
                    'fp_t4': np.array(ds.variables['FP_T4'][:])[fll],
                    'fp_t5': np.array(ds.variables['FP_T5'][:])[fll],
                    'confidence': np.array(ds.variables['FP_confidence'][:])[fll],
                    'view_az_an': np.array(ds.variables['FP_ViewAzAng'][:])[fll],
                    'view_zen_an': np.array(ds.variables['FP_ViewZenAng'][:])[fll]
                })

            out_fire_dfs.append(df)

            # Clean up the granule once its contents are in memory
            if self.delete_ds:
                os.remove(nc_file)

        if not out_fire_dfs:
            raise ValueError(
                f'No active fire granules found for fire ID {self.id} in {fire_dir}')

        # Concatenate the out dfs for the entire fire
        fire_data = pd.concat(out_fire_dfs, ignore_index=True)

        # Create a GeoDataFrame in geographic coordinates ...
        fp_points = gpd.GeoDataFrame(
            fire_data,
            geometry=gpd.points_from_xy(fire_data.longitude, fire_data.latitude),
            crs=self.geog_crs)
        # ... and reproject to the projected coordinate system
        return fp_points.to_crs(self.proj_crs)
            

def list_files(path, ext, recursive):
    """
    List files of a specific type in a directory or subdirectories.

    Args:
        path: directory to search;
        ext: file ending to match, with or without a leading wildcard
            (".nc", "nc" and "*.nc" are all accepted);
        recursive: any truthy value also searches all subdirectories.

    Returns: sorted list of matching paths (empty when nothing matches).
    """
    # Strip a leading wildcard so the pattern never ends up as "**.nc"
    pattern = '*{}'.format(ext.lstrip('*'))
    if recursive:
        # "**" also matches zero directories, so files directly in `path` are included
        matches = glob.glob(os.path.join(path, '**', pattern), recursive=True)
    else:
        matches = glob.glob(os.path.join(path, pattern), recursive=False)
    # glob returns files in arbitrary order; sort for reproducible runs
    return sorted(matches)


def get_coords(geom, buffer):
    """
    Returns the bounding box coordinates for a given geometry(ies) and buffer.

    Args:
        geom: GeoDataFrame whose geometries are buffered. It is NOT modified;
        buffer: buffer distance, in the units of the CRS of `geom`.

    Returns: list of the exterior coordinates of the bounding box of the
    buffered geometries, expressed in the module-level `geog_crs`.
    """
    # Work on the buffered GeoSeries rather than overwriting the caller's
    # geometry column (repeated calls would otherwise keep growing it).
    buffered = geom.geometry.buffer(buffer)
    bounds = buffered.to_crs(geog_crs).unary_union.envelope  # geographic coordinates
    return list(bounds.exterior.coords)
    

def pixel_buffer(gdf, pixel_size=375):
    """
    Replace each point with an axis-aligned square centred on it.

    Args:
        gdf: GeoDataFrame of points with a 'geometry' column;
        pixel_size: side length of the square, in the units of the point
            coordinates (default 375, the nadir pixel size).

    Returns: GeoDataFrame with the same attribute columns and square geometries.
    """
    def _square_around(point):
        # Half a pixel either side of the point in both x and y
        return box(
            point.x - pixel_size / 2, point.y - pixel_size / 2,
            point.x + pixel_size / 2, point.y + pixel_size / 2
        )

    squares = gdf.geometry.apply(_square_around)
    attributes = gdf.drop(columns='geometry')
    return gpd.GeoDataFrame(attributes, geometry=squares)


def handle_duplicates(gdf):
    """
    Handles duplicate fire detections, retaining one.

    Placeholder: no logic is implemented, so the input is ignored and
    None is returned.
    """
    return None

print("Class and functions ready!")

In [None]:
# Read the fire perimeters, then report the available columns and the row count
fires = gpd.read_file(
    os.path.join(maindir,'Aim2/data/spatial/mod/FIRED/fired_events_west_aspen.gpkg')
)
print(fires.columns, len(fires), sep='\n')

In [None]:
# Run the download class for the fire perimeters individually
outdir = os.path.join(maindir, 'Aim2/data/spatial/raw/VIIRS/')
resdir = os.path.join(maindir,'Aim2/data/spatial/mod/VIIRS/')
os.makedirs(resdir, exist_ok=True)

# Get a list of fire IDs
fire_ids = fires['fired_id'].unique()

fp_points_by_fire = [] # to store the output geodataframes (one per fire)
no_data_ids = [] # to store fire IDs with no data available

# NOTE: only the first five fire IDs are processed here.
for fire_id in fire_ids[0:5]:
    # Retrieve the fire perimeter
    fire = fires[fires['fired_id'] == fire_id]

    # Initiate the download and extract class
    # NOTE(review): the *MODLL short names are geolocation products and are
    # presumably not readable as fire-pixel granules -- confirm they are
    # needed here.
    downloader = Download_Extract_VIIRS_ActiveFires(
        geom=fire,
        start_date=fire['ig_date'].iloc[0],
        last_date=fire['last_date'].iloc[0],
        out_dir=outdir,
        id_col='fired_id',
        buffer=1000, # in meters?
        short_names=['VNP14IMG', 'VJ114IMG', 'VNP03MODLL', 'VJ103MODLL'],
        delete_ds=False
    )

    try:
        # Retrieve the search results (None or empty when nothing was found)
        search_results = downloader._search_request()
        if not search_results:
            raise ValueError(f'No data found for {fire_id}, skipping completely !')

        # Download the search results
        downloader.download_results(search_results)
        # Create the geodataframe, append to output list
        fp_points_fire = downloader.create_fire_gdf()
        fp_points_by_fire.append(fp_points_fire)

    except Exception as e:
        # Best effort: record the failure and move on to the next fire
        print(f"Skipping FIRED ID {fire_id}\n{e}")
        no_data_ids.append(fire_id)
        continue  # continue to the next fire id

# Concatenate the results
if not fp_points_by_fire:
    raise ValueError('No active fire detections were extracted for any fire ID')
fp_points = gpd.GeoDataFrame(pd.concat(fp_points_by_fire, ignore_index=True))

print("Done!")

In [None]:
fp_points.head()

In [None]:
len(fp_points)

In [None]:
# Write the combined active fire detection points to a GeoPackage under maindir
fp_points.to_file(os.path.join(maindir,'Aim2/data/spatial/mod/VIIRS/viirs_afd_geo_points.gpkg'))

In [None]:
# Create a pixel area: turn every detection point into a square of the default
# pixel_buffer size (375, in the units of the fp_points coordinates) and save it
fp_pixels = pixel_buffer(fp_points)
fp_pixels.to_file(os.path.join(maindir,'Aim2/data/spatial/mod/VIIRS/viirs_afd_geo_pixels.gpkg'))

In [None]:
def remove_duplicates(fp_points, time_threshold=8, distance_threshold=375):
    """
    Remove duplicate fire detections based on time difference and spatial distance.

    Detections are processed in chronological order. Every later detection that
    lies within `time_threshold` seconds AND `distance_threshold` of a retained
    detection is dropped, and the retained detection's FRP becomes the mean FRP
    of itself and all of the detections merged into it.

    Args:
        fp_points (GeoDataFrame): fire detections with 'acq_datetime', 'frp' and
            geometry columns. The input is not modified.
        time_threshold (int): Time difference threshold in seconds.
        distance_threshold (float): Distance threshold, in the units of the
            geometry coordinates (along-track dimension of the fire pixels).

    Returns:
        GeoDataFrame: time-sorted copy with duplicates removed, FRP averaged
        and a fresh 0..n-1 index.
    """
    # Work on a copy so the datetime conversion does not leak into the caller's frame
    pts = fp_points.copy()

    # Convert acquisition datetime to datetime if not already
    if not pd.api.types.is_datetime64_any_dtype(pts['acq_datetime']):
        pts['acq_datetime'] = pd.to_datetime(pts['acq_datetime'])

    # Sort by acquisition datetime so the time window is a contiguous slice
    pts = pts.sort_values(by='acq_datetime').reset_index(drop=True)

    times = pts['acq_datetime']
    geoms = list(pts.geometry)
    frp = np.array(pts['frp'], dtype='float64')  # original FRP values
    merged_frp = frp.copy()                      # FRP after averaging
    window = timedelta(seconds=time_threshold)

    removed = set()  # positional indexes of duplicates to drop

    for i in range(len(pts)):
        if i in removed:
            continue

        t = times.iloc[i]
        if pd.isna(t):
            continue  # a missing timestamp can never fall inside a time window

        # Rows with |time - t| <= window occupy positions [lo, hi) in the sorted frame
        lo = int(times.searchsorted(t - window, side='left'))
        hi = int(times.searchsorted(t + window, side='right'))

        group = [frp[i]]  # FRP of the retained detection and of its duplicates
        for j in range(lo, hi):
            if j == i or j in removed:
                continue
            if geoms[i].distance(geoms[j]) <= distance_threshold:
                # Found a duplicate: keep detection i, drop detection j
                removed.add(j)
                group.append(frp[j])

        if len(group) > 1:
            # Average over ALL merged detections, not just the last pair
            merged_frp[i] = sum(group) / len(group)

    if removed:
        pts['frp'] = merged_frp

    return pts.drop(index=sorted(removed)).reset_index(drop=True)

In [None]:
filtered_fp_points = remove_duplicates(fp_points)

In [None]:
filtered_fp_points.head()

In [None]:
# Create a pixel area from the de-duplicated detections and save it to a
# separate GeoPackage (note: this overwrites the earlier `fp_pixels` variable)
fp_pixels = pixel_buffer(filtered_fp_points)
fp_pixels.to_file(os.path.join(maindir,'Aim2/data/spatial/mod/VIIRS/viirs_afd_geo_pixels_rm.gpkg'))

In [None]:
def _summarise_daily_frp(daily_frp, dates):
    """
    Collapse chronologically ordered daily FRP grids (NaN = no detection) into
    the five output bands: day-of-year of first detection, FRP on that day,
    day-of-year of maximum FRP, maximum FRP and YYYYMMDD date of maximum FRP.
    """
    stack = np.stack(daily_frp)              # (n_days, nrows, ncols)
    burned = ~np.isnan(stack)
    any_burn = burned.any(axis=0)

    first_idx = burned.argmax(axis=0)        # first day with a detection
    # -inf stands in for "no detection" so argmax never has to deal with NaN
    max_idx = np.where(burned, stack, -np.inf).argmax(axis=0)

    julian = np.array([d.timetuple().tm_yday for d in dates], dtype='float64')
    ymd = np.array([d.year * 10000 + d.month * 100 + d.day for d in dates], dtype='float64')

    def _pick(idx):
        return np.take_along_axis(stack, idx[np.newaxis], axis=0)[0]

    julian_arr = np.where(any_burn, julian[first_idx], np.nan)
    frp_arr = np.where(any_burn, _pick(first_idx), np.nan)
    day_max_frp_arr = np.where(any_burn, julian[max_idx], np.nan)
    max_frp_arr = np.where(any_burn, _pick(max_idx), np.nan)
    date_max_frp_arr = np.where(any_burn, ymd[max_idx], np.nan)
    return julian_arr, frp_arr, day_max_frp_arr, max_frp_arr, date_max_frp_arr


def create_mosaic_frp_tif(afd_data, perim, fire_id, crs, out_dir):
    """ 
    Creates multiband FRP mosaics from the active fire data for a given perimeter.

    One GeoTIFF named '<platform>_<fire_id>_<Day|Night>_mb.tif' is written for
    every platform / day-night combination that has detections. Bands:
        1: day of year of the first detection in the pixel
        2: FRP on that first day
        3: day of year of the maximum FRP
        4: maximum FRP over all days
        5: date (YYYYMMDD) of the maximum FRP
    Pixels without any detection are NaN in every band. The bands are stored
    as float64 because YYYYMMDD values are not exactly representable in float32.

    Args:
        afd_data: GeoDataFrame with active fire detection data; needs the
            'platform', 'day_night', 'acq_datetime' (datetime dtype) and 'frp' columns
        perim: the polygon perimeter (GeoDataFrame) defining the grid extent
        fire_id: identifier used in the output file names
        crs: the projected coordinate system of the output grid; `afd_data`
            is assumed to already be in this CRS -- TODO confirm at the call site
        out_dir: directory the GeoTIFFs are written to
    Returns: path of the last GeoTIFF written, or None if nothing was written
    """

    # Define the regular grid over the bounding box of the perimeter
    pixel_size = 375  # Approx. 375 m resolution
    minx, miny, maxx, maxy = perim.to_crs(crs).total_bounds
    # Round up so the grid covers the whole extent (and is never zero-sized)
    nrows = max(1, math.ceil((maxy - miny) / pixel_size))
    ncols = max(1, math.ceil((maxx - minx) / pixel_size))
    shape = (nrows, ncols)
    transform = from_origin(minx, maxy, pixel_size, pixel_size)

    out_tif = None

    for platform in afd_data['platform'].unique():
        for day_night in ['Day', 'Night']:
            # Filter data by platform and day/night flag
            afd_ = afd_data[(afd_data['platform'] == platform) & (afd_data['day_night'] == day_night)].copy()
            if afd_.empty:
                continue  # nothing to mosaic for this combination

            # Group data by day, in chronological order so that "first day of
            # burn" really is the earliest date regardless of row order
            afd_['date'] = afd_['acq_datetime'].dt.date
            dates = sorted(afd_['date'].dropna().unique())
            if not dates:
                continue

            daily_frp = []  # one FRP grid per date, NaN where nothing was detected
            for date in dates:
                afd_day = afd_[afd_['date'] == date]

                # Merge the grids of every observation time within the day by
                # taking the per-pixel maximum FRP
                day_grid = np.full(shape, np.nan)
                for time_ in afd_day['acq_datetime'].unique():
                    afd_time = afd_day[afd_day['acq_datetime'] == time_]
                    grid = np.array(rasterize_afd(afd_time, transform, shape), dtype='float64')
                    # The rasterizer fills empty pixels with 0; treat anything
                    # that is not a positive FRP as "no detection"
                    grid[~(grid > 0)] = np.nan
                    day_grid = np.fmax(day_grid, grid)  # NaN-aware maximum
                daily_frp.append(day_grid)

            bands = _summarise_daily_frp(daily_frp, dates)

            # Create multiband raster
            out_tif = os.path.join(out_dir, f'{platform}_{fire_id}_{day_night}_mb.tif')

            with rio.open(
                out_tif, 'w', driver='GTiff', height=nrows, width=ncols, count=5, dtype='float64',
                crs=crs, transform=transform, nodata=np.nan
            ) as dst:
                for band_index, band in enumerate(bands, start=1):
                    dst.write(band.astype('float64'), band_index)

    return out_tif
    

def rasterize_afd(day_data, transform, out_shape):
    """
    Rasterize a set of active fire detections to a numpy array of FRP values.

    Args:
        day_data: GeoDataFrame with active fire detection data ('frp' column
            plus geometry)
        transform: Affine transform for the output raster
        out_shape: Shape (rows, cols) of the output raster
    Returns: float32 numpy array of the rasterized FRP values, 0 where there is
        no detection. When several detections fall in the same pixel the
        largest FRP is kept.
    """
    empty = np.zeros(out_shape, dtype='float32')
    if len(day_data) == 0:
        return empty  # rasterize() raises on an empty list of shapes

    # Later shapes overwrite earlier ones, so burn in ascending FRP order to
    # let the largest value win where detections share a pixel
    ordered = day_data.sort_values('frp', na_position='first')
    geometries = [
        (geom, float(value))
        for geom, value in zip(ordered.geometry, ordered['frp'])
        if geom is not None and not pd.isna(value)
    ]
    if not geometries:
        return empty

    day_raster = rasterize(
        geometries,
        out_shape=out_shape,
        transform=transform,
        fill=0,
        dtype='float32',
        all_touched=True
    )
    return day_raster

In [None]:
# Create the day/night mosaics: one call to create_mosaic_frp_tif per fire ID
# that has at least one active fire detection in fp_points

fire_ids_new = fp_points['fired_id'].unique()

# Output directory for the GeoTIFF grids (created on first use)
mosaic_dir = os.path.join(maindir, 'Aim2/data/spatial/mod/VIIRS/grids/')
if not os.path.exists(mosaic_dir):
    os.makedirs(mosaic_dir)
    
for fire_id in fire_ids_new:
    print(f'Creating mosaic rasters for: {fire_id}')
    
    # Buffer a copy of the perimeter so `fires` itself is left unchanged.
    # NOTE(review): the distance is in the units of the `fires` CRS --
    # TODO confirm these are metres.
    perim = fires[fires['fired_id'] == fire_id].copy()
    perim['geometry'] = perim.geometry.buffer(1000) # same buffer as was used to extract
    
    # Detections belonging to this fire only
    fp_da = fp_points[fp_points['fired_id'] == fire_id].copy()
    
    create_mosaic_frp_tif(fp_da, perim, fire_id, crs=proj_crs, out_dir=mosaic_dir)
    
    print('\n ~~~ \n')
    