In [None]:
"""
Downloading VIIRS Active Fire Detections (AFD) with the 'earthaccess' Python API

For a given geometry (in this case, fire perimeters), download data granules for:

VIIRS/NPP Active Fires 6-Min L2 Swath 375m V002 (VNP14IMG)
VIIRS/NPP Imagery Resolution Terrain Corrected Geolocation 6-Min L1 Swath 375 m (VNP03IMG)

Return: 
    - Downloaded NetCDF granules for the above products
    - GeoDataFrame representing active fire pixel locations and attributes (before geolocation)
    - Geolocation grid representing pixel locations and overlap of adjacent orbits

Author: maxwell.cook@colorado.edu
"""

import sys, os
import time
import earthaccess
import geopandas as gpd
import numpy as np
import pandas as pd
import rioxarray as rxr
import rasterio as rio
import math
import contextlib
import traceback
import datetime as dt
import xarray as xr
import pyproj

from netCDF4 import Dataset 
from datetime import datetime
from datetime import timedelta
from matplotlib import pyplot as plt
from affine import Affine
from osgeo import gdal, gdal_array, gdalconst, osr
from rasterio.transform import from_bounds
from scipy.spatial import cKDTree
from urllib.parse import urlparse

import warnings
warnings.simplefilter('ignore')
warnings.filterwarnings('ignore')
import logging
logging.getLogger('earthaccess').setLevel(logging.ERROR)

# Custom functions
sys.path.append(os.path.join(os.getcwd(),'code/'))
from __functions import *
        
# Directories
# The project root defaults to the original author's local path. Set the
# ASPEN_FIRE_MAINDIR environment variable to run the notebook on another
# machine without editing this cell.
maindir = os.environ.get(
    'ASPEN_FIRE_MAINDIR',
    '/Users/max/Library/CloudStorage/OneDrive-Personal/mcook/'
)
projdir = os.path.join(maindir, 'aspen-fire/Aim2/')

# Output directories
dataraw = os.path.join(projdir,'data/spatial/raw/VIIRS/')  # raw granules
datamod = os.path.join(projdir,'data/spatial/mod/VIIRS/')  # processed outputs

print("Ready !")

In [None]:
class Download_VIIRS_AFD:
    """
    Searches for (and optionally downloads) VIIRS Active Fire Data (AFD) granules
    for a fire geometry and date range, then extracts the fire pixels that fall
    inside a bounding extent.

    Relies on helpers that are star-imported from `__functions` (`get_coords`,
    `list_files`) and on a module-level pixel-size lookup table named `lut`,
    which must exist before `create_fire_gdf` is called.
    """
    def __init__(self, start_date, last_date, geom = gpd.GeoDataFrame(),
                 id_col='Fire_ID', name_col='Fire_Name',
                 geog_crs = 'EPSG:4326', proj_crs = 'EPSG:5070',
                 short_names = ['VNP14IMG', 'VNP03IMG'], # active fire data and associated geolocation
                 buffer = None, out_directory=None, processed_granules=None,
                 download = False, region=None
                ):
        """
        Args:
            - start_date: the initial date for the granule search
            - last_date: the final date for the granule search
            - geom: GeoDataFrame for search request (fire perimeter)
            - id_col: column of `geom` holding the fire identifier
            - name_col: column of `geom` holding the fire name
            - geog_crs: Geographic projection (to retrieve coordinate pairs in lat/lon)
            - proj_crs: Projected coordinate system
            - short_names: the granules to be downloaded
            - buffer: Optional buffer for input geometry (passed to `get_coords`)
            - out_directory: output directory to store results
            - processed_granules: collection of granule native-ids to exclude from
              the search results; `None` means nothing has been processed yet
            - download: If 'True', downloads the netcdf, otherwise processes in cloud
            - region: optional GeoDataFrame; when given, its extent (rather than the
              fire's) is used to select fire pixels, while `geom` still drives the search
        Raises:
            - TypeError: if `region` is given but is not a GeoDataFrame
        """
        # Extract coordinate bounds.
        # NOTE(review): get_coords is defined in __functions; it is assumed to return
        # (search polygon coords, extent) with extent indexed as
        # [lon_min, lon_max, lat_min, lat_max] -- that is how create_fire_gdf uses it.
        if region is None:
            # use the fire perimeter for both the search and the pixel extraction
            self.coords, self.extent = get_coords(geom, buffer)
        elif isinstance(region, gpd.GeoDataFrame):
            # use the region boundary for FP and fire for search
            _, self.extent = get_coords(region, buffer) # for extracting FP
            self.coords, _ = get_coords(geom, buffer) # for data search
        else:
            # Previously this only printed a message and left coords/extent unset,
            # which surfaced later as an unrelated AttributeError.
            raise TypeError("Input region is not a GeoDataFrame !!!")
        print(f"Bounding extent for data search: \n{self.extent}\n")

        # Extract class attributes
        self.fire_id = geom[id_col].iloc[0]
        self.fire_name = geom[name_col].iloc[0]
        self.date_range = (str(start_date), str(last_date))
        self.geog_crs = geog_crs
        self.proj_crs = proj_crs
        self.short_names = list(short_names) # copy so the shared default list is never aliased
        self.out_dir = out_directory
        # Keep the caller's object (not a copy) so updates made by the caller are seen here.
        self.processed_granules = processed_granules if processed_granules is not None else set()
        self.download = download

    def ea_search_request(self):
        """
        Generate an earthaccess search request with the given parameters.

        Returns:
            - (query, granules): the search results whose native-id is not in
              `processed_granules`, and the matching list of native-ids.
              When `download` is True the filtered results are also downloaded
              to `out_dir`.
        """
        query = earthaccess.search_data(
            short_name=self.short_names,
            polygon=self.coords,
            temporal=self.date_range,
            cloud_hosted=True,
            count=-1
        )

        # Keep only the "new" granules (not yet processed)
        already_done = self.processed_granules if self.processed_granules is not None else ()
        query_ = [item for item in query if item['meta']['native-id'] not in already_done]
        granules_ = [item['meta']['native-id'] for item in query_]

        if self.download is True and query_:
            # Download the "new" granules
            earthaccess.download(query_, self.out_dir)

        # return query results and list of granules
        return query_, granules_

    def create_fire_gdf(self, search_result):
        """
        Build a table of active fire pixels from the VNP14IMG / VNP03IMG granule pairs.

        For every VNP14IMG granule the matching VNP03IMG geolocation granule is opened,
        pixels inside `self.extent` with a fire-mask class above 6 are kept, the nearest
        sparse fire-pixel record supplies FRP and I4/I5 brightness temperatures, and the
        result is joined to the module-level `lut` table on the sample (column) index.

        Args:
            - search_result: earthaccess search results (as returned by `ea_search_request`)
        Returns:
            - pandas DataFrame with one row per retained pixel; an empty DataFrame when
              no granule contained fire pixels inside the extent.

        NOTE(review): even when `download` is True the data are still opened through
        `earthaccess.open` on `search_result`; the local files are only used to list
        which granules to process.
        """
        out_fire_dfs = [] # to store the geolocated AFDs

        # Identify VNP14 vs. VNP03
        if self.download is True:
            # Query the downloaded files
            vnp14_files = list_files(os.path.join(self.out_dir,'VNP14IMG'), "*.nc", recursive=True)
            vnp03_files = list_files(os.path.join(self.out_dir,'VNP03IMG'), "*.nc", recursive=True)
        else:
            links = [g.data_links()[0] for g in search_result]
            vnp14_files = [link for link in links if 'VNP14IMG' in link]
            vnp03_files = [link for link in links if 'VNP03IMG' in link]

        # Progress is reported roughly four times; never let the step be 0,
        # otherwise the modulo below raises ZeroDivisionError for short lists.
        nprint = max(1, round(len(vnp14_files) / 4))

        for idx, fp in enumerate(sorted(vnp14_files)):

            # Gather some metadata information from the file name
            basename = os.path.basename(urlparse(fp).path)
            timestamp = basename.split('.')[1:3] # e.g. ['A2020213', '2036']
            year = timestamp[0][1:5]
            day = timestamp[0][5:8]
            acq_time = timestamp[1]
            # '%-m/%-d/%y' (no zero padding) matches FIRMS; the '-' flag is platform specific.
            acq_date = dt.datetime.strptime(year+day, '%Y%j').strftime('%-m/%-d/%y')

            # Identify the corresponding geolocation file
            granule_key = ".".join(timestamp)
            geo_id = 'VNP03IMG.' + granule_key
            geo_fp = next((link for link in vnp03_files if geo_id in link), None)
            if geo_fp is None:
                print(f"!!! No geolocation file found for: {basename}")
                continue

            # Pick the two granules explicitly instead of relying on the order
            # in which the search happened to return them.
            matched = [item for item in search_result if granule_key in item.data_links()[0]]
            fire_granule = next((g for g in matched if 'VNP14IMG' in g.data_links()[0]), None)
            geo_granule = next((g for g in matched if 'VNP03IMG' in g.data_links()[0]), None)
            if fire_granule is None or geo_granule is None:
                print(f"!!! Could not match VNP14IMG/VNP03IMG granules for: {basename}")
                continue

            geo_file, fire_file = earthaccess.open([geo_granule, fire_granule])

            # Open the VNP14IMG and load everything needed while the file is open
            with xr.open_dataset(fire_file, phony_dims='access') as vnp14ds:

                # Check for fire pixels in the specified region
                lonfp = vnp14ds.variables['FP_longitude'].values # fire pixel longitude
                latfp = vnp14ds.variables['FP_latitude'].values # fire pixel latitude
                fire_scene = ((lonfp > self.extent[0]) & (lonfp < self.extent[1]) &
                              (latfp > self.extent[2]) & (latfp < self.extent[3]))
                if not fire_scene.any():  # Check for any fire pixels in region
                    print(f"\tNo active fires detected in {basename}. Skipping...")
                    continue # skip if no fire pixels in region

                fire = vnp14ds['fire mask'].values # the fire mask
                daynight = vnp14ds.DayNightFlag
                granule_id = vnp14ds.LocalGranuleID
                frp = vnp14ds.variables['FP_power'].values # fire radiative power
                t4 = vnp14ds.variables['FP_T4'].values # I04 brightness temp (kelvins)
                t5 = vnp14ds.variables['FP_T5'].values # I05 brightness temp (kelvins)

            tree = cKDTree(np.array([lonfp, latfp]).T) #search tree for finding nearest FRP

            # Read the geolocation data (again loaded before the dataset closes)
            with xr.open_dataset(geo_file, group='geolocation_data', phony_dims='access') as geo_ds:
                geo_lon = geo_ds.longitude.values
                geo_lat = geo_ds.latitude.values

            _, j = np.indices(geo_lon.shape) # sample (column) index of every pixel
            # Crop to fire bounding extent
            geo_scene = ((geo_lon > self.extent[0]) & (geo_lon < self.extent[1]) &
                         (geo_lat > self.extent[2]) & (geo_lat < self.extent[3]))

            # Populate the dataframe
            df = pd.DataFrame({
                'longitude': geo_lon[geo_scene],
                'latitude': geo_lat[geo_scene],
                'fire_mask': fire[geo_scene],
            })
            df['confidence'] = df['fire_mask'].replace(
                {0:'x', 1:'x', 2:'x', 3:'x', 4:'x', 5:'x', 6:'x', 7:'l', 8:'n', 9:'h'})
            # NOTE(review): [0] takes the first element of the attribute; if the
            # attribute is a plain string this is its first character -- confirm.
            df['daynight'] = daynight[0]
            df['acq_date'] = acq_date
            df['acq_time'] = acq_time
            df['granule_id'] = granule_id[0]
            df['j'] = j[geo_scene] #sample number for pixel size lookup

            # Retain only low-high confidence fire points (mask classes 7, 8, 9)
            df = df[df['fire_mask'] > 6].copy()

            # Gather frp, brightness temps from the nearest sparse fire-pixel record
            if not df.empty:
                _, nearest = tree.query(df[['longitude', 'latitude']].to_numpy())
                df['frp'] = frp[nearest].astype(float)
                df['iot4'] = t4[nearest].astype(float)
                df['iot5'] = t5[nearest].astype(float)

            # Join to pixel size info (`lut` is a module-level table defined by the caller)
            df_ = pd.merge(df, lut, left_on='j', right_on='sample', how='left')
            df_ = df_.drop(columns=['j'])

            out_fire_dfs.append(df_) # append the granule dataframe

            if idx % nprint == 0:
                print(f"\n\tProcessed {idx+1} observations.\n")

        if not out_fire_dfs:
            # pd.concat raises on an empty list; hand back an empty table instead
            print("\tNo active fire pixels found in any granule.")
            return pd.DataFrame()

        # Concatenate the out dfs
        fire_data = pd.concat(out_fire_dfs) # for the entire list of granules

        return fire_data

print("Class and functions ready !")

In [None]:
# Load the fire data

In [None]:
# Read the Southern Rockies fire perimeters and harmonise the column names
fires_path = os.path.join(projdir,'data/spatial/mod/NIFC/nifc-ics_2018_to_2023-aspen_SRM.gpkg')
fires = gpd.read_file(fires_path).rename(
    columns={'NIFC_ID': 'Fire_ID', 'NIFC_NAME': 'Fire_Name'}
)

# Reduce the discovery / cessation timestamps to calendar dates
fires = fires.assign(
    DISCOVERY_DATE=fires['DISCOVERY_DATE'].dt.date,
    WF_CESSATION_DATE=fires['WF_CESSATION_DATE'].dt.date,
)

# (Disabled) optional two-day lead on the search window:
# fires['start_date'] = fires['DISCOVERY_DATE'] - timedelta(days=2)

print(f"Available attributes: \n{fires.columns}")
print(f"\nThere are [{fires.shape[0]}] fires.")

In [None]:
# Expand every fire into one (Fire_ID, Date) record per day between discovery
# and cessation, then count how many fires were active on each date
date_counts = pd.DataFrame(
    [
        (fid, active_day)
        for fid, first_day, last_day in zip(
            fires['Fire_ID'], fires['DISCOVERY_DATE'], fires['WF_CESSATION_DATE']
        )
        for active_day in pd.date_range(first_day, last_day)
    ],
    columns=['Fire_ID', 'Date'],
)['Date'].value_counts()
print(date_counts.head())

In [None]:
# Read the level-3 ecoregions and keep only the Southern Rockies (SRM) rows
fp = os.path.join(projdir,'data/spatial/raw/boundaries/na_cec_eco_l3_west.gpkg')
ecol3 = gpd.read_file(fp)
srm = ecol3.loc[ecol3['NA_L3NAME'].eq('Southern Rockies')]
print(srm.columns)

In [None]:
t0 = time.time()

# load the lookup table for pixel sizes
# (kept at module level: Download_VIIRS_AFD.create_fire_gdf reads `lut` as a global)
lut = pd.read_csv(os.path.join(projdir,'data/tabular/raw/pix_size_lut.csv'))

# Get a list of fire IDs
fire_ids = fires['Fire_ID'].unique()

afd_dfs = [] # to store the output dataframes
granules_p = set() # native-ids of granules that have already been processed

# Loop fire ids
for fire_id in fire_ids:
    t00 = time.time()

    fire = fires[fires['Fire_ID'] == fire_id]

    downloader = Download_VIIRS_AFD(
        start_date=fire['DISCOVERY_DATE'].iloc[0],
        last_date=fire['WF_CESSATION_DATE'].iloc[0],
        geom=fire,
        buffer=1000,
        short_names=['VNP14IMG','VNP03IMG'],
        out_directory=dataraw,
        processed_granules=granules_p,
        download=False,
        region=srm
    )

    try:
        query, granules = downloader.ea_search_request()

        print(f"\n\tGeolocating active fires ...\n")
        afd_fire = downloader.create_fire_gdf(query)
        afd_dfs.append(afd_fire)

        # Record the granules only after they were processed successfully, so a
        # failure does not hide them from later fires. (The original
        # `granules.append(granules)` appended the list to itself and never
        # updated `granules_p`, so every fire re-processed the same granules.)
        granules_p.update(granules)

    except Exception as e:
        print(f"Skipping fire id {fire_id}\n{e}")
        traceback.print_exc()  # This will print the full traceback
        continue # continue to the next fire id

    t1 = (time.time() - t00) / 60
    print(f"\nTotal elapsed time for {fire_id}: {t1:.2f} minutes.")
    print("\n~~~~~~~~~~\n")

# Concatenate the results and save out the table of latlon fire pixels (non-geolocated)
if afd_dfs:
    afds = pd.concat(afd_dfs, ignore_index=True)
    print(f"\n{afds.head()}")
    # NOTE(review): this writes CSV text to a path ending in '.gpkg'; the name is
    # kept because other code may read this exact path -- consider renaming to '.csv'.
    afds.to_csv(os.path.join(datamod,'vnp14img_aspen-fires-srm_2018_to_2023_geo.gpkg'))
else:
    # pd.concat raises on an empty list; keep `afds` defined and leave any existing output alone
    afds = pd.DataFrame()
    print("\nNo active fire detections were retrieved; nothing was saved.")

t2 = (time.time() - t0) / 60
print(f"Total elapsed time: {t2:.2f} minutes.")
print("\n~~~~~~~~~~\n")
print("Done!")