In [1]:
"""
Downloading VIIRS Active Fire Detections (AFD) with 'earthaccess' python API

For a given geometry (in this case, fire perimeters), download data granules for:

VIIRS/NPP Active Fires 6-Min L2 Swath 375m V002 (VNP14IMG)
VIIRS/NPP Imagery Resolution Terrain Corrected Geolocation 6-Min L1 Swath 375 m (VNP03IMG)

Return: 
    - Downloaded NetCDF granules for the above products
    - GeoDataFrame representing active fire pixel locations and attributes (before geolocation)
    - Geolocation grid representing pixel locations and overlap of adjacent orbits

Author: maxwell.cook@colorado.edu
"""

import sys, os
import gc
import time
import math
import contextlib
import traceback
import datetime as dt
from datetime import datetime
from datetime import timedelta

import numpy as np
import pandas as pd
import earthaccess
import geopandas as gpd
import rioxarray as rxr
import rasterio as rio
import xarray as xr
import pyproj

from netCDF4 import Dataset 
from matplotlib import pyplot as plt
from affine import Affine
from osgeo import gdal, gdal_array, gdalconst, osr
from rasterio.transform import from_bounds
from scipy.spatial import cKDTree

# Custom functions
sys.path.append(os.path.join(os.getcwd(),'code/'))
from __functions import *

# Directories
# The root data directory defaults to the original author's machine; set the
# ASPEN_FIRE_MAINDIR environment variable to run the notebook elsewhere without
# editing this cell.
maindir = os.environ.get(
    'ASPEN_FIRE_MAINDIR',
    '/Users/max/Library/CloudStorage/OneDrive-Personal/mcook/'
)
projdir = os.path.join(maindir, 'aspen-fire/Aim2/')

# Output directories
dataraw = os.path.join(projdir,'data/spatial/raw/VIIRS/')  # downloaded granules (per fire)
datamod = os.path.join(projdir,'data/spatial/mod/VIIRS/')  # derived tables / layers

print("Ready !")

Ready !


In [2]:
class Download_VIIRS_AFD:
    """
    Search, download and extract VIIRS Active Fire Data (AFD) for one fire perimeter.

    Typical use: build the object from a single-fire GeoDataFrame, call
    `ea_search_request()`, pass its result to `download_results()`, then call
    `create_fire_gdf(extent)` to turn the downloaded granules into a table of
    fire pixels.

    Relies on names defined elsewhere in the notebook: `dataraw`, `datamod`,
    `lookup` (pixel-size table with a 'sample' column) and `list_files`.
    """
    def __init__(self, start_date, last_date, gdf = gpd.GeoDataFrame(), 
                 geog_crs = 'EPSG:4326', proj_crs = 'EPSG:5070', id_col='Fire_ID', name_col='Fire_Name',
                 short_names = ['VNP14IMG', 'VNP03IMG'], # active fire data and associated geolocation
                 buffer = None
                ):
        """
        Args:
            - start_date: the initial date for the granule search
            - last_date: the final date for the granule search
            - gdf: GeoDataFrame for search request (must contain at least one row;
                   the ID and name are read from the first row)
            - geog_crs: Geographic projection (to retrieve coordinate pairs in lat/lon)
            - proj_crs: Projected CRS used to apply `buffer` when `gdf` is in a
                   geographic CRS
            - id_col: unique identifier in the GeoDataFrame
            - name_col: column holding the fire name (used for the output folder)
            - short_names: the granules to be downloaded
            - buffer: optional buffer distance applied to the geometries before
                   the search bounds are computed
        Returns:
            - Downloaded files (VIIRS Active Fire Data NetCDF and Geolocation information)
            - GeoDataFrame with non-geolocated (raw) fire detections
        """
        
        self.id = gdf[id_col].iloc[0] # grab the unique ID
        self.fire_name = gdf[name_col].iloc[0] # fire name
        self.crs = gdf.crs # the native CRS definition for the input geodataframe
        self.geog_crs = geog_crs
        self.proj_crs = proj_crs
        if buffer is not None:
            if gdf.crs is not None and gdf.crs.is_geographic:
                # Buffering a lat/lon layer would interpret `buffer` as degrees;
                # do it in the projected CRS and bring the result back.
                buffered = gdf.to_crs(proj_crs).buffer(buffer).to_crs(gdf.crs)
            else:
                buffered = gdf.buffer(buffer) # buffer is in the units of the layer CRS
            self.gdf = gdf.assign(geometry=buffered)
        else:
            self.gdf = gdf
        self.bounds = self.gdf.to_crs(geog_crs).unary_union.envelope # for bounds, coords ensure geographic projection
        self.coords = list(self.bounds.exterior.coords)
        self.short_names = list(short_names) # copy so the shared default list is never aliased
        self.out_dir = os.path.join(dataraw,f"{self.fire_name}")
        self.date_range = (start_date, last_date)
    
    
    def ea_search_request(self):
        """
        Run one earthaccess granule search per product short name.

        Returns:
            - dict mapping short name -> search results for every product that
              returned at least one granule, or None when nothing was found.
        """
        print(f'Requesting data for: {self.fire_name} Fire')
            
        search_dict = {} # to store the search results
        for short_name in self.short_names:
            try:
                # Search for products matching our short names
                result = earthaccess.search_data(
                    short_name=short_name,
                    polygon=self.coords,
                    temporal=self.date_range,
                    count=1000, 
                )
            except Exception as e:
                # Best effort: a failed search for one product must not stop the others
                print(f"Skipping polygon ID {self.id}: {short_name} ({e})")
                continue

            if len(result) == 0:
                print(f"Skipping polygon ID {self.id}: {short_name} (no data found)")
                continue

            search_dict[short_name] = result

        # None (rather than an empty dict) signals "nothing found" to the caller
        return search_dict if search_dict else None

    
    def download_results(self, search_dict):
        """
        Download the search results into `<out_dir>/<short_name>/`.

        A product is skipped when its folder already holds at least as many
        entries as there are granules in the search result.
        """
        if search_dict is None:
            return

        for key, result in search_dict.items():
            # Set the output directory based on short_name
            fd = os.path.join(self.out_dir, f'{key}/')
            os.makedirs(fd, exist_ok=True)
            if len(os.listdir(fd)) < len(result):
                # Download the search results (stdout is silenced while downloading)
                with open(os.devnull, 'w') as f, contextlib.redirect_stdout(f):
                    earthaccess.download(result, local_path=fd)
            else:
                print("Files already downloaded, skipping ! ")

    
    def create_fire_gdf(self, extent):
        """
        Build a table of fire pixels from the downloaded NetCDF granules.

        For every VNP14IMG granule the matching VNP03IMG geolocation granule is
        located by its date/time/version stamp, pixels inside `extent` with a
        fire-mask value above 6 are kept, each is given the fire radiative power
        of the nearest entry in the granule's fire-pixel list, and the pixel-size
        lookup is joined on the sample (column) index.

        NOTE: both granule files are deleted from disk once they have been processed.

        Args:
            - extent: [min lon, max lon, min lat, max lat] used to crop each swath
        Returns:
            - pandas DataFrame of fire pixels for the whole fire (also written to
              `datamod` as CSV)
        Raises:
            - ValueError: when no granule could be processed
        """
        
        # List of downloaded .nc files
        vnp14_files = sorted(list_files(os.path.join(self.out_dir,'VNP14IMG'), "*.nc", recursive=True))
        vnp03_files = list_files(os.path.join(self.out_dir,'VNP03IMG'), "*.nc", recursive=True)

        # Progress is reported roughly every quarter of the files; never less
        # than 1 so the modulo below cannot divide by zero for short lists.
        N = max(1, round(len(vnp14_files) / 4))
        
        out_fire_dfs = []
        for idx, fp in enumerate(vnp14_files):
            
            # Gather some metadata information from the file name (not the full
            # path, so dots in directory names cannot shift the fields)
            identifier = os.path.basename(fp)[:-3]
            parts = identifier.split('.') # split by '.'
            year = parts[1][1:5]
            day = parts[1][5:8]
            acq_time = parts[2]
            acq_day = dt.datetime.strptime(year+day, '%Y%j')
            # Unpadded month/day, two-digit year (match FIRMS); built by hand
            # because the '%-m' strftime flag is not portable.
            acq_date = f"{acq_day.month}/{acq_day.day}/{acq_day:%y}"

            # Find matching geolocation data file for one record
            date_time_part = '.'.join(parts[1:4])  # Extract date-time parts for the VNP Version 002    
            geo_id = 'VNP03IMG' + '.' + date_time_part
            geo_da_fp = next(
                (geo_link for geo_link in vnp03_files if geo_id in os.path.basename(geo_link)),
                None
            )
            
            if geo_da_fp is None:
                print(f"!!! No geolocation file found for: {identifier}")
                continue
                
            # Read the geolocation data; values are loaded inside the `with`
            # so the file handle is released before the file is deleted.
            with xr.open_dataset(geo_da_fp, engine='netcdf4', group='geolocation_data') as geo:
                lon = geo.longitude.values
                lat = geo.latitude.values
            _, j = np.indices(lon.shape) # sample (column) index of every pixel
            # Crop to fire bounding extent
            scene = (lon > extent[0]) & (lon < extent[1]) & (lat > extent[2]) & (lat < extent[3])
            
            # Get the VNP14IMG fire mask, etc
            with xr.open_dataset(fp, engine='netcdf4') as afd:
                fire = afd['fire mask'].values
                daynight = afd.DayNightFlag #string Day or Night
                lonfp = afd.variables['FP_longitude'].values # fire pixel longitude
                latfp = afd.variables['FP_latitude'].values
                frp = np.asarray(afd.variables['FP_power'].values, dtype='float64') # fire radiative power
        
            # Set up a pandas dataframe for the swath
            df = pd.DataFrame()
            df['longitude'] = list(lon[scene])
            df['latitude'] = list(lat[scene])
            df['fire_mask'] = list(fire[scene])
            df['daynight'] = daynight[0] # first character of the flag
            df['confidence'] = df.fire_mask
            # 7/8/9 -> 'l'/'n'/'h' (presumably low/nominal/high); everything else is 'x'
            df.confidence = df.confidence.replace({0:'x', 1:'x', 2:'x', 3:'x', 4:'x', 5:'x', 6:'x', 7:'l', 8:'n', 9:'h'})
            df['acq_date'] = acq_date
            df['acq_time'] = acq_time
            df['j'] = list(j[scene]) #sample number for pixel size lookup
            
            # Retain only low-high confidence fire points (copy: we add columns below)
            df = df[df['fire_mask'] > 6].copy()
            df['fire_mask'] = pd.Categorical(df['fire_mask'])
        
            # Gather FRP: nearest entry of the granule's fire-pixel list for each
            # retained pixel, resolved with a single vectorised KD-tree query.
            df['frp'] = np.nan
            if len(df) > 0 and lonfp.size > 0:
                tree = cKDTree(np.column_stack((lonfp, latfp)))
                _, nearest = tree.query(df[['longitude', 'latitude']].to_numpy())
                df['frp'] = frp[nearest]
        
            # Join to pixel size info
            df = pd.merge(df, lookup, left_on='j', right_on='sample', how='left')
            df = df.drop(columns=['j'])
            out_fire_dfs.append(df)
        
            if idx % N == 0:
                print(f"Processed {idx+1} observations.")

            # Clean up: free the swath arrays and delete the processed granules
            del lon, lat, j, scene, fire, lonfp, latfp, frp, df
            os.remove(fp)
            os.remove(geo_da_fp)

            gc.collect()
    
        if not out_fire_dfs:
            raise ValueError(f"No VNP14IMG granules could be processed for: {self.fire_name}")

        # Concatenate the out dfs
        fire_data = pd.concat(out_fire_dfs) # for the entire fire
        fire_data['Fire_ID'] = self.id
        fire_data['Fire_Name'] = self.fire_name
        fire_data.to_csv(os.path.join(datamod,f'vnp14img_{self.fire_name.replace(" ","_")}_geo.csv'))
        
        return fire_data

print("Class and functions ready !")

Class and functions ready !


In [3]:
# Load the fire data

In [4]:
# Read the fire perimeters and give the NIFC ID / name columns the labels
# expected by the downloader class ('Fire_ID', 'Fire_Name').
fires_path = os.path.join(projdir,'data/spatial/mod/NIFC/nifc-ics_2018_to_2023-aspen_SRM.gpkg')
fires = gpd.read_file(fires_path).rename(
    columns={'NIFC_ID': 'Fire_ID', 'NIFC_NAME': 'Fire_Name'}
)
# Quick sanity check: available columns and number of perimeters
print(fires.columns)
print(len(fires))

Index(['Fire_ID', 'Fire_Name', 'NIFC_ACRES', 'FINAL_ACRES', 'pct_aspen',
       'INCIDENT_ID', 'INCIDENT_NAME', 'START_YEAR', 'CAUSE', 'DISCOVERY_DATE',
       'DISCOVERY_DOY', 'WF_CESSATION_DATE', 'WF_CESSATION_DOY',
       'STR_DESTROYED_TOTAL', 'STR_DAMAGED_TOTAL', 'STR_THREATENED_MAX',
       'EVACUATION_REPORTED', 'PEAK_EVACUATIONS', 'WF_PEAK_AERIAL',
       'WF_PEAK_PERSONNEL', 'na_l3name', 'geometry'],
      dtype='object')
49


In [5]:
# Create an extent around all fires


In [6]:
# Load the lookup table for pixel sizes
fp = os.path.join(projdir,'data/tabular/raw/pix_size_lut.csv')
lookup = pd.read_csv(fp)
print(lookup.head())

   sample  along_scan  along_track  scan_angle  pix_area
0       0    0.795616     0.783234     56.0600  0.623154
1       1    0.794690     0.782908     56.0511  0.622169
2       2    0.793765     0.782583     56.0422  0.621187
3       3    0.792842     0.782258     56.0333  0.620207
4       4    0.791921     0.781933     56.0244  0.619229


In [None]:
# Driver: for every fire, search + download the VIIRS granules, extract the
# fire pixels, and finally write all pixels to one GeoPackage of lat/lon points.
t0 = time.time()

# Get a list of fire IDs
fire_ids = fires['Fire_ID'].unique()

afd_dfs = [] # to store the output dataframes (one per fire)
no_data_ids = [] # to store fire IDs with no data (or that failed)

# One buffer distance for both the cropping extent and the search polygon
buffer = 1000 # in meters

for fire_id in fire_ids:
    t00 = time.time()
    
    fire = fires.loc[fires['Fire_ID'] == fire_id]

    # Grab an extent for cropping netcdf files
    coords, extent = get_coords(fire, buffer)
    print(f"Bounding Extent: \n{extent}")

    # Initiate the download and extract class
    downloader = Download_VIIRS_AFD(
        gdf=fire,
        start_date=fire['DISCOVERY_DATE'].iloc[0],
        last_date=fire['WF_CESSATION_DATE'].iloc[0],
        buffer=buffer,
    )
    # Retrieve the search results
    try:
        search_results = downloader.ea_search_request()
        # ea_search_request returns None (not an empty dict) when nothing is found
        if not search_results:
            raise ValueError(f'No data granules found for {fire_id}, skipping completely !')

        # Download the search results
        downloader.download_results(search_results)
        # Create the active fire detection dataframe
        print(f"\nCreating AFD geodataframe ...\n")
        afd_dfs.append(downloader.create_fire_gdf(extent))
            
    except Exception as e:
        print(f"Skipping Fire ID {fire_id}\n{e}")
        traceback.print_exc()  # This will print the full traceback
        no_data_ids.append(fire_id)
        continue # continue to the next fire id (one bad fire must not end the run)

    t1 = (time.time() - t00) / 60
    print(f"Total elapsed time: {t1:.2f} minutes.")
    print("\n~~~~~~~~~~\n")

if no_data_ids:
    print(f"Fires skipped ({len(no_data_ids)}): {list(no_data_ids)}")

# Concatenate the results and save out the geodataframe of latlon fire pixels (non-geolocated)
afds = pd.concat(afd_dfs, ignore_index=True) if afd_dfs else pd.DataFrame()
if afds.empty:
    print("No active fire detections were extracted; nothing to save.")
else:
    # Per-fire categoricals may not share categories; store plain integers
    afds['fire_mask'] = afds['fire_mask'].astype(int)
    # A plain DataFrame has no .to_file(): build point geometries from lat/lon first
    afds = gpd.GeoDataFrame(
        afds,
        geometry=gpd.points_from_xy(afds['longitude'], afds['latitude']),
        crs='EPSG:4326'
    )
    afds.to_file(os.path.join(datamod,'vnp14img_aspen-fires_2018_to_2023_geo.gpkg'), driver='GPKG')

t2 = (time.time() - t0) / 60
print(f"Total elapsed time: {t2:.2f} minutes.")
print("\n~~~~~~~~~~\n")
print("Done!")

Bounding Extent: 
[-108.0104287321153, -107.79428582032635, 37.37737010107216, 37.60899484292132]
Requesting data for: 416 Fire
Granules found: 94
Granules found: 94


QUEUEING TASKS | :   0%|          | 0/94 [00:00<?, ?it/s]

PROCESSING TASKS | :   0%|          | 0/94 [00:00<?, ?it/s]

COLLECTING RESULTS | :   0%|          | 0/94 [00:00<?, ?it/s]

QUEUEING TASKS | :   0%|          | 0/94 [00:00<?, ?it/s]

PROCESSING TASKS | :   0%|          | 0/94 [00:00<?, ?it/s]