In [1]:
"""
Downloading VIIRS Active Fire Detections (AFD) with 'earthaccess' python API

For a given geometry (in this case, fire perimeters), download data granules for:

VIIRS/NPP Active Fires 6-Min L2 Swath 375m V002 (VNP14IMG)
VIIRS/NPP Imagery Resolution Terrain Corrected Geolocation 6-Min L1 Swath 375 m (VNP03IMG)

Return: 
    - Downloaded NetCDF granules for the above products
    - GeoDataFrame representing active fire pixel locations and attributes (before geolocation)
    - Geolocation grid representing pixel locations and overlap of adjacent orbits

Author: maxwell.cook@colorado.edu
"""

import sys, os
import earthaccess
import geopandas as gpd
import pandas as pd
import rioxarray as rxr
import rasterio as rio
import math
import contextlib
import traceback
import datetime as dt
import xarray as xr
import pyproj
import datetime

from netCDF4 import Dataset 
from datetime import datetime
from datetime import timedelta
from matplotlib import pyplot as plt
from affine import Affine
from osgeo import gdal, gdal_array, gdalconst, osr
from rasterio.transform import from_bounds
from scipy.spatial import cKDTree
from urllib.parse import urlparse

import warnings
warnings.simplefilter('ignore')
warnings.filterwarnings('ignore')
import logging
logging.getLogger('earthaccess').setLevel(logging.ERROR)

# Custom functions
sys.path.append(os.path.join(os.getcwd(),'code/'))
from __functions import *
        
# Directories
maindir = '/Users/max/Library/CloudStorage/OneDrive-Personal/mcook/'
projdir = os.path.join(maindir, 'aspen-fire/Aim2/')

# Output directories
dataraw = os.path.join(projdir,'data/spatial/raw/VIIRS/')
datamod = os.path.join(projdir,'data/spatial/mod/VIIRS/')

print("Ready !")

Ready !


In [42]:
class Access_VIIRS_AFD:
    """ Accesses VIIRS Active Fire Data (AFD) within a region for given date range """
    def __init__(self, start_date, last_date, geom = gpd.GeoDataFrame(),
                 id_col='Fire_ID', name_col='Fire_Name',
                 geog_crs = 'EPSG:4326', proj_crs = 'EPSG:5070',
                 short_names = ['VNP14IMG', 'VNP03IMG'], # active fire data and associated geolocation
                 buffer = None, out_directory=None, processed_granules=None,
                 download = False, region=None
                ):
        """
        Args:
            - start_date: the intial date for the granule search
            - last_date: the final date for the granule search
            - geom: GeoDataFrame for search request (fire perimeter)
            - geog_crs: Geographic projection (to retrieve coordinate pairs in lat/lon)
            - proj_crs: Projected coordinate system
            - short_names: the granules to be downloaded
            - buffer: Optional buffer for input geometry
            - out_directory: output directory to store results
            - download: If 'True', downloads the netcdf, otherwise processes in cloud
        Returns:
            - Downloaded files (VIIRS Active Fire Data NetCDF and Geolocation information)
            - GeoDataFrame with non-geolocated (raw) fire detections
        """
        # Extract coordinate bounds
        if region is None:
            # use the fire perimeter
            self.coords, self.extent = get_coords(geom, buffer)
            # print(f"Bounding extent for data search: \n{self.extent}\n")
        elif region is not None and isinstance(region, gpd.GeoDataFrame):
            # use the region boundary for FP and fire for search
            _, self.extent = get_coords(region, buffer) # for extracting FP
            self.coords, _ = get_coords(geom, buffer) # for data search
            # print(f"Bounding extent for data search: \n{self.extent}\n")
        else:
            print("Input region is not a GeoDataFrame !!!")
            
        # Extract class attributes
        self.fire_id = geom[id_col].iloc[0]
        self.fire_name = geom[name_col].iloc[0]
        self.date_range = (str(start_date), str(last_date))
        self.geog_crs = geog_crs
        self.proj_crs = proj_crs
        self.short_names = short_names
        self.out_dir = out_directory
        self.download = download
        self.processed_granules = processed_granules
      
    def ea_search_request(self):
        """ Generate an earthaccess search request with the given parameters """

        query = earthaccess.search_data(
            short_name=self.short_names, 
            polygon=self.coords,
            temporal=self.date_range, 
            cloud_hosted=True,
            count=-1
        )
        
        # Grab a list of granule IDs (VNP14IMG)
        granules = [g['umm']['DataGranule']['Identifiers'][0]['Identifier']
                    for g in query if 'VNP14IMG' in g['umm']['DataGranule']['Identifiers'][0]['Identifier']]
        N = len(granules)
    
        # Filter the query to only work with the "new" granules
        # Skip if no new granules are required
        if self.processed_granules is not None:
            processed = [g.replace('.nc', '') for g in self.processed_granules]
            new_granules = [g for g in granules if g not in processed]
            if len(new_granules) == 0:
                print(f"\n\t! All granules already processed for region ({N - len(granules)}) !\n")
                return None, None
            elif len(new_granules) < N:
                print(f"\n\t! Some granules already processed for region ({N - len(granules)}) !\n")
                # update the query
                query = [item for item in query if item['umm']['DataGranule']['Identifiers'][0]['Identifier'] in new_granules
                         or 'VNP03IMG' in item['umm']['DataGranule']['Identifiers'][0]['Identifier']]
                granules = [g['umm']['DataGranule']['Identifiers'][0]['Identifier'] for g in query]
            
        if self.download is True and granules is not None:
            # Download the "new" granules
            earthaccess.download(query, self.out_dir)

        # return query results and list of granules
        return query, granules
             

    def create_fire_gdf(self, query):
        """ Creates a geodataframe with active fire detections from a directory with NetCDF files """

        granule_dfs = [] # to store the geolocated AFDs
        granule_log = os.path.join(datamod, 'logs/processed_granules.txt')
        
        # Identify VNP14 vs. VNP03
        if self.download is True:
            # Query the downloaded files
            vnp14_files = list_files(os.path.join(self.out_dir,'VNP14IMG'), "*.nc", recursive=True)
            vnp03_files = list_files(os.path.join(self.out_dir,'VNP03IMG'), "*.nc", recursive=True)
        else:
            vnp14_files = [g.data_links()[0] for g in query if 'VNP14IMG' in g.data_links()[0]]
            vnp03_files = [g.data_links()[0] for g in query if 'VNP03IMG' in g.data_links()[0]]
        
        nprint = 10 # print counter
        for idx, vnp14 in enumerate(sorted(vnp14_files)):

            df = pd.DataFrame() # to store the active fire data
                
            # check if the granule has been processed
            url = urlparse(vnp14)
            granule_id = os.path.basename(url.path)    
            if granule_id in self.processed_granules:
                print(f"\t{granule_id} already processed. Skipping...")

            # gather information from file name
            timestamp = granule_id.split('.')[1:3]
            year = timestamp[0][1:5]
            day = timestamp[0][5:8]
            time = timestamp[1]
            date = dt.datetime.strptime(year+day, '%Y%j').strftime('%b %d') 
            acq_date = dt.datetime.strptime(year+day, '%Y%j').strftime('%-m/%-d/%Y')
            daytime = int(time) > 1500 #timestamps in the 1900h-2200h UTC range are afternoon for Western US
            
            # Identify the corresponding geolocation file
            geo_id = 'VNP03IMG.' + ".".join(timestamp)

            if self.download is True:
                # Grab the associated geolocation file
                vnp03 = [g for g in vnp03_files if geo_id in os.path.basename(g)][0]
            else:
                # Filter the search query to the matching VNP14 and VNP03
                query_ = [item for item in query if ".".join(timestamp) in item.data_links()[0]]
                # Open the VNP14IMG and gather the data
                fileset = earthaccess.open(query_)  
                vnp14 = fileset[1]
                vnp03 = fileset[0]
            
            with xr.open_dataset(vnp14, phony_dims='access') as vnp14ds:

                # Check for fire pixels in the specified region
                lonfp = vnp14ds.variables['FP_longitude'][:] # fire pixel longitude
                latfp = vnp14ds.variables['FP_latitude'][:] # fire pixel latitude
                fire_scene = ((lonfp > self.extent[0]) & (lonfp < self.extent[1]) & 
                              (latfp > self.extent[2]) & (latfp < self.extent[3]))
                if not fire_scene.any():  # Check for any fire pixels in region
                    print(f"\n\tNo active fires detected in {granule_id}. Skipping...")
                    with open(granule_log, 'a') as log_file:
                        log_file.write(f"{granule_id}\n") # log this granule as "processed"
                    continue # skip if no fire pixels in region

                # granule attributes
                daynight = vnp14ds.DayNightFlag #string Day or Night
                granule_id = vnp14ds.LocalGranuleID

                # variables
                fire = vnp14ds['fire mask'] # the fire mask
                frp = vnp14ds.variables['FP_power'][:] # fire radiative power
                t4 = vnp14ds.variables['FP_T4'][:] # I04 brightness temp (kelvins)
                t5 = vnp14ds.variables['FP_T5'][:] # I05 brightness temp (kelvins)
                
                tree = cKDTree(np.array([lonfp, latfp]).T) #search tree for finding nearest FRP

                del fire_scene
                
            # Read the geolocation data 
            with xr.open_dataset(vnp03, group='geolocation_data', phony_dims='access') as geo_ds:
                i, j = np.indices(geo_ds.longitude.shape) #line and sample
                # Crop to fire bounding extent
                geo_scene = ((geo_ds.longitude > self.extent[0]) & (geo_ds.longitude < self.extent[1]) & 
                             (geo_ds.latitude > self.extent[2]) & (geo_ds.latitude < self.extent[3])).values
            
            # Populate the dataframe
            df['longitude'] = list(geo_ds.longitude.values[geo_scene])
            df['latitude'] = list(geo_ds.latitude.values[geo_scene])
            df['fire_mask'] = list(fire.values[geo_scene])
            df['confidence'] = pd.Categorical( df.fire_mask)
            df.confidence = df.confidence.replace(
                {0:'x', 1:'x', 2:'x', 3:'x', 4:'x', 5:'x', 6:'x', 7:'l', 8:'n', 9:'h'})
            df['daynight'] = daynight
            df['acq_date'] = acq_date
            df['acq_time'] = time
            df['granule_id'] = granule_id
            df['geo_id'] = geo_id
            df['j'] = list(j[geo_scene]) #sample number for pixel size lookup
            
            # Retain only low-high confidence fire points
            df = df[df.confidence!='x'] # keep only low-high confidence fire pixels
        
            # gather frp, brightness temps for nearest geolocated obs.
            for k in df.index:
                dist, nearest = tree.query([ df.loc[k, 'longitude'], df.loc[k, 'latitude'] ])
                df.loc[k, 'frp'] = frp[nearest].item()
                df.loc[k, 'iot4'] = t4[nearest].item()
                df.loc[k, 'iot5'] = t5[nearest].item()
        
            # Join to pixel size info
            df_ = pd.merge(df, lut, left_on='j', right_on='sample', how='left')
            df_.drop(columns=['j'], inplace=True)
            
            granule_dfs.append(df_) # append the granule dataframe
            
            # clear up some memory and log the processed granule
            del df, i, j, geo_scene, fire, latfp, lonfp, frp, tree, df_
            if self.download is True:
                os.remove(vnp14)
                os.remove(vnp03)

            with open(granule_log, 'a') as log_file:
                log_file.write(f"{granule_id}\n")

            # write out the csv file
            out_fp = os.path.join(datamod,f'granules/{granule_id.replace(".","_")}.csv')
            df_.to_csv(out_fp)

            if idx % nprint == 0:
                print(f"\n\tProcessed {idx+1} granules.\n")

        gc.collect() # clear out garbage
        
        # Concatenate the out dfs
        if len(granule_dfs) > 0:
            fire_data = pd.concat(granule_dfs) # for the entire list of granules
            return fire_data
        else:
            return None

print("Class and functions ready !")

Class and functions ready !


In [3]:
# Load the fire data

In [4]:
# Load the fire dataset for the Southern Rockies
fp = os.path.join(projdir,'data/spatial/mod/NIFC/nifc-ics_2018_to_2023-aspen.gpkg')
fires = gpd.read_file(fp)

# subset to Southern Rockies
fires = fires[fires['na_l3name'] == 'Southern Rockies']

# tidy the fire id and name columns
fires.rename(columns={'NIFC_ID': 'Fire_ID', 'NIFC_NAME': 'Fire_Name'}, inplace=True)

# tify the date columns
fires['DISCOVERY_DATE'] = pd.to_datetime(fires['DISCOVERY_DATE']).dt.date
fires['WF_CESSATION_DATE'] = pd.to_datetime(fires['WF_CESSATION_DATE']).dt.date

# # Adjust the start and end dates
# fires['start_date'] = fires['DISCOVERY_DATE'] - timedelta(days=2)
# fires['end_date'] = fires['WF_CESSATION_DATE'] + timedelta(days=2)

print(f"Available attributes: \n{fires.columns}")
print(f"\nThere are [{len(fires)}] fires.")

Available attributes: 
Index(['Fire_ID', 'Fire_Name', 'NIFC_ACRES', 'FINAL_ACRES', 'pct_aspen',
       'INCIDENT_ID', 'INCIDENT_NAME', 'START_YEAR', 'CAUSE', 'DISCOVERY_DATE',
       'DISCOVERY_DOY', 'WF_CESSATION_DATE', 'WF_CESSATION_DOY',
       'STR_DESTROYED_TOTAL', 'STR_DAMAGED_TOTAL', 'STR_THREATENED_MAX',
       'EVACUATION_REPORTED', 'PEAK_EVACUATIONS', 'WF_PEAK_AERIAL',
       'WF_PEAK_PERSONNEL', 'na_l3name', 'geometry'],
      dtype='object')

There are [67] fires.


In [5]:
fires['NIFC_ACRES'] = fires['NIFC_ACRES'].astype(float)
fires['NIFC_ACRES'].describe()

count        67.000000
mean      20818.361343
std       54462.748925
min          10.900000
25%         210.860000
50%        1497.900000
75%       11361.310000
max      299792.600000
Name: NIFC_ACRES, dtype: float64

In [6]:
fires = fires[fires['NIFC_ACRES'] > 34.749]
print(len(fires))
print(fires['Fire_Name'].unique())

63
['577' '416' 'LOADING PEN' 'PLATEAU' 'PLUMTAW' 'DOE CANYON'
 'CHRIS MOUNTAIN' 'DRY LAKE' 'BEAR CREEK' 'QUARTZ RIDGE' 'TRAIL SPRINGS'
 'MILL CREEK 2' 'THORPE' 'BUFFALO' 'LAKE CHRISTINE' 'SYLVAN'
 'GRIZZLY CREEK' 'MIDDLE MAMM' 'PACK CREEK' 'OSHA' 'CALF CANYON'
 'BLACK FEATHER' 'SARCA' 'CERRO PELADO' 'DECKER' 'MENKHAVEN'
 'CAMERON PEAK' 'LEFTHAND' 'WILLIAMS FORK' 'TARANTULA' 'BULL DRAW'
 'GREEN MOUNTAIN' 'POISON SPRINGS' 'COW CREEK' 'BRUSH CREEK' '403' 'AMOLE'
 'SARDINAS CANYON' 'LUNA' 'GURULE' 'MONTOYA SPRINGS' 'CACHE CREEK' '441'
 'WEST GUARD' 'BURRO' 'HORSE' 'ICE' 'SAND CREEK' 'EAST TROUBLESOME'
 'MULLEN' 'MIDDLE FORK' 'MUDDY SLIDE' 'SUGARLOAF' 'MORGAN CREEK'
 'BLACK MOUNTAIN' 'INDIAN RUN' 'SILVER CREEK' 'CABIN LAKE' 'REVEILLE'
 'SPRING CREEK' 'WESTON PASS' 'BADGER CREEK' 'RYAN']


In [7]:
fires[['DISCOVERY_DATE','WF_CESSATION_DATE']].head()

Unnamed: 0,DISCOVERY_DATE,WF_CESSATION_DATE
1,2019-07-28,2019-08-18
2,2018-06-01,2018-07-03
3,2020-06-13,2020-06-18
4,2018-07-22,2018-08-17
5,2022-05-17,2022-05-18


In [8]:
# Create a DataFrame with individual dates for each fire
date_counts = pd.DataFrame(
    [(fire['Fire_ID'], single_date)
     for _, fire in fires.iterrows()
     for single_date in pd.date_range(fire['DISCOVERY_DATE'], fire['WF_CESSATION_DATE'])],
    columns=['Fire_ID', 'Date']
)['Date'].value_counts()
print(date_counts.head())

Date
2018-08-10    7
2018-08-09    7
2018-08-08    7
2018-08-07    7
2018-08-15    6
Name: count, dtype: int64


In [9]:
# Load the SRM bounds
fp = os.path.join(projdir,'data/spatial/raw/boundaries/na_cec_eco_l3_west.gpkg')
ecol3 = gpd.read_file(fp)
srm = ecol3[ecol3['NA_L3NAME'] == 'Southern Rockies']
print(srm.columns)

Index(['NA_L3CODE', 'NA_L3NAME', 'NA_L2CODE', 'NA_L2NAME', 'NA_L1CODE',
       'NA_L1NAME', 'NA_L3KEY', 'NA_L2KEY', 'NA_L1KEY', 'Shape_Leng',
       'Shape_Area', 'geometry'],
      dtype='object')


In [10]:
# Run for fire perimeters

In [44]:
t0 = time.time()   

# Check for already processed granules
granule_log = os.path.join(datamod, 'logs/processed_granules.txt')
if os.path.exists(granule_log):
    with open(granule_log, 'r') as log_file:
        granules_p = set([line.strip() for line in log_file.readlines()])
else:
    granules_p = []

print(f"Already processed [{len(granules_p)}] granules.\n")

# Read in the already processed files, if it exists
afds_p_fp = os.path.join(datamod, f'vnp14img_geo_aspen-fires-srm.csv')
if len(granules_p) > 0:
    afds_p = pd.read_csv(afds_p_fp)
else:
    afds_p = None
                       
# load the lookup table for pixel sizes
lut = pd.read_csv(os.path.join(projdir,'data/tabular/raw/pix_size_lut.csv'))

# Get a list of fire IDs sorted by ignition date
fires = fires.sort_values(by=['START_YEAR','DISCOVERY_DATE'])
fire_ids = fires['Fire_ID'].unique()

afd_dfs = [] # to store the output geodataframes

# Loop fire ids
for fire_id in fire_ids:
    t00 = time.time()

    fire = fires[fires['Fire_ID'] == fire_id]
    print(f"Processing for {fire['Fire_Name'].iloc[0]} fire:")
    
    da_access = Access_VIIRS_AFD(
        start_date=fire['DISCOVERY_DATE'].iloc[0],
        last_date=fire['WF_CESSATION_DATE'].iloc[0],
        geom=fire,
        buffer=1000,
        short_names=['VNP14IMG','VNP03IMG'],
        out_directory=dataraw,
        processed_granules=granules_p,
        download=False,
        region=srm
    )

    try:
        query, granules = da_access.ea_search_request()

        if granules is None:
            continue
            
        print(f"\n\tGeolocating active fires ...\n")
        afd_fire = da_access.create_fire_gdf(query)

        # save the progress so far
        if afd_fire is not None:
            afd_dfs.append(afd_fire)
            granules_p.extend(granules) # running list
            # save out the fire data so far
            if afds_p is not None:
                pd.concat([afds_p, afds_fire]).to_csv(afds_p_fp)
            del afd_fire
        else:
            continue
        
    except Exception as e:
        print(f"Skipping fire id {fire_id}\n{e}")
        traceback.print_exc()  # This will print the full traceback
        continue # continue to the next fire id

    t1 = (time.time() - t00) / 60
    print(f"\nTotal elapsed time for {fire['Fire_Name']}: {t1:.2f} minutes.")
    print("\n~~~~~~~~~~\n")

# Concatenate the results and save out the geodataframe of latlon fire pixels
afds = pd.concat(afd_dfs, ignore_index=True)
print(f"\n{afds.head()}\n")
afds.to_csv(os.path.join(datamod, f'vnp14img_geo_aspen-fires_2018_to_2023.csv'))

t2 = (time.time() - t0) / 60
print(f"\nTotal elapsed time: {t2:.2f} minutes.")
print("\n~~~~~~~~~~\n")
print("Done!")

Already processed [823] granules.
Processing for OSHA fire:
Bounding extent for data search: 
[-109.60932582670196, -103.84298510389097, 35.26644446378176, 42.79162551153811]

Granules found: 4
[]

	! All granules already processed for region (0) !

Processing for 416 fire:
Bounding extent for data search: 
[-109.60932582670196, -103.84298510389097, 35.26644446378176, 42.79162551153811]

Granules found: 188
[]

	! All granules already processed for region (0) !



ValueError: No objects to concatenate