In [1]:
"""
Downloading VIIRS Active Fire Detections (AFD) with 'earthaccess' python API

Searches and accesses VIIRS AFD for fire perimeters.

VIIRS/NPP Active Fires 6-Min L2 Swath 375m V002 (VNP14IMG)
VIIRS/JPSS1 Active Fires 6-Min L2 Swath 375m V002 (VJ114IMG)

Return: 
    - Downloaded/cloud access NetCDF granules for the above products
    - GeoDataFrame representing active fire pixel locations and attributes (before geolocation)
    - Geolocation grid representing pixel locations and overlap of adjacent orbits

Author: maxwell.cook@colorado.edu
"""

import sys, os
import earthaccess as ea
import geopandas as gpd
import pandas as pd
import rioxarray as rxr
import rasterio as rio
import datetime as dt
import xarray as xr
import datetime
import traceback

from tqdm.notebook import tqdm
from datetime import datetime
from datetime import timedelta
from rasterio.transform import from_bounds
from urllib.parse import urlparse

import warnings
warnings.simplefilter('ignore')

# Custom functions
sys.path.append(os.path.join(os.getcwd(),'code/'))
from __functions import *
        
# Directories
maindir = '/Users/max/Library/CloudStorage/OneDrive-Personal/mcook/'
projdir = os.path.join(maindir, 'aspen-fire/Aim2/')

# Output directories
dataraw = os.path.join(projdir,'data/spatial/raw/VIIRS/')
datamod = os.path.join(projdir,'data/spatial/mod/VIIRS/')

print("Ready !")

Ready !


In [2]:
class Access_VIIRS_AFD:
    """ 
    Accesses VIIRS Active Fire Data (AFD) within a region for given date range
    """
    def __init__(self, start_date, last_date, geom = gpd.GeoDataFrame(),
                 id_col='Fire_ID', name_col='Fire_Name',
                 geog_crs = 'EPSG:4326', proj_crs = 'EPSG:5070',
                 short_names = ['VNP14IMG', 'VJ114IMG'],
                 buffer = None, out_directory=None, 
                 processed_granules=None, 
                 region=None
                ):
        """
        Args:
            - start_date: the intial date for the granule search
            - last_date: the final date for the granule search
            - geom: GeoDataFrame for search request (fire perimeter)
            - geog_crs: Geographic projection (to retrieve coordinate pairs in lat/lon)
            - proj_crs: Projected coordinate system
            - short_names: the granules to be downloaded
            - buffer: Optional buffer for input geometry
            - out_directory: output directory to store results
            - download: If 'True', downloads the netcdf, otherwise processes in cloud
        Returns:
            - Downloaded files (VIIRS Active Fire Data NetCDF and Geolocation information)
            - GeoDataFrame with non-geolocated (raw) fire detections
        """
        # Extract coordinate bounds
        if region is None:
            # use the fire perimeter
            self.coords, self.extent = get_coords(geom, buffer)
            # print(f"Bounding extent for data search: \n{self.extent}\n")
        elif region is not None and isinstance(region, gpd.GeoDataFrame):
            # use the region boundary for FP and fire for search
            _, self.extent = get_coords(region, buffer) # for extracting FP
            self.coords, _ = get_coords(geom, buffer) # for data search
            # print(f"Bounding extent for data search: \n{self.extent}\n")
        else:
            print("Input region is not a GeoDataFrame !!!")
            
        # Extract class attributes
        self.fire_id = geom[id_col].iloc[0]
        self.fire_name = geom[name_col].iloc[0]
        self.date_range = (str(start_date), str(last_date))
        self.geog_crs = geog_crs
        self.proj_crs = proj_crs
        self.short_names = short_names
        self.out_dir = out_directory
        self.granule_log = os.path.join(dataraw, 'logs/vnp_vji_processed_granules.txt')
        self.processed_granules = processed_granules
        self.lut = pd.read_csv(os.path.join(projdir, 'data/tabular/raw/pix_size_lut.csv'))
      
    def ea_search_request(self):
        """ Generate an earthaccess search request with the given parameters """
        query = ea.search_data(
            short_name=self.short_names, 
            polygon=self.coords,
            temporal=self.date_range, 
            cloud_hosted=True,
            count=-1
        )
        
        # Grab a list of granules from the search query
        granules = [g['umm']['DataGranule']['Identifiers'][0]['Identifier'] for g in query]
        N = len(granules)
    
        # Filter the query to only work with the "new" granules
        # Skip if no new granules are required
        if self.processed_granules is not None:
            processed = [g.replace('.nc', '') for g in self.processed_granules]
            new_granules = [g for g in granules if g not in processed]
            if len(new_granules) == 0:
                print(f"\t! All granules already processed, skipping ... !")
                return None, None
            elif len(new_granules) > 0 and len(new_granules) < N:
                print(f"\n\t! Some granules already processed [{N - len(new_granules)}] !")
                query = [item for item in query if item['umm']['DataGranule']['Identifiers'][0]['Identifier'] in new_granules]
                granules = [g['umm']['DataGranule']['Identifiers'][0]['Identifier'] for g in query]
            else:
                print(f"\n\t! Starting processing for [{len(granules)}] granules !")
                query = query
                granules = granules

        # open the fileset
        fileset = ea.open(query)

        # return query results and list of granules
        return fileset, granules
             

    def create_fire_gdf(self, fileset):
        """ Creates a geodataframe with active fire detections from a directory with NetCDF files """

        granule_dfs = [] # to store the geolocated AFDs
        
        nprint = 10 # print counter
        for fp in tqdm(fileset, desc="Processing granules"):
            df = pd.DataFrame() # to store the active fire data
            with xr.open_dataset(fp, phony_dims='access') as swath:
                # make sure there are fire pixels
                if swath.FirePix == 0:
                    continue
                
                # get the granule ID and associated geolocation swath
                granule_id = swath.LocalGranuleID
                geo_id = swath.VNP03IMG
                
                # get the data variables
                lonfp = swath.variables['FP_longitude'][:] # fire pixel longitude
                latfp = swath.variables['FP_latitude'][:] # fire pixel latitude
                frp = swath.variables['FP_power'][:] # fire radiative power
                t4 = swath.variables['FP_T4'][:] # I04 brightness temp (kelvins)
                t5 = swath.variables['FP_T5'][:] # I05 brightness temp (kelvins)
                m13 = swath.variables['FP_Rad13'][:] # M13 radiance (kelvin)
                sample = swath.variables['FP_sample'][:]
                line = swath.variables['FP_line'][:]
                # get the fire mask for fire pixels
                fire_mask = swath['fire mask'][line, sample].values

            # gather information from file name
            timestamp = granule_id.split('.')[1:3]
            year = timestamp[0][1:5]
            day = timestamp[0][5:8]
            acqtime = timestamp[1]
            acqdate = dt.datetime.strptime(year+day, '%Y%j').strftime('%-m/%-d/%Y')
            
            df['longitude'] = lonfp
            df['latitude'] = latfp
            df['j'] = sample #sample number for pixel size lookup
            df['fire_mask'] = fire_mask
            df['confidence'] = pd.Categorical(df.fire_mask)
            df.confidence = df.confidence.replace(
                {0:'x', 1:'x', 2:'x', 3:'x', 4:'x', 5:'x', 6:'x', 7:'l', 8:'n', 9:'h'})
            df['frp'] = frp
            df['t4'] = t4
            df['t5'] = t5
            df['m13'] = m13
            df['acq_date'] = acqdate
            df['acq_time'] = acqtime
            df['daynight'] = swath.DayNightFlag
            df['satellite'] = swath.PlatformShortName
            df['short_name'] = swath.ShortName
            df['granule_id'] = granule_id
            df['geo_id'] = geo_id
        
            df = pd.merge(df, self.lut, left_on='j', right_on='sample', how='left')
            df.drop(columns=['j'], inplace=True)
            
            granule_dfs.append(df) # append the granule dataframe

            # write the granule id to the log file
            with open(self.granule_log, 'a') as log_file:
                log_file.write(f"{granule_id}\n")

            # save a csv file
            out_dir = os.path.join(dataraw,"granules/")
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)
            df.to_csv(os.path.join(out_dir,f"{granule_id[:-3]}.csv"))

        gc.collect() # clear out garbage
        
        # concatenate the out dfs
        if len(granule_dfs) > 0:
            fire_data = pd.concat(granule_dfs) # for the entire list of granules
            return fire_data
        else:
            return None

print("Class and functions ready !")

Class and functions ready !


In [3]:
# Load the fire data

In [5]:
# Load the fire dataset for the Southern Rockies
fires = gpd.read_file(os.path.join(projdir,'data/spatial/mod/NIFC/nifc-ics_2018_to_2023-aspen-obs.gpkg'))
fires = fires[fires['na_l3name'] == 'Southern Rockies']

# tidy the fire id and name columns
fires.rename(columns={'NIFC_ID': 'Fire_ID', 'NIFC_NAME': 'Fire_Name'}, inplace=True)
fires['obs_count'] = fires['obs_count'].fillna(0).astype(int) # fill NaN as 0 obs.
fires = fires[fires['obs_count'] != 0]
# tidy the date columns
fires['DISCOVERY_DATE'] = pd.to_datetime(fires['DISCOVERY_DATE']).dt.date
fires['WF_CESSATION_DATE'] = pd.to_datetime(fires['WF_CESSATION_DATE']).dt.date

print(f"Available attributes: \n{fires.columns}")
print(f"\nThere are [{len(fires)}] fires.")
fires[['Fire_Name','DISCOVERY_DATE','WF_CESSATION_DATE','first_obs_date','last_obs_date','obs_count']].head()

Available attributes: 
Index(['Fire_ID', 'Fire_Name', 'NIFC_ACRES', 'FINAL_ACRES', 'pct_cover',
       'INCIDENT_ID', 'INCIDENT_NAME', 'START_YEAR', 'CAUSE', 'DISCOVERY_DATE',
       'DISCOVERY_DOY', 'WF_CESSATION_DATE', 'WF_CESSATION_DOY',
       'STR_DESTROYED_TOTAL', 'STR_DAMAGED_TOTAL', 'STR_THREATENED_MAX',
       'EVACUATION_REPORTED', 'PEAK_EVACUATIONS', 'WF_PEAK_AERIAL',
       'WF_PEAK_PERSONNEL', 'na_l3name', 'first_obs_date', 'last_obs_date',
       'obs_count', 'geometry'],
      dtype='object')

There are [70] fires.


Unnamed: 0,Fire_Name,DISCOVERY_DATE,WF_CESSATION_DATE,first_obs_date,last_obs_date,obs_count
1,577,2019-07-28,2019-08-18,2019-07-30,2019-08-14,2
2,416,2018-06-01,2018-07-03,2018-06-01,2018-07-06,2955
3,NEBO,2020-10-13,2020-10-15,2020-10-14,2020-10-15,7
4,LOADING PEN,2020-06-13,2020-06-18,2020-06-14,2020-06-18,5
5,PLUMTAW,2022-05-17,2022-05-18,2022-05-17,2022-05-19,41


In [6]:
print(fires['obs_count'].describe())
fires = fires[fires['obs_count'] >= 10]
print(f"\nThere are [{len(fires)}] fires with > 10 VIIRS obs.")

count       70.000000
mean       868.000000
std       2214.503196
min          1.000000
25%         10.250000
50%         57.000000
75%        562.250000
max      12563.000000
Name: obs_count, dtype: float64

There are [54] fires with > 10 VIIRS obs.


In [7]:
# Create a DataFrame with individual dates for each fire
date_counts = pd.DataFrame(
    [(fire['Fire_ID'], single_date)
     for _, fire in fires.iterrows()
     for single_date in pd.date_range(fire['DISCOVERY_DATE'], fire['WF_CESSATION_DATE'])],
    columns=['Fire_ID', 'Date']
)['Date'].value_counts()
print(date_counts.head())

Date
2018-06-29    6
2018-06-30    6
2018-08-12    5
2018-08-15    5
2018-07-30    5
Name: count, dtype: int64


In [8]:
# Adjust the first and last date by one for the earthaccess search
fires['first_obs_date'] = fires['first_obs_date'] - pd.Timedelta(days=1)
fires['last_obs_date'] = fires['last_obs_date'] + pd.Timedelta(days=1)
print("Start and end dates adjusted by 1 day ...")

Start and end dates adjusted by 1 day ...


In [8]:
# Run for fire perimeters

In [9]:
# Check for already processed granules
granule_log = os.path.join(dataraw, 'logs/vnp_vji_processed_granules.txt')
if os.path.exists(granule_log):
    with open(granule_log, 'r') as log_file:
        granules_p = set([line.strip() for line in log_file.readlines()])
else:
    granules_p = set()

print(f"Already processed [{len(granules_p)}] granules.\n")

Already processed [3241] granules.



In [10]:
t0 = time.time()   

# Get a list of fire IDs sorted by ignition date
fires = fires.sort_values(by=['START_YEAR','first_obs_date'])
fire_ids = fires['Fire_ID'].unique()

afd_dfs = [] # to store the output geodataframes

# Loop fire ids
for fire_id in fire_ids:
    t00 = time.time()

    fire = fires[fires['Fire_ID'] == fire_id]
    fire_name = fire['Fire_Name'].iloc[0]
    fire_name = fire_name.replace(" ", "_")
    print(f"Processing for {fire_name} fire:")
    
    da_access = Access_VIIRS_AFD(
        start_date=fire['first_obs_date'].iloc[0],
        last_date=fire['last_obs_date'].iloc[0],
        geom=fire,
        buffer=1000,
        short_names=['VNP14IMG','VJ114IMG'],
        out_directory=dataraw,
        processed_granules=granules_p
    )

    try:
        fileset, granules = da_access.ea_search_request()

        if granules is None:
            continue
            
        print(f"\n\tExtracting active fires ...\n")
        afd_fire = da_access.create_fire_gdf(fileset)

        # save the progress so far
        if afd_fire is not None:
            afd_dfs.append(afd_fire)
            granules_p.update(granules) # running list
        else:
            continue
        
    except Exception as e:
        print(f"\nSkipping fire {fire['Fire_Name']}\n{e}\n")
        traceback.print_exc()  # This will print the full traceback
        continue # continue to the next fire id

    t1 = (time.time() - t00) / 60
    print(f"\nTotal elapsed time for {fire['Fire_Name']}: {t1:.2f} minutes.")
    print("\n~~~~~~~~~~\n")

t2 = (time.time() - t0) / 60
print(f"\nTotal elapsed time: {t2:.2f} minutes.\n")
print("\n~~~~~~~~~~\n")
print("Done!")

Processing for 416 fire:
Granules found: 215
	! All granules already processed, skipping ... !
Processing for BURRO fire:
Granules found: 162
	! All granules already processed, skipping ... !
Processing for BADGER_CREEK fire:
Granules found: 148
	! All granules already processed, skipping ... !
Processing for SARDINAS_CANYON fire:
Granules found: 46
	! All granules already processed, skipping ... !
Processing for SPRING_CREEK fire:
Granules found: 91
	! All granules already processed, skipping ... !
Processing for CHATEAU fire:
Granules found: 36
	! All granules already processed, skipping ... !
Processing for WESTON_PASS fire:
Granules found: 47
	! All granules already processed, skipping ... !
Processing for LAKE_CHRISTINE fire:
Granules found: 181
	! All granules already processed, skipping ... !
Processing for SILVER_CREEK fire:
Granules found: 456
	! All granules already processed, skipping ... !
Processing for SARCA fire:
Granules found: 107
	! All granules already processed, ski

QUEUEING TASKS | :   0%|          | 0/1 [00:00<?, ?it/s]

PROCESSING TASKS | :   0%|          | 0/1 [00:00<?, ?it/s]

COLLECTING RESULTS | :   0%|          | 0/1 [00:00<?, ?it/s]


	Extracting active fires ...



Processing granules:   0%|          | 0/1 [00:00<?, ?it/s]


Total elapsed time for 6    DOE CANYON
Name: Fire_Name, dtype: object: 0.11 minutes.

~~~~~~~~~~

Processing for BEAVER fire:
Granules found: 71

	! Some granules already processed [19] !
Opening 52 granules, approx size: 0.11 GB


QUEUEING TASKS | :   0%|          | 0/52 [00:00<?, ?it/s]

PROCESSING TASKS | :   0%|          | 0/52 [00:00<?, ?it/s]

COLLECTING RESULTS | :   0%|          | 0/52 [00:00<?, ?it/s]


	Extracting active fires ...



Processing granules:   0%|          | 0/52 [00:00<?, ?it/s]


Total elapsed time for 309    BEAVER
Name: Fire_Name, dtype: object: 1.49 minutes.

~~~~~~~~~~

Processing for 441 fire:
Granules found: 46
	! All granules already processed, skipping ... !
Processing for MIDDLE_MAMM fire:
Granules found: 350
	! All granules already processed, skipping ... !
Processing for AMOLE fire:
Granules found: 98
	! All granules already processed, skipping ... !
Processing for DECKER fire:
Granules found: 255
	! All granules already processed, skipping ... !
Processing for BRUSH_CREEK fire:
Granules found: 18
	! All granules already processed, skipping ... !
Processing for COW_CREEK fire:
Granules found: 53
	! All granules already processed, skipping ... !
Processing for SAND_CREEK fire:
Granules found: 108
	! All granules already processed, skipping ... !
Processing for GRIZZLY_CREEK fire:
Granules found: 113
	! All granules already processed, skipping ... !
Processing for CAMERON_PEAK fire:
Granules found: 469
	! All granules already processed, skipping ... !

QUEUEING TASKS | :   0%|          | 0/26 [00:00<?, ?it/s]

PROCESSING TASKS | :   0%|          | 0/26 [00:00<?, ?it/s]

COLLECTING RESULTS | :   0%|          | 0/26 [00:00<?, ?it/s]


	Extracting active fires ...



Processing granules:   0%|          | 0/26 [00:00<?, ?it/s]


Total elapsed time for 94    MEDIO
Name: Fire_Name, dtype: object: 0.78 minutes.

~~~~~~~~~~

Processing for MIDDLE_FORK fire:
Granules found: 296
	! All granules already processed, skipping ... !
Processing for MULLEN fire:
Granules found: 203
	! All granules already processed, skipping ... !
Processing for EAST_TROUBLESOME fire:
Granules found: 75
	! All granules already processed, skipping ... !
Processing for LUNA fire:
Granules found: 54
	! All granules already processed, skipping ... !
Processing for LEFTHAND fire:
Granules found: 18
	! All granules already processed, skipping ... !
Processing for ICE fire:
Granules found: 29
	! All granules already processed, skipping ... !
Processing for PACK_CREEK fire:
Granules found: 88
	! All granules already processed, skipping ... !
Processing for RINCON fire:
Granules found: 30

	! Some granules already processed [24] !
Opening 6 granules, approx size: 0.01 GB


QUEUEING TASKS | :   0%|          | 0/6 [00:00<?, ?it/s]

PROCESSING TASKS | :   0%|          | 0/6 [00:00<?, ?it/s]

COLLECTING RESULTS | :   0%|          | 0/6 [00:00<?, ?it/s]


	Extracting active fires ...



Processing granules:   0%|          | 0/6 [00:00<?, ?it/s]


Total elapsed time for 96    RINCON
Name: Fire_Name, dtype: object: 0.28 minutes.

~~~~~~~~~~

Processing for SYLVAN fire:
Granules found: 23
	! All granules already processed, skipping ... !
Processing for MUDDY_SLIDE fire:
Granules found: 25
	! All granules already processed, skipping ... !
Processing for MORGAN_CREEK fire:
Granules found: 129
	! All granules already processed, skipping ... !
Processing for BLACK_MOUNTAIN fire:
Granules found: 24
	! All granules already processed, skipping ... !
Processing for CALF_CANYON fire:
Granules found: 447
	! All granules already processed, skipping ... !
Processing for CERRO_PELADO fire:
Granules found: 172
	! All granules already processed, skipping ... !
Processing for PLUMTAW fire:
Granules found: 23
	! All granules already processed, skipping ... !
Processing for SUGARLOAF fire:
Granules found: 39
	! All granules already processed, skipping ... !
Processing for 403 fire:
Granules found: 24
	! All granules already processed, skipping ...

QUEUEING TASKS | :   0%|          | 0/38 [00:00<?, ?it/s]

PROCESSING TASKS | :   0%|          | 0/38 [00:00<?, ?it/s]

COLLECTING RESULTS | :   0%|          | 0/38 [00:00<?, ?it/s]


	Extracting active fires ...



Processing granules:   0%|          | 0/38 [00:00<?, ?it/s]


Total elapsed time for 281    COMANCHE
Name: Fire_Name, dtype: object: 1.11 minutes.

~~~~~~~~~~

Processing for CHRIS_MOUNTAIN fire:
Granules found: 28
	! All granules already processed, skipping ... !
Processing for DRY_LAKE fire:
Granules found: 22
	! All granules already processed, skipping ... !
Processing for QUARTZ_RIDGE fire:
Granules found: 215
	! All granules already processed, skipping ... !
Processing for BEAR_CREEK fire:
Granules found: 213
	! All granules already processed, skipping ... !
Processing for BLACK_FEATHER fire:
Granules found: 21
	! All granules already processed, skipping ... !
Processing for TRAIL_SPRINGS fire:
Granules found: 160
	! All granules already processed, skipping ... !
Processing for MILL_CREEK_2 fire:
Granules found: 112
	! All granules already processed, skipping ... !

Total elapsed time: 5.58 minutes.


~~~~~~~~~~

Done!


In [11]:
granules_ = glob.glob(os.path.join(dataraw,'granules/*.csv'))
print(len(granules_))

3364


In [12]:
afds = pd.concat((pd.read_csv(f) for f in granules_), ignore_index=True)
afds.head()

Unnamed: 0.1,Unnamed: 0,longitude,latitude,fire_mask,confidence,frp,t4,t5,m13,acq_date,...,daynight,satellite,short_name,granule_id,geo_id,sample,along_scan,along_track,scan_angle,pix_area
0,0,-95.16016,34.07464,9,h,14.550305,367.0,295.69968,2.373925,6/27/2019,...,Day,JPSS-1,VJ114IMG,VJ114IMG.A2019178.2006.002.2024029081304.nc,VJ103IMG.A2019178.2006.021.2021049184623.nc,924,0.38191,0.588271,47.8431,0.224667
1,1,-95.16433,34.074383,8,n,14.550305,350.57697,296.18445,2.373925,6/27/2019,...,Day,JPSS-1,VJ114IMG,VJ114IMG.A2019178.2006.002.2024029081304.nc,VJ103IMG.A2019178.2006.021.2021049184623.nc,925,0.381694,0.588131,47.8342,0.224486
2,2,-95.16063,34.080082,8,n,6.089355,343.91302,296.12167,1.406672,6/27/2019,...,Day,JPSS-1,VJ114IMG,VJ114IMG.A2019178.2006.002.2024029081304.nc,VJ103IMG.A2019178.2006.021.2021049184623.nc,924,0.38191,0.588271,47.8431,0.224667
3,3,-95.164825,34.079823,8,n,6.089355,345.4684,296.05,1.406672,6/27/2019,...,Day,JPSS-1,VJ114IMG,VJ114IMG.A2019178.2006.002.2024029081304.nc,VJ103IMG.A2019178.2006.021.2021049184623.nc,925,0.381694,0.588131,47.8342,0.224486
4,4,-95.906006,34.81323,8,n,4.297072,340.28574,296.18735,1.041121,6/27/2019,...,Day,JPSS-1,VJ114IMG,VJ114IMG.A2019178.2006.002.2024029081304.nc,VJ103IMG.A2019178.2006.021.2021049184623.nc,1078,0.351243,0.567857,46.4736,0.199456


In [13]:
len(afds)

1849132

In [14]:
afds['satellite'].unique()

array(['JPSS-1', 'SUOMI-NPP'], dtype=object)

In [15]:
# save the file.
out_fp = os.path.join(dataraw, f'viirs_snpp_jpss1_afd_.csv')
afds.to_csv(out_fp)
print(f"Saved to: {out_fp}")

Saved to: /Users/max/Library/CloudStorage/OneDrive-Personal/mcook/aspen-fire/Aim2/data/spatial/raw/VIIRS/viirs_snpp_jpss1_afd_.csv
