In [1]:
"""
Testing remote access to VIIRS active fire data
"""

import os, sys, time
import earthaccess as ea
import geopandas as gpd
import xarray as xr
import datetime as dt

from urllib.parse import urlparse

# Custom functions
sys.path.append(os.path.join(os.getcwd(),'code/'))
from __functions import *
        
# Directories
maindir = '/Users/max/Library/CloudStorage/OneDrive-Personal/mcook/'
projdir = os.path.join(maindir, 'aspen-fire/Aim2/')

# Output directories
dataraw = os.path.join(projdir,'data/spatial/raw/VIIRS/')
datamod = os.path.join(projdir,'data/spatial/mod/VIIRS/')

print("Ready !")

Ready !


In [2]:
# Load the fire dataset for the Southern Rockies
fires = gpd.read_file(os.path.join(projdir,'data/spatial/mod/NIFC/nifc-ics_2018_to_2023-aspen-obs.gpkg'))
fires = fires[fires['na_l3name'] == 'Southern Rockies']

# tidy the fire id and name columns
fires.rename(columns={'NIFC_ID': 'Fire_ID', 'NIFC_NAME': 'Fire_Name'}, inplace=True)
fires['obs_count'] = fires['obs_count'].fillna(0).astype(int) # fill NaN as 0 obs.
fires = fires[fires['obs_count'] >= 10]

# tidy the date columns
fires['DISCOVERY_DATE'] = pd.to_datetime(fires['DISCOVERY_DATE']).dt.date
fires['WF_CESSATION_DATE'] = pd.to_datetime(fires['WF_CESSATION_DATE']).dt.date

print(f"Available attributes: \n{fires.columns}")
print(f"\nThere are [{len(fires)}] fires.")

Available attributes: 
Index(['Fire_ID', 'Fire_Name', 'NIFC_ACRES', 'FINAL_ACRES', 'pct_aspen',
       'INCIDENT_ID', 'INCIDENT_NAME', 'START_YEAR', 'CAUSE', 'DISCOVERY_DATE',
       'DISCOVERY_DOY', 'WF_CESSATION_DATE', 'WF_CESSATION_DOY',
       'STR_DESTROYED_TOTAL', 'STR_DAMAGED_TOTAL', 'STR_THREATENED_MAX',
       'EVACUATION_REPORTED', 'PEAK_EVACUATIONS', 'WF_PEAK_AERIAL',
       'WF_PEAK_PERSONNEL', 'na_l3name', 'first_obs_date', 'last_obs_date',
       'obs_count', 'geometry'],
      dtype='object')

There are [50] fires.


In [3]:
# Grab a test fire (Cameron Peak)

fire = fires[fires['Fire_Name'] == '416']

buffer=1000
coords, extent = get_coords(fire, buffer)
print(f"Bounding coordinates: {coords}")

start_date = fire['first_obs_date'].iloc[0]
last_date = fire['last_obs_date'].iloc[0]
date_range = (start_date, last_date)
print(f"Date range for EA search request: {date_range}")

Bounding coordinates: [(-105.91670123621584, 40.451623739399885), (-105.18607078366145, 40.451623739399885), (-105.18607078366145, 40.788201870344174), (-105.91670123621584, 40.788201870344174), (-105.91670123621584, 40.451623739399885)]
Date range for EA search request: (Timestamp('2020-08-13 00:00:00'), Timestamp('2020-10-24 00:00:00'))


In [4]:
date_range = ('2020-08-13', '2020-08-20')

In [5]:
# Search query with earthaccess

In [6]:
query = ea.search_data(
    short_name=['VJ114IMG', 'VNP14IMG'], 
    polygon=coords,
    temporal=date_range, 
    cloud_hosted=True,
    count=-1
)

Granules found: 46


In [7]:
granules = [g['umm']['DataGranule']['Identifiers'][0]['Identifier']
            for g in query if 'VJ114IMG' in g['umm']['DataGranule']['Identifiers'][0]['Identifier']]
N = len(granules)
print(N)

23


In [8]:
# open the fileset remotely
fileset = ea.open(query)

Opening 46 granules, approx size: 3.76 GB


QUEUEING TASKS | :   0%|          | 0/46 [00:00<?, ?it/s]

PROCESSING TASKS | :   0%|          | 0/46 [00:00<?, ?it/s]

COLLECTING RESULTS | :   0%|          | 0/46 [00:00<?, ?it/s]

In [9]:
print(fileset[0])

<File-like object HTTPFileSystem, https://data.laadsdaac.earthdatacloud.nasa.gov/prod-lads/VJ103IMG/VJ103IMG.A2020226.0800.021.2021068002120.nc>


In [10]:
# isolate the fire swaths from the geolocation swaths
vj114_files = [f for f in fileset if 'VJ114IMG' in str(f)]
print(f"Processing a total of {len(vj114_files)} fire swaths.")

Processing a total of 23 fire swaths.


In [12]:
t0 = time.time()

for fp in vj114_files[0:5]:
    df = pd.DataFrame() # to store the active fire data
    t1 = time.time()
    print(f"Opening the fire swath: {fp.url}")
    with xr.open_dataset(fp, phony_dims='access') as swath:
        # get the granule ID and associated geolocation swath
        granule_id = swath.LocalGranuleID
        geo_id = swath.VNP03IMG
        
        # Check for fire pixels in the specified region
        lonfp = swath.variables['FP_longitude'][:] # fire pixel longitude
        latfp = swath.variables['FP_latitude'][:] # fire pixel latitude
        fire_scene = ((lonfp > extent[0]) & (lonfp < extent[1]) & 
                      (latfp > extent[2]) & (latfp < extent[3]))
        if not fire_scene.any():  # Check for any fire pixels in region
            print(f"\n\tNo active fires detected in {granule_id}. Skipping...\n")
            del fire_scene
            continue # skip if no fire pixels in region
        del fire_scene
        
        # granule attributes
        daynight = swath.DayNightFlag #string Day or Night

        # variables
        fire = swath['fire mask'] # the fire mask
        frp = swath.variables['FP_power'][:] # fire radiative power
        t4 = swath.variables['FP_T4'][:] # I04 brightness temp (kelvins)
        t5 = swath.variables['FP_T5'][:] # I05 brightness temp (kelvins)
        
        # tree = cKDTree(np.array([lonfp, latfp]).T) #search tree for finding nearest FRP

    print(f"\tProcessed the fire data in {(time.time() - t1) / 60}")

    # open the associated geolocation file
    print(f"Opening the geolocation file: {geo_id}")
    t1 = time.time()

    geods = [f for f in fileset if geo_id in str(f)][0]
    with xr.open_dataset(
        geods, group='geolocation_data', phony_dims='access',
        drop_variables=['height', 'range', 'sensor_azimuth', 'sensor_zenith', 
                        'solar_azimuth', 'solar_zenith', 'land_water_mask', 'quality_flag']
    ) as geo_ds:
        # extract the pixel positions
        i, j = np.indices(geo_ds.longitude.shape) #line and sample
        print(f"\tIt took {(time.time() - t1) / 60} to extract line and sample ...")
        
        # mask to fire bounding extent
        geo_scene = ((geo_slice.longitude > extent[0]) & (geo_slice.longitude < extent[1]) & 
                     (geo_slice.latitude > extent[2]) & (geo_slice.latitude < extent[3])).values
        print(f"\tIt took {(time.time() - t1) / 60} to gather the geo scene ...")
            
    # Populate the dataframe
    print(f"Creating fire data frame...")
    t1 = time.time()
    
    # gather information from file name
    timestamp = granule_id.split('.')[1:3]
    year = timestamp[0][1:5]
    day = timestamp[0][5:8]
    acqtime = timestamp[1]
    acqdate = dt.datetime.strptime(year+day, '%Y%j').strftime('%-m/%-d/%Y')
    
    df['longitude'] = list(geo_ds.longitude.values[geo_scene])
    df['latitude'] = list(geo_ds.latitude.values[geo_scene])
    df['j'] = list(j[geo_scene]) #sample number for pixel size lookup
    df['fire_mask'] = list(fire.values[geo_scene])
    df['confidence'] = pd.Categorical( df.fire_mask)
    df.confidence = df.confidence.replace(
        {0:'x', 1:'x', 2:'x', 3:'x', 4:'x', 5:'x', 6:'x', 7:'l', 8:'n', 9:'h'})
    df['daynight'] = daynight
    df['acq_date'] = acqdate
    df['acq_time'] = acqtime
    df['granule_id'] = granule_id
    df['geo_id'] = geo_id
    
    print(f"\tIt took {(time.time() - t1) / 60} to create the fire dataframe ...")

t2 = (time.time() - t0) / 60
print(f"\nTotal elapsed time: {t2:.2f} minutes.\n")
print("\n~~~~~~~~~~\n")
print("Done!")

Opening the fire swath: https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/VJ114IMG.002/VJ114IMG.A2020226.0800.002.2024070113843/VJ114IMG.A2020226.0800.002.2024070113843.nc

	No active fires detected in VJ114IMG.A2020226.0800.002.2024070113843.nc. Skipping...

Opening the fire swath: https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/VJ114IMG.002/VJ114IMG.A2020226.0936.002.2024070113845/VJ114IMG.A2020226.0936.002.2024070113845.nc

	No active fires detected in VJ114IMG.A2020226.0936.002.2024070113845.nc. Skipping...

Opening the fire swath: https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/VJ114IMG.002/VJ114IMG.A2020226.1924.002.2024070113841/VJ114IMG.A2020226.1924.002.2024070113841.nc

	No active fires detected in VJ114IMG.A2020226.1924.002.2024070113841.nc. Skipping...

Opening the fire swath: https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/VJ114IMG.002/VJ114IMG.A2020226.2100.002.2024070113843/VJ114IMG.A2020226.2100.002.2024070113843.nc


NameError: name 'geo_slice' is not defined

In [None]:
# open all the files with xarray, examine the first
def preprocess(ds):
    # Assign coordinates based on the dimensions
    ds = ds.assign_coords(
        phony_dim_1=np.arange(ds.dims['phony_dim_1']),
        phony_dim_2=np.arange(ds.dims['phony_dim_2']),
    )
    return ds
    
da = xr.open_mfdataset(
    vnp14_fileset, 
    preprocess=preprocess,
    combine='by_coords', 
    phony_dims='access'
)
print(f"There are [{len(da)}] .nc files in the fileset.")

In [None]:
da['fire mask']

In [None]:
lonfp = da['FP_longitude'].values
latfp = da['FP_latitude'].values

# Filter the data for fire pixels
frp = da.variables['FP_power'].values # fire radiative power
t4 = da.variables['FP_T4'].values # I04 brightness temp (kelvins)
t5 = da.variables['FP_T5'].values # I05 brightness temp (kelvins)

# Create a DataFrame to store fire data
fire_data = pd.DataFrame({
    'longitude': lonfp.ravel(),
    'latitude': latfp.ravel(),
    'FRP': frp.ravel(),
    'T4': t4.ravel(),
    'T5': t5.ravel()
})

# Add metadata if available
fire_data['daynight'] = da.attrs.get('DayNightFlag', 'Unknown')  # Granule day/night flag
fire_data['granule_id'] = da.attrs.get('LocalGranuleID', 'Unknown')
fire_data.head()

In [None]:
len(fire_data)

In [None]:
# download the files
ea.download(query, self.out_dir)