In [9]:
# Import packages
import os, sys
import earthaccess
import pandas as pd
import geopandas as gpd
import rasterio as rio
import rioxarray as rxr
import h5py
import pyproj
import xarray as xr
import numpy as np
import gc

from netCDF4 import Dataset
from matplotlib import pyplot as plt
from affine import Affine
from pyresample import geometry as geom
from pyresample import kd_tree as kdt

from osgeo import gdal, gdal_array, gdalconst, osr
from scipy.interpolate import RegularGridInterpolator as RGI

# Custom functions
sys.path.append(os.path.join(os.getcwd(),'code/'))
from __functions import *

# Explicitly use GDAL exceptions
gdal.UseExceptions()

# Projection information
geog_crs = 'EPSG:4326'  # Geographic projection
prj_crs = 'EPSG:5070'  # Projected coordinate system- WGS 84 NAD83 UTM Zone 13N

# File path information
maindir = '/Users/max/Library/CloudStorage/OneDrive-Personal/mcook/'
projdir = os.path.join(maindir, 'aspen-fire/Aim2/')
# Output directories
dataraw = os.path.join(projdir,'data/spatial/raw/VIIRS/')
datamod = os.path.join(maindir,'data/spatial/mod/VIIRS/')

# File path information
print("Success !")

Success !


In [2]:
# Function to convert swath to grid

def list_files(path, ext, recursive):
    """
    List files of a specific type in a directory or subdirectories
    """
    if recursive is True:
        return glob.glob(os.path.join(path, '**', '*{}'.format(ext)), recursive=True)
    else:
        return glob.glob(os.path.join(path, '*{}'.format(ext)), recursive=False)


def interpolate_geolocation(lat, lon, target_shape):
    """ Interpolate the geolocation data to the target shape """
    lat_int = RGI((np.arange(lat.shape[0]), np.arange(lat.shape[1])), lat)
    lon_int = RGI((np.arange(lon.shape[0]), np.arange(lon.shape[1])), lon)
    target_coords = np.meshgrid(
        np.linspace(0, lat.shape[0] - 1, target_shape[0]), 
        np.linspace(0, lon.shape[1] - 1, target_shape[1]), 
        indexing='ij')
    target_coords = np.stack(target_coords, axis=-1)
    lat_res = lat_int(target_coords)
    lon_res = lon_int(target_coords)
    return lat_res, lon_res


def viirs_swath2grid(fireDA, geoDA, shortName, sdsName, ecoSDS, geomCoords, out_dir):
    """ Converts VIIRS AFD NetCDF SDS to grid and exports as GeoTIFF """
    """
    Args:
        - fireDA: The NetCDF file containing the fire information (*.nc)
        - geoDA: The corresponding geolocation file (*.h5)
        - shortName: The short name for the data product (e.g., VNP14IMG, VJ114IMG)
        - sdsName: The name for the Science Dataset (SDS) (e.g., FP_power)
        - geomBounds: the bounding geometry to create the output spatial array
        - geomCoords: list of coordinate pairs, used to filter the data array
    Returns:
        - Spatial (projected) array for the given SDS and bounding geometry
    """

    #################################################################
    # Open the geolocation file (.h5) and read contents (lat/lon SDS)
    
    geo = h5py.File(geoDA)
    geo_objs = []
    geo.visit(geo_objs.append) # stores the SDS objects

    # Get the file name 
    geoName = os.path.basename(geoDA).split('.h5')[0]

    # Retrieve the coordinate SDS
    latSD = [str(obj) for obj in geo_objs if isinstance(geo[obj], h5py.Dataset) and '/Latitude' in obj]
    lonSD = [str(obj) for obj in geo_objs if isinstance(geo[obj], h5py.Dataset) and '/Longitude' in obj]
    # Open coordinates as arrays
    lat = geo[latSD[0]][()].astype(np.float32)
    lon = geo[lonSD[0]][()].astype(np.float32)
    print(f"latGEO shape: {lat.shape}\nlonGEO shape: {lon.shape}\nData Type: {type(lat)}")

    dims = lat.shape # shape of the swath coordinate array

    lat[lat == geo[latSD[0]].attrs['_FillValue']] = np.nan
    lon[lon == geo[lonSD[0]].attrs['_FillValue']] = np.nan
    
    ############################
    # Load data from NetCDF file
    ds = Dataset(fireDA, 'r')
    
    # Grab the Fire Pixel information (sparse arrays representing only pixel locations of active fire detections)
    FP_power = ds.variables['FP_power'][:]
    FP_latitude = ds.variables['FP_latitude'][:]
    FP_longitude = ds.variables['FP_longitude'][:]

    # Grab the fire mask (full array)
    fire_mask = ds.variables['fire mask'][:]

    # Debugging prints
    print(f"FP_power shape: {FP_power.shape}") # see the sparse array
    print(f"FP_latitude shape: {FP_latitude.shape}")
    print(f"FP_longitude shape: {FP_longitude.shape}")
    print(f"Fire Mask shape: {fire_mask.shape}") # see the full array

    # Resample the latlon SDS shape to match the fire mask (750m geolocation to 375m)
    lat_res, lon_res = interpolate_geolocation(lat, lon, fire_mask.shape)
    print(f"Resampled lat shape: {lat_res.shape}, Resampled lon shape: {lon_res.shape}")
    
    # Create swath and area definition using coordinate arrays and projection information
    swathDef = geom.SwathDefinition(lons=lon_res, lats=lat_res) # from 'pyresample' geom
    epsg, proj, pName = '4326', 'latlong', 'Geographic'  # Set output projection to Geographic CRS
    llLon, llLat, urLon, urLat = np.nanmin(lon_res), np.nanmin(lat_res), np.nanmax(lon_res), np.nanmax(lat_res)
    areaExtent = (llLon, llLat, urLon, urLat)
    projDict = {'proj': proj, 'datum': 'WGS84'}

    # Calculate the pixel dimensions, cols, and rows
    ps = np.min([abs(areaExtent[2] - areaExtent[0]) / fire_mask.shape[1],
                 abs(areaExtent[3] - areaExtent[1]) / fire_mask.shape[0]]) 
    # ps = 0.00333663072035137202  # Hard-coded estimate of pixel size in degrees
    cols = int(round((areaExtent[2] - areaExtent[0]) / ps))  # Calculate the output cols
    rows = int(round((areaExtent[3] - areaExtent[1]) / ps))  # Calculate the output rows

    print(f"Pixel Dims: {ps};\nNumber of columns: {cols};\nNumber of rows: {rows}")

    # Define output geometry and set up resampling
    areaDef = geom.AreaDefinition(epsg, pName, epsg, projDict, cols, rows, areaExtent) 
    index, outdex, indexArr, distArr = kdt.get_neighbour_info(swathDef, areaDef, 3750, neighbours=1)

    print(f'Area Definition Shape: {areaDef.shape}')

    # Perform kdtree resampling (swath 2 grid conversion) --- for the fire mask
    fv = -9999
    sdGEO = kdt.get_sample_from_neighbour_info('nn', areaDef.shape, fire_mask, index, outdex, indexArr, fill_value=fv)
    
    # Gather the geotransform definition
    gt = [areaDef.area_extent[0], ps, 0, areaDef.area_extent[3], 0, -ps]

    # Set up the GeoTIFF export
    outDir = os.path.join(out_dir, f'georeferenced/{shortName}')
    # Check the directory exists, make it if not
    if not os.path.exists(outDir):
        os.makedirs(outDir)

    # Set up output name
    identifier_ = identifier.replace(".", "_")
    outName = os.path.join(outDir, sdsName + '_' + identifier_ + '.tif')
    print("output file:\n{}\n".format(outName))
    
    # Get driver, specify dimensions, define and set output geotransform
    height, width = sdGEO.shape  # Define geotiff dimensions
    driv = gdal.GetDriverByName('GTiff')
    dataType = gdal_array.NumericTypeCodeToGDALTypeCode(sdGEO.dtype)
    d = driv.Create(outName, width, height, 1, dataType)
    d.SetGeoTransform(gt)

    # Create and set output projection, write output array data
    # Define target SRS
    srs = osr.SpatialReference()
    srs.ImportFromEPSG(int(epsg))
    d.SetProjection(srs.ExportToWkt())
    band = d.GetRasterBand(1)
    band.WriteArray(sdGEO)

    # Define fill value if it exists, if not, set to mask fill value
    if fv is not None and fv != 'NaN':
        band.SetNoDataValue(fv)
    else:
        try:
            band.SetNoDataValue(sdGEO.fill_value)
        except AttributeError:
            pass
        except TypeError:
            pass
    
    band.FlushCache()
    d, band = None, None
    

def get_coords(geom, buffer):
    """ Returns the bounding box coordinates for a given geometry(ies) and buffer """
    _geom = geom.copy()
    _geom['geometry'] = _geom.geometry.buffer(buffer)
    bounds = _geom.to_crs(geog_crs).unary_union.envelope # make sure it is in geographic coordinates
    coords = list(bounds.exterior.coords)

    del _geom, bounds
    return coords
    

print("Function to process VIIRS NetCDF files is ready to use!")

Function to process VIIRS NetCDF files is ready to use!


In [6]:
nc_files = list_files(os.path.join(projdir, f'data/spatial/raw/VIIRS/'), "*.nc", recursive=True)

vnp = [f for f in nc_files if 'VNP14' in os.path.basename(f)] # VNP14IMG (active fire)
vnp03 = [f for f in nc_files if 'VNP03' in os.path.basename(f)] # VNP03IMG (geolocation)

print(vnp[0])
print(vnp03[0])

/Users/max/Library/CloudStorage/OneDrive-Personal/mcook/aspen-fire/Aim2/data/spatial/raw/VIIRS/416/VNP14IMG/VNP14IMG.A2018179.1000.002.2024080183012.nc
/Users/max/Library/CloudStorage/OneDrive-Personal/mcook/aspen-fire/Aim2/data/spatial/raw/VIIRS/416/VNP03IMG/VNP03IMG.A2018155.0906.002.2021082151024.nc


In [10]:
# Load the fire dataset
fires_path = os.path.join(projdir,'data/spatial/mod/NIFC/nifc-ics_2018_to_2023-aspen_SRM.gpkg')
fires = gpd.read_file(fires_path)
fires.rename(columns={'NIFC_ID': 'Fire_ID'}, inplace=True)
print(fires.columns)
print(len(fires))

Index(['Fire_ID', 'NIFC_NAME', 'NIFC_ACRES', 'FINAL_ACRES', 'pct_aspen',
       'INCIDENT_ID', 'INCIDENT_NAME', 'START_YEAR', 'CAUSE', 'DISCOVERY_DATE',
       'DISCOVERY_DOY', 'WF_CESSATION_DATE', 'WF_CESSATION_DOY',
       'STR_DESTROYED_TOTAL', 'STR_DAMAGED_TOTAL', 'STR_THREATENED_MAX',
       'EVACUATION_REPORTED', 'PEAK_EVACUATIONS', 'WF_PEAK_AERIAL',
       'WF_PEAK_PERSONNEL', 'na_l3name', 'geometry'],
      dtype='object')
49


In [11]:
# Create a dictionary to store fire bounding coordinates
coords_dict = {}
buffer = 375 

for index, row in fires.iterrows():
    fire_id = row['Fire_ID']
    perim = fires.loc[fires['Fire_ID'] == fire_id]
    coords = get_coords(perim, buffer)
    coords_dict[fire_id] = coords

# Print the dictionary to verify
first = next(iter(coords_dict.items()))
print(f"FIRED_ID: {first[0]}, \nBounding Coordinates: \n{first[1]}")

FIRED_ID: 14, 
Bounding Coordinates: 
[(-108.0032864406339, 37.382946754600226), (-107.80141845568734, 37.382946754600226), (-107.80141845568734, 37.60341937952868), (-108.0032864406339, 37.60341937952868), (-108.0032864406339, 37.382946754600226)]


In [18]:
t0 = time.time()

dat = 'fire mask' # the SDS we are extracting ...

fired_id = '14'
coords_ = coords_dict[fired_id]

short_name = 'VNP14IMG'
print(f"Processing NetCDF files for {short_name}")

# Retrieve the geolocations files corresponding to the short name
sh_code = short_name[:3] # the platform code (e.g., 'VNP')
_geo_files = [gf for gf in vnp03 if sh_code in os.path.basename(gf)]
print(f"\tThere are {len(_geo_files)} associated geolocation files ...")

for fp in vnp[0:3]:
    identifier = os.path.basename(fp)[:-3]
    print(identifier)

    # Open the NetCDF file
    ds = Dataset(fp, 'r', format='NETCDF4')  # Read in VIIRS AFD file

    # Create a list of all SDS inside of the .nc file
    ecoSDS = list(ds.variables.keys())

    del ds # clean up !

    # Find the matching ECO1BGEO file from the file list
    parts = identifier.split('.')
    if short_name == 'VNP14IMG':
        date_time_part = '.'.join(parts[1:4])  # Extract date-time parts for the VNP Version 002
    else:
        date_time_part = '.'.join(parts[1:3])  
    geo_identifier = sh_code + '03IMG' + '.' + date_time_part
    geo = [geo_link for geo_link in _geo_files if geo_identifier in os.path.basename(geo_link)][0]        
    print(os.path.basename(geo))

    ###################################
    # Now apply our processing function
    viirs_swath2grid(
        fireDA=fp, 
        geoDA=geo[0], 
        shortName=short_name, 
        sdsName=dat, 
        ecoSDS=ecoSDS, 
        geomCoords=coords_, 
        out_dir=out_dir
    )
    
    print('Time to complete granule:', time.time() - t0)
    print("\n")
    print("---------------------------------------------")


Processing NetCDF files for VNP14IMG
	There are 94 associated geolocation files ...
VNP14IMG.A2018179.1000.002.2024080183012
VNP03IMG.A2018179.1000.002.2021084153603.nc


IsADirectoryError: [Errno 21] Unable to synchronously open file (file read failed: time = Wed Oct 30 11:45:40 2024
, filename = '/', file descriptor = 78, errno = 21, error message = 'Is a directory', buf = 0x16b1652e0, total read size = 8, bytes this sub-read = 8, bytes actually read = 18446744073709551615, offset = 0)

In [None]:
# Function to create FRP grid from daily granules
def create_frp_grid(datadict, geo_files, coords_dict, out_dir):
    """
    Create FRP grid from daily granules and eventually generate a maximum FRP grid for fire events.
    """
    for fired_id, coords_ in coords_dict.items():
        print(f"Processing fire event {fired_id}")
        t0 = time.time()

        # Initialize an empty array to store the maximum FRP values
        max_frp = None

        for short_name, fpaths in datadict.items():
            print(f"Processing NetCDF files for {short_name}")
            sh_code = short_name[:3]  # the platform code (e.g., 'VNP')
            _geo_files = [gf for gf in geo_files if sh_code in os.path.basename(gf)]
            print(f"There are {len(_geo_files)} associated geolocation files ...")

            for fp in fpaths:
                identifier = os.path.basename(fp)[:-3]
                print(identifier)

                # Open the NetCDF file
                ds = Dataset(fp, 'r', format='NETCDF4')  # Read in VIIRS AFD file

                # Extract the FP_power SDS
                FP_power = ds.variables['FP_power'][:]
                fire_mask = ds.variables['fire mask'][:]

                # Find the matching GEO file
                parts = identifier.split('.')
                if short_name == 'VNP14IMG':
                    date_time_part = '.'.join(parts[1:4])  # Extract date-time parts for the VNP Version 002
                else:
                    date_time_part = '.'.join(parts[1:3])  
                geo_identifier = sh_code + '03MODLL' + '.' + date_time_part
                geo = [geo_link for geo_link in _geo_files if geo_identifier in os.path.basename(geo_link)]        
                print(geo)

                # Convert swath to grid
                frp_grid = viirs_swath2grid(fp, geo[0], short_name, 'FP_power', ecoSDS=None, geomCoords=coords_, out_dir=out_dir)
                
                # Update the maximum FRP grid
                if max_frp is None:
                    max_frp = frp_grid
                else:
                    max_frp = np.maximum(max_frp, frp_grid)

                print('Time to complete granule:', time.time() - t0)
                print("\n")
                print("---------------------------------------------")

        # Save the maximum FRP grid as a GeoTIFF
        outName = os.path.join(out_dir, f'max_FRP_{fired_id}.tif')
        save_geotiff(max_frp, outName, geo[0])

        print(f"Completed processing for fire event {fired_id} in {time.time() - t0} seconds")

# Function to save the FRP grid as a GeoTIFF
def save_geotiff(array, out_name, geo_file):
    """
    Save the FRP grid as a GeoTIFF file.
    """
    geo = h5py.File(geo_file)
    lat = geo['/Latitude'][()].astype(np.float32)
    lon = geo['/Longitude'][()].astype(np.float32)
    
    ps = np.min([np.abs(lon[1] - lon[0]), np.abs(lat[1] - lat[0])])
    gt = [np.min(lon), ps, 0, np.max(lat), 0, -ps]

    # Get driver, specify dimensions, define and set output geotransform
    height, width = array.shape  # Define geotiff dimensions
    driv = gdal.GetDriverByName('GTiff')
    dataType = gdal_array.NumericTypeCodeToGDALTypeCode(array.dtype)
    d = driv.Create(out_name, width, height, 1, dataType)
    d.SetGeoTransform(gt)

    # Create and set output projection, write output array data
    srs = osr.SpatialReference()
    srs.ImportFromEPSG(4326)
    d.SetProjection(srs.ExportToWkt())
    band = d.GetRasterBand(1)
    band.WriteArray(array)

    band.FlushCache()
    d, band = None, None
    print(f"Saved GeoTIFF: {out_name}")

# Test the function
create_frp_grid(datadict, geo_files, coords_dict, testDir)