In [1]:
# Import packages
import os, shutil, time, glob, warnings, math
import earthaccess
import pandas as pd
import geopandas as gpd
import rasterio as rio
import rioxarray as rxr
import h5py
import pyproj
import xarray as xr
import numpy as np
import gc

from netCDF4 import Dataset
from matplotlib import pyplot as plt
from affine import Affine
from pyresample import geometry as geom
from pyresample import kd_tree as kdt
from os.path import join
from osgeo import gdal, gdal_array, gdalconst, osr
from scipy.interpolate import RegularGridInterpolator as RGI

# Make the GDAL/OSR Python bindings raise exceptions on failure
gdal.UseExceptions()

# Coordinate reference systems used throughout the notebook
geog_crs = 'EPSG:4326'  # Geographic (lat/lon) coordinates, WGS 84
prj_crs = 'EPSG:5070'  # Projected coordinate system - NAD83 / Conus Albers

# Root project directory and the folder holding the raw VIIRS downloads
maindir = '/Users/max/Library/CloudStorage/OneDrive-Personal/mcook/aspen-fire'
datadir = os.path.join(maindir,'Aim2/data/spatial/raw/VIIRS/')

# Confirm the setup cell ran to completion
print("Success !")

Success !


In [2]:
# Helper functions for converting VIIRS swath data to a regular grid

def list_files(path, ext, recursive):
    """
    Return the paths under `path` whose names end with `ext`.

    The search descends into subdirectories only when `recursive` is exactly
    `True`; any other value restricts it to `path` itself. `ext` is appended
    to a `*` wildcard, so it may itself contain glob characters.
    """
    name_pattern = '*{}'.format(ext)
    if recursive is not True:
        # Top-level search only
        return glob.glob(os.path.join(path, name_pattern), recursive=False)
    # '**' matches zero or more intermediate directories
    return glob.glob(os.path.join(path, '**', name_pattern), recursive=True)


def utmLookup(lat, lon):
    """
    Return the EPSG code (as a string, without the 'EPSG:' prefix) of the
    UTM zone containing the given latitude/longitude in decimal degrees.

    Codes starting with '326' are used for lat >= 0 and '327' otherwise,
    followed by the two-digit, zero-padded zone number (01-60).
    """
    # Zones are 6 degrees wide, numbered eastward from 180W; the modulo
    # folds lon == 180 back into zone 1
    zone_number = (math.floor((lon + 180) / 6) % 60) + 1
    zone = str(zone_number).zfill(2)
    hemisphere_prefix = '326' if lat >= 0 else '327'
    return hemisphere_prefix + zone


def interpolate_geolocation(lat, lon, target_shape):
    """
    Resample 2-D latitude/longitude arrays to `target_shape`.

    Both arrays are interpolated on their own index grid with scipy's
    RegularGridInterpolator (default settings) and evaluated at
    `target_shape[0]` x `target_shape[1]` evenly spaced positions spanning
    the full index range, so the corner values are preserved.

    Returns:
        (lat_res, lon_res): the resampled latitude and longitude arrays
    """
    # Fractional row/column positions, in source-index units, of every output cell
    row_positions = np.linspace(0, lat.shape[0] - 1, target_shape[0])
    col_positions = np.linspace(0, lon.shape[1] - 1, target_shape[1])
    row_grid, col_grid = np.meshgrid(row_positions, col_positions, indexing='ij')
    query_points = np.stack((row_grid, col_grid), axis=-1)

    def _build_interpolator(field):
        # Interpolator defined over the integer row/column indices of `field`
        return RGI((np.arange(field.shape[0]), np.arange(field.shape[1])), field)

    lat_interp = _build_interpolator(lat)
    lon_interp = _build_interpolator(lon)
    return lat_interp(query_points), lon_interp(query_points)


def subset_array(array, areaDef, boundingCoords):
    """
    Subset a gridded array to the bounding box of a set of coordinates.

    Args:
        - array: gridded array whose first two axes are (rows, cols)
        - areaDef: object exposing `area_extent` (index 0 = left edge,
          index 3 = top edge) and `pixel_size_x` / `pixel_size_y`
        - boundingCoords: (n, 2) array-like of (x, y) coordinate pairs in the
          same units as `area_extent`
    Returns:
        - The slice of `array` covering the bounding box of the coordinates.
          Indices are clamped to the array bounds, so a box that reaches past
          the grid edge returns the overlapping part instead of wrapping
          around through negative indices.
    """
    coords = np.asarray(boundingCoords)
    min_lon, min_lat = np.min(coords[:, 0]), np.min(coords[:, 1])
    max_lon, max_lat = np.max(coords[:, 0]), np.max(coords[:, 1])

    left_edge = areaDef.area_extent[0]
    top_edge = areaDef.area_extent[3]
    n_rows, n_cols = array.shape[0], array.shape[1]

    def _to_index(offset, pixel_size, upper):
        # Convert a distance from the grid origin to a pixel index, kept in
        # [0, upper]; a negative index would make the slice count from the
        # far edge of the array
        return min(max(int(offset / pixel_size), 0), upper)

    # Columns grow eastward from the left edge; rows grow southward from the top
    col_start = _to_index(min_lon - left_edge, areaDef.pixel_size_x, n_cols)
    col_end = _to_index(max_lon - left_edge, areaDef.pixel_size_x, n_cols)
    row_start = _to_index(top_edge - max_lat, areaDef.pixel_size_y, n_rows)
    row_end = _to_index(top_edge - min_lat, areaDef.pixel_size_y, n_rows)

    return array[row_start:row_end, col_start:col_end]


def viirs_swath2grid(fireDA, geoDA, shortName, sdsName, ecoSDS, geomCoords, out_dir):
    """
    Grid the fire radiative power detections of one VIIRS active-fire granule
    and export the result as a single-band GeoTIFF in geographic coordinates.

    Args:
        - fireDA: Path to the NetCDF file containing the fire information (*.nc)
        - geoDA: Path to the corresponding geolocation file (*.h5)
        - shortName: The short name for the data product (e.g., VNP14IMG, VJ114IMG);
          used as an output sub-directory
        - sdsName: The name for the Science Dataset (SDS) (e.g., FP_power); only
          used as the prefix of the output file name, the gridded variable is
          always 'FP_power'
        - ecoSDS: list of the SDS names in the fire file (currently unused)
        - geomCoords: list of coordinate pairs for the fire bounding box
          (currently unused; reserved for subsetting the output grid)
        - out_dir: root directory for the exported GeoTIFFs
    Returns:
        - None. Writes
          <out_dir>/georeferenced/<shortName>/<DayNightFlag>/<sdsName>_<platform>_<date>_<time>.tif
    """

    #################################################################
    # Open the geolocation file (.h5) and read contents (lat/lon SDS)
    with h5py.File(geoDA, 'r') as geo:
        geo_objs = []
        geo.visit(geo_objs.append)  # collects the name of every object in the file

        # Retrieve the coordinate SDS
        latSD = [str(obj) for obj in geo_objs if isinstance(geo[obj], h5py.Dataset) and '/Latitude' in obj]
        lonSD = [str(obj) for obj in geo_objs if isinstance(geo[obj], h5py.Dataset) and '/Longitude' in obj]

        # Read the coordinates fully into memory so the file can be closed
        lat = geo[latSD[0]][()].astype(np.float32)
        lon = geo[lonSD[0]][()].astype(np.float32)
    print(f"latGEO shape: {lat.shape}\nlonGEO shape: {lon.shape}\nData Type: {type(lat)}")

    # NOTE(review): fill values in lat/lon are not masked before the mean and
    # min/max below are taken - confirm the geolocation arrays contain none.

    # Get the middle swath lat/lon
    midLat, midLon = np.mean(lat), np.mean(lon)

    # Identify the UTM Zone of the middle swath (reported only; the output
    # grid is still written in geographic coordinates)
    utm_zone = utmLookup(midLat, midLon)
    print(f"UTM Zone of middle swath: {utm_zone}")

    ############################
    # Load data from NetCDF file
    with Dataset(fireDA, 'r') as ds:
        # Check if it is day or night
        daynight = ds.getncattr('DayNightFlag')

        # Grab the Fire Pixel information (sparse arrays representing only pixel locations of active fire detections)
        FP_power = ds.variables['FP_power'][:]
        FP_latitude = ds.variables['FP_latitude'][:]
        FP_longitude = ds.variables['FP_longitude'][:]

        # Grab the fire mask (full array); only its shape is needed below
        fire_mask = ds.variables['fire mask'][:]

    # Debugging prints
    print(f"FP_power shape: {FP_power.shape}")  # see the sparse array
    print(f"FP_latitude shape: {FP_latitude.shape}")
    print(f"FP_longitude shape: {FP_longitude.shape}")
    print(f"Fire Mask shape: {fire_mask.shape}")  # see the full array

    # Resample the latlon SDS shape to match the fire mask (750m geolocation to 375m)
    lat_res, lon_res = interpolate_geolocation(lat, lon, fire_mask.shape)
    print(f"Resampled lat shape: {lat_res.shape}, Resampled lon shape: {lon_res.shape}")

    # Output area: the lat/lon bounding box of the swath, in geographic CRS
    epsg, proj, pName = '4326', 'latlong', 'Geographic'
    llLon, llLat, urLon, urLat = np.nanmin(lon_res), np.nanmin(lat_res), np.nanmax(lon_res), np.nanmax(lat_res)
    areaExtent = (llLon, llLat, urLon, urLat)
    projDict = {'proj': proj, 'datum': 'WGS84'}

    # Square pixel size (degrees): the finer of the per-column and per-row spacing
    ps = np.min([abs(areaExtent[2] - areaExtent[0]) / fire_mask.shape[1],
                 abs(areaExtent[3] - areaExtent[1]) / fire_mask.shape[0]])
    cols = int(round((areaExtent[2] - areaExtent[0]) / ps))  # Calculate the output cols
    rows = int(round((areaExtent[3] - areaExtent[1]) / ps))  # Calculate the output rows

    print(f"Pixel Dims: {ps};\nNumber of columns: {cols};\nNumber of rows: {rows}")

    # Define output geometry
    areaDef = geom.AreaDefinition(epsg, pName, epsg, projDict, cols, rows, areaExtent)
    print(f'Area Definition Shape: {areaDef.shape}')

    # Place the sparse fire-pixel FRP values on the output grid by
    # nearest-neighbour resampling; cells with no detection within the
    # radius of influence are filled with NaN
    fv = np.nan
    swathDef_fire = geom.SwathDefinition(lons=FP_longitude, lats=FP_latitude)
    FP_power_grid = kdt.resample_nearest(swathDef_fire, FP_power, areaDef, radius_of_influence=375, fill_value=fv)

    # # Subset the FRP grid using the fire perimeter coordinates
    # coords_array = np.array(geomCoords)
    # FP_power_grid_s = subset_array(FP_power_grid, areaDef, coords_array)

    # Geotransform: upper-left corner, north-up, square pixels
    gt = [areaDef.area_extent[0], ps, 0, areaDef.area_extent[3], 0, -ps]

    # Set up the GeoTIFF export for day or night
    outDir = os.path.join(out_dir, f'georeferenced/{shortName}/{daynight}')
    os.makedirs(outDir, exist_ok=True)

    # Build the output name from the granule file name itself rather than a
    # notebook-level variable, e.g.
    # 'VNP14IMG.A2021242.2048.002.<...>.nc' -> 'VNP14IMG_A2021242_2048'
    identifier = os.path.splitext(os.path.basename(fireDA))[0]
    platform_datetime = '_'.join(identifier.replace(".", "_").split('_')[:3])
    outName = os.path.join(outDir, sdsName + '_' + platform_datetime + '.tif')
    print("output file:\n{}\n".format(outName))

    # Get driver, specify dimensions, define and set output geotransform
    height, width = FP_power_grid.shape  # Define geotiff dimensions
    driv = gdal.GetDriverByName('GTiff')
    dataType = gdal_array.NumericTypeCodeToGDALTypeCode(FP_power_grid.dtype)
    d = driv.Create(outName, width, height, 1, dataType)
    d.SetGeoTransform(gt)

    # Create and set output projection, write output array data
    srs = osr.SpatialReference()
    srs.ImportFromEPSG(int(epsg))
    d.SetProjection(srs.ExportToWkt())
    band = d.GetRasterBand(1)
    band.WriteArray(FP_power_grid)

    # Cells without a detection hold the NaN fill value
    band.SetNoDataValue(fv)

    band.FlushCache()
    # Dropping the references closes the dataset and flushes it to disk
    band = None
    d = None
    

def get_coords(geom, buffer):
    """
    Return the corner coordinates of the buffered bounding box of a geometry(ies).

    The geometries are buffered by `buffer` (in the units of their current
    CRS, since the buffer is applied before reprojection), reprojected to
    `geog_crs`, merged, and reduced to their envelope.

    Returns:
        - list of coordinate tuples tracing the envelope's exterior ring
    """
    # Work on a copy so the caller's GeoDataFrame keeps its original geometry
    buffered = geom.copy()
    buffered['geometry'] = buffered.geometry.buffer(buffer)

    # Reproject to geographic coordinates before taking the bounding envelope
    merged = buffered.to_crs(geog_crs).unary_union
    return list(merged.envelope.exterior.coords)
    

# Confirm that the helper functions above were defined without error
print("Function to process VIIRS NetCDF files is ready to use!")


Function to process VIIRS NetCDF files is ready to use!


In [3]:
# Testing for one fire: all granules for this event live under one folder
testDir = os.path.join(datadir,'FIRED_3518')
    
# Get a list of geolocation files (the pattern is passed as the 'ext' argument,
# which list_files appends to a '*' wildcard)
geo_files = list_files(testDir,"*.h5",recursive=True)
print(geo_files[0])

# Get list of fire data files, one list per file-name prefix (VNP*, VJ1*)
vnp_files = list_files(testDir,"VNP*.nc",recursive=True)
vj1_files = list_files(testDir,"VJ1*.nc",recursive=True)
print(vnp_files[0])
print(vj1_files[0])

# Map each product short name to its list of NetCDF file paths
datadict = {
    'VNP14IMG': vnp_files,
    'VJ114IMG': vj1_files
}

/Users/max/Library/CloudStorage/OneDrive-Personal/mcook/aspen-fire/Aim2/data/spatial/raw/VIIRS/FIRED_3518/VJ103MODLL/VJ103MODLL.A2021243.1000.021.2021243163653.h5
/Users/max/Library/CloudStorage/OneDrive-Personal/mcook/aspen-fire/Aim2/data/spatial/raw/VIIRS/FIRED_3518/VNP14IMG/VNP14IMG.A2021242.2048.002.2024074102039.nc
/Users/max/Library/CloudStorage/OneDrive-Personal/mcook/aspen-fire/Aim2/data/spatial/raw/VIIRS/FIRED_3518/VJ114IMG/VJ114IMG.A2021239.0936.002.2024081210828.nc


In [4]:
# Load the fire event perimeters (GeoPackage) into a GeoDataFrame
fires_path = os.path.join(maindir,'Aim2/data/spatial/mod/FIRED/fired_events_west_aspen.gpkg')
fires = gpd.read_file(fires_path)
# Show the available attribute columns and the number of fire events
print(fires.columns)
print(len(fires))

Index(['fired_id', 'ig_date', 'ig_day', 'ig_month', 'ig_year', 'last_date',
       'event_dur', 'tot_pix', 'tot_ar_km2', 'fsr_px_dy', 'fsr_km2_dy',
       'mx_grw_px', 'mn_grw_px', 'mu_grw_px', 'mx_grw_km2', 'mn_grw_km2',
       'mu_grw_km2', 'mx_grw_dte', 'x', 'y', 'ig_utm_x', 'ig_utm_y', 'lc_code',
       'lc_mode', 'lc_name', 'lc_desc', 'lc_type', 'eco_mode', 'eco_name',
       'eco_type', 'tot_perim', 'pct_aspen', 'geometry'],
      dtype='object')
102


In [5]:
# Create a dictionary mapping each 'fired_id' to its buffered bounding-box coordinates
coords_dict = {}
buffer = 375  # buffer distance passed to get_coords, in the units of the fires CRS

for index, row in fires.iterrows():
    fire_id = row['fired_id']
    # Select the perimeter row(s) as a GeoDataFrame, which get_coords expects
    perim = fires.loc[fires['fired_id'] == fire_id]
    coords = get_coords(perim, buffer)
    coords_dict[fire_id] = coords

# Print the first entry of the dictionary to verify
first = next(iter(coords_dict.items()))
print(f"FIRED_ID: {first[0]}, \nBounding Coordinates: \n{first[1]}")

FIRED_ID: 3518, 
Bounding Coordinates: 
[(-113.4870457742665, 37.31747928447832), (-113.45119936806327, 37.31747928447832), (-113.45119936806327, 37.336690972515356), (-113.4870457742665, 37.336690972515356), (-113.4870457742665, 37.31747928447832)]


In [6]:
# Grid every VIIRS active-fire granule of the test fire and export GeoTIFFs
t0 = time.time()  # start of the whole batch

dat = 'FP_power' # the SDS we are extracting ...

fired_id = '3518'
coords_ = coords_dict[fired_id]

out_dir = testDir

# max_frp_day = None # empty array to store the maximum FRP daily arrays (daytime obs.)
# max_frp_night = None # empty array to store the maximum FRP daily arrays (nighttime obs.)

for short_name, fpaths in datadict.items():
    print(f"Processing NetCDF files for {short_name}")
    # Retrieve the geolocations files corresponding to the short name
    sh_code = short_name[:3] # the platform code (e.g., 'VNP')
    _geo_files = [gf for gf in geo_files if sh_code in os.path.basename(gf)]
    print(f"There are {len(_geo_files)} associated geolocation files ...")
    for fp in fpaths:
        t_granule = time.time()  # per-granule timer

        # Granule identifier: the file name without the '.nc' suffix
        identifier = os.path.basename(fp)[:-3]
        print(identifier)

        # Open the VIIRS AFD NetCDF file just long enough to list its SDS names
        with Dataset(fp, 'r', format='NETCDF4') as ds:
            ecoSDS = list(ds.variables.keys())

        # Find the matching geolocation (03MODLL) file from the file list
        parts = identifier.split('.')
        if short_name == 'VNP14IMG':
            date_time_part = '.'.join(parts[1:4])  # Extract date-time parts for the VNP Version 002
        else:
            # Match on acquisition date and time only (version field left out)
            date_time_part = '.'.join(parts[1:3])
        geo_identifier = sh_code + '03MODLL' + '.' + date_time_part
        geo = [geo_link for geo_link in _geo_files if geo_identifier in os.path.basename(geo_link)]
        print(geo)

        # Without a geolocation file the granule cannot be gridded
        if not geo:
            print(f"No geolocation file matching {geo_identifier}; skipping {identifier}")
            print("\n")
            print("---------------------------------------------")
            continue

        ###################################
        # Now apply our processing function
        viirs_swath2grid(
            fireDA=fp, 
            geoDA=geo[0], 
            shortName=short_name, 
            sdsName=dat, 
            ecoSDS=ecoSDS, 
            geomCoords=coords_, 
            out_dir=out_dir
        )

        # # Update the maximum FRP array
        # if daynight == "Day":
        #     # Update the maximum FRP grid
        #     if max_frp_day is None:
        #         max_frp_day = maxFRP
        #     else:
        #         max_frp_day = np.maximum(max_frp_day, maxFRP)
        # else:
        #     # Update the maximum FRP grid
        #     if max_frp_night is None:
        #         max_frp_night = maxFRP
        #     else:
        #         max_frp_night = np.maximum(max_frp_night, maxFRP)
        
        print('Time to complete granule:', time.time() - t_granule)
        print("\n")
        print("---------------------------------------------")

print('Total processing time:', time.time() - t0)


Processing NetCDF files for VNP14IMG
There are 34 associated geolocation files ...
VNP14IMG.A2021242.2048.002.2024074102039
['/Users/max/Library/CloudStorage/OneDrive-Personal/mcook/aspen-fire/Aim2/data/spatial/raw/VIIRS/FIRED_3518/VNP03MODLL/VNP03MODLL.A2021242.2048.002.2024017150503.h5']
latGEO shape: (3232, 3200)
lonGEO shape: (3232, 3200)
Data Type: <class 'numpy.ndarray'>


NameError: name 'utm_zone' is not defined