In [None]:
import rasterio
import h5netcdf
import numpy as np
import geopandas as gpd
import os
from pathlib import Path
import neonutilities as neon
import requests
import h5py
import pandas as pd
from rasterio.transform import rowcol
from shapely import box
from rasterio.features import rasterize
from affine import Affine

# project root: the directory the notebook kernel was started in; the data
# paths built from `root` below are resolved relative to it
root = Path.cwd()

### Download hyperspectral (HS) data from NEON

In [None]:
# get api token stored in txt file
# readline() keeps the trailing newline, so strip it before the token is sent to the API
with open('neon_token.txt', 'r') as f:
    token = f.readline().strip()

# read in geodataframe to define AOI
crowns = gpd.read_file(root / 'data' / 'harvard' / 'all_tree_crown_polys.gpkg')
crowns = crowns.to_crs(32618)

# set query parameters
# every crown vertex is passed as an (easting, northing) pair, truncated to whole
# coordinate units; extract the coordinate table once and reuse it for both axes
crown_coords = crowns.get_coordinates()
easting = list(crown_coords['x'].astype('int').values)
northing = list(crown_coords['y'].astype('int').values)
savepath = root / 'data' / 'harvard' / 'hyperspectral'
year = 2025
site = 'HARV'

# run handy dandy neon utility function to download data
# save under the project-relative `savepath` (not a machine-specific absolute path)
# so the notebook is portable and the download location stays in sync with `savepath`
neon.by_tile_aop(
    dpid="DP3.30006.002",  # "DP3.30006.001" for data prior to 2022
    site=site,
    year=year,
    easting=easting,
    northing=northing,
    savepath=str(savepath),
    include_provisional=True,
    token=token,
)

Provisional NEON data are included. To exclude provisional data, use input parameter include_provisional=False.
Downloading 3 NEON data files totaling approximately 1.3 GB

100%|██████████| 3/3 [00:37<00:00, 12.35s/it]


### Extract tree spectral signatures from NEON H5 files

In [None]:

def make_tile_gdf(tile_dir, epsg=32618, tile_size=1000):
    """Build a GeoDataFrame with one square footprint per ``.h5`` file in ``tile_dir``.

    The lower-left corner of each tile is parsed from the file name: the 4th- and
    3rd-from-last underscore-separated fields are read as integer min x and min y.
    # NOTE(review): this field position is taken from the existing parsing logic;
    # confirm it matches the naming of the product version being downloaded.

    Parameters
    ----------
    tile_dir : str or path-like
        Directory scanned (non-recursively) for files ending in ``.h5``.
    epsg : int, optional
        CRS assigned to the tile footprints. Defaults to 32618, the CRS the crown
        polygons are reprojected to in this notebook.
    tile_size : int, optional
        Edge length of a tile in CRS units, used to derive the max x / max y
        corner from the parsed origin. Defaults to 1000.

    Returns
    -------
    geopandas.GeoDataFrame
        Columns ``filepath`` (the ``os.DirEntry`` of the file),
        ``file_west_bound`` (min x), ``file_north_bound`` (max y) and a box
        ``geometry`` per tile.
    """
    with os.scandir(tile_dir) as entries:
        all_files = [f for f in entries if f.is_file() and f.name.endswith('.h5')]

    polygons = []
    file_west_bounds = []
    file_north_bounds = []

    for f in all_files:
        # split the base name only, so underscores in parent directory names
        # can never be mistaken for file-name fields
        split_name = f.name.split('_')
        min_x, min_y = int(split_name[-4]), int(split_name[-3])  # get origin from file name
        max_x, max_y = min_x + tile_size, min_y + tile_size
        file_west_bounds.append(min_x)
        file_north_bounds.append(max_y)
        polygons.append(box(min_x, min_y, max_x, max_y))

    tiles = gpd.GeoDataFrame(
        data={
            'filepath': all_files,
            'file_west_bound': file_west_bounds,
            'file_north_bound': file_north_bounds,
        },
        geometry=polygons,
        crs=epsg,
    )

    return tiles

def get_hs_filter(bands):
    """Return the indices of ``bands`` that fall inside the retained wavelength windows.

    A band is kept when its value lies (inclusively) within any of the windows
    410-1320, 1450-1800 or 2050-2475.

    method from https://github.com/atalbanese/NEON_Hyperspectral/blob/main/annotation.py
    """
    keep_windows = ((410, 1320), (1450, 1800), (2050, 2475))

    # accumulate the union of the per-window membership masks
    in_any_window = np.zeros(np.shape(bands), dtype=bool)
    for lower, upper in keep_windows:
        in_any_window |= (bands >= lower) & (bands <= upper)

    return np.nonzero(in_any_window)[0]

## loop over tiles, find crown polygons that overlap and extract tree spectra for those polygons
def make_tree_spectra_df(crowns, tiles, scale = 1.0, site = 'HARV'):
    """Extract per-pixel reflectance spectra for every crown polygon, tile by tile.

    For each tile the crowns intersecting it are selected, the pixel window
    covering their combined bounding box is read from the tile's HDF5 file
    (restricted to the bands returned by ``get_hs_filter``), and the crowns are
    rasterized onto that window so each pixel can be labelled with a StemTag.

    Parameters
    ----------
    crowns : geopandas.GeoDataFrame
        Crown polygons with a ``StemTag`` column castable to int. Only StemTags
        greater than 0 are kept, because 0 is the rasterization fill value.
    tiles : geopandas.GeoDataFrame
        One row per tile with ``filepath``, ``file_west_bound``,
        ``file_north_bound`` and ``geometry`` (as built by ``make_tile_gdf``).
    scale : float, optional
        Pixel size in CRS units used to build the tile's affine transform.
    site : str, optional
        Name of the top-level HDF5 group holding the reflectance data.

    Returns
    -------
    pandas.DataFrame
        One row per crown pixel: one column per retained wavelength (reflectance
        divided by 10000, float32), plus ``StemTag``, ``row`` and ``col``.
        ``row``/``col`` index into the cropped window read for that tile, not
        into the full tile. If no pixels are found in any tile, an empty frame
        with only the ``StemTag``, ``row`` and ``col`` columns is returned.
    """
    spectra_dfs_list = []

    for ix, tile in tiles.iterrows():

        # get crown polys that intersect this tile
        crowns_in_tile = crowns[crowns.intersects(tile.geometry)]
        if crowns_in_tile.empty:
            print(f'no tree crowns found in tile {ix}')
            continue

        min_x, min_y, max_x, max_y = crowns_in_tile.total_bounds

        # north-up transform anchored at the tile's upper-left corner
        transform = Affine.translation(tile['file_west_bound'], tile['file_north_bound']) * Affine.scale(scale, -scale)
        row_0, col_0 = rowcol(transform, min_x, max_y)  # upper left
        row_1, col_1 = rowcol(transform, max_x, min_y)  # lower right
        # sort to avoid silent rounding errors
        row_0, row_1 = sorted((row_0, row_1))
        col_0, col_1 = sorted((col_0, col_1))

        # os.fspath accepts the os.DirEntry stored by make_tile_gdf as well as str / Path;
        # the context manager closes the file even if reading raises
        with h5py.File(os.fspath(tile['filepath']), 'r') as hs_file:
            reflectance = hs_file[site]["Reflectance"]
            refl_data = reflectance["Reflectance_Data"]
            rows_full, cols_full, _ = refl_data.shape

            # clamp the window so it stays inside the tile bounds
            row_start = max(0, min(row_0, rows_full - 1))
            row_stop = max(0, min(row_1 + 1, rows_full))
            col_start = max(0, min(col_0, cols_full - 1))
            col_stop = max(0, min(col_1 + 1, cols_full))
            if row_stop <= row_start or col_stop <= col_start:
                # clamping left nothing to read (crowns only touch the tile edge)
                continue

            bands = reflectance["Metadata"]['Spectral_Data']['Wavelength'][:]
            hs_filter = get_hs_filter(bands)
            bands = bands[hs_filter]

            hs_grab = refl_data[row_start:row_stop, col_start:col_stop, hs_filter] / 10000

        hs_grab = hs_grab.astype(np.float32)
        rows, cols, _ = hs_grab.shape

        # new transform for cropped raster: it must be anchored at the *clamped*
        # window origin (row_start/col_start) that was actually read; using the
        # unclamped row_0/col_0 shifts the crowns relative to the pixels whenever
        # a crown extends past the tile's north or west edge
        crop_transform = Affine.translation(
            tile['file_west_bound'] + col_start * scale,
            tile['file_north_bound'] - row_start * scale,
        ) * Affine.scale(scale, -scale)

        # rasterize stemtags to select tree spectra
        shapes = list(zip(crowns_in_tile.geometry, crowns_in_tile['StemTag'].astype(int)))
        stemtag_raster = rasterize(
            shapes,
            out_shape=(rows, cols),
            transform=crop_transform,
            fill=0,
            all_touched=False,
            dtype="int32",
        )

        row_idx, col_idx = np.where(stemtag_raster > 0)
        if row_idx.size == 0:
            continue

        stemtags = stemtag_raster[row_idx, col_idx]
        canopy_spectra = hs_grab[row_idx, col_idx, :]

        spec_df = pd.DataFrame(canopy_spectra, columns=bands)
        spec_df['StemTag'] = stemtags
        spec_df['row'] = row_idx
        spec_df['col'] = col_idx

        spectra_dfs_list.append(spec_df)

    if not spectra_dfs_list:
        # pd.concat raises on an empty list; return an explicit empty table instead
        return pd.DataFrame(columns=['StemTag', 'row', 'col'])

    all_spectra = pd.concat(spectra_dfs_list, ignore_index=True)

    return all_spectra

In [None]:
# directory holding the NEON reflectance (.h5) tiles
tile_dir = root.joinpath('data', 'harvard', 'hyperspectral')

# index the tiles found on disk, then pull the per-pixel spectra for every crown
tiles = make_tile_gdf(tile_dir)
all_spectra = make_tree_spectra_df(crowns, tiles)


In [40]:
# write the per-pixel spectra table into the hyperspectral data directory
spectra_csv = root / 'data' / 'harvard' / 'hyperspectral' / '2025_all_tree_spectra.csv'
all_spectra.to_csv(spectra_csv)