In [23]:
import os
import glob
import matplotlib.pyplot as plt
import numpy.ma as ma
import xarray as xr
import rioxarray as rxr
from shapely.geometry import mapping, box
import geopandas as gpd
import rasterio as rio


# Root folder of the project data. Defaults to the original Google Drive
# mount; set the CHAPTER2_HOME environment variable to run the notebook on a
# machine where the project folder lives somewhere else.
home = os.environ.get(
    "CHAPTER2_HOME",
    "/Volumes/GoogleDrive/My Drive/Chapter2_mechanisms_forest_water_cycling",
)

# We want the length, start, and end of growing season 1.
# We also want the reliability band so we can filter out low-quality pixels:
# keep pixels where reliability = 0 (excellent), 1 (good), or 2 (acceptable).

# To process each year:
#   1. clip the layers to the XL ROI
#   2. mask all of the unreliable pixels

In [73]:
def clean_phenology(file, boundary, bands):
    """Clip one phenology file to a boundary, mask unreliable pixels, save each band.

    The requested bands are opened from ``file`` and clipped to ``boundary``.
    Pixels whose ``Reliability`` value is not 0, 1 or 2 are masked out, and the
    ``Reliability`` band itself is then dropped. Every remaining band is
    written to ``<home>/Data/Phenology/<band>/<band>_<year>.tif``, where spaces
    in the band name are replaced by underscores and ``<year>`` is the first
    four characters of the dataset's ``RANGEBEGINNINGDATE`` attribute.

    The output root is taken from the module-level ``home`` variable.

    Parameters
    ----------
    file : str
        Path of the raster file passed to ``rioxarray.open_rasterio``.
    boundary : geopandas.GeoDataFrame
        Supplies the clip geometries (``boundary.geometry``) and their CRS
        (``boundary.crs``).
    bands : list of str
        Names of the variables to load; must include ``"Reliability"``.

    Returns
    -------
    None
        Results are written to disk as ``.tif`` rasters.
    """
    # Load only the requested bands and crop them to the boundary.
    clipped = rxr.open_rasterio(
        file,
        masked=True,
        variable=bands,
    ).rio.clip(
        boundary.geometry.apply(mapping),
        crs=boundary.crs,
        all_touched=True,
        from_disk=True,
    ).squeeze()

    # Keep pixels rated 0, 1 or 2; every other pixel is masked out.
    reliable = clipped["Reliability"].isin([0, 1, 2])
    # drop_vars replaces the deprecated Dataset.drop for removing variables.
    masked = clipped.where(reliable).drop_vars("Reliability")

    year = masked.attrs["RANGEBEGINNINGDATE"][0:4]
    out_root = os.path.join(home, "Data", "Phenology")

    # Iterate over the real variable names so a name that already contains an
    # underscore is still looked up correctly; only the on-disk name is
    # sanitised (spaces -> underscores).
    for band in list(masked.keys()):
        safe_name = band.replace(" ", "_")
        band_dir = os.path.join(out_root, safe_name)
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(band_dir, exist_ok=True)
        masked[band].rio.to_raster(
            os.path.join(band_dir, safe_name + "_" + year + ".tif")
        )


In [74]:
# All phenology HDF files sitting directly in the Phenology data folder.
phen_hdf = glob.glob(
    os.path.join(home, "Data", "Phenology", "*.hdf")
)

# Region of interest used to clip every file.
boundary = gpd.read_file(
    os.path.join(home, "Data", "ROI", "blue_ridge_plus_reference_xl.shp")
)

# Season-1 timing bands plus the quality band needed for masking.
bands = [
    "Start of Season 1",
    "End of Season 1",
    "Length of Season 1",
    "Reliability",
]

# Clean each file in turn; outputs are written to disk by clean_phenology.
for file in phen_hdf:
    clean_phenology(file=file, boundary=boundary, bands=bands)