### Extract daily NDVI by pasture at CPER
Uses pre-processed daily NDVI rasters from Landsat-MODIS fusion (LMF; see Gao et al. 2015) for years before the HLS cutoff year, and from Harmonized Landsat Sentinel (HLS; see Kearney et al. 2022) from the cutoff year onward. Computes the mean NDVI of all grid cells within each pasture and writes a .csv file with a single NDVI value for each date for each pasture.

In [None]:
import rioxarray as riox
import rasterio as rio
import xarray as xr
import os
import re
import numpy as np
import pandas as pd
import geopandas as gpd
from datetime import datetime, timedelta
from hlsstack.hls_funcs.masks import shp2mask
from tqdm.notebook import tqdm
import glob

In [None]:
# dask cluster location ('local' starts a LocalCluster on this machine)
cluster_loc = 'local'
# the earliest year for using HLS; earlier years are read from LMF
hls_cutoff = 2016

# the respective generic paths for LMF and HLS pre-processed data;
# the '{}' placeholder is filled with the year being processed
ndvi_dict = {
    'lmf': '/mnt/t/3-GIS/CPER/Layers/CPER_Landsat-Modis-Fusion_V3/flexfit_ndvi.033032.{}.bin',
    'hls': '/mnt/c/Users/Sean.Kearney/git_repos/hls_nrt/data/gcloud/hls_cper_{}_gcloud.nc'
}

# the path to a shapefile with CPER pasture boundaries
cper_f = '../data/ground/boundaries/cper_pastures_2017_dissolved.shp'

# the range of years to be processed (the end is exclusive: 2014 through 2022)
yrs = range(2014, 2023)

# the output path for the csv file; the first and last year in the file name
# are taken from `yrs` so the name always matches the years actually processed
out_f = f'/mnt/t/3-GIS/CPER/Layers/CPER_RS_avgs/cper_ndvi_means_{yrs[0]}_{yrs[-1]}.csv'

In [None]:
# start a dask cluster on this machine when a local run is requested;
# nothing is started for any other value of cluster_loc
if cluster_loc == 'local':
    print('   setting up Local cluster...')
    import dask
    from dask.distributed import Client, LocalCluster
    inDIR = 'data/'
    # 8 worker processes with 2 threads each
    cluster = LocalCluster(threads_per_worker=2, n_workers=8)
    client = Client(cluster)
    # show the client summary in the notebook output
    display(client)

In [None]:
# read the pasture boundaries and project them to EPSG:32613
cper = gpd.read_file(cper_f).to_crs(32613)
# keep only the pasture name and geometry, adding a zero-based integer 'id' column
cper_info = (
    cper[['Pasture', 'geometry']]
    .reset_index(drop=True)
    .reset_index()
    .rename(columns={'index': 'id'})
)
# lookup from numeric mask value (id + 1) to pasture name; 0 is reserved for
# 'UNK' (presumably the value given to cells outside every pasture - confirm
# against shp2mask)
past_dict = {
    pid + 1: name
    for pid, name in zip(cper_info['id'], cper_info['Pasture'])
}
past_dict[0] = 'UNK'
# (geometry, mask value) pairs in the form passed to shp2mask
cper_mask_shp = [
    (geom, pid + 1)
    for geom, pid in zip(cper_info['geometry'], cper_info['id'])
]

In [None]:
# loop through each year and extract pasture NDVI means for each date;
# the per-year dataframes are collected in a list and concatenated once at the
# end rather than re-copying the growing output on every iteration
yearly_frames = []
for yr in tqdm(yrs):
    # years before the HLS cutoff are read from LMF
    if yr < hls_cutoff:
        # define file path
        fpath = ndvi_dict['lmf'].format(str(yr))
        # read in data
        da = riox.open_rasterio(fpath, masked=True)
        # LMF data are single-band and thus an xarray.DataArray, which must be
        # named before it can be converted to an xarray.Dataset
        da.name = 'NDVI'
        ds = da.to_dataset()
        # reproject if necessary
        if ds.rio.crs != cper.crs:
            ds = ds.rio.reproject(cper.crs)
        # convert the band coordinate to a date by adding it, as a number of
        # days, to 1 January of the year
        # NOTE(review): rioxarray band coordinates are normally 1-based, in
        # which case band 1 maps to 2 January rather than 1 January - confirm
        # the intended day-of-year offset for the LMF files
        ds['band'] = [datetime(yr, 1, 1) + timedelta(days=int(x)) for x in ds['band'].values]
        # rename band coordinate to date
        ds = ds.rename({'band': 'date'})
        # subset to only CPER boundaries, padded by 30 map units on the east
        # and south edges (y is sliced from the top bound to the bottom bound)
        ds = ds.sel(x=slice(cper.total_bounds[0], cper.total_bounds[2] + 30),
                    y=slice(cper.total_bounds[3], cper.total_bounds[1] - 30))
    # use HLS from the cutoff year onward
    else:
        # define the file path
        fpath = ndvi_dict['hls'].format(str(yr))
        # read in data, selecting only NDVI
        ds = riox.open_rasterio(fpath, masked=True, variable='NDVI')
        # reproject if necessary
        if ds.rio.crs != cper.crs:
            ds = ds.rio.reproject(cper.crs)
    # create the pasture mask from shapefile on this year's grid
    cper_mask = shp2mask(shp=cper_mask_shp,
                         transform=ds.rio.transform(),
                         outshape=ds['NDVI'].shape[1:],
                         xr_object=ds['NDVI'])
    # convert from numeric id's to pasture names
    past_mask = np.array([past_dict[i] for i in cper_mask.values.flatten()]).reshape(cper_mask.shape)
    # add the pasture mask to the NDVI dataset
    ds = ds.assign(Pasture=(['y', 'x'], past_mask)).chunk({'y': 50, 'x': 50})
    ds = ds.set_coords('Pasture')
    # get pasture means and convert to dataframe
    df_yr = ds.groupby(
        ds['Pasture']).mean(
        dim='stacked_y_x').to_dataframe().reset_index().drop(
        columns='spatial_ref')
    df_yr['Year'] = str(yr)
    yearly_frames.append(df_yr)

# combine all years into the output dataframe (pd.concat raises a ValueError
# if no years were processed)
df_out = pd.concat(yearly_frames)

In [None]:
# label each row with the product its year was read from: LMF for years before
# the HLS cutoff, HLS otherwise
df_out['Source'] = [
    'LMF' if int(year) < hls_cutoff else 'HLS'
    for year in df_out['Year']
]

In [None]:
# quality control check: list the unique pasture names found in each year so
# that a missing pasture can be spotted; selecting the column before
# aggregating avoids applying a lambda to the whole group (deprecated
# behaviour on recent pandas)
df_out.groupby('Year')['Pasture'].unique()

In [None]:
# preview the output dataframe (displayed as the cell's result in the notebook)
df_out

In [None]:
# save the combined results as a csv at the configured output path, leaving
# out the dataframe index
df_out.to_csv(path_or_buf=out_f, index=False)

#### Plot saved results

In [None]:
import seaborn as sns

In [None]:
# read in the saved .csv output from the same path it was written to, parsing
# the 'date' column as datetimes so that it plots on a continuous time axis
# instead of as string categories
df_saved = pd.read_csv(out_f, parse_dates=['date'])

In [None]:
# draw one NDVI line per pasture across all dates on a single figure,
# without error bars
sns.relplot(
    data=df_saved,
    x='date',
    y='NDVI',
    hue='Pasture',
    kind="line",
    errorbar=None,
)