In [1]:
import rioxarray as riox
import rasterio as rio
import xarray as xr
import os
import re
import numpy as np
import pandas as pd
import geopandas as gpd
from datetime import datetime, timedelta
from src.hls_funcs.masks import shp2mask

In [2]:
# Run configuration: the site prefix that appears in every data path below, and
# the year of the HLS stack read from data/gcloud.
prefix, yr = 'cper', 2022

In [3]:
# HLS stack for the configured site and year (opened with masked=True).
hls_nc = 'data/gcloud/hls_' + prefix + '_' + str(yr) + '_gcloud.nc'
ds = riox.open_rasterio(hls_nc, masked=True)

# Weekly Landsat NDVI long-term average (LTA) for the same site.
lta_nc = 'data/ee_lta/' + prefix + '_ee_ndvi_landsat_wkly_lta.nc'
ds_ndvi_lta = riox.open_rasterio(lta_nc, masked=True)

# Re-stamp the LTA dates: swap '2020' for '2099' in each date string and parse
# the result back into a datetime. Year 2099 is what the AOI labelling step
# further down turns into the '30-yr avg.' label.
lta_dates = []
for raw_date in ds_ndvi_lta['date'].values:
    relabelled = re.sub('2020', '2099', str(raw_date))
    lta_dates.append(datetime.strptime(relabelled, '%Y-%m-%d %H:%M:%S'))
ds_ndvi_lta['date'] = lta_dates

# Put the LTA on the HLS grid: nearest-neighbour match on the y/x coordinates
# with a tolerance of 30 (in coordinate units -- presumably metres, confirm
# against the CRS of the inputs).
ds_ndvi_lta = ds_ndvi_lta.reindex({'y': ds.y, 'x': ds.x}, method='nearest', tolerance=30)

In [4]:
if prefix == 'cper':
    # Pasture polygons, reprojected to the EPSG code of the HLS dataset.
    cper_f = 'data/ground/cper_pastures_2017_dissolved.shp'
    cper = gpd.read_file(cper_f).to_crs(ds.rio.crs.to_epsg())

    # Keep only name + geometry and add a 0-based running 'id' column.
    cper_info = cper[['Pasture', 'geometry']].reset_index(drop=True)
    cper_info = cper_info.reset_index().rename(columns={'index': 'id'})

    # Each polygon is burned with code id + 1; one pass fills both the
    # code -> name lookup and the (geometry, code) list handed to shp2mask.
    past_dict = {}
    cper_mask_shp = []
    for _, rec in cper_info.iterrows():
        code = rec.id + 1
        past_dict[code] = rec.Pasture
        cper_mask_shp.append((rec.geometry, code))
    # Code 0 is labelled 'UNK' (presumably the value shp2mask leaves on pixels
    # outside every polygon -- TODO confirm against shp2mask).
    past_dict[0] = 'UNK'

    cper_mask = shp2mask(shp=cper_mask_shp,
                         transform=ds.rio.transform(),
                         outshape=ds['NDVI'].shape[1:],
                         xr_object=ds['NDVI'])

    # Translate every mask code into its pasture name, keeping the mask's shape.
    pasture_names = [past_dict[code] for code in cper_mask.values.flatten()]
    past_mask = np.array(pasture_names).reshape(cper_mask.shape)

In [5]:
# Attach the pasture-name array on the (y, x) dims, rechunk to 50 x 50 blocks,
# and register 'Pasture' as a coordinate instead of a data variable.
pasture_var = (['y', 'x'], past_mask)
ds = ds.assign(Pasture=pasture_var).chunk({'y': 50, 'x': 50}).set_coords('Pasture')

In [6]:
# Disabled (kept for reference): shift every LTA date forward by three days.
#ds_ndvi_lta['date'] = pd.to_datetime(ds_ndvi_lta['date']) + timedelta(days=3)

In [7]:
# Average the LTA over the pixels of each pasture, then flatten to a table.
lta_by_pasture = ds_ndvi_lta.groupby(ds['Pasture']).mean(dim='stacked_y_x')
df_ndvi_lta = lta_by_pasture.to_dataframe().reset_index()
df_ndvi_lta = df_ndvi_lta.drop(columns='spatial_ref')

# These rows are the long-term reference series, not a calendar year.
df_ndvi_lta['Year'] = '30-yr avg.'

In [8]:
yr_list = [2020, 2021, 2022]

# (sub-directory, file tag) of every HLS product stored under
# data/hls_nrt/<prefix>/<sub-directory>/<prefix>_hls_<tag>_<year>.nc
hls_products = [('hls_ndvi', 'ndvi'),
                ('hls_biomass', 'bm'),
                ('hls_cover', 'BARE'),
                ('hls_cover', 'SD'),
                ('hls_cover', 'GREEN'),
                ('hls_cover', 'LITT')]


def _open_hls_product(site, subdir, tag, year):
    """Open one yearly HLS product file and normalise its time axis.

    Parameters
    ----------
    site : str
        Site prefix used in the directory and file name.
    subdir : str
        Product sub-directory (e.g. 'hls_ndvi', 'hls_cover').
    tag : str
        Product tag embedded in the file name (e.g. 'ndvi', 'BARE').
    year : int
        Year of the file to open.

    Returns
    -------
    The object returned by ``riox.open_rasterio(..., masked=True)`` with its
    'time' dimension renamed to 'date' and the date values re-parsed from
    their '%Y-%m-%d %H:%M:%S' string form into ``datetime`` objects.
    """
    path = ('data/hls_nrt/' + site + '/' + subdir + '/'
            + site + '_hls_' + tag + '_' + str(year) + '.nc')
    product = riox.open_rasterio(path, masked=True).rename({'time': 'date'})
    product['date'] = [datetime.strptime(str(x), '%Y-%m-%d %H:%M:%S')
                       for x in product['date'].values]
    return product


# One per-pasture table per year; they are concatenated once after the loop
# instead of growing df_out inside it.
yearly_frames = []
for yr_i in yr_list:
    print(yr_i)
    products = [_open_hls_product(prefix, subdir, tag, yr_i)
                for subdir, tag in hls_products]

    # Every year except the last one in yr_list is thinned to weekly steps
    # counted from 1 January; the last year keeps all of its dates.
    if yr_i != yr_list[-1]:
        yr_dates_tmp = [datetime(yr_i, 1, 1) + timedelta(weeks=w) for w in range(53)]
        # Nearest available date within one day of each weekly target.
        products = [product.sel(date=yr_dates_tmp,
                                method='nearest',
                                tolerance=timedelta(days=1),
                                drop=True)
                    for product in products]

    # Merge all products, average over the pixels of each pasture, and flatten.
    df_yr_wkly = (xr.merge(products)
                  .groupby(ds['Pasture'])
                  .mean(dim='stacked_y_x')
                  .to_dataframe()
                  .reset_index()
                  .drop(columns='spatial_ref'))
    df_yr_wkly['Year'] = str(yr_i)
    yearly_frames.append(df_yr_wkly)

df_out = pd.concat(yearly_frames)

2020
2021
2022


In [9]:
# Stack the long-term-average rows on top of the per-year pasture rows.
df_out = pd.concat(objs=[df_ndvi_lta, df_out], axis=0)

In [10]:
# Site-wide (AOI) series: average every numeric column over all rows that share
# a date. numeric_only=True is needed because df_out also holds the string
# columns 'Pasture' and 'Year'; without it pandas >= 2.0 raises a TypeError
# (older versions silently dropped such columns, which is the result kept here).
df_out_aoi = df_out.groupby('date').mean(numeric_only=True).reset_index()

In [11]:
# Label the site-wide rows with the site prefix in place of a pasture name.
df_out_aoi['Pasture'] = prefix

# Derive the 'Year' label from the calendar year of each date; 2099 is the
# placeholder year given to the long-term-average dates. The calendar year
# (dt.year) is used rather than the ISO week-numbering year, because the ISO
# year assigns early-January dates such as 2021-01-01 to the previous year,
# which would disagree with the per-pasture rows labelled by file year.
df_out_aoi['Year'] = df_out_aoi['date'].dt.year.map(
    lambda y: '30-yr avg.' if y == 2099 else str(y))

# Append the site-wide rows below the per-pasture rows.
df_out = pd.concat([df_out, df_out_aoi])

In [12]:
# NDVI is rounded to 3 decimals and Biomass to whole numbers.
df_out['NDVI'] = df_out['NDVI'].round(3)
df_out['Biomass'] = df_out['Biomass'].round(0)

# The four cover columns are multiplied by 100 and rounded to 1 decimal.
# NOTE: this rewrites df_out in place, so re-running the cell scales again.
cover_cols = ['BARE', 'SD', 'GREEN', 'LITT']
df_out[cover_cols] = (df_out[cover_cols] * 100).round(1)

In [13]:
# Write the combined table to the site's summary CSV, leaving out the index.
means_csv = 'data/gcloud/hls_' + prefix + '_means.csv'
df_out.to_csv(means_csv, index=False)