In [2]:
import geopandas as gpd
import xarray as xr
import rioxarray 
from shapely.geometry import box
from pathlib import Path
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

from geocube.api.core import make_geocube



# Base directory for every input and output path in this notebook. It is the
# kernel's current working directory, so the 'beech_stands', 'sentinel_data'
# and 'output' folders used below are resolved relative to wherever the
# notebook is launched from.
root = Path.cwd()


class Site():
    """One study site: a Sentinel raster stack plus the stand polygons that intersect it.

    On construction the polygons intersecting the raster's bounding box are
    selected, their ids are split into beech / non-beech groups using the
    ``Name`` column, and the polygons are burned onto the raster grid so that
    pixels can be looked up by polygon id.

    Attributes
    ----------
    site_name : label used in plot titles
    sentinel_data : the raster stack with CRS and spatial dims written
    polygons : all polygons, reprojected, with the original row label in an ``index`` column
    bbox : shapely box around the raster bounds
    site_polygons : the subset of ``polygons`` intersecting ``bbox``
    beech_idx, nonbeech_idx : float arrays of polygon ids per stand type
    raster_polys : geocube output holding the rasterized ``index`` variable
    """

    # EPSG code written onto the raster and used to reproject the polygons
    EPSG = 26918
    # value of the polygons' ``Name`` column that marks a non-beech stand
    NONBEECH_NAME = 'non-beech stand'

    def __init__(self,site_name,sentinel_data,polygons):
        """
        Parameters
        ----------
        site_name : str
            Label for the site (used in plot titles).
        sentinel_data : xarray.DataArray
            Raster stack; the methods of this class use its ``x``, ``y``,
            ``band`` and ``time`` dimensions.
        polygons : geopandas.GeoDataFrame
            Stand polygons with a ``Name`` column.
        """
        self.site_name = site_name
        self.sentinel_data = (
            sentinel_data.rio.write_crs(self.EPSG)
            .rio.set_spatial_dims(x_dim="x", y_dim="y")
            .rio.write_coordinate_system()
        )
        # reset_index() moves the original row label into an 'index' column;
        # that column is what identifies a polygon everywhere below
        self.polygons = polygons.reset_index().to_crs(self.EPSG)
        self.bbox = self.get_bbox()

        self.site_polygons = self.get_site_polygons()

        names = self.site_polygons.Name
        # ids are cast to float; presumably to match the dtype of the
        # rasterized 'index' grid they are compared against -- TODO confirm
        self.beech_idx = self.site_polygons.loc[names != self.NONBEECH_NAME, 'index'].astype('float').values
        self.nonbeech_idx = self.site_polygons.loc[names == self.NONBEECH_NAME, 'index'].astype('float').values

        self.raster_polys = self.rasterize_polys()

    def get_bbox(self):
        """Return a shapely box built from the raster's bounds."""
        return box(*self.sentinel_data.rio.bounds())

    def get_site_polygons(self):
        """Return the rows of ``self.polygons`` whose geometry intersects ``self.bbox``."""
        bbox_frame = gpd.GeoDataFrame({"id":1,"geometry":[self.bbox]})
        # querying with an array of geometries returns a 2-row array;
        # row 0 holds the positions of the *input* (polygon) geometries that matched
        hits = bbox_frame.sindex.query(self.polygons.geometry, predicate="intersects")[0]
        return self.polygons.iloc[hits]

    def rasterize_polys(self):
        """Burn the polygon ids (``index`` column) onto the Sentinel grid."""
        return make_geocube(
            vector_data=self.site_polygons,
            measurements=["index"],
            like=self.sentinel_data, # ensure the data are on the same grid
        )

    def plot_rasterized_polys(self):
        """Show the rasterized polygon ids with the vector polygons drawn on top."""
        fig, ax = plt.subplots(ncols=1, figsize=(5, 5))

        image = self.raster_polys.index.plot.imshow(ax=ax)
        self.site_polygons.plot(ax=ax,alpha=.7)
        image.colorbar.remove()
        plt.title(f'Rasterized Polygons: {self.site_name.title()} Site')
        plt.tight_layout()
        plt.show()

    def _time_labels(self):
        """Return the raster's time coordinate as a list of 'YYYY-MM-DD' strings."""
        return [str(x.astype('datetime64[D]')) for x in self.sentinel_data.time.values]

    def make_pixel_df(self,idx='beech'):
        """Return one row per pixel inside the selected stands, one column per band/date.

        Parameters
        ----------
        idx : str
            ``'beech'`` selects the beech polygons; any other value selects
            the non-beech polygons.

        Returns
        -------
        pandas.DataFrame
            Columns are named ``'<band>_<date>'``; rows containing any NaN
            (including every pixel outside the selected polygons) are dropped.
            The shape of the result is also printed.
        """
        polygon_ids = self.beech_idx if idx == 'beech' else self.nonbeech_idx

        # isin() handles any number of polygons (previously only the first two
        # ids were used, and an empty id array raised IndexError)
        in_stands = self.raster_polys.index.isin(polygon_ids)
        pixels = self.sentinel_data.where(in_stands)

        stacked = pixels.stack(spatial=('x','y'))
        transposed = stacked.transpose('spatial', 'band', 'time')
        # convert to dataframe: rows = pixels, columns = (band, time)
        df = transposed.to_dataframe(name='value').unstack(['band', 'time'])
        df = df.loc[:, 'value']

        # relabel the time level with plain date strings
        df.columns = df.columns.set_levels(self._time_labels(), level='time')

        # the x/y pixel coordinates are not kept in the output
        df = df.reset_index(drop=True)
        df.columns = [f'{x[0]}_{x[1]}' for x in df.columns]

        df = df.dropna()
        print(df.shape)

        return df

    def make_site_means_df(self, i='beech'):
        """Return per-polygon mean values for every band and date.

        Parameters
        ----------
        i : str
            ``'beech'`` selects the beech polygons; any other value selects
            the non-beech polygons.

        Returns
        -------
        pandas.DataFrame or list of pandas.DataFrame
            A single frame when the selection contains fewer than two distinct
            polygons, otherwise one frame per polygon id (in id-array order).
            Columns are the band names plus a ``time`` column of date strings.
        """
        # shallow copy: adding the 'sentinel' variable must not modify the
        # cached self.raster_polys
        out_grid = self.raster_polys.copy()
        out_grid['sentinel'] = (self.sentinel_data.dims, self.sentinel_data.values, self.sentinel_data.attrs, self.sentinel_data.encoding)

        # calculate means for each group (polygon)
        grouped_sentinel = out_grid.drop_vars("spatial_ref").groupby(out_grid.index)
        grid_mean = grouped_sentinel.mean().rename({"sentinel": "sentinel_mean"})

        zonal_stats = grid_mean.to_dataframe()

        # keep only the requested stand type
        polygon_ids = self.beech_idx if i == 'beech' else self.nonbeech_idx
        df = zonal_stats.loc[polygon_ids]

        df_unstack = df.unstack(level='band').droplevel('band',axis=1)
        # labels are assigned by position, so this relies on the raster's band
        # order matching the (sorted) order produced by unstack()
        df_unstack.columns = self.sentinel_data.band.values

        time_labels = self._time_labels()

        if len(np.unique(df_unstack.index.get_level_values(level='index'))) < 2: # check if multiple polygons
            df_unstack = df_unstack.reset_index()
            df_unstack['time'] = time_labels
            return df_unstack

        # multiple polygons: return a list with one dataframe per polygon
        df_list = []
        for polygon_id in polygon_ids:
            df1 = df_unstack.loc[polygon_id].reset_index()
            df1['time'] = time_labels
            df_list.append(df1)

        return df_list


In [None]:
# Load the stand polygons and one 2023 Sentinel stack per site.
polys = gpd.read_file(root / 'beech_stands' / 'Beech Stand Polygons.kml')

# SITE is deliberately the loop variable: it is left set to the last site
# ('visitors') afterwards, and the output file names further down read it.
sentinel_by_site = {}
for SITE in ('pecoy', 'roundtop', 'visitors'):
    sentinel_by_site[SITE] = xr.open_dataarray(root / 'sentinel_data' / f'2023_{SITE}.nc')

pecoy_sentinel = sentinel_by_site['pecoy']
roundtop_sentinel = sentinel_by_site['roundtop']
visitors_sentinel = sentinel_by_site['visitors']

In [6]:
# One Site per study area; each receives the full polygon layer and keeps
# only the polygons intersecting its own raster.
pecoy = Site('Pecoy',pecoy_sentinel, polys)
roundtop = Site('Roundtop',roundtop_sentinel,polys)
visitors = Site('Visitors',visitors_sentinel,polys)

sites = (pecoy, roundtop, visitors)

# Pool the per-pixel values across sites, separately for each stand type.
beech_pixels = pd.concat([site.make_pixel_df('beech') for site in sites])
nonbeech_pixels = pd.concat([site.make_pixel_df('nonbeech') for site in sites])

output_dir = root / 'output'
beech_pixels.to_csv(output_dir / 'beech_sentinel_pixels.csv')
nonbeech_pixels.to_csv(output_dir / 'nonbeech_sentinel_pixels.csv')

### Get mean values for plots

In [7]:
# add satellite data to dataset
# NOTE(review): `out_grid` and `sentinel` are not assigned in any cell visible
# in this notebook, so this cell raises NameError on a fresh kernel
# (Restart & Run All). The same steps are implemented in
# Site.make_site_means_df -- TODO confirm this cell is leftover scratch work.
out_grid['sentinel'] = (sentinel.dims, sentinel.values, sentinel.attrs, sentinel.encoding)

# calculate means for each group (polygon)
# 'spatial_ref' is dropped before grouping; the grouping key is the rasterized
# polygon id stored in out_grid.index
grouped_sentinel = out_grid.drop_vars("spatial_ref").groupby(out_grid.index)
grid_mean = grouped_sentinel.mean().rename({"sentinel": "sentinel_mean"})

  ds.expand_dims(dim_name, create_index_for_new_dim=create_index_for_new_dim)


In [8]:
# flatten the per-polygon means into a dataframe
zonal_stats = grid_mean.to_dataframe()

# put beech/nonbeech into separate dataframes
# NOTE(review): `beech_idx` / `nonbeech_idx` exist only as attributes of Site
# instances in the visible cells (e.g. pecoy.beech_idx); as bare names they are
# undefined on a fresh kernel -- TODO confirm which site was intended.
beech_means = zonal_stats.loc[beech_idx]
nonbeech_means = zonal_stats.loc[nonbeech_idx]

In [None]:
def make_site_means_dfs(self, i='beech'):
    """Return per-polygon mean values for every band and date of a site.

    Written as a method body: ``self`` must provide ``raster_polys``,
    ``sentinel_data``, ``beech_idx`` and ``nonbeech_idx``.

    Parameters
    ----------
    self : object
        Site-like object (see above).
    i : str
        ``'beech'`` selects the beech polygons; any other value selects the
        non-beech polygons.

    Returns
    -------
    pandas.DataFrame or list of pandas.DataFrame
        A single frame when the selection contains fewer than two distinct
        polygons, otherwise one frame per polygon id (in id-array order).
        Columns are the band names plus a ``time`` column of date strings.
    """
    # shallow copy: adding the 'sentinel' variable must not modify the
    # raster cached on the site object
    out_grid = self.raster_polys.copy()

    out_grid['sentinel'] = (self.sentinel_data.dims, self.sentinel_data.values, self.sentinel_data.attrs, self.sentinel_data.encoding)

    # calculate means for each group (polygon)
    grouped_sentinel = out_grid.drop_vars("spatial_ref").groupby(out_grid.index)
    grid_mean = grouped_sentinel.mean().rename({"sentinel": "sentinel_mean"})

    zonal_stats = grid_mean.to_dataframe()

    # keep only the requested stand type; `idx` is also needed further down
    # to iterate over the polygons (it was previously never assigned, so the
    # multi-polygon branch raised NameError)
    idx = self.beech_idx if i == 'beech' else self.nonbeech_idx
    df = zonal_stats.loc[idx]

    df_unstack = df.unstack(level='band').droplevel('band',axis=1)
    # labels are assigned by position, so this relies on the raster's band
    # order matching the (sorted) order produced by unstack()
    df_unstack.columns = self.sentinel_data.band.values

    time_labels = [str(x.astype('datetime64[D]')) for x in self.sentinel_data.time.values]

    if len(np.unique(df_unstack.index.get_level_values(level='index'))) < 2: # check if multiple polygons
        df_unstack = df_unstack.reset_index()
        df_unstack['time'] = time_labels
        return df_unstack

    # multiple polygons: return a list with one dataframe per polygon
    df_list = []
    for polygon_id in idx:
        df1 = df_unstack.loc[polygon_id].reset_index()
        df1['time'] = time_labels
        df_list.append(df1)

    return df_list

In [9]:
def unstack_and_relabel_dfs(df,idx=None):
    """Reformat and tidy dataframes produced by calling to_dataframe() on zonal stats array

    The band and date labels are taken from ``df`` itself, so the function
    does not rely on any notebook-level variable being defined.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a MultiIndex containing the levels ``index`` (polygon id),
        ``band`` and ``time``, and a single value column.
    idx : sequence, optional
        Polygon ids to split on when ``df`` holds several polygons. When
        omitted, the distinct ids found in ``df`` are used in order of
        appearance.

    Returns
    -------
    pandas.DataFrame or list of pandas.DataFrame
        A single frame when ``df`` holds fewer than two distinct polygons,
        otherwise one frame per polygon id. Columns are the band names plus a
        ``time`` column of ``'YYYY-MM-DD'`` strings.
    """
    import pandas as pd

    def _with_date_strings(frame):
        # replace the datetime 'time' column with plain date strings
        frame['time'] = pd.to_datetime(frame['time']).dt.strftime('%Y-%m-%d')
        return frame

    df_unstack = df.unstack(level='band')
    # name each column after the band it actually holds (a plain array, so no
    # leftover column-level name)
    df_unstack.columns = df_unstack.columns.get_level_values('band').to_numpy()

    polygon_level = df_unstack.index.get_level_values(level='index')

    if len(np.unique(polygon_level)) < 2: # check if multiple polygons
        return _with_date_strings(df_unstack.reset_index())

    # multiple polygons: return a list with one dataframe per polygon
    polygon_ids = idx if idx is not None else pd.unique(polygon_level)
    return [
        _with_date_strings(df_unstack.loc[polygon_id].reset_index())
        for polygon_id in polygon_ids
    ]
        

        

In [10]:
# reformat and label dataframes
# NOTE(review): each name is overwritten with the reformatted result, so this
# cell is not idempotent -- running it a second time passes already-reformatted
# data back into unstack_and_relabel_dfs. `idx` is not supplied here.
beech_means = unstack_and_relabel_dfs(beech_means)
nonbeech_means = unstack_and_relabel_dfs(nonbeech_means)

In [11]:
# save to disk
# SITE still holds the last value assigned in the data-loading cell
# ('visitors'), so both files are written under that site's name.
# NOTE(review): unstack_and_relabel_dfs returns a list of dataframes when a
# stand type has several polygons; a list has no .to_csv, so these calls only
# work for the single-polygon case.
beech_means.to_csv(root / 'output' / f'{SITE}_beech_polygon_means.csv')
nonbeech_means.to_csv(root / 'output' / f'{SITE}_nonbeech_polygon_means.csv')