### Outputting Grid Metrics as 2D and 3D Arrays
*PB 8/12/22* 

Redone with large speed improvements, achieved by
using vectorized NumPy indexing to fill the grids
and xarray/rioxarray for the GeoTIFF and NetCDF exports.

----

##### Useful links for GeoTIFF export and the affine transform

https://rasterio.readthedocs.io/en/latest/topics/georeferencing.html

http://dirsig.cis.rit.edu/docs/new/affine.html

https://corteva.github.io/rioxarray/stable/getting_started/crs_management.html

https://github.com/rasterio/affine

----

In [1]:
import sys
sys.path.append('/n/home02/pbb/scripts/halo-metadata-server/StructuralComplexity_Tyler')
import matplotlib.pyplot as plt
import numpy as np
import pickle
import pandas as pd
import rasterio as rio
import xarray as xr
import rioxarray as rxr
from pathlib import Path
import geopandas as gpd
import time

# from geocube.api.core import make_geocube
# from shapely.geometry import Point
# from geocube.rasterize import rasterize_points_griddata
# sys.path.append('/n/home02/pbb/scripts/halo-metadata-server/')
# from Cloud_Class import Cloud, calccover
# from StructComplexity_f import exportTIF
# import os
# import glob

In [2]:
# ======================= USER INPUTS =======================

# ---- Input data ----

# Folder holding the pickled metric dictionaries that get gridded
metricdir = '/n/davies_lab/Users/pbb/StructuralComplexity_Tyler/data/out/UHURU_100Plots_metrics'

# Horizontal cell size of the grid (0.25 = 25 cm voxels)
xysize = 0.25

# EPSG code written as the CRS of every exported raster
epsg = '32637'

# Shapefile of the plots, used to set the grid boundaries plot by plot.
# NOTE: this is the dissolved file, without a buffer;
# every feature is one plot polygon labelled with its Site and Block number.
shpf = Path(
    '/n/home02/pbb/scripts/halo-metadata-server/StructuralComplexity_Tyler/data/in/UHURU_Polygons/UHURU_100mPlots_Dissolved.shp'
)
shpdf = gpd.read_file(shpf)

# Sites to iterate over
Sites = ['South', 'Central', 'North']
# Block numbers found within each site
Blocks = [1, 2, 3]

# ---- Output data ----

# Root folder that receives the rasters and NetCDF files
outdir_rast = '/n/davies_lab/Users/pbb/StructuralComplexity_Tyler/data/out/UHURU_100Plots_rasters/20220812'

# Names of the 3D metrics to export; they must match the keys
# stored in the saved cover dictionaries.
# Note: the percentile (2D) metrics are always exported.
metriclabels3d = [
    'Npulses',
    'CoverD1',
    'CoverD2',
    'CoverD1byH',
    'CoverD2byH',
    'FHPD1',
    'FHPD2',
]

# ===================== END USER INPUTS =====================

In [3]:
# define a function for filling arrays
def fill2Darray(data, shape, xindices, yindices, filteridx=None, plot=False):
    """Scatter a flat list of values into a NaN-initialised 2D array.

    Parameters
    ----------
    data : sequence
        One value per grid location.
    shape : tuple of int
        Shape ``(n_rows, n_cols)`` of the array to create.
    xindices, yindices : sequence
        Column (x) and row (y) index of every value in ``data``. Entries that
        are removed by ``filteridx`` may hold NaN placeholders.
    filteridx : array-like of bool, optional
        Mask selecting which entries of ``data``/``xindices``/``yindices`` are
        written. ``None`` or an empty mask means "write everything".
    plot : bool, optional
        If True, show the filled array with ``imshow`` and a colorbar.

    Returns
    -------
    numpy.ndarray
        Array of the requested shape; cells that received no value stay NaN.
    """
    # Start from an all-NaN grid so untouched cells read as "no data"
    output_array = np.full(shape, np.nan)

    # Work on arrays from here on, so lists are accepted for every argument
    data = np.asarray(data)
    xindices = np.asarray(xindices)
    yindices = np.asarray(yindices)

    # Apply the mask to the arguments that were passed in (not to notebook
    # globals), and only when a mask was actually supplied
    if filteridx is not None and np.size(filteridx) > 0:
        filteridx = np.asarray(filteridx)
        data = data[filteridx]
        xindices = xindices[filteridx]
        yindices = yindices[filteridx]

    # Indices may arrive as floats (NaN placeholders force a float dtype)
    xindices = xindices.astype(int)
    yindices = yindices.astype(int)

    # Vectorized fill: rows are y, columns are x
    output_array[yindices, xindices] = data

    if plot:
        fig, ax = plt.subplots()
        a = ax.imshow(output_array, cmap='magma')
        fig.colorbar(a)

    return output_array

In [4]:
# Export every plot (Site x Block) as georeferenced files:
#   * 2D height metrics (percentiles, mean, std) -> one GeoTIFF per metric
#   * 3D metrics listed in `metriclabels3d`      -> one NetCDF per metric,
#     with the height bins as the third ("z") dimension


def _grid_positions(grid, values):
    """Return (position, found) of each value in an ascending 1D grid.

    `found` is True only where the value matches a grid coordinate exactly;
    `position` is meaningful only where `found` is True.
    """
    values = np.asarray(values, dtype=float)
    pos = np.zeros(values.shape, dtype=int)
    found = np.zeros(values.shape, dtype=bool)
    if grid.size > 0 and values.size > 0:
        # searchsorted gives the first slot whose coordinate is >= value;
        # clip so values beyond the last coordinate (or NaN) stay indexable
        pos = np.minimum(np.searchsorted(grid, values), grid.size - 1)
        found = grid[pos] == values
    return pos, found


def _as_georeferenced(array2d, x_coords, y_coords_desc, epsg_code):
    """Wrap a bottom-up (row 0 = ymin) 2D array in a CRS-tagged DataArray."""
    # Mirror the rows: rioxarray wants x increasing left to right
    # and y decreasing top to bottom, which yields a correct affine transform
    flipped = np.flipud(array2d)

    da = xr.DataArray(data=flipped,
                      coords={"y": y_coords_desc,
                              "x": x_coords},
                      dims=["y", "x"])

    # Write CRS, nodata value and spatial dims onto the array
    # NOTE(review): the data use NaN for empty cells while the declared nodata
    # is -9999 - confirm that NaNs are handled as intended in the written files
    da.rio.write_crs(f"epsg:{epsg_code}", inplace=True)
    da.rio.write_nodata(-9999, inplace=True)
    da.rio.set_spatial_dims(x_dim="x", y_dim="y", inplace=True)
    da.rio.write_coordinate_system(inplace=True)
    return da


# For each site
for s in Sites:

    # Make sure the output folders exist before anything is written
    for subdir in ('2D', '3D'):
        Path(f'{outdir_rast}/{s}/{subdir}').mkdir(parents=True, exist_ok=True)

    # for each block (3 per site)
    for b in Blocks:

        start = time.time()

    # 1) Load Inputs, and Redefine Grid

        # Load percentile height metric dict
        with open(f'{metricdir}/UHURU{s}_{s}_{b}_{xysize}mgrid_percmetrics.obj', 'rb') as of:
            perc = pickle.load(of)

        # Load Cover percentile dict
        with open(f'{metricdir}/UHURU{s}_{s}_{b}_{xysize}mgrid_covermetrics.obj', 'rb') as of:
            cover = pickle.load(of)

        # Match site and block to get the plot polygon
        feat_gs = shpdf.loc[((shpdf.Site==s) & (shpdf.Block==b))]
        if len(feat_gs) != 1:
            raise ValueError(f'Expected exactly one polygon for Site {s} Block {b}, '
                             f'found {len(feat_gs)}')

        # Set bounds of grid to fill
        # (.iloc[0] instead of float(Series), which pandas has deprecated)
        plot_bounds = feat_gs.geometry.bounds.iloc[0]
        xmin = float(plot_bounds.minx)
        ymin = float(plot_bounds.miny)
        xmax = float(plot_bounds.maxx)
        ymax = float(plot_bounds.maxy)

        # Make the coordinates of the grid
        x_grid = np.arange(xmin, xmax, step=xysize)
        y_grid = np.arange(ymin, ymax, step=xysize)

        # Mesh the grid into 2 matrices of x and y coordinates
        x_mesh, y_mesh = np.meshgrid(x_grid, y_grid)

        # y coordinates for export, ordered top to bottom.
        # NOTE: adding xysize to the y coord so that it marks the top left corner
        # instead of the bottom left - this is in accordance with raster data and rioxarray
        # NOTE(review): rioxarray may interpret coordinates as cell centres rather
        # than corners - confirm there is no half-cell offset in the outputs
        y_grid_flip = np.flip(y_grid) + xysize

        # Find the index of where all the data values belong in the grid
        xlist = [k[0] for k in cover.keys()]
        ylist = [k[1] for k in cover.keys()]

        # Exact-match lookup of every (x, y) key on the grid, done in one
        # vectorized pass per axis instead of one array scan per key
        xpos, xfound = _grid_positions(x_grid, xlist)
        ypos, yfound = _grid_positions(y_grid, ylist)

        # Make an index for filtering arrays:
        # a key is usable only if both of its coordinates are on the grid
        filterindex = xfound & yfound

        # Grid indices per key, marked with nans where there is no location
        xidx = np.where(filterindex, xpos, np.nan)
        yidx = np.where(filterindex, ypos, np.nan)

        # Integer indices of the usable keys, reused by every 3D layer below
        xfill = xpos[filterindex]
        yfill = ypos[filterindex]


    # 2) Load, Reshape, and Output 2D Metrics

        # Get all the data dicts from the nested perc dictionary
        # NOTE(review): the grid indices come from `cover` keys but are applied to
        # `perc` values - this assumes both dicts list the same cells in the same
        # order; confirm
        v = list(perc.values())

        # key inside each perc entry -> label used for the output file
        metrics2d = {0: '0th', 25: '25th', 50: '50th', 75: '75th',
                     98: '98th', 100: '100th', 'mean': 'Mean', 'std': 'Std'}

        # Reshape vars into 2D arrays and store in a dictionary
        outdict = {}
        for key2d, label2d in metrics2d.items():

            # unpack this metric into a list (first element of each entry)
            values2d = [i[key2d][0] for i in v]

            outdict[label2d] = fill2Darray(data=values2d, shape=x_mesh.shape,
                                           xindices=xidx, yindices=yidx,
                                           filteridx=filterindex, plot=False)

        # Output 2D Arrays as geotifs
        for label2d, m in outdict.items():

            # put in an xarray (2D), mirrored and tagged with CRS / nodata
            m_xr = _as_georeferenced(m, x_grid, y_grid_flip, epsg)

            # make an output Label
            label = label2d.replace(' ', '').replace('.', 'p').replace('[m]', '')

            m_xr.rio.to_raster(f'{outdir_rast}/{s}/2D/UHURU{s}{str(b)}_{label}.tif')

            print(f'{label2d} done')

        end2d = time.time()

        print(f'Finished 2D outputs for UHURU {s} Block {b}. {end2d-start} seconds. \n')


    # 3) Load, Reshape, and Output 3D Metrics

        # Unpack cover values as a list
        v3 = list(cover.values())

        # Get list of heights for later
        hbins = v3[0]['HeightBins']

        # IMPORTANT: rows are y and columns are x, to match the [y, x] fill
        # and the ("y", "x") dims used for export
        shape = x_mesh.shape

        # for each set of 3D metrics
        for l3 in metriclabels3d:

            # list to fill with xarrays from each height
            xr_list = []

            # Deals with an issue caused by using np.diff
            # if there's a difference metric, then the last height does not have a value
            if (("byH" in l3) | ('FHP' in l3)) :
                hbinz = hbins[0:-1]
            else:
                hbinz = hbins

            # for each height bin
            for hidx in range(len(hbinz)):

                # make an array of all the values at this height
                m = np.array([i[l3][hidx] for i in v3])

                # make an empty output array, filled with nans
                output_array = np.full(shape, np.nan)

                # stick the usable values into the array
                # IMPORTANT NOTE HERE: it's y, then x, not the other way around
                output_array[yfill, xfill] = m[filterindex]

                # mirror, wrap in an xarray (2D) and add to list
                xr_list.append(_as_georeferenced(output_array, x_grid, y_grid_flip, epsg))

            # Concatenate dataarrays
            # https://docs.xarray.dev/en/stable/user-guide/combining.html#combine
            # the named index creates the new "z" dimension with height as its values
            ds = xr.concat(xr_list, pd.Index(hbinz, name='z'))

            # Set it's name
            ds.name = l3

            # output to netcdf
            ds.to_netcdf(f'{outdir_rast}/{s}/3D/UHURU{s}{str(b)}_{l3}.nc')

        end3d = time.time()

        print(f'Finished 3D outputs for UHURU {s} Block {b}. {end3d-end2d} seconds. \n')

    # DONE

0th done
25th done
50th done
75th done
98th done
100th done
Mean done
Std done
Finished 2D outputs for UHURU South Block 1. 68.9206395149231 seconds. 

Finished 3D outputs for UHURU South Block 1. 310.86160016059875 seconds. 

0th done
25th done
50th done
75th done
98th done
100th done
Mean done
Std done
Finished 2D outputs for UHURU South Block 2. 86.43612170219421 seconds. 

Finished 3D outputs for UHURU South Block 2. 378.0471522808075 seconds. 

0th done
25th done
50th done
75th done
98th done
100th done
Mean done
Std done
Finished 2D outputs for UHURU South Block 3. 91.46655941009521 seconds. 

Finished 3D outputs for UHURU South Block 3. 424.8815679550171 seconds. 

0th done
25th done
50th done
75th done
98th done
100th done
Mean done
Std done
Finished 2D outputs for UHURU Central Block 1. 119.92950105667114 seconds. 

Finished 3D outputs for UHURU Central Block 1. 584.528507232666 seconds. 

0th done
25th done
50th done
75th done
98th done
100th done
Mean done
Std done
Finished 

# NOTE(review): `complexity` is not defined in any cell visible above, so this
# line only works if the name already exists in the kernel - confirm or remove
complexity.keys()