In [1]:
# Pickle processing for point clouds 
# Note - modified 2/8/23 to loop over all radii
# Also, added grass metrics (cover and height percentiles of the grass layer)

# Import Dependencies
import sys
# Note: Selenkay uses some functions not included in the LabLidar (aka Lidar-Notebooks) Functions file
# the main functions (calculate cover, percentiles, etc) are the same
sys.path.append('/n/home02/pbb/scripts/halo-metadata-server/Selenkay/bin/')
from Functions import calccover, calcPercentileHeights, heightAggMetrics, canopyLayerMetrics
from Classes import Cloud
import geopandas as gpd
import pandas as pd
import numpy as np
import concurrent.futures
from pathlib import Path
import laspy
import time
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

from shapely.geometry import Polygon
from scipy.ndimage import gaussian_filter1d
from scipy.stats import gaussian_kde
from scipy.signal import find_peaks, peak_widths
from scipy.interpolate import interp1d

# Notebook-wide matplotlib defaults: larger figures and a larger base font
plt.rcParams.update({
    'figure.figsize': [8, 6],
    'font.size': 14,
})

# # # USER INPUTS

# Unique label for this run; it names the metric and figure output folders below.
# Previous runs: 'initial'; 'mango' (2/8/23)
label = 'mango'

# Radii to loop over (one clipped-las folder and one shapefile exist per radius)
radii = [20, 30, 50, 80, 130]
# just for testing!
# radii = [10]

# # Set Data Dirs

# Path to directory of shapefiles (.shp) of polygon features to clip the point cloud with.
shpd = Path('/n/home02/pbb/scripts/halo-metadata-server/Selenkay/data/in/BoundaryShapefiles/SelenkaySpotPolygons_IncreasingRadius')

# Input directory of clipped las files to compute metrics with.
# NOTE: This should be the same as the 'od' directory from 1-ClipLasWithPolygons.ipynb
lasd = Path('/n/davies_lab/Users/pbb/SelenkayDiversity/data/out/SpotPolys_Fibonacci_ClippedLas/')

# Output directory for metric output files
# (organized into one subfolder per radius, named 20mRadius, 30mRadius, etc.)
metricd = Path(f'/n/davies_lab/Users/pbb/SelenkayDiversity/data/out/Metrics/{label}/')

# Output directory for complexity figures
figd = Path(f'/n/home02/pbb/scripts/halo-metadata-server/Selenkay/figs/{label}')
# parents=True creates any missing intermediate folders; exist_ok=True makes
# this cell safe to re-run.
figd.mkdir(parents=True, exist_ok=True)

# # Shapefile parameters

# EPSG code of the shapefile and the las files, as a string
# Note: Shapefiles and las files must have the same EPSG code (same CRS)
# Kruger is 32736 (WGS84 UTM 36S)
# Mpala is 32637 (WGS84 UTM 37N)
# Selenkay is 32737 (WGS84 UTM 37S)
epsg = '32737'

# Feature id column - name of the attribute column in the shapefile which
# identifies each polygon feature with a unique ID
featureIDcol = 'Spot'

# # Voxel Parameters

# Max height of voxel stacks
# NOTE: Set this to be just above the max height of your trees in meters.
stackheight = 15

# Horizontal resolution of the grid (XY pixel size)
xysize = 0.5

# Vertical step size for metrics
# NOTE: This defines the vertical bin size in meters (how "thick" each voxel is).
# initial run: verticalres = 0.5
# mango run:
verticalres = 0.25

# Ground threshold (i.e. below this height treat points as ground) in meters
groundthreshold = 0.05

# Name of the point attribute that holds the height values
heightcol = 'HeightAboveGround'

# # Complexity Metric Parameters

# Method for calculation of peaks/layers (options = 'kde' or 'gauss1d')
method = 'gauss1d'

# Smoothing sigma, used when method is 'gauss1d'
# initial run: sigma = 1.5
# mango run:
sigma = 1.0

# Relative height for the top-of-herb-layer calculation in canopyLayerMetrics
# initial run: rh = 0.9
# mango run:
rh = 1.0

# # # END USER INPUTS

In [2]:
# Build, for every radius, the matching shapefile path, clipped-las input
# directory, and metric output directory. The three lists are index-aligned
# with `radii` so they can be zipped together in the processing loop.
shpfs = []
lds = []
mds = []

for r in radii:
    shpfs.append(shpd / f'SelenkaySpotPolygons_{r}mRadius.shp')
    lds.append(lasd / f'{r}mRadius')

    md = metricd / f'{r}mRadius'

    # Create the metric dir if it doesn't exist yet.
    # parents=True is required on the first run of a new `label`, when the
    # parent Metrics/<label>/ folder has not been created either.
    md.mkdir(parents=True, exist_ok=True)

    mds.append(md)

# lds, mds

### Make height bins for binning points

In [3]:
# Build the vertical bin edges used to bin point heights.

# Number of bin edges from 0 to stackheight at a spacing of verticalres.
# The small tolerance stops floating point division error
# (e.g. 0.3 / 0.1 -> 2.9999999999999996) from truncating away the top edge.
nbins = int(np.floor(stackheight / verticalres + 1e-9)) + 1
heightbins = np.linspace(0, stackheight, nbins)

# IMPORTANT: Using groundthreshold, you may want to account for errors in relative accuracy
# EX: If the rel. accuracy of ground is about 0.06 m (6 cm) between flightlines,
# the lowest height bin could be set to 0.06 (instead of 0) to account for this,
# so any hit below 0.06 m counts as ground.
# NOTE: If you want to use everything, just set groundthreshold to 0
#
# Merge the threshold into the edges as a sorted, de-duplicated array.
# For a small positive threshold it lands right above 0, for a negative one
# right below 0, and for 0 nothing changes. Unlike inserting at a fixed
# position, the edges stay strictly increasing even if the threshold is
# larger than (or equal to) verticalres.
heightbins = np.unique(np.append(heightbins, groundthreshold))
    

In [4]:
# Wrapper function for computing all per-cell metrics in parallel.
# Notice that it reads the point cloud from the global `lc` rather than taking
# it as an argument; it is written this way so that it can be mapped over cell
# indices with concurrent futures below.
def calccover_parallel(index, heightcol='HeightAboveGround'):
    """Compute cover, percentile, complexity and grass-layer metrics for one grid cell.

    Reads the global cloud `lc` and the global run parameters (stackheight,
    heightbins, method, sigma, rh, groundthreshold, verticalres).

    Parameters
    ----------
    index :
        Used to look up the cell id as lc.grid_dict['idx_cells'][index].
    heightcol : str
        Name of the point attribute holding the height values.

    Returns
    -------
    tuple
        (cover, perc, heights, complexity, grasscover, grassperc).
        A failed calculation is reported with a printed message and does not
        raise; its result is returned as None (`heights` as an empty array),
        so one bad cell cannot abort the whole parallel map.
    """

    # True/False array marking all points within the current grid cell
    in_cell = lc.grid_dict['idx_points'] == lc.grid_dict['idx_cells'][index]

    # Subset points
    p = lc.las.points[in_cell]

    # Get height array
    # Note: this is slightly different from the "heights" output below
    h = p[heightcol]

    # Remove high noise points above the canopy
    h = h[h <= stackheight]

    # Defaults returned for any step that fails. Without these, a failure in
    # one try block left the name undefined and the return statement raised.
    cover = perc = complexity = grasscover = grassperc = None
    heights = np.empty(0)

    try:
        # Compute complexity metrics
        complexity = canopyLayerMetrics(h=h,
                                        hbins=heightbins,
                                        method=method,
                                        smoothsigma=sigma,
                                        rel_height=rh,
                                        groundthreshold=groundthreshold)
    except Exception as e:
        print(f"Complexity Calc. - {e.__class__} for {lc.lasf}: \n")
        print(f"\t{e}\n")

    try:
        # Calculate cover
        cover = calccover(points=p,
                          step=verticalres,
                          groundthres=groundthreshold,
                          heightcol=heightcol,
                          hmax=stackheight)
    except Exception as e:
        print(f"Cover Calc. - {e.__class__} for {lc.lasf}: \n")
        print(f"\t{e}\n")

    try:
        # Calculate height statistics, and return an array of the point heights
        perc, heights = calcPercentileHeights(points=p,
                                              groundthres=groundthreshold,
                                              returnHeights=True,
                                              heightcol=heightcol,
                                              hmax=stackheight)
    except Exception as e:
        print(f"Percentile Calc. - {e.__class__} for {lc.lasf}: \n")
        print(f"\t{e}\n")

    # ADDED 2/8/23 - based on grass biomass work
    # The grass layer is defined by the herbaceous height ('herbh') from the
    # complexity metrics, so it can only be computed if that step succeeded.
    if complexity is not None:
        try:
            # Subset points to ONLY those in the grass layer
            grasspoints = p[p[heightcol] <= complexity['herbh']]

            grasscover = calccover(points=grasspoints,
                                   step=verticalres,
                                   groundthres=groundthreshold,
                                   heightcol=heightcol,
                                   hmax=stackheight)

            # the grass height array itself is not part of the output
            grassperc, _ = calcPercentileHeights(points=grasspoints,
                                                 groundthres=groundthreshold,
                                                 returnHeights=True,
                                                 heightcol=heightcol,
                                                 hmax=stackheight)
        except Exception as e:
            print(f"Grass Cover & Percentiles Calc. - {e.__class__} for {lc.lasf}: \n")
            print(f"\t{e}\n")

    # Return cover dict, percentile dict, height array, complexity metrics,
    # and the grass-layer cover and percentile dicts
    return cover, perc, heights, complexity, grasscover, grassperc

In [6]:
# lds, mds, shpfs

In [None]:
# Main processing loop: for every radius, and every clipped las file of that
# radius, compute the gridded (per-pixel) metrics and the whole-plot metrics,
# and save each as a pickle in that radius' metric directory.
# NOTE: shpf is carried along by the zip but is not used inside the loop.
for shpf, ld, od_metrics, radius in zip(shpfs, lds, mds, radii):

    # make a list of input las files (sorted so the processing order is deterministic)
    lasinputs = sorted(str(l) for l in ld.glob('*.las'))

    for lasf in lasinputs:

    # For testing - only run 3a
    # for lasf in ['/n/davies_lab/Users/pbb/SelenkayDiversity/data/out/SpotPolys_Fibonacci_ClippedLas/30mRadius/Spot_3a.las']:

        ### STEP 1: Load in Cloud
        startproj = time.time()

        # Make a las cloud class.
        # `lc` is deliberately a global: calccover_parallel reads it directly.
        lc = Cloud(lasf=lasf,
                   gridsize=xysize,
                   vsize=verticalres,
                   heightcol=heightcol,
                   maxh=stackheight)

        # get project string for saving below (e.g. 'Spot_3C.las' -> '3C')
        projstr = Path(lasf).name.split('.')[0].split('_')[-1]

        end = time.time()

        print(f'Loaded {projstr} cloud, time elapsed: {end - startproj}\n')

        ### STEP 2: Make the grid
        start = time.time()

        lc.makegrid()

        end = time.time()

        print(f'\tGrid created, time elapsed: {end - start}\n')

        ### STEP 3: Compute Cover, FHP, and Percentiles Metrics Over the Cloud's Grid
        start = time.time()

        # initialize dictionaries for output (keyed by (x, y) cell location)
        lc.cover_dict = {}
        lc.perc_dict = {}
        lc.height_dict = {}
        lc.complexity_dict = {}
        lc.cover_dict_grass = {}
        lc.perc_dict_grass = {}

        # set the cell indices to loop over in parallel
        # NOTE(review): these are the *values* of idx_cells, and the worker
        # looks each one up again as idx_cells[index]. That is only consistent
        # if idx_cells[i] == i - confirm against Cloud.makegrid.
        indices = lc.grid_dict['idx_cells']

        # For debugging without multiprocessing (slow, but errors are easier to
        # trace), replace `executor.map(calccover_parallel, indices)` below
        # with the builtin `map(calccover_parallel, indices)`.

        ## Use concurrent futures to compute metrics over each cell in parallel
        # NOTE(review): the workers see the current `lc` because the pool is
        # created after `lc` is assigned; this presumably relies on the 'fork'
        # start method - confirm before running on another platform.
        with concurrent.futures.ProcessPoolExecutor(max_workers=None) as executor:
            for cphccp, x, y in zip(executor.map(calccover_parallel, indices),
                                     lc.grid_dict['x_cells'],
                                     lc.grid_dict['y_cells']):

                try:

                    cover, perc, heights, complexity, grasscover, grassperc = cphccp

                    # Stick the metrics inside the output dictionaries
                    # with x and y location as tuple keys
                    lc.cover_dict[(x, y)] = cover
                    lc.perc_dict[(x, y)] = perc
                    lc.height_dict[(x, y)] = np.round(heights, decimals=3)
                    lc.complexity_dict[(x, y)] = complexity
                    lc.cover_dict_grass[(x, y)] = grasscover
                    lc.perc_dict_grass[(x, y)] = grassperc

                except Exception as e:

                    print(f"Saving metrics error - {e.__class__} for {lc.lasf} on pixel ({x}, {y}): \n")
                    print(f"\t{e}\n")

        end = time.time()

        print(f'\tMetrics computed, time elapsed: {end - start}\n')

        ### STEP 4: SAVE GRID METRICS

        # Save outputs as pickles
        # "Can't open a pickle you don't know" - there can be malicious pickles, be wary.
        grid_outputs = {
            'covermetrics': lc.cover_dict,
            'percmetrics': lc.perc_dict,
            'heights': lc.height_dict,
            'complexitymetrics': lc.complexity_dict,
            'grasscovermetrics': lc.cover_dict_grass,
            'grasspercmetrics': lc.perc_dict_grass,
        }

        for suffix, metrics in grid_outputs.items():
            with open(f'{od_metrics}/{projstr}_{xysize}mgrid_{suffix}.obj', 'wb') as of:
                pickle.dump(metrics, of, protocol=pickle.HIGHEST_PROTOCOL)

        # DONE with the gridded metrics
        endproj = time.time()
        projtime = endproj - startproj

        numcells = len(lc.grid_dict['idx_cells'])
        print(f'\t{lc.las.header.point_count} points gridded into {numcells} {xysize}m pixels in {projtime} seconds!\n')

        ### STEP 5: MAKE AND SAVE WHOLE PLOT METRICS

        start = time.time()

        # Get all points in the plot
        p = lc.las.points

        # Get height array
        # Note: this is slightly different from the "heights" output below
        hplot = p[heightcol]

        # Remove high noise points above the canopy
        hplot = hplot[hplot <= stackheight]

        # Reset the results for THIS plot. Without this, a failed calculation
        # either raised a NameError at save time or silently pickled the
        # previous plot's values under the current plot's file name.
        cover_wholeplot = None
        perc_wholeplot = None
        heights_wholeplot = None
        complexity_wholeplot = None

        try:

            # Calculate Cover
            cover_wholeplot = calccover(points=p,
                                        step=verticalres,
                                        groundthres=groundthreshold,
                                        heightcol=heightcol,
                                        hmax=stackheight)

        except Exception as e:

            print(f"Cover Calc. - {e.__class__} for {lc.lasf}: \n")
            print(f"\t{e}\n")

        try:

            # Calculate height statistics, and return an array of the point heights
            perc_wholeplot, heights_wholeplot = calcPercentileHeights(points=p,
                                                                      groundthres=groundthreshold,
                                                                      returnHeights=True,
                                                                      heightcol=heightcol,
                                                                      hmax=stackheight)
        except Exception as e:

            print(f"Percentile Calc. - {e.__class__} for {lc.lasf}: \n")
            print(f"\t{e}\n")

        try:

            # Compute complexity metrics and make a plot
            # NOTE: the whole-plot profile uses a fixed smoothing sigma of 0.5,
            # not the per-cell `sigma` from the user inputs.
            complexity_wholeplot, fig, ax = canopyLayerMetrics(h=hplot,
                                                                hbins=heightbins,
                                                                method=method,
                                                                smoothsigma=0.5,
                                                                rel_height=rh,
                                                                plot=True,
                                                                groundthreshold=groundthreshold)

        except Exception as e:

            print(f"Complexity Calc. - {e.__class__} for {lc.lasf}: \n")
            print(f"\t{e}\n")

        else:

            # Save plot to fig directory. Kept separate from the calculation so
            # that a plotting problem is reported as such, and the figure is
            # always closed (one figure is opened per las file).
            try:

                ax.set_title(projstr)

                # make the per-radius fig dir if it is not there yet
                radius_figd = Path(f'{figd}/{radius}mRadius')
                radius_figd.mkdir(parents=True, exist_ok=True)

                fig.savefig(f'{figd}/{radius}mRadius/{projstr}_{radius}m_WholePlotProfile.png', dpi=300)

            except Exception as e:

                print(f"Whole Plot Figure - {e.__class__} for {lc.lasf}: \n")
                print(f"\t{e}\n")

            finally:

                plt.close(fig)

        # Save (only the metrics that were actually computed for this plot)
        wholeplot_outputs = {
            'complexitymetrics': complexity_wholeplot,
            'covermetrics': cover_wholeplot,
            'percmetrics': perc_wholeplot,
        }

        for suffix, metrics in wholeplot_outputs.items():

            if metrics is None:
                print(f'\tNot saving {projstr}_WholePlot_{suffix}.obj - calculation failed.\n')
                continue

            with open(f'{od_metrics}/{projstr}_WholePlot_{suffix}.obj', 'wb') as of:
                pickle.dump(metrics, of, protocol=pickle.HIGHEST_PROTOCOL)

        end = time.time()
        endproj = time.time()

        print(f'\tWhole plot metrics in {end - start} s.\n')

        print(f'\t\tDone with processing {projstr}. Totaltime: {np.round(endproj - startproj, decimals=2)}\n')


Loaded 3C cloud, time elapsed: 0.06317329406738281

	Grid created, time elapsed: 0.2433934211730957

	Metrics computed, time elapsed: 4.492738723754883

	251107 points gridded into 5149 0.5m pixels in 5.830615997314453 seconds!

	Whole plot metrics in 17.18886399269104 s.

		Done with processing 3C. Totaltime: 23.02

Loaded 5C cloud, time elapsed: 0.20218729972839355

	Grid created, time elapsed: 0.22739076614379883

	Metrics computed, time elapsed: 4.461066246032715

	242995 points gridded into 5143 0.5m pixels in 5.805747747421265 seconds!

	Whole plot metrics in 17.85883355140686 s.

		Done with processing 5C. Totaltime: 23.66

Loaded 6B cloud, time elapsed: 0.13179469108581543

	Grid created, time elapsed: 0.24394536018371582

	Metrics computed, time elapsed: 4.728991508483887

	267980 points gridded into 5144 0.5m pixels in 6.040055990219116 seconds!

	Whole plot metrics in 19.970760345458984 s.

		Done with processing 6B. Totaltime: 26.01

Loaded 8B cloud, time elapsed: 0.1257488


## TESTING

In [5]:
# # Make and Save Plot-level complexity and height metrics as well
# start = time.time()

# # Subset Points
# p = lc.las.points

# # Get height array
# # Note: this is slightly different from the "heights" output below
# h = p[heightcol]

# # Remove high noise points above the canopy
# h = h[h<=stackheight]

# # try:

# #     # Calculate Cover
# #     cover_wholeplot = calccover(points=p,
# #                       step=verticalres,
# #                       groundthres=groundthreshold,
# #                       heightcol=heightcol,
# #                       hmax=stackheight)

# # except Exception as e:

# #     print(f"Cover Calc. - {e.__class__} for {lc.lasf}: \n")
# #     print(f"\t{e}\n")

# # try: 

# #     # Calculate height statistics, and return an array of the point heights
# #     perc_wholeplot, heights_wholeplot = calcPercentileHeights(points=p,
# #                                           groundthres=groundthreshold,
# #                                           returnHeights=True,
# #                                           heightcol=heightcol,
# #                                           hmax=stackheight)
# # except Exception as e:

# #     print(f"Percentile Calc. - {e.__class__} for {lc.lasf}: \n")
# #     print(f"\t{e}\n")

# try:

#     # Compute complexity metrics
#     complexity_wholeplot, fig, ax = canopyLayerMetrics(h=h,
#                                                         hbins=heightbins,
#                                                         method='gauss1d',
#                                                         smoothsigma=0.5,
#                                                         rel_height=rh,
#                                                         plot=True)
# except Exception as e:

#     print(f"Complexity Calc. - {e.__class__} for {lc.lasf}: \n")
#     print(f"\t{e}\n") 


# end = time.time()

# print(f'Finished whole plot metrics in {end - start} s.\n')

# ax.set_title(projstr)

# fig.savefig(f'./figs/WholePlotProfiles/{radius}m/{projstr}_{radius}mRadius_WholePlotProfile.png', dpi=300)
# ax.set_ylim([-0.1, 15])

In [6]:

# # make a True/False array 
# # for all points within the current grid cell
# idx_bool = lc.grid_dict['idx_points'] == lc.grid_dict['idx_cells'][index]

# # Subset Points
# p = lc.las.points[idx_bool]

# # Get height array
# # Note: this is slightly different from the "heights" output below
# h = p[heightcol]

# # Remove high noise points above the canopy
# h = h[h<=stackheight]

# try: 

#     # Calculate height statistics, and return an array of the point heights
#     perc, heights = calcPercentileHeights(points=p,
#                                           groundthres=groundthreshold,
#                                           returnHeights=True,
#                                           heightcol=heightcol,
#                                           hmax=stackheight)
# except Exception as e:

#     print(f"Percentile Calc. - {e.__class__} for {lc.lasf}: \n")
#     print(f"\t{e}\n")

# try:

#     # Compute complexity metrics
#     complexity= canopyLayerMetrics(h=h,
#                                     hbins=heightbins,
#                                     method='gauss1d',
#                                     smoothsigma=sigma,
#                                     rel_height=rh,
#                                     plot=True)
    
# except Exception as e:

#     print(f"Complexity Calc. - {e.__class__} for {lc.lasf}: \n")
#     print(f"\t{e}\n")

In [7]:
# # make a True/False array 
# # for all points within the current grid cell
# idx_bool = lc.grid_dict['idx_points'] == lc.grid_dict['idx_cells'][index]

# # Subset Points
# p = lc.las.points[idx_bool]

# # Get height array
# # Note: this is slightly different from the "heights" output below
# h = p[heightcol]

# # Remove high noise points above the canopy
# h = h[h<=stackheight]

# try: 

#     # Calculate height statistics, and return an array of the point heights
#     perc, heights = calcPercentileHeights(points=p,
#                                           groundthres=groundthreshold,
#                                           returnHeights=True,
#                                           heightcol=heightcol,
#                                           hmax=stackheight)
# except Exception as e:

#     print(f"Percentile Calc. - {e.__class__} for {lc.lasf}: \n")
#     print(f"\t{e}\n")

# try:

#     # Compute complexity metrics
#     complexity= canopyLayerMetrics(h=h,
#                                     hbins=heightbins,
#                                     method='gauss1d',
#                                     smoothsigma=sigma,
#                                     rel_height=rh,
#                                     plot=True)
    
# except Exception as e:

#     print(f"Complexity Calc. - {e.__class__} for {lc.lasf}: \n")
#     print(f"\t{e}\n")


In [8]:
#     print(i, x, y)
    
#     index = i
#     # make a True/False array 
#     # for all points within the current grid cell
#     idx_bool = lc.grid_dict['idx_points'] == lc.grid_dict['idx_cells'][index]
    
#     # Subset Points
#     p = lc.las.points[idx_bool]

#     # Get height array
#     # Note: this is slightly different from the "heights" output below
#     h = p[heightcol]
    
#     # Remove high noise points above the canopy
#     h = h[h<=stackheight]
    
#         # Calculate height statistics, and return an array of the point heights
#     perc, heights = calcPercentileHeights(points=p,
#                                           groundthres=groundthreshold,
#                                           returnHeights=True,
#                                           heightcol=heightcol,
#                                           hmax=stackheight)
    
#         # Compute complexity metrics
#     complexity = canopyLayerMetrics(h=h,
#                                     hbins=heightbins,
#                                     method='kde',
#                                     rel_height=0.9)

In [9]:
# percdict = {}
# heightdict = {}
# complexdict = {}

# # Testing - funky error with 5D and complexity 10/25
# for lasf in ['/n/davies_lab/Users/pbb/SelenkayDiversity/data/out/SpotPolys_Fibonacci_ClippedLas/130mRadius/Spot_5D.las']:

#     ### STEP 1: Load in Cloud 
#     startproj = time.time()

#     # Make a las cloud class, and grid it
#     lc = Cloud(lasf=lasf,
#                gridsize=xysize,
#                vsize=verticalres,
#                heightcol=heightcol,
#                maxh=stackheight)
    
#     lasf = Path(lasf)
#     # get project string for saving below
#     projstr = lasf.name.split('.')[0].split('_')[-1]

#     end = time.time()
    
#     print(f'Loaded {projstr} cloud, time elapsed: {end - startproj}\n')
    
#     ### STEP 2: Make the grid
#     start = time.time()
    
#     lc.makegrid()

#     end = time.time()
    
#     print(f'Grid created, time elapsed: {end - start}\n')
    
#     for idx_bool in lc.grid_dict['idx_cells']:

#         try: 

#             # Calculate height statistics, and return an array of the point heights
#             perc, heights = calcPercentileHeights(points=lc.las.points[idx_bool],
#                                                   groundthres=groundthreshold,
#                                                   returnHeights=True,
#                                                   heightcol=heightcol)
            
#             percdict[idx_bool] = perc
#             heightdict[idx_bool] = heights
            
#         except Exception as e:

#             print(f"Percentile Calc. - {e.__class__} for {lc.lasf}: \n")
#             print(f"\t{e}\n")


#         try:

#             # Compute complexity metrics
#             complexity = canopyLayerMetrics(h=heights,
#                                             hmax=stackheight,
#                                             step=verticalres,
#                                             groundthres=groundthreshold,
#                                             smoothsigma=0.5)
            
#             complexdict[idx_bool] = complexity
            
#         except Exception as e:

#             print(f"Complexity Calc. - {e.__class__} for {lc.lasf}: \n")
#             print(f"\t{e}\n")