# Computing Metrics from Clipped Las Files 
***Davies Lab Lidar Script***<br>
Peter Boucher <br>
2022/09/23 <br>

<p>This is the second step in a two-part process: first, las files are clipped with a set of polygons (1-ClipLasWithPolygons.ipynb); then, vegetation structure metrics are computed from the clipped las files for each polygon (2-ComputeMetricsByPolygon.ipynb). </p>

#### Inputs: 
- a folder of clipped las files, with each file named by the unique id from the input shapefile
    - Note: The input las files need to have a "HeightAboveGround" attribute for each point (height above ground); this is the height column name the metric functions below are called with.

#### Outputs:
- an output shapefile with several lidar metric columns (like the input shapefile, but with more columns)
- csv files of all metric outputs: one file per cover metric (e.g. CoverD1.csv) plus PercentileHeights.csv

## Define User Inputs Below:

In [1]:
# Import Dependencies
from pathlib import Path
import sys
# sys.path.append('/n/davies_lab/Lab/LabLidarScripts/bin/')
sys.path.append('../../bin/')
from LabLidar_Functions import lasClip_IndivFeature, calccover, calcPercentileHeights
import geopandas as gpd
import pandas as pd
import numpy as np
import concurrent.futures
import laspy
import time

# # # USER INPUTS
# Everything a user normally needs to edit lives between the two "USER INPUTS"
# markers. The cells below read these names directly.

# Path to a shapefile (.shp) of polygon features matching the las files
# NOTE: This should be the same input shapefile as in 1-ClipLasWithPolygons.ipynb
shpf = Path('../data/in/test/shapefile/MpalaForestGEOCanopies_LabLidarTest_EPSG32637.shp')

# Input directory of clipped las files to compute metrics with.
# NOTE: This should be the same as the 'od' directory from 1-ClipLasWithPolygons.ipynb
# Files are looked up in this folder as '<featureIDcol>_<integer feature id>.las'.
ld = Path('../data/out/test/clippedlasfiles/')

# Output directory for metric output files
# (the metrics shapefile and all csv files are written here)
od_metrics = Path('../data/out/test/metrics/')

# EPSG code of the shapefile and the las files, as a string
# Kruger is 32736 (WGS84 UTM 36S)
# Mpala is 32637 (WGS84 UTM 37N)
# NOTE(review): not referenced by the cells shown in this notebook - confirm whether it is still needed.
epsg='32637'

# Feature id column - unique ID for each feature in the shapefile
# NOTE: Should match the file names of the las files in 'ld' directory.
# The id values are converted with int() when building the las file name.
featureIDcol = 'treeID'

# Max height of voxel stacks (passed to calccover as 'hmax')
# NOTE: Set this to be just above max height of your trees in meters.
stackheight=10

# Vertical step size for metrics (passed to calccover as 'step')
# NOTE: This defines the vertical bin size in meters (how "thick" each voxel is).
metricstep = 0.25

# Ground threshold (i.e. below this height treat points as ground) in meters
# Used as the default 'groundthres' of calcFeatureMetrics_parallel.
groundthreshold = 0.05

# # # END USER INPUTS

### Compute Metrics from Clipped Las Files

In [2]:
# Wrapper function for using parallel processing and calccover function
# Note: This reads each feature, then loads the corresponding las file using the feature ID
def calcFeatureMetrics_parallel(feature, lasdir=ld, IDcol=featureIDcol, step=metricstep, groundthres=groundthreshold, hmax=stackheight):
    """Compute cover and percentile-height metrics for one polygon feature.

    The las file for the feature is looked up in `lasdir` using the naming
    convention '<IDcol>_<integer feature id>.las'. If the file is missing or
    holds no points, a message is printed and the metric outputs are None.

    NOTE: The default argument values are taken from the user-input variables
    at the moment this cell is run, so re-run this cell after changing them.

    Parameters
    ----------
    feature : mapping-like (e.g. one row of the shapefile geodataframe)
        Must support `feature[IDcol]`; the value must be convertible with int().
    lasdir : path-like
        Directory holding the clipped las files.
    IDcol : str
        Name of the unique feature id column (also the las file name prefix).
    step : float
        Vertical bin size, passed to calccover as `step`.
    groundthres : float
        Ground threshold, passed to both calccover and calcPercentileHeights.
    hmax : float
        Max height of the voxel stack, passed to calccover as `hmax`.

    Returns
    -------
    tuple
        (cover, perc, heights, tottime): the calccover output, the two outputs
        of calcPercentileHeights (percentiles and point heights), and the
        elapsed wall-clock time in seconds. cover, perc and heights are None
        when no las file or no points were found.
    """

    start = time.time()

    # Defaults returned when the las file is missing or empty
    cover = None
    perc = None
    heights = None

    # load corresponding las file using file naming convention from above
    # Note: assumes that the featureID number is an integer
    lasf = Path(lasdir) / f'{IDcol}_{int(feature[IDcol])}.las'

    if not lasf.exists():

        print(f'No las file found named: {lasf.name} \n')

    else:

        # load points
        las = laspy.read(str(lasf))

        if len(las.points) == 0:

            print(f'No points found in {lasf.name} \n')

        else:

            # Calculate Cover, Percentile Heights, and Point Height Values (defined in "LabLidar_Functions.py" file)
            # Return dictionary outputs
            # NOTE: groundthres is passed through here (it used to be hard-coded to 0.05),
            # so cover and percentile metrics always use the same ground threshold.
            cover = calccover(points=las.points,
                              hmin=0, hmax=hmax, step=step,
                              heightcol='HeightAboveGround',
                              numretcol='number_of_returns',
                              retnumcol='return_number',
                              classcol='classification',
                              groundthres=groundthres,
                              calcintensity=False)

            perc, heights = calcPercentileHeights(las.points,
                                                  groundthres=groundthres,
                                                  returnHeights=True,
                                                  heightcol='HeightAboveGround')

    tottime = time.time() - start

    # Return cover dict, percentile dict, and height list (for quick recalculation of anything later)
    return cover, perc, heights, tottime

In [3]:
# Load the polygon features into a geodataframe
# Note: Expects a file with polygon features only
shpdf = gpd.read_file(shpf)

# One row (feature) per polygon; this list drives the parallel map below
features = [row for _, row in shpdf.iterrows()]

# Per-feature metric stores, keyed by feature ID.
# Every feature starts as None and stays None if no metrics could be computed.
cover_dict = {row[featureIDcol]: None for row in features}
perc_dict = {row[featureIDcol]: None for row in features}
height_dict = {row[featureIDcol]: None for row in features}

# Column-wise summary metrics that get joined onto the output shapefile
outdict = {col: [] for col in (featureIDcol,
                               'MedianH',
                               'MeanH',
                               'MaxH',
                               'Cover0cm',
                               'CoverG')}

# Begin!
start_tottime = time.time()

# Run the metric wrapper on every polygon feature using a pool of worker processes
with concurrent.futures.ProcessPoolExecutor() as executor:

    # executor.map yields results in the same order as 'features',
    # so each result can be paired with its feature via zip
    results = executor.map(calcFeatureMetrics_parallel, features)

    for (cover, perc, heights, elapsed), feat in zip(results, features):

        # Nothing computed for this feature (missing las file or no points)
        if not cover:
            continue

        fid = feat[featureIDcol]

        # Store the full metric outputs under the feature ID
        cover_dict[fid] = cover
        perc_dict[fid] = perc
        height_dict[fid] = heights

        # Also keep a handful of useful summary metrics (median height, max height, etc.)
        # for joining with a geodataframe, then export to a shapefile
        outdict[featureIDcol].append(fid)
        outdict['MedianH'].append(perc[50][0])
        outdict['MeanH'].append(perc['mean'][0])
        outdict['MaxH'].append(perc[100][0])
        outdict['Cover0cm'].append(cover['CoverD2'][0])
        outdict['CoverG'].append(cover['CoverD2'][1])

        # Per-feature timing is available in 'elapsed';
        # print it here if needed (not recommended when running many features)

end_tottime = time.time()

# Total wall-clock time, rounded to 0.1 s
tottime = np.array(end_tottime - start_tottime).round(1)

print(f'Done. Took {tottime} seconds to compute metrics for {len(features)} files.\n')

# # #

print(f'Now saving a shapefile...\n')

# Make sure the output directory exists before writing to it
# (a fresh checkout will not have it, and the write would otherwise fail).
Path(od_metrics).mkdir(parents=True, exist_ok=True)

# Join the per-feature summary metrics onto the input polygons.
# NOTE: merge defaults to an inner join, so features for which no metrics
# were computed (no las file, or no points) are left out of the output shapefile.
outdf = pd.DataFrame(outdict)
shpdf_out = shpdf.merge(outdf, on=featureIDcol)

# Output path of the metrics shapefile
shp_out = f'{od_metrics}/{featureIDcol}_Metrics.shp'
shpdf_out.to_file(shp_out)

print(f'\tSaved metric shapefile: {shp_out} \n')

# # #

print(f'Now saving cover metrics to csv...\n')

# One csv per cover variable: rows are height bins, columns are feature IDs
for var in ['CoverD1', 'CoverD2', 'CoverD1byH', 'CoverD2byH', 'FHPD1', 'FHPD2', 'Npulses']:

    # 'byH' and 'FHP' variables are labelled with all but the last entry of 'HeightBins'
    dropLastBin = ('byH' in var) or ('FHP' in var)

    # First pass: build a dataframe for every feature that has metrics, and
    # remember the height bins of this variable so that features without
    # metrics can be zero-filled with the correct length afterwards.
    # (Previously the zero-fill reused whatever 'heightbins' was left over from
    # an earlier iteration, which could be undefined or the wrong length.)
    df_by_id = {}
    refbins = None

    for featid, cover in cover_dict.items():

        # No points or metrics for this feature; handled in the second pass
        if not cover:
            continue

        try:

            if dropLastBin:
                heightbins = cover['HeightBins'][0:-1]
            else:
                heightbins = cover['HeightBins']

            # Rows are labelled by height bin
            # Cols are labelled by feature ID
            df_by_id[featid] = pd.DataFrame(cover[var],
                                            index=heightbins,
                                            columns=[featid])

            if refbins is None:
                refbins = heightbins

        except Exception as e:

            # Best effort: report the problem and leave this feature out of the csv
            print(f"{e.__class__} for {featid}: \n")
            print(f"\t{e}\n")

    # Nothing to write if no feature produced this metric
    # (pd.concat would raise on an empty list)
    if refbins is None:
        print(f'\tNo features with {var} metrics; {var}.csv not saved')
        continue

    # Second pass: assemble the columns in the original feature order,
    # filling features without points or metrics with zeros
    df_list = []

    for featid, cover in cover_dict.items():

        if featid in df_by_id:
            df_list.append(df_by_id[featid])

        elif not cover:
            df_list.append(pd.DataFrame(np.zeros(len(refbins)),
                                        index=refbins,
                                        columns=[featid]))

    # Merge the dataframes together
    df_merged = pd.concat(df_list, axis=1)

    # Save the dataframes as csv files
    df_merged.to_csv(f'{od_metrics}/{var}.csv')

    print(f'\t{var}.csv saved in {od_metrics}')
    

Done. Took 42.3 seconds to compute metrics for 1698 files.

Now saving a shapefile...



  pd.Int64Index,


	Saved metric shapefile: ../data/out/test/metrics/treeID_Metrics.shp 

Now saving cover metrics to csv...

	CoverD1.csv saved in ../data/out/test/metrics
	CoverD2.csv saved in ../data/out/test/metrics
	CoverD1byH.csv saved in ../data/out/test/metrics
	CoverD2byH.csv saved in ../data/out/test/metrics
	FHPD1.csv saved in ../data/out/test/metrics
	FHPD2.csv saved in ../data/out/test/metrics
	Npulses.csv saved in ../data/out/test/metrics


In [4]:
print(f'Now saving percentile metrics to csv...\n')

# One single-row dataframe per percentile metric, stacked at the end
df_merged_list = []

for var in [0, 25, 50, 75, 98, 100, 'mean', 'std']:

    df_list = []

    for featid, perc in perc_dict.items():

        # Features without a las file or without points have no percentile
        # dictionary (their entry is still None). Indexing None used to raise a
        # TypeError here; fill those features with NaN instead so that every
        # feature keeps its column in the output.
        if perc is None:
            values = [np.nan]
        else:
            values = perc[var]

        # Make a list of dataframes, and for each df:
        # Rows are labelled by percentile metric
        # Cols are labelled by feature ID
        df_list.append(pd.DataFrame(values,
                                    index=[var],
                                    columns=[featid]))

    # Merge the dataframes together (one column per feature)
    df_merged = pd.concat(df_list, axis=1)

    # Append to a list
    df_merged_list.append(df_merged)

# merge again (one row per percentile metric)
df_merge_merged = pd.concat(df_merged_list, axis=0)

# Save the dataframes as csv files
df_merge_merged.to_csv(f'{od_metrics}/PercentileHeights.csv')

print(f'\tPercentileHeights.csv saved in {od_metrics}')

Now saving percentile metrics to csv...

	PercentileHeights.csv saved in ../data/out/test/metrics


### DONE!