# Clipping Las Files with Selenkay Polygon Features
***Selenkay Diversity Project Processing Script***<br>
Peter Boucher <br>
2022/10/03 <br>

<p>This is the first step in a two-part process: clipping las files with a set of polygons (1-ClipLasWithPolygons), and then computing vegetation structure metrics from the las files for each polygon (2-ComputeMetricsByPolygon).</p>

#### Inputs: 
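- a shapefile of polygon features with a unique ID attribute for each polygon feature (in this notebook, the string-valued `Spot` column, e.g. a transect number followed by a one-character spot code)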
- a folder of las files (i.e. tiled point cloud data)
    - If computing metrics (2-ComputeMetricsByPolygon.ipynb), the input las files need to have a "Height" attribute for each point (height above ground)

#### Outputs:
- a folder of clipped las files, with one file per feature, named by the unique id from the input shapefile

## Define User Inputs Below:

In [1]:
# Import Dependencies
from pathlib import Path
import sys
sys.path.append('/n/home02/pbb/scripts/halo-metadata-server/Selenkay/bin/')
from Functions import lasClip_IndivFeature, shpQualityCheckDupes, shpQualityCheckPolys, lasQualityCheckExistingFiles
import geopandas as gpd
import pandas as pd
import numpy as np
import concurrent.futures
import laspy
import time

# # #
# # # USER INPUTS

# Folder that holds the polygon shapefiles (.shp) used to clip the point clouds.
shpd = Path('/n/home02/pbb/scripts/halo-metadata-server/Selenkay/data/in/BoundaryShapefiles/SelenkaySpotPolygons_IncreasingRadius')

# One input shapefile per radius value; the value is substituted into the
# "SelenkaySpotPolygons_<r>mRadius.shp" file name. Paths are kept as strings.
radiuses = [130, 80, 50, 30, 20, 10]
# radiuses = [130, 20, 30, 50, 80]
shpfs = [
    str(shpd / f'SelenkaySpotPolygons_{radius}mRadius.shp')
    for radius in radiuses
]

# Root folder of the input las files (usually square tiles). The per-feature
# sub-folder is chosen later from the feature's attributes.
laspath = Path('/n/davies_lab/Users/pbb/SelenkayDiversity/data/in/')

# lasds = [Path('/n/davies_lab/Users/pbb/SelenkayDiversity/data/in/Inside/PointClouds'),
#          Path('/n/davies_lab/Users/pbb/SelenkayDiversity/data/in/Outside/PointClouds')]
# Note: Site 5 outside and transects 7 and 8 for testing

# Root folder that receives the clipped las files.
outpath = Path('/n/davies_lab/Users/pbb/SelenkayDiversity/data/out/SpotPolys_Fibonacci_ClippedLas')

# EPSG code shared by the shapefiles and the las files, given as a string.
# Note: shapefiles and las files must use the same EPSG code (same CRS).
#   Kruger   -> 32736 (WGS84 UTM 36S)
#   Mpala    -> 32637 (WGS84 UTM 37N)
#   Selenkay -> 32737 (WGS84 UTM 37S)
epsg = '32737'

# Name of the shapefile attribute column that identifies each polygon
# feature with a unique ID.
featureIDcol = 'Spot'

# # # End User Inputs
# # #

In [2]:
# Check all the shapefile paths exist
# for p in shpfs:
#     print(Path(p).exists())

In [3]:
# Define a function for running in parallel
def lasClip_IndivFeature_Parallel(feat, IDcol=featureIDcol):
    """Clip the las files for a single polygon feature.

    Written as a one-argument worker so it can be mapped over a list of
    features by a process pool.

    Parameters
    ----------
    feat : row of the shapefile geodataframe (as yielded by ``iterrows``)
        Must carry the ``TransectNum``, ``LidarSite`` and ``shpRadius``
        attributes in addition to the ID column.
    IDcol : str, optional
        Name of the attribute holding the feature's unique ID. Defaults to
        the module-level ``featureIDcol``. It is forwarded to
        ``lasClip_IndivFeature`` and used in the error message.

    Returns
    -------
    None
        Any exception raised by ``lasClip_IndivFeature`` is caught and
        reported with ``print`` so that one bad feature does not stop the
        remaining features from being processed.
    """
    # Pick the input las folder from the feature's attributes.
    # NOTE: specific to Selenkay data -- transects numbered 6 or lower are
    # read from the 'Inside' folder, all others from 'Outside'.
    site_group = 'Inside' if int(feat.TransectNum) <= 6 else 'Outside'
    ld = laspath / site_group / 'PointClouds' / f'Site0{feat.LidarSite}'

    # Output folder is named after the shapefile tag stored on the feature.
    od = outpath / f'{feat.shpRadius}'

    # Several workers may reach this point at the same time, so create the
    # folder unconditionally instead of checking for it first; parents=True
    # also covers a missing top-level output directory.
    od.mkdir(parents=True, exist_ok=True)

    # Last quality check (helper imported from Functions) on the output folder.
    lasQualityCheckExistingFiles(od)

    try:
        lasClip_IndivFeature(feature=feat,
                             lasdir=str(ld),
                             outdir=str(od),
                             featureIDcol=IDcol,
                             epsg=epsg,
                             verb=False)
    except Exception as err:
        # Best-effort: report which feature failed and why, then carry on.
        # KeyboardInterrupt / SystemExit are deliberately not caught.
        print(f'Issue with {IDcol}: {feat.get(IDcol)} \n'
              f'\t{type(err).__name__}: {err}\n')

In [4]:
# Load the key that matches each polygon with a lidar acquisition site.
# It does not depend on the shapefile, so it is read and sorted once here
# rather than on every pass of the loop below.
# NOTE: the column names in this file carry a leading space.
polysitekey = pd.read_csv('./data/in/Key_BiodivSurveyPoints_LidarAcquisitions.txt')
polysitekey = polysitekey.sort_values(by=['Transect', ' Spot'], ignore_index=True)

# For each shapefile
for shpf in shpfs:

    ## 1) Load shapefile inputs, and perform quality checks

    # Read the shapefile as a geodataframe
    shpdf = gpd.read_file(shpf)

    # Quality Check for duplicate feature IDS
    shpdf = shpQualityCheckDupes(shpf=shpf,
                                 shpdf=shpdf,
                                 featureIDcol=featureIDcol)

    # Quality Check for Polygon Features Only
    shpdf = shpQualityCheckPolys(shpdf)

    # Split the Spot ID into its numeric transect part (everything but the
    # last character) and its spot code (the last character) for sorting.
    shpdf['TransectNum'] = shpdf.Spot.apply(lambda x: int(x[0:-1]))
    shpdf['TransectSpot'] = shpdf.Spot.apply(lambda x: x[-1])

    shpdf = shpdf.sort_values(by=['TransectNum', 'TransectSpot'], ignore_index=True)

    # The lidar site is attached by row position (both tables are sorted and
    # re-indexed from 0), so a differing row count means the pairing cannot
    # be trusted. Warn instead of failing so existing runs are not interrupted.
    if len(shpdf) != len(polysitekey):
        print(f'WARNING: {Path(shpf).name} has {len(shpdf)} features but the '
              f'lidar site key has {len(polysitekey)} rows; '
              f'LidarSite values may be misaligned. \n')

    # Add the lidar Acquisition site number to the shpdf
    shpdf['LidarSite'] = polysitekey[' LidarAcquisition']

    # Add the tag of the current shapefile in a column as well
    # this is just for setting the output directory iteratively within the lasClip_IndivFeature_Parallel function
    shpdf['shpRadius'] = Path(shpf).name.split('_')[-1].split('.')[0]

    # ONLY FOR TESTING 10/12/22
    # filter shpdf to just transects 7 and 8
    # shpdf = shpdf.loc[[(('7' in x) | ('8' in x)) for x in shpdf[featureIDcol]]]
    # For 10/24 run after 7 and 8 have been done, but 9 and 10 can't be done yet since site 6 isn't done
    # shpdf = shpdf.loc[[(('1' in x) | ('2' in x) | ('3' in x) | ('4' in x) | ('5' in x) | ('6' in x)) for x in shpdf[featureIDcol]]]
    # print(shpdf.head())

    # Just rerunning sites 9 and 10 10/26/22
    # shpdf = shpdf.loc[[(('9' in x) | ('10' in x)) for x in shpdf[featureIDcol]]]

    ## 2) Clip Las Files

    # Make a list of all features (rows) in the shapefile to iterate through
    features = [row for _, row in shpdf.iterrows()]

    # Run the clipping function on every feature, timing the whole batch
    start = time.time()

    print(f'Starting to clip {len(features)} polygon features for {Path(shpf).name} \n')

    with concurrent.futures.ProcessPoolExecutor(max_workers=None) as executor:
        # The worker returns nothing; the results are iterated only so that an
        # exception raised inside a worker is re-raised here.
        for _ in executor.map(lasClip_IndivFeature_Parallel, features):
            pass

    end = time.time()

    print(f'\t {len(features)} features clipped in {end-start} s.\n')

Starting to clip 10 polygon features for SelenkaySpotPolygons_130mRadius.shp 

 	/n/davies_lab/Users/pbb/SelenkayDiversity/data/out/SpotPolys_Fibonacci_ClippedLas/130mRadius 

To avoid overwrite issues, delete all files in output directory before proceeding.
 	/n/davies_lab/Users/pbb/SelenkayDiversity/data/out/SpotPolys_Fibonacci_ClippedLas/130mRadius 

 	/n/davies_lab/Users/pbb/SelenkayDiversity/data/out/SpotPolys_Fibonacci_ClippedLas/130mRadius 

 	/n/davies_lab/Users/pbb/SelenkayDiversity/data/out/SpotPolys_Fibonacci_ClippedLas/130mRadius 
To avoid overwrite issues, delete all files in output directory before proceeding.
 	/n/davies_lab/Users/pbb/SelenkayDiversity/data/out/SpotPolys_Fibonacci_ClippedLas/130mRadius 

 	/n/davies_lab/Users/pbb/SelenkayDiversity/data/out/SpotPolys_Fibonacci_ClippedLas/130mRadius 
 	/n/davies_lab/Users/pbb/SelenkayDiversity/data/out/SpotPolys_Fibonacci_ClippedLas/130mRadius 

 	/n/davies_lab/Users/pbb/SelenkayDiversity/data/out/SpotPolys_Fibonacci_Clipp

In [5]:
# TESTING
# features = [f for i, f in shpdf.iterrows()]
# feat = features[0]
# feat

# # Use the attributes of the feature to set the input las file folder
# # NOTE: specific to Selenkay data
# tn = int(feat.TransectNum)
# s = feat.Spot
# ls = feat.LidarSite

# if tn <= 6:
#     d = laspath.joinpath('Inside')
# else:
#     d = laspath.joinpath('Outside')

# ld = Path(str(d) + f'/PointClouds/Site0{feat.LidarSite}')

# # Use the attributes of the feature to set the output las file folder
# od = Path(str(outpath) + f'/{feat.shpRadius}')

# # make the dir if it doesn't exist yet
# if not od.exists():
#     od.mkdir()

# lasClip_IndivFeature(feature=feat,
#                      lasdir=ld,
#                      outdir=od,
#                      featureIDcol=featureIDcol,
#                      epsg=epsg,
#                      verb=True)


### 
# Abandoned trying to do a merge/join instead of the above... 
# # rename columns to match shpdf
# polysitekey.rename({'Transect':'TransectNum',
#                     ' Spot':'TransectSpot'},
#                    axis=1, inplace=True)
# polysitekey.head()
# Join Polysitekey and shpdf
# pd.concat([shpdf, polysitekey], on=['TransectNum', 'TransectSpot'])
# pd.concat([shpdf, polysitekey], join='inner', axis=1)
# shpdf.merge(polysitekey, on='TransectNum')