In [1]:
import glob
import sys
import os
import gc
import math
import open3d as o3d
import numpy as np
import pandas as pd
import collections
import ipynb

from matplotlib import pyplot as plt
from descartes import PolygonPatch

import shapely.geometry as geometry
from shapely.ops import cascaded_union, polygonize
from shapely.geometry import Polygon, LineString

from scipy import optimize
from scipy.spatial import ConvexHull

from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import KMeans

In [2]:
# The following two functions are from Calders et al. 2022.
# https://doi.org/10.5281/zenodo.7307956
def nn(x,rad):
    """
    Find every neighbor within a fixed radius of each point.

    A NearestNeighbors index is built from x and then queried without a
    separate query set, so the neighbors reported are those of the indexed
    points themselves.

    Args:
    x - points to index (passed straight to NearestNeighbors.fit)
    rad - search radius passed to radius_neighbors

    Returns:
    distances, indices - the pair produced by radius_neighbors: for each
                         point, the distances to and the indices of the
                         neighbors found within rad
    """
    index = NearestNeighbors(algorithm='auto', metric='euclidean').fit(x)

    # output explained: https://stackoverflow.com/questions/53886289/confused-with-the-output-of-sklearn-neighbors-nearestneighbors
    neighbor_dists, neighbor_inds = index.radius_neighbors(radius=rad)
    return neighbor_dists, neighbor_inds

def load_pts(cloud,downsample=False,outliers=False): 
    """
    Convert a single tree's point cloud to x,y,z pandas dataframe. 
    
    Args:
    cloud - point cloud from a single tree (passed to o3d.io.read_point_cloud)
    downsample (Optional) - Default is False, meaning all the points are retained. If 
                            True, a random 10% of the points is kept. No random seed
                            is set, so the retained subset differs between calls.
    outliers (Optional) - Default is False, meaning outliers are retained. If True,
                          points with too few neighbors in the x-y plane are dropped.
    
    Returns: 
    df - Dataframe with X, Y, Z coordinates of point cloud
    """
    # Read the point cloud data for single tree and save as pandas df with columns for x, y, z coords.
    cloud_data = o3d.io.read_point_cloud(cloud)
    df = pd.DataFrame(data=np.asarray(cloud_data.points), columns=['x', 'y', 'z'])

    # Optional downsampling:
    if downsample:
        df = df.sample(frac=0.1)   # keep 10% of points

    # Optional outlier removal (skipped for an empty cloud, where a neighbor
    # search has nothing to index):
    if outliers and len(df) > 0:
        # Neighbors are searched in the horizontal plane only (first two columns: x and y).
        xy = df.iloc[:, 0:2].values
        _, neighbor_inds = nn(xy, 0.5)   # neighbors within search radius of 0.5

        # Number of neighbors found for each point, as a flat 1-D array so it
        # can be used directly as a row mask.
        neighbor_counts = np.array([len(inds) for inds in neighbor_inds])

        # Set threshold for the number of neighbors we want to keep.
        threshold = df.shape[0] * 0.0001  # 1 neighbor for every 10,000 pts

        df = df[neighbor_counts >= threshold]

    return df 

### Helper Functions

In [3]:
def formatplots(trees, ncols=4, shape='square'):
    """
    Returns matplotlib fig and axs objects with desired shape and number of subplots.
    
    Args:
    trees - the list of trees (or other objects) that need to be plotted.
            the length of this list will be the number of subplots returned.
    ncols - the desired number of columns in the subplot grid 
    shape - optional argument describing the shape of the subplots. 
            Options are "square", "horiz", or "vert"
    
    Returns: 
    fig, axs - matplotlib objects corresponding to the grid of subplots.
               The grid has ncols columns and just enough rows to hold
               len(trees) plots, so trailing axes may be left unused.
               plt.subplots is called with its default squeezing, so axs is
               only a 2-D array when the grid has more than one row and more
               than one column.

    Raises:
    ValueError - if shape is not one of the supported options
    """
    # (figure width, height per row) for each supported layout.
    figsizes = {
        'horiz': (25, 4),
        'vert': (40, 10),
        'square': (25, 5),
    }

    # Determine number of subplots.
    nplots = len(trees)

    # Reject unknown layouts up front instead of failing later on an
    # unassigned figure variable.
    if shape not in figsizes:
        raise ValueError(
            f"Unknown shape {shape!r}; expected one of {sorted(figsizes)}."
        )
    width, row_height = figsizes[shape]

    # Determine number of rows (ceiling of nplots / ncols).
    nrows = (nplots + ncols - 1) // ncols

    # Initiate figure with nrows and ncols.
    fig, axs = plt.subplots(nrows, ncols, figsize=(width, nrows * row_height))
    fig.tight_layout()

    return fig, axs

In [4]:
def update_inds(treeind, rowind, colind, ncols):
    """
    Advance the subplot indices after a tree has been plotted.

    Args:
    treeind - running count of trees plotted so far
    rowind - current subplot row
    colind - current subplot column
    ncols - number of columns in the subplot grid

    Returns:
    treeind, colind, rowind - the updated indices. Note the order: the
                              column index is returned before the row index,
                              which differs from the argument order.
    """
    next_tree = treeind + 1

    # A tree count that is a multiple of ncols means the row is full:
    # wrap to the first column of the next row.
    if next_tree % ncols == 0:
        return next_tree, 0, rowind + 1

    # Otherwise stay on the same row and step one column to the right.
    return next_tree, colind + 1, rowind

In [5]:
def construct_treeid(tree, ptspath):
    """
    Construct variables for the tree point cloud path and tree ID. 
    
    This function uses the naming conventions specific to the Wytham Woods
    dataset and the tropical tree dataset to construct variables for the tree 
    path and the tree ID that can be used in further analysis and plotting.
    The dataset is recognised from the folder name in ptspath
    ('DATA_clouds_ply' or 'Tropical_manual_ply').
    
    Args:
    tree - either full path to point cloud (anything ending in '.ply') or 
           tree ID. Non-string IDs (e.g. integers) are converted with str().
    ptspath - path to folder with full point clouds for the dataset of interest
    
    Returns:
    tree - the full path to the tree point cloud 
    tree_num - the tree's unique ID: the text between the last underscore
               and the first following '.' in the path

    Raises:
    ValueError - if ptspath matches neither known dataset folder name
    """
    # (folder marker, file name prefix, file name suffix) for each dataset.
    datasets = (
        ('DATA_clouds_ply', '/wytham_winter_', '.ply'),          # wytham woods tree dataset
        ('Tropical_manual_ply', '/MLA01_2018_', '.wood.ply'),    # tropical tree dataset
    )

    folder = str(ptspath)
    tree = str(tree)

    recognised = False
    for marker, prefix, suffix in datasets:
        if marker in folder:
            recognised = True
            # Only build a path when an ID was passed in; full paths are kept as is.
            if not tree.endswith('.ply'):
                tree = folder + prefix + tree + suffix

    if not recognised:
        raise ValueError(
            f"Cannot determine dataset from ptspath {folder!r}: expected it to "
            "contain 'DATA_clouds_ply' or 'Tropical_manual_ply'."
        )

    tree_num = tree.split("_")[-1].split(".")[0]

    return tree, tree_num

In [6]:
def wytham_singlemulti(ptspath):
    """
    Split the Wytham Woods point clouds into single- and multi-stemmed trees.

    The two groups are told apart by file name alone: the tree ID is the text
    between the last underscore and the first following '.', and a
    multi-stemmed tree carries a trailing letter identifying the stem
    (e.g. 5a and 5b are the two stems of tree 5).

    Args:
    ptspath - folder that is searched for files matching '*ply'

    Returns:
    singlestemmed, multistemmed - two lists of file paths, in the order
                                  glob returned them
    """
    singlestemmed, multistemmed = [], []

    for cloud_path in glob.glob("%s/*ply" % ptspath):
        stem_id = cloud_path.split("_")[-1].split(".")[0]

        # A trailing letter marks one stem of a multi-stemmed tree.
        group = multistemmed if stem_id[-1].isalpha() else singlestemmed
        group.append(cloud_path)

    return singlestemmed, multistemmed

### Analysis Functions

In [7]:
def calc_convexhull(tree, ptspath, hgt=1.3, cluster_pts=None):
    """
    Calculate convex hull of the tree cross section at the specified height.
    
    This function splits the tree's point cloud to get a 0.06 m cross section 
    (following Tansey et al. 2009 & Calders et al. 2022) of the tree at the 
    specified height. The default height is 1.3 m, but other heights can be 
    passed in after the optimal DBH height has been determined for each tree.
    Heights are measured from the lowest point in the cloud.
    
    Args:
    tree - the tree on which to calculate the convex hull 
    ptspath - path to folder with full point clouds for the dataset of interest
    hgt - (Optional) Height at which to calculate convex hull. Default is 1.3. 
    cluster_pts - (Optional) If the tree is multistemmed, the point cloud cross
                  section can be clustered and split so that convex hull is 
                  calculated for each stem. When given, these points are used
                  as the cross section as is: the point cloud is not loaded
                  and no outlier removal is applied. Default is None.
    
    Returns: 
    dbh - value for diameter at the specified height (hull perimeter / pi)
    verts - number of vertices used in calculating convex hull 
    pts_dbh - the 3D point dataset for the given slice (for plotting)
    hull - the convex hull object
    maxdist - the longest edge of the convex hull, i.e. the maximum distance 
              between two consecutive hull vertices (including the edge that
              closes the polygon)

    If the cross section holds fewer than three points, a message is printed
    and np.nan is returned for all five values.
    """

    # Get treeID and ensure inputted format is correct.
    tree, tree_num = construct_treeid(tree, ptspath)

    if cluster_pts is not None:
        # Calculating the hull of one stem of a multi-stem tree: the caller
        # already extracted the points, so there is nothing to load or filter.
        pts_dbh = cluster_pts
    else:
        # Get DBH from full cloud to preserve max number of hits on stem slice.
        pts = load_pts(tree,False,False)

        # Extract part of df with z values between 1.27 and 1.33 m by default 
        base_z = pts['z'].min()
        pts_dbh = pts[(pts['z'] > base_z + hgt - 0.03) & (pts['z'] < base_z + hgt + 0.03)]

        # Remove outliers following Calders et al. 2022: keep points with at
        # least 5 neighbors within 0.1 in the x-y plane. An empty slice is
        # skipped because there is nothing to build a neighbor index from.
        if len(pts_dbh) > 0:
            xy = pts_dbh.iloc[:, 0:2].values
            _, neighbor_inds = nn(xy, 0.1)
            neighbor_counts = np.array([len(inds) for inds in neighbor_inds])
            pts_dbh = pts_dbh[neighbor_counts >= 5]

    xy_dbh_arr = np.asarray(pts_dbh[['x', 'y']])

    # Calculate convex hull if there are at least three points in cross section.
    if len(xy_dbh_arr) >= 3:

        hull = ConvexHull(xy_dbh_arr)

        # Calculate DBH based on convex hull and number of vertices.
        # For a 2-D hull, scipy's `area` attribute is the perimeter.
        dbh = hull.area / np.pi
        verts = len(hull.vertices)

        # Coordinates of the hull vertices in hull order. hull.vertices holds
        # indices into xy_dbh_arr, so the points must be looked up through it.
        ring = xy_dbh_arr[hull.vertices]

        # Length of every hull edge; rolling by one pairs each vertex with the
        # next one and the last vertex with the first.
        edge_lengths = np.linalg.norm(np.roll(ring, -1, axis=0) - ring, axis=1)

        # Get max distance btwn two vertices (possible indicator of data completeness).
        maxdist = edge_lengths.max()

        return dbh, verts, pts_dbh, hull, maxdist

    # If fewer than three points in the cross section, return nan for all values.
    print(f'{tree_num} has not enough points to calculate convex hull.')
    return np.nan, np.nan, np.nan, np.nan, np.nan
    

In [8]:
def iter_dbh(tree, ptspath, step=0.1, lim=10, verbose=False):
    """
    Measure the stem diameter in successive slices moving up the tree.

    Starting 0.1 above the lowest point of the cloud and moving up by `step`
    each time, a 0.06-thick slice is cut out and the convex hull of its x-y
    points is computed. The search stops at `lim` or at the tree height,
    whichever is smaller.

    Args:
    tree - the tree being processed
    ptspath - the path to the folder with .ply point clouds
    step - step size to move up the tree (in m)
    lim - the max distance up the tree to search
    verbose - Optional. If True, prints a message for every slice that has
              too few points.

    Returns:
    df_out - dataframe with one row per slice and the columns
             'DBH_CV' (hull.area / pi; nan if the slice has fewer than 3 points),
             'DBH_Verts' (number of hull vertices; nan likewise) and
             'Slice_Hgt' (slice height rounded to one decimal place)
    """
    # Get treeID
    tree, tree_num = construct_treeid(tree, ptspath)

    # Get DBH from full cloud to preserve max number of hits on stem slice.
    pts = load_pts(tree,False,False)
    z_floor = pts['z'].min()

    # Never search above the top of the tree.
    tree_hgt = pts['z'].max() - z_floor
    lim = min(tree_hgt, lim)

    # One (dbh, vertex count, height) record per slice.
    records = []

    # Cross section disc height starts at 0.1 m.
    disc = 0.1
    while disc < lim:
        in_slice = (pts['z'] > z_floor + disc - 0.03) & (pts['z'] < z_floor + disc + 0.03)
        xy_slice = np.asarray(pts[in_slice][['x', 'y']])

        if len(xy_slice) < 3:
            # Too few points for a hull: record the gap and keep climbing.
            if verbose: print(f'tree {tree_num} had not enough points at disc {disc}')
            records.append((np.nan, np.nan, round(disc, 1)))
        else:
            # Circumference of the slice from the scipy ConvexHull.
            slice_hull = ConvexHull(xy_slice)
            records.append((slice_hull.area / np.pi, len(slice_hull.vertices), round(disc, 1)))

        # Move up to next tree slice.
        disc += step

    # Construct and return output dataframe.
    return pd.DataFrame(records, columns = ['DBH_CV', 'DBH_Verts', 'Slice_Hgt'])

In [9]:
def get_buttress_hgt(tree, ptspath, step=0.1, lim=10, slope_thresh = 0.1, verbose=False):
    """
    Get the height of the top of the buttress.

    The DBH profile from iter_dbh is differenced between consecutive slices.
    Each slice is labelled 1 (DBH increasing faster than slope_thresh),
    -1 (decreasing faster than slope_thresh) or 0 (neither). The top of the
    buttress is taken to be the end of the first run of three 0's that 
    directly follows a -1. Slices are matched on their height rounded to one 
    decimal place, so slice heights need to be distinct at that precision 
    (as they are for the default step of 0.1).
    
    Args:
    tree - the tree being processed
    ptspath - the path to the folder with .ply point clouds
    step - step size to move up the tree (in m)
    lim - the max distance up the tree to search
    slope_thresh - slope threshold above which points are flagged as part of a buttress
    verbose - Optional. If True, prints a message when the default buttress
              height is used.
    
    Returns: 
    buttress_hgt - the height of the top of the buttress. 0.0 if the first
                   three slices are all labelled 0; 0.1 if no suitable
                   sequence of labels is found.
    treedf - the tree dataframe with column indicating buttress or not for each step
             ('slope' and 'Buttress'; the last two slices are left as NaN)
    """
    
    # Get treeID
    tree, tree_num = construct_treeid(tree, ptspath)
    
    # run iterative dbh for specified step size and lim to calc slopes
    treedf = iter_dbh(tree, ptspath, step=step, lim=lim)

    # Create the output columns up front so they exist even when the profile
    # is too short for the loop below to run.
    treedf['slope'] = np.nan
    treedf['Buttress'] = np.nan
    
    # calculate slope between pair of dbh measurements
    for disc in treedf['Slice_Hgt'].iloc[1:-1]:

        # round disc heights to one decimal place
        disc = round(disc, 1)
        prev_disc = round(disc - step, 1)
        at_prev = treedf['Slice_Hgt'] == prev_disc

        # get dbh values at each disc height
        val1 = treedf.loc[treedf['Slice_Hgt'] == disc, 'DBH_CV'].iloc[0]
        val2 = treedf.loc[at_prev, 'DBH_CV'].iloc[0]

        # calculate slope between dbh measurements; stored on the lower slice
        slope = (val1 - val2) / step
        treedf.loc[at_prev, 'slope'] = slope

        # flag slope values greater than given threshold
        if abs(slope) > slope_thresh:
            if slope < 0: treedf.loc[at_prev, 'Buttress'] = -1
            if slope > 0: treedf.loc[at_prev, 'Buttress'] = 1

        else:
            # Also reached when slope is NaN (missing DBH), since the
            # comparison above is then False.
            treedf.loc[at_prev, 'Buttress'] = 0
        
    # if first three points are not flagged as buttress points:
    first_three = treedf['Buttress'].iloc[0:3]
    if len(first_three) == 3 and (first_three == 0).all():
        # set buttress height to 0.0 (i.e. bottom of tree stem)
        buttress_hgt = 0.0

    # otherwise, find height of top of buttress
    else:
        # find first sequence of three 0's (non-buttress points) that come after a negative slope 
        val_find = np.array([-1, 0, 0, 0])
        df_condition = treedf['Buttress'].rolling(4).apply(lambda g: all(g.astype(int) == val_find), raw=True)
        
        # positions where a matching window ends (i.e. the third 0)
        matches = np.where(df_condition == 1)[0]

        if len(matches) > 0:
            buttress_hgt = treedf['Slice_Hgt'].iloc[matches[0]]
        else:
            if verbose: print(f"tree {tree_num}: no suitable place to measure dbh from. Setting buttress height to 0.1m by default.")
            
            buttress_hgt = 0.1
               
    return buttress_hgt, treedf

In [10]:
def flag_irregular(tree, ptspath, step=0.1, slope_thresh=0.1, lim=10, dbh_thresh = 1.5):
    """
    Flag any irregular parts of the tree stem to avoid using them for dbh.
    
    Using a rolling mean to easily account for/ avoid the influence of extremely 
    small outliers that may be caused by data occlusion. 
    
    Args: 
    tree - the tree being processed
    ptspath - the path to the folder with .ply point clouds
    step - step size to move up the tree (in m)
    lim - the max distance up the tree to search
    slope_thresh - slope threshold passed into get_buttress_hgt
    dbh_thresh - multiplier applied to the smallest 3-slice rolling mean DBH
                 found above the buttress; slices whose DBH exceeds the
                 product are flagged
    
    Returns:
    treedf - tree dataframe with the flagged points marked up the tree
             (column 'Flagged': 1 for flagged slices, 0 otherwise). If no
             rolling mean can be computed above the buttress, nothing is
             flagged.
    """
    # get buttress height and the dbh profile (get_buttress_hgt runs the
    # iterative dbh itself), then subset to the parts over the buttress
    buttress_hgt, treedf = get_buttress_hgt(tree, ptspath, step=step, lim=lim, slope_thresh=slope_thresh)
    over_buttress = treedf[treedf["Slice_Hgt"] > buttress_hgt]
    
    # calculate a max dbh threshold - no greater than dbh_thresh * minimum 
    # rolling mean dbh. Series.min skips NaN and gives NaN (rather than 
    # raising) when there is no valid rolling mean at all.
    max_dbh = over_buttress['DBH_CV'].rolling(3).mean().min() * dbh_thresh
    
    # create a filter using this threshold (False for NaN dbh or NaN threshold)
    dbh_filt = treedf['DBH_CV'] > max_dbh
    
    # flag values greater than the threshold
    treedf['Flagged'] = np.where(dbh_filt, 1.0, 0.0)

    return treedf

In [11]:
def get_dbh_hgt(tree, ptspath, step=0.1, slope_thresh=0.1, lim=10, dbh_thresh = 1.5, target_hgt=1.3):
    """
    Take output of iter_dbh function and determine ideal height from which to calculate dbh.

    Candidate heights are the slices above the buttress that are not flagged
    as irregular and that have a DBH value. The candidate closest to 
    target_hgt is returned (the lower one if two are equally close).
    
    Args: 
    tree - the tree being processed
    ptspath - the path to the folder with .ply point clouds
    step - step size to move up the tree (in m)
    slope_thresh - slope threshold passed into get_buttress_hgt and flag_irregular
    lim - the max distance up the tree to search
    dbh_thresh - dbh threshold to pass into flag_irregular function 
    target_hgt - (Optional) preferred measurement height. Default is 1.3.
    
    Return: 
    hgt - height at which DBH should be measured

    Raises:
    ValueError - if no slice above the buttress is usable
    """
    # Get treeID.
    tree, tree_num = construct_treeid(tree, ptspath)

    # Get buttress height. 
    buttress_hgt, treedf = get_buttress_hgt(tree, ptspath, step=step, lim=lim, slope_thresh=slope_thresh)
    
    # Flag irregular points to be excluded from dbh height consideration.
    treedf = flag_irregular(tree, ptspath, step=step, slope_thresh=slope_thresh, lim=lim, dbh_thresh = dbh_thresh)

    # Subset treedf to only above buttress height.
    over_buttress = treedf[treedf["Slice_Hgt"] > buttress_hgt]
    
    # Select only non-flagged data. Slices without a DBH value (too few
    # points for a hull) are never flagged, so they are excluded explicitly:
    # a height with no measurable cross section cannot be used for DBH.
    usable = (over_buttress['Flagged'] != 1) & over_buttress['DBH_CV'].notna()
    candidates = over_buttress.loc[usable, 'Slice_Hgt'].tolist()

    if not candidates:
        raise ValueError(
            f"tree {tree_num}: no usable slice above buttress height "
            f"{buttress_hgt} from which to measure dbh."
        )

    # Get suitable value closest to the target height to use for dbh height. 
    hgt = min(candidates, key=lambda slice_hgt: abs(slice_hgt - target_hgt))

    return hgt

In [12]:
def flag_missing(pts_dbh, num_slices):
    """
    Returns True if the tree is missing data, otherwise, False. 
    
    Fits a circle to the cross section, splits the plane around the fitted
    center into num_slices equiangular sections and assigns every point to
    the section it falls in. Any tree that has at least one section with no 
    points inside it is flagged for missing data.
    
    Args: 
    pts_dbh - point cloud of tree cross section (dataframe with 'x' and 'y')
    num_slices - Number of slices to split the cross section into
    
    Returns: 
    flagged - True if the tree is missing data, otherwise False
    
    """
    # Fit a circle to the points in tree cross section; only the center is needed.
    residu, dbh_circ, xc, yc, circle_pts = fit_circle(pts_dbh)

    # Angle from the circle center to every point, normalized to [0, 2*pi).
    dx = pts_dbh['x'].to_numpy() - xc
    dy = pts_dbh['y'].to_numpy() - yc
    angles = np.mod(np.arctan2(dy, dx) + 2 * math.pi, 2 * math.pi)

    # Points with undefined coordinates cannot be assigned to a section.
    angles = angles[np.isfinite(angles)]

    # Based on num_slices, determine size of the slices.
    slice_size = 2 * math.pi / num_slices

    # Section index of every point. The index is capped at the last section
    # so that floating point rounding right below 2*pi cannot create an 
    # extra section that would be counted as occupied.
    slice_idx = np.minimum((angles / slice_size).astype(int), num_slices - 1)

    # If not all slices have points in them, flag the tree. 
    occupied = np.unique(slice_idx)
    flagged = len(occupied) < num_slices

    # Return True if the tree is missing data, otherwise False.
    return flagged

In [13]:
def trop_allometry(dataframe, new_colname, dbhfield):
    """
    Add an allometry-based volume estimate to the tropical tree dataframe.

    Uses the allometric equation from Chave et al. 2005,
    https://doi.org/10.1007/s00442-005-0100-x, a cubic polynomial in the
    natural log of the diameter.

    Args:
    dataframe - dataframe with all the trees and their DBH values. It is
                modified in place: besides the result column, a 'dbh_cm'
                column (the DBH field multiplied by 100) is added.
    new_colname - name of the column that will hold the allometry values
    dbhfield - column in dataframe holding the DBH to use for the allometry

    Returns:
    dataframe - the same dataframe object, with the new columns added
    """
    # Convert DBH field to cm before allometry.
    dataframe['dbh_cm'] = dataframe[dbhfield] * 100

    # The polynomial is written in terms of ln(dbh), so take the log once.
    ln_dbh = np.log(dataframe['dbh_cm'])
    exponent = -1.499 + 2.148*ln_dbh + 0.207*(ln_dbh**2) - 0.0281*(ln_dbh**3)

    dataframe[new_colname] = np.exp(exponent)/1000

    return dataframe

def wytham_allometry(dataframe, new_colname, dbhfield):
    """
    Add an allometry-based volume estimate to the Wytham tree dataframe.

    Uses the allometric equation from Bunce 1968,
    https://doi.org/10.2307/2258105, a log-linear relation
    exp(2.98 + 2.30 * ln(dbh)).

    Args:
    dataframe - dataframe with all the trees and their DBH values. It is
                modified in place.
    new_colname - name of the column that will hold the allometry values
    dbhfield - column in dataframe holding the DBH to use for the allometry.
               NOTE(review): the values are used as stored, with no unit
               conversion - confirm they are in the units the equation expects.

    Returns:
    dataframe - the same dataframe object, with the new column added
    """
    ln_dbh = np.log(dataframe[dbhfield])
    dataframe[new_colname] = np.exp(2.98 + 2.30*ln_dbh)

    return dataframe

### Multi-Stem Functions

In [14]:
def split_stems(tree, ptspath, nclusters, fixedheight = True, hgt=1.3, hgtlist = None):
    """
    Cluster the cross section of a (possibly multi-stemmed) tree with k-means.

    A 0.06-thick slice of the point cloud is taken at the desired height
    (either a fixed height or the tree's optimal dbh height) and split into
    the requested number of clusters. To determine the number of stems, call
    this function with varying numbers of clusters and keep the number with
    the lowest circle-fit residuals. Intended for datasets that are not
    already split into stems (e.g. the tropical tree dataset).

    Args:
    tree - the tree being processed.
    ptspath - the path to the folder with .ply point clouds
    nclusters - the number of clusters to split data into
    fixedheight - if True, use the specified height (hgt) for all trees,
                  if False, use the optimal height for the tree stored in hgtlist
    hgt - If fixedheight is True, specify the height at which to split stems.
    hgtlist - if fixedheight is False, a dictionary with the optimal height for
              each tree, keyed by tree ID.

    Returns:
    labels - cluster label of every point in the slice, in slice row order.
             The clustering is fitted on all columns of the slice (x, y and z)
             and no random seed is set, so label numbering can vary between
             calls.
    """
    tree, tree_num = construct_treeid(tree, ptspath)

    # Height of the slice: shared fixed height, or this tree's own optimum.
    slice_hgt = hgt if fixedheight else hgtlist[tree_num]

    # Cut the slice out of the full cloud, measured from its lowest point.
    pts = load_pts(tree,False,False)
    above = pts['z'] > pts['z'].min() + slice_hgt - 0.03
    below = pts['z'] < pts['z'].min() + slice_hgt + 0.03
    pts_dbh = pts[above & below]

    # Fit the requested number of clusters and hand back the per-point labels.
    clustering = KMeans(nclusters, n_init='auto').fit(pts_dbh)
    return clustering.labels_

In [15]:
def fit_circle(circle_pts):
    """
    Least-squares fit of a circle to the x-y points of a cross section.

    Code optimizing distance between data points and circle center adapted from
    Calders et al. 2022 www.doi.org/10.5281/zenodo.7307956

    Args:
    circle_pts - dataframe with 'x' and 'y' columns

    Returns:
    residu - mean squared difference between each point's distance to the
             fitted center and the mean of those distances
    dbh_circ - diameter of the fitted circle (twice the mean distance)
    xc, yc - coordinates of the fitted center
    circle_pts - the input points, returned unchanged
    """
    xs = circle_pts['x']
    ys = circle_pts['y']

    def radii_from(center, px, py):
        """Distance of every 2D point from the given center (Calders et al. 2022)."""
        cx, cy = center
        return np.sqrt((px-cx)**2 + (py-cy)**2)

    def radius_deviation(center, px, py):
        """Each point's distance from center minus the mean distance (Calders et al. 2022)."""
        radii = radii_from(center, px, py)
        return radii - radii.mean()

    # The centroid of the points is the first estimate of the center.
    start_center = xs.mean(), ys.mean()

    # Move the center so the points' distances to it are as uniform as possible.
    fitted_center, _status = optimize.leastsq(radius_deviation, start_center, args=(xs, ys))
    xc, yc = fitted_center

    radii = radii_from(fitted_center, xs, ys)
    mean_radius = radii.mean()
    dbh_circ = mean_radius * 2.0

    # average residual between data points and circle fit
    residu = np.sum((radii - mean_radius)**2)/len(radii)

    return residu, dbh_circ, xc, yc, circle_pts

In [16]:
def check_overlap(circle_dict):
    """
    Check whether any two circles overlap by more than 10%.

    Args:
    circle_dict - dictionary mapping a label to a circle, each circle being a
                  dictionary with the keys 'r', 'xc' and 'yc'

    Returns:
    overlap - True if, for at least one pair of circles, one circle lies
              inside the other or the distance between the centers is less
              than 90% of the sum of the radii. False otherwise (including
              when there are fewer than two circles).
    """
    circles = list(circle_dict.values())
    overlap = False

    # Visit every unordered pair of circles exactly once.
    for first in range(len(circles)):
        for second in range(first + 1, len(circles)):
            circle_a = circles[first]
            circle_b = circles[second]

            r1 = circle_a['r']
            r2 = circle_b['r']

            # distance between centers of circles
            dx = circle_a['xc'] - circle_b['xc']
            dy = circle_a['yc'] - circle_b['yc']
            dist = math.sqrt(dx * dx + dy * dy)

            # one circle entirely within the other
            nested = dist <= (r1 - r2) or dist <= (r2 - r1)

            # circles overlap more than 10%
            too_close = dist < (r1 + r2) * 0.9

            if nested or too_close:
                overlap = True

    return overlap

In [17]:
def get_n_stems(tree, ptspath, maxstems = 4, fixedheight = True, hgt=1.3, hgtlist = None):
    """
    Get the number of stems of each tree.
    
    Applies other functions (split_stems, fit_circle, and check_overlap) to
    determine the number of stems for each tree. For every candidate number 
    of stems the cross section is clustered and a circle is fitted to each 
    cluster. A candidate is kept only if every cluster has more than three 
    points and the fitted circles do not overlap; among the kept candidates, 
    the one with the lowest mean circle-fit residual is returned.
    
    Args: 
    tree - the tree being processed. 
    ptspath - the path to the folder with .ply point clouds
    maxstems - upper bound on the number of stems to test.
               NOTE(review): candidates are generated with range(1, maxstems),
               so maxstems itself is not tested (the default of 4 tests 1, 2 
               and 3 stems) - confirm whether the bound is meant to be inclusive.
    fixedheight - if True, use the specified height (hgt) for all trees, 
                  if False, use the optimal height for the tree stored in hgtlist
    hgt - If fixedheight is True, specify the height at which to split stems.
    hgtlist - if fixedheight is False, a dictionary with the optimal height for each tree.
    
    Returns: 
    nstems - number of stems for the tree

    Raises:
    ValueError - if no candidate number of stems passes the checks
    """
    # Resolve the tree against the dataset folder that was passed in.
    tree, tree_num = construct_treeid(tree, ptspath)
    
    if not fixedheight:
        hgt = hgtlist[tree_num]
    
    # Get DBH from full cloud at desired height. This is the same slice that
    # split_stems clusters, so its labels line up with these rows.
    pts = load_pts(tree,False,False)
    pts_dbh = pts[(pts['z'] > pts['z'].min() + hgt - 0.03) & (pts['z'] < pts['z'].min() + hgt + 0.03)]
    
    # Mean circle-fit residual for every candidate number of stems that qualifies.
    residuals = {}

    # Loop through checking residuals and overlap for number of possible stems up to the max. 
    for stems in range(1, maxstems):
        
        # Cluster the points into the specified number of stems.
        labels = split_stems(tree, ptspath, nclusters=stems, fixedheight = fixedheight, hgt=hgt, hgtlist = hgtlist)

        # Circle parameters and residuals for this set of stems.
        stem_circles = {}
        stem_residuals = []
          
        enough_pts = True
        # Fit a circle to each of the clusters.
        for label in np.unique(labels):
            # Get points for the stem of interest.
            circle_pts = pts_dbh[labels == label]
            
            # A cluster this small rules out the whole candidate.
            if len(circle_pts) <= 3:
                enough_pts = False
                break
            
            residu, dbh_circ, xc, yc, circle_pts = fit_circle(circle_pts)

            stem_circles[label] = {'r': dbh_circ/2.0, 'xc': xc, 'yc':yc}
            stem_residuals.append(residu)
            
        # Keep the candidate only if all clusters were large enough and the
        # fitted circles don't overlap.
        if enough_pts and not check_overlap(stem_circles):
            # Average the residuals over the stems.
            residuals[stems] = np.mean(stem_residuals)

    if not residuals:
        raise ValueError(
            f"tree {tree_num}: no number of stems below {maxstems} produced "
            "non-overlapping circles with enough points."
        )
    
    nstems = min(residuals, key=residuals.get)
        
    return nstems

In [18]:
def how_circular(tree, ptspath, nstems = 1, fixedheight = True, hgt=1.3, hgtlist = None):
    """
    Returns metrics describing how well circle fits the cross section.

    The cross section at the desired height is clustered into nstems stems
    with split_stems and a circle is fitted to each cluster.
    
    Args: 
    tree - the tree being processed. 
    ptspath - the path to the folder with .ply point clouds
    nstems - number of stems (to determine how many circles to fit to tree)
    fixedheight - if True, use the specified height (hgt) for all trees, 
                  if False, use the optimal height for the tree stored in hgtlist
    hgt - If fixedheight is True, specify the height at which to split stems.
    hgtlist - if fixedheight is False, a dictionary with the optimal height for each tree.
    
    Returns:
    datadict - dictionary with data on circle information (including residuals).
               One entry per cluster label, each a dictionary with the keys
               'residual', 'dbh_circ', 'xc', 'yc', 'circle_pts' and 'hgt'.
    """
    # Resolve the tree against the dataset folder that was passed in.
    tree, tree_num = construct_treeid(tree, ptspath)
        
    if not fixedheight:
        hgt = hgtlist[tree_num]
    
    # get DBH from full cloud at desired height. This is the same slice that
    # split_stems clusters, so its labels line up with these rows.
    pts = load_pts(tree,False,False)
    pts_dbh = pts[(pts['z'] > pts['z'].min() + hgt - 0.03) & (pts['z'] < pts['z'].min() + hgt + 0.03)]
    
    labels = split_stems(tree, ptspath, nclusters = nstems, fixedheight = fixedheight, hgt=hgt, hgtlist = hgtlist)
    
    datadict = {}
    
    # cycle through labels (tree stems)
    for label in np.unique(labels):

        # get points for the stem of interest and fit a circle to the points
        circle_pts = pts_dbh[labels == label]
        residu, dbh_circ, xc, yc, circle_pts = fit_circle(circle_pts)

        # store the fit for this stem
        datadict[label] = {'residual': residu, 'dbh_circ': dbh_circ, 'xc': xc, 'yc':yc, 'circle_pts':circle_pts, 'hgt': hgt}

    return datadict

### Plotting Functions

In [19]:
def plot_convexhull(treelist, ptspath, ncols=4, fixedheight = True, hgt=1.3, 
                    hgtlist = None):
    """
    Draw the stem slice and its convex hull for every tree in a grid of panels.
    
    Args:
    treelist - the trees to plot, one panel per tree
    ptspath - the folder containing the point clouds for the trees in treelist
    ncols - (Optional) number of columns in the plotting grid. Default is 4.
    fixedheight - (Optional) Default is True: every tree is sliced at hgt. 
                  If False, the slice height of each tree is looked up in hgtlist.
    hgt - (Optional) Slice height used when fixedheight is True.
    hgtlist - (Optional) Dictionary mapping tree id to its slice height; 
              only read when fixedheight is False.
    
    Returns:
    None. The panels are drawn on the figure created by formatplots.
    """
    fig, axs = formatplots(treelist, ncols)
    treeind, rowind, colind = 0, 0, 0
    hull_color = '#CC5500'
    
    for tree in treelist:
        tree, tree_num = construct_treeid(tree, ptspath)
        
        # per-tree height replaces the fixed one only in dynamic mode
        if not fixedheight:
            hgt = hgtlist[tree_num]
        
        dbh_cv, verts, pts_dbh, hull, maxdist = calc_convexhull(tree, ptspath, hgt=hgt)
        xy = np.asarray(pts_dbh[['x', 'y']])
        panel = axs[rowind, colind]
        
        # slice points
        panel.scatter(pts_dbh['x'], pts_dbh['y'], color='#4682B4')
        
        # hull edges, one dashed segment per simplex
        for edge in hull.simplices:
            panel.plot(xy[edge, 0], xy[edge, 1], color=hull_color, linestyle='dashed')
        
        # hull vertices
        corner_idx = hull.vertices
        panel.plot(xy[corner_idx, 0], xy[corner_idx, 1], color=hull_color, marker='o', label="Convex Hull")

        # panel cosmetics: title, legend, equal aspect, no tick labels
        panel.set_title(f'Convex Hull for {tree_num}')
        panel.legend(fontsize='xx-large', loc='upper left')
        panel.axis('equal')
        panel.set_yticklabels([])
        panel.set_xticklabels([])

        # move on to the next grid position
        treeind, colind, rowind = update_inds(treeind, rowind, colind, ncols)
    return

In [20]:
def plot_iter_dbh(treelist, ptspath, ncols = 2, step=0.1, slope_thresh=0.1, dbh_thresh=1.5, show_flagged=False):
    """
    Plot the convex-hull DBH of each slice against slice height, one panel per tree.
    
    The selected DBH height (from get_dbh_hgt) and the buttress height (from 
    get_buttress_hgt) are highlighted on each panel.
    
    Args:
    treelist - the list of trees that will be plotted
    ptspath - the folder containing the point clouds for the trees in treelist
    ncols - (Optional) the number of columns to use in plotting. Default is 2.
    step - (Optional) passed to get_dbh_hgt, get_buttress_hgt and flag_irregular.
    slope_thresh - (Optional) passed to the same helpers and shown in the panel title.
    dbh_thresh - (Optional) passed to get_dbh_hgt and flag_irregular.
    show_flagged - (Optional) Default is False. If True, the slices marked by 
                   flag_irregular (Flagged == 1) are drawn as red crosses. The flagging
                   is only computed when requested because its result is otherwise unused.
    
    Returns:
    None. The panels are drawn on the figure created by formatplots.
    """
    
    # format plotting grid
    fig, axs = formatplots(treelist, ncols, shape='horiz')
    rowind = colind = treeind = 0

    for tree in treelist:
        
        # Get treeID
        tree, tree_num = construct_treeid(tree, ptspath)
        
        # get height for dbh for the tree 
        hgt = get_dbh_hgt(tree, ptspath, step=step, slope_thresh=slope_thresh, dbh_thresh=dbh_thresh)
        
        # get buttress height together with the per-slice table that is plotted
        # (presumably the iter_dbh output - confirm against get_buttress_hgt)
        buttress_hgt, treedf = get_buttress_hgt(tree, ptspath, step=step, lim=10, slope_thresh = slope_thresh)
        
        # Build both masks from the frame they are applied to, so mask and data
        # always share the same index.
        hgtplot = treedf['Slice_Hgt'] == hgt
        buttressplot = treedf['Slice_Hgt'] == buttress_hgt
        
        # plot data
        ax = axs[rowind, colind]
        ax.scatter(treedf['Slice_Hgt'], treedf['DBH_CV'], color='#36454F')
        ax.set_title(f'DBH w Height for {tree_num}, Slope Thresh: {slope_thresh}', fontsize=20)
        ax.set_xlabel('Height of DBH Slice (m)', fontsize=20)
        ax.set_ylabel('Value of DBH Slice (m)', fontsize=20)
        ax.plot(treedf['Slice_Hgt'][hgtplot], treedf['DBH_CV'][hgtplot], color = '#CC5500', marker = 2, markersize=60)
        ax.plot(treedf['Slice_Hgt'][buttressplot], treedf['DBH_CV'][buttressplot], color='#013220', marker = 2, markersize=60)
        
        # optionally plot all flagged points
        if show_flagged:
            flagdf = flag_irregular(tree, ptspath, step=step, slope_thresh=slope_thresh, lim=10, dbh_thresh = dbh_thresh)
            flagged = flagdf[flagdf['Flagged'] == 1]
            ax.plot(flagged['Slice_Hgt'], flagged['DBH_CV'],'r+', markersize=20)
        
        treeind, colind, rowind = update_inds(treeind, rowind, colind, ncols)
    
    # lay out the grid once, after every panel has its titles and labels
    fig.tight_layout()
    
    return

In [21]:
def plot2D(trees, ptspath, ncols=4, dbh=True, iterdbh=False, hgtlist = None):
    """
    Plot the side view (x against z) of each tree's point cloud, one panel per tree.
    
    Args:
    trees - the list of trees that will be plotted
    ptspath - the folder containing the point clouds for the trees
    ncols - (Optional) the number of columns to use in plotting. Default is 4.
    dbh - (Optional) Default is True: overlay the slice at 1.3 m above the lowest point.
    iterdbh - (Optional) Default is False. If True, overlay the slice at the 
              tree-specific height stored in hgtlist.
    hgtlist - (Optional) A dictionary of the slice height for each tree; required 
              when iterdbh is True.
    
    Returns:
    None. The panels are drawn on the figure created by formatplots.
    
    Raises:
    ValueError - if iterdbh is True and no hgtlist is supplied.
    """
    # fail before any figure is created instead of crashing inside the loop
    if iterdbh and hgtlist is None:
        raise ValueError("hgtlist must be provided when iterdbh is True")
    
    fig, axs = formatplots(trees, ncols, 'vert')
    rowind = colind = treeind = 0
    
    # slices are taken +/- 3 cm around the requested height
    half_width = 0.03

    # loop through trees
    for tree in trees:
        # Get treeID
        tree, tree_num = construct_treeid(tree, ptspath)

        # use the full cloud to preserve max number of hits on stem slice
        pts = load_pts(tree,False,False)
        z_base = pts['z'].min()
        ax = axs[rowind, colind]

        # plot scatter plot of points in x and z 
        ax.scatter(pts['x'], pts['z'], color='#36454F', s=0.1, alpha=0.6) 
        
        # collect the height slices to overlay: (height, colour, label)
        overlays = []
        if dbh: 
            overlays.append((1.3, '#FABC2A', "DBH Slice"))
        if iterdbh: 
            overlays.append((hgtlist[tree_num], '#CC5500', "HGT Slice"))
        
        for slice_hgt, slice_color, slice_label in overlays:
            in_slice = (pts['z'] > z_base + slice_hgt - half_width) & (pts['z'] < z_base + slice_hgt + half_width)
            band = pts[in_slice]
            # NOTE(review): no marker is set, so the slice is drawn as a connected
            # line and markersize has no visible effect - confirm this is intended.
            ax.plot(band['x'], band['z'], color=slice_color, markersize=3, label=slice_label)
            
        ax.set_title(f'{tree_num} ({rowind}, {colind})')
        ax.set_yticklabels([])
        ax.set_xticklabels([])

        # increase tree index to get to next tree
        treeind, colind, rowind = update_inds(treeind, rowind, colind, ncols)
    return
