In [1]:
import pandas as pd
from glob import glob
import laspy
import re

# Count points in each tree

In [17]:
data = {
    'tree_id': [],
    'num_total': [],
    'num_class1': [],
    'num_class2': [],
    'num_class6': [],
    'num_class10': [],
}

for f in glob('../data/MpalaForestGEO_LasClippedtoTreePolygons/*.las'):
    matches = re.findall(r'treeID_(\d+)', f)
    assert len(matches) == 1
    id = matches[0]
    las = laspy.read(f)
    data['tree_id'].append(id)
    data['num_total'].append(len(las.classification))
    data['num_class1'].append((las.classification == 1).sum())
    data['num_class2'].append((las.classification == 2).sum())
    data['num_class6'].append((las.classification == 6).sum())
    data['num_class10'].append((las.classification == 10).sum())

data = pd.DataFrame.from_dict(data).astype(int)
data.to_pickle('num_points.pkl')

# Examine number of points

In [21]:
df = pd.read_pickle('num_points.pkl')
qnt = 0.9
pct = df['num_total'].quantile(qnt)
print(f'About {1-qnt:.0%} ({int((1-qnt)*len(df))}) of our trees have {int(pct)} or more points')
# df[df['num_total'] > df['num_total'].quantile(0.4)]['tree_id']
id = df[df['num_total'].between(pct-10, pct+10)]['tree_id'].iloc[0]

from util import las_to_pc, sexy_gif
import laspy
pc, _ = las_to_pc(laspy.read(f'../data/MpalaForestGEO_LasClippedtoTreePolygons/treeID_{id}.las'))
sexy_gif(pc, f'sexy_{qnt}.gif')

About 10% (1685) of our trees have 3389 or more points


# Open fake tree labels

In [21]:
points_df = pd.read_pickle('num_points.pkl')
labels_df = pd.read_pickle('fake_labels.pkl')
merged_df = points_df.merge(labels_df, left_on='tree_id', right_on='treeID')
merged_df.to_pickle('data_desc.pkl')

In [17]:
merged_df

Unnamed: 0,tree_id,num_total,num_class1,num_class2,num_class6,num_class10,height,winRads,treeID,crownAr,crwnDmt,geometry,Label
0,6262,6230,5123,698,133,276,4.136,1.8408,6262,24.00,5.527906,"POLYGON ((263702.500 32198.500, 263702.500 321...",Euclea divinorum
1,14995,3937,2327,723,1,886,1.561,1.0683,14995,4.50,2.393654,"POLYGON ((263525.000 32048.500, 263525.000 320...",Euclea divinorum
2,9151,39,26,6,1,6,1.776,1.1328,9151,3.50,2.111004,"POLYGON ((263954.500 32154.000, 263954.500 321...",Acacia drepanolobium
3,15453,2032,1121,188,74,649,4.473,1.9419,15453,34.00,6.579525,"POLYGON ((263962.000 32039.000, 263962.000 320...",Acacia brevispica
4,13022,6956,4288,1736,0,932,1.683,1.1049,13022,5.25,2.585441,"POLYGON ((263578.000 32083.500, 263578.000 320...",Euclea divinorum
...,...,...,...,...,...,...,...,...,...,...,...,...,...
16848,7165,1624,857,343,33,391,1.890,1.1670,7165,5.25,2.585441,"POLYGON ((263658.500 32184.000, 263658.500 321...",Acacia mellifera
16849,8256,1984,1283,660,8,33,1.450,1.0350,8256,3.75,2.185097,"POLYGON ((263682.500 32166.000, 263682.500 321...",Croton dichogamous
16850,9148,440,187,89,25,139,2.215,1.2645,9148,5.75,2.705758,"POLYGON ((263881.500 32153.500, 263881.500 321...",Canthium pseudosetiforum
16851,12325,1194,600,326,34,234,1.849,1.1547,12325,5.75,2.705758,"POLYGON ((263860.000 32095.500, 263860.000 320...",Acacia brevispica


In [16]:
labels = merged_df['Label']
classes = labels.unique().tolist()
labels = [classes.index(l) for l in labels]
labels

[0,
 0,
 1,
 2,
 0,
 3,
 4,
 4,
 2,
 0,
 5,
 6,
 7,
 5,
 1,
 0,
 4,
 2,
 8,
 2,
 8,
 0,
 0,
 0,
 0,
 2,
 0,
 0,
 2,
 9,
 4,
 5,
 0,
 3,
 2,
 2,
 2,
 2,
 5,
 10,
 0,
 11,
 2,
 1,
 3,
 2,
 5,
 5,
 4,
 2,
 5,
 11,
 2,
 3,
 12,
 0,
 5,
 13,
 5,
 2,
 0,
 5,
 1,
 5,
 2,
 3,
 4,
 4,
 4,
 0,
 2,
 5,
 1,
 2,
 5,
 5,
 5,
 0,
 14,
 13,
 5,
 0,
 4,
 5,
 0,
 2,
 2,
 2,
 0,
 5,
 4,
 13,
 0,
 5,
 6,
 5,
 3,
 13,
 5,
 5,
 1,
 5,
 2,
 4,
 2,
 5,
 5,
 2,
 7,
 15,
 2,
 2,
 1,
 2,
 1,
 5,
 1,
 0,
 2,
 5,
 2,
 8,
 2,
 9,
 5,
 5,
 14,
 2,
 4,
 5,
 4,
 2,
 1,
 2,
 4,
 5,
 5,
 3,
 3,
 5,
 5,
 0,
 2,
 14,
 2,
 2,
 5,
 0,
 5,
 4,
 4,
 2,
 3,
 4,
 2,
 4,
 0,
 2,
 0,
 16,
 2,
 5,
 2,
 17,
 17,
 2,
 5,
 4,
 1,
 8,
 2,
 5,
 5,
 5,
 2,
 0,
 3,
 0,
 4,
 5,
 2,
 2,
 4,
 1,
 1,
 2,
 5,
 0,
 2,
 0,
 2,
 5,
 13,
 5,
 3,
 5,
 0,
 2,
 5,
 6,
 4,
 0,
 1,
 2,
 0,
 4,
 5,
 5,
 14,
 4,
 5,
 5,
 5,
 13,
 16,
 2,
 6,
 1,
 0,
 6,
 1,
 5,
 18,
 2,
 2,
 3,
 19,
 1,
 0,
 5,
 1,
 5,
 0,
 0,
 17,
 2,
 5,
 0,
 3,
 0,
 5,
 4,
 2,
 0,
 8

In [3]:
merged_df = pd.read_pickle('data_desc.pkl')
merged_df

Unnamed: 0,tree_id,num_total,num_class1,num_class2,num_class6,num_class10,height,winRads,treeID,crownAr,crwnDmt,geometry,Label
0,6262,6230,5123,698,133,276,4.136,1.8408,6262,24.00,5.527906,"POLYGON ((263702.500 32198.500, 263702.500 321...",Euclea divinorum
1,14995,3937,2327,723,1,886,1.561,1.0683,14995,4.50,2.393654,"POLYGON ((263525.000 32048.500, 263525.000 320...",Euclea divinorum
2,9151,39,26,6,1,6,1.776,1.1328,9151,3.50,2.111004,"POLYGON ((263954.500 32154.000, 263954.500 321...",Acacia drepanolobium
3,15453,2032,1121,188,74,649,4.473,1.9419,15453,34.00,6.579525,"POLYGON ((263962.000 32039.000, 263962.000 320...",Acacia brevispica
4,13022,6956,4288,1736,0,932,1.683,1.1049,13022,5.25,2.585441,"POLYGON ((263578.000 32083.500, 263578.000 320...",Euclea divinorum
...,...,...,...,...,...,...,...,...,...,...,...,...,...
16848,7165,1624,857,343,33,391,1.890,1.1670,7165,5.25,2.585441,"POLYGON ((263658.500 32184.000, 263658.500 321...",Acacia mellifera
16849,8256,1984,1283,660,8,33,1.450,1.0350,8256,3.75,2.185097,"POLYGON ((263682.500 32166.000, 263682.500 321...",Croton dichogamous
16850,9148,440,187,89,25,139,2.215,1.2645,9148,5.75,2.705758,"POLYGON ((263881.500 32153.500, 263881.500 321...",Canthium pseudosetiforum
16851,12325,1194,600,326,34,234,1.849,1.1547,12325,5.75,2.705758,"POLYGON ((263860.000 32095.500, 263860.000 320...",Acacia brevispica
