In [1]:
import pandas as pd
import numpy as np

import laspy
import os

from tqdm import tqdm

In [14]:
# Pre-process the training set: write the renamed metadata CSV and, for every
# tree listed in the metadata, a fixed-size random sample of its points as text.
N_POINTS = 16384
TXT_FOLDER = "data/Tr3D_species/treesXYZ/"
LAS_FOLDER = "data/Tr3D_species/las/"
META_CSV = "data/Tr3D_species/meta/Tr3D_meta.csv"

# Seed the global NumPy RNG so the sampled indices are repeatable between runs.
np.random.seed(0)

# Create the output folders up front so the writes below work on a fresh checkout.
os.makedirs(TXT_FOLDER, exist_ok=True)
os.makedirs(os.path.dirname(META_CSV), exist_ok=True)

metadata = pd.read_csv("data/Tr3D_species/tree_metadata_training_publish.csv")

# Same table with 'treeID' -> 'id' (stored as string) and 'species' -> 'sp'.
new_df = metadata.rename(columns={'treeID':'id', 'species':'sp'})
new_df['id'] = new_df['id'].astype(str)
print(new_df)

new_df.to_csv(META_CSV, index=False)

# Passing total= lets tqdm show a percentage/ETA instead of a bare counter.
for filename, tree_id in tqdm(zip(metadata['filename'], metadata['treeID']), total=len(metadata)):
    las = laspy.read(f"{LAS_FOLDER}/{filename}")

    # Stack the first three fields of the point records in one vectorized step
    # (same values as reading x[0], x[1], x[2] record by record, without the
    # per-point Python loop).
    # NOTE(review): these are presumably the raw stored X/Y/Z integers, i.e. not
    # scaled by the header scale/offset - confirm that this is intended.
    records = las.points.array
    xyz_points = np.column_stack(
        [records[name] for name in records.dtype.names[:3]]
    ).astype(np.float32)

    n_available = xyz_points.shape[0]
    if n_available == 0:
        raise ValueError(f"{filename} contains no points; cannot sample {N_POINTS} points")

    # Sample exactly N_POINTS rows: without replacement when the cloud has at
    # least N_POINTS points (including exactly N_POINTS), otherwise with
    # replacement so small clouds are padded with repeated points.
    if n_available >= N_POINTS:
        idx = np.random.choice(n_available, size=N_POINTS, replace=False)
    else:
        idx = np.random.randint(n_available, size=N_POINTS)

    xyz_points = xyz_points[idx, :]

    np.savetxt(f"{TXT_FOLDER}/{tree_id}.txt", xyz_points)

          id                  sp       genus       dataset data_type  tree_H  \
0         70  Eucalyptus_miniata  Eucalyptus   luck_levick       TLS   9.018   
1         71  Eucalyptus_miniata  Eucalyptus   luck_levick       TLS   7.841   
2         72  Eucalyptus_miniata  Eucalyptus   luck_levick       TLS  14.895   
3         73  Eucalyptus_miniata  Eucalyptus   luck_levick       TLS  18.734   
4         74  Eucalyptus_miniata  Eucalyptus   luck_levick       TLS   6.497   
...      ...                 ...         ...           ...       ...     ...   
17702  20241    Pinus_sylvestris       Pinus  puliti_ULS_2       ULS  11.120   
17703  20242    Pinus_sylvestris       Pinus  puliti_ULS_2       ULS  13.040   
17704  20243    Pinus_sylvestris       Pinus  puliti_ULS_2       ULS  16.260   
17705  20244    Pinus_sylvestris       Pinus  puliti_ULS_2       ULS  14.020   
17706  20246    Pinus_sylvestris       Pinus  puliti_ULS_2       ULS  15.140   

               filename  
0      /train

17707it [1:50:26,  2.67it/s]


In [53]:
#Test set
# Pre-process the competition test set: write a metadata CSV listing every LAS
# file and, for each file, a fixed-size random sample of its points as text.
N_POINTS = 16384
TXT_FOLDER = "data/Tr3D_species_comp_test/treesXYZ/"
LAS_FOLDER = "data/Tr3D_species_comp_test/las/"
META_CSV = "data/Tr3D_species_comp_test/meta/Tr3D_meta.csv"

# Seed the global NumPy RNG so the sampled indices are repeatable between runs.
np.random.seed(0)

# Create the output folders up front so the writes below work on a fresh checkout.
os.makedirs(TXT_FOLDER, exist_ok=True)
os.makedirs(os.path.dirname(META_CSV), exist_ok=True)

# os.listdir returns entries in arbitrary order, which would make the seeded
# sampling depend on the file system. Keep only LAS/LAZ files (skipping stray
# entries such as hidden files) and sort them by their numeric id so every run
# visits the files in the same order.
fnames = sorted(
    (f for f in os.listdir(LAS_FOLDER) if f.lower().endswith((".las", ".laz"))),
    key=lambda f: int(os.path.splitext(f)[0]),
)

# The id is the file stem normalised through int(), so e.g. "007.las" -> "7".
ids = [str(int(os.path.splitext(f)[0])) for f in fnames]

# Every row gets the same species label.
# NOTE(review): presumably a placeholder because test labels are unknown - confirm.
species = ['Abies_alba'] * len(fnames)

data = {'id':ids, 'sp':species}

meta_df = pd.DataFrame(data)
meta_df.to_csv(META_CSV, index=False)

for las_file, tree_id in tqdm(zip(fnames, ids), total=len(fnames)):
    las_location = LAS_FOLDER + las_file
    las = laspy.read(las_location)

    # Stack the first three fields of the point records in one vectorized step
    # (same values as reading x[0], x[1], x[2] record by record, without the
    # per-point Python loop).
    # NOTE(review): these are presumably the raw stored X/Y/Z integers, i.e. not
    # scaled by the header scale/offset - confirm that this is intended.
    records = las.points.array
    xyz_points = np.column_stack(
        [records[name] for name in records.dtype.names[:3]]
    ).astype(np.float32)

    n_available = xyz_points.shape[0]
    if n_available == 0:
        raise ValueError(f"{las_location} contains no points; cannot sample {N_POINTS} points")

    # Sample exactly N_POINTS rows: without replacement when the cloud has at
    # least N_POINTS points (including exactly N_POINTS), otherwise with
    # replacement so small clouds are padded with repeated points.
    if n_available >= N_POINTS:
        idx = np.random.choice(n_available, size=N_POINTS, replace=False)
    else:
        idx = np.random.randint(n_available, size=N_POINTS)

    xyz_points = xyz_points[idx, :]

    np.savetxt(f"{TXT_FOLDER}/{tree_id}.txt", xyz_points)

100%|██████████| 2255/2255 [21:18<00:00,  1.76it/s]  
