In [14]:
# Import packages
import time
from pathlib import Path

import joblib
import laspy
import numpy as np
from laspy import LasData
from numpy import ndarray
from sklearn.ensemble import RandomForestClassifier


In [12]:
# Constants
# Input point cloud: read for training and reused as the template for the output file.
SOURCE_FILEPATH = "./data/dane.las"
# Output path template; the "{}" placeholder is filled in with str.format when the result is written.
NEW_FILEPATH = "./result_files/{}.laz"
# Dimension names that are stacked column-wise into the feature matrix.
# NOTE(review): upper-case X/Y/Z are presumably laspy's raw integer coordinates
# (lower-case x/y/z being the scaled ones) — confirm this is intended.
DIMENSIONS = ["X", "Y", "Z"]
# Single dimension used as the label vector.
CLASSIFICATION_DIMENSION = ["classification"]

# Base name (without extension) of the model file stored under ./models/.
RF_MODEL_NAME = "test_random_forest_4"

In [3]:
# Read las file
def read_las(filepath) -> LasData:
    """Read the point cloud file at ``filepath`` with laspy and return the result."""
    return laspy.read(filepath)

# Prepare las data
def prepare_data(las: LasData, dimensions: list) -> ndarray:
    """Extract the requested dimensions of ``las`` as a NumPy array.

    Args:
        las: Object indexable by dimension name (``las[name]``); each lookup
            yields the values of that dimension.
        dimensions: Non-empty list of dimension names to extract.

    Returns:
        For a single name, ``np.array`` of that dimension's values.
        For several names, the per-dimension arrays stacked along a new last
        axis, so each dimension becomes one column.

    Raises:
        ValueError: If ``dimensions`` is empty. Previously this case silently
            returned ``None`` and only failed later, far from the cause.
    """
    if len(dimensions) == 0:
        raise ValueError("dimensions must contain at least one dimension name")

    if len(dimensions) == 1:
        # Single dimension: keep it flat instead of producing an (n, 1) matrix
        return np.array(las[dimensions[0]])

    columns = [las[dim] for dim in dimensions]
    return np.stack(columns, -1)

# Get dimension names
def get_dimension_names(las: LasData) -> list:
    """Return the dimension names of ``las``'s point format as a list."""
    point_format = las.point_format
    return [name for name in point_format.dimension_names]

In [4]:
# Load the point cloud once, then derive the feature matrix and the label vector from it
las = read_las(SOURCE_FILEPATH)

X_train = prepare_data(las, DIMENSIONS)
y_train = prepare_data(las, CLASSIFICATION_DIMENSION)

# Each summary prints a heading, the array shape and the array itself on separate lines
print(" * * *   Train dataset   * * *", X_train.shape, X_train, sep="\n")
print("\n * * *   Train labels   * * *", y_train.shape, y_train, sep="\n")


 * * *   Train dataset   * * *
(39546810, 3)
[[-1319929  -456535  -170181]
 [ -828422 -1019935  -167989]
 [ -828247 -1019953  -167709]
 ...
 [ 1126510   150659    70739]
 [ 1125823   151581    70619]
 [ 1126279   150951    70799]]

 * * *   Train labels   * * *
(39546810,)
[2 3 3 ... 5 5 5]


In [5]:
# Initialize random forest model
# n_jobs=-2 uses all but one CPU core, verbose=2 logs the progress of each tree,
# random_state fixes the seed so the fit is repeatable
rf = RandomForestClassifier(
    n_estimators=5,
    max_depth=5,
    n_jobs=-2,
    random_state=42,
    verbose=2,
)

In [6]:
# Train the random forest on the X/Y/Z feature matrix and the classification labels
rf.fit(X_train, y_train)

[Parallel(n_jobs=-2)]: Using backend ThreadingBackend with 23 concurrent workers.


building tree 1 of 5
building tree 2 of 5
building tree 3 of 5
building tree 4 of 5
building tree 5 of 5


[Parallel(n_jobs=-2)]: Done   2 out of   5 | elapsed:   35.9s remaining:   53.9s
[Parallel(n_jobs=-2)]: Done   5 out of   5 | elapsed:   39.5s remaining:    0.0s
[Parallel(n_jobs=-2)]: Done   5 out of   5 | elapsed:   39.5s finished


In [7]:
# Save model


# Persist the fitted model as ./models/<RF_MODEL_NAME>.joblib
# NOTE(review): presumably the ./models directory has to exist beforehand — confirm
joblib.dump(rf, f'./models/{RF_MODEL_NAME}.joblib')

['./models/test_random_forest_4.joblib']

In [8]:
# Load the learned Random Forest model back from ./models/<RF_MODEL_NAME>.joblib
# NOTE: joblib.load unpickles the file, so only load model files from a trusted source
rf_loaded = joblib.load(f"./models/{RF_MODEL_NAME}.joblib")

In [9]:
# Build the feature matrix that will be classified.
# NOTE(review): this is the same prepare_data(las, DIMENSIONS) call that produced X_train,
# so the model is applied to the data it was trained on — confirm this is intended.
input_data = prepare_data(las, DIMENSIONS)
# Bare expression so the notebook displays the array
input_data

array([[-1319929,  -456535,  -170181],
       [ -828422, -1019935,  -167989],
       [ -828247, -1019953,  -167709],
       ...,
       [ 1126510,   150659,    70739],
       [ 1125823,   151581,    70619],
       [ 1126279,   150951,    70799]])

In [10]:
# Predict the classification of input_data with the model reloaded from disk
predicted_points = rf_loaded.predict(input_data)

[Parallel(n_jobs=5)]: Using backend ThreadingBackend with 5 concurrent workers.
[Parallel(n_jobs=5)]: Done   2 out of   5 | elapsed:   37.8s remaining:   56.7s
[Parallel(n_jobs=5)]: Done   5 out of   5 | elapsed:   41.0s remaining:    0.0s
[Parallel(n_jobs=5)]: Done   5 out of   5 | elapsed:   41.0s finished


In [13]:
# Save predicted points to new laz file

def reclassify_to_new_file(source_filepath: str, new_filepath: str, classification: ndarray):
    """Write a copy of a point cloud file with its classification replaced.

    Args:
        source_filepath: Path of the point cloud that is read with ``laspy.read``.
        new_filepath: Path the modified point cloud is written to. Missing
            parent directories are created.
        classification: Values assigned to the ``classification`` dimension
            (presumably one value per point — verify against the caller).
    """
    # Create the output directory up front: the write would otherwise fail on a
    # missing directory only after the whole source file has been read.
    Path(new_filepath).parent.mkdir(parents=True, exist_ok=True)

    source_cloud = laspy.read(source_filepath)
    source_cloud.classification = classification
    source_cloud.write(new_filepath)

def write_predicted_points_to_las(source_filepath: str, new_filepath: str, predicted_points: ndarray):
    """Save ``predicted_points`` as the classification of a timestamped copy of the source file.

    ``new_filepath`` is a ``str.format`` template; its placeholder is filled
    with ``random_forest_<unix time in whole seconds>`` to build the output path.
    """
    run_label = f"random_forest_{int(time.time())}"
    target_path = new_filepath.format(run_label)
    reclassify_to_new_file(source_filepath, target_path, predicted_points)


    
# Write the predictions into a timestamped copy of the source file
write_predicted_points_to_las(SOURCE_FILEPATH, NEW_FILEPATH, predicted_points)