In [None]:
import os

import util_las as las
import pandas as pd
import numpy as np
import pathlib

In [None]:
# Directory against which every relative path of this notebook is resolved
WORKING_DIR = '/mnt/data-01/nmunger/proj-qalidar/data'

# Point cloud of the previous generation and of the new generation (same tile)
prev_path = 'initial_input_pcd/2018_NE_retiled/2546500_1212000.las'
new_path = 'initial_input_pcd/2022_Neuchatel/2546500_1212000.laz'

# Tile identifier: file name of the new point cloud up to its first dot.
# It is reused to name the outgoing .csv file.
tile_name = pathlib.PurePath(new_path).name.split('.', 1)[0]

# CSV file handed to las.reclassify() to map the classes of the new point cloud
classes_correspondance_path = 'classes_equivalence.csv'

# Voxel sizes in meters, passed as two separate arguments to the voxelisation
vox_xy = 1.5
vox_z = 1.5

# From here on, relative paths are interpreted from WORKING_DIR
os.chdir(WORKING_DIR)

In [None]:
# Read both generations of the tile into dataframes
prev_pc_df = las.las_to_df_xyzclass(prev_path)
new_pc_df = las.las_to_df_xyzclass(new_path)

# Points classified as noise (class 7) in the previous generation carry no useful information: drop them
keep_mask = prev_pc_df['classification'].ne(7)
prev_pc_df = prev_pc_df[keep_mask]

# Map the supplementary classes of the new generation onto the classes of the previous one
new_pc_df = las.reclassify(new_pc_df, classes_correspondance_path)

# The common grid spans both point clouds: on each axis its origin is the lowest
# coordinate found in either cloud and its maximum is the highest one
x_origin, y_origin, z_origin = (
    min(prev_pc_df[axis].min(), new_pc_df[axis].min()) for axis in ('X', 'Y', 'Z')
)
x_max, y_max, z_max = (
    max(prev_pc_df[axis].max(), new_pc_df[axis].max()) for axis in ('X', 'Y', 'Z')
)

grid_origin = (x_origin, y_origin, z_origin)
grid_max = (x_max, y_max, z_max)

In [None]:
# Preview the first rows of the new point cloud after the las.reclassify() step
new_pc_df.head()

In [None]:
# Build the voxelised dataframe of each generation, using the same grid origin,
# grid maximum and voxel sizes for both so that their voxels are comparable
prev_voxelised_df, new_voxelised_df = (
    las.to_voxelised_df(pc_df, grid_origin, grid_max, vox_xy, vox_z)
    for pc_df in (prev_pc_df, new_pc_df)
)

In [None]:
def align_columns(df1, df2):
    """Return copies of two dataframes that expose the same columns.

    A column present in only one dataframe is added to the other one with the
    dtype of the existing column requested, but without any data (every existing
    row holds a missing value). The first three columns of each dataframe are
    then left in place, while all following columns are reordered by sorted label.

    Args:
        df1: first dataframe; it is not modified.
        df2: second dataframe; it is not modified.

    Returns:
        Tuple ``(df1_aligned, df2_aligned)`` of new dataframes.
    """

    def _sort_trailing_columns(frame):
        # Keep the first three columns as they are, sort the remaining ones by label
        trailing = frame.iloc[:, 3:]
        trailing = trailing.reindex(columns=sorted(trailing.columns))
        leading = frame.drop(columns=frame.columns[3:])
        return pd.concat([leading, trailing], axis=1)

    # Work on deep copies so that the caller's dataframes stay untouched
    left = df1.copy(deep=True)
    right = df2.copy(deep=True)

    # Columns only found in the second dataframe are created, empty, in the first one
    for label in set(right.columns).difference(left.columns):
        left[label] = pd.Series(dtype=right[label].dtype)

    # ... and the other way around
    for label in set(left.columns).difference(right.columns):
        right[label] = pd.Series(dtype=left[label].dtype)

    return _sort_trailing_columns(left), _sort_trailing_columns(right)

In [None]:
# Give both voxelised dataframes the same set of columns: a class present in only one of them
# is added to the other as an empty column, and the class columns end up in the same sorted order
prev_voxelised_df, new_voxelised_df = align_columns(prev_voxelised_df, new_voxelised_df)

In [None]:
# Show the first two rows of each aligned dataframe, previous generation first
display(prev_voxelised_df.head(2), new_voxelised_df.head(2))

In [None]:
# The raw point clouds are no longer needed once voxelised: release their memory
del prev_pc_df, new_pc_df

In [None]:
# Join the two generations on the voxel coordinates. The outer join keeps voxels found in
# only one generation; columns shared by both get the '_prev' / '_new' suffix.
voxel_keys = ['X_grid', 'Y_grid', 'Z_grid']
merged_df = prev_voxelised_df.merge(
    new_voxelised_df,
    how='outer',
    on=voxel_keys,
    suffixes=('_prev', '_new'),
)

In [None]:
# Replace every missing value by 0 (e.g. those the outer merge leaves for voxels found in only one generation).
# fillna() is used instead of replace(np.NaN, 0): the np.NaN alias no longer exists in NumPy >= 2.0
merged_df = merged_df.fillna(0)

In [None]:
# Preview the merged dataframe after its missing values were replaced by 0
merged_df.head()

In [None]:
# Give every voxel a fixed identifier, taken from the index of the merged dataframe
merged_df['vox_id'] = merged_df.index

# Create the folder that stores the .csv file in case it does not exist yet
out_dir = pathlib.Path('out_dataframe/voxelised_comparison')
out_dir.mkdir(parents=True, exist_ok=True)

# The file name carries the voxel sizes in centimeters, which keeps it free of decimal points.
# round() is used rather than int(): int() truncates, and a product such as 0.29 * 100
# evaluates to 28.999999999999996, which would end up as 28 in the file name.
vox_xy_cm = round(vox_xy * 100)
vox_z_cm = round(vox_z * 100)

merged_df.to_csv(out_dir / f'{tile_name}_{vox_xy_cm}-{vox_z_cm}.csv', index=False)