In [1]:
import os
import pandas as pd
import geopandas as gpd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt
import open3d as o3d
import laspy
import time
import pathlib

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


### Refer to the criticity tree made on Lucidchart for the definition of the criticity labels

In [2]:
# Input: folder and file name of the voxelised comparison (.csv) to label
folder_path = '../out_dataframe/voxelised_comparison'
file_name = '2546500_1212000_150-150.csv'

# Output: folder in which the labelled dataframe is saved
save_folder_path = '../out_dataframe/criticity_changes_df'

# Create the output folder (and any missing parent) if it does not exist yet; an existing folder is left as is
pathlib.Path(save_folder_path).mkdir(parents=True, exist_ok=True)

In [3]:
# The part of the file name after the last '_' holds the voxel dimensions as '<width>-<height>',
# everything before it is the tile name
stem = file_name.replace('.csv', '')
tile_name, voxel_dimension = stem.rsplit('_', maxsplit=1)
vox_width_str, vox_height_str = voxel_dimension.split('-')

# Must convert the voxel width/height from centimeters to meters
vox_width, vox_height = (float(size_cm) / 100 for size_cm in (vox_width_str, vox_height_str))

In [4]:
# Load the voxelised comparison and start with every change criticity set to 'TBD' (= to be determined)
df = pd.read_csv(os.path.join(folder_path, file_name)).assign(change_criticity='TBD')

In [5]:
df.head(2)

Unnamed: 0,X_grid,Y_grid,Z_grid,1_prev,2_prev,3_prev,6_prev,7_prev,17_prev,1_new,2_new,3_new,6_new,7_new,17_new,change_criticity
0,2546500.75,1212000.75,1015.75,0.0,19.0,0.0,0.0,0.0,0.0,0.0,130.0,0.0,0.0,0.0,0.0,TBD
1,2546500.75,1212002.25,1015.75,0.0,15.0,0.0,0.0,0.0,0.0,0.0,122.0,0.0,0.0,0.0,0.0,TBD


### Decision A: Is there only one class in both generations, and is it the same?

In [6]:
voxels_to_evaluate = df.loc[df['change_criticity'] == 'TBD']

# Boolean occupancy per class: True when the voxel holds at least one point of the class
is_prev_column = voxels_to_evaluate.columns.str.endswith('_prev')
is_new_column = voxels_to_evaluate.columns.str.endswith('_new')
voxels_to_evaluate_prev = voxels_to_evaluate.loc[:, is_prev_column].to_numpy().astype(bool)
voxels_to_evaluate_new = voxels_to_evaluate.loc[:, is_new_column].to_numpy().astype(bool)

# Exactly one class in each generation, and the same class is occupied in both
single_class_prev = voxels_to_evaluate_prev.sum(axis=1) == 1
single_class_new = voxels_to_evaluate_new.sum(axis=1) == 1
same_occupancy = (voxels_to_evaluate_prev == voxels_to_evaluate_new).all(axis=1)
mask = single_class_prev & single_class_new & same_occupancy

# Rows for which the mask is True become 'non_prob-1', the others stay 'TBD'
df.loc[voxels_to_evaluate.index, 'change_criticity'] = np.where(mask, 'non_prob-1', 'TBD')

In [7]:
df[df['change_criticity']=='non_prob-1'].head(2)

Unnamed: 0,X_grid,Y_grid,Z_grid,1_prev,2_prev,3_prev,6_prev,7_prev,17_prev,1_new,2_new,3_new,6_new,7_new,17_new,change_criticity
0,2546500.75,1212000.75,1015.75,0.0,19.0,0.0,0.0,0.0,0.0,0.0,130.0,0.0,0.0,0.0,0.0,non_prob-1
1,2546500.75,1212002.25,1015.75,0.0,15.0,0.0,0.0,0.0,0.0,0.0,122.0,0.0,0.0,0.0,0.0,non_prob-1


### Decision B: 'Is there noise in the new voxel?'
#### Originally was: "Is the new class anything else but noise?", but changed to this for simplicity

In [8]:
# Every voxel holding points of class 7 in the new generation is flagged, whatever label it carried before
noise_in_new_voxel = df['7_new'] > 0
df.loc[noise_in_new_voxel, 'change_criticity'] = 'problematic-13'

In [9]:
df[df['change_criticity']=='problematic-13'].head(2)

Unnamed: 0,X_grid,Y_grid,Z_grid,1_prev,2_prev,3_prev,6_prev,7_prev,17_prev,1_new,2_new,3_new,6_new,7_new,17_new,change_criticity
2650,2546502.25,1212233.25,1032.25,0.0,17.0,0.0,0.0,0.0,0.0,0.0,122.0,6.0,0.0,16.0,0.0,problematic-13
2653,2546502.25,1212234.75,1032.25,0.0,11.0,4.0,0.0,0.0,0.0,0.0,50.0,25.0,0.0,2.0,0.0,problematic-13


### Decision C: Do the number of classes and their distribution stay the same?
**Currently using cosine similarity to evaluate this**

In [10]:
# Voxels whose criticity is already determined get a cosine similarity of 1.0; the 'TBD' ones are
# initialised to 0 and overwritten with their actual value below
df['cosine_similarity'] = np.where(df['change_criticity']=='TBD', 0, 1.0)

voxels_to_evaluate =  df[df['change_criticity']=='TBD']

# Compute cosine similarity between the previous and the new class distribution of each voxel.
# The counts are cast to float so that the guarded division below also works when the class columns
# were read as integers (np.divide cannot write its float result into an integer `out` array)
voxels_to_evaluate_prev = voxels_to_evaluate.iloc[:, voxels_to_evaluate.columns.str.endswith('_prev')].values.astype(float)
voxels_to_evaluate_new = voxels_to_evaluate.iloc[:, voxels_to_evaluate.columns.str.endswith('_new')].values.astype(float)
dot_product = np.sum(voxels_to_evaluate_prev * voxels_to_evaluate_new, axis=1)
product_of_norm = np.linalg.norm(voxels_to_evaluate_prev, axis=1)*np.linalg.norm(voxels_to_evaluate_new, axis=1)

# For cases where one vector is completely empty, avoid division by zero and replace by -1
cosine_similarity = np.divide(dot_product, product_of_norm, out = np.full_like(dot_product, -1), where = product_of_norm!=0)
df.loc[df['change_criticity']=='TBD', 'cosine_similarity'] = cosine_similarity

#TODO: Important parameter, must be tuned!
cos_threshold = 0.9

# Mask where True if the boolean presence of the classes are exactly the same in both generation
same_class_present = np.all(df.iloc[:, df.columns.str.endswith('_prev')].values.astype(bool) == df.iloc[:, df.columns.str.endswith('_new')].values.astype(bool), axis=1)

# Still undetermined, similar distribution and exactly the same classes present -> non problematic
df.loc[(df['cosine_similarity']>cos_threshold) & (df['change_criticity']=='TBD') & (same_class_present), 'change_criticity'] = 'non_prob-2'

In [11]:
df[df['change_criticity']=='non_prob-2'].head(2)

Unnamed: 0,X_grid,Y_grid,Z_grid,1_prev,2_prev,3_prev,6_prev,7_prev,17_prev,1_new,2_new,3_new,6_new,7_new,17_new,change_criticity,cosine_similarity
65,2546500.75,1212063.75,1015.75,0.0,10.0,1.0,0.0,0.0,0.0,0.0,102.0,6.0,0.0,0.0,0.0,non_prob-2,0.999163
570,2546500.75,1212183.75,1021.75,0.0,3.0,9.0,0.0,0.0,0.0,0.0,28.0,97.0,0.0,0.0,0.0,non_prob-2,0.999171


### Decision D: Do the previous classes keep the same distribution?

#### Computing the cosine similarity only between classes which are present in the previous generation 
Note: if only one class is present in the previous generation, the cosine similarity is either 1 or -1 (invalid division), which doesn't provide much information; possibly compare the Euclidean distance between the normalised densities instead.

In [12]:
voxels_to_evaluate = df[df['change_criticity']=='TBD']

# Only the voxels still 'TBD' receive a value below, the others stay NaN
df['second_cosine_similarity'] = np.nan

# The counts are cast to float so that the guarded division below also works when the class columns
# were read as integers (np.divide cannot write its float result into an integer `out` array)
voxels_to_evaluate_prev = voxels_to_evaluate.iloc[:, voxels_to_evaluate.columns.str.endswith('_prev')].values.astype(float)
voxels_to_evaluate_new = voxels_to_evaluate.iloc[:, voxels_to_evaluate.columns.str.endswith('_new')].values.astype(float)
dot_product = np.sum(voxels_to_evaluate_prev * voxels_to_evaluate_new, axis=1)
# For new vector, only take values for which class is present in the previous vector
product_of_norm = np.linalg.norm(voxels_to_evaluate_prev, axis=1)*np.linalg.norm(voxels_to_evaluate_prev.astype(bool) * voxels_to_evaluate_new, axis=1) 

# For cases where one vector is completely empty, avoid division by zero and replace by -1
cosine_similarity = np.divide(dot_product, product_of_norm, out = np.full_like(dot_product, -1), where = product_of_norm!=0)

df.loc[voxels_to_evaluate.index, 'second_cosine_similarity'] = cosine_similarity

#TODO: tune this parameter
second_cos_threshold=0.9

# Added condition of 'df.cosine_similarity!=-1' as this represent cases of complete disparition in the voxel which we want to keep for decision G
# (already determined voxels have a NaN second cosine similarity, so the comparison is False for them)
df.loc[(df.second_cosine_similarity<second_cos_threshold) & (df.cosine_similarity!=-1), 'change_criticity']='problematic-12'

In [13]:
df[df['change_criticity']=='problematic-12'].head(2)

Unnamed: 0,X_grid,Y_grid,Z_grid,1_prev,2_prev,3_prev,6_prev,7_prev,17_prev,1_new,2_new,3_new,6_new,7_new,17_new,change_criticity,cosine_similarity,second_cosine_similarity
30,2546500.75,1212044.25,1017.25,0.0,0.0,1.0,0.0,0.0,0.0,10.0,0.0,0.0,0.0,0.0,0.0,problematic-12,0.0,-1.0
33,2546500.75,1212045.75,1017.25,0.0,5.0,7.0,0.0,0.0,0.0,65.0,4.0,0.0,0.0,0.0,0.0,problematic-12,0.035701,0.581238


### Decision E: is the change due to class 1?

In [14]:
# The voxels are compared without their unclassified points (class 1): if they are similar once class 1
# is left out, the difference between the generations comes from the unclassified points.
voxels_to_evaluate = df[df['change_criticity']=='TBD'].drop(columns=['1_prev','1_new'])

# Specific case of an apparition or disparition due to class 1 only: once class 1 is left out,
# the voxel is empty in both the previous and the new generation
count_columns = voxels_to_evaluate.columns.str.contains('_prev|_new')
points_without_class_1 = voxels_to_evaluate.loc[:, count_columns].to_numpy().sum(axis=1)
mask_disparition_apparition = points_without_class_1 == 0
df.loc[voxels_to_evaluate.index[mask_disparition_apparition], 'change_criticity'] = 'class_1_specific'

In [15]:
df[df.change_criticity=='class_1_specific'].head(2)

Unnamed: 0,X_grid,Y_grid,Z_grid,1_prev,2_prev,3_prev,6_prev,7_prev,17_prev,1_new,2_new,3_new,6_new,7_new,17_new,change_criticity,cosine_similarity,second_cosine_similarity
2020,2546500.75,1212495.75,958.75,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,class_1_specific,-1.0,-1.0
11323,2546509.75,1212227.25,1029.25,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,class_1_specific,-1.0,-1.0


In [16]:
# Cosine similarity between the previous and new distributions of the 'TBD' voxels, class 1 left out.
# The counts are cast to float so that the guarded division below also works when the class columns
# were read as integers (np.divide cannot write its float result into an integer `out` array)
voxels_to_evaluate = df[df['change_criticity']=='TBD'].drop(columns=['1_prev','1_new'])
voxels_to_evaluate_prev = voxels_to_evaluate.iloc[:, voxels_to_evaluate.columns.str.endswith('_prev')].values.astype(float)
voxels_to_evaluate_new = voxels_to_evaluate.iloc[:, voxels_to_evaluate.columns.str.endswith('_new')].values.astype(float)
dot_product = np.sum(voxels_to_evaluate_prev * voxels_to_evaluate_new, axis=1)

product_of_norm = np.linalg.norm(voxels_to_evaluate_prev, axis=1)*np.linalg.norm(voxels_to_evaluate_new, axis=1) 

# For cases where one vector is completely empty, avoid division by zero and replace by -1
cosine_similarity = np.divide(dot_product, product_of_norm, out = np.full_like(dot_product, -1), where = product_of_norm!=0)

In [17]:
# Store the cosine similarity computed without class 1; only the rows of voxels_to_evaluate are written
df.loc[voxels_to_evaluate.index, 'third_cosine_similarity'] = cosine_similarity

In [18]:
#TODO: tune this parameter
# Value the cosine similarity computed without class 1 must exceed for a voxel to be considered
# unchanged apart from class 1
third_cosine_threshold = 0.9

We want to find the voxels which have changed **because** of class 1. We assume those are the ones for which the cosine similarity was low when considering all the classes but is actually high if we don't consider class 1. <br> (Note that the condition on the first cosine threshold is necessary since in decision C we ask whether the distribution stays the same **and** that the classes don't change. This keeps a lot of voxels which have a very high cosine similarity but which do not have exactly the same classes.)

In [19]:
# Low similarity with all the classes but high similarity once class 1 is left out:
# the change is attributed to class 1
still_undetermined = df['change_criticity'] == 'TBD'
similar_without_class_1 = df['third_cosine_similarity'] > third_cosine_threshold
dissimilar_with_all_classes = df['cosine_similarity'] < cos_threshold
df.loc[still_undetermined & similar_without_class_1 & dissimilar_with_all_classes, 'change_criticity'] = 'class_1_specific'

In [20]:
df[df.change_criticity=='class_1_specific'].head(2)

Unnamed: 0,X_grid,Y_grid,Z_grid,1_prev,2_prev,3_prev,6_prev,7_prev,17_prev,1_new,2_new,3_new,6_new,7_new,17_new,change_criticity,cosine_similarity,second_cosine_similarity,third_cosine_similarity
2020,2546500.75,1212495.75,958.75,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,class_1_specific,-1.0,-1.0,
2062,2546502.25,1212050.25,1017.25,0.0,0.0,13.0,0.0,0.0,0.0,21.0,0.0,1.0,0.0,0.0,0.0,class_1_specific,0.047565,1.0,1.0


### Decision F: Does the class 1 have a low presence in the new voxel?

In [21]:
# Total number of points in each generation, used to bring the new counts to the scale of the previous ones
is_prev_column = df.columns.str.endswith('_prev')
is_new_column = df.columns.str.endswith('_new')
nb_points_prev = df.loc[:, is_prev_column].to_numpy().sum()
nb_points_new = df.loc[:, is_new_column].to_numpy().sum()
normalising_factor = nb_points_prev / nb_points_new

# Normalised number of class 1 points in the new generation of the 'class_1_specific' voxels
class_1_new_normalised = df.loc[df['change_criticity'] == 'class_1_specific', '1_new'] * normalising_factor

#TODO: tune this parameters
threshold_class_1_presence = 1

# Low presence of class 1 -> non problematic, otherwise grey zone
low_class_1_presence = class_1_new_normalised < threshold_class_1_presence
df.loc[class_1_new_normalised.index, 'change_criticity'] = np.where(low_class_1_presence, 'non_prob-3', 'grey_zone-7')

In [22]:
df[df.change_criticity=='non_prob-3'].head(2)

Unnamed: 0,X_grid,Y_grid,Z_grid,1_prev,2_prev,3_prev,6_prev,7_prev,17_prev,1_new,2_new,3_new,6_new,7_new,17_new,change_criticity,cosine_similarity,second_cosine_similarity,third_cosine_similarity
2020,2546500.75,1212495.75,958.75,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,non_prob-3,-1.0,-1.0,
11323,2546509.75,1212227.25,1029.25,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,non_prob-3,-1.0,-1.0,


In [23]:
df[df.change_criticity=='grey_zone-7'].head(2)

Unnamed: 0,X_grid,Y_grid,Z_grid,1_prev,2_prev,3_prev,6_prev,7_prev,17_prev,1_new,2_new,3_new,6_new,7_new,17_new,change_criticity,cosine_similarity,second_cosine_similarity,third_cosine_similarity
2062,2546502.25,1212050.25,1017.25,0.0,0.0,13.0,0.0,0.0,0.0,21.0,0.0,1.0,0.0,0.0,0.0,grey_zone-7,0.047565,1.0,1.0
3988,2546503.75,1212050.25,1017.25,0.0,0.0,12.0,0.0,0.0,0.0,62.0,0.0,56.0,0.0,0.0,0.0,grey_zone-7,0.670286,1.0,1.0


### Decision G: Is the change from (empty -> class x) | (class x -> empty)

In [24]:
# Undetermined voxel with an undefined cosine similarity (-1) and points in the previous generation:
# everything in the voxel disappeared
still_undetermined = df['change_criticity'] == 'TBD'
undefined_similarity = df['cosine_similarity'] == -1
occupied_prev = df.loc[:, df.columns.str.endswith('_prev')].sum(axis=1) != 0
df.loc[still_undetermined & undefined_similarity & occupied_prev, 'change_criticity'] = 'disparition'

In [25]:
df[df.change_criticity == 'disparition'].head(2)

Unnamed: 0,X_grid,Y_grid,Z_grid,1_prev,2_prev,3_prev,6_prev,7_prev,17_prev,1_new,2_new,3_new,6_new,7_new,17_new,change_criticity,cosine_similarity,second_cosine_similarity,third_cosine_similarity
31,2546500.75,1212044.25,1018.75,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,disparition,-1.0,-1.0,-1.0
45,2546500.75,1212051.75,1018.75,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,disparition,-1.0,-1.0,-1.0


In [26]:
# Undetermined voxel with an undefined cosine similarity (-1) and points in the new generation:
# the voxel was empty before and is now occupied
still_undetermined = df['change_criticity'] == 'TBD'
undefined_similarity = df['cosine_similarity'] == -1
occupied_new = df.loc[:, df.columns.str.endswith('_new')].sum(axis=1) != 0
df.loc[still_undetermined & undefined_similarity & occupied_new, 'change_criticity'] = 'apparition'

In [27]:
df[df.change_criticity == 'apparition'].head(2)

Unnamed: 0,X_grid,Y_grid,Z_grid,1_prev,2_prev,3_prev,6_prev,7_prev,17_prev,1_new,2_new,3_new,6_new,7_new,17_new,change_criticity,cosine_similarity,second_cosine_similarity,third_cosine_similarity
438638,2546500.75,1212059.25,1020.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,apparition,-1.0,-1.0,-1.0
438639,2546500.75,1212060.75,1017.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,0.0,0.0,0.0,apparition,-1.0,-1.0,-1.0


### Decision H: do the neighbouring voxels contain also the new class (case of apparition/disparition)?

Newer method which considers the combination of the classes present in all the neighbouring voxels, as opposed to the previous version where one neighbouring voxel had to be exactly similar for it to be considered non-problematic.

In [28]:
def compare_to_neighbours(df, tree, kd_tree_query_radius, case='TBD'):
    '''Label the voxels whose `change_criticity` equals `case` from the occupancy of their neighbours.

    For each voxel to evaluate, the boolean class occupancies of all its neighbours (the voxels
    returned by `tree.query_radius`, the voxel itself excluded) are combined. The classes of the voxel
    are "present in the neighbours" when each class it contains is found in at least one neighbour.

    The cases can be:
     -"TBD", we compare the new vox. occupancy with the new neighbours occupancies
        -> 'grey_zone-8' if present, 'problematic-11' otherwise
     -"apparition", we compare the new vox. occupancy with the previous neighbours occupancies
        -> 'non_prob-5' if present, 'problematic-10' otherwise
     -"disparition", we compare the prev. vox. occupancy with the new neighbours occupancies
        -> 'non_prob-4' if present, 'problematic-9' otherwise

    Parameters
    ----------
    df : pd.DataFrame
        Holds 'X_grid', 'Y_grid', 'Z_grid', 'change_criticity' and the per-class count columns
        ending with 'prev' and 'new'.
    tree : object exposing `query_radius(points, radius)`
        The ids it returns are used as row positions in `df`, so it is assumed to have been built from
        the coordinates of `df` in the same row order.
    kd_tree_query_radius : float
        Radius passed to `tree.query_radius`.
    case : str
        One of 'TBD', 'apparition' or 'disparition'.

    Returns
    -------
    pd.DataFrame
        A copy of `df` with the updated 'change_criticity'; the input frame is not modified.

    Raises
    ------
    ValueError
        If `case` is not one of the three supported values.
    '''
    # case -> (columns of the voxel to evaluate, columns of the neighbours, label if present, label if absent)
    settings_per_case = {
        'TBD': ('new', 'new', 'grey_zone-8', 'problematic-11'),
        'apparition': ('new', 'prev', 'non_prob-5', 'problematic-10'),
        'disparition': ('prev', 'new', 'non_prob-4', 'problematic-9'),
    }
    if case not in settings_per_case:
        raise ValueError(f"case must be one of {list(settings_per_case)}, got {case!r}")
    to_evaluate, to_compare, label_if_present, label_if_absent = settings_per_case[case]

    df = df.copy()

    # Work with row positions rather than index labels: the ids returned by the tree are positions,
    # which only coincide with the labels when df has a default RangeIndex
    case_mask = (df['change_criticity'] == case).to_numpy(dtype=bool)
    positions = np.flatnonzero(case_mask)

    # Nothing to evaluate for this case: return the untouched copy instead of querying with an empty array
    if positions.size == 0:
        return df

    coordinates = df.loc[:, ['X_grid', 'Y_grid', 'Z_grid']].values

    # Query all ids of neighbours to the locations to evaluate. This also returns the id of the voxel itself
    all_neighbours_ids = tree.query_radius(coordinates[positions], kd_tree_query_radius)

    # Boolean occupancy of every voxel, in the generation the neighbours are looked at
    occupancy_to_compare = df.loc[:, df.columns.str.endswith(to_compare)].values.astype(bool)

    # Combined occupancy of the neighbours of each voxel, the voxel itself being removed from its own set
    # (a voxel without any neighbour gets an all-False occupancy)
    neighbours_occupancy = np.array([
        occupancy_to_compare[ids[ids != position]].any(axis=0)
        for ids, position in zip(all_neighbours_ids, positions)
    ])

    voxels_to_evaluate_bool = df.loc[:, df.columns.str.endswith(to_evaluate)].values[positions].astype(bool)

    # For each voxel to evaluate, check that every class present in it is also present in the neighbours
    presence_in_neighbours = np.all(~voxels_to_evaluate_bool | neighbours_occupancy, axis=1)

    df.loc[case_mask, 'change_criticity'] = np.where(presence_in_neighbours, label_if_present, label_if_absent)

    return df

In [29]:
from sklearn.neighbors import KDTree
# Spatial index built on the voxel coordinates, in the row order of df
tree = KDTree(df[['X_grid','Y_grid','Z_grid']].values)

#TODO: tune this parameter
# Radius of the neighbourhood query, as a multiple of the voxel height:
# - vox_height: up to 6 neighbours
# - 2**(1/2)*vox_height: up to 18 neighbours
# - 3**(1/2)*vox_height: up to 26 neighbours
# NOTE(review): only vox_height is used; the neighbour counts above presumably assume
# vox_width == vox_height - confirm for non-cubic voxels
kd_tree_query_radius = 2**(1/2)*vox_height 

In [30]:
# ORIGINAL METHODOLOGY, LEFT FOR LEGACY

# def compare_rows_apparition(voxel, df, tree, radius):
#     # Should return True if a neighbour voxel had the same distribution we are observing in the appeared voxel, False otherwise

#     neighbour_voxel_ids = tree.query_radius([[voxel.X_grid, voxel.Y_grid, voxel.Z_grid]], radius)[0]
    
#     new_vox_occupancy = voxel.iloc[voxel.index.str.endswith('_new')].values.astype(bool)

#     for id in neighbour_voxel_ids:
#         if np.all(new_vox_occupancy == df.iloc[id, df.columns.str.endswith('_prev')].values.astype(bool)): # Did the previous generation of the neighbouring voxel share the same characteristics as the new we are evaluating?

#             return True

#     return False

# appeared_vox_status = df[df.change_criticity == 'apparition'].apply(lambda row: compare_rows_apparition(row, df, tree, radius=kd_tree_query_radius), axis=1)

# df.loc[df.change_criticity=='apparition', 'change_criticity'] = np.where(appeared_vox_status==True,'non_prob-5','problematic-10')

In [31]:
# NEW METHODOLOGY:
# The classes of each 'apparition' voxel (new generation) are looked for in the previous-generation
# occupancy of its neighbours: 'non_prob-5' if they are all found, 'problematic-10' otherwise
df = compare_to_neighbours(df, tree, kd_tree_query_radius, case='apparition')

In [32]:
df[df.change_criticity=='non_prob-5'].head(2)

Unnamed: 0,X_grid,Y_grid,Z_grid,1_prev,2_prev,3_prev,6_prev,7_prev,17_prev,1_new,2_new,3_new,6_new,7_new,17_new,change_criticity,cosine_similarity,second_cosine_similarity,third_cosine_similarity
438638,2546500.75,1212059.25,1020.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,non_prob-5,-1.0,-1.0,-1.0
438639,2546500.75,1212060.75,1017.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,0.0,0.0,0.0,non_prob-5,-1.0,-1.0,-1.0


In [33]:
# ORIGINAL METHODOLOGY, LEFT FOR LEGACY

# def compare_rows_disparition(voxel,df,tree, radius):
#     # Should return True if a neighbour voxel has the same distribution we were observing in the disappeared voxel, False otherwise
#     neighbour_voxel_id = tree.query_radius([[voxel.X_grid, voxel.Y_grid, voxel.Z_grid]], radius)[0]
    
#     prev_vox_occupancy = voxel.iloc[voxel.index.str.endswith('_prev')].values.astype(bool)

#     for id in neighbour_voxel_id:
#         if np.all(prev_vox_occupancy == df.iloc[id, df.columns.str.endswith('_new')].values.astype(bool)): # Does the new generation of the neighbouring voxel share the same characteristics as the previous we are evaluating?
#             return True

#     return False

# disappeared_vox_status=df[df.change_criticity == 'disparition'].apply(lambda row: compare_rows_disparition(row, df, tree, radius=kd_tree_query_radius),axis=1)

# df.loc[df.change_criticity=='disparition', 'change_criticity'] = np.where(disappeared_vox_status==True, 'non_prob-4', 'problematic-9')

In [34]:
# NEW METHODOLOGY:
# The classes of each 'disparition' voxel (previous generation) are looked for in the new-generation
# occupancy of its neighbours: 'non_prob-4' if they are all found, 'problematic-9' otherwise
df = compare_to_neighbours(df, tree, kd_tree_query_radius, case='disparition')

In [35]:
df[df.change_criticity == 'non_prob-4'].head(2)

Unnamed: 0,X_grid,Y_grid,Z_grid,1_prev,2_prev,3_prev,6_prev,7_prev,17_prev,1_new,2_new,3_new,6_new,7_new,17_new,change_criticity,cosine_similarity,second_cosine_similarity,third_cosine_similarity
31,2546500.75,1212044.25,1018.75,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,non_prob-4,-1.0,-1.0,-1.0
51,2546500.75,1212056.25,1018.75,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,non_prob-4,-1.0,-1.0,-1.0


### Decision I: do the neighbouring voxels contain also the new class (case of change of distribution)?

In [36]:
# ORIGINAL METHODOLOGY, LEFT FOR LEGACY

# def compare_rows_change(voxel,df,tree, radius):
#     # Should return True if a neighbour voxel has the same distribution we are observing in the current problematic voxel, False otherwise
#     neighbour_voxel_id = tree.query_radius([[voxel.X_grid, voxel.Y_grid, voxel.Z_grid]], radius)[0]
    
#     vox_occupancy = voxel.iloc[voxel.index.str.endswith('_new')].values.astype(bool) # For the individual voxel, get the class occupancy 

#     for id in neighbour_voxel_id[neighbour_voxel_id!=voxel.name]: # Only consider the voxel in the neighborhood which are not the voxel itself
#         if np.all(vox_occupancy == df.iloc[id, df.columns.str.endswith('_new')].values.astype(bool)): # Does the new generation of the neighbouring voxel share the same characteristics as the one we are evaluating?
#             return True

#     return False

# changed_vox_status=df[df.change_criticity == 'TBD'].apply(lambda row: compare_rows_change(row, df, tree, radius=kd_tree_query_radius),axis=1)

# df.loc[df.change_criticity=='TBD', 'change_criticity'] = np.where(changed_vox_status==True, 'grey_zone-8', 'problematic-11')

In [37]:
# NEW METHODOLOGY:
# The new-generation classes of each voxel still 'TBD' are looked for in the new-generation occupancy
# of its neighbours: 'grey_zone-8' if they are all found, 'problematic-11' otherwise
df = compare_to_neighbours(df, tree, kd_tree_query_radius, case='TBD')

In [38]:
df[df.change_criticity=='problematic-11'].head(2)

Unnamed: 0,X_grid,Y_grid,Z_grid,1_prev,2_prev,3_prev,6_prev,7_prev,17_prev,1_new,2_new,3_new,6_new,7_new,17_new,change_criticity,cosine_similarity,second_cosine_similarity,third_cosine_similarity
257,2546500.75,1212114.75,1015.75,0.0,7.0,0.0,0.0,0.0,0.0,0.0,97.0,1.0,0.0,0.0,0.0,problematic-11,0.999947,1.0,0.999947
297,2546500.75,1212119.25,1015.75,0.0,2.0,0.0,0.0,0.0,0.0,0.0,92.0,1.0,0.0,0.0,0.0,problematic-11,0.999941,1.0,0.999941


In [39]:
df[df.change_criticity=='grey_zone-8'].head(2)

Unnamed: 0,X_grid,Y_grid,Z_grid,1_prev,2_prev,3_prev,6_prev,7_prev,17_prev,1_new,2_new,3_new,6_new,7_new,17_new,change_criticity,cosine_similarity,second_cosine_similarity,third_cosine_similarity
29,2546500.75,1212044.25,1015.75,0.0,15.0,0.0,0.0,0.0,0.0,0.0,122.0,1.0,0.0,0.0,0.0,grey_zone-8,0.999966,1.0,0.999966
32,2546500.75,1212045.75,1015.75,0.0,3.0,1.0,0.0,0.0,0.0,4.0,56.0,1.0,0.0,0.0,0.0,grey_zone-8,0.951754,0.954178,0.954178


### Decision J: Additional check-up for class 6 (building). If apparition, check if a voxel located exactly above contains class 6 and is non-problematic
This is to solve the problem of points appearing on the facades with a higher-density LiDAR scanning

Find out which is the majority class in the voxel

In [40]:
# Name of the count column (previous and new generations together) holding the most points in each voxel
count_columns = df.columns[df.columns.str.contains('_prev|_new')]
df['majority_class'] = df[count_columns].idxmax(axis=1)

In [41]:
# For each planar grid cell, altitude of the highest voxel holding building points (class 6)
# in the new generation
building_voxels = df.loc[df['6_new'] > 0]
highest_building_voxel_df = (
    building_voxels
    .groupby(['X_grid', 'Y_grid'])['Z_grid']
    .max()
    .rename('highest_building_voxel')
    .reset_index()
)

In [42]:
highest_building_voxel_df.head(2)

Unnamed: 0,X_grid,Y_grid,highest_building_voxel
0,2546500.75,1212237.75,1030.75
1,2546500.75,1212239.25,1030.75


In [43]:
# Attach to each highest building voxel the change criticity it received in df
highest_building_voxel_df = (
    highest_building_voxel_df
    .merge(
        df,
        how='left',
        left_on=['X_grid', 'Y_grid', 'highest_building_voxel'],
        right_on=['X_grid', 'Y_grid', 'Z_grid'],
    )
    .loc[:, ['X_grid', 'Y_grid', 'highest_building_voxel', 'change_criticity']]
    .rename(columns={'change_criticity': 'change_criticity_highest_building_voxel'})
)

highest_building_voxel_df.head(2)

Unnamed: 0,X_grid,Y_grid,highest_building_voxel,change_criticity_highest_building_voxel
0,2546500.75,1212237.75,1030.75,grey_zone-8
1,2546500.75,1212239.25,1030.75,non_prob-1


In [44]:
# Voxels with a problematic apparition whose majority class is building (new generation), matched with
# the highest building voxel of their planar grid cell. The original index of df is kept through the merge.
problematic_building_apparition = (df['change_criticity'] == 'problematic-10') & (df['majority_class'] == '6_new')
temporary_df = (
    df.loc[problematic_building_apparition]
    .reset_index()
    .merge(highest_building_voxel_df, how='left', on=['X_grid', 'Y_grid'])
    .set_index('index')
)

In [45]:
temporary_df[['X_grid','Y_grid','Z_grid','highest_building_voxel','change_criticity_highest_building_voxel']].tail(4)

Unnamed: 0_level_0,X_grid,Y_grid,Z_grid,highest_building_voxel,change_criticity_highest_building_voxel
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
543161,2547000.25,1212069.75,1012.75,1012.75,problematic-10
543189,2547000.25,1212122.25,1018.75,1026.25,non_prob-1
543190,2547000.25,1212122.25,1020.25,1026.25,non_prob-1
543227,2547000.25,1212356.25,1033.75,1039.75,non_prob-1


In [46]:
# Index (labels of df) of the voxels whose highest building voxel in the same planar cell is non problematic
highest_voxel_is_non_prob = temporary_df['change_criticity_highest_building_voxel'].str.contains('non_prob')
non_problematic_6_apparition_idx = temporary_df.loc[highest_voxel_is_non_prob].index

In [47]:
# non_problematic_6_apparition_idx holds index *labels* of df (carried through temporary_df), so the
# assignment must be label-based: .iloc would only hit the right rows with a default RangeIndex
df.loc[non_problematic_6_apparition_idx, 'change_criticity'] = 'non_prob-6'

In [48]:
df[df.change_criticity=='non_prob-6'].head(2)

Unnamed: 0,X_grid,Y_grid,Z_grid,1_prev,2_prev,3_prev,6_prev,7_prev,17_prev,1_new,2_new,3_new,6_new,7_new,17_new,change_criticity,cosine_similarity,second_cosine_similarity,third_cosine_similarity,majority_class
446436,2546526.25,1212233.25,1029.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,non_prob-6,-1.0,-1.0,-1.0,6_new
449079,2546533.75,1212219.75,1026.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,4.0,0.0,0.0,non_prob-6,-1.0,-1.0,-1.0,6_new


### Change df so that the label and change_criticity are in a column of their own

In [49]:
# Create the numeric label column; the placeholder 0 is overwritten with the actual label in the next cell
df['change_criticity_label'] = 0

In [50]:
# Every label has the form '<criticity>-<number>': split it once, then store the number and the
# criticity name in their own columns
split_labels = df['change_criticity'].map(lambda label: label.split('-'))
df['change_criticity_label'] = split_labels.map(lambda parts: parts[1]).astype(int)
df['change_criticity'] = split_labels.map(lambda parts: parts[0])

In [51]:
df.head()

Unnamed: 0,X_grid,Y_grid,Z_grid,1_prev,2_prev,3_prev,6_prev,7_prev,17_prev,1_new,...,3_new,6_new,7_new,17_new,change_criticity,cosine_similarity,second_cosine_similarity,third_cosine_similarity,majority_class,change_criticity_label
0,2546500.75,1212000.75,1015.75,0.0,19.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,non_prob,1.0,,,2_new,1
1,2546500.75,1212002.25,1015.75,0.0,15.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,non_prob,1.0,,,2_new,1
2,2546500.75,1212003.75,1015.75,0.0,15.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,non_prob,1.0,,,2_new,1
3,2546500.75,1212005.25,1015.75,0.0,17.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,non_prob,1.0,,,2_new,1
4,2546500.75,1212006.75,1015.75,0.0,17.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,non_prob,1.0,,,2_new,1


### Save the new dataframe as csv

In [52]:
# Day-month and hour-minute of the export, shared by the .csv and the hyperparameter file
saving_time = time.strftime("%d%m-%H%M")

# round() rather than int(): vox_height is a float in meters, so vox_height*100 can fall just below the
# intended integer (e.g. 0.29*100 == 28.999999999999996) and int() would truncate it to the wrong size
csv_file_name = f'{tile_name}_{round(vox_height*100)}_{saving_time}.csv'
df.to_csv(os.path.join(save_folder_path, csv_file_name), index=False)

In [53]:
import json
# Save hyperparameters in JSON file with the same time as the .csv
hyperparam_dict = {'first_cos_threshold' : cos_threshold,
                    'second_cos_threshold' : second_cos_threshold,
                    'third_cosine_threshold' : third_cosine_threshold,
                    'query_radius' : kd_tree_query_radius,
                    'class_1_presence_threshold':threshold_class_1_presence}

# Serialise before opening the file, so that a non-serialisable value does not leave an empty file behind
serialised_hyperparams = json.dumps(hyperparam_dict)

# Derive the JSON name from the CSV name so that both files always share the same stem
json_file_name = os.path.splitext(csv_file_name)[0] + '.json'
with open(os.path.join(save_folder_path, json_file_name), "w") as outfile:
    outfile.write(serialised_hyperparams)