In [None]:
import os
import pandas as pd
import plotly.express as px
import geopandas as gpd
import numpy as np
import pathlib

In [None]:
# --- Paths -------------------------------------------------------------------
# Root data folder; the kernel is moved there below, so relative paths
# (such as `plots_folder`) are resolved against it.
WORKING_DIR = "/mnt/data-01/nmunger/proj-qalidar/data"

# Absolute path to the change-detection CSV of the tile under study
# (a file path, despite the `_dir` suffix in the name).
csv_dir = "/mnt/data-01/nmunger/proj-qalidar/data/out_vis/2546500_1212000_saved_at-0602-0935/dataframes/2546500_1212000_change_detections.csv"

# Folder receiving every saved plot
plots_folder = "plots"

# --- Tile description --------------------------------------------------------
tile_name = "2546500_1212000"
voxel_dimension = 1.5

# --- Export switches ---------------------------------------------------------
# When True, the plot of every executed cell is saved, whatever the per-cell
# `save_to_html` / `save_to_jpeg` flags say.
save_all_to_html = False
save_all_to_jpeg = False

# --- Load --------------------------------------------------------------------
os.chdir(WORKING_DIR)
df = pd.read_csv(csv_dir)

In [None]:
# tile_name, voxel_dimension, _ = os.path.basename(csv_dir).replace('.csv','').rsplit('_',maxsplit=2) # Define the tile name as the coordinates of the origin
# voxel_dimension = int(voxel_dimension)/100 # Divide by 100 to get voxel dim in meters

In [None]:
# Preview the first rows of the change-detection table loaded from `csv_dir`
df.head()

In [None]:
# Histogram of the number of voxels found for each criticality number
histogram_title = f'Number of voxels per type of criticality change, voxel dim: {voxel_dimension}'
fig = px.histogram(df, x='criticality_number', title=histogram_title)

# Axis titles, then one tick per integer criticality number starting at 0
fig.update_layout(xaxis_title="Criticality number", yaxis_title="Number of voxels")
fig.update_layout(xaxis_tick0=0, xaxis_dtick=1)

fig.show()

In [None]:
# Per-cell export switches for the figure currently held in `fig`;
# the notebook-wide `save_all_to_*` flags take effect as well.
save_to_html = False
save_to_jpeg = False

want_html = save_to_html or save_all_to_html
want_jpeg = save_to_jpeg or save_all_to_jpeg
if want_html or want_jpeg:
    pathlib.Path(plots_folder).mkdir(parents=True, exist_ok=True)
    # round() before int(): int() alone truncates, so a voxel size such as 0.29
    # (0.29*100 == 28.999...) would be written as 28 in the file name.
    # NOTE(review): there is no separator between the voxel size and the plot name;
    # left as is so that already generated file names do not change.
    plot_stem = tile_name + '_' + str(int(round(voxel_dimension*100))) + 'number_voxels_per_criticality_number'
    if want_html:
        plot_name = plot_stem + '.html'
        fig.write_html(os.path.join(plots_folder, plot_name))
    if want_jpeg:
        plot_name = plot_stem + '.jpeg'
        fig.write_image(os.path.join(plots_folder, plot_name), scale=2)

In [None]:
# Share (in %) of the voxels carried by each criticality number
plot_df0 = df.groupby('criticality_number').size().reset_index(name='voxel_count')
total_voxel_count = plot_df0['voxel_count'].sum()
plot_df0['voxel_percentage'] = plot_df0['voxel_count'] / total_voxel_count * 100

# Values written on the bars, rounded to 3 decimals
bar_text = plot_df0['voxel_percentage'].round(3)

fig = px.bar(
    plot_df0,
    x='criticality_number',
    y='voxel_percentage',
    title=f'Percentage of voxels by type of criticality number, <br>for voxel dimension: {voxel_dimension}',
    labels={'criticality_number': 'Criticality Number', 'voxel_percentage': 'Percentage'},
    text=bar_text,
)

# Fixed-size figure, one tick per criticality number, y axis spanning 0-100 %
layout_options = {
    'xaxis': {'tick0': 0, 'dtick': 1},
    'autosize': False,
    'width': 1000,
    'height': 600,
    'yaxis_range': [0, 100],
}
fig.update_layout(**layout_options)

fig.show()

In [None]:
# Per-cell export switches for the figure currently held in `fig`;
# the notebook-wide `save_all_to_*` flags take effect as well.
save_to_html = False
save_to_jpeg = False

want_html = save_to_html or save_all_to_html
want_jpeg = save_to_jpeg or save_all_to_jpeg
if want_html or want_jpeg:
    pathlib.Path(plots_folder).mkdir(parents=True, exist_ok=True)
    # round() before int(): int() alone truncates, so a voxel size such as 0.29
    # (0.29*100 == 28.999...) would be written as 28 in the file name.
    # NOTE(review): there is no separator between the voxel size and the plot name;
    # left as is so that already generated file names do not change.
    plot_stem = tile_name + '_' + str(int(round(voxel_dimension*100))) + 'percentage_voxels_per_criticality_number'
    if want_html:
        plot_name = plot_stem + '.html'
        fig.write_html(os.path.join(plots_folder, plot_name))
    if want_jpeg:
        plot_name = plot_stem + '.jpeg'
        fig.write_image(os.path.join(plots_folder, plot_name), scale=2)

In [None]:
# Count the voxels for every (criticality_tag, criticality_number) pair and
# express each count as a percentage of all voxels
grouped_df = df.groupby(['criticality_tag', 'criticality_number']).size().reset_index(name='count')
grouped_df['percentage'] = grouped_df['count']/grouped_df['count'].sum()*100
# The criticality number is cast to str before being used as the colour column
grouped_df['criticality_number'] = grouped_df['criticality_number'].astype(str)

# Bar plot of the percentages per tag, coloured by criticality number.
# The label mapping targets 'percentage' (the column actually plotted on y);
# it previously targeted 'count', so the axis label was never applied.
fig = px.bar(grouped_df, x='criticality_tag', y='percentage', color='criticality_number',
             title=f'Percentage of voxel by type of criticality changes, for voxel dimension: {voxel_dimension}',
             labels={'criticality_tag': 'Change Criticality', 'percentage': 'Percentage'})
fig.update_layout(yaxis_range=[0,100]) # Show the whole y axis from 0 to 100

fig.show()

In [None]:
# Per-cell export switches for the figure currently held in `fig`;
# the notebook-wide `save_all_to_*` flags take effect as well.
save_to_html = False
save_to_jpeg = False

want_html = save_to_html or save_all_to_html
want_jpeg = save_to_jpeg or save_all_to_jpeg
if want_html or want_jpeg:
    pathlib.Path(plots_folder).mkdir(parents=True, exist_ok=True)
    # round() before int(): int() alone truncates, so a voxel size such as 0.29
    # (0.29*100 == 28.999...) would be written as 28 in the file name.
    # NOTE(review): there is no separator between the voxel size and the plot name;
    # left as is so that already generated file names do not change.
    plot_stem = tile_name + '_' + str(int(round(voxel_dimension*100))) + 'percentage_voxels_per_criticality_tag'
    if want_html:
        plot_name = plot_stem + '.html'
        fig.write_html(os.path.join(plots_folder, plot_name))
    if want_jpeg:
        plot_name = plot_stem + '.jpeg'
        fig.write_image(os.path.join(plots_folder, plot_name), scale=2)

Evaluating under which category of voxels the points fall

In [None]:
# Total number of points per voxel, for each generation.
# The two total columns are excluded from the selection: their names also contain
# '_prev' / '_new', so re-running this cell would otherwise add the previous
# totals to the sum and double the result.
prev_point_cols = [col for col in df.columns if '_prev' in col and col != 'total_points_prev']
new_point_cols = [col for col in df.columns if '_new' in col and col != 'total_points_new']

df['total_points_prev'] = df[prev_point_cols].sum(axis=1)
df['total_points_new'] = df[new_point_cols].sum(axis=1)

In [None]:
# Points of both generations summed within each criticality number
plot_df1 = df.groupby('criticality_number')[['total_points_prev','total_points_new']].sum().reset_index()

# Turn each generation's totals into a percentage of that generation's points
for generation in ('prev', 'new'):
    generation_totals = plot_df1[f'total_points_{generation}']
    plot_df1[f'per_points_{generation}'] = generation_totals / generation_totals.sum() * 100

# Side-by-side bars: new generation next to previous generation
fig = px.bar(
    plot_df1,
    x='criticality_number',
    y=['per_points_new','per_points_prev'],
    title=f'Percentage of point by type of criticality changes label, <br>for voxel dimension: {voxel_dimension}',
    labels={'criticality_number': 'Change Criticality', 'value': 'Percentage'},
    barmode='group',
)

# Fixed-size figure, one tick per criticality number, y axis spanning 0-100 %
layout_options = {
    'xaxis': {'tick0': 0, 'dtick': 1},
    'autosize': False,
    'width': 1000,
    'height': 600,
    'yaxis_range': [0, 100],
}
fig.update_layout(**layout_options)

fig.show()

In [None]:
# Per-cell export switches for the figure currently held in `fig`;
# the notebook-wide `save_all_to_*` flags take effect as well.
save_to_html = False
save_to_jpeg = False

want_html = save_to_html or save_all_to_html
want_jpeg = save_to_jpeg or save_all_to_jpeg
if want_html or want_jpeg:
    pathlib.Path(plots_folder).mkdir(parents=True, exist_ok=True)
    # round() before int(): int() alone truncates, so a voxel size such as 0.29
    # (0.29*100 == 28.999...) would be written as 28 in the file name.
    # NOTE(review): there is no separator between the voxel size and the plot name;
    # left as is so that already generated file names do not change.
    plot_stem = tile_name + '_' + str(int(round(voxel_dimension*100))) + 'percentage_points_per_criticality_label_old_and_new'
    if want_html:
        plot_name = plot_stem + '.html'
        fig.write_html(os.path.join(plots_folder, plot_name))
    if want_jpeg:
        plot_name = plot_stem + '.jpeg'
        fig.write_image(os.path.join(plots_folder, plot_name), scale=2)

In [None]:
# Percentage of new-generation points per criticality number (no figure title is set)
fig = px.bar(
    plot_df1,
    x='criticality_number',
    y='per_points_new',
    labels={'criticality_number': 'Criticality Number', 'per_points_new': 'Percentage'},
    text=plot_df1['per_points_new'].round(2),
)

fig.update_layout(
    xaxis=dict(tick0=0, dtick=1),
    autosize=False,
    width=1000,
    height=600,
    yaxis_range=[0,100],
    title=dict(font=dict(size=25)),
    font=dict(size=14),
)

# One framed annotation per zone: (label, rows belonging to the zone, x position, frame colour).
# Zones are delimited by the criticality number: <= 6, 7-8, >= 9.
criticality = plot_df1.criticality_number
zones = [
    ("Non problematic", criticality <= 6, 3.5, "#008000"),
    ("Grey zone", criticality.isin([7,8]), 7.5, "#FFC300"),
    ("Problematic", criticality >= 9, 11, "#D22B2B"),
]
for zone_label, zone_rows, x_position, frame_colour in zones:
    # Cumulated percentage of new points falling in the zone
    zone_share = round(plot_df1.loc[zone_rows,'per_points_new'].sum(),2)
    fig.add_annotation(
        text=f"{zone_label}<br>{zone_share}%",
        align='center',
        x=x_position, y=90, showarrow=False,
        font=dict(size=16),
        bordercolor=frame_colour,
        borderwidth=2,
        borderpad=4,
    )

# Dashed separators between the three zones
for zone_boundary in (6.5, 8.5):
    fig.add_vline(x=zone_boundary, line_width=1, line_dash="dash")

fig.update_layout(margin=dict(l=20, r=20, t=20, b=20))
fig.show()

In [None]:
# Per-cell export switches for the figure currently held in `fig`;
# the notebook-wide `save_all_to_*` flags take effect as well.
save_to_html = False
save_to_jpeg = False

want_html = save_to_html or save_all_to_html
want_jpeg = save_to_jpeg or save_all_to_jpeg
if want_html or want_jpeg:
    pathlib.Path(plots_folder).mkdir(parents=True, exist_ok=True)
    # round() before int(): int() alone truncates, so a voxel size such as 0.29
    # (0.29*100 == 28.999...) would be written as 28 in the file name.
    # NOTE(review): there is no separator between the voxel size and the plot name;
    # left as is so that already generated file names do not change.
    plot_stem = tile_name + '_' + str(int(round(voxel_dimension*100))) + 'new_points_criticality_number_percentage'
    if want_html:
        plot_name = plot_stem + '.html'
        fig.write_html(os.path.join(plots_folder, plot_name))
    if want_jpeg:
        plot_name = plot_stem + '.jpeg'
        fig.write_image(os.path.join(plots_folder, plot_name), scale=2)

In [None]:
# New-generation points summed per criticality tag, then expressed as a percentage.
# Note that this rebinds `plot_df1`, previously grouped by criticality number.
plot_df1 = df.groupby('criticality_tag')['total_points_new'].sum().reset_index()
points_new_total = plot_df1['total_points_new'].sum()
plot_df1['per_points_new'] = plot_df1['total_points_new'] / points_new_total * 100

fig = px.bar(
    plot_df1,
    x='criticality_tag',
    y='per_points_new',
    title=f'Percentage of point by type of criticality changes,<br>for voxel dimension: {voxel_dimension}',
    labels={'criticality_tag': 'Change Criticality', 'per_points_new': 'Percentage'},
)

# Square fixed-size figure with the y axis spanning 0-100 %
layout_options = {
    'xaxis': {'tick0': 0, 'dtick': 1},
    'autosize': False,
    'width': 600,
    'height': 600,
    'yaxis_range': [0, 100],
}
fig.update_layout(**layout_options)

fig.show()

# NOTE(review): `save_to_html` is not set in this cell; its value is whatever an
# earlier save cell left behind, and the folder is not created here. The next cell
# saves the same figure under another name — this branch looks like a leftover.
if save_to_html:
    plot_name = tile_name + '_' + str(int(voxel_dimension*100)) + 'points_per_type_of_criticality.html'
    fig.write_html(os.path.join(plots_folder, plot_name))

In [None]:
# Per-cell export switches for the figure currently held in `fig`;
# the notebook-wide `save_all_to_*` flags take effect as well.
save_to_html = False
save_to_jpeg = False

want_html = save_to_html or save_all_to_html
want_jpeg = save_to_jpeg or save_all_to_jpeg
if want_html or want_jpeg:
    pathlib.Path(plots_folder).mkdir(parents=True, exist_ok=True)
    # round() before int(): int() alone truncates, so a voxel size such as 0.29
    # (0.29*100 == 28.999...) would be written as 28 in the file name.
    # NOTE(review): there is no separator between the voxel size and the plot name;
    # left as is so that already generated file names do not change.
    plot_stem = tile_name + '_' + str(int(round(voxel_dimension*100))) + 'new_points_per_criticality_tag_percentage'
    if want_html:
        plot_name = plot_stem + '.html'
        fig.write_html(os.path.join(plots_folder, plot_name))
    if want_jpeg:
        plot_name = plot_stem + '.jpeg'
        fig.write_image(os.path.join(plots_folder, plot_name), scale=2)

### Compare class to class
Here we compare, for each class, the mean difference in the number of points between the previous and the new generation. For each voxel, we only consider the classes which actually appear in at least one of the two generations, so that classes absent from both do not contribute a large number of zero differences to the mean.

In [None]:
# Normalising factor = [total previous number of points]/[total new number of points]
normalising_factor = df.iloc[:, df.columns.str.endswith('_prev')].to_numpy().sum()/df.iloc[:, df.columns.str.endswith('_new')].to_numpy().sum()
# Normalise the new voxels
df.iloc[:, df.columns.str.endswith('_new')] = df.iloc[:, df.columns.str.endswith('_new')]*normalising_factor

In [None]:
# Create boolean mask where cell is True if some class is present in either the new or previous voxel,
# and which is false if no presence of the class in either generation.
# The suffixes include the underscore ('_new' / '_prev') so that the selected columns are
# the same as the ones used for the normalisation and for naming the diff columns.
is_new_col = df.columns.str.endswith('_new')
is_prev_col = df.columns.str.endswith('_prev')
bool_mask = df.iloc[:, is_new_col].values.astype(bool) | df.iloc[:, is_prev_col].values.astype(bool)

In [None]:
# Display the presence mask for a visual check
bool_mask

In [None]:
# Find for each class in each voxel the difference in number of points between new and previous generation.
# The suffixes include the underscore ('_new' / '_prev') so that the selected columns are
# the same as the ones used for the normalisation and for naming the diff columns.
new_values = df.iloc[:, df.columns.str.endswith('_new')].values
prev_values = df.iloc[:, df.columns.str.endswith('_prev')].values
diff_values = new_values - prev_values
diff_values[:3]

In [None]:
# For class which were not present in either the new or previous gen., replace by nan so as to not influence the mean calculation.
# `np.nan` is used because the `np.NaN` alias no longer exists in NumPy 2.0, and
# `np.where` returns a float array even if `diff_values` holds integers
# (an in-place assignment of nan into an integer array would fail).
diff_values = np.where(bool_mask, diff_values, np.nan)
print(diff_values[:3])

In [None]:
# Name each difference column after its '_new' source column ('new' becomes 'diff')
diff_column_names = df.columns[df.columns.str.endswith('_new')].str.replace('new', 'diff')

# Voxel position, criticality tag and similarity scores to carry along with the differences
voxel_info = df[['X_grid','Y_grid','Z_grid','criticality_tag','cosine_similarity','second_cosine_similarity']]

# Differences and voxel information are matched on the row index
diff_df = pd.DataFrame(diff_values, columns=diff_column_names).merge(
    voxel_info, how='inner', left_index=True, right_index=True
)

In [None]:
# Mean per-class difference for each criticality tag
class_diff_columns = ['1_diff','2_diff','3_diff','6_diff','7_diff','17_diff']
grouped_by = diff_df.groupby('criticality_tag')[class_diff_columns].mean().reset_index()

In [None]:
# Mean difference of every class, shown per type of change
fig = px.bar(
    grouped_by,
    x='criticality_tag',
    y=['1_diff','2_diff','3_diff','6_diff','7_diff','17_diff'],
    labels={'criticality_tag':'Type of change','value':'Mean difference in number of points <br>between previous and new gen.'},
    title=f'Difference between previous and new generation<br>Voxel size of {voxel_dimension} m',
)
# Reference line at zero difference
fig.add_hline(y=0)

# Hard-coded y range for this plot
fig.update_layout(yaxis_range=[-60,25])

fig.show()

# NOTE(review): `save_to_html` is not set in this cell; its value is whatever an
# earlier save cell left behind, and the folder is not created here. The next cell
# writes the same file name — this branch looks like a leftover.
if save_to_html:
    plot_name = tile_name + '_' + str(int(voxel_dimension*100)) + 'class_to_class_diff.html'
    fig.write_html(os.path.join(plots_folder, plot_name))

In [None]:
# Per-cell export switches for the figure currently held in `fig`;
# the notebook-wide `save_all_to_*` flags take effect as well.
save_to_html = False
save_to_jpeg = False

want_html = save_to_html or save_all_to_html
want_jpeg = save_to_jpeg or save_all_to_jpeg
if want_html or want_jpeg:
    pathlib.Path(plots_folder).mkdir(parents=True, exist_ok=True)
    # round() before int(): int() alone truncates, so a voxel size such as 0.29
    # (0.29*100 == 28.999...) would be written as 28 in the file name.
    # NOTE(review): there is no separator between the voxel size and the plot name;
    # left as is so that already generated file names do not change.
    plot_stem = tile_name + '_' + str(int(round(voxel_dimension*100))) + 'class_to_class_diff'
    if want_html:
        plot_name = plot_stem + '.html'
        fig.write_html(os.path.join(plots_folder, plot_name))
    if want_jpeg:
        plot_name = plot_stem + '.jpeg'
        fig.write_image(os.path.join(plots_folder, plot_name), scale=2)

### Check under which category of voxel the points of interest fall :
This was done as an attempt to quickly analyse the criticality attributed to certain points of interest. It is not really used anymore and is kept for legacy purposes.

In [None]:
# Load the points of interest to look up in the voxel grid.
# NOTE(review): absolute path into a personal Desktop folder — the cell only runs
# on a machine where this file exists.
gdf=gpd.read_file('/home/nmunger/Desktop/points_of_interest.geojson')

In [None]:
def matching_voxel_id(x, y, z, df, vox_width=None):
    """Return the index label of the voxel of `df` that contains the point (x, y, z).

    A row matches when the point lies strictly within half a voxel width of the
    row's `X_grid`, `Y_grid` and `Z_grid` values on all three axes
    (presumably the voxel centre coordinates — TODO confirm).

    Parameters
    ----------
    x, y, z : float
        Coordinates of the point to locate.
    df : pandas.DataFrame
        Voxel table with `X_grid`, `Y_grid` and `Z_grid` columns.
    vox_width : float, optional
        Edge length of a voxel. When omitted, the notebook-level
        `voxel_dimension` is used (the function previously read an undefined
        global `vox_width`, which raised a NameError).

    Returns
    -------
    The index label of the single matching row.

    Raises
    ------
    ValueError
        If more than one voxel matches the point.
    IndexError
        If no voxel matches the point.
    """
    if vox_width is None:
        vox_width = voxel_dimension

    half_width = vox_width/2
    is_inside = (
        (abs(df.X_grid - x) < half_width)
        & (abs(df.Y_grid - y) < half_width)
        & (abs(df.Z_grid - z) < half_width)
    )
    index_array = df[is_inside].index.to_numpy()

    if len(index_array) > 1:
        raise ValueError(f'{len(index_array)} voxels match the point ({x}, {y}, {z}); expected exactly one')

    if len(index_array) == 0:
        # Same exception type as the former `index_array[0]` on an empty array, with an explicit message
        raise IndexError(f'No voxel matches the point ({x}, {y}, {z})')

    return index_array[0]

In [None]:
# Coordinates (with z) of every point of interest
poi_coordinates = gdf.geometry.get_coordinates(include_z=True)

# For each point, store the index of the voxel of `df` it falls into
gdf['matching_voxel'] = poi_coordinates.apply(
    lambda point: matching_voxel_id(point.x, point.y, point.z, df),
    axis=1,
)

In [None]:
# `voxel_dimension` is the voxel size defined in the configuration cell;
# `vox_width` was never defined in this notebook and raised a NameError.
print('Vox size:', voxel_dimension)
# Attach to each point of interest the row of its matching voxel and show a selection of columns.
# NOTE(review): columns are picked by position (0, 1, 18, 19) — verify they still
# point at the intended columns if the layout of `gdf` or `df` changes.
gdf.merge(df, left_on='matching_voxel',right_index=True, how='left').iloc[:,[0,1,18,19]]

In [None]:
# Same left merge of the points of interest with their matching voxel,
# this time showing the columns at positions 6 to 17 of the merged table
gdf.merge(df, left_on='matching_voxel',right_index=True, how='left').iloc[:,6:18]