Not all the libraries needed by this notebook are listed in the requirements file: you additionally need to install plotly and nbformat. You can use the following commands:

```bash
pip install plotly
pip install nbformat
```

**Warning**: On Windows 10, the export to jpeg sometimes runs endlessly without producing any output. If that is the case, reinstall kaleido with `pip install --upgrade "kaleido==0.1.*"`.

In [None]:
import os
import pathlib
import geopandas as gpd
import numpy as np
import pandas as pd
import plotly.express as px
from shapely.geometry import Point

In [None]:
# Root of the project, relative to the folder in which the notebook is started.
WORKING_DIR = '../..'

# CSV file to analyse (path relative to WORKING_DIR).
csv_dir =  'outputs/full_workflow/csv/2547000_1211500_change_detections.csv'

# Tile name, used as prefix for the exported plot files.
tile_name = '2547000_1211500'

# Voxel dimension in meters (shown in the plot titles and file names).
voxel_size = 1.5

# WORKING_DIR is relative: a plain os.chdir(WORKING_DIR) would climb two more
# levels every time this cell is re-run. Remember the start directory once and
# always resolve WORKING_DIR from there so that the cell is safe to re-run.
if '_NOTEBOOK_DIR' not in globals():
    _NOTEBOOK_DIR = os.getcwd()
os.chdir(os.path.join(_NOTEBOOK_DIR, WORKING_DIR))

# If one plot is saved, will end up in this folder:
plots_folder = 'outputs/plots'

# If set to True, will save all the plots for which the cells are ran
save_all_to_html = True
save_all_to_jpeg = True

df = pd.read_csv(csv_dir)

In [None]:
# tile_name, voxel_size, _ = os.path.basename(csv_dir).replace('.csv','').rsplit('_',maxsplit=2) # Define the tile name as the coordinates of the origin
# voxel_size = int(voxel_size)/100 # Divide by 100 to get voxel dim in meters

In [None]:
# Preview the first rows of the loaded table.
df.head()

Evaluating the voxels by category

In [None]:
# Histogram counting the voxels found for every criticality number.
histogram_title = f'Number of voxels per type of criticality change, voxel dim: {voxel_size} m'
fig = px.histogram(df, x='criticality_number', title=histogram_title)

# Axis titles.
fig.update_layout(xaxis_title="Criticality number", yaxis_title="Number of voxels")

# One tick per criticality number, starting at 0.
x_ticks = dict(tick0=0, dtick=1)
fig.update_layout(xaxis=x_ticks)

fig.show()

In [None]:
# Set to True to export the figure of the previous cell even when the global
# save_all_to_* switches are off.
save_to_html = False
save_to_jpeg = False

export_html = save_to_html or save_all_to_html
export_jpeg = save_to_jpeg or save_all_to_jpeg
if export_html or export_jpeg:
    pathlib.Path(plots_folder).mkdir(parents=True, exist_ok=True)
    # round() instead of int(): int() truncates floating point error
    # (e.g. 0.29*100 == 28.999999999999996 would give 28).
    # The voxel size is separated from the plot name by an underscore.
    plot_stem = f'{tile_name}_{round(voxel_size*100)}_number_voxels_per_criticality_number'
    if export_html:
        fig.write_html(os.path.join(plots_folder, plot_stem + '.html'))
    if export_jpeg:
        fig.write_image(os.path.join(plots_folder, plot_stem + '.jpeg'), scale=2)

In [None]:
# Percentage of voxels carrying each criticality number.
voxels_per_number = df.groupby('criticality_number').size()
plot_df0 = voxels_per_number.reset_index(name='voxel_count')
plot_df0['voxel_percentage'] = plot_df0['voxel_count']/plot_df0['voxel_count'].sum()*100

bar_title = f'Percentage of voxels by type of criticality number, <br>for voxel size: {voxel_size} m'
axis_labels = {'criticality_number': 'Criticality Number', 'voxel_percentage': 'Percentage'}
# The rounded percentage is written on each bar.
bar_text = plot_df0['voxel_percentage'].round(3)

fig = px.bar(
    plot_df0,
    x='criticality_number',
    y='voxel_percentage',
    title=bar_title,
    labels=axis_labels,
    text=bar_text,
)

# Fixed figure size, one tick per criticality number and a full 0-100 % axis.
fig.update_layout(
    xaxis=dict(tick0=0, dtick=1),
    yaxis_range=[0, 100],
    autosize=False,
    width=1000,
    height=600,
)

fig.show()

In [None]:
# Set to True to export the figure of the previous cell even when the global
# save_all_to_* switches are off.
save_to_html = False
save_to_jpeg = False

export_html = save_to_html or save_all_to_html
export_jpeg = save_to_jpeg or save_all_to_jpeg
if export_html or export_jpeg:
    pathlib.Path(plots_folder).mkdir(parents=True, exist_ok=True)
    # round() instead of int(): int() truncates floating point error
    # (e.g. 0.29*100 == 28.999999999999996 would give 28).
    # The voxel size is separated from the plot name by an underscore.
    plot_stem = f'{tile_name}_{round(voxel_size*100)}_percentage_voxels_per_criticality_number'
    if export_html:
        fig.write_html(os.path.join(plots_folder, plot_stem + '.html'))
    if export_jpeg:
        fig.write_image(os.path.join(plots_folder, plot_stem + '.jpeg'), scale=2)

In [None]:
# Count the voxels for every (criticality_tag, criticality_number) pair and
# express each count as a percentage of all the voxels.
grouped_df = df.groupby(['criticality_tag', 'criticality_number']).size().reset_index(name='count')
grouped_df['percentage'] = grouped_df['count']/grouped_df['count'].sum()*100
# Cast to string so that the criticality number is used as a discrete colour.
grouped_df['criticality_number'] = grouped_df['criticality_number'].astype(str)

# Bar plot per tag, coloured by criticality number.
# The label mapping must target the plotted column ('percentage'); a 'count'
# key is ignored by plotly and leaves the raw column name on the y axis.
fig = px.bar(grouped_df, x='criticality_tag', y='percentage', color='criticality_number',
             title=f'Percentage of voxel by type of criticality changes, for voxel size: {voxel_size} m',
             labels={'criticality_tag': 'Change Criticality', 'percentage': 'Percentage'})
fig.update_layout(yaxis_range=[0,100]) # Show the whole y axis from 0 to 100

fig.show()

In [None]:
# Set to True to export the figure of the previous cell even when the global
# save_all_to_* switches are off.
save_to_html = False
save_to_jpeg = False

export_html = save_to_html or save_all_to_html
export_jpeg = save_to_jpeg or save_all_to_jpeg
if export_html or export_jpeg:
    pathlib.Path(plots_folder).mkdir(parents=True, exist_ok=True)
    # round() instead of int(): int() truncates floating point error
    # (e.g. 0.29*100 == 28.999999999999996 would give 28).
    # The voxel size is separated from the plot name by an underscore.
    plot_stem = f'{tile_name}_{round(voxel_size*100)}_percentage_voxels_per_criticality_tag'
    if export_html:
        fig.write_html(os.path.join(plots_folder, plot_stem + '.html'))
    if export_jpeg:
        fig.write_image(os.path.join(plots_folder, plot_stem + '.jpeg'), scale=2)

In [None]:
# Build one point per voxel from its grid coordinates. points_from_xy is
# vectorised and avoids creating the shapely points one by one in a Python loop.
geometry = gpd.points_from_xy(df.X_grid, df.Y_grid)
gdf = gpd.GeoDataFrame(df[['criticality_tag', 'clusters', 'vox_id']], crs='EPSG:2056', geometry=geometry)
# Buffer by half the voxel size with square caps (cap_style=3): each point
# becomes a square with a side of voxel_size centred on the point.
gdf['geometry'] = gdf.geometry.buffer(voxel_size/2, cap_style=3)

In [None]:
# List the columns available in the table.
df.columns

In [None]:
# Preview the first rows of the GeoDataFrame holding the buffered voxels.
gdf.head()

In [None]:
# Reference area used as denominator.
# NOTE(review): presumably the tile side length (501 m) squared - confirm.
total_area = 501**2

# Row selection for every granularity level, in the order they are reported.
level_masks = {
    'grey zone + problematic': gdf.criticality_tag.isin(['grey_zone', 'problematic']),
    'problematic': gdf.criticality_tag=='problematic',
    'clustered': ~gdf.clusters.isin([0, 1]),
}

# Percentage of total_area covered by each level: the selected voxel squares
# are dissolved into a single geometry so that overlaps are counted once.
area_proportion = {}
for level_name, level_mask in level_masks.items():
    gdf_subset = gdf[level_mask].copy()
    covered_area = gdf_subset.dissolve().area.iloc[0]
    area_proportion[level_name] = round(covered_area/total_area*100, 2)

In [None]:
# Summary table of the covered part of the tile for each granularity level.
# The dict views are turned into lists and the default index is used, so the
# table keeps working when levels are added to or removed from area_proportion
# (a hard-coded index=[0, 1, 2] only fits exactly three entries).
pd.DataFrame({'granularity_level': list(area_proportion.keys()), 'covered_tile_part': list(area_proportion.values())})

Evaluating under which category of voxels the points fall

In [None]:
# Total number of points per voxel for each generation: row-wise sum of all
# the columns whose name contains '_prev' (respectively '_new').
for generation in ('prev', 'new'):
    generation_columns = df.filter(like=f'_{generation}')
    df[f'{generation}_total_points'] = generation_columns.sum(axis=1)

In [None]:
# Points of each generation summed per criticality number, then converted to a
# percentage of all the points of that generation.
points_per_number = df.groupby('criticality_number')[['prev_total_points','new_total_points']].sum()
plot_df1 = points_per_number.reset_index()
for generation in ('prev', 'new'):
    total_column = f'{generation}_total_points'
    plot_df1[f'per_points_{generation}'] = plot_df1[total_column]/plot_df1[total_column].sum()*100

# Grouped bars: new and previous generation side by side.
fig = px.bar(
    plot_df1,
    x='criticality_number',
    y=['per_points_new','per_points_prev'],
    title=f'Percentage of point by type of criticality changes label, <br>for voxel size: {voxel_size}',
    labels={'criticality_number': 'Change Criticality', 'value': 'Percentage'},
    barmode='group',
)

# Fixed figure size, one tick per criticality number and a full 0-100 % axis.
fig.update_layout(
    xaxis=dict(tick0=0, dtick=1),
    yaxis_range=[0, 100],
    autosize=False,
    width=1000,
    height=600,
)

fig.show()

In [None]:
# Set to True to export the figure of the previous cell even when the global
# save_all_to_* switches are off.
save_to_html = False
save_to_jpeg = False

export_html = save_to_html or save_all_to_html
export_jpeg = save_to_jpeg or save_all_to_jpeg
if export_html or export_jpeg:
    pathlib.Path(plots_folder).mkdir(parents=True, exist_ok=True)
    # round() instead of int(): int() truncates floating point error
    # (e.g. 0.29*100 == 28.999999999999996 would give 28).
    # The voxel size is separated from the plot name by an underscore.
    plot_stem = f'{tile_name}_{round(voxel_size*100)}_percentage_points_per_criticality_label_old_and_new'
    if export_html:
        fig.write_html(os.path.join(plots_folder, plot_stem + '.html'))
    if export_jpeg:
        fig.write_image(os.path.join(plots_folder, plot_stem + '.jpeg'), scale=2)

In [None]:
# Percentage of new-generation points per criticality number (no figure title),
# with the rounded value written on each bar.
fig = px.bar(
    plot_df1,
    x='criticality_number',
    y='per_points_new',
    labels={'criticality_number': 'Criticality Number', 'per_points_new': 'Percentage'},
    text=plot_df1['per_points_new'].round(2),
)

fig.update_layout(
    xaxis=dict(tick0=0, dtick=1),
    yaxis_range=[0, 100],
    autosize=False,
    width=1000,
    height=600,
    title=dict(font=dict(size=25)),
    font=dict(size=14),
)

# One framed annotation per zone: label, rows belonging to the zone,
# x position of the box and colour of its border.
zone_boxes = [
    ('Non-problematic', plot_df1.criticality_number<=6, 3.5, "#008000"),
    ('Grey zone', plot_df1.criticality_number.isin([7,8]), 7.5, "#FFC300"),
    ('Problematic', plot_df1.criticality_number>=9, 11, "#D22B2B"),
]
for zone_label, zone_rows, box_x, box_colour in zone_boxes:
    # Summed share of the points falling in the zone.
    zone_share = round(plot_df1.loc[zone_rows,'per_points_new'].sum(),2)
    fig.add_annotation(
        text=f"{zone_label}<br>{zone_share}%",
        align='center',
        x=box_x,
        y=90,
        showarrow=False,
        font=dict(size=16),
        bordercolor=box_colour,
        borderwidth=2,
        borderpad=4,
    )

# Dashed separators between the three zones.
for separator_x in (6.5, 8.5):
    fig.add_vline(x=separator_x, line_width=1, line_dash="dash")

fig.update_layout(margin=dict(l=20, r=20, t=20, b=20))
fig.show()

In [None]:
# Set to True to export the figure of the previous cell even when the global
# save_all_to_* switches are off.
save_to_html = False
save_to_jpeg = False

export_html = save_to_html or save_all_to_html
export_jpeg = save_to_jpeg or save_all_to_jpeg
if export_html or export_jpeg:
    pathlib.Path(plots_folder).mkdir(parents=True, exist_ok=True)
    # round() instead of int(): int() truncates floating point error
    # (e.g. 0.29*100 == 28.999999999999996 would give 28).
    # The voxel size is separated from the plot name by an underscore.
    plot_stem = f'{tile_name}_{round(voxel_size*100)}_new_points_criticality_number_percentage'
    if export_html:
        fig.write_html(os.path.join(plots_folder, plot_stem + '.html'))
    if export_jpeg:
        fig.write_image(os.path.join(plots_folder, plot_stem + '.jpeg'), scale=2)

In [None]:
# New-generation points summed per criticality tag, then converted to a
# percentage of all the new-generation points.
plot_df1 = df.groupby('criticality_tag')['new_total_points'].sum().to_frame().reset_index()
plot_df1['per_points_new'] = plot_df1['new_total_points']/plot_df1['new_total_points'].sum()*100

fig = px.bar(plot_df1,x='criticality_tag',y='per_points_new',title=f'Percentage of point by type of criticality changes,<br>for voxel dimension: {voxel_size}',
             labels={'criticality_tag': 'Change Criticality', 'per_points_new': 'Percentage'})

fig.update_layout(
    xaxis = dict(tick0 = 0, dtick = 1),
    autosize=False,
    width=600,
    height=600,
    yaxis_range=[0,100]
)

fig.show()

# The figure is exported by the save cell that follows. The former
# `if save_to_html:` block was removed from this cell: it read a flag left over
# from the save cell of another figure, did not create the output folder and
# wrote the same figure under a second file name.

In [None]:
# Set to True to export the figure of the previous cell even when the global
# save_all_to_* switches are off.
save_to_html = False
save_to_jpeg = False

export_html = save_to_html or save_all_to_html
export_jpeg = save_to_jpeg or save_all_to_jpeg
if export_html or export_jpeg:
    pathlib.Path(plots_folder).mkdir(parents=True, exist_ok=True)
    # round() instead of int(): int() truncates floating point error
    # (e.g. 0.29*100 == 28.999999999999996 would give 28).
    # The voxel size is separated from the plot name by an underscore.
    plot_stem = f'{tile_name}_{round(voxel_size*100)}_new_points_per_criticality_tag_percentage'
    if export_html:
        fig.write_html(os.path.join(plots_folder, plot_stem + '.html'))
    if export_jpeg:
        fig.write_image(os.path.join(plots_folder, plot_stem + '.jpeg'), scale=2)

In [None]:
# Display name of every '<class code>_new' column: '<class name> (<class code>)'.
column_mapping_new = {
    f'{class_code}_new': f'{class_name} ({class_code})'
    for class_code, class_name in (
        (1, 'Unclassified'),
        (2, 'Ground'),
        (3, 'Vegetation'),
        (6, 'Building'),
        (7, 'Noise'),
        (9, 'Water'),
        (17, 'Bridge'),
    )
}

In [None]:
# Criticality tags in the order they are processed.
tag_order = ['non_prob', 'grey_zone', 'problematic']

# Number of new-generation points of every class, summed per criticality tag.
new_classes_columns = [col for col in df if col.endswith('_new')]
points_per_class_per_tag = df.groupby('criticality_tag')[new_classes_columns].sum()
# Extra rows: total per class, and the share of each class in the whole tile.
points_per_class_per_tag.loc['total'] = points_per_class_per_tag.sum(numeric_only=True)
points_per_class_per_tag.loc['total_normalised'] = points_per_class_per_tag.loc['total']/points_per_class_per_tag.loc['total'].sum()*100

# For every class, percentage of its points falling under each tag.
# reindex (instead of .loc) gives a zero row to a tag that is absent from the
# tile rather than raising a KeyError, and already returns the rows in tag_order.
points_per_tag = points_per_class_per_tag.reindex(index=tag_order, fill_value=0)
plot_df_class = points_per_tag.div(points_per_tag.sum(axis=0), axis=1)*100
plot_df_class = pd.concat((plot_df_class, points_per_class_per_tag.loc[['total_normalised'], :]), axis=0)
plot_df_class.index.names = ['Label']
plot_df_class = plot_df_class.rename(index={'non_prob':'Non-problematic', 'problematic':'Problematic', 'grey_zone':'Grey Zone', 'total_normalised':'Class presence in tile'})

# Rename the class columns that exist in the DataFrame. This must happen before
# the melt: afterwards the class names are values of the 'column' column.
plot_df_class.rename(columns={original_col_name: new_col_name for original_col_name, new_col_name in column_mapping_new.items() if original_col_name in plot_df_class.columns}, inplace=True)

# Reshape the dataframe for Plotly Express
plot_df_class = plot_df_class.reset_index().melt(id_vars='Label', var_name='column', value_name='percentage')

# Plot the grouped bar chart
fig = px.bar(plot_df_class, x='column', y='percentage', color='Label',
             labels={
                     "column": "Classes",
                     "percentage": "Percentage"
                 },
             color_discrete_map={'Grey Zone': '#FFD93D', 'Non-problematic': '#6BCB77', 'Problematic': '#FF6B6B','Class presence in tile':'#4D96FF'},
             barmode='group')

fig.update_layout(
    xaxis = dict(tick0 = 0, dtick = 1),
    autosize=False,
    width=800,
    height=500,
    yaxis_range=[0,100],
    margin=dict(l=20, r=20, t=20, b=20),
    font=dict(size=12)
)
# One tick every 10 % on the y axis.
fig.update_yaxes(tickvals=np.arange(0,105,10))

fig.show()

In [None]:
# This figure is always exported, whatever the global save_all_to_* switches.
save_to_html = True
save_to_jpeg = True

export_html = save_to_html or save_all_to_html
export_jpeg = save_to_jpeg or save_all_to_jpeg
if export_html or export_jpeg:
    pathlib.Path(plots_folder).mkdir(parents=True, exist_ok=True)
    # round() instead of int(): int() truncates floating point error
    # (e.g. 0.29*100 == 28.999999999999996 would give 28).
    plot_stem = f'{tile_name}_{round(voxel_size*100)}_class_criticality_repartition'
    if export_html:
        fig.write_html(os.path.join(plots_folder, plot_stem + '.html'))
    if export_jpeg:
        fig.write_image(os.path.join(plots_folder, plot_stem + '.jpeg'), scale=2)

### Compare class to class
Here we compare, for each class, the mean difference in the number of points between the previous and the new generation. For each voxel, we only consider the classes which actually appear in at least one of the two generations, so as not to generate many differences equal to 0.

In [None]:
# Columns holding the per-class point counts of each generation.
prev_columns = df.columns[df.columns.str.endswith('_prev')]
new_columns = df.columns[df.columns.str.endswith('_new')]

# Normalising factor = [total previous number of points]/[total new number of points]
normalising_factor = df[prev_columns].to_numpy().sum()/df[new_columns].to_numpy().sum()

# Normalise the new voxels. The columns are cast to float and replaced as a
# whole: writing the scaled (float) values in place into integer columns is a
# dtype-incompatible assignment that recent pandas versions deprecate.
# NOTE: re-running this cell applies the normalisation a second time.
df[new_columns] = df[new_columns].astype(float)*normalising_factor

In [None]:
# Presence of each class in each voxel, per generation: True where the count is
# non-zero. The columns of both generations are paired by position.
present_in_new = df.loc[:, df.columns.str.endswith('new')].to_numpy().astype(bool)
present_in_prev = df.loc[:, df.columns.str.endswith('prev')].to_numpy().astype(bool)

# True if the class is present in the new or in the previous voxel,
# False if it is absent from both generations.
bool_mask = np.logical_or(present_in_new, present_in_prev)

In [None]:
# Per-class point counts of each generation as plain arrays
# (columns paired by position).
new_counts = df.loc[:, df.columns.str.endswith('new')].to_numpy()
prev_counts = df.loc[:, df.columns.str.endswith('prev')].to_numpy()

# Difference in number of points (new minus previous) for each class in each voxel.
diff_values = new_counts - prev_counts
diff_values[:3]

In [None]:
# For classes which were not present in either the new or previous gen.,
# replace the difference by NaN so that it does not influence the mean.
# - NaN can only be stored in a float array, so make sure the dtype is float.
# - np.nan is used because the np.NaN alias was removed in NumPy 2.0.
diff_values = diff_values.astype(float, copy=False)
diff_values[~bool_mask] = np.nan
print(diff_values[:3])

In [None]:
# Name the difference columns after the '_new' columns ('1_new' -> '1_diff').
diff_column_names = df.columns[df.columns.str.endswith('_new')].str.replace('new', 'diff')
# Voxel attributes attached to the differences (matched on the row index).
voxel_attributes = df[['X_grid','Y_grid','Z_grid','criticality_tag','cosine_similarity','second_cosine_similarity']]

diff_df = pd.DataFrame(diff_values, columns = diff_column_names)
diff_df = diff_df.merge(voxel_attributes, how='inner', left_index=True, right_index=True)

In [None]:
# Mean difference per criticality tag for the listed classes.
# NOTE(review): '9_diff' (water) is not part of the list - confirm it is intentional.
class_diff_columns = ['1_diff','2_diff','3_diff','6_diff','7_diff','17_diff']
mean_diff_per_tag = diff_df.groupby('criticality_tag')[class_diff_columns].mean()
grouped_by = mean_diff_per_tag.reset_index()

In [None]:
# Mean difference in number of points per class, for each type of change.
fig = px.bar(grouped_by, x='criticality_tag', y=['1_diff','2_diff','3_diff','6_diff','7_diff','17_diff'], labels={'criticality_tag':'Type of change','value':'Mean difference in number of points <br>between previous and new gen.'},title=f'Difference between previous and new generation<br>Voxel size of {voxel_size} m')
# Reference line at zero difference.
fig.add_hline(y=0)

fig.update_layout(yaxis_range=[-60,25])

fig.show()

# The figure is exported by the save cell that follows. The former
# `if save_to_html:` block was removed from this cell: it read a flag left over
# from the save cell of another figure, so the HTML file was written even when
# every save switch of the following cell was turned off.

In [None]:
# Set to True to export the figure of the previous cell even when the global
# save_all_to_* switches are off.
save_to_html = False
save_to_jpeg = False

export_html = save_to_html or save_all_to_html
export_jpeg = save_to_jpeg or save_all_to_jpeg
if export_html or export_jpeg:
    pathlib.Path(plots_folder).mkdir(parents=True, exist_ok=True)
    # round() instead of int(): int() truncates floating point error
    # (e.g. 0.29*100 == 28.999999999999996 would give 28).
    # The voxel size is separated from the plot name by an underscore.
    plot_stem = f'{tile_name}_{round(voxel_size*100)}_class_to_class_diff'
    if export_html:
        fig.write_html(os.path.join(plots_folder, plot_stem + '.html'))
    if export_jpeg:
        fig.write_image(os.path.join(plots_folder, plot_stem + '.jpeg'), scale=2)