In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import geopandas as gpd
from shapely.geometry import Point
import numpy as np
import pathlib
import os

In [None]:
# --- Configuration ---------------------------------------------------------
# Folder containing the criticity-change CSV files, and the file to analyse.
criticity_changes_folder = '../out_dataframe/criticity_changes_df'
csv_file = '2546500_1212000_150_2812-1353.csv'

# When True, each figure produced below is also written to an HTML file.
save_to_html = False

# Load the selected CSV; `df` is the DataFrame used by all following cells.
csv_path = os.path.join(criticity_changes_folder, csv_file)
df = pd.read_csv(csv_path)

In [None]:
# The output folder for exported figures is only needed (and only defined)
# when HTML export is enabled.
if save_to_html:
    plots_folder = '../plots'
    os.makedirs(plots_folder, exist_ok=True)

In [None]:
# Split the file name (without extension) from the right into three parts:
# the tile name (coordinates of the origin), the voxel size, and a trailing
# part that is not used here.
file_stem = csv_file.replace('.csv','')
tile_name, voxel_size_raw, _ = file_stem.rsplit('_',maxsplit=2)

# The size stored in the file name is divided by 100 to get the voxel dim in meters.
voxel_dimension = int(voxel_size_raw)/100

In [None]:
# Preview the first rows of the loaded DataFrame.
df.head()

In [None]:
# Histogram of the number of voxels per criticity-change label, with one
# colour per majority class.
fig = px.histogram(
    df,
    x='change_criticity_label',
    color='majority_class',
    title=f'Number of voxels per type of criticity change, voxel dim: {voxel_dimension}',
)
fig.update_layout(
    xaxis_title="Criticity change label",
    yaxis_title="Number of voxels",
    # One tick per label value, starting at 0.
    xaxis_tick0=0,
    xaxis_dtick=1,
)
fig.show()

if save_to_html:
    # round() instead of int(): voxel_dimension is a float, and multiplying it
    # back by 100 can land just below the intended integer
    # (0.29*100 == 28.999999999999996), which int() would truncate to 28.
    voxel_size_tag = round(voxel_dimension*100)
    plot_name = f'{tile_name}_{voxel_size_tag}voxels_per_type_of_criticity_label.html'
    fig.write_html(os.path.join(plots_folder, plot_name))

In [None]:
# Percentage of voxels per criticity-change label.
plot_df0 = (
    df.groupby('change_criticity_label')
    .size()
    .reset_index(name='voxel_count')
)
plot_df0['voxel_percentage'] = plot_df0['voxel_count']/plot_df0['voxel_count'].sum()*100

fig = px.bar(
    plot_df0,
    x='change_criticity_label',
    y='voxel_percentage',
    title=f'Percentage of voxels by type of criticity changes label, <br>for voxel dimension: {voxel_dimension}',
    labels={'change_criticity_label': 'Change Criticity', 'voxel_percentage': 'Percentage'},
    text=plot_df0['voxel_percentage'].round(3),
)

fig.update_layout(
    xaxis=dict(tick0=0, dtick=1),
    autosize=False,
    width=1000,
    height=600,
    yaxis_range=[0,100],  # Show the whole percentage axis
)

fig.show()

if save_to_html:
    # round() instead of int(): voxel_dimension*100 can land just below the
    # intended integer (0.29*100 == 28.999999999999996).
    voxel_size_tag = round(voxel_dimension*100)
    # This figure used to be saved as '..._points_per_type_of_criticity_label_new_only.html',
    # the same name as the "new points only" figure further down, so one file
    # overwrote the other. It now has its own name.
    plot_name = f'{tile_name}_{voxel_size_tag}voxels_percentage_per_type_of_criticity_label.html'
    fig.write_html(os.path.join(plots_folder, plot_name))

In [None]:
# Count the voxels for each (change_criticity, change_criticity_label) pair
# and express the counts as a percentage of all voxels.
grouped_df = (
    df.groupby(['change_criticity', 'change_criticity_label'])
    .size()
    .reset_index(name='count')
)
grouped_df['count'] = grouped_df['count']/grouped_df['count'].sum()*100

# Bar plot per criticity level, coloured by criticity-change label.
fig = px.bar(
    grouped_df,
    x='change_criticity',
    y='count',
    color='change_criticity_label',
    title=f'Percentage of voxel by type of criticity changes, for voxel dimension: {voxel_dimension}',
    labels={'change_criticity': 'Change Criticity', 'count': 'Percentage'},
)
fig.update_layout(yaxis_range=[0,100])  # Show the whole y axis from 0 to 100

fig.show()

if save_to_html:
    # round() instead of int(): voxel_dimension*100 can land just below the
    # intended integer (0.29*100 == 28.999999999999996).
    voxel_size_tag = round(voxel_dimension*100)
    plot_name = f'{tile_name}_{voxel_size_tag}voxels_per_criticity_level.html'
    fig.write_html(os.path.join(plots_folder, plot_name))

Evaluating under which category of voxels the points fall

In [None]:
def _class_count_columns(frame, marker):
    """Boolean mask of the columns of ``frame`` whose name contains ``marker``.

    The two total columns created in this cell are always excluded. Without
    that, re-running the cell would add the existing total to its own sum,
    because 'total_points_prev' contains '_prev' (and likewise for '_new').
    """
    is_total = frame.columns.isin(['total_points_prev', 'total_points_new'])
    return frame.columns.str.contains(marker) & ~is_total


# Row-wise totals over the '_prev' and '_new' columns. The mask is recomputed
# for each assignment because the first assignment may add a column to `df`.
df['total_points_prev'] = df.iloc[:, _class_count_columns(df, '_prev')].sum(axis=1)
df['total_points_new'] = df.iloc[:, _class_count_columns(df, '_new')].sum(axis=1)

In [None]:
# Share of the previous / new points falling in each criticity-change label.
point_columns = ['total_points_prev','total_points_new']
plot_df1 = df.groupby('change_criticity_label')[point_columns].sum().reset_index()
for column in point_columns:
    # Convert the summed counts to a percentage of that generation's total.
    plot_df1[column] = plot_df1[column]/plot_df1[column].sum()*100

fig = px.bar(
    plot_df1,
    x='change_criticity_label',
    y=['total_points_new','total_points_prev'],
    title=f'Percentage of point by type of criticity changes label, <br>for voxel dimension: {voxel_dimension}',
    labels={'change_criticity_label': 'Change Criticity', 'value': 'Percentage'},
    barmode='group',
)

fig.update_layout(
    xaxis=dict(tick0=0, dtick=1),
    autosize=False,
    width=1000,
    height=600,
    yaxis_range=[0,100],
)

fig.show()

if save_to_html:
    # round() instead of int(): voxel_dimension*100 can land just below the
    # intended integer (0.29*100 == 28.999999999999996).
    voxel_size_tag = round(voxel_dimension*100)
    plot_name = f'{tile_name}_{voxel_size_tag}points_per_type_of_criticity_label_old_and_new.html'
    fig.write_html(os.path.join(plots_folder, plot_name))

In [None]:
# Same aggregation as the previous figure, but only the new generation is
# plotted, with the percentage printed on each bar.
point_columns = ['total_points_prev','total_points_new']
plot_df1 = df.groupby('change_criticity_label')[point_columns].sum().reset_index()
for column in point_columns:
    # Convert the summed counts to a percentage of that generation's total.
    plot_df1[column] = plot_df1[column]/plot_df1[column].sum()*100

fig = px.bar(
    plot_df1,
    x='change_criticity_label',
    y='total_points_new',
    title=f'Percentage of point by type of criticity changes label, <br>for voxel dimension: {voxel_dimension}',
    labels={'change_criticity_label': 'Change Criticity', 'total_points_new': 'Percentage'},
    text=plot_df1['total_points_new'].round(3),
)

fig.update_layout(
    xaxis=dict(tick0=0, dtick=1),
    autosize=False,
    width=1000,
    height=600,
    yaxis_range=[0,100],
)

fig.show()

if save_to_html:
    # round() instead of int(): voxel_dimension*100 can land just below the
    # intended integer (0.29*100 == 28.999999999999996).
    voxel_size_tag = round(voxel_dimension*100)
    plot_name = f'{tile_name}_{voxel_size_tag}points_per_type_of_criticity_label_new_only.html'
    fig.write_html(os.path.join(plots_folder, plot_name))

In [None]:
# Share of the new-generation points falling in each criticity level.
plot_df1 = df.groupby('change_criticity')['total_points_new'].sum().reset_index()
plot_df1['total_points_new'] = plot_df1['total_points_new']/plot_df1['total_points_new'].sum()*100

fig = px.bar(
    plot_df1,
    x='change_criticity',
    y='total_points_new',
    title=f'Percentage of point by type of criticity changes,<br>for voxel dimension: {voxel_dimension}',
    labels={'change_criticity': 'Change Criticity', 'total_points_new': 'Percentage'},
)

fig.update_layout(
    xaxis=dict(tick0=0, dtick=1),
    autosize=False,
    width=600,
    height=600,
    yaxis_range=[0,100],
)

fig.show()

if save_to_html:
    # round() instead of int(): voxel_dimension*100 can land just below the
    # intended integer (0.29*100 == 28.999999999999996).
    voxel_size_tag = round(voxel_dimension*100)
    plot_name = f'{tile_name}_{voxel_size_tag}points_per_type_of_criticity.html'
    fig.write_html(os.path.join(plots_folder, plot_name))

### Compare class to class
Here we compare, for each class, the mean difference in the number of points between the previous and the new generation. For each voxel, we only consider the classes that actually appear in at least one of the two generations, so as not to generate a large number of differences equal to 0.

In [None]:
# Columns holding the point counts of each generation.
# NOTE(review): when the 'total_points_*' columns created earlier exist they
# are selected too, exactly as in the original masks — confirm this is wanted.
prev_columns = df.columns[df.columns.str.endswith('_prev')]
new_columns = df.columns[df.columns.str.endswith('_new')]

# Normalising factor = [total previous number of points]/[total new number of points]
normalising_factor = df[prev_columns].to_numpy().sum()/df[new_columns].to_numpy().sum()

# Normalise the new voxels.
# Assign by column label rather than `df.iloc[:, mask] = ...`: writing float
# values through iloc into integer-typed columns is a deprecated silent upcast
# in recent pandas, whereas column assignment simply replaces the columns.
df[new_columns] = df[new_columns]*normalising_factor

In [None]:
# Presence of each class in each voxel, per generation: a cell is True when
# its value is truthy once cast to bool (i.e. non-zero).
present_in_new = df.iloc[:,df.columns.str.endswith('new')].values.astype(bool)
present_in_prev = df.iloc[:,df.columns.str.endswith('prev')].values.astype(bool)

# True where the class is present in either the new or the previous voxel,
# False where it is absent from both generations.
bool_mask = present_in_new | present_in_prev

In [None]:
# Display the presence mask computed in the previous cell.
bool_mask

In [None]:
# Per-voxel, per-class difference in number of points: new generation minus
# previous generation.
# NOTE(review): this assumes the 'new' and 'prev' columns appear in the same
# class order — confirm against the CSV layout.
new_values = df.iloc[:,df.columns.str.endswith('new')].values
prev_values = df.iloc[:,df.columns.str.endswith('prev')].values
diff_values = new_values - prev_values

# Show the first three rows as a quick check.
diff_values[:3]

In [None]:
# For classes which were not present in either the new or the previous
# generation, replace the difference by NaN so that it does not influence the
# mean calculation.
# `np.nan` is used because the `np.NaN` alias was removed in NumPy 2.0.
diff_values[~bool_mask] = np.nan
print(diff_values[:3])

In [None]:
# Name each difference column after its '*_new' source column ('new' -> 'diff').
diff_column_names = df.columns[df.columns.str.endswith('_new')].str.replace('new', 'diff')

# Voxel attributes attached to the differences; the two frames are matched
# on their index.
voxel_attributes = df[['X_grid','Y_grid','Z_grid','change_criticity','cosine_similarity','second_cosine_similarity']]

diff_df = pd.DataFrame(diff_values, columns = diff_column_names).merge(
    voxel_attributes,
    how='inner',
    left_index=True,
    right_index=True,
)

In [None]:
# Mean difference per class for each type of change. NaN entries are skipped
# by the mean.
class_diff_columns = ['1_diff','2_diff','3_diff','6_diff','7_diff','17_diff']
grouped_by = (
    diff_df.groupby('change_criticity')[class_diff_columns]
    .mean()
    .reset_index()
)

In [None]:
# Mean per-class difference in number of points, for each type of change.
fig = px.bar(
    grouped_by,
    x='change_criticity',
    y=['1_diff','2_diff','3_diff','6_diff','7_diff','17_diff'],
    labels={'change_criticity':'Type of change','value':'Mean difference in number of points <br>between previous and new gen.'},
    title=f'Difference between previous and new generation<br>Voxel size of {voxel_dimension} m',
)
# Reference line separating gains from losses.
fig.add_hline(y=0)

fig.update_layout(yaxis_range=[-60,25])

fig.show()


if save_to_html:
    # round() instead of int(): voxel_dimension*100 can land just below the
    # intended integer (0.29*100 == 28.999999999999996).
    voxel_size_tag = round(voxel_dimension*100)
    # The original line ended with a stray extra quote, which made the whole
    # cell a SyntaxError.
    plot_name = f'{tile_name}_{voxel_size_tag}class_to_class_diff.html'
    fig.write_html(os.path.join(plots_folder, plot_name))

### Check under which category of voxel the points of interest fall :
This was an attempt to quickly analyse the criticity attributed to certain points of interest. It is not really used anymore and is kept for legacy purposes.

In [None]:
# Points of interest to look up in the voxel grid.
# NOTE(review): this is an absolute path on one user's machine; it has to be
# adapted before running the notebook anywhere else.
points_of_interest_path = '/home/nmunger/Desktop/points_of_interest.geojson'
gdf = gpd.read_file(points_of_interest_path)

In [None]:
def matching_voxel_id(x, y, z, df, vox_width=None):
    """Return the index label of the voxel of ``df`` that contains a point.

    A voxel matches when, on each axis, the absolute difference between the
    point coordinate and the voxel's ``X_grid`` / ``Y_grid`` / ``Z_grid``
    value is strictly smaller than half the voxel width.

    Parameters
    ----------
    x, y, z : float
        Coordinates of the point to locate.
    df : pandas.DataFrame
        Voxel table with ``X_grid``, ``Y_grid`` and ``Z_grid`` columns.
    vox_width : float, optional
        Voxel edge length, in the same unit as the coordinates. When omitted,
        the notebook-level ``voxel_dimension`` is used. The original code read
        a global ``vox_width`` that is never defined in this notebook.

    Returns
    -------
    The index label (from ``df.index``) of the single matching voxel.

    Raises
    ------
    ValueError
        If more than one voxel matches the point.
    IndexError
        If no voxel matches the point.
    """
    if vox_width is None:
        vox_width = voxel_dimension
    half_width = vox_width/2

    inside_voxel = (
        ((df.X_grid - x).abs() < half_width)
        & ((df.Y_grid - y).abs() < half_width)
        & ((df.Z_grid - z).abs() < half_width)
    )
    index_array = df.index[inside_voxel].to_numpy()

    if len(index_array) > 1:
        raise ValueError(
            f'Point ({x}, {y}, {z}) matches {len(index_array)} voxels, expected exactly one'
        )
    if len(index_array) == 0:
        # Same exception type as the original `index_array[0]` on an empty
        # array, but with a message saying which point failed.
        raise IndexError(f'Point ({x}, {y}, {z}) does not fall in any voxel')

    return index_array[0]

In [None]:
def _voxel_for_point(point):
    """Look up the voxel of ``df`` matching one row of point coordinates."""
    return matching_voxel_id(point.x, point.y, point.z, df)


# One row of x / y / z coordinates per point of interest.
poi_coordinates = gdf.geometry.get_coordinates(include_z=True)
gdf['matching_voxel'] = poi_coordinates.apply(_voxel_for_point, axis=1)

In [None]:
# `vox_width` is never defined in this notebook; the voxel size computed from
# the file name is `voxel_dimension`.
print('Vox size:', voxel_dimension)

# Attach the attributes of the matching voxel to each point of interest and
# show a positional selection of the merged columns.
gdf.merge(df, left_on='matching_voxel', right_index=True, how='left').iloc[:,[0,1,18,19]]

In [None]:
# Same merge as in the previous cell, this time showing the merged columns
# at positions 6 to 17.
poi_with_voxels = gdf.merge(df, left_on='matching_voxel', right_index=True, how='left')
poi_with_voxels.iloc[:,6:18]