In [23]:
import os

WORKING_DIR = '/mnt/data-01/nmunger/proj-qalidar/code'
os.chdir(WORKING_DIR)

import util_las as las
import pandas as pd
import numpy as np
import pathlib

## Sankey Diagram visualisation:

In [24]:

NEW_PATH = '../data/initial_input_pcd/2022_Neuchatel/2547000_1212000.laz'
CLASSES_CORRESPONDANCE_PATH = '../data/classes_equivalence.csv'
PLOTS_FOLDER_PATH = '../data/plots'
save_to_html = False
save_to_image = True

newer_pc= las.las_to_df_xyzclass(NEW_PATH)

In [25]:
points_per_class = newer_pc.groupby('classification')['classification'].count().to_frame('nb_points').reset_index()
points_per_class.rename(columns={'classification':'id'},inplace=True)
points_per_class.head()

Unnamed: 0,id,nb_points
0,1,117150
1,2,6030981
2,3,367313
3,4,910833
4,5,6036875


In [26]:
class_equivalences = pd.read_csv(CLASSES_CORRESPONDANCE_PATH, sep=';')

In [27]:
class_equivalences['index_new']=class_equivalences.index
class_equivalences.head(4)

Unnamed: 0,id,class_name,matched_id,index_new
0,1,Unclassified,1,0
1,2,Ground,2,1
2,3,Low vegetation,3,2
3,4,Medium vegetation,3,3


In [28]:
prev_class_only = pd.DataFrame({'matched_id':[1,2,3,6,7,9,17,-1], 'common_class_name':['Unclassified (1)','Ground (2)','Vegetation (3)','Building (6)','Noise (7)','Water (9)','Bridge (17)','Removed (-1)']})
prev_class_only.index+=len(class_equivalences) # So that we have an inde which works with sankey
prev_class_only['index_prev']=prev_class_only.index
prev_class_only.head(3)

Unnamed: 0,matched_id,common_class_name,index_prev
21,1,Unclassified (1),21
22,2,Ground (2),22
23,3,Vegetation (3),23


In [29]:
class_equivalences=class_equivalences.merge(prev_class_only, how='left', on='matched_id')
class_equivalences.head()

Unnamed: 0,id,class_name,matched_id,index_new,common_class_name,index_prev
0,1,Unclassified,1,0,Unclassified (1),21
1,2,Ground,2,1,Ground (2),22
2,3,Low vegetation,3,2,Vegetation (3),23
3,4,Medium vegetation,3,3,Vegetation (3),23
4,5,High vegetation,3,4,Vegetation (3),23


In [30]:
sankey_df = class_equivalences.merge(points_per_class, how='inner',on='id')
sankey_df.head()

Unnamed: 0,id,class_name,matched_id,index_new,common_class_name,index_prev,nb_points
0,1,Unclassified,1,0,Unclassified (1),21,117150
1,2,Ground,2,1,Ground (2),22,6030981
2,3,Low vegetation,3,2,Vegetation (3),23,367313
3,4,Medium vegetation,3,3,Vegetation (3),23,910833
4,5,High vegetation,3,4,Vegetation (3),23,6036875


In [31]:
all_diagram_elements = pd.concat([class_equivalences[['id','class_name']],prev_class_only.rename(columns={'matched_id':'id','common_class_name':'class_name'})],ignore_index=True)
all_diagram_elements.tail(9)

Unnamed: 0,id,class_name,index_prev
20,41,Water (synthetic points),
21,1,Unclassified (1),21.0
22,2,Ground (2),22.0
23,3,Vegetation (3),23.0
24,6,Building (6),24.0
25,7,Noise (7),25.0
26,9,Water (9),26.0
27,17,Bridge (17),27.0
28,-1,Removed (-1),28.0


In [32]:
import plotly.graph_objects as go

fig = go.Figure(data=[go.Sankey(
    node = dict(
      pad = 15,
      thickness = 20,
      line = dict(color = "black", width = 0.5),
      label = all_diagram_elements.class_name,
      #color = "blue"
    ),
    link = dict(
      source = sankey_df.index_new,
      target = sankey_df.index_prev,
      value = sankey_df.nb_points
  ))])

fig.update_layout(
    autosize=False,
    width=800,
    #height=800,
    margin=dict(l=20, r=20, t=20, b=20),
)

fig.show()

In [33]:
os.path.basename(NEW_PATH).split('.')[0]

'2547000_1212000'

In [34]:
if save_to_html:
    pathlib.Path(PLOTS_FOLDER_PATH).mkdir(parents=True, exist_ok=True)
    plot_name = os.path.basename(NEW_PATH).split('.')[0] + '_sankey_flow_of_classes.html'
    fig.write_html(os.path.join(PLOTS_FOLDER_PATH,plot_name), full_html=False,include_plotlyjs='cdn')

In [37]:
if save_to_image:
    pathlib.Path(PLOTS_FOLDER_PATH).mkdir(parents=True, exist_ok=True)
    plot_name = os.path.basename(NEW_PATH).split('.')[0] + '_sankey_flow_of_classes.svg'
    fig.write_image(os.path.join(PLOTS_FOLDER_PATH,plot_name))