# Overview

How to visualize the tree partitioning done by `KDQTreePartitioner`

# Setup

In [3]:
import numpy as np
import plotly.express as px
from molten.partitioners.KDQTreePartitioner import KDQTreePartitioner, KDQTreeNode

# Create Data, Tree

In [4]:
# Draw the demo points from a seeded, local generator so that the tree (and the
# text dump printed by this cell) is identical after every Restart & Run All.
rng = np.random.default_rng(0)
data = rng.integers(0, 10, (20, 3))  # 20 rows x 3 axes, integer values in [0, 10)

# count_ubound presumably caps the number of points per leaf (the leaves printed
# below hold 7, 6 and 7 points) -- confirm against KDQTreePartitioner's docs.
kp = KDQTreePartitioner(count_ubound=8)
root = kp.build(data)

# Last expression of the cell: shows the per-node subtree counts.
KDQTreeNode.as_text(root)


subtree count: 20
	left: 7
	right: 13

subtree count: 7

subtree count: 13
	left: 6
	right: 7

subtree count: 6

subtree count: 7


# Create Plotly Input

In [5]:
# Export the tree registered as 'build' into the tabular form used by the plotly
# calls further down. Only one tree id is given, so the frame carries no
# comparison statistics (just name, idx, parent_idx, cell_count and depth).
df_plot = kp.to_plotly_dataframe(
    tree_id1='build',
)
df_plot

Unnamed: 0,name,idx,parent_idx,cell_count,depth
0,kdqTree,2657821804384,,20,0
1,ax 0 <= 4.5,2657821803952,2657822000000.0,7,1
2,ax 0 > 4.5,2657275493056,2657822000000.0,13,1
3,ax 1 <= 4.0,2657275612416,2657275000000.0,6,2
4,ax 1 > 4.0,2657275612992,2657275000000.0,7,2


# Basic Plot

For example, size the cells by `cell_count`, which depends on only one tree and needs no comparison between trees.

In [6]:
# TODO - there's a textinfo value in plotly.treemap which could add some text to cells
#
# cell_count is a subtree total: a parent's count already contains the counts of
# its children (e.g. 20 = 7 + 13 in the tree above). plotly's default
# branchvalues='remainder' would add the children's values on top of the parent's
# own value and draw every branch too large, so request 'total' explicitly.
fig = px.treemap(
    data_frame=df_plot,
    names='name',
    ids='idx',
    parents='parent_idx',
    values='cell_count',
    branchvalues='total',
)
fig.update_traces(root_color='lightgrey')
fig.show()

# Modifications

#### Accessing count differences between builds and coloring by this value

In [7]:
# Seeded local generator: the build and fill samples -- and therefore the table
# displayed by this cell -- are reproducible across kernel restarts.
rng = np.random.default_rng(1)

kp = KDQTreePartitioner(count_ubound=25)
df = rng.random((50, 3))   # NumPy array (not a DataFrame): 50 points in [0, 1)^3
df2 = rng.random((50, 3))  # second, independently drawn sample of the same shape
_ = kp.build(df)           # return values are not needed here
_ = kp.fill(df2, 'fill1')  # 'fill1' is the id used to refer to this fill below

# Passing two tree ids adds the comparison columns (count_diff, kss) to the frame.
df_plot = kp.to_plotly_dataframe('build', 'fill1')
df_plot

Unnamed: 0,name,idx,parent_idx,cell_count,depth,count_diff,kss
0,kdqTree,2657851145184,,50,0,0,0.0
1,ax 0 <= 0.505,2657851277424,2657851000000.0,22,1,-2,0.003168
2,ax 0 > 0.505,2657851144848,2657851000000.0,28,1,2,0.003168
3,ax 1 <= 0.501,2657851278480,2657851000000.0,13,2,4,0.014244
4,ax 1 > 0.501,2657851142448,2657851000000.0,15,2,-2,0.003836


In [8]:
# Treemap coloured by the count difference between the two trees in df_plot.
# update_traces returns the figure itself, so the calls can be chained.
fig = px.treemap(
    data_frame=df_plot,
    names='name',
    ids='idx',
    parents='parent_idx',
    color='count_diff',
).update_traces(root_color='lightgrey')
fig.show()

#### Displaying additional information

In [15]:
# Same count_diff-coloured treemap, with extra text drawn inside each cell.
fig = px.treemap(
    data_frame=df_plot,
    names='name',
    ids='idx',
    parents='parent_idx',
    color='count_diff',
)
# For the available textinfo flags, see textinfo in
# https://plotly.com/python/reference/treemap/
fig.update_traces(
    root_color='lightgrey',
    textinfo="label+current path",
)
fig.show()

#### Accessing and coloring by KSS

In [21]:
# TBD
# Treemap coloured by the 'kss' column on a sequential blue colour scale.
fig = px.treemap(
    data_frame=df_plot,
    names='name',
    ids='idx',
    parents='parent_idx',
    color='kss',
    color_continuous_scale='blues',
).update_traces(root_color='lightgrey')
fig.show()

In [24]:
# Tag every row with a colour that encodes the direction of its count change:
# red for a decrease, light grey for no change, green for an increase.
# The tag is written into a 'count_dir' column of df_plot.
for row_mask, text_colour in (
    (df_plot.count_diff < 0, 'red'),
    (df_plot.count_diff == 0, "lightgrey"),
    (df_plot.count_diff > 0, 'green'),
):
    df_plot.loc[row_mask, 'count_dir'] = text_colour

fig = px.treemap(
    data_frame=df_plot,
    names='name',
    ids='idx',
    parents='parent_idx',
    color='kss',
    color_continuous_scale='blues',
)

# The direction colour is applied to the text inside each cell.
# Alternatives that were tried and left disabled:
#   fig.update_traces(textinfo="label+current path")  # show the path to each leaf
#   fig.update_traces(marker_line_width=4, marker_line={'color': df_plot.count_dir})  # outline instead
#   a per-cell text size can be added to the font dict, e.g. 'size': [14, 18, 18, 18, 18]
# texttemplate may be another way to control the formatting.
inside_font = {'color': df_plot.count_dir}
fig.update_traces(insidetextfont=inside_font, root_color='lightgrey')
fig.show()

#### Filter by Depth

In [9]:
# Seeded local generator so the depth-limited table below is reproducible.
rng = np.random.default_rng(2)

kp = KDQTreePartitioner(count_ubound=25)
df = rng.random((50, 3))   # NumPy array (not a DataFrame): 50 points in [0, 1)^3
df2 = rng.random((50, 3))  # second, independently drawn sample of the same shape
_ = kp.build(df)           # return values are not needed here
_ = kp.fill(df2, 'fill1')  # 'fill1' is the id used to refer to this fill below

# max_depth restricts which tree levels are exported; in the recorded output
# max_depth=2 yields rows of depth 0 and 1 only.
df_plot = kp.to_plotly_dataframe('build', 'fill1', max_depth=2)
df_plot

Unnamed: 0,name,idx,parent_idx,cell_count,depth,count_diff,kss
0,kdqTree,2422897374064,,50,0,0,0.0
1,ax 0 <= 0.506,2422897374352,2422897000000.0,28,1,-2,0.00309
4,ax 0 > 0.506,2422897374112,2422897000000.0,22,1,2,0.00309


In [11]:
# Treemap of the depth-limited frame, sized by cell_count.
# cell_count is a subtree total (the root's count equals the sum of its
# children's counts), so branchvalues='total' is needed; plotly's default
# 'remainder' would stack the children's values on top of the parent's.
fig = px.treemap(
    data_frame=df_plot,
    names='name',
    ids='idx',
    parents='parent_idx',
    values='cell_count',
    branchvalues='total',
)
fig.update_traces(root_color='lightgrey')
fig.show()

# Alternatives

For the most part, sunburst and icicle plots take the same arguments as treemaps and behave the same way, though they're not as space-efficient.

In [16]:
# Seeded local generator so the data behind the sunburst/icicle examples is
# reproducible across kernel restarts.
rng = np.random.default_rng(3)

kp = KDQTreePartitioner(count_ubound=25)
df = rng.random((50, 3))   # NumPy array (not a DataFrame): 50 points in [0, 1)^3
df2 = rng.random((50, 3))  # second, independently drawn sample of the same shape
_ = kp.build(df)           # return values are not needed here
_ = kp.fill(df2, 'fill1')  # 'fill1' is the id used to refer to this fill below

# Two tree ids -> the frame includes the comparison columns (count_diff, kss).
df_plot = kp.to_plotly_dataframe('build', 'fill1')
df_plot

Unnamed: 0,name,idx,parent_idx,cell_count,depth,count_diff,kss
0,kdqTree,2422899724544,,50,0,0,0.0
1,ax 0 <= 0.501,2422899728240,2422900000000.0,31,1,-11,0.094287
2,ax 1 <= 0.502,2422867020384,2422900000000.0,19,2,-12,0.165983
3,ax 1 > 0.502,2422867019904,2422900000000.0,12,2,1,0.001004
4,ax 0 > 0.501,2422899727952,2422900000000.0,19,1,11,0.094287


In [25]:
#TODO: should count_dir be moved into the function?
# Tag each row with a colour for the direction of its count change
# (red = decrease, light grey = unchanged, green = increase).
df_plot.loc[df_plot.count_diff < 0, 'count_dir'] = 'red'
df_plot.loc[df_plot.count_diff == 0, 'count_dir'] = "lightgrey"
df_plot.loc[df_plot.count_diff > 0, 'count_dir'] = 'green'

# cell_count is a subtree total (a parent's count already includes its
# children), so branchvalues='total' is required; plotly's default 'remainder'
# would add the children's values on top of the parent's and oversize branches.
fig = px.sunburst(
    data_frame=df_plot,
    names='name',
    ids='idx',
    parents='parent_idx',
    values='cell_count',
    branchvalues='total',
)
# Disabled alternative: outline the sectors instead of colouring the text.
# fig.update_traces(marker_line_width=4, marker_line={'color':df_plot.count_dir})
fig.update_traces(insidetextfont={'color':df_plot.count_dir},
                    root_color='lightgrey',)
fig.show()

In [26]:
# Icicle view of the same frame. Relies on the 'count_dir' column that the
# preceding cell adds to df_plot.
# cell_count is a subtree total (a parent's count already includes its
# children), so branchvalues='total' is required; plotly's default 'remainder'
# would add the children's values on top of the parent's and oversize branches.
fig = px.icicle(
    data_frame=df_plot,
    names='name',
    ids='idx',
    parents='parent_idx',
    values='cell_count',
    branchvalues='total',
)
# Disabled alternative: outline the cells instead of colouring the text.
# fig.update_traces(marker_line_width=4, marker_line={'color':df_plot.count_dir})
fig.update_traces(insidetextfont={'color':df_plot.count_dir},
                root_color='lightgrey',)
fig.show()