In [None]:
import polars as pl
import networkx as nx
from math import pi, sqrt
import random
import rtsvg
rt = rtsvg.RACETrack()
import os
import kagglehub
# Fetch the dataset, then read every entry of the download directory as CSV and
# decide which table it is from its column names alone.
path = kagglehub.dataset_download("konivat/tree-of-life")
df_edges, df_nodes = None, None
for _filename_ in os.listdir(path):
    df = pl.read_csv(os.path.join(path, _filename_))
    _columns_ = set(df.columns)
    # The edge test comes first: it is the more specific signature (two columns).
    if {'source_node_id', 'target_node_id'} <= _columns_:
        df_edges = df
    elif 'node_id' in _columns_:
        df_nodes = df
    else:
        # Anything that matches neither signature is treated as an error, not skipped.
        raise Exception("unknown file")

In [None]:
# Edge-table column names; childrenOf() treats src_col as the parent side and dst_col as the child side.
src_col   = 'source_node_id'
dst_col   = 'target_node_id'
# Node id used as the starting point for the traversals in the cells below.
_root_    = 1
# (source, target) column pairs handed to the rtsvg graph / link-node helpers.
_relates_ = [(src_col, dst_col)]
def childrenOf(_node_):
    """Return the direct children of `_node_` as a plain Python list.

    Selects the rows of `df_edges` whose `src_col` value equals `_node_` and
    returns their `dst_col` values; the list is empty when no row matches.
    Every call scans the edge dataframe.
    """
    _is_parent_ = pl.col(src_col) == _node_
    return list(df_edges.filter(_is_parent_)[dst_col])
def totalLeaves(_node_, _levels_):
    """Count the descendants located exactly `_levels_` edges below `_node_`.

    `_levels_ == 0` counts the node itself (returns 1). A branch that runs out
    of children before reaching the requested depth contributes 0.
    """
    if _levels_ == 0:
        return 1
    return sum(totalLeaves(_child_, _levels_ - 1) for _child_ in childrenOf(_node_))
def dumbBreadthFirstKeepers(_node_, _levels_, _level_lu_=None, _keepers_=None):
    """Breadth-first sweep over `df_edges`, starting at `_node_`.

    Each pass selects every edge whose source is already kept, records the
    newly reached targets as the next level, and adds them to the kept set.
    One pass always runs before `_levels_` is checked, so the sweep covers
    `_levels_ + 1` hops (`_levels_ = 0` -> the node and its children).

    Parameters:
        _node_     -- starting node id.
        _levels_   -- number of additional passes after the first one. Values
                      below zero behave like 0 (previously they never hit the
                      stop condition and recursed until RecursionError).
        _level_lu_ -- optional dict {level index: set of nodes first seen at that
                      level} to continue from; mutated in place.
        _keepers_  -- optional set of already-kept nodes to continue from;
                      mutated in place.

    Returns:
        (_keepers_, _level_lu_)
    """
    if _keepers_  is None: _keepers_  = {_node_}
    if _level_lu_ is None: _level_lu_ = {0: {_node_}}
    # Iterative on purpose: no recursion limit to hit and the stop test cannot be skipped.
    while True:
        _reached_ = set(df_edges.filter(pl.col(src_col).is_in(_keepers_))[dst_col])
        # The next level index is simply the number of levels recorded so far.
        _level_lu_[len(_level_lu_)] = _reached_ - _keepers_
        _keepers_ |= _reached_
        if _levels_ <= 0: return _keepers_, _level_lu_
        _levels_ -= 1
# Sanity check: totalLeaves(_root_, 1) counts the root's direct children, so it should equal len(childrenOf(_root_)).
totalLeaves(_root_, 1), childrenOf(_root_)

In [None]:
def renderHierarchy(node, levels=3, into_circle=None):
    """Lay out the descendants of `node` as nested circles.

    Each child gets a circle inside `into_circle`; each grandchild gets a circle
    inside its parent's circle, and so on for `levels` generations.

    Parameters:
        node        -- id of the node whose descendants are laid out (the node
                       itself gets no entry in the result).
        levels      -- number of generations below `node` to lay out; values <= 0
                       produce an empty layout.
        into_circle -- (x, y, r) circle to pack into; defaults to a circle of
                       radius 100 centred on the origin.

    Returns:
        dict mapping descendant id -> circle as returned by rt.packCircles.
        Empty when `node` has no children, or (for levels > 1) when it has no
        descendants exactly `levels` generations down.

    Sizing: at the last level every child is packed as an equal circle. Above
    it, a child's area is proportional to the number of its descendants at the
    deepest requested level; a child with none is weighted as a single leaf so
    it still gets a circle. Areas are normalised by the sum of these adjusted
    weights, so together they always equal the area of `into_circle`.

    Assumes rt.packCircles returns one circle per input circle, in input order.
    """
    if into_circle is None: into_circle = (0.0, 0.0, 100.0) # radius-100 circle at the origin
    if levels <= 0: return {}
    _children_ = childrenOf(node)
    if len(_children_) == 0: return {}

    if levels == 1:
        # Last generation: identical circles, sized by the packer.
        _circles_ = [(0.0, 0.0, 1.0) for _ in _children_]
        _packed_  = rt.packCircles(_circles_, into_circle=into_circle)
        return {_child_: _packed_[i] for i, _child_ in enumerate(_children_)}

    # One traversal per child; their sum is the leaf count of `node` at this depth.
    _leaf_counts_ = [totalLeaves(_child_, levels-1) for _child_ in _children_]
    if sum(_leaf_counts_) == 0: return {}
    _weights_      = [_count_ if _count_ > 0 else 1 for _count_ in _leaf_counts_]
    _total_weight_ = sum(_weights_)  # equals the leaf count unless a childless branch was bumped to 1
    area           = pi * into_circle[2] ** 2
    _circles_      = [(0.0, 0.0, sqrt((_weight_ * area / _total_weight_) / pi)) for _weight_ in _weights_]
    _packed_       = rt.packCircles(_circles_, into_circle=into_circle)

    _node_to_circle_ = {}
    for i, _child_ in enumerate(_children_):
        _childs_circle_           = _packed_[i]
        _node_to_circle_[_child_] = _childs_circle_
        # Lay the child's own descendants out inside the circle it was just given.
        _node_to_circle_.update(renderHierarchy(_child_, levels-1, into_circle=_childs_circle_))
    return _node_to_circle_

# Lay out three generations below the root, then draw every circle into one SVG
# whose viewBox is the bounding box of all circles.
_n2c_ = renderHierarchy(_root_, 3)
len(_n2c_)  # not displayed: only a cell's last expression is shown
_svg_ = []
_x0_ = _y0_ = _x1_ = _y1_ = None
for _x_, _y_, _r_ in _n2c_.values():
    _svg_.append(f'<circle cx="{_x_}" cy="{_y_}" r="{_r_}" fill="none" stroke="#000000" stroke-width="0.1" />')
    _left_, _top_, _right_, _bottom_ = _x_ - _r_, _y_ - _r_, _x_ + _r_, _y_ + _r_
    if _x0_ is None:
        # The first circle seeds the bounding box.
        _x0_, _y0_, _x1_, _y1_ = _left_, _top_, _right_, _bottom_
    else:
        _x0_, _y0_ = min(_x0_, _left_),  min(_y0_, _top_)
        _x1_, _y1_ = max(_x1_, _right_), max(_y1_, _bottom_)

_vb_w_, _vb_h_ = _x1_ - _x0_, _y1_ - _y0_
_hdr_ = f'<svg x="0" y="0" width="1200" height="900" viewBox="{_x0_} {_y0_} {_vb_w_} {_vb_h_}" xmlns="http://www.w3.org/2000/svg">'
# White rectangle covering the whole viewBox, drawn before the circles.
_clr_ = f'<rect x="{_x0_}" y="{_y0_}" width="{_vb_w_}" height="{_vb_h_}" fill="#ffffff" />'
_ftr_ = '</svg>'
_svg_ = _hdr_ + _clr_ + '\n'.join(_svg_) + _ftr_
rt.tile([_svg_])

In [None]:
# Collect the nodes reached by the breadth-first sweep, plus the level at which each was first seen.
_in_set_, _at_level_lu_ = dumbBreadthFirstKeepers(_root_, 1)
# Keep only the edges whose two endpoints were both reached.
_both_ends_kept_ = pl.col(src_col).is_in(_in_set_) & pl.col(dst_col).is_in(_in_set_)
_df_             = df_edges.filter(_both_ends_kept_)
g                = rt.createNetworkXGraph(_df_, _relates_)
#_pos_           = nx.kamada_kawai_layout(g)
_pos_            = rt.hyperTreeLayout(g, roots=[_root_])
# One colour per level; the colour lookup is keyed by the node id as a string.
_node_colors_    = {}
for _level_, _nodes_at_level_ in _at_level_lu_.items():
    _color_ = rt.co_mgr.getColor(_level_)
    _node_colors_.update({str(_node_): _color_ for _node_ in _nodes_at_level_})
rt.linkNode(_df_, _relates_, _pos_, w=600, h=600, draw_labels=True, node_color=_node_colors_)

In [None]:
# Eight identical circles (radius 4, all at the origin) handed to the circle packer;
# the cell displays whatever rtsvg.CirclePacker returns.
_circles_ = [(0.0, 0.0, 4.0) for _ in range(8)]
rtsvg.CirclePacker(rt, _circles_)

In [None]:
# Not a great idea to do tree traversals using a dataframe...
# ... would be more efficient to use a graph (or even a dictionary)
def percentExtinctUnderNode(node_id):
    """Tally the subtree rooted at `node_id` (the node itself included).

    Despite the name, no percentage is computed. Returns a pair
    (extinct_sum, node_count): the sum of the 'extinct' values of every node in
    the subtree and the number of nodes in it. 'extinct' is presumably a 0/1
    flag, in which case extinct_sum / node_count is the extinct fraction.

    Recurses once per node and runs two dataframe filters per call (the node's
    own row here, its children in childrenOf).
    """
    _own_extinct_ = df_nodes.filter(pl.col('node_id') == node_id)['extinct'][0]
    _extinct_sum_, _node_count_ = _own_extinct_, 1
    # A leaf has no children, so the loop is skipped and (own value, 1) is returned.
    for _child_ in childrenOf(node_id):
        _sub_extinct_, _sub_count_ = percentExtinctUnderNode(_child_)
        _extinct_sum_ += _sub_extinct_
        _node_count_  += _sub_count_
    return _extinct_sum_, _node_count_
# percentExtinctUnderNode(_root_)
# len(df_nodes.filter(pl.col('extinct') == 1)), len(df_nodes.filter(pl.col('extinct') == 0)), len(df_nodes) 