In [None]:
# Load the `watermark` IPython extension so the %watermark magic is available.
%load_ext watermark


In [None]:
import os

import alifedata_phyloinformatics_convert as apc
from hstrat._auxiliary_lib import (
    alifestd_downsample_tips_asexual,
    alifestd_downsample_tips_clade_asexual,
    alifestd_join_roots,
    alifestd_mark_origin_time_delta_asexual,
    alifestd_to_working_format,
    alifestd_try_add_ancestor_list_col,
)
import iplotx as ipx
from matplotlib import pyplot as plt
import pandas as pd
from teeplot import teeplot as tp


In [None]:
# Print an environment report via the watermark extension, for provenance.
# NOTE(review): flags presumably select date/time, machine info, Python
# version, and versions of imported packages (-iv) -- confirm against the
# watermark documentation.
%watermark -diwmuv -iv


In [None]:
# Subdirectory name handed to teeplot when saving figures: read from the
# NOTEBOOK_NAME environment variable, falling back to a fixed default.
teeplot_subdir = os.getenv("NOTEBOOK_NAME", "2026-02-12-billion-tip-phylos")
teeplot_subdir


## Prep Data


In [None]:
# Download the first phylogeny and run the alifestd prep steps in order
# (top to bottom): join roots, convert to working format, mark origin-time
# deltas, then try to add an ancestor-list column.
phylo1_df = (
    pd.read_parquet("https://osf.io/download/485tx")
    .pipe(alifestd_join_roots)
    .pipe(alifestd_to_working_format)
    .pipe(alifestd_mark_origin_time_delta_asexual)
    .pipe(alifestd_try_add_ancestor_list_col)
)
phylo1_df


In [None]:
# Download the second phylogeny and run the alifestd prep steps in order
# (top to bottom): join roots, convert to working format, mark origin-time
# deltas, then try to add an ancestor-list column.
phylo2_df = (
    pd.read_parquet("https://osf.io/download/vkzdw")
    .pipe(alifestd_join_roots)
    .pipe(alifestd_to_working_format)
    .pipe(alifestd_mark_origin_time_delta_asexual)
    .pipe(alifestd_try_add_ancestor_list_col)
)
phylo2_df


In [None]:
# Ratio of the summed `origin_time_delta` column, phylo1 over phylo2.
# NOTE(review): presumably this compares total branch length between the two
# phylogenies -- confirm the column's meaning against hstrat.
phylo1_delta_total = phylo1_df["origin_time_delta"].sum()
phylo2_delta_total = phylo2_df["origin_time_delta"].sum()
phylo1_delta_total / phylo2_delta_total


## Whole-tree Sample


In [None]:
# Downsample each phylogeny to 250 tips (fixed seed) and convert the result
# to a dendropy tree; both phylogenies get identical treatment, in order.
tree1, tree2 = [
    apc.RosettaTree(
        alifestd_downsample_tips_asexual(phylo_df, 250, seed=1),
    ).as_dendropy
    for phylo_df in (phylo1_df, phylo2_df)
]


In [None]:
# Ladderize both trees before drawing (called for its effect on the tree
# objects; the return value is unused).
tree1.ladderize()
tree2.ladderize()

# Drawing options shared by both panels.
whole_tree_style = dict(
    layout="vertical",
    edge_linewidth=1.5,
    margins=0.0,
)

# teeplot wraps plt.subplots; the side-by-side panels are drawn inside the
# context block.
with tp.teed(
    plt.subplots,
    1,
    2,
    figsize=(4, 2),
    teeplot_outattrs={"clade": False},
    teeplot_subdir=teeplot_subdir,
) as teed:
    fig, (ax1, ax2) = teed
    ipx.plotting.tree(tree1, ax=ax1, **whole_tree_style)
    ipx.plotting.tree(tree2, ax=ax2, **whole_tree_style)


## Clade Sample


In [None]:
# Clade-based downsample of each phylogeny (3,000 requested, fixed seed),
# converted to a dendropy tree. Rebinds `tree1`/`tree2` from the whole-tree
# section above.
tree1, tree2 = [
    apc.RosettaTree(
        alifestd_downsample_tips_clade_asexual(phylo_df, 3_000, seed=1),
    ).as_dendropy
    for phylo_df in (phylo1_df, phylo2_df)
]


In [None]:
# Ladderize both trees before drawing (called for its effect on the tree
# objects; the return value is unused).
tree1.ladderize()
tree2.ladderize()

# Drawing options shared by both panels: thin edges and a slightly negative
# margin.
clade_tree_style = dict(
    layout="vertical",
    edge_linewidth=0.5,
    margins=-0.02,
)

# teeplot wraps plt.subplots; the side-by-side panels are drawn inside the
# context block.
with tp.teed(
    plt.subplots,
    1,
    2,
    figsize=(6, 2),
    gridspec_kw={
        "wspace": 0.0,
        "hspace": 0.0,
    },
    teeplot_subdir=teeplot_subdir,
    teeplot_outattrs={"clade": True},
) as teed:
    fig, (ax1, ax2) = teed
    ipx.plotting.tree(tree1, ax=ax1, **clade_tree_style)
    ipx.plotting.tree(tree2, ax=ax2, **clade_tree_style)
    # NOTE(review): tight_layout() may recompute subplot spacing and so
    # override the wspace/hspace requested via gridspec_kw -- confirm intended.
    fig.tight_layout()
