In [None]:
# Enable the %watermark line magic that a later cell uses to report environment details.
%load_ext watermark


In [None]:
from IPython.display import display
import pandas as pd
import polars as pl
import seaborn as sns
from teeplot import teeplot as tp


In [None]:
# Print environment/provenance information for this run. Runs after the import
# cell so that -iv can report versions of the imported packages.
# NOTE(review): see the watermark docs for the combined short flags (-diwmuv).
%watermark -diwmuv -iv


In [None]:
# Passed as `teeplot_subdir=` to every tp.teed(...) call in the plotting cells.
teeplot_subdir = "wse-denovo-spatial2d-poisson-genomes"
teeplot_subdir


## Prep Data


In [None]:
# Download the parquet table through pandas, then hand it over to polars.
dfxs = pl.from_pandas(pd.read_parquet("https://osf.io/rpbwg/download"))

# Summary statistics plus the first and last rows, as a quick sanity check.
display(dfxs.describe(), dfxs.head(), dfxs.tail())


In [None]:
def correct_endianness(name: str) -> pl.Expr:
    """Build an expression that remaps the hex offset of field ``name``.

    Reads the ``<name>_hexoffset`` and ``<name>_hexwidth`` columns. The
    offset's position inside its aligned group of 8 hex characters is
    mirrored (``position XOR 7``), and ``width - 1`` is then subtracted.

    NOTE(review): presumably this compensates for the byte order of 8-hex-digit
    words in ``data_hex`` -- confirm against the data producer.

    Args:
        name: Field name prefix; ``name + "_hexoffset"`` and
            ``name + "_hexwidth"`` must be columns of the target frame.

    Returns:
        A polars expression evaluating to the remapped offset.
    """
    hexoffset = pl.col(name + "_hexoffset")
    hexwidth = pl.col(name + "_hexwidth")

    within_group = hexoffset.mod(8)  # position inside the 8-character group
    group_start = hexoffset - within_group  # offset rounded down to a multiple of 8
    mirrored = within_group.xor(7)  # 0<->7, 1<->6, 2<->5, 3<->4

    # Add before subtracting, exactly as in the left-to-right formula
    # offset - offset%8 + (offset%8 ^ 7) - (width - 1).
    return group_start + mirrored - (hexwidth - 1)


In [None]:
# Layout of the fields packed into `data_hex`: name -> (hexoffset, hexwidth),
# both measured in hex characters, before correct_endianness is applied.
hex_fields = {
    "netbencount": (0, 4),
    "mutator": (4, 2),
    "delcount": (6, 2),
    "counter": (8, 8),
    "snapshot_netbencount": (16, 4),
    "snapshot_counter": (20, 4),
}

dfys = dfxs.with_columns(
    # Constant <field>_hexoffset / <field>_hexwidth columns, interleaved per field.
    **{
        f"{field}_{suffix}": pl.lit(value, dtype=pl.UInt8)
        for field, layout in hex_fields.items()
        for suffix, value in zip(("hexoffset", "hexwidth"), layout)
    },
    # Multiplier applied to the decoded snapshot_counter in a later cell.
    snapshot_counter_dilation=pl.lit(256, dtype=pl.UInt16),
).with_columns(
    # Overwrite every offset with its remapped value.
    **{
        f"{field}_hexoffset": correct_endianness(field)
        for field in hex_fields
    },
)

display(dfys.describe(), dfys.head(), dfys.tail())


In [None]:
# Report the hex string length and the remapped offsets. Each
# `.unique().item()` also acts as a check that the column holds a single
# value, since `.item()` fails when there is more than one element.
layout_report = [
    f'{dfys["data_hex"].str.len_bytes().unique().item()=}',
    f'{dfys["delcount_hexoffset"].unique().item()=}',
    f'{dfys["mutator_hexoffset"].unique().item()=}',
    f'{dfys["snapshot_netbencount_hexoffset"].unique().item()=}',
    f'{dfys["snapshot_counter_hexoffset"].unique().item()=}',
]
print("\n".join(layout_report))


In [None]:
# Decode fixed-width integer fields out of the hex-encoded `data_hex` string.
# Slice positions come from the layout columns on `dfys`; `.unique().item()`
# collapses each constant column to a plain scalar for `str.slice`.
dfzs = dfys.with_columns(
    # Restored the `.str.slice(...)` call that was garbled on this field; it
    # mirrors the snapshot_* fields below using the mutator layout columns.
    mutator=pl.col("data_hex")
    .str.slice(
        int(dfys["mutator_hexoffset"].unique().item()),
        int(dfys["mutator_hexwidth"].unique().item()),
    )
    .str.to_integer(base=16),
    snapshot_netbencount=pl.col("data_hex")
    .str.slice(
        int(dfys["snapshot_netbencount_hexoffset"].unique().item()),
        int(dfys["snapshot_netbencount_hexwidth"].unique().item()),
    )
    .str.to_integer(base=16),
    snapshot_counter=pl.col("data_hex")
    .str.slice(
        int(dfys["snapshot_counter_hexoffset"].unique().item()),
        int(dfys["snapshot_counter_hexwidth"].unique().item()),
    )
    .str.to_integer(base=16),
).with_columns(
    # Scale the decoded counter by its per-row dilation factor.
    snapshot_counter=pl.col("snapshot_counter")
    * pl.col("snapshot_counter_dilation"),
)
dfzs


In [None]:
# Sanity check on the decode: only the values 1 and 100 may appear in `mutator`.
observed_mutators = set(dfzs["mutator"].unique())
assert observed_mutators.issubset({1, 100}), dfzs["mutator"].unique()


In [None]:
# Keep one row per replicate: the first row encountered in each group.
# maintain_order=True makes the order of the output rows follow the input,
# so the resulting frame is identical across kernel restarts (polars does not
# guarantee group order otherwise).
df = dfzs.group_by("replicate", maintain_order=True).first()


## Plot Data


In [None]:
# snapshot_netbencount against the number of available beneficial mutations,
# restricted to rows whose decoded mutator value is 100.
mutator100 = df.filter(pl.col("mutator") == 100)

with tp.teed(
    sns.catplot,
    data=mutator100,
    x="CEREBRASLIB_HYPERMUT_NUM_AVAIL_BEN_MUTS",
    y="snapshot_netbencount",
    teeplot_subdir=teeplot_subdir,
) as g:
    pass  # nothing to customize on the figure inside the context


In [None]:
# snapshot_counter, one facet column per CEREBRASLIB_HYPERMUT_NUM_AVAIL_BEN_MUTS
# value, restricted to rows whose decoded mutator value is 100.
mutator100 = df.filter(pl.col("mutator") == 100)

with tp.teed(
    sns.catplot,
    data=mutator100,
    col="CEREBRASLIB_HYPERMUT_NUM_AVAIL_BEN_MUTS",
    y="snapshot_counter",
    teeplot_subdir=teeplot_subdir,
) as g:
    pass  # nothing to customize on the figure inside the context
