In [None]:
# Load the `watermark` IPython extension so the `%watermark` magic used in a
# later cell is available.
%load_ext watermark


In [None]:
from IPython.display import display
import pandas as pd
import polars as pl
import seaborn as sns
from teeplot import teeplot as tp

from pylib.munge._calc_fixprobs_from_traits import calc_fixprobs_from_traits
from pylib.viz._get_nben_cmap import get_nben_cmap
from pylib.viz._get_nben_hue_norm import get_nben_hue_norm
from pylib.viz._size_fixation_heatmap import size_fixation_heatmap


In [None]:
# Print a provenance watermark for this session. The flag letters select which
# details are reported (see the watermark docs); `-iv` is expected to list the
# versions of the packages imported above -- TODO confirm against watermark docs.
%watermark -diwmuv -iv


In [None]:
# Name passed as `teeplot_subdir=` to the `tp.tee` calls in this notebook, so
# all figures saved from here share one output subdirectory setting.
teeplot_subdir = "cupy-5050-spatial1d-traits"
teeplot_subdir


## Prep Data


In [None]:
# Fetch the source data (a parquet file served from OSF) into a pandas frame.
dfpd = pd.read_parquet("https://osf.io/s67d2/download")
# Preview: summary statistics, then the first and last rows.
display(dfpd.describe(), dfpd.head(), dfpd.tail())


In [None]:
# Single-element unpacking doubles as a sanity check: it raises unless the
# "tilePopSize" column holds exactly one distinct value.
[apn] = dfpd["tilePopSize"].unique()  # agents per node
apn


In [None]:
# Run the project helper that (per its name) computes fixation probabilities
# from the raw trait records, then preview the result.
dfxs = calc_fixprobs_from_traits(dfpd)
display(dfxs.describe(), dfxs.head(), dfxs.tail())


## Size Fixation Curves


In [None]:
# Postprocessing snippet handed to teeplot. It does not depend on the error-bar
# style, so it is built once before the loop.
curves_postprocess = f"""
teed.set(xscale="log", ylim=(-0.02, 1.02))
teed.figure.suptitle("n={apn} agents per node", x=0.63, y=1.02)
teed.set_titles(col_template="{{col_var}} = {{col_name:.0f}}")
sns.move_legend(
    teed,
    "lower center",
    bbox_to_anchor=(0.25, 0.97),
    frameon=False,
    ncol=2,
    title=None,
)
"""

# One faceted line plot per error-bar style; `None` draws no error bars.
for errorbar in ("sd", "se", "ci", None):
    tp.tee(
        sns.relplot,
        data=dfxs,
        x="population size",
        y="fixation probability",
        hue="genotype",
        hue_order=["normomutator", "hypermutator"],
        col="available beneficial mutations",
        style="genotype",
        style_order=["normomutator", "hypermutator"],
        aspect=1.5,
        col_wrap=3,
        errorbar=errorbar,
        height=2,
        kind="line",
        markers=True,
        teeplot_outexclude="post",
        teeplot_subdir=teeplot_subdir,
        teeplot_postprocess=curves_postprocess,
    )


In [None]:
# Rows for the hypermutator genotype only; the selection is the same for every
# error-bar style, so it is taken once.
hypermutator_curves_df = dfxs[dfxs["genotype"] == "hypermutator"]

# Postprocessing snippet handed to teeplot (identical for every iteration).
hypermutator_curves_postprocess = f"""
teed.set(xscale="log", ylim=(-0.02, 1.02))
teed.figure.suptitle("n={apn} agents per node", x=0.63, y=1.02)
teed.set_titles(col_template='{{col_var}} = {{col_name:.0f}}')
"""

# One line plot per error-bar style, colored by the number of available
# beneficial mutations using the project colormap and hue normalization.
for errorbar in ("sd", "se", "ci", None):
    tp.tee(
        sns.relplot,
        data=hypermutator_curves_df,
        x="population size",
        y="fixation probability",
        hue="available beneficial mutations",
        aspect=1.5,
        height=3,
        kind="line",
        errorbar=errorbar,
        linewidth=2,
        markers=True,
        palette=get_nben_cmap(),
        hue_norm=get_nben_hue_norm(),
        teeplot_outexclude="post",
        teeplot_postprocess=hypermutator_curves_postprocess,
        teeplot_subdir=teeplot_subdir,
    )


## Size Fixation Cliffplot


In [None]:
from matplotlib import pyplot as plt
from matplotlib import ticker as mpl_ticker


In [None]:
# Duplicate the column under a name that contains a line break. The cliffplot
# cell below plots this copy on its y axis (presumably so the axis label wraps
# onto two lines).
dfxs["available\nbeneficial mutations"] = dfxs[
    "available beneficial mutations"
]


In [None]:
# Project helpers that, going by their names, fill the area above / under the
# plotted lines along x. Imported once here instead of on every loop pass.
from pylib.viz._fill_above_linesx import fill_above_linesx
from pylib.viz._fill_under_linesx import fill_under_linesx

# Draw the cliffplot twice: with confidence-interval bands and without.
for errorbar in "ci", None:
    # With teeplot_callback=True, tee hands back a `saveit` callable together
    # with the seaborn grid; the figure is customized below and `saveit()` is
    # invoked at the very end of the iteration.
    saveit, teed = tp.tee(
        sns.relplot,
        data=dfxs[dfxs["genotype"] == "hypermutator"],
        y="available\nbeneficial mutations",
        x="fixation probability",
        col="population size",
        hue="genotype",
        hue_order=["normomutator", "hypermutator"],
        aspect=0.2,
        errorbar=errorbar,
        err_kws=dict(alpha=0.5),
        height=3,
        kind="line",
        markers=True,
        orient="y",
        teeplot_callback=True,
        teeplot_subdir=teeplot_subdir,
    )
    # Address the grid's own figure explicitly rather than relying on pyplot's
    # "current figure" or on a loop variable that outlives its loop.
    fig = teed.figure
    facet_axes = list(teed.axes.flat)

    sns.move_legend(
        teed,
        "lower center",
        bbox_to_anchor=(0.4, -0.05),
        frameon=False,
        ncol=2,
        title=None,
    )

    teed.set_titles(col_template="{col_var}")
    teed.set_xticklabels(rotation=-45)

    for ax in facet_axes:
        fill_above_linesx(ax, color=sns.color_palette("tab10")[0])
        fill_under_linesx(ax)

    # Keep the x label only on the middle facet and drop per-facet titles;
    # population size is conveyed by the shared log axis added below.
    middle_facet = (len(facet_axes) + 1) // 2
    for position, ax in enumerate(facet_axes, start=1):
        if position != middle_facet:
            ax.set_xlabel("")
        ax.set_title("")

    fig.subplots_adjust(wspace=0.12, top=0.85)

    # Position of the first facet (after the layout adjustment) anchors the
    # left edge of the shared axis.
    pos = facet_axes[0].get_position()

    # Add a very thin dummy axes near the top of the figure (y=0.88 in figure
    # coordinates) that only serves to carry a population-size scale.
    delta = 0.001  # small height for the dummy axes
    new_ax = fig.add_axes(
        [
            pos.x0,
            0.88,
            # NOTE(review): 6.7 looks hand-tuned so the dummy axes spans the
            # facet row; it likely needs revisiting if the facet count changes.
            pos.width * 6.7,
            delta,
        ],
    )

    # Log-scaled x axis spanning the population sizes present in the data
    new_ax.set_xscale("log")
    xvals = dfxs["population size"]
    xmin = xvals.min()
    xmax = xvals.max()
    new_ax.set_xlim(xmin, xmax)

    # Use built-in LogLocator and LogFormatterMathtext for base 10 labels
    new_ax.xaxis.set_ticks_position("top")
    new_ax.xaxis.set_major_locator(mpl_ticker.LogLocator(base=10))
    new_ax.xaxis.set_major_formatter(
        mpl_ticker.LogFormatterMathtext(base=10),
    )

    # Hide the y-axis of the dummy axes
    new_ax.get_yaxis().set_visible(False)

    # Hide spines of the dummy axes, leaving only the top one with the ticks
    new_ax.spines[["right", "bottom", "left"]].set_visible(False)

    # Add population size text in the upper left hand corner of the figure
    fig.text(0.02, 0.97, "population\nsize", ha="left", va="top")

    saveit()


## Size Fixation Heatmap


In [None]:
# Arguments forwarded to the project heatmap helper, gathered in one place.
heatmap_kwargs = dict(
    fixprobs_df=dfxs,
    genotype="hypermutator",
    index="available beneficial mutations",
    on="population size",
    values="fixation probability",
)
# Render the heatmap through teeplot (keyword order kept as before).
tp.tee(
    size_fixation_heatmap,
    **heatmap_kwargs,
    teeplot_subdir=teeplot_subdir,
)


## Quality Check: Number of Replicates Available


In [None]:
# Distinct population sizes as strings. Sorting happens before the string
# cast, so the order follows the column's original dtype rather than
# lexicographic order. The list is used in the next cell as the ordered
# categories of a polars Enum.
#
# `Series.unique()` yields the distinct values directly; grouping the whole
# frame and taking the first row of every column is not needed for that.
categories = (
    pl.DataFrame(dfxs)
    .get_column("population size")
    .unique()
    .sort()
    .cast(pl.String)
    .to_list()
)
categories


In [None]:
# Expression that re-encodes each population size as a member of an Enum whose
# category order is given by `categories`.
popsize_as_enum = (
    pl.col("population size").cast(pl.String).cast(pl.Enum(categories))
)
# polars copy of the fixation table with the extra categorical column.
dfy = pl.DataFrame(dfxs).with_columns(
    popsize_as_enum.alias("population size cat"),
)
display(dfy.describe(), dfy.head(), dfy.tail())


In [None]:
# Postprocessing snippet handed to teeplot: log y axis, figure title, facet
# titles, rotated tick labels, and legend placement.
replicates_postprocess = f"""
teed.set(yscale='log')
teed.figure.suptitle("n={apn} agents per node", x=0.63, y=1.02)
teed.set_titles(col_template='{{col_var}} = {{col_name:.0f}}')
for ax in teed.axes.flat:
    for label in ax.get_xticklabels():
        label.set_rotation(45)
        label.set_ha("right")
sns.move_legend(
    teed, "lower center",
    bbox_to_anchor=(0.25, 0.97),
    frameon=False,
    ncol=2,
    title=None,
)
"""

# Histogram of row counts per categorical population size, dodged by genotype
# and faceted by the number of available beneficial mutations.
tp.tee(
    sns.displot,
    data=dfy.to_pandas(),
    x="population size cat",
    hue="genotype",
    hue_order=["normomutator", "hypermutator"],
    col="available beneficial mutations",
    aspect=1.5,
    col_wrap=3,
    height=2,
    kind="hist",
    multiple="dodge",
    teeplot_outexclude="post",
    teeplot_postprocess=replicates_postprocess,
    teeplot_subdir=teeplot_subdir,
)


## Quality Check: Has Reached Fixation?


In [None]:
# Hypermutator rows only, as an independent copy with a fresh 0..n-1 index.
is_hypermutator = dfxs["genotype"] == "hypermutator"
dfxs_ = dfxs[is_hypermutator].copy().reset_index(drop=True)


In [None]:
# A row is flagged "unfixed" when its fixation probability is not exactly 1.0
# and is also above the 0.001 threshold.
fixprob = dfxs_["fixation probability"]
dfxs_["unfixed"] = fixprob.ne(1.0) & fixprob.gt(0.001)
# Whether any such row exists; the next plot adapts its faceting to this.
any_unfixed = dfxs_["unfixed"].any()
any_unfixed


In [None]:
# Facet by mutation count only when there is something to plot; with no
# unfixed rows the faceting options are switched off.
unfixed_col = "available beneficial mutations" if any_unfixed else None
unfixed_col_wrap = 3 if any_unfixed else None

# Postprocessing snippet handed to teeplot; the legend move is attempted on a
# best-effort basis inside the snippet.
unfixed_postprocess = f"""
teed.set(xscale='log', ylim=(-0.02, 1.02))
teed.figure.suptitle("n={apn} agents per node", x=0.63, y=1.02)
teed.set_titles(col_template='{{col_var}} = {{col_name:.0f}}')
try:
    sns.move_legend(
        teed,
        "lower center",
        bbox_to_anchor=(0.25, 0.97),
        frameon=False,
        ncol=2,
        title=None,
    )
except ValueError:
    pass
"""

# Scatter of the rows flagged as unfixed.
tp.tee(
    sns.relplot,
    data=dfxs_[dfxs_["unfixed"]],
    x="population size",
    y="fixation probability",
    hue="genotype",
    hue_order=["normomutator", "hypermutator"],
    col=unfixed_col,
    style="genotype",
    style_order=["normomutator", "hypermutator"],
    aspect=1.5,
    col_wrap=unfixed_col_wrap,
    height=2,
    kind="scatter",
    markers=bool(any_unfixed),
    teeplot_outexclude="post",
    teeplot_postprocess=unfixed_postprocess,
    teeplot_subdir=teeplot_subdir,
)


In [None]:
# One histogram bin per distinct population size.
n_popsize_bins = dfxs_["population size"].nunique()

# Postprocessing snippet handed to teeplot: figure title, facet titles, and
# legend placement.
unfixed_fill_postprocess = f"""
teed.figure.suptitle("n={apn} agents per node", x=0.63, y=1.02)
teed.set_titles(col_template='{{col_var}} = {{col_name:.0f}}')
sns.move_legend(
    teed,
    "lower center",
    bbox_to_anchor=(0.25, 0.97),
    frameon=False,
    ncol=2,
    title=None,
)
"""

# Filled (proportion) histogram of fixed vs. unfixed rows across population
# sizes on a log x axis, faceted by the number of available beneficial mutations.
tp.tee(
    sns.displot,
    data=dfxs_.reset_index(drop=True),
    x="population size",
    hue="unfixed",
    col="available beneficial mutations",
    col_wrap=3,
    aspect=1.5,
    height=2,
    multiple="fill",
    bins=n_popsize_bins,
    log_scale=(True, False),
    teeplot_outexclude="post",
    teeplot_postprocess=unfixed_fill_postprocess,
    teeplot_subdir=teeplot_subdir,
)
