In [None]:
import itertools as it

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import polars as pl
import scipy
import seaborn as sns
from teeplot import teeplot as tp


In [None]:
# Value forwarded as `teeplot_subdir=` to every tp.tee / tp.teed call in this
# notebook. Presumably the subdirectory teeplot saves figures under — TODO confirm.
teeplot_subdir = "adaptive-evolution-rate.ipynb"


In [None]:
def load_and_process(url: str, mutation: str) -> pl.DataFrame:
    """Read one parquet dataset and annotate it for the analyses below.

    The frame read from ``url`` is collapsed to a single row per
    (Task, Treatment, Run ID, Generation Born) group by taking the first
    value of every other column, and then gains two columns:

    - ``"Components"``: an integer looked up from ``"Task"`` in a fixed
      table via ``replace_strict``.
    - ``"Mutation"``: the ``mutation`` label, repeated on every row.

    Args:
        url: Location of the parquet file, handed to ``pl.read_parquet``.
        mutation: Label stored in the ``"Mutation"`` column.

    Returns:
        The collapsed, annotated polars DataFrame.
    """
    # Task name -> value stored in the "Components" column.
    components_by_task = {
        "AND": 2,
        "ANDNOT": 3,
        "NAND": 1,
        "NOR": 4,
        "NOT": 1,
        "OR": 3,
        "ORNOT": 2,
        "XOR": 4,
        "EQUALS": 5,
    }
    key_columns = ["Task", "Treatment", "Run ID", "Generation Born"]

    raw = pl.read_parquet(url, use_pyarrow=True)
    collapsed = raw.group_by(key_columns).first()

    return collapsed.with_columns(
        pl.col("Task").replace_strict(components_by_task).alias("Components"),
        pl.lit(mutation).alias("Mutation"),
    )


In [None]:
# First dataset; every row is tagged "poisson" in the "Mutation" column.
df1 = load_and_process("https://osf.io/emh23/download", "poisson")


In [None]:
# Second dataset; every row is tagged "per site" in the "Mutation" column.
df2 = load_and_process("https://osf.io/gk2ty/download", "per site")


In [None]:
# Stack both datasets into one frame; the "Mutation" column added by
# load_and_process tells the two sources apart.
df = pl.concat([df1, df2])


In [None]:
# Smallest "Generation Born" among the rows flagged "has task", for every
# (Task, Treatment, Run ID, Components, Mutation) group. Groups without any
# flagged row do not appear in dfz at all.
dfz = (
    df.filter(pl.col("has task"))
    .group_by("Task", "Treatment", "Run ID", "Components", "Mutation")
    .agg(pl.col("Generation Born").min())
)


In [None]:
# Expand every dfz row (earliest "Generation Born" with the task, per run /
# treatment / task) into a per-generation boolean series "Has Task", then add
# all-False series for the key combinations of df that have no dfz row.
maxgen = dfz["Generation Born"].max()

# Generation axis 0 .. maxgen - 1, shared by all records; built once rather
# than twice per row.
# NOTE(review): np.arange excludes `maxgen` itself, so a run whose earliest
# flagged generation equals `maxgen` never shows "Has Task" as True on this
# axis — confirm whether the axis should extend one generation further.
generations = np.arange(maxgen)

records = []
records2 = set()  # key tuples that already have a record from dfz
for row in dfz.iter_rows(named=True):
    records.append(
        pd.DataFrame(
            {
                "Task": row["Task"],
                "Treatment": row["Treatment"],
                "Run ID": row["Run ID"],
                "Generation": generations,
                "Components": row["Components"],
                # True from the earliest flagged generation onward.
                "Has Task": row["Generation Born"] <= generations,
                "mutation": row["Mutation"],
            },
        ),
    )
    records2.add(
        (row["Run ID"], row["Treatment"], row["Task"], row["Components"], row["Mutation"]),
    )

# Only the key columns are needed to enumerate the combinations, so convert
# just those to pandas instead of the whole frame.
key_columns = ["Run ID", "Treatment", "Task", "Components", "Mutation"]
for v, _ in df.select(key_columns).to_pandas().groupby(
    key_columns,
    observed=True,
):
    if v not in records2:
        # Report each combination that is filled in as never having the task.
        print(v)
        records.append(
            pd.DataFrame(
                {
                    "Task": v[2],
                    "Treatment": v[1],
                    "Run ID": v[0],
                    "Components": v[3],
                    "Generation": generations,
                    "Has Task": False,
                    "mutation": v[4],
                },
            ),
        )

dfz_ = pd.concat(records, ignore_index=True)


In [None]:
# Line plot of "Has Task" against "Generation": one column of panels per Task,
# one row per mutation label, one hue per Treatment.
# The call is kept exactly as written; teeplot appears to derive output
# filenames from the keyword arguments — TODO confirm before reordering them.
tp.tee(
    sns.relplot,
    # Keep every 16th generation only; reset_index() also turns the previous
    # index into an "index" column.
    data=dfz_[
        dfz_["Generation"] % 16 == 0
    ].reset_index(),
    col="Task",
    row="mutation",
    x="Generation",
    y="Has Task",
    hue="Treatment",
    kind="line",
    errorbar="ci",
    # Code string handed to teeplot; presumably run after plotting to clip the
    # x axis to 0-600 — TODO confirm.
    teeplot_postprocess="plt.xlim(0, 600)",
    teeplot_subdir=teeplot_subdir,
)


In [None]:
# Same line plot as the per-Task figure, but with one column of panels per
# "Components" value instead of per Task.
# The call is kept exactly as written; teeplot appears to derive output
# filenames from the keyword arguments — TODO confirm before reordering them.
tp.tee(
    sns.relplot,
    # Keep every 16th generation only; reset_index() also turns the previous
    # index into an "index" column.
    data=dfz_[
        dfz_["Generation"] % 16 == 0
    ].reset_index(),
    col="Components",
    row="mutation",
    x="Generation",
    y="Has Task",
    hue="Treatment",
    kind="line",
    errorbar="ci",
    # Code string handed to teeplot; presumably run after plotting to clip the
    # x axis to 0-600 — TODO confirm.
    teeplot_postprocess="plt.xlim(0, 600)",
    teeplot_subdir=teeplot_subdir,
)


In [None]:
# One figure per (mutation label, error-bar style): percent of rows with the
# task over generations, one panel per "Components" value.

# Raw treatment labels (two naming schemes) -> display names. Labels missing
# from this table become NaN under Series.map.
treatment_labels = {
    "Slip-": "Baseline",
    "Slip-_Long": "Long-genome",
    "Slip+": "Slip-duplicate",
    "Baseline-Treatment": "Baseline",
    "Long-Ancestor-Control-Treatment": "Long-genome",
    "Slip-duplicate": "Slip-duplicate",
}
treatment_order = ["Baseline", "Long-genome", "Slip-duplicate"]

for mutation in dfz_["mutation"].unique():
    # The plotted data depends only on the mutation label, so it is prepared
    # once here and reused for both error-bar styles.
    keep = (dfz_["Generation"] % 16 == 0) & (dfz_["mutation"] == mutation)
    dftmp = dfz_[keep].reset_index()
    dftmp["Treatment"] = dftmp["Treatment"].map(treatment_labels)
    dftmp["Task Complexity"] = dftmp["Components"]
    # Boolean -> 0 / 100 so the plotted mean reads as a percentage.
    dftmp["Has Task"] = dftmp["Has Task"] * 100

    for errorbar in ["se", "ci"]:
        with tp.teed(
            sns.relplot,
            data=dftmp,
            col="Task Complexity",
            x="Generation",
            y="Has Task",
            hue="Treatment",
            hue_order=treatment_order,
            style="Treatment",
            style_order=treatment_order,
            aspect=0.9,
            height=2,
            col_wrap=3,
            kind="line",
            errorbar=errorbar,
            facet_kws=dict(margin_titles=True),
            teeplot_postprocess="plt.xlim(0, 600)",
            teeplot_subdir=teeplot_subdir,
            teeplot_outattrs={"mutation": mutation},
        ) as g:
            g.set(
                ylabel="Percent\nReplicates",
                yticks=[0, 25, 50, 75, 100],
                yticklabels=["0", "", "50", "", "100"],
            )
            g.set_titles(col_template="Task Complexity\nMin {col_name} NAND")
            # Dashed reference line at 100 percent in every panel.
            for ax in g.axes.flat:
                ax.axhline(100.0, color="black", linewidth=1, linestyle="--")
            sns.move_legend(
                g,
                "lower center",
                bbox_to_anchor=(0.68, 0.18),
                ncol=1,
                title=None,
                frameon=False,
            )


In [None]:
# Figures for the "poisson" mutation label only: percent of rows with the task
# over generations, one panel per "Components" value, drawn once per
# error-bar style.
# NOTE(review): this looks like it repeats the "poisson" iteration of the
# per-mutation loop in the preceding cell, with the same teeplot_outattrs but
# a shorter treatment-label table — confirm whether this cell is still needed.
dftmp = dfz_[
    (dfz_["Generation"] % 16 == 0)
    & (dfz_["mutation"] == "poisson")
].reset_index()
# Treatment labels missing from this table become NaN under Series.map.
dftmp["Treatment"] = dftmp["Treatment"].map(
    {
        "Slip-": "Baseline",
        "Slip-_Long": "Long-genome",
        "Slip+": "Slip-duplicate",
    },
)
dftmp["Task Complexity"] = dftmp["Components"]
# Boolean -> 0 / 100 so the plotted mean reads as a percentage.
dftmp["Has Task"] *= 100
for errorbar in ["se", "ci"]:
    with tp.teed(
        sns.relplot,
        data=dftmp,
        col="Task Complexity",
        x="Generation",
        y="Has Task",
        hue="Treatment",
        hue_order=["Baseline", "Long-genome", "Slip-duplicate"],
        style="Treatment",
        style_order=["Baseline", "Long-genome", "Slip-duplicate"],
        aspect=0.9,
        height=2,
        col_wrap=3,
        kind="line",
        errorbar=errorbar,
        facet_kws=dict(margin_titles=True),
        teeplot_postprocess="plt.xlim(0, 600)",
        teeplot_subdir=teeplot_subdir,
        teeplot_outattrs={"mutation": "poisson"},
    ) as g:
        g.set(
            ylabel="Percent\nReplicates",
            yticks=[0, 25, 50, 75, 100],
            yticklabels=["0", "", "50", "", "100"],
        )
        g.set_titles(col_template="Task Complexity\nMin {col_name} NAND")
        # Dashed reference line at 100 percent in every panel.
        for ax in g.axes.flat:
            ax.axhline(100.0, color="black", linewidth=1, linestyle="--")
        sns.move_legend(
            g, "lower center",
            bbox_to_anchor=(0.68, 0.18), ncol=1, title=None, frameon=False,
        )
        # The stray trailing `kind="violin",` statement was removed: it only
        # bound an unused one-element tuple to `kind`.


In [None]:
# Per task: Fisher's exact test on a 2x2 table comparing the two treatments
# at generation 599.
# NOTE(review): rows are not filtered on the "mutation" column, so any
# mutation label carrying these treatment names is pooled — confirm intended.
treatments = ["Baseline-Treatment", "Slip-duplicate"]
fil = dfz_[(dfz_["Generation"] == 599) & dfz_["Treatment"].isin(treatments)]

for task in fil["Task"].unique():
    is_task = fil["Task"] == task
    # Rows: has task / lacks task. Columns: treatments, in the order above.
    tab = [
        [
            ((fil["Treatment"] == treatment) & is_task & outcome).sum()
            for treatment in treatments
        ]
        for outcome in (fil["Has Task"], ~fil["Has Task"])
    ]
    print(task, tab)
    print("    ", scipy.stats.fisher_exact(tab))


In [None]:
# Per task: Fisher's exact test on a 2x2 table comparing the two treatments
# at generation 599.
# NOTE(review): rows are not filtered on the "mutation" column, so any
# mutation label carrying these treatment names is pooled — confirm intended.
treatments = ["Long-Ancestor-Control-Treatment", "Slip-duplicate"]
fil = dfz_[(dfz_["Generation"] == 599) & dfz_["Treatment"].isin(treatments)]

for task in fil["Task"].unique():
    is_task = fil["Task"] == task
    # Rows: has task / lacks task. Columns: treatments, in the order above.
    tab = [
        [
            ((fil["Treatment"] == treatment) & is_task & outcome).sum()
            for treatment in treatments
        ]
        for outcome in (fil["Has Task"], ~fil["Has Task"])
    ]
    print(task, tab)
    print("    ", scipy.stats.fisher_exact(tab))


In [None]:
# Per "Components" value (ascending): Fisher's exact test on a 2x2 table
# comparing the two treatments at generation 599, pooling all tasks that
# share the value.
# NOTE(review): rows are not filtered on the "mutation" column, so any
# mutation label carrying these treatment names is pooled — confirm intended.
treatments = ["Long-Ancestor-Control-Treatment", "Slip-duplicate"]
fil = dfz_[dfz_["Generation"] == 599]

for components in sorted(fil["Components"].unique()):
    is_level = fil["Components"] == components
    # Rows: has task / lacks task. Columns: treatments, in the order above.
    tab = [
        [
            ((fil["Treatment"] == treatment) & is_level & outcome).sum()
            for treatment in treatments
        ]
        for outcome in (fil["Has Task"], ~fil["Has Task"])
    ]
    print(components, tab)
    print("    ", scipy.stats.fisher_exact(tab))
