In [None]:
# Load the `watermark` IPython extension so the %watermark magic is available.
%load_ext watermark


In [None]:
from IPython.display import display
import pandas as pd
import seaborn as sns
from teeplot import teeplot as tp


In [None]:
# Print environment metadata through the watermark extension.
# NOTE(review): `-iv` should report versions of the imported packages; confirm
# the meaning of the remaining flags against the watermark documentation.
%watermark -diwmuv -iv


In [None]:
# Subdirectory name handed to each tp.tee call through `teeplot_subdir=`.
teeplot_subdir = "2024-11-07-scientific"
teeplot_subdir  # echo the value as this cell's output


## Prep Data


In [None]:
# Fetch the results table over the network (OSF download link).
df = pd.read_csv("https://osf.io/vrhk4/download")

# Preview summary statistics plus the first and last rows. A single display()
# call renders each argument in order and returns None, so no tuple of Nones
# is built and no trailing ";" is needed to suppress it.
display(df.describe(), df.head(), df.tail())


In [None]:
# Short question label: everything after the first 9 characters of "question"
# (presumably a fixed prefix shared by every value — TODO confirm).
df["Q"] = df["question"].str[9:]


In [None]:
# Number of answer choices = count of "|" occurrences in "choices", plus one.
df["num choices"] = df["choices"].str.count(r"\|").add(1)


In [None]:
# Scale the raw score by the number of choices.
# NOTE(review): presumably this normalizes so that uniform guessing lands near
# 1.0 regardless of how many choices a question offers — confirm.
df["adj score"] = df["score"].mul(df["num choices"])


In [None]:
# Shorter alias of the "tree representation" column; the plotting cells refer
# to it as "tree repr" (hue/style/row/groupby key).
df["tree repr"] = df["tree representation"]


In [None]:
# Preview the frame again: summary statistics plus the first and last rows.
# One display() call renders every argument in order and returns None, which
# replaces the tuple-of-calls form and its output-suppressing ";".
display(df.describe(), df.head(), df.tail())


## Tree Repr Comparison


In [None]:
# Tree-representation comparison, equivalence questions only.
# One faceted line plot per model: x = num_taxa, y = adj score, one line per
# "tree repr" value, columns = question, rows = tree source.
equivalence_rows = df[df["Q"].str.contains("equivalence")]

# Raw value -> display label. DataFrame.replace with a flat mapping rewrites
# matching values wherever they occur in the frame.
display_labels = {
    "sample_phylogeny_newick": "anon taxa",
    "scientific_phylogeny_newick": "bio taxa",
    "none": "blank",
    "equivalence_identical": "equivalence\nidentity",
    "equivalence_rotate_tree": "equivalence\nrotate one",
    "equivalence_shufflerotate_tree": "equivalence\nrotate shuffle",
    "equivalence_swap_taxa": "equivalence\nswap one",
    "equivalence_shuffle_taxa": "equivalence\nswap shuffle",
}

for model, model_rows in equivalence_rows.groupby("model"):
    # With teeplot_callback=True, tp.tee hands back a save callback together
    # with the plot object, so the grid can be adjusted before it is saved.
    save_figure, grid = tp.tee(
        sns.relplot,
        data=model_rows.replace(display_labels),
        x="num_taxa",
        y="adj score",
        hue="tree repr",
        style="tree repr",
        col="Q",
        row="tree source",
        aspect=1.2,
        height=1.7,
        facet_kws={"margin_titles": True},
        kind="line",
        palette="Set2",
        teeplot_callback=True,
        teeplot_outattrs={"model": model, "is_equivalence": True},
        teeplot_subdir=teeplot_subdir,
    )
    # Legend goes above the facets, untitled and frameless, in two columns.
    sns.move_legend(
        grid,
        "lower center",
        bbox_to_anchor=(0.45, 0.95),
        frameon=False,
        ncol=2,
        title=None,
    )
    grid.set(ylim=(-0.1, 2.1))
    # Horizontal guides: dotted at 1.0, dashed at 2.0.
    # NOTE(review): presumably chance level and best possible adjusted score
    # for these questions — confirm.
    for level, dash in ((1.0, ":"), (2.0, "--")):
        grid.refline(y=level, linestyle=dash, color="black", alpha=0.5)
    # Facet titles show only the facet value, without the variable name.
    grid.set_titles(col_template="{col_name}", row_template="{row_name}")
    grid.tight_layout()
    save_figure()


In [None]:
# Tree-representation comparison, every question that is NOT an equivalence
# question. One faceted line plot per model: x = num_taxa, y = adj score, one
# line per "tree repr" value, columns = question, rows = tree source.
non_equivalence_rows = df[~df["Q"].str.contains("equivalence")]

# Raw value -> display label. DataFrame.replace with a flat mapping rewrites
# matching values wherever they occur in the frame.
display_labels = {
    "sample_phylogeny_newick": "anon taxa",
    "scientific_phylogeny_newick": "bio taxa",
    "identify_most_related": "most_related",
    "none": "blank",
}

for model, model_rows in non_equivalence_rows.groupby("model"):
    # With teeplot_callback=True, tp.tee hands back a save callback together
    # with the plot object, so the grid can be adjusted before it is saved.
    save_figure, grid = tp.tee(
        sns.relplot,
        data=model_rows.replace(display_labels),
        x="num_taxa",
        y="adj score",
        hue="tree repr",
        style="tree repr",
        col="Q",
        row="tree source",
        aspect=1.2,
        height=1.7,
        facet_kws={"margin_titles": True},
        kind="line",
        palette="Set2",
        teeplot_callback=True,
        teeplot_outattrs={"model": model, "is_equivalence": False},
        teeplot_subdir=teeplot_subdir,
    )
    # Legend goes above the facets, untitled and frameless, in two columns.
    sns.move_legend(
        grid,
        "lower center",
        bbox_to_anchor=(0.4, 0.95),
        frameon=False,
        ncol=2,
        title=None,
    )
    grid.set(ylim=(-0.1, 3.1))
    # Horizontal guides: dotted at 1.0, dashed at 3.0.
    # NOTE(review): presumably chance level and best possible adjusted score
    # for these questions — confirm.
    for level, dash in ((1.0, ":"), (3.0, "--")):
        grid.refline(y=level, linestyle=dash, color="black", alpha=0.5)
    # Facet titles show only the facet value, without the variable name.
    grid.set_titles(col_template="{col_name}", row_template="{row_name}")
    grid.tight_layout()

    save_figure()


## Taxa Type Comparison


In [None]:
# Taxa-type comparison, equivalence questions only.
# One faceted line plot per model: x = num_taxa, y = adj score, one line per
# "tree source" value, columns = question, rows = tree representation.
equivalence_rows = df[df["Q"].str.contains("equivalence")]

# Raw value -> display label. DataFrame.replace with a flat mapping rewrites
# matching values wherever they occur in the frame.
display_labels = {
    "sample_phylogeny_newick": "anon taxa",
    "scientific_phylogeny_newick": "bio taxa",
    "none": "blank",
    "equivalence_identical": "equivalence\nidentity",
    "equivalence_rotate_tree": "equivalence\nrotate one",
    "equivalence_shufflerotate_tree": "equivalence\nrotate shuffle",
    "equivalence_swap_taxa": "equivalence\nswap one",
    "equivalence_shuffle_taxa": "equivalence\nswap shuffle",
}

for model, model_rows in equivalence_rows.groupby("model"):
    # With teeplot_callback=True, tp.tee hands back a save callback together
    # with the plot object, so the grid can be adjusted before it is saved.
    save_figure, grid = tp.tee(
        sns.relplot,
        data=model_rows.replace(display_labels),
        x="num_taxa",
        y="adj score",
        hue="tree source",
        style="tree source",
        col="Q",
        row="tree repr",
        aspect=1.2,
        height=1.7,
        facet_kws={"margin_titles": True},
        kind="line",
        palette="muted",
        teeplot_callback=True,
        teeplot_outattrs={"model": model, "is_equivalence": True},
        teeplot_subdir=teeplot_subdir,
    )
    # Legend goes above the facets, untitled and frameless, in two columns.
    sns.move_legend(
        grid,
        "lower center",
        bbox_to_anchor=(0.45, 0.95),
        frameon=False,
        ncol=2,
        title=None,
    )
    grid.set(ylim=(-0.1, 2.1))
    # Horizontal guides: dotted at 1.0, dashed at 2.0.
    # NOTE(review): presumably chance level and best possible adjusted score
    # for these questions — confirm.
    for level, dash in ((1.0, ":"), (2.0, "--")):
        grid.refline(y=level, linestyle=dash, color="black", alpha=0.5)
    # Facet titles show only the facet value, without the variable name.
    grid.set_titles(col_template="{col_name}", row_template="{row_name}")
    grid.tight_layout()

    save_figure()


In [None]:
# Taxa-type comparison, every question that is NOT an equivalence question.
# One faceted line plot per model: x = num_taxa, y = adj score, one line per
# "tree source" value, columns = question, rows = tree representation.
non_equivalence_rows = df[~df["Q"].str.contains("equivalence")]

# Raw value -> display label. DataFrame.replace with a flat mapping rewrites
# matching values wherever they occur in the frame.
display_labels = {
    "sample_phylogeny_newick": "anon taxa",
    "scientific_phylogeny_newick": "bio taxa",
    "identify_most_related": "most_related",
    "none": "blank",
}

for model, model_rows in non_equivalence_rows.groupby("model"):
    # With teeplot_callback=True, tp.tee hands back a save callback together
    # with the plot object, so the grid can be adjusted before it is saved.
    save_figure, grid = tp.tee(
        sns.relplot,
        data=model_rows.replace(display_labels),
        x="num_taxa",
        y="adj score",
        hue="tree source",
        style="tree source",
        col="Q",
        row="tree repr",
        aspect=1.2,
        height=1.7,
        facet_kws={"margin_titles": True},
        kind="line",
        palette="muted",
        teeplot_callback=True,
        teeplot_outattrs={"model": model, "is_equivalence": False},
        teeplot_subdir=teeplot_subdir,
    )
    # Legend goes above the facets, untitled and frameless, in two columns.
    sns.move_legend(
        grid,
        "lower center",
        bbox_to_anchor=(0.4, 0.95),
        frameon=False,
        ncol=2,
        title=None,
    )
    grid.set(ylim=(-0.1, 3.1))
    # Horizontal guides: dotted at 1.0, dashed at 3.0.
    # NOTE(review): presumably chance level and best possible adjusted score
    # for these questions — confirm.
    for level, dash in ((1.0, ":"), (3.0, "--")):
        grid.refline(y=level, linestyle=dash, color="black", alpha=0.5)
    # Facet titles show only the facet value, without the variable name.
    grid.set_titles(col_template="{col_name}", row_template="{row_name}")
    grid.tight_layout()
    save_figure()


## Model Comparison


In [None]:
# Model comparison, equivalence questions only.
# One faceted line plot per tree representation: x = num_taxa, y = adj score,
# one line per model, columns = question, rows = tree source.
equivalence_rows = df[df["Q"].str.contains("equivalence")]

# Raw value -> display label. DataFrame.replace with a flat mapping rewrites
# matching values wherever they occur in the frame. Relabeling is applied per
# group, so `repr_` below still carries the raw "tree repr" value.
display_labels = {
    "sample_phylogeny_newick": "anon taxa",
    "scientific_phylogeny_newick": "bio taxa",
    "none": "blank",
    "equivalence_identical": "equivalence\nidentity",
    "equivalence_rotate_tree": "equivalence\nrotate one",
    "equivalence_shufflerotate_tree": "equivalence\nrotate shuffle",
    "equivalence_swap_taxa": "equivalence\nswap one",
    "equivalence_shuffle_taxa": "equivalence\nswap shuffle",
}

for repr_, repr_rows in equivalence_rows.groupby("tree repr"):
    # With teeplot_callback=True, tp.tee hands back a save callback together
    # with the plot object, so the grid can be adjusted before it is saved.
    save_figure, grid = tp.tee(
        sns.relplot,
        data=repr_rows.replace(display_labels),
        x="num_taxa",
        y="adj score",
        hue="model",
        style="model",
        col="Q",
        row="tree source",
        aspect=1.2,
        height=1.7,
        facet_kws={"margin_titles": True},
        kind="line",
        palette="Accent",
        teeplot_callback=True,
        teeplot_outattrs={"repr_": repr_, "is_equivalence": True},
        teeplot_subdir=teeplot_subdir,
    )
    # Legend goes above the facets, untitled and frameless, in two columns.
    sns.move_legend(
        grid,
        "lower center",
        bbox_to_anchor=(0.42, 0.95),
        frameon=False,
        ncol=2,
        title=None,
    )
    grid.set(ylim=(-0.1, 2.1))
    # Horizontal guides: dotted at 1.0, dashed at 2.0.
    # NOTE(review): presumably chance level and best possible adjusted score
    # for these questions — confirm.
    for level, dash in ((1.0, ":"), (2.0, "--")):
        grid.refline(y=level, linestyle=dash, color="black", alpha=0.5)
    # Facet titles show only the facet value, without the variable name.
    grid.set_titles(col_template="{col_name}", row_template="{row_name}")
    grid.tight_layout()
    save_figure()


In [None]:
# Model comparison, every question that is NOT an equivalence question.
# One faceted line plot per tree representation: x = num_taxa, y = adj score,
# one line per model, columns = question, rows = tree source.
non_equivalence_rows = df[~df["Q"].str.contains("equivalence")]

# Raw value -> display label. DataFrame.replace with a flat mapping rewrites
# matching values wherever they occur in the frame. Relabeling is applied per
# group, so `repr_` below still carries the raw "tree repr" value.
display_labels = {
    "sample_phylogeny_newick": "anon taxa",
    "scientific_phylogeny_newick": "bio taxa",
    "identify_most_related": "most_related",
    "none": "blank",
}

for repr_, repr_rows in non_equivalence_rows.groupby("tree repr"):
    # With teeplot_callback=True, tp.tee hands back a save callback together
    # with the plot object, so the grid can be adjusted before it is saved.
    save_figure, grid = tp.tee(
        sns.relplot,
        data=repr_rows.replace(display_labels),
        x="num_taxa",
        y="adj score",
        hue="model",
        style="model",
        col="Q",
        row="tree source",
        aspect=1.2,
        height=1.7,
        facet_kws={"margin_titles": True},
        kind="line",
        palette="Accent",
        teeplot_callback=True,
        teeplot_outattrs={"repr_": repr_, "is_equivalence": False},
        teeplot_subdir=teeplot_subdir,
    )
    # Legend goes above the facets, untitled and frameless, in two columns.
    sns.move_legend(
        grid,
        "lower center",
        bbox_to_anchor=(0.4, 0.95),
        frameon=False,
        ncol=2,
        title=None,
    )
    grid.set(ylim=(-0.1, 3.1))
    # Horizontal guides: dotted at 1.0, dashed at 3.0.
    # NOTE(review): presumably chance level and best possible adjusted score
    # for these questions — confirm.
    for level, dash in ((1.0, ":"), (3.0, "--")):
        grid.refline(y=level, linestyle=dash, color="black", alpha=0.5)
    # Facet titles show only the facet value, without the variable name.
    grid.set_titles(col_template="{col_name}", row_template="{row_name}")
    grid.tight_layout()
    save_figure()
