In [None]:
import glob
import os
import re

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

# Path prefix prepended by load_results to the glob patterns of the
# "180414_r_assignment_parallel_*" result directories.
extra_path = "ny/fRNA/fRNA/"

In [None]:
def load_results(L, threshold=100, no_neut=True, hamming=False, old=True):
    """Load and concatenate every per-run stats table for sequence length ``L``.

    Two on-disk layouts are searched, depending on the flags:

    * the "180414" directories under ``extra_path`` (random fitness with
      neutral mutations, or random fitness without neutral mutations when
      ``old`` is true);
    * the "20220423" tree under ``gp_maps_nav_data`` (every Hamming run, and
      random fitness without neutral mutations when ``old`` is false).

    Parameters
    ----------
    L : int
        Sequence length; used to pick the directory / file names and stored
        in the ``L`` column of the result.
    threshold : int, default 100
        Accepted for backward compatibility only. It previously fed an
        ``aborted`` definition based on ``u_size + v_size`` that was always
        overwritten before returning, so it never influenced the result.
    no_neut : bool, default True
        Select the runs without neutral mutations.
    hamming : bool, default False
        Select the Hamming-fitness runs instead of the random-fitness ones.
    old : bool, default True
        Only consulted when ``hamming`` is false and ``no_neut`` is true:
        chooses the "180414" directories over the "20220423" tree.

    Returns
    -------
    pandas.DataFrame
        All matching tab-separated files stacked with a fresh integer index,
        plus the columns ``aborted`` (1 when ``Fittest_found?`` is 0 and
        ``u_size`` is non-zero, else 0), ``L`` and ``uv_size``
        (``u_size + v_size``).

    Raises
    ------
    ValueError
        If no stats file matches the requested combination.
    """
    if hamming or (no_neut and not old):
        # "20220423" tree: one sub-directory per run, searched recursively.
        run_dir = "20220423_outs_mono_no-neut-mat" if no_neut else "20220423_outs_mono"
        fitness_dir = "hamming" if hamming else "random"
        candidates = glob.glob(
            "gp_maps_nav_data/data/raw_output/landscape/frna/20000/"
            f"{run_dir}/{fitness_dir}/**/stats*txt",
            recursive=True,
        )
        # Plain substring match on the path; NOTE(review): "l2" would also
        # match "l20" -- fine for the two-digit lengths used in this notebook.
        files3 = [el for el in candidates if f"l{L}" in el]
    else:
        # "180414" layout: a flat data_out directory per length.
        if no_neut:
            pattern = (
                f"{extra_path}180414_r_assignment_parallel_L{L}_2e6_NO-NEUTRAL-MUTS/"
                f"180414_r_assignment_parallel_L{L}_2e6/data_out/*stats_*"
            )
        else:
            pattern = (
                f"{extra_path}180414_r_assignment_parallel_L{L}_2e6/data_out/*stats_*"
            )
        # Keep only files named like stats_L<len>_<run>.txt.
        files3 = [
            el
            for el in glob.glob(pattern)
            if re.findall(r"stats_L\d{1,2}_\d{1,2}\.txt", el)
        ]

    if not files3:
        # pd.concat would raise an opaque "No objects to concatenate" here.
        raise ValueError(
            f"No stats files found for L={L} "
            f"(no_neut={no_neut}, hamming={hamming}, old={old})"
        )

    df = pd.concat([pd.read_csv(el, sep="\t") for el in files3]).reset_index(drop=True)

    # A run counts as aborted when the fittest phenotype was not found although
    # the search did explore something (u_size != 0).
    df["aborted"] = (df["Fittest_found?"].eq(0) & ~df["u_size"].eq(0)).mul(1)
    df["L"] = L
    df["uv_size"] = df["u_size"] + df["v_size"]
    return df

In [None]:
# Load the results for every sequence length, once per combination of
# fitness assignment (random / Hamming) and mutation model (neutral / no neutral).
_RNA_LENGTHS = (20, 25, 30, 35, 40)

# Random fitness, no neutral mutations (old=False)
df_no_neut = pd.concat(
    [load_results(length, no_neut=True, old=False) for length in _RNA_LENGTHS],
    ignore_index=True,
)

# Random fitness, neutral mutations
df_neut = pd.concat(
    [load_results(length, no_neut=False) for length in _RNA_LENGTHS],
    ignore_index=True,
)

# Hamming fitness, neutral mutations
df_neut_ham = pd.concat(
    [
        load_results(length, no_neut=False, hamming=True, threshold=100)
        for length in _RNA_LENGTHS
    ],
    ignore_index=True,
)

# Hamming fitness, no neutral mutations
df_no_neut_ham = pd.concat(
    [
        load_results(length, no_neut=True, hamming=True, threshold=100)
        for length in _RNA_LENGTHS
    ],
    ignore_index=True,
)

In [None]:
# Get navigability estimates, number of unique targets tested and number of phenotypes
# in fRNA database at a given length L
def get_psi(df):
    """Summarise the navigability estimate per sequence length.

    Rows flagged ``aborted == 1`` are excluded. For every value of ``L`` the
    mean of ``Fittest_found?`` and its standard error (sample standard
    deviation with ``ddof=1`` divided by the square root of the group size)
    are computed and rendered as one LaTeX string "$mean +/- se$" with three
    decimals each.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``L``, ``aborted`` and ``Fittest_found?``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``L`` with a single string column whose label is the LaTeX
        source for the mean navigability symbol.
    """
    completed = df[df["aborted"] != 1]
    stats = completed.groupby("L")["Fittest_found?"].agg(
        psi="mean",
        # len(x) counts every row of the group, including missing values.
        se=lambda x: x.std(ddof=1) / len(x) ** 0.5,
    )
    # Raw string: "\p" is not a valid escape sequence in a normal literal.
    formatted = [
        r"${0:.3f} \pm {1:.3f}$".format(psi, se)
        for psi, se in zip(stats["psi"], stats["se"])
    ]
    return pd.DataFrame({r"$\left<\psi\right>$": formatted}, index=stats.index)


def get_aborted(df):
    """Return the fraction of aborted runs per sequence length.

    Rows whose ``aborted`` value is -1 are excluded before averaging.
    ``load_results`` in this notebook only produces 0 or 1, so for those
    frames the filter keeps every row.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``L`` and ``aborted``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``L`` with one column ``Aborted`` holding the mean of
        ``aborted`` formatted as a string with three decimals.
    """
    kept = df[df["aborted"] != -1]
    # Only the mean is reported; no standard error is needed for this column.
    aborted_fraction = kept.groupby("L")["aborted"].mean()
    return aborted_fraction.apply(lambda x: f"{x:.3f}").rename("Aborted").to_frame()


def get_ntargets(df):
    """Count the distinct ``Target`` values observed for each length ``L``.

    Returns a one-column DataFrame named ``Targets`` indexed by ``L``.
    """
    targets_by_length = df.groupby("L")["Target"]
    distinct_counts = targets_by_length.apply(pd.Series.nunique)
    return distinct_counts.to_frame(name="Targets")


def get_Np(df, data_dir="../frna/data"):
    """Count the phenotypes listed on disk for every length present in ``df``.

    For each distinct value of ``df["L"]`` (in order of first appearance) the
    file ``ps_l<L>.txt`` inside ``data_dir`` is read as a header-less CSV and
    its number of rows is recorded.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the column ``L``.
    data_dir : str, default "../frna/data"
        Directory holding the ``ps_l<L>.txt`` files. The default is the
        location this function always used.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``L`` with a single column labelled with the LaTeX source
        for N_P.
    """
    counts = []
    for L in df["L"].unique():
        phenotypes = pd.read_csv(os.path.join(data_dir, f"ps_l{L}.txt"), header=None)
        counts.append([L, len(phenotypes)])
    return pd.DataFrame(counts, columns=["L", r"$N_P$"]).set_index("L")

In [None]:
# Get the final results dataframe
def get_df_outs(df_neut, df_no_neut, fitness="Random"):
    """Assemble the summary table for one fitness assignment.

    For each of the two input frames (with and without neutral mutations) the
    per-length outputs of get_ntargets, get_Np, get_psi and get_aborted are
    placed side by side, and the two resulting tables are joined column-wise
    under a two-level column header (mutation model, quantity).

    Parameters
    ----------
    df_neut : pandas.DataFrame
        Runs with neutral mutations. Note that this parameter shadows the
        notebook-level variable of the same name inside the function.
    df_no_neut : pandas.DataFrame
        Runs without neutral mutations (same shadowing remark).
    fitness : str, default "Random"
        Label written into the "Fitness" index level of every row.

    Returns
    -------
    pandas.DataFrame
        Index levels "Fitness", "L" and the N_P label (taken from the
        neutral-mutation table); two-level columns holding, per mutation
        model, the navigability string and the aborted fraction (the latter
        relabelled to the LaTeX alpha symbol).
    """
    # Starts as None; the first pd.concat below relies on pandas skipping
    # None entries in the list of objects to concatenate.
    df_outs = None
    names = ["Neutral mutations", "No neutral mutations$^{*}$"]
    for name, df_ in [(names[0], df_neut), (names[1], df_no_neut)]:
        df_1 = get_psi(df_)
        df_2 = get_aborted(df_)
        df_3 = get_ntargets(df_)
        df_4 = get_Np(df_)
        # Per-model column order: Targets, N_P, psi, Aborted.
        df_neut_out = pd.concat([df_3, df_4, df_1, df_2], axis=1)
        df_outs = pd.concat([df_outs, df_neut_out], axis=1)
    # df_neut_out still refers to the last iteration's table; only its column
    # labels are used here, to build the (model, quantity) header.
    df_outs.columns = pd.MultiIndex.from_product([names, df_neut_out.columns.to_list()])
    df_outs.insert(0, ("", "Fitness"), fitness)
    # Only the N_P column of the neutral-mutation table is kept.
    df_outs = df_outs.drop(columns=[("No neutral mutations$^{*}$", "$N_P$")])
    # The target counts are not reported in the final table.
    df_outs = df_outs.drop(columns=[
        ("No neutral mutations$^{*}$", "Targets"),
        ("Neutral mutations", "Targets"),
    ])
    # Move Fitness, L and the remaining N_P column into the row index.
    df_outs = (
        df_outs.reset_index()
        .set_index([("", "Fitness"), "L", ("Neutral mutations", "$N_P$")])
        .rename_axis(index=["Fitness", "L", "$N_P$"])
    )
    # Relabel the "Aborted" columns with the symbol used in the paper.
    df_outs.columns = pd.MultiIndex.from_tuples(
        [el if el[1] != "Aborted" else (el[0], "$\\alpha$") for el in df_outs.columns]
    )
    return df_outs


def print_for_paper(df_outs):
    """Print ``df_outs`` rounded to three decimals, as Markdown and then as LaTeX.

    The pandas display option ``display.float_format`` is set to the
    thousands-separator formatter before printing and is left set afterwards.
    The LaTeX output centres multi-column headers and is not escaped.
    """
    pd.set_option("display.float_format", "{:,}".format)
    rounded = df_outs.round(3)
    print(rounded.to_markdown())
    latex_table = rounded.convert_dtypes().to_latex(
        multicolumn_format="c", escape=False
    )
    print(latex_table)


# One summary table per fitness assignment, stacked row-wise and printed.
df_outs = get_df_outs(df_neut, df_no_neut, "Random")
df_outs_ham = get_df_outs(df_neut_ham, df_no_neut_ham, "Hamming$^{*}$")
df_all = pd.concat([df_outs, df_outs_ham])
print_for_paper(df_all)