In [None]:
import glob
import os
import string
from collections import OrderedDict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy.stats import spearmanr

# Selects which robustness summary later cells store as "mean_robustness":
# True  -> frequency-weighted mean over phenotypes,
# False -> plain (unweighted) mean over phenotypes.
# It also switches the axis label, axis limits and output file name of Fig. 2.
# WEIGHTED_ROBUSTNESS = False
WEIGHTED_ROBUSTNESS = True
# Directory that receives the generated figures; created here if missing.
OUT_PATH = "outs_final_nee"
os.makedirs(OUT_PATH, exist_ok=True)
# Used by the figure cell as `ONE_COL_MM / 250` for the figure width passed to
# matplotlib's `figsize`.
# NOTE(review): the name suggests millimetres, but 890 / 250 is not a
# millimetre-to-inch conversion - confirm the intended unit and scale factor.
ONE_COL_MM = 890

In [None]:
# Base font size used for all figure text (also reused by the figure cells
# for annotations and panel letters).
nee_fs = 7
# Keyword arguments for matplotlib's "font" rc group.
font = {
    "family": "sans-serif",
    "sans-serif": ["Helvetica"],
    "weight": "normal",
    "size": nee_fs,
}

# Apply the font settings globally, so every figure created afterwards uses them.
plt.matplotlib.rc("font", **font)
# plt.matplotlib.rcParams["text.usetex"] = True
# plt.matplotlib.rcParams["pgf.texsystem"] = "pdflatex"

In [None]:
# File paths
# NOTE(review): `pheno_roo` looks like a typo for `pheno_root` and is not read
# by any cell visible here - confirm there are no other users before renaming.
pheno_roo = "ny"
# Root directory that holds GP_map_data.csv and the per-map result folders.
plot_root = "ny/afv"

In [None]:
# Summary table of the GP maps. Later cells match its rows to the maps through
# the "label" column, read "GP_map", "K", "L", "N_p" and "Z_frac", and add the
# computed navigability / robustness / redundancy columns to it.
df2 = pd.read_csv(os.path.join(plot_root, "GP_map_data.csv"))

In [None]:
# Maps a column position to its column name. The commented-out read below
# passes the values as `names=` for a tab-separated phenotype statistics file.
pheno_dict = dict(
    enumerate(
        [
            "phenotype",
            "n_components",
            "Frequency",  # volume_0
            "surface_0",  # Sum of component surfaces
            "volume_1",  # ( f_p * (K-1) * L * r_p )/(double)2
            "surface_1",
            "robustness",
            "evolvability_wagner",
            "evolvability_cowperthwaite",
            "evolvability_wagner_nond",
            "evolvability_cowperthwaite_nond",
            "S_1",  # f_p * (1 - r_p)
            "S_2",  # S_1 = V_1 * 2(1/r_p - 1)
            "surface_unique",
        ]
    )
)

# df_pheno = pd.read_csv("ny/RNA_12/phenotype_stats.txt", sep="\t", names = pheno_dict.values())
# df_pheno

In [None]:
# One entry per GP map, in the same order as the rows of df2. Each value is a
# 3-tuple:
#   [0] directory that contains the navigability results (a "stats*" file),
#   [1] path of the phenotype statistics CSV,
#   [2] phenotype id whose rows are dropped from that CSV, or None to keep all.
gp_maps = OrderedDict(
    (
        (
            "RNA12",
            (
                os.path.join(plot_root, "prelim/20201109_RNA_12/"),
                "ny/data_neutral_correlations/RNA12/phenotype_stats.csv",
                0,
            ),
        ),
        (
            "RNA15",
            (
                os.path.join(plot_root, "larger_spaces/20201113_RNA_all-0/"),
                "ny/data_neutral_correlations/RNA15/RNA_L15_phenotype_stats.csv",
                None,
            ),
        ),
        (
            "s_2_8",
            (
                os.path.join(plot_root, "prelim/s_2_8/"),
                "ny/data_neutral_correlations/s_2_8/phenotype_stats.csv",
                2,
            ),
        ),
        (
            "s_3_8",
            (
                os.path.join(plot_root, "larger_spaces/POLY/20201108_s_3_8/"),
                "ny/data_neutral_correlations/s_3_8/phenotype_stats.csv",
                1,
            ),
        ),
        (
            "HP5x5s",
            (
                os.path.join(plot_root, "prelim/HP5x5s/"),
                "ny/data_neutral_correlations/HP_5x5s/phenotype_stats.csv",
                0,
            ),
        ),
        (
            "HP3x3x3s",
            (
                os.path.join(plot_root, "prelim/HP3x3x3s/"),
                "ny/data_neutral_correlations/HP_3x3x3s/phenotype_stats.csv",
                None,
            ),
        ),
        (
            "HP20",
            (
                os.path.join(plot_root, "prelim/HP_20/"),
                "ny/data_neutral_correlations/HP_20/phenotype_stats.csv",
                0,
            ),
        ),
        (
            "HP25",
            (
                os.path.join(plot_root, "prelim/HP_25/"),
                "ny/data_neutral_correlations/HP_25/phenotype_stats.csv",
                0,
            ),
        ),
    )
)

# Re-key the maps with the display labels from df2, pairing them by position.
# Popping and re-inserting moves each entry to the end, so the overall order is
# kept as long as every key is renamed.
for key_old, key_new in zip(tuple(gp_maps), df2["label"].tolist()):
    gp_maps[key_new] = gp_maps.pop(key_old)

# Navigability results per GP map: the first file matching "stats*" inside the
# map's result directory, read as a tab-separated table.
dfs = OrderedDict(
    (
        gp_map,
        pd.read_csv(glob.glob(os.path.join(gp_maps[gp_map][0], "stats*"))[0], sep="\t"),
    )
    for gp_map in gp_maps
)

In [None]:
# Phenotype statistics per GP map, with the rows of the map's excluded
# phenotype (third element of its gp_maps entry) removed when one is given.
dfs_pheno = OrderedDict()
for gp_map in gp_maps:
    pheno_csv, del_pheno = gp_maps[gp_map][1:]
    df_ = pd.read_csv(pheno_csv)
    if del_pheno is not None:
        # Keep every row whose phenotype differs from the excluded one.
        df_ = df_[df_["phenotype"].ne(del_pheno)]

    dfs_pheno[gp_map] = df_

In [None]:
# Calculate navigability (MIFP) and its standard error, plus the robustness
# summaries, for every GP map and write them into the matching row(s) of df2.

# A run is flagged as aborted when its combined size (u_size + v_size) lies
# within this margin of the largest combined size observed for the GP map.
# NOTE(review): presumably mirrors a size cap in the simulation - confirm.
ABORT_MARGIN = 50

for gp_map in gp_maps:
    # Row(s) of the summary table that belong to this GP map.
    ind = df2[(df2["label"] == gp_map)].index

    df_rob = dfs_pheno[gp_map]
    # Normalise header capitalisation so the lower-case lookups below work.
    df_rob.columns = [el.lower() for el in df_rob.columns]

    mean_robustness = df_rob["robustness"].mean()
    median_robustness = df_rob["robustness"].median()
    # Robustness averaged with each phenotype weighted by its frequency.
    mean_weighted_robustness = (
        (df_rob["frequency"] * df_rob["robustness"]) / df_rob["frequency"].sum()
    ).sum()

    df = dfs[gp_map]
    samples = df["Sample"].nunique(dropna=False)
    tests = df["Test"].nunique(dropna=False)

    # Sum the two sizes once and reuse the result for threshold and flag.
    total_size = df["u_size"] + df["v_size"]
    threshold = total_size.max() - ABORT_MARGIN
    df["aborted"] = (total_size >= threshold) * 1

    # Navigability is estimated from the runs that were not aborted. The SE is
    # computed directly on the Series rather than through Series.agg with a
    # lambda, which only reaches the whole-Series call via a fallback path.
    completed = df.loc[df["aborted"] == 0, "Fittest_found?"]
    nm = completed.mean()
    nm_se = completed.std(ddof=1) / np.sqrt(len(completed))
    abort = df["aborted"].mean()

    df2.loc[ind, "aborted"] = round(abort, 3)
    df2.loc[ind, "MIFP"] = nm
    df2.loc[ind, "MIFP_se"] = nm_se
    df2.loc[ind, "tests"] = tests
    df2.loc[ind, "samples"] = samples

    df2.loc[ind, "mean_robustness"] = (
        mean_weighted_robustness if WEIGHTED_ROBUSTNESS else mean_robustness
    )
    df2.loc[ind, "median_robustness"] = median_robustness

# Calculate redundancy
df2["Redundancy"] = (
    (df2["K"].astype(float) ** df2["L"].astype(float))
    * (1 - df2["Z_frac"].astype(float))
    / (df2["N_p"].astype(float) - 1)
)
df2["log Redundancy"] = np.log10(df2["Redundancy"])

# The .str accessor leaves missing values untouched, where apply(str.strip)
# would raise a TypeError on them.
df2["GP_map"] = df2["GP_map"].str.strip()

In [None]:
# Print Table 1: a markdown preview followed by the LaTeX source.
cols = [
    "label",
    "K",
    "L",
    "N_p",
    "Z_frac",
    "log Redundancy",
    "mean_robustness",
    "MIFP",
    "MIFP_se",
]
df3 = df2[cols].copy()
df3["N_p"] = df3["N_p"].astype(int)
df3["log Redundancy"] = df3["log Redundancy"].round(1)
for col in ["Z_frac", "mean_robustness", "MIFP", "MIFP_se"]:
    df3[col] = df3[col].round(3)

# Merge estimate and standard error into one math-mode cell. The raw string
# keeps the backslash of "\pm" literal without an invalid "\p" escape.
df3["MIFP"] = (
    "$" + df3["MIFP"].astype(str) + r" \pm " + df3["MIFP_se"].astype(str) + "$"
)
df3 = df3.drop(columns=["MIFP_se"])

# Render the integer-valued columns without decimals and with thousands separators.
for col in ["K", "L", "N_p"]:
    df3[col] = df3[col].astype(float).map("{0:,.0f}".format)

# Column headers as they appear in the typeset table (same order as `cols`
# with "MIFP_se" removed).
df3.columns = [
    "GP map",
    "K",
    "L",
    r"$N_P$",
    r"$f_{del}$",
    r"$\log_{10}R$",
    r"$\left<\rho\right>$",
    r"$\left<\psi\right> \pm SE(\psi)$",
]
print(df3.to_markdown())
# escape=False keeps the math-mode markup in headers and cells intact.
print(df3.to_latex(index=False, escape=False))

In [None]:
def make_color_map(df2, black_and_white=False):
    """Build the lookup from GP-map name to plotting colour.

    A fixed set of names (several spellings of the same maps) gets
    hand-picked colours. Every other name found in ``df2["GP_map"]`` is
    given the next unused colour-cycle entry, starting at ``"C3"``, in order
    of first appearance.

    Parameters
    ----------
    df2 : mapping or DataFrame
        Object whose ``"GP_map"`` entry can be iterated to yield map names.
    black_and_white : bool, default False
        When true, every name is mapped to black (``"k"``) instead.

    Returns
    -------
    dict
        Maps each name to a matplotlib colour string.
    """
    colors_map = {
        "HP_3x3x3s": "C0",
        "HP3x3x3s": "C0",
        "HP3x3x3snd1": "C0",
        "HP_5x5s": "C2",
        "HP5x5s": "C2",
        "HP5x5snd1": "C2",
        "s_2_8": "#9e0142",
        r"$S_{2,8}$": "#9e0142",
        "RNA_12": "C1",
        "RNA12": "C1",
    }

    # C0-C2 are taken by the fixed entries above.
    next_cycle_idx = 3
    for name in df2["GP_map"]:
        if name in colors_map:
            continue
        colors_map[name] = "C{}".format(next_cycle_idx)
        next_cycle_idx += 1

    if not black_and_white:
        return colors_map
    return dict.fromkeys(colors_map, "k")

In [None]:
# Fig. 2 - with and without colour
# Three stacked panels sharing the navigability axis: (a) redundancy,
# (b) deleterious frequency, (c) mean robustness. Each variant is saved to
# OUT_PATH as a PDF and as a PNG.
# Box for rho
bbox = dict(boxstyle="round", fc="white", ec="black", pad=0.2)


def annotate(
    x, y, label, color, extra, lower_left, lower_right, upper_left, upper_right, ax
):
    """Write ``label`` next to the data point ``(x, y)`` on ``ax``.

    The text is shifted by ``extra`` points diagonally away from the marker.
    The direction is chosen by the first list that contains ``label``,
    checked in the order ``lower_right``, ``lower_left``, ``upper_left``.
    Any other label is placed to the upper right, so ``upper_right`` is
    accepted for symmetry but never consulted. The font size is taken from
    the module-level ``fs`` at call time.
    """
    if label in lower_right:
        dx, dy, va, ha = extra, -extra, "top", "left"
    elif label in lower_left:
        dx, dy, va, ha = -extra, -extra, "top", "right"
    elif label in upper_left:
        dx, dy, va, ha = -extra, extra, "bottom", "right"
    else:
        dx, dy, va, ha = extra, extra, "bottom", "left"

    ax.annotate(
        label,
        (x, y),
        (dx, dy),
        xycoords="data",
        textcoords="offset points",
        va=va,
        ha=ha,
        fontsize=fs,
        color=color,
    )


def _add_rho_text(ax, rho, x, ha):
    """Print the Spearman coefficient ``rho`` near the bottom edge of ``ax``.

    ``x`` is the horizontal position in axes coordinates and ``ha`` the
    horizontal alignment of the text relative to that position.
    """
    ax.text(
        x,
        0.05,
        r"$\rho_s=$ {:.3f}".format(rho),
        {
            "color": "black",
            "fontsize": "medium",
            "va": "bottom",
            "ha": ha,
            "bbox": None,
        },
        transform=ax.transAxes,
    )


for black_and_white in [False, True]:
    fname = "fig2_{0}_{1}.pdf".format(
        "bw" if black_and_white else "color",
        "rho_g" if WEIGHTED_ROBUSTNESS else "rho_p",
    )

    fs = nee_fs
    colors = make_color_map(df2, black_and_white=black_and_white)
    df2["colors"] = df2["GP_map"].map(colors)

    # plt.subplots creates the figure itself; a preceding plt.figure() would
    # only leave an extra, empty figure open.
    fig, axs = plt.subplots(
        3,
        1,
        sharey=True,
        figsize=(ONE_COL_MM / 250, (ONE_COL_MM / 250) * 7 / (246 / 72.27)),
    )

    scatter_s = 10
    # Offset (in points) between a marker and its text label.
    extra = 2

    # --- Panel a: redundancy versus navigability ---
    ax = axs[0]
    ax.scatter(
        np.log10(df2["Redundancy"]), df2["MIFP"], color=df2["colors"], s=scatter_s
    )

    upper_left = ["HP5x5"]
    upper_right = []
    lower_left = []
    lower_right = ["GC20", "HP3x3x3", "RNA15", "HP20"] + [r"$S_{2,8}$", r"$S_{3,8}$"]

    # Manual shifts (in data units) of the label anchor for individual maps in
    # this panel.
    adjust_dict = {"RNA12": (-0.15, 0.0), "$S_{2,8}$": (-0.1, -0.01)}

    for i in range(len(df2)):
        row = df2.iloc[i]
        label = row["label"]
        x = np.log10(row["Redundancy"])
        y = row["MIFP"]
        color = colors[row["GP_map"]]

        xadjust, yadjust = adjust_dict.get(label, (0.0, 0.0))

        annotate(
            x + xadjust,
            y + yadjust,
            label,
            color,
            extra,
            lower_left,
            lower_right,
            upper_left,
            upper_right,
            ax,
        )

    ax.set_xlabel(r"Redundancy, $\log_{10}R$")
    ax.set_xlim(0, 9)

    rho = spearmanr(df2["log Redundancy"], df2["MIFP"])[0]
    _add_rho_text(ax, rho, 0.95, "right")

    # --- Panel b: deleterious fraction versus navigability ---
    ax = axs[1]
    ax.scatter(df2["Z_frac"], df2["MIFP"], color=df2["colors"], s=scatter_s)

    upper_left = ["HP25", "RNA15", "HP5x5"]
    upper_right = []
    lower_left = ["HP20", "HP3x3x3"]
    lower_right = ["GC20", r"$S_{2,8}$", r"$S_{3,8}$"]

    for i in range(len(df2)):
        row = df2.iloc[i]
        label = row["label"]
        x = row["Z_frac"]
        y = row["MIFP"]
        color = colors[row["GP_map"]]

        annotate(
            x,
            y,
            label,
            color,
            extra,
            lower_left,
            lower_right,
            upper_left,
            upper_right,
            ax,
        )

    ax.set_xlabel("Deleterious frequency, $f_{del}$")

    ax.set_xlim(0.5, 1.0)
    ax.set_ylim(-0.1, 1.2)

    rho = spearmanr(df2["Z_frac"], df2["MIFP"])[0]
    _add_rho_text(ax, rho, 0.05, "left")

    # --- Panel c: mean robustness versus navigability ---
    ax = axs[2]
    ax.scatter(df2["mean_robustness"], df2["MIFP"], color=df2["colors"], s=scatter_s)

    # Label placement depends on which robustness measure is plotted.
    if not WEIGHTED_ROBUSTNESS:
        upper_left = ["HP25", "HP5x5"]
        upper_right = ["HP20", "RNA15"]
        lower_left = ["HP3x3x3"]
        lower_right = ["GC20", r"$S_{2,8}$", r"$S_{3,8}$", "RNA12"]
    else:
        upper_left = ["HP25", "HP5x5", "RNA12"]
        upper_right = ["HP20", "RNA15"]
        lower_left = ["HP3x3x3", r"$S_{2,8}$"]
        lower_right = ["GC20", r"$S_{3,8}$"]

    for i in range(len(df2)):
        row = df2.iloc[i]
        label = row["label"]
        x = row["mean_robustness"]
        y = row["MIFP"]
        color = colors[row["GP_map"]]

        annotate(
            x,
            y,
            label,
            color,
            extra,
            lower_left,
            lower_right,
            upper_left,
            upper_right,
            ax,
        )

    rho = spearmanr(df2["mean_robustness"], df2["MIFP"])[0]
    _add_rho_text(ax, rho, 0.95, "right")

    if not WEIGHTED_ROBUSTNESS:
        ax.set_xlabel(r"Mean phenotypic robustness, $\left<\rho\right>$")
        ax.set_xlim(0.0, 0.45)
    else:
        ax.set_xlabel(r"Mean genotypic robustness, $\left<\rho_g\right>$")
        ax.set_xlim(0.0, 0.6)

    # The y axis is shared, so this limit applies to all three panels.
    ax.set_ylim(-0.15, 1.2)

    # Shared y label and bold panel letters (a, b, c) above each panel's
    # top-left corner.
    for i, ax in enumerate(axs):
        ax.set_ylabel(r"Navigability, $\left<\psi\right>$")
        ax.text(
            -0.16,
            1.0,
            string.ascii_lowercase[i],
            va="bottom",
            ha="left",
            fontsize=nee_fs + 1,
            fontproperties={"weight": "bold"},
            transform=ax.transAxes,
        )

    fig.subplots_adjust(wspace=0.05, hspace=0.4)
    fig.savefig(os.path.join(OUT_PATH, fname), transparent=True, bbox_inches="tight")
    # The PNG copy goes to the same directory as the PDF (the previous
    # `OUT_PATH_PATH` was an undefined name and raised NameError).
    fig.savefig(
        os.path.join(OUT_PATH, fname.replace(".pdf", ".png")),
        transparent=True,
        bbox_inches="tight",
        dpi=150,
    )