In [None]:
import matplotlib.pyplot as plt
from nbmetalog import nbmetalog as nbm
import numpy as np
import pandas as pd
import scipy.stats as stats
import seaborn as sns
from teeplot import teeplot as tp

In [None]:
# Log notebook/session metadata through nbmetalog ahead of the analysis cells.
# NOTE(review): which fields get printed is decided by nbmetalog and is not
# visible from this notebook — see the nbmetalog documentation.
nbm.print_metadata()

# get data

In [None]:
# Load the two treatment datasets; paths are relative to the working directory.
reg = pd.read_csv("356848/regulation.csv")
no_reg = pd.read_csv("356848/no-regulation.csv")

# nbmetalog idiom: nvp_expr() receives a hard-coded variable name and the
# resulting expression string is eval'd (presumably into a name/value pair —
# confirm against nbmetalog). Only literals are passed, never external input.
nbm.print_dataframe_summary(*eval(nbm.nvp_expr(
    'reg'
)))
nbm.print_dataframe_summary(*eval(nbm.nvp_expr(
    'no_reg'
)))

# Tag every row with the treatment it came from so the frames can be stacked.
reg['regulation'] = "enabled"
no_reg['regulation'] = "disabled"

# ignore_index=True gives the stacked frame a fresh, unique RangeIndex.
# Without it each source frame contributes its own 0..n-1 labels, so labels
# are duplicated and any later operation that aligns on the index can
# misbehave or raise.
df = pd.concat([reg, no_reg], ignore_index=True)
nbm.print_dataframe_summary(*eval(nbm.nvp_expr(
    'df'
)))

# define teeplot functions

In [None]:
def logx_lineplot(*args, **kwargs):
    """Draw a seaborn line plot with a log-scaled x axis and fixed y ticks.

    All positional and keyword arguments are forwarded unchanged to
    ``sns.lineplot``. The Axes it returns then gets:

    * a log-scaled x axis,
    * a dotted grey horizontal reference line at y = 256, drawn behind the
      data (``zorder=-1``),
    * y ticks at every multiple of 16 from 0 through 256.

    Returns:
        None. The figure is modified in place.
    """
    ax = sns.lineplot(
        *args,
        **kwargs
    )
    ax.set_xscale("log")
    # Decorate the Axes seaborn actually drew on. Going through pyplot's
    # "current axes" would put the reference line and ticks on a different
    # Axes whenever the caller supplies ax=... explicitly.
    ax.axhline(256, c="grey", linestyle="dotted", zorder=-1)
    ax.set_yticks([16 * i for i in range(17)])
    # TODO: change names here

# fitness over time

## process data

In [None]:

# NOTE(review): disabled code kept as a bare string literal rather than as
# comments — nothing inside it executes; the string is simply the value the
# cell evaluates to.
# The snippet would pad every (replicate, regulation) run with rows of
# max_fitness = 16 for each update from that run's last update up to 9999.
# Before re-enabling, confirm: (1) the range starts at max_update, which
# already has a row; (2) DataFrame.append is no longer available in
# pandas >= 2.0 (pd.concat is the replacement) — check the installed version.
'''
new_rows = []
for (replicate, regulation), filtered in df.groupby(["replicate", "regulation"]):
    max_update = filtered["update"].max()

    for update in range(max_update, 10000):
        new_rows.append({
            "update": update,
            "max_fitness": 16,
            "replicate": replicate,
            "regulation": regulation
})

df = df.append(new_rows)
df
'''

## graph results (ci: SD)

In [None]:
# Plot max_fitness against update, one line per regulation treatment, using
# logx_lineplot (log-scaled x axis). ci="sd" is forwarded to sns.lineplot to
# request a standard-deviation band.
# NOTE(review): the call is routed through teeplot's tee(), presumably so the
# figure is also written to disk — confirm against the teeplot documentation.
tp.tee(
    logx_lineplot,
    data=df,
    x="update",
    y="max_fitness",
    ci="sd",
    hue="regulation"
)

## graph results (ci: 95%)

In [None]:
# Keep only the rows where update + 1 is an exact power of two
# (update = 0, 1, 3, 7, 15, ...).
# The check rounds log2 to the nearest integer exponent and verifies that
# 2**exponent reproduces the value exactly. Comparing the fractional part of
# log2 against a tolerance is not exact: it also accepts neighbours of large
# powers of two (log2(2**14 + 1) - 14 is already below 1e-4) and rejects a
# true power of two whose log2 rounds to just under an integer.
update_plus_one = df["update"] + 1
# Non-positive values have no real log2; mask them to NaN so they never match.
nearest_exponent = np.round(np.log2(update_plus_one.where(update_plus_one > 0)))
data = df[np.exp2(nearest_exponent) == update_plus_one]
len(data)

In [None]:
# NOTE(review): disabled plot kept as a bare string literal — nothing inside
# it executes; the string is simply the value the cell evaluates to.
# The snippet repeats the fitness-over-time plot without ci=, i.e. with
# seaborn's default interval (the section header labels it 95%).
# It passes data=df rather than the subsample `data` built in the previous
# cell — confirm which one was intended before re-enabling.
'''
tp.tee(
    logx_lineplot,
    data=df,
    x="update",
    y="max_fitness",
    hue="regulation"
)
'''

# time to solution by regulation treatment

### process data

In [None]:
# Scratch check of the streak labelling: x gives every row an id that
# increments whenever max_fitness differs from the row directly above it.
# One "id value" line is printed for each row of df.
fitness = df["max_fitness"]
x = fitness.ne(fitness.shift()).cumsum()
for streak_id, fitness_value in zip(x, fitness):
    print(streak_id, fitness_value)

In [None]:
# Order rows so each (replicate, regulation) run is contiguous and its updates
# are chronological.
df = df.sort_values(by=["replicate", "regulation", "update"])

# streak_length is the 1-based position of a row within its run of consecutive
# equal max_fitness values.
# A streak starts wherever max_fitness changes OR a new replicate/regulation
# run begins. Looking at max_fitness alone would let a streak continue from
# the last rows of one run into the first rows of the next whenever the two
# happen to share the same value.
streak_keys = df[["replicate", "regulation", "max_fitness"]]
streak_start = streak_keys.ne(streak_keys.shift()).any(axis=1)
df["streak_length"] = df.groupby(streak_start.cumsum()).cumcount() + 1

In [None]:
# Inspect streak lengths over time for a single run: replicate 7 with
# regulation enabled, one line per max_fitness value, no error band.
# The "regulation" column holds the string labels "enabled"/"disabled" that
# are assigned when the data is loaded, so the filter must compare against the
# label; comparing against the integer 1 selects no rows and plots nothing.
sns.lineplot(
    data=df[(df["replicate"] == 7) & (df["regulation"] == "enabled")],
    y="streak_length",
    x="update",
    ci=None,
    hue="max_fitness"
)

In [None]:
# Build res_df: one row per (replicate, regulation) run holding the first
# update at which that run reached max_fitness == 16 ("solution_time").
run_keys = ["replicate", "regulation"]

# Rows where the target fitness has been reached, and the earliest such update
# within each run.
box_df = df[(df["max_fitness"] == 16)]
first_solution = box_df.groupby(run_keys)["update"].min()

# Runs that never reach 16 have no entry in first_solution; they are recorded
# with a solution_time of 10000.
# NOTE(review): 10000 is presumably the update limit of a run — confirm.
all_runs = df.groupby(run_keys).size().index

# Reindexing over every run fills in the unsolved ones and leaves rows sorted
# by (replicate, regulation), so the table is identical on every execution.
res_df = (
    first_solution
    .reindex(all_runs, fill_value=10000)
    .rename("solution_time")
    .reset_index()
    [["solution_time", "replicate", "regulation"]]
)

### graph

In [None]:
# Box plot of solution_time for each regulation treatment, built from res_df.
# NOTE(review): routed through teeplot's tee(), presumably so the figure is
# also written to disk — confirm against the teeplot documentation.
tp.tee(
    sns.boxplot,
    data=res_df,
    y="solution_time",
    x="regulation"
)

## time to solution (swarm plot)

In [None]:
def tee_swarmplot(*args, **kwargs):
    """Draw a swarm plot followed by a box plot from the same arguments.

    Every positional and keyword argument is handed to both
    ``sns.swarmplot`` and ``sns.boxplot``. The swarm call additionally
    receives fixed marker styling (``linewidth=0.5``, ``s=4``,
    ``edgecolor='white'``), so callers must not pass those three keywords
    themselves.

    Returns:
        None.
    """
    marker_style = {"linewidth": 0.5, "s": 4, "edgecolor": 'white'}
    sns.swarmplot(*args, **marker_style, **kwargs)
    sns.boxplot(*args, **kwargs)

In [None]:
# Swarm plot plus box plot (tee_swarmplot) of solution_time for each
# regulation treatment, built from res_df.
# NOTE(review): routed through teeplot's tee(), presumably so the figure is
# also written to disk — confirm against the teeplot documentation.
tp.tee(
    tee_swarmplot,
    data=res_df,
    y="solution_time",
    x="regulation"
)

In [None]:
def tee_barswarm(*args, **kwargs):
    """Draw a swarm plot followed by a bar plot from the same arguments.

    Every positional and keyword argument is handed to both
    ``sns.swarmplot`` and ``sns.barplot``. Fixed styling is added on top:
    the swarm call gets ``linewidth=0.5``, ``s=4``, ``edgecolor='white'``
    and the bar call gets ``capsize=0.4``, ``errwidth=2``. Callers must not
    pass any of those keywords themselves.

    Returns:
        None.
    """
    marker_style = {"linewidth": 0.5, "s": 4, "edgecolor": 'white'}
    errorbar_style = {"capsize": 0.4, "errwidth": 2}
    sns.swarmplot(*args, **marker_style, **kwargs)
    sns.barplot(*args, **errorbar_style, **kwargs)

In [None]:
# Swarm plot plus bar plot (tee_barswarm) of solution_time for each
# regulation treatment, built from res_df.
# NOTE(review): routed through teeplot's tee(), presumably so the figure is
# also written to disk — confirm against the teeplot documentation.
tp.tee(
    tee_barswarm,
    data=res_df,
    y="solution_time",
    x="regulation"
)