In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from teeplot import teeplot as tp
import numpy as np

# get data

In [None]:
# One concatenated CSV per K directory (K2, K4, K8, K16); the frames are
# unpacked in the same order as the paths are listed.
df2, df4, df8, df16 = (
    pd.read_csv(csv_path)
    for csv_path in (
        "86bf42/K2/concat=100+replicate=1-100+ext=.csv",
        "86bf42/K4/concat=100+replicate=1-100+ext=.csv",
        "86bf42/K8/concat=100+replicate=1-100+ext=.csv",
        "86bf42/K16/concat=100+replicate=1-100+ext=.csv",
    )
)



# preprocess data

In [None]:
# Tag every frame with the k value of the directory it was loaded from
# (K2 -> 2, K4 -> 4, ...), so runs stay distinguishable after stacking.
# The column is written onto the loaded frames themselves.
for num_states, runs in ((2, df2), (4, df4), (8, df8), (16, df16)):
    runs["k"] = num_states

# ignore_index=True gives the stacked frame a fresh 0..n-1 index. Without it
# each file's row labels are repeated four times, and duplicated index labels
# are a common source of reindexing errors in later pandas/seaborn calls.
df = pd.concat([df2, df4, df8, df16], ignore_index=True)

# define teeplot functions

In [None]:
def logx_lineplot(*args, **kwargs):
    """Draw a seaborn line plot with a log-scaled x axis and fixed y guides.

    All positional and keyword arguments are forwarded unchanged to
    ``sns.lineplot``. After plotting, the x axis is switched to log scale, a
    dotted grey horizontal reference line is drawn at y = 256, and the y
    ticks are placed every 16 units from 0 to 256 inclusive.

    The decorations are applied to the Axes object that ``sns.lineplot``
    returns rather than to pyplot's "current" axes, so they land on the
    intended subplot even when the caller passes ``ax=...``.

    Returns:
        None. The function is called for its plotting side effects.
    """
    reference_level = 256  # y value of the dotted guide line and of the top tick
    tick_step = 16         # spacing between consecutive y ticks

    ax = sns.lineplot(*args, **kwargs)
    ax.set_xscale("log")
    # zorder=-1 keeps the guide line behind the plotted curves.
    ax.axhline(reference_level, c="grey", linestyle="dotted", zorder=-1)
    ax.set_yticks(np.arange(0, reference_level + 1, tick_step))
    # TODO: change names here

# fitness over time

## process data

In [None]:
# Pad every (replicate, k) run out to a common horizon so that all runs
# contribute a value at every update in the fitness-over-time plots.
# Padded rows carry max_fitness = 256; every other column is left as NaN.
PAD_UNTIL_UPDATE = 10000  # exclusive upper bound of the padded update values
PADDED_FITNESS = 256      # max_fitness written into each padded row

# NOTE(review): padding starts AT each run's last recorded update (not one
# past it), so that update ends up with two rows per padded run. This keeps
# the original behaviour -- confirm it is intended.
last_update = df.groupby(["replicate", "k"])["update"].max()
padding_frames = [
    pd.DataFrame({
        "update": np.arange(max_update, PAD_UNTIL_UPDATE),
        "max_fitness": PADDED_FITNESS,
        "replicate": replicate,
        "k": k,
    })
    for (replicate, k), max_update in last_update.items()
    if max_update < PAD_UNTIL_UPDATE  # runs already at the horizon need no padding
]

# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# is the supported replacement. ignore_index=True avoids duplicated index
# labels in the combined frame.
df = pd.concat([df, *padding_frames], ignore_index=True)
df

## graph results (ci: SD)

In [None]:
# max_fitness over update, one line per k; ci="sd" requests a
# standard-deviation band around each line.
# NOTE(review): newer seaborn releases spell this errorbar="sd" -- confirm
# against the pinned seaborn version before changing it.
sd_plot_kwargs = dict(data=df, x="update", y="max_fitness", hue="k", ci="sd")
tp.tee(logx_lineplot, **sd_plot_kwargs)

## graph results (ci: 95%)

In [None]:
# Keep only rows whose update u has log2(u + 1) within 1e-4 above an integer,
# i.e. u + 1 is (numerically) a power of two: u = 0, 1, 3, 7, 15, ...
# This thins the data to points evenly spaced on the log-scaled x axis.
log2_of_step = np.log2(df["update"] + 1)
is_log_spaced = (log2_of_step % 1.0) < 0.0001
data = df[is_log_spaced]
len(data)

In [None]:
# Same line plot on the log-spaced subsample; no ci argument is passed, so
# seaborn's default interval is used.
subsampled_plot_kwargs = dict(data=data, x="update", y="max_fitness", hue="k")
tp.tee(logx_lineplot, **subsampled_plot_kwargs)

# time to solution over number of states (k)

## process data

In [None]:
# Rows at which a run shows the target fitness value of 256.
box_df = df[df["max_fitness"] == 256]

# Time to solution = earliest update with max_fitness == 256, computed once
# per (replicate, k) run. The result has one row per run, with the columns
# solution_time, replicate and k in that order.
res_df = (
    box_df
    .groupby(["replicate", "k"])["update"]
    .min()
    .rename("solution_time")
    .reset_index()
    .loc[:, ["solution_time", "replicate", "k"]]
)

## graph results (box plot)

In [None]:
# Distribution of solution_time for each k, drawn as a box plot.
boxplot_kwargs = dict(data=res_df, y="solution_time", x="k")
tp.tee(sns.boxplot, **boxplot_kwargs)

## time to solution (swarm plot)

In [None]:
def tee_swarmplot(*args, **kwargs):
    """Overlay a seaborn swarm plot and a box plot built from the same arguments.

    ``*args`` and ``**kwargs`` are forwarded to both ``sns.swarmplot`` and
    ``sns.boxplot``. The swarm plot additionally receives fixed marker styling
    (``linewidth=0.5``, ``s=4``, ``edgecolor='white'``), so passing any of
    those three keywords in ``kwargs`` raises ``TypeError`` for a duplicated
    keyword argument.

    Returns:
        None.
    """
    marker_style = {"linewidth": 0.5, "s": 4, "edgecolor": 'white'}
    sns.swarmplot(*args, **marker_style, **kwargs)
    # The box plot is drawn second and gets only the caller's arguments.
    sns.boxplot(*args, **kwargs)

In [None]:
# Per-run solution times drawn as swarm points together with a box plot for each k.
swarm_kwargs = dict(data=res_df, x="k", y="solution_time")
tp.tee(tee_swarmplot, **swarm_kwargs)