In [None]:
# Notebook-wide IPython configuration.
# Echo the value of a trailing expression *or* assignment at the end of each cell.
%config InteractiveShell.ast_node_interactivity='last_expr_or_assign'  # always print last expr.
# Render inline figures as SVG.
%config InlineBackend.figure_format = 'svg'
# Load the autoreload extension and set it to mode 2 so edits to imported modules are picked up.
%load_ext autoreload
%autoreload 2
# Draw matplotlib figures inline in the cell output.
%matplotlib inline

In [None]:
import matplotlib.pyplot as plt

# Use the "bmh" style sheet and keep grid lines/ticks beneath the plotted artists.
plt.style.use("bmh")
plt.rcParams["axes.axisbelow"] = True
import numpy as np
import pandas  # NOTE(review): same module as `pd` below; both spellings are used in this notebook.
import pandas as pd
from pandas import DataFrame, Timedelta

from tsdm.utils.strings import snake2camel

# Unseeded generator: random draws differ between kernel sessions.
rng = np.random.default_rng()
# Called without arguments, so no print option is overridden here.
np.set_printoptions()

## Helper Functions

In [None]:
def update_legend(ax, legend_dict, *, legend_kwargs=None, **kwargs):
    """Redraw the legend of ``ax`` from a ``{handle: label}`` mapping.

    Useful for dynamically updating the legend while a figure is being built.

    Parameters
    ----------
    ax
        Object exposing a matplotlib-style ``legend(handles, labels, **kwargs)`` method.
    legend_dict
        Mutable mapping from legend handle to label. It is updated in place with
        ``kwargs`` before the legend is drawn.
    legend_kwargs
        Optional keyword arguments forwarded to ``ax.legend``. They take precedence
        over the defaults defined in this function. The passed dict is not modified.
    **kwargs
        Additional ``handle=label`` entries to add to (or overwrite in) ``legend_dict``.

    Returns
    -------
    None
    """
    options = dict(
        frameon=False,
        loc="upper right",
        bbox_to_anchor=(0.75, 0.95, 0.05, 0.05),
        mode="expand",
    )
    # ``dict.update`` mutates in place and returns None, so its result must not be
    # assigned; merge the caller's overrides into ``options`` and keep using that.
    if legend_kwargs is not None:
        options.update(legend_kwargs)
    for key, val in kwargs.items():
        legend_dict[key] = val
    # Materialise the views so handles and labels are plain, equally long sequences.
    ax.legend(list(legend_dict.keys()), list(legend_dict.values()), **options)

In [None]:
def data_overview(df: DataFrame):
    """Build a per-column summary table for ``df``.

    The result is indexed by the columns of ``df`` and has the columns
    ``"# datapoints"`` (count of non-missing entries), ``"% missing"``
    (share of missing entries in percent) and ``"min"``, ``"mean"``,
    ``"std"``, ``"max"``. All values except the count are rounded to two decimals.
    """
    missing = df.isna()
    summary = DataFrame(index=df.columns)
    summary["# datapoints"] = df.notna().sum()
    summary["% missing"] = missing.mean().mul(100).round(2)
    # The four descriptive statistics share the same "compute, then round" recipe.
    for stat in ("min", "mean", "std", "max"):
        summary[stat] = getattr(df, stat)().round(2)
    return summary

## Overview Task data

Pretty much the same as the cleaned data, but without run 355.

In [None]:
from tsdm.tasks import KIWI_RUNS_TASK

# Instantiate the task with its default arguments.
task = KIWI_RUNS_TASK()

In [None]:
# Display the task's `targets` attribute.
task.targets

In [None]:
from tsdm.tasks import KIWI_RUNS_TASK

# NOTE(review): repeats the import and instantiation already done in an earlier cell.
task = KIWI_RUNS_TASK()
data = task.timeseries
units = task.units

# Add a "runtime" column: for each unique index prefix that remains after moving
# index level 2 into the columns, store the span between the smallest and largest
# remaining index value, expressed in hours.
# presumably the prefix is (run_id, experiment_id) and level 2 is the timestamp — verify.
# NOTE(review): this writes into `data` in place; confirm whether that also alters `task.timeseries`.
for run_exp in data.reset_index(level=2).index.unique():
    time = data.loc[run_exp].index
    td = (time.max() - time.min()) / Timedelta("1h")
    data.loc[run_exp, "runtime"] = td

# Summarise every column after dropping the first two index levels.
overview = data_overview(data.reset_index(level=[0, 1], drop=True))
# NOTE(review): units are looked up via `task.timeseries.columns`, not `data.columns` — confirm the
# two agree after "runtime" was added and that `units` has an entry for every label.
overview["unit"] = units.loc[task.timeseries.columns]

# Show floats with thousands separators and two decimals, for this display only.
with pd.option_context("display.float_format", "{:,.2f}".format):
    display(overview)

In [None]:
fig, axes = plt.subplots(ncols=4, nrows=4, figsize=(12, 12))

# One density histogram per column of `data`; pairing stops as soon as either the
# columns or the 16 axes run out.
for ax, col in zip(axes.flatten(), data):
    vals = data[col]
    observed = vals[vals.notna()]  # drop missing entries before binning
    ax.hist(observed, bins=59, density=True)
    ax.set_xscale("symlog")
    ax.set_title(snake2camel(col))
    # ax.set_yscale("log")

## Select specific single Experiment

In [None]:
task = KIWI_RUNS_TASK()
ts, md = task.splits((0, "train"))
# ts.columns = ts.columns.map(snake2camel)
# md.columns = md.columns.map(snake2camel)

# Order the columns alphabetically, then keep only the rows stored under `key`.
key = (439, 15325)
ts = ts[sorted(ts.columns)].loc[key]

# KIWI_RUNS - The booklet

In [None]:
task = KIWI_RUNS_TASK()
# Rebinds `ts` to the task's `timeseries` attribute, replacing the single-key slice selected earlier.
ts = task.timeseries

In [None]:
# Display the per-column summary table; `make_all_plots` reads its "min"/"max" columns for y-limits.
overview

In [None]:
def make_all_plots(key, ts):
    """Draw one panel per column of ``ts`` and return the resulting figure.

    ``key`` supplies the two identifiers shown in the figure title
    (``key[0]`` as run, ``key[1]`` as experiment). ``ts`` is cast to float32 and
    plotted against the time elapsed since its first index entry, in hours.
    Each panel's y-range is taken from the notebook-level ``overview`` table so
    that panels are comparable across figures. At most 15 columns (5 x 3 axes)
    are drawn.
    """
    ts = ts.astype("float32")
    elapsed_hours = ((ts.index - ts.index[0]) / Timedelta("1h")).values
    fig, axes = plt.subplots(nrows=5, ncols=3, figsize=(10, 14), constrained_layout=True)
    line_style = dict(ls="-", lw=1, marker=".", ms=2)

    for ax, col in zip(axes.flatten(), ts.columns):
        series = ts[col]
        present = series.notna()  # skip missing samples
        ax.plot(elapsed_hours[present], series[present], **line_style)
        ax.set_title(snake2camel(col))
        ax.set_ylim(overview.loc[col, "min"], overview.loc[col, "max"])

    fig.suptitle(f"Run {key[0]} -- Experiment {key[1]}")
    return fig

In [None]:
from tqdm.auto import tqdm

In [None]:
from matplotlib.backends.backend_pdf import PdfPages

In [None]:
# Write one PDF page per (run_id, experiment_id) group of `ts`.
with PdfPages("pics/kiwi-runs-booklet.pdf") as pdf:
    groups = ts.groupby(["run_id", "experiment_id"])

    for key, slc in tqdm(groups):
        # Drop the grouping levels from the index before handing the slice to the plotter.
        slc = slc.reset_index(["run_id", "experiment_id"], drop=True)
        fig = make_all_plots(key, slc)
        pdf.savefig(fig)
        # `del fig` would only unbind the name; pyplot keeps every figure registered
        # until it is closed. Close each page once saved so open figures do not pile
        # up across the loop (this also keeps them out of the inline cell output).
        plt.close(fig)

## Make the plot

In [None]:
# Offset of every index entry of `ts` from the first one, expressed in hours, as a plain array.
# NOTE(review): the booklet section rebinds `ts = task.timeseries`, which is grouped there by the
# "run_id"/"experiment_id" index levels; this subtraction presumably expects the single-experiment
# `ts` from the "Select specific single Experiment" cell — confirm which `ts` is intended.
T = ((ts.index - ts.index[0]) / Timedelta("1h")).values

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))

# One solid line per control column, leaving out missing samples.
for col in task.controls:
    series = ts[col]
    present = series.notna()
    ax.plot(T[present], series[present], linestyle="-", linewidth=1)
# ax.set_yscale("log")
ax.legend(snake2camel(task.controls))

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))

# One thin, dotted-marker line per target column, leaving out missing samples.
for col in task.targets:
    vals = ts[col]
    mask = pandas.notna(vals)
    ax.plot(T[mask], vals[mask], marker=".", ms=2, ls="-", lw=0.1)
ax.set_yscale("log")
# Legend labels are matched to the plotted lines in order, so they must come from the
# same collection that was plotted (`task.targets`, not `task.observables`).
ax.legend(snake2camel(task.targets))

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))

# Observables that are not targets. The order of `task.observables` is kept (duplicates
# dropped) instead of iterating a set, whose order for strings varies between kernel
# sessions and would shuffle line colours and legend order from run to run.
targets = set(task.targets)
others = tuple(col for col in dict.fromkeys(task.observables) if col not in targets)

for col in others:
    vals = ts[col]
    mask = pandas.notna(vals)
    ax.plot(T[mask], vals[mask], marker=".", ms=2, ls="-", lw=0.1)

ax.set_yscale("log")
ax.legend(snake2camel(others))

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))

others = ("pH",)
line_style = dict(marker=".", ms=2, ls="-", lw=0.1)

# Same recipe as the other per-column plots, restricted to the columns in `others`.
for col in others:
    series = ts[col]
    present = series.notna()
    ax.plot(T[present], series[present], **line_style)

ax.set_yscale("log")
ax.legend(snake2camel(others))

In [None]:
# `KIWI_RUNS` is never defined or imported in this notebook; read the units table from
# the task instance instead, the same way the overview section does.
task.units

In [None]:
fig, ax = plt.subplots()

# "DOT" is drawn as a line with point markers (the `=` after `marker` was missing,
# which made the cell a syntax error); "OD600" is drawn as points only.
ax.plot(T, ts["DOT"], marker=".")
ax.plot(T, ts["OD600"], ".")

In [None]:
fig, ax = plt.subplots()

# `df` is not defined anywhere in this notebook; `T` is derived from `ts`, so the
# y-values are read from `ts` to keep both axes aligned.
# NOTE(review): confirm that `ts` has "Acetate" and "Glucose" columns.
ax.plot(T, ts["Acetate"], ".")
ax.plot(T, ts["Glucose"], ".")

In [None]:
fig, axes = plt.subplots(ncols=3, figsize=(12, 4))

# One panel per variable group: observables, targets, controls.
# The original cell referenced `t_train`, `df_train` and `clean_strings`, none of which
# exist in this notebook; it now uses `T`, `ts` and `snake2camel` and iterates the
# groups directly, exactly like the single-group plots above.
for ax, variables in zip(axes, (task.observables, task.targets, task.controls)):
    for col in variables:
        ax.plot(T, ts[col], ".")
    ax.legend(snake2camel(variables))