In [None]:
# Echo the value of a cell's final expression *or* assignment (not only bare expressions).
%config InteractiveShell.ast_node_interactivity='last_expr_or_assign'  # always print last expr.
# Render inline figures as SVG.
%config InlineBackend.figure_format = 'svg'
# Load the autoreload extension and set it to mode 2, so edited modules are re-imported
# automatically before code is executed.
%load_ext autoreload
%autoreload 2
# Show matplotlib figures inline in the cell output.
%matplotlib inline

In [None]:
import matplotlib.pyplot as plt

# Global figure styling: use the "bmh" style sheet and set the `axes.axisbelow` rc option.
plt.style.use("bmh")
plt.rcParams["axes.axisbelow"] = True
import numpy as np
# pandas is bound under both `pandas` and `pd`; cells below use both spellings.
import pandas
import pandas as pd
from pandas import DataFrame, Timedelta

from tsdm.utils.strings import snake2camel

# NOTE(review): no seed is passed, so any draws from `rng` differ between kernel runs;
# pass a fixed seed here if reproducible randomness is needed.
rng = np.random.default_rng()
# NOTE(review): called without arguments, which should leave the print options unchanged.
# Being the cell's last statement, it also keeps the `rng` assignment above from being
# echoed under the 'last_expr_or_assign' setting.
np.set_printoptions()

## Helper Functions

In [None]:
def data_overview(df: DataFrame) -> DataFrame:
    """Build a per-column summary table for ``df``.

    Parameters
    ----------
    df : DataFrame
        Frame to summarize; every column of ``df`` becomes one row of the result.

    Returns
    -------
    DataFrame
        Indexed by ``df.columns`` with the columns

        - ``"# datapoints"``: number of non-missing entries,
        - ``"% missing"``: percentage of missing entries, rounded to 2 decimals,
        - ``"min"``, ``"mean"``, ``"std"``, ``"max"``: the corresponding
          ``DataFrame`` reductions, rounded to 2 decimals.

    Notes
    -----
    If a reduction (or its rounding) fails on the full frame with a
    ``TypeError``/``ValueError`` -- which recent pandas versions raise for
    non-numeric columns instead of silently dropping them -- it is retried with
    ``numeric_only=True``. Columns excluded by that retry get ``NaN`` for the
    statistic. Frames for which the plain reduction succeeds are summarized
    exactly as before.
    """

    def _rounded(stat: str):
        """Return the column-wise reduction ``stat`` rounded to two decimals."""
        reduce = getattr(df, stat)
        try:
            return reduce().round(2)
        except (TypeError, ValueError):
            # Mixed-dtype frame: restrict the statistic to numeric columns;
            # the assignment below aligns on the index and fills the rest with NaN.
            return reduce(numeric_only=True).round(2)

    mask = pandas.isna(df)
    overview = DataFrame(index=df.columns)
    overview["# datapoints"] = (~mask).sum()
    # mean of a boolean mask == fraction of missing entries
    overview["% missing"] = (mask.mean() * 100).round(2)
    for stat in ("min", "mean", "std", "max"):
        overview[stat] = _rounded(stat)
    return overview

## Overview Task data

Pretty much the same as the cleaned data, but without run 355.

In [None]:
from tsdm.datasets import KIWI_RUNS

ds = KIWI_RUNS()

data = ds.timeseries.copy()
units = ds.units

# Attach to every row the total duration (in hours) of the (run, experiment) it belongs to.
# Index levels 0 and 1 identify the experiment; level 2 is the remaining (time) level.
one_hour = Timedelta("1h")
for run_exp in data.index.droplevel(2).unique():
    timestamps = data.loc[run_exp].index
    data.loc[run_exp, "runtime"] = (timestamps.max() - timestamps.min()) / one_hour

# Summary statistics over all experiments at once, with the unit of each original channel.
overview = data_overview(data.droplevel([0, 1]))
overview["unit"] = units.loc[ds.timeseries.columns]

with pd.option_context("display.float_format", "{:,.2f}".format):
    display(overview)

In [None]:
# One density histogram per column of `data`, laid out on a 4x4 grid.
fig, axes = plt.subplots(ncols=4, nrows=4, figsize=(12, 12))

for ax, col in zip(axes.flatten(), data):
    observed = data[col].dropna()  # histogram only the non-missing values
    ax.hist(observed, bins=59, density=True)
    ax.set_title(snake2camel(col))
    ax.set_xscale("symlog")
    # ax.set_yscale("log")

## Plotting a single, specific experiment

In [None]:
def make_all_plots(key, ts):
    """Plot each column of one experiment in its own panel of a 5x3 figure.

    Parameters
    ----------
    key
        Indexable pair; ``key[0]`` is shown as the run and ``key[1]`` as the
        experiment in the figure title.
    ts
        Frame holding the experiment's channels as columns. It is cast to
        ``float32``, and its index is converted to hours elapsed since the
        first index entry. Presumably indexed by measurement time -- confirm
        against the caller.

    Returns
    -------
    The created matplotlib figure.

    Notes
    -----
    Reads the module-level ``overview`` table: each panel's y-range comes from
    ``overview["min"]``/``overview["max"]`` of its column and the x-range from
    ``overview["max"]["runtime"]``, each widened by 1/20 of its span on both
    sides. Columns beyond the 15 available panels are not drawn.
    """

    def _padded(lower, upper):
        """Widen ``[lower, upper]`` by 1/20 of its length on both sides."""
        margin = (upper - lower) / 20
        return lower - margin, upper + margin

    ts = ts.astype("float32")
    elapsed_hours = ((ts.index - ts.index[0]) / Timedelta("1h")).values

    fig, axes = plt.subplots(
        nrows=5, ncols=3, figsize=(10, 14), constrained_layout=True, sharex=True
    )
    for ax, col in zip(axes.flatten(), ts.columns):
        series = ts[col]
        present = pandas.notna(series)  # plot observed samples only
        ax.plot(
            elapsed_hours[present],
            series[present],
            ls="-",
            lw=0.5,
            marker=".",
            ms=3,
        )
        ax.set_title(snake2camel(col))

        # Same limits for every experiment, taken from the global overview table.
        ax.set_ylim(*_padded(overview["min"][col], overview["max"][col]))
        ax.set_xlim(*_padded(0, overview["max"]["runtime"]))

    fig.suptitle(f"Run {key[0]} -- Experiment {key[1]}")
    return fig

In [None]:
# (run, experiment) pair to inspect -- passed to make_all_plots, which uses it for the title.
key = (439, 15325)

# Copy the data, order the columns by their snake2camel name, then select the one experiment.
ts = (
    ds.timeseries.copy()
    .pipe(lambda frame: frame[sorted(frame.columns, key=snake2camel)])
    .loc[key]
)

fig = make_all_plots(key, ts);

# KIWI_RUNS - The booklet

In [None]:
from matplotlib.backends.backend_pdf import PdfPages
from tqdm.auto import tqdm

In [None]:
%matplotlib agg

ts = ds.timeseries
ts = ts[sorted(ts.columns, key=snake2camel)]

with PdfPages("pics/kiwi-runs-booklet.pdf") as pdf:
    groups = ts.groupby(["run_id", "experiment_id"])

    for key, slc in tqdm(groups):
        slc = slc.reset_index(["run_id", "experiment_id"], drop=True)
        fig = make_all_plots(key, slc)
        pdf.savefig(fig)
        plt.close(fig)