Aim of this notebook:

Construct the Plotly and Matplotlib figures that will be used for the various
benchmarking configurations.

In [1]:
# Reload edited modules (e.g. the local agg_utils package) automatically
# before each cell executes, so helper changes take effect without a restart.
%load_ext autoreload
%autoreload 2

In [2]:
import itertools
import os
import sys
import traceback
from multiprocessing import Pool

import numpy as np
import pandas as pd
from tqdm.auto import tqdm

sys.path.append("..")
from agg_utils.fig_construction import construct_plotly_fig
from agg_utils.path_conf import figure_root_dir, loc_data_dir

In [3]:
# Metadata table of the pre-selected aggregation outputs; later cells read each
# row's `path` column as a parquet file.
df_agg_data = pd.read_csv(loc_data_dir / "agg_data_preselect.csv")

# Rows whose aggregator is the literal "reference".
reference_mask = df_agg_data["aggregator"].eq("reference")

# Rows to visualise: every LTTB / MinMaxLTTB row, plus the MinMaxAggregator
# rows that have no `factor` value.
is_lttb_family = df_agg_data["aggregator"].isin(["LTTB", "MinMaxLTTB"])
is_minmax_without_factor = (
    df_agg_data["aggregator"].eq("MinMaxAggregator") & df_agg_data["factor"].isna()
)
agg_mask = is_lttb_family | is_minmax_without_factor


def read_set_index_first_col(pqt_path) -> pd.Series:
    """Load a parquet file as a Series indexed by the file's first column.

    Parameters
    ----------
    pqt_path
        Location of the parquet file; forwarded unchanged to ``pd.read_parquet``.

    Returns
    -------
    pd.Series
        The single column that remains once the first column has been moved
        into the index.

    Raises
    ------
    AssertionError
        If anything other than exactly one column remains after indexing.
    """
    frame = pd.read_parquet(pqt_path)
    index_col = frame.columns[0]
    indexed = frame.set_index(index_col)  # drop=True is the pandas default
    assert indexed.shape[1] == 1
    return indexed.iloc[:, 0]



In [4]:
def _agg_key(row) -> str:
    """Build the ``agg_data_dict`` key for one row of ``df_agg_data``.

    Layout: ``<data>_<aggregator>_<n>_<n_out>``, followed by ``_<factor>`` only
    when the row's ``factor`` is not NaN.
    """
    base = f"{row.data}_{row.aggregator}_{row.n}_{int(row.n_out)}"
    if np.isnan(row.factor):
        return base
    return f"{base}_{int(row.factor)}"


# Reference series, keyed as "<data>_<aggregator>_<n>".
reference_rows = df_agg_data[reference_mask]
ref_data_dict = {
    f"{row.data}_{row.aggregator}_{row.n}": read_set_index_first_col(row.path)
    for _, row in tqdm(reference_rows.iterrows(), total=len(reference_rows))
}

# Aggregated series selected by `agg_mask`, keyed by `_agg_key`.
aggregated_rows = df_agg_data[agg_mask]
agg_data_dict = {
    _agg_key(row): read_set_index_first_col(row.path)
    for _, row in tqdm(aggregated_rows.iterrows(), total=len(aggregated_rows))
}


  0%|          | 0/18 [00:00<?, ?it/s]

  0%|          | 0/44694 [00:00<?, ?it/s]

## Construct plotly-figs

In [5]:
# Directory that receives the rendered plotly PNGs.
plotly_save_dir = figure_root_dir / "plotly_preselect"
# exist_ok=True creates the directory only when it is missing, without the
# check-then-create race of a separate `exists()` test.
os.makedirs(plotly_save_dir, exist_ok=True)

# Visual configurations to render, as (line widths, line shapes). The cells
# below iterate over the cartesian product of the two lists.
plotly_vis_grid = ([2], ["linear"])

In [6]:
# Render one figure per reference series for every (line width, line shape)
# combination in `plotly_vis_grid`.
for line_width, drawstyle in tqdm(list(itertools.product(*plotly_vis_grid))):
    for k, data in tqdm(list(ref_data_dict.items())):
        # Keys look like "<data>_<aggregator>_<n>". Splitting from the right keeps
        # dataset names that themselves contain "_" in one piece instead of failing
        # the 3-way unpack. The unused field is not bound to "_" so IPython's
        # last-output variable is left untouched.
        s_name, _aggregator, n = k.rsplit("_", 2)
        # Axis limits span the full extent of the reference series.
        xlim = (data.index[0], data.index[-1])
        ylim = (data.min(), data.max())
        save_name = (
            str(plotly_save_dir)
            + f"/reference_{s_name}_{n}_ls={drawstyle}_lw={line_width}"
        )
        construct_plotly_fig(
            data.index,
            data.values,
            save_path=save_name + ".png",
            xlim=xlim,
            ylim=ylim,
            line_width=line_width,
            line_shape=drawstyle,
        )

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/18 [00:00<?, ?it/s]


Discarding nonzero nanoseconds in conversion.



In [7]:
def wrap_create_plotly_figs(k: str):
    """Render the plotly figure(s) for a single aggregated series.

    Parameters
    ----------
    k
        An ``agg_data_dict`` key of the form
        ``<data>_<aggregator>_<n>_<n_out>`` with an optional fifth
        ``_<factor>`` field.

    Notes
    -----
    Reads the notebook-level globals ``agg_data_dict``, ``ref_data_dict``,
    ``plotly_vis_grid`` and ``plotly_save_dir``. The axis limits are taken
    from the matching reference series ``ref_data_dict["<data>_reference_<n>"]``
    rather than from the aggregated data. ``construct_plotly_fig`` is called
    once per (line width, line shape) combination with a ``.png`` save path.
    Returns ``None``.
    """
    fields = k.split("_")
    s_name, aggregator, n, n_out = fields[:4]
    # The factor only ends up in the file name when the key has exactly 5 fields.
    factor = f"_factor={fields[4]}" if len(fields) == 5 else ""

    agg_data = agg_data_dict[k]
    ref_data = ref_data_dict[f"{s_name}_reference_{n}"]
    x_range = (ref_data.index[0], ref_data.index[-1])
    y_range = (ref_data.min(), ref_data.max())

    for line_width, drawstyle in itertools.product(*plotly_vis_grid):
        file_stem = (
            f"/{aggregator}_{s_name}_{n}_{int(n_out)}{factor}"
            f"_ls={drawstyle}_lw={line_width}"
        )
        construct_plotly_fig(
            agg_data.index,
            agg_data.values.ravel(),
            save_path=str(plotly_save_dir) + file_stem + ".png",
            xlim=x_range,
            ylim=y_range,
            line_width=line_width,
            line_shape=drawstyle,
        )


# Fan the per-key rendering out over 8 worker processes. Results are consumed
# in completion order purely to advance the progress bar.
t_list = list(agg_data_dict)
with Pool(processes=8) as pool:
    results = tqdm(
        pool.imap_unordered(wrap_create_plotly_figs, t_list),
        total=len(t_list),
    )
    try:
        list(results)  # drain the iterator so every task is executed
        del results
    except BaseException:
        # NOTE(review): every failure, including KeyboardInterrupt, is printed
        # and then swallowed, so the cell completes even if figures are missing.
        # The workers are stopped immediately instead of finishing the queue.
        traceback.print_exc()
        pool.terminate()
    finally:
        # close() is a no-op after terminate(); join() waits for worker exit.
        pool.close()
        pool.join()



  0%|          | 0/44694 [00:00<?, ?it/s]