Aim of this notebook:

Construct the `plotly`, `matplotlib` and `bokeh` figures which will be used for
the various benchmarking configurations.

In [3]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
import numpy as np
import plotly.express as px
import pandas as pd
from pathlib import Path
import os

# prevent images from appearing in the notebook
import matplotlib

matplotlib.use("Agg")

import itertools

from tqdm.auto import tqdm
from multiprocessing import Pool
import traceback

import sys

sys.path.append("..")
from agg_utils.path_conf import figure_root_dir, loc_data_dir
from agg_utils.fig_construction import (
    construct_plotly_fig,
    construct_matplotlib_fig,
    construct_bokeh_fig,
)


In [None]:
# Load the aggregation index CSV produced by `0.2_create_agg_data.ipynb`; the data
# dicts that the visualization toolkits operate upon are built from its rows.
df_agg_data = pd.read_csv(loc_data_dir / "agg_data.csv")
# True for the rows that have no `n_out` value
n_out_mask = df_agg_data["n_out"].isna()

def read_set_index_first_col(pqt_path) -> pd.Series:
    """Read a parquet file and return it as a Series indexed by its first column.

    The first column of the stored frame becomes the index. Exactly one column
    must remain afterwards (enforced with an ``assert``); that column is returned.
    """
    frame = pd.read_parquet(pqt_path)
    index_col = frame.columns[0]
    indexed = frame.set_index(index_col, drop=True)
    assert indexed.shape[1] == 1
    return indexed.iloc[:, 0]

# Rows that carry an `n_out` value: keyed by data / aggregator / n / n_out.
agg_rows = df_agg_data[~n_out_mask]
agg_data_dict = {
    f"{row.data}_{row.aggregator}_{row.n}_{int(row.n_out)}": read_set_index_first_col(row.path)
    for _, row in tqdm(agg_rows.iterrows(), total=len(agg_rows))
}

# Rows without an `n_out` value: keyed by data / aggregator / n.
ref_rows = df_agg_data[n_out_mask]
ref_data_dict = {
    f"{row.data}_{row.aggregator}_{row.n}": read_set_index_first_col(row.path)
    for _, row in tqdm(ref_rows.iterrows(), total=len(ref_rows))
}

### `matplotlib` images

In [6]:
# Rendering options that are combined into the matplotlib configuration grid.
antialiasing_grid = [True, False]
line_width_grid = [1, 2, 3, 4]
# only the first two drawstyles ("default" and "steps-mid") are rendered
drawstyle_grid = ["default", "steps-mid", "steps-pre", "steps-post"][:2]

# Output folder of the matplotlib images. `exist_ok=True` keeps the cell safe to
# re-run and avoids the check-then-create race of `exists()` + `makedirs()`.
matplotlib_save_dir = figure_root_dir / "matplotlib"
os.makedirs(matplotlib_save_dir, exist_ok=True)

In [None]:
matplotlib_image_grid = antialiasing_grid, line_width_grid, drawstyle_grid

# fmt: off
# Render every REFERENCE series once per (antialiasing, line width, drawstyle)
# combination of the visualization configuration grid.
vis_configs = list(itertools.product(*matplotlib_image_grid))
for antialiasing, line_width, drawstyle in tqdm(vis_configs):
    aa_suffix = "_aa" if antialiasing else ""
    for key, series in tqdm(list(ref_data_dict.items())):
        # keys are "<data>_<aggregator>_<n>"; the middle field is not used here
        s_name, _, n = key.split("_")
        out_file = (
            str(matplotlib_save_dir)
            + f"/reference_{s_name}_{n}_ls={drawstyle}_lw={line_width}"
            + aa_suffix
            + ".png"
        )
        construct_matplotlib_fig(
            series.index,
            series.values,
            save_path=out_file,
            aa=antialiasing,
            # the axes span exactly the extent of the reference series
            xlim=(series.index[0], series.index[-1]),
            ylim=(series.min(), series.max()),
            line_width_px=line_width,
            drawstyle=drawstyle,
        )


In [None]:
def wrap_create_matplotlib_figs(k: str):
    """Render one aggregated series for every matplotlib configuration.

    Parameters
    ----------
    k : str
        Key of ``agg_data_dict``; it is split on ``"_"`` into exactly four fields
        (data name, aggregator, ``n``, ``n_out``), so a key with a different
        number of underscores raises ``ValueError``.

    Notes
    -----
    The axis limits are taken from the reference series
    ``ref_data_dict[f"{s_name}_reference_{n}"]``, not from the aggregated data.

    ``agg_data_dict``, ``ref_data_dict``, ``matplotlib_image_grid`` and
    ``matplotlib_save_dir`` are read as module-level globals at call time.

    No progress bar is created in here: this function runs inside pool workers,
    where a bar per key only floods the output. Overall progress is tracked by
    the caller that drains the pool results.
    """
    s_name, aggregator, n, n_out = k.split("_")
    agg_data = agg_data_dict[k]
    ref_data = ref_data_dict[f"{s_name}_reference_{n}"]
    xlim = (ref_data.index[0], ref_data.index[-1])
    ylim = (ref_data.min(), ref_data.max())
    for antialiasing, line_width, drawstyle in itertools.product(
        *matplotlib_image_grid
    ):
        save_name = (
            str(matplotlib_save_dir)
            + f"/{aggregator}_{s_name}_{n}_{int(n_out)}_ls={drawstyle}_lw={line_width}"
            f"{'_aa' if antialiasing else ''}"
        )
        construct_matplotlib_fig(
            agg_data.index,
            agg_data.values.ravel(),
            save_path=save_name + ".png",
            aa=antialiasing,
            xlim=xlim,
            ylim=ylim,
            line_width_px=line_width,
            drawstyle=drawstyle,
        )

# Create the AGGREGATED images
t_list = list(agg_data_dict.keys())
with Pool(processes=8) as pool:
    results = pool.imap_unordered(wrap_create_matplotlib_figs, t_list)
    results = tqdm(results, total=len(t_list))
    try:
        # Draining the iterator drives the progress bar; an exception raised in
        # a worker is re-raised here by `imap_unordered`.
        out = list(results)
    except BaseException:
        # Explicit instead of a bare `except:` (KeyboardInterrupt is still
        # covered). Stop the outstanding tasks and let the error propagate, so
        # the cell fails visibly instead of finishing with an incomplete image set.
        pool.terminate()
        raise
    finally:
        pool.close()
        pool.join()


### `matplotlib` images - `cairo` backend

In [15]:
antialiasing_grid = [True, False]
line_width_grid = [1, 2, 3, 4]
# The cairo run only renders the "default" drawstyle. This restriction lives in a
# dedicated name: reassigning the shared `drawstyle_grid` here would silently
# shrink the grid of the later bokeh cell (which reads `drawstyle_grid`) when the
# notebook is executed top to bottom.
cairo_drawstyle_grid = ["default", "steps-mid", "steps-pre", "steps-post"][:1]

# `matplotlib_save_dir` and `matplotlib_image_grid` are re-pointed to the cairo
# run; `wrap_create_matplotlib_figs` reads both as globals.
# `exist_ok=True` avoids the check-then-create race of `exists()` + `makedirs()`.
matplotlib_save_dir = figure_root_dir / "matplotlib_cairo"
os.makedirs(matplotlib_save_dir, exist_ok=True)
matplotlib_image_grid = antialiasing_grid, line_width_grid, cairo_drawstyle_grid

# fmt: off
# Render every REFERENCE series with the cairo backend, once per combination of
# the visualization configuration grid.
vis_configs = list(itertools.product(*matplotlib_image_grid))
for antialiasing, line_width, drawstyle in tqdm(vis_configs):
    aa_suffix = "_aa" if antialiasing else ""
    for key, series in ref_data_dict.items():
        # keys are "<data>_<aggregator>_<n>"; the middle field is not used here
        s_name, _, n = key.split("_")
        out_file = (
            str(matplotlib_save_dir)
            + f"/reference_{s_name}_{n}_ls={drawstyle}_lw={line_width}"
            + aa_suffix
            + ".png"
        )
        construct_matplotlib_fig(
            series.index,
            series.values,
            save_path=out_file,
            aa=antialiasing,
            # the axes span exactly the extent of the reference series
            xlim=(series.index[0], series.index[-1]),
            ylim=(series.min(), series.max()),
            line_width_px=line_width,
            drawstyle=drawstyle,
            backend='cairo'
        )

def wrap_create_matplotlib_figs(k: str):
    """Render one aggregated series for every configuration with the cairo backend.

    NOTE: this definition shadows the earlier function of the same name in this
    notebook; the only difference is ``backend='cairo'``.

    Parameters
    ----------
    k : str
        Key of ``agg_data_dict``; it is split on ``"_"`` into exactly four fields
        (data name, aggregator, ``n``, ``n_out``), so a key with a different
        number of underscores raises ``ValueError``.

    Notes
    -----
    The axis limits are taken from the reference series
    ``ref_data_dict[f"{s_name}_reference_{n}"]``, not from the aggregated data.

    ``agg_data_dict``, ``ref_data_dict``, ``matplotlib_image_grid`` and
    ``matplotlib_save_dir`` are read as module-level globals at call time.

    No progress bar is created in here: this function runs inside pool workers,
    where a bar per key only floods the output. Overall progress is tracked by
    the caller that drains the pool results.
    """
    s_name, aggregator, n, n_out = k.split("_")
    agg_data = agg_data_dict[k]
    ref_data = ref_data_dict[f"{s_name}_reference_{n}"]
    xlim = (ref_data.index[0], ref_data.index[-1])
    ylim = (ref_data.min(), ref_data.max())
    for antialiasing, line_width, drawstyle in itertools.product(
        *matplotlib_image_grid
    ):
        save_name = (
            str(matplotlib_save_dir)
            + f"/{aggregator}_{s_name}_{n}_{int(n_out)}_ls={drawstyle}_lw={line_width}"
            f"{'_aa' if antialiasing else ''}"
        )
        construct_matplotlib_fig(
            agg_data.index,
            agg_data.values.ravel(),
            save_path=save_name + ".png",
            aa=antialiasing,
            xlim=xlim,
            ylim=ylim,
            line_width_px=line_width,
            drawstyle=drawstyle,
            backend='cairo'
        )

# Create the AGGREGATED images (cairo backend)
t_list = list(agg_data_dict.keys())
with Pool(processes=8) as pool:
    results = pool.imap_unordered(wrap_create_matplotlib_figs, t_list)
    results = tqdm(results, total=len(t_list))
    try:
        # Draining the iterator drives the progress bar; an exception raised in
        # a worker is re-raised here by `imap_unordered`.
        out = list(results)
    except BaseException:
        # Explicit instead of a bare `except:` (KeyboardInterrupt is still
        # covered). Stop the outstanding tasks and let the error propagate, so
        # the cell fails visibly instead of finishing with an incomplete image set.
        pool.terminate()
        raise
    finally:
        pool.close()
        pool.join()


  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/36672 [00:00<?, ?it/s]

### `plotly` images

In [None]:
# plotly line shapes that are combined with `line_width_grid` (assigned in an
# earlier cell) into the plotly configuration grid
line_shape_grid = ["linear", "spline", "vhv"]

# Output folder of the plotly images. `exist_ok=True` keeps the cell safe to
# re-run and avoids the check-then-create race of `exists()` + `makedirs()`.
plotly_save_dir = figure_root_dir / "plotly"
os.makedirs(plotly_save_dir, exist_ok=True)
plotly_vis_grid = line_width_grid, line_shape_grid


In [None]:
# Render every reference series once per (line width, line shape) combination of
# the plotly configuration grid.
# NOTE: this may take a while
plotly_configs = list(itertools.product(*plotly_vis_grid))
for line_width, line_shape in tqdm(plotly_configs):
    for key, series in tqdm(list(ref_data_dict.items())):
        # keys are "<data>_<aggregator>_<n>"; the middle field is not used here
        s_name, _, n = key.split("_")
        out_file = (
            str(plotly_save_dir)
            + f"/reference_{s_name}_{n}_ls={line_shape}_lw={line_width}"
            + ".png"
        )
        construct_plotly_fig(
            series.index,
            series.values,
            save_path=out_file,
            # the axes span exactly the extent of the reference series
            xlim=(series.index[0], series.index[-1]),
            ylim=(series.min(), series.max()),
            line_width=line_width,
            line_shape=line_shape,
        )


In [None]:
def wrap_create_plotly_figs(k: str):
    """Render one aggregated series for every plotly configuration.

    ``k`` is a key of ``agg_data_dict`` that splits on ``"_"`` into data name,
    aggregator, ``n`` and ``n_out``. The axis limits are taken from the matching
    reference series ``ref_data_dict[f"{s_name}_reference_{n}"]``.
    ``plotly_vis_grid`` and ``plotly_save_dir`` are read as globals.
    """
    s_name, aggregator, n, n_out = k.split("_")
    series = agg_data_dict[k]
    reference = ref_data_dict[f"{s_name}_reference_{n}"]
    x_range = (reference.index[0], reference.index[-1])
    y_range = (reference.min(), reference.max())
    for line_width, line_shape in itertools.product(*plotly_vis_grid):
        out_file = (
            str(plotly_save_dir)
            + f"/{aggregator}_{s_name}_{n}_{int(n_out)}_ls={line_shape}_lw={line_width}"
            + ".png"
        )
        construct_plotly_fig(
            series.index,
            series.values.ravel(),
            save_path=out_file,
            xlim=x_range,
            ylim=y_range,
            line_width=line_width,
            line_shape=line_shape,
        )


# Create the AGGREGATED images
t_list = list(agg_data_dict.keys())
with Pool(processes=8) as pool:
    results = pool.imap_unordered(wrap_create_plotly_figs, t_list)
    results = tqdm(results, total=len(t_list))
    try:
        # Draining the iterator drives the progress bar; an exception raised in
        # a worker is re-raised here by `imap_unordered`.
        list(results)
    except BaseException:
        # Explicit instead of a bare `except:` (KeyboardInterrupt is still
        # covered). Stop the outstanding tasks and let the error propagate, so
        # the cell fails visibly instead of finishing with an incomplete image set.
        pool.terminate()
        raise
    finally:
        pool.close()
        pool.join()


### `bokeh` images

In [7]:
# Output folder of the bokeh images. `exist_ok=True` keeps the cell safe to
# re-run and avoids the check-then-create race of `exists()` + `makedirs()`.
bokeh_save_dir = figure_root_dir / "bokeh"
os.makedirs(bokeh_save_dir, exist_ok=True)
# NOTE(review): `line_width_grid` and `drawstyle_grid` hold whatever an earlier
# cell assigned last, so the size of this grid depends on which cells ran
# before this one - confirm the intended grid.
bokeh_vis_grid = line_width_grid, drawstyle_grid


In [8]:
# Render every REFERENCE series once per (line width, drawstyle) combination of
# the bokeh configuration grid.
bokeh_configs = list(itertools.product(*bokeh_vis_grid))
for line_width, drawstyle in tqdm(bokeh_configs):
    for key, series in tqdm(list(ref_data_dict.items())):
        # keys are "<data>_<aggregator>_<n>"; the middle field is not used here
        s_name, _, n = key.split("_")
        out_file = (
            str(bokeh_save_dir)
            + f"/reference_{s_name}_{n}_ls={drawstyle}_lw={line_width}"
            + ".png"
        )
        construct_bokeh_fig(
            series.index,
            series.values,
            save_path=out_file,
            # the axes span exactly the extent of the reference series
            xlim=(series.index[0], series.index[-1]),
            ylim=(series.min(), series.max()),
            line_width=line_width,
            line_shape=drawstyle,
        )


  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/24 [00:00<?, ?it/s]

  0%|          | 0/24 [00:00<?, ?it/s]

  0%|          | 0/24 [00:00<?, ?it/s]

  0%|          | 0/24 [00:00<?, ?it/s]

  0%|          | 0/24 [00:00<?, ?it/s]

  0%|          | 0/24 [00:00<?, ?it/s]

  0%|          | 0/24 [00:00<?, ?it/s]

  0%|          | 0/24 [00:00<?, ?it/s]

**Note**: constructing bokeh images is slow (see [this stackoverflow answer](https://stackoverflow.com/a/59184722)) and cannot be sped up by using multiprocessing. As such, we do not use the above approach for bokeh images, but a [Python script](construct_bokeh_figs.py), which can be run in a screen session.

Note: it took ~1 day to compute the bokeh images.