In [1]:
# Reload imported modules automatically before each cell runs, so edits to the
# local `agg_utils` package are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys

sys.path.append("..")

import pandas as pd
from tqdm.auto import tqdm
from pathlib import Path
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import numpy as np

from multiprocessing import Pool
import traceback
from functional import seq

from agg_utils.metrics import compute_dssim_matplotlib, compute_dssim_plotly
from agg_utils.path_conf import figure_root_dir
from plotly.subplots import make_subplots

# Calculation of the Metrics 

## Matplotlib figures

In [3]:
# Build a dict that holds every reference image in memory, keyed by its path
# string, so the reference does not have to be re-read from disk for every
# comparison.
matplotlib_dir = figure_root_dir / "matplotlib"
reference_paths = list(matplotlib_dir.glob("reference_*.png"))

ref_dict = {}
for ref_path in tqdm(reference_paths):
    # Pixel values are multiplied by 255 and stored as float32.
    ref_dict[str(ref_path)] = (255 * plt.imread(ref_path)).astype(np.float32)

# Smaller subset, handy while debugging:
# files = seq((figure_root_dir / "matplotlib").glob("*200000*ls=default*lw=1*.png")).filter(lambda x: 'reference' not in x.name).to_list()

# Every PNG in the directory except the reference images themselves.
files = [
    png_path
    for png_path in matplotlib_dir.glob("*.png")
    if 'reference' not in png_path.name
]

  0%|          | 0/384 [00:00<?, ?it/s]

In [7]:
def wrap_compute_dssim_matplotlib(agg_path):
    """Compute the image metrics for a single aggregated matplotlib figure.

    Single-argument adapter around ``compute_dssim_matplotlib`` so that it can be
    mapped over ``files`` by ``Pool.imap_unordered``. It always passes
    ``mse=True`` and the module-level ``ref_dict`` of pre-loaded reference images.

    NOTE(review): ``ref_dict`` is read as a global, so the workers only see it if
    they inherit the notebook's namespace (fork start method) -- confirm on
    platforms that default to ``spawn``.

    Parameters
    ----------
    agg_path
        Path of the aggregated figure to score (one element of ``files``).

    Returns
    -------
    The result of ``compute_dssim_matplotlib``. The results are later combined
    with ``pd.concat(..., axis=1)``, so presumably one ``pd.Series`` per
    figure -- TODO confirm.
    """
    return compute_dssim_matplotlib(agg_path, mse=True, ref_dict=ref_dict)


# Reset first so a failed run can never leave results of an earlier run behind.
out = []

# Score all figures in parallel. Any worker error (or a keyboard interrupt)
# propagates out of the `with` block, whose __exit__ terminates the pool. The
# cell therefore stops at the real error instead of printing it and then failing
# later on an empty `out`.
with Pool(processes=8) as pool:
    out = list(
        tqdm(
            pool.imap_unordered(wrap_compute_dssim_matplotlib, files),
            total=len(files),
        )
    )

if not out:
    raise ValueError("No matplotlib figures were processed; nothing to aggregate.")

# One column per figure after the concat; transpose to get one row per figure.
df_out = pd.concat(out, axis=1).T
# df_out.to_csv("../loc_data/matplotlib_metrics_v4.csv", index=False)

cat_cols = ['toolkit', 'data', 'aggregator', 'ls']
int_cols = ['n', 'lw', 'n_out']
typed_cols = set(cat_cols + int_cols + ['aa'])
# Every remaining column is treated as a float-valued metric.
float_cols = [c for c in df_out.columns if c not in typed_cols]

# NOTE(review): astype('bool') maps any non-empty string to True -- confirm that
# 'aa' holds real booleans and not strings such as "False".
df_out = df_out.astype({
    **{c: 'category' for c in cat_cols},
    **{c: 'int' for c in int_cols},
    'aa': 'bool',
    **{c: 'float' for c in float_cols},
})
df_out.to_parquet("../loc_data/matplotlib_metrics_v4.parquet")


  0%|          | 0/586752 [00:00<?, ?it/s]

## Matplotlib `cairo` figures

In [32]:
# Keep every cairo reference image in memory, keyed by its path string, so it is
# loaded from disk only once.
cairo_dir = figure_root_dir / "matplotlib_cairo"
reference_paths = list(cairo_dir.glob("reference_*.png"))

ref_dict = {}
for ref_path in tqdm(reference_paths):
    # Pixel values are multiplied by 255 and stored as float32.
    ref_dict[str(ref_path)] = (255 * plt.imread(ref_path)).astype(np.float32)

# Smaller subset, handy while debugging.
# NOTE(review): this glob points at the "matplotlib" directory, not
# "matplotlib_cairo" -- looks like a copy-paste leftover; confirm before enabling.
# files = seq((figure_root_dir / "matplotlib").glob("*200000*ls=default*lw=1*.png")).filter(lambda x: 'reference' not in x.name).to_list()

# Every PNG in the directory except the reference images themselves.
files = [
    png_path
    for png_path in cairo_dir.glob("*.png")
    if 'reference' not in png_path.name
]

  0%|          | 0/192 [00:00<?, ?it/s]

In [33]:
def wrap_compute_dssim_matplotlib(agg_path):
    """Compute the image metrics for a single aggregated matplotlib-cairo figure.

    Single-argument adapter around ``compute_dssim_matplotlib`` so that it can be
    mapped over ``files`` by ``Pool.imap_unordered``. It always passes
    ``mse=True`` and the module-level ``ref_dict`` of pre-loaded reference images.

    NOTE(review): ``ref_dict`` is read as a global, so the workers only see it if
    they inherit the notebook's namespace (fork start method) -- confirm on
    platforms that default to ``spawn``.

    Parameters
    ----------
    agg_path
        Path of the aggregated figure to score (one element of ``files``).

    Returns
    -------
    The result of ``compute_dssim_matplotlib``. The results are later combined
    with ``pd.concat(..., axis=1)``, so presumably one ``pd.Series`` per
    figure -- TODO confirm.
    """
    return compute_dssim_matplotlib(agg_path, mse=True, ref_dict=ref_dict)


# Reset first so a failed run can never leave results of an earlier run behind.
out = []

# Score all figures in parallel. Any worker error (or a keyboard interrupt)
# propagates out of the `with` block, whose __exit__ terminates the pool. The
# cell therefore stops at the real error instead of printing it and then failing
# later on an empty `out`.
with Pool(processes=8) as pool:
    out = list(
        tqdm(
            pool.imap_unordered(wrap_compute_dssim_matplotlib, files),
            total=len(files),
        )
    )

if not out:
    raise ValueError("No matplotlib_cairo figures were processed; nothing to aggregate.")

# One column per figure after the concat; transpose to get one row per figure.
df_out = pd.concat(out, axis=1).T
# df_out.to_csv("../loc_data/matplotlib_cairo_metrics_v4.csv", index=False)

cat_cols = ['toolkit', 'data', 'aggregator', 'ls']
int_cols = ['n', 'lw', 'n_out']
typed_cols = set(cat_cols + int_cols + ['aa'])
# Every remaining column is treated as a float-valued metric.
float_cols = [c for c in df_out.columns if c not in typed_cols]

# NOTE(review): astype('bool') maps any non-empty string to True -- confirm that
# 'aa' holds real booleans and not strings such as "False".
df_out = df_out.astype({
    **{c: 'category' for c in cat_cols},
    **{c: 'int' for c in int_cols},
    'aa': 'bool',
    **{c: 'float' for c in float_cols},
})
df_out.to_parquet("../loc_data/matplotlib_cairo_metrics_v4.parquet")


  0%|          | 0/293376 [00:00<?, ?it/s]

## `plotly` figures

In [12]:
# Keep every plotly reference image in memory, keyed by its path string, so it
# is loaded from disk only once.
plotly_dir = figure_root_dir / "plotly"
reference_paths = list(plotly_dir.glob("reference_*.png"))

ref_dict = {}
for ref_path in tqdm(reference_paths):
    # Unlike the matplotlib sections, the scaled image is subtracted from 255
    # (i.e. inverted) before being stored as float32.
    ref_dict[str(ref_path)] = (255 - 255 * plt.imread(ref_path)).astype(np.float32)

# Every PNG in the directory except the reference images themselves.
files = [
    png_path
    for png_path in plotly_dir.glob("*.png")
    if 'reference' not in png_path.name
]

# Smaller subset, handy while debugging:
# files = seq((figure_root_dir / "plotly").glob("*200000*ls=linear*lw=1.png")).filter(lambda x: 'reference' not in x.name).to_list()

  0%|          | 0/288 [00:00<?, ?it/s]

In [13]:
def wrap_compute_dssim_plotly(agg_path):
    """Compute the image metrics for a single aggregated plotly figure.

    Single-argument adapter around ``compute_dssim_plotly`` so that it can be
    mapped over ``files`` by ``Pool.imap_unordered``. It always passes
    ``mse=True`` and the module-level ``ref_dict`` of pre-loaded reference images.

    NOTE(review): ``ref_dict`` is read as a global, so the workers only see it if
    they inherit the notebook's namespace (fork start method) -- confirm on
    platforms that default to ``spawn``.

    Parameters
    ----------
    agg_path
        Path of the aggregated figure to score (one element of ``files``).

    Returns
    -------
    The result of ``compute_dssim_plotly``. The results are later combined with
    ``pd.concat(..., axis=1)``, so presumably one ``pd.Series`` per
    figure -- TODO confirm.
    """
    return compute_dssim_plotly(agg_path, mse=True, ref_dict=ref_dict)


# Reset first so a failed run can never leave results of an earlier run behind.
out = []

# Score all figures in parallel. Any worker error (or a keyboard interrupt)
# propagates out of the `with` block, whose __exit__ terminates the pool. The
# cell therefore stops at the real error instead of printing it and then failing
# later on an empty `out`.
with Pool(processes=8) as pool:
    out = list(
        tqdm(
            pool.imap_unordered(wrap_compute_dssim_plotly, files),
            total=len(files),
        )
    )

if not out:
    raise ValueError("No plotly figures were processed; nothing to aggregate.")

# One column per figure after the concat; transpose to get one row per figure.
df_out = pd.concat(out, axis=1).T
# df_out.to_csv("../loc_data/plotly_metrics_v4.csv", index=False)

cat_cols = ['toolkit', 'data', 'aggregator', 'ls']
int_cols = ['n', 'lw', 'n_out']
typed_cols = set(cat_cols + int_cols + ['aa'])
# Every remaining column is treated as a float-valued metric.
float_cols = [c for c in df_out.columns if c not in typed_cols]

# NOTE(review): astype('bool') maps any non-empty string to True -- confirm that
# 'aa' holds real booleans and not strings such as "False".
df_out = df_out.astype({
    **{c: 'category' for c in cat_cols},
    **{c: 'int' for c in int_cols},
    'aa': 'bool',
    **{c: 'float' for c in float_cols},
})
df_out.to_parquet("../loc_data/plotly_metrics_v4.parquet")


  0%|          | 0/440064 [00:00<?, ?it/s]

## `bokeh` figures

In [17]:
# Keep every bokeh reference image in memory, keyed by its path string, so it is
# loaded from disk only once.
bokeh_dir = figure_root_dir / "bokeh"
reference_paths = list(bokeh_dir.glob("reference_*.png"))

ref_dict = {}
for ref_path in tqdm(reference_paths):
    # The scaled image is subtracted from 255 (i.e. inverted) before being
    # stored as float32.
    ref_dict[str(ref_path)] = (255 - 255 * plt.imread(ref_path)).astype(np.float32)

# Smaller subset, handy while debugging:
# files = seq((figure_root_dir / "bokeh").glob("*200000*ls=linear*lw=1.png")).filter(lambda x: 'reference' not in x.name).to_list()

# Every PNG in the directory except the reference images themselves.
files = [
    png_path
    for png_path in bokeh_dir.glob("*.png")
    if 'reference' not in png_path.name
]

  0%|          | 0/192 [00:00<?, ?it/s]

In [21]:
def wrap_compute_dssim_bokeh(agg_path):
    """Compute the image metrics for a single aggregated bokeh figure.

    Single-argument adapter so the computation can be mapped over ``files`` by
    ``Pool.imap_unordered``. It always passes ``mse=True`` and the module-level
    ``ref_dict`` of pre-loaded reference images.

    NOTE(review): this delegates to ``compute_dssim_plotly``; no bokeh-specific
    function is imported in this notebook. Presumably the plotly routine is
    reused on purpose for bokeh images -- confirm.

    NOTE(review): ``ref_dict`` is read as a global, so the workers only see it if
    they inherit the notebook's namespace (fork start method) -- confirm on
    platforms that default to ``spawn``.

    Parameters
    ----------
    agg_path
        Path of the aggregated figure to score (one element of ``files``).

    Returns
    -------
    The result of ``compute_dssim_plotly``. The results are later combined with
    ``pd.concat(..., axis=1)``, so presumably one ``pd.Series`` per
    figure -- TODO confirm.
    """
    return compute_dssim_plotly(agg_path, mse=True, ref_dict=ref_dict)


# Reset first so a failed run can never leave results of an earlier run behind.
out = []

# Score all figures in parallel. Any worker error (or a keyboard interrupt)
# propagates out of the `with` block, whose __exit__ terminates the pool. The
# cell therefore stops at the real error instead of printing it and then failing
# later on an empty `out`.
with Pool(processes=8) as pool:
    out = list(
        tqdm(
            pool.imap_unordered(wrap_compute_dssim_bokeh, files),
            total=len(files),
        )
    )

if not out:
    raise ValueError("No bokeh figures were processed; nothing to aggregate.")

# One column per figure after the concat; transpose to get one row per figure.
df_out = pd.concat(out, axis=1).T
# df_out.to_csv("../loc_data/bokeh_metrics_v4.csv", index=False)

cat_cols = ['toolkit', 'data', 'aggregator', 'ls']
int_cols = ['n', 'lw', 'n_out']
typed_cols = set(cat_cols + int_cols + ['aa'])
# Every remaining column is treated as a float-valued metric.
float_cols = [c for c in df_out.columns if c not in typed_cols]

# NOTE(review): astype('bool') maps any non-empty string to True -- confirm that
# 'aa' holds real booleans and not strings such as "False".
df_out = df_out.astype({
    **{c: 'category' for c in cat_cols},
    **{c: 'int' for c in int_cols},
    'aa': 'bool',
    **{c: 'float' for c in float_cols},
})
df_out.to_parquet("../loc_data/bokeh_metrics_v4.parquet")


  0%|          | 0/293376 [00:00<?, ?it/s]