In [None]:
import pandas as pd
import pathlib as pl
import seaborn as sns
import h5py
import cam_bpw_sim as bps
import bpwave
import bpwave.visu
import matplotlib.pyplot as plt
import datetime
import numpy as np

# Apply seaborn's "whitegrid" style to the plots drawn in this notebook.
sns.set_style("whitegrid")

In [None]:
# Show when the notebook was executed and which cam_bpw_sim version was used.
datetime.datetime.now().isoformat(), bps.__version__

In [None]:
%matplotlib inline

In [None]:
# Read the application configuration; its `measurement_folder` is the root
# under which the result folders are looked up below.
app_config = bps.app.read_config()

In [None]:
# Notebook parameters.
# `par_result_folders`: comma-separated folder names, resolved relative to the
# configured measurement folder. It must be set to a non-empty value.
par_result_folders = ""
par_tag = "*"  # Unused

In [None]:
# Parse the comma-separated folder parameter. Whitespace around the entries and
# empty entries (e.g. caused by a trailing comma) are dropped, so "a, b," gives
# ["a", "b"] rather than ["a", " b", ""]; an empty entry would otherwise resolve
# to the measurement root folder itself.
folders = [
    name for name in (part.strip() for part in par_result_folders.split(",")) if name
]
if not folders:
    raise ValueError("par_result_folders should not be empty")

In [None]:
def parse_fname(s):
    """Split a result file name into its metadata fields.

    The name is expected to look like
    ``<protocol>__<cam>__<voltage>V__<tag>.<seq>.<x>.<y>``, i.e. four fields
    joined by double underscores, followed by exactly three dot-separated
    suffixes of which the first is the integer sequence number.

    :param s: mapping (e.g. a data frame row) whose ``"path"`` item is the
        path of the result file.
    :return: tuple ``(stem, protocol, cam, voltage, tag, seq)`` where
        ``voltage`` is a float and ``seq`` an int.
    :raises ValueError: if the stem does not consist of four ``__``-separated
        fields, or the voltage / sequence number is not numeric.
    :raises IndexError: if the name has fewer than three suffixes.
    """
    p = pl.Path(s["path"])
    # Only the last three suffixes belong to the naming scheme. Dots inside
    # the stem itself (e.g. a decimal voltage such as "1.5V") show up as
    # additional leading "suffixes" and must stay part of the stem.
    tail = p.suffixes[-3:]
    seq = int(p.suffixes[-3].lstrip("."))
    stem = p.name.removesuffix("".join(tail))
    protocol, cam, voltage, tag = stem.split("__")
    voltage = float(voltage.rstrip("V"))
    return stem, protocol, cam, voltage, tag, seq

In [None]:
# Collect every "*.r.hdf5" result file of the requested folders, keeping the
# folder order as given. Within a folder the paths are sorted, because glob()
# yields them in arbitrary, filesystem-dependent order and the row order
# should be the same on every run.
result_files = pd.DataFrame(
    {
        "path": [
            p
            for f in folders
            for p in sorted((app_config.measurement_folder / f).glob("*.r.hdf5"))
        ]
    }
)
# Fail early with a clear message instead of an obscure pandas error when the
# parsed columns are assigned to an empty frame.
if result_files.empty:
    raise FileNotFoundError(f"No '*.r.hdf5' result files found in folders {folders!r}")
# Add the metadata fields encoded in each file name as separate columns.
result_files[["stem", "protocol", "cam", "voltage", "tag", "seq"]] = result_files.apply(
    parse_fname, axis=1, result_type="expand"
)

In [None]:
# Show the discovered result files together with the fields parsed from their names.
result_files

In [None]:
# Tag to analyse: only result files whose parsed "tag" field equals this value
# are loaded in the cells below.
tag = "240202"

In [None]:
# Names of the HDF5 datasets read from each result file. The order matters:
# index 0 is used in the repeatability section, index 1 in the accuracy section.
fcr_stats_cols = [
    "all_rep_fcrs",
    "all_acc_fcrs",
]

In [None]:
def load_stats(row, datasets: list[str]) -> list[pd.DataFrame]:
    """Load the requested statistics tables from one result file.

    Every table is extended with the file's ``cam``, ``seq`` and ``voltage``
    values and with ``ampl``, the mean peak-to-peak amplitude (max - min of
    ``y``) over the ``"fcr"`` slices of the ``measured_long_bw_corr`` signal.

    :param row: mapping (e.g. a row of ``result_files``) with the items
        ``path``, ``cam``, ``seq`` and ``voltage``.
    :param datasets: names of the HDF5 datasets to load; each dataset must
        carry a ``columns`` attribute, which provides the column labels.
    :return: one data frame per entry of ``datasets``, in the same order.
    """
    # Nothing is written here, so open the file read-only explicitly.
    with h5py.File(row["path"], mode="r") as f:
        s = bpwave.Signal.from_hdf(f["measured_long_bw_corr"])
        ampl = np.array(
            [(fcr_y := s[slc].y).max() - fcr_y.min() for slc in s.slices["fcr"]]
        ).mean()
        dataframes = []
        for dataset in datasets:
            ds = f[dataset]
            df = pd.DataFrame(ds, columns=ds.attrs["columns"])
            df["cam"] = row["cam"]
            df["seq"] = row["seq"]
            df["voltage"] = row["voltage"]
            df["ampl"] = ampl
            dataframes.append(df)

    return dataframes

In [None]:
def load_meas_stats(row) -> dict:
    """Build one flat metadata record for a result file.

    The record holds the row's ``cam``, ``seq`` and ``voltage``, the mean
    peak-to-peak amplitude ``ampl`` over the ``"fcr"`` slices of the
    ``measured_long_bw_corr`` signal, and every instance attribute of the
    stored measurement, camera and camera-instance parameter objects,
    prefixed with ``meas_``, ``cam_`` and ``cami_`` respectively.

    :param row: mapping with the items ``path``, ``cam``, ``seq`` and ``voltage``.
    :return: the metadata record as a dict.
    """
    with h5py.File(row["path"]) as f:
        signal = bpwave.Signal.from_hdf(f["measured_long_bw_corr"])
        amplitudes = []
        for slc in signal.slices["fcr"]:
            y = signal[slc].y
            amplitudes.append(y.max() - y.min())

        record = {
            "cam": row["cam"],
            "seq": row["seq"],
            "voltage": row["voltage"],
            "ampl": np.array(amplitudes).mean(),
        }

        # (key prefix, parameter class, HDF5 group holding its data)
        param_sources = (
            ("meas", bps.meas.MeasParams, "meas_params"),
            ("cam", bps.cam.CamParams, "cam_params"),
            ("cami", bps.cam.CamInstance, "cam_inst"),
        )
        for prefix, param_cls, group in param_sources:
            params = param_cls.from_hdf(f[group])
            for name, value in params.__dict__.items():
                record[f"{prefix}_{name}"] = value

    return record

In [None]:
# One metadata record per result file that carries the selected tag.
meas_meta = pd.DataFrame.from_records(
    list(
        map(
            load_meas_stats,
            (row for _, row in result_files.loc[result_files["tag"].eq(tag)].iterrows()),
        )
    )
)
meas_meta

## Repeatability (precision)

In [None]:
# Repeatability statistics (the first dataset of `fcr_stats_cols`) of every
# result file with the selected tag, stacked into one frame. Only that dataset
# is requested, so the accuracy dataset is not read just to be thrown away.
fcr_rep_stats = pd.concat(
    [
        load_stats(row, fcr_stats_cols[:1])[0]
        for _, row in result_files[result_files["tag"] == tag].iterrows()
    ]
)
fcr_rep_stats

In [None]:
# RMSE relative to the mean signal amplitude of the corresponding measurement.
fcr_rep_stats["rmse_rep_fcrs_long_bw_corr_rel"] = fcr_rep_stats[
    "rmse_rep_fcrs_long_bw_corr"
].div(fcr_rep_stats["ampl"])
fcr_rep_stats

In [None]:
# Number of non-null values in each column, per voltage.
fcr_rep_stats.groupby(["voltage"]).count()

In [None]:
# Horizontal box plots of "rmse_rep_fcrs_long_bw_corr", one box per voltage.
fcr_rep_stats.boxplot("rmse_rep_fcrs_long_bw_corr", by="voltage", vert=False)

In [None]:
# Same column, but one box per (voltage, seq) combination.
fcr_rep_stats.boxplot("rmse_rep_fcrs_long_bw_corr", by=["voltage", "seq"], vert=False)

In [None]:
# Horizontal box plots of "pearson_rep_fcrs_long_bw_corr", one box per voltage.
fcr_rep_stats.boxplot("pearson_rep_fcrs_long_bw_corr", by="voltage", vert=False)

## Accuracy

In [None]:
# Accuracy statistics (the second dataset of `fcr_stats_cols`) of every result
# file with the selected tag, stacked into one frame. Only that dataset is
# requested, so the repeatability dataset is not read just to be thrown away.
fcr_acc_stats = pd.concat(
    [
        load_stats(row, fcr_stats_cols[1:2])[0]
        for _, row in result_files[result_files["tag"] == tag].iterrows()
    ]
)
fcr_acc_stats

In [None]:
# RMSE values relative to the mean signal amplitude of the corresponding
# measurement, for both RMSE columns.
fcr_acc_stats["rmse_acc_fcrs_long_bw_corr_rel"] = fcr_acc_stats[
    "rmse_acc_fcrs_long_bw_corr"
].div(fcr_acc_stats["ampl"])
fcr_acc_stats["rmse_acc_fcrs_bw_corr_rel"] = fcr_acc_stats[
    "rmse_acc_fcrs_bw_corr"
].div(fcr_acc_stats["ampl"])
fcr_acc_stats

In [None]:
# Mean of every remaining column per (voltage, cam) combination.
# NOTE(review): with pandas >= 2.0 `mean()` raises if any of those columns is
# non-numeric; consider `numeric_only=True` - confirm the dataset column dtypes.
fcr_acc_stats.groupby(["voltage", "cam"]).mean()

### Long-term BW corrected

In [None]:
# Horizontal box plots of "rmse_acc_fcrs_long_bw_corr", one box per voltage.
fcr_acc_stats.boxplot("rmse_acc_fcrs_long_bw_corr", by="voltage", vert=False)

In [None]:
# Same column, but one box per (voltage, seq) combination.
fcr_acc_stats.boxplot("rmse_acc_fcrs_long_bw_corr", by=["voltage", "seq"], vert=False)

In [None]:
# Horizontal box plots of "pearson_acc_fcrs_long_bw_corr", one box per voltage.
fcr_acc_stats.boxplot("pearson_acc_fcrs_long_bw_corr", by="voltage", vert=False)

### Full BW corrected

In [None]:
# Horizontal box plots of "rmse_acc_fcrs_bw_corr", one box per voltage.
fcr_acc_stats.boxplot("rmse_acc_fcrs_bw_corr", by="voltage", vert=False)

In [None]:
# Same column, but one box per (voltage, seq) combination.
fcr_acc_stats.boxplot("rmse_acc_fcrs_bw_corr", by=["voltage", "seq"], vert=False)

In [None]:
# Horizontal box plots of "pearson_acc_fcrs_bw_corr", one box per voltage.
fcr_acc_stats.boxplot("pearson_acc_fcrs_bw_corr", by="voltage", vert=False)

In [None]:
# Same column, but one box per (voltage, seq) combination.
fcr_acc_stats.boxplot("pearson_acc_fcrs_bw_corr", by=["voltage", "seq"], vert=False)

## Tables

### Precision

In [None]:
def to_latex(df: pd.DataFrame, index_cols: np.ndarray, cols: np.ndarray):
    """Print ``df`` to stdout as a LaTeX ``tabular`` with booktabs-style rules.

    :param df: table to print; its index values are written before the
        column values of each row.
    :param index_cols: 2-D array with one row per index level; element 0 is
        the header text, element 1 the ``tabular`` column specification.
    :param cols: 2-D array with one row per data column, laid out like
        ``index_cols``.

    Header texts are wrapped in braces and written verbatim; in the cell
    values every underscore is escaped as ``\\_``.
    """

    def escape(value) -> str:
        return str(value).replace("_", "\\_")

    row_end = "\\\\\n"
    headers = index_cols[:, 0].tolist() + cols[:, 0].tolist()
    col_specs = index_cols[:, 1].tolist() + cols[:, 1].tolist()

    print("\\begin{tabular}{" + "".join(col_specs) + "}")
    print("\\toprule")
    print(" & ".join(f"{{{header}}}" for header in headers), end=row_end)
    print("\\midrule")
    for key, row in df.iterrows():
        # A scalar index value and a multi-level index tuple are handled alike.
        key_cells = " & ".join(escape(v) for v in np.atleast_1d(key))
        value_cells = " & ".join(escape(v) for v in np.atleast_1d(row))
        print(f"{key_cells} & {value_cells}", end=row_end)
    print("\\bottomrule")
    print("\\end{tabular}")

In [None]:
# Column formatting shared by the LaTeX tables below.
# `prec` is the number of decimals of the non-integer columns; `prec_col` is
# derived from it so the value is defined in one place only.
# NOTE(review): the S[...] specs look like siunitx column types - confirm.
prec = 4
prec_col = f"S[round-precision={prec}]"
int_col = "S[round-precision=0, table-format=4]"

In [None]:
# List the available columns of the repeatability statistics.
fcr_rep_stats.columns

In [None]:
# Per-voltage summary of the repeatability RMSE: number of values, then mean,
# standard deviation and median of the absolute ("orig") and the
# amplitude-relative ("rel") RMSE.
rep_table = fcr_rep_stats.groupby("voltage").agg(
    count=("rmse_rep_fcrs_long_bw_corr", "count"),
    **{
        f"rmse_{kind}_{stat}": (column, stat)
        for kind, column in (
            ("orig", "rmse_rep_fcrs_long_bw_corr"),
            ("rel", "rmse_rep_fcrs_long_bw_corr_rel"),
        )
        for stat in ("mean", "std", "median")
    },
)

In [None]:
# Show the per-voltage repeatability summary.
rep_table

In [None]:
# Print the repeatability summary as a LaTeX table. Each array row is
# [header text, column specification]; `cols` follows the column order of
# `rep_table`.
# NOTE(review): the doubled braces ("{{E}}") are emitted literally because
# these are plain strings, not format strings. They look like leftover
# format-string escapes (the accuracy table uses single braces) - confirm and
# align.
to_latex(
    rep_table,
    index_cols=np.array(
        [
            ["$U$", "r"],
        ]
    ),
    cols=np.array(
        [
            ["\\#", int_col],
            ["$\\overline{{E}}$", prec_col],
            ["$\\sigma(E)$", prec_col],
            ["$\\text{{med}}(E)$", prec_col],
            ["$\\overline{{E_{{rel}} }}$", prec_col],
            ["$\\sigma(E_{{rel}})$", prec_col],
            ["$\\text{{med}}(E_{{rel}})$", prec_col],
        ]
    ),
)

In [None]:
# Mean and standard deviation of the repeatability Pearson coefficient per voltage.
fcr_rep_stats_gr = fcr_rep_stats.groupby("voltage")[
    ["pearson_rep_fcrs_long_bw_corr"]
].agg(["mean", "std"])
fcr_rep_stats_gr

### Accuracy

In [None]:
# List the available columns of the accuracy statistics.
fcr_acc_stats.columns

In [None]:
# Per-voltage summary of the accuracy RMSE: number of values, then mean,
# standard deviation and median of the absolute ("orig") and the
# amplitude-relative ("rel") RMSE.
# The "rmse_acc_fcrs_bw_corr" columns are tabulated here; to tabulate the
# other variant, use the "rmse_acc_fcrs_long_bw_corr" columns instead.
acc_table = fcr_acc_stats.groupby("voltage").agg(
    count=("rmse_acc_fcrs_bw_corr", "count"),
    **{
        f"rmse_{kind}_{stat}": (column, stat)
        for kind, column in (
            ("orig", "rmse_acc_fcrs_bw_corr"),
            ("rel", "rmse_acc_fcrs_bw_corr_rel"),
        )
        for stat in ("mean", "std", "median")
    },
)

In [None]:
# Show the per-voltage accuracy summary.
acc_table

In [None]:
# Print the accuracy summary as a LaTeX table. Each array row is
# [header text, column specification]; `cols` follows the column order of
# `acc_table`.
to_latex(
    acc_table,
    index_cols=np.array(
        [
            ["$U$", "r"],
        ]
    ),
    cols=np.array(
        [
            ["\\#", int_col],
            ["$\\overline{E}$", prec_col],
            ["$\\sigma(E)$", prec_col],
            ["$\\text{med}(E)$", prec_col],
            ["$\\overline{E_{rel}}$", prec_col],
            ["$\\sigma(E_{rel})$", prec_col],
            ["$\\text{med}(E_{rel})$", prec_col],
        ]
    ),
)

In [None]:
# Mean and standard deviation of the accuracy Pearson coefficient per voltage.
fcr_acc_stats_gr = fcr_acc_stats.groupby("voltage")[
    ["pearson_acc_fcrs_bw_corr"]
].agg(["mean", "std"])
fcr_acc_stats_gr