In [None]:
import json
from pathlib import Path

import pandas as pd
import streaminghub_pydfds as dfds
from tqdm import tqdm

pd.set_option('display.max_rows', int(1e9))
pd.set_option('display.max_columns', int(1e9))

%cd evaluation/cstdy_optimization

In [None]:
# Columns kept from both the reconstructed and the source frames before comparing them.
cols = ["t", "x", "y"]

# Generated logs live in "generated" under the current working directory.
base_path = Path.cwd().joinpath("generated")
print(base_path)

In [None]:
def compute_stat(fp: Path):
    """Compute the mean point-wise error of one generated log against its source recording.

    The parent directory name is split on "_" into ``subject``, ``noise`` and
    ``task``; the file name is split on "_" into ``simulation`` and ``vt``.
    The log is read as JSON lines, the matching source recording is loaded
    through ``dfds``, both are indexed by a timestamp rounded to two decimals,
    and the mean Euclidean distance between their ``(x, y)`` points is computed
    over the timestamps present in both.

    Args:
        fp: Path to a log file laid out as
            ``<subject>_<noise>_<task>/<simulation>_<vt>``.

    Returns:
        A dict with keys ``subject``, ``noise``, ``task``, ``simulation``,
        ``vt`` and ``err``; or ``None`` (after printing a "bad result" line)
        when the log holds no records or its row count differs from the
        source rows inside the same time window.

    Raises:
        ValueError: If the directory or file name does not split into the
            expected number of "_"-separated parts.
    """

    dir_name = fp.parent.name
    file_name = fp.name

    subject, noise, task = dir_name.split("_")
    # NOTE(review): file_name keeps its extension, so `vt` carries the suffix
    # (e.g. ".log") — confirm that is what consumers of the summary expect.
    simulation, vt = file_name.split("_")

    # reconstructed dataset: one JSON record per line. Blank lines are skipped
    # so a stray empty line does not abort the run with a JSONDecodeError.
    with open(fp) as f:
        recs = [json.loads(line) for line in f if line.strip()]
    if not recs:
        # An empty log yields a frame without a "t" column; report it the same
        # way as any other unusable result instead of raising KeyError.
        print(f"bad result: {subject}_{noise}_{task}_{simulation}_{vt}")
        return None
    rcn_df = pd.DataFrame(recs)
    rcn_df["t"] = rcn_df["t"].round(2)
    rcn_df = rcn_df[cols]
    # First and last reconstructed timestamps bound the window taken from the source.
    t0, tn = rcn_df["t"].iloc[0], rcn_df["t"].iloc[-1]

    # source dataset
    src_dir = dfds.load_config().data_dir
    src_fp = src_dir / "ADHD_SIN" / f"{subject}_{noise}_{task}.parquet"
    reader = dfds.create_reader(src_fp)
    src_meta, src_df = reader.read(rec_path=".")
    # Average the l*/r* columns into one point (presumably left/right eye — TODO confirm).
    src_df["x"] = (src_df["lx"] + src_df["rx"]) / 2
    src_df["y"] = (src_df["ly"] + src_df["ry"]) / 2
    # Re-base time to the first sample and scale by 1e-3 (presumably ms -> s — TODO confirm),
    # rounded like the reconstructed timestamps so the two indexes can be joined.
    src_df["t"] = ((src_df["t"] - src_df["t"].iloc[0]) * 1e-3).round(2)
    src_df = src_df[src_df["t"].between(t0, tn)][cols]

    rcn_df = rcn_df.set_index("t")
    src_df = src_df.set_index("t")

    # A differing row count means the two series cannot be compared sample for sample.
    if len(rcn_df.index) != len(src_df.index):
        print(f"bad result: {subject}_{noise}_{task}_{simulation}_{vt}")
        return None

    # Align on the rounded timestamp; rows lacking a counterpart are dropped.
    m = rcn_df.join(src_df, lsuffix="_rcn", rsuffix="_src").dropna()
    m["err_l2"] = ((m["x_rcn"] - m["x_src"]) ** 2 + (m["y_rcn"] - m["y_src"]) ** 2) ** 0.5
    err = m["err_l2"].mean()

    return dict(subject=subject, noise=noise, task=task, simulation=simulation, vt=vt, err=err)

In [None]:
# Every log one directory below base_path, in a stable (sorted) order.
log_files = sorted(base_path.glob("*/*.log"))

# Gather one summary row per usable log; compute_stat returns None for the rest.
stats = []
for fp in tqdm(log_files):
    stat = compute_stat(fp)
    if stat is None:
        continue
    stats.append(stat)

# Persist the per-file errors as one CSV, without the positional index.
stats_df = pd.DataFrame(stats)
stats_df.to_csv("generated/summary.csv", index=False)