# Sweep parameters while running estimate_rr_dataset()

This notebook assumes the datasets are placed in the `data` directory like so:
```
data
| bench
| CapnoBase
| Kapiolani
| etc...
```

In [1]:
from ppg2rr.rr_est import estimate_rr_dataset
from ppg2rr.config import AlgorithmParams
import numpy as np
import pandas as pd

from dataclasses import dataclass, field, fields, replace, asdict
from itertools import product
from copy import deepcopy
from pathlib import Path
import csv

from datetime import datetime

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import warnings

# Uncomment to lift pandas' row/column truncation when inspecting frames:
# pd.set_option('display.max_rows', None)
# pd.set_option('display.max_columns', None)

# Render floats with two decimal places in DataFrame output.
pd.options.display.precision = 2

# Install a catch-all filter with the "default" action.
# Swapping "default" for "error" turns every warning into an exception.
warnings.filterwarnings(action="default")

# Run dataset evaluation

In [None]:
# Parameters

# Dataset name; passed to both AlgorithmParams and estimate_rr_dataset,
# and used as the prefix of the results CSV file name.
dataset = '3ps'

trials = None           # None: all trials
# trials = list(range(0, 2))

# Fixed parameters (identical for every run of the sweep)
window_size = 30
window_increment = 5

# Baseline configuration; each sweep run is a copy of this with the swept
# fields overridden.
base_params = AlgorithmParams(
    dataset=dataset,
    probe=1,                        # For Kapiolani
    probe_type="Tr",                # For 3ps, "Tr" or "Re"
    led_num=1,                      # For 3ps, 1 or 2
    window_size=window_size,
    window_increment=window_increment,
)

# Parameters to sweep: {parameter name: list of values to try}.
# Every combination of the listed values is run (Cartesian product).
sweep = {
    # "min_keep_pct": [0.3, 0.5],
    "peak_to_peak_std_cutoff": [18, 13, 15, 21],
    "outlier_tolerance": [5, 3, 4, 6],
    # "psd_rqi_thresh": [0.04, 0.08, 0.2],
}

# Label for this sweep: part of the CSV file name and, with the run id
# appended, the file_suffix handed to estimate_rr_dataset.
single_pass_suffix = 'sweep-pp-ot_batch-1-2'

# Timestamp the summary CSV so repeated sweeps do not overwrite each other.
current_time = datetime.now().strftime("%Y%m%d-%H%M%S")
csv_path = Path(f"../data/results/{dataset}_{single_pass_suffix}_{current_time}.csv")

# Column names for accuracy calcs
rr_reference = 'RR ref (mean)'              # read from the per-run DataFrame
rr_estimated = 'mean of fused candidates'   # read from the per-run DataFrame
rr_error = "RR error"                       # added as estimated minus reference

In [5]:
# Functions

def apply_patch(obj, patch):
    """Return a deep copy of ``obj`` with the overrides in ``patch`` applied.

    Args:
        obj: Instance to copy (a dataclass instance in this notebook). It is
            never mutated.
        patch: Mapping of key -> new value. A plain key replaces the attribute
            of that name. A dotted key ``"outer.inner"`` sets item ``inner`` on
            a copy of the container stored in attribute ``outer``.

    Returns:
        The patched deep copy of ``obj``.

    Raises:
        AttributeError: If a key (or the ``outer`` part of a dotted key) does
            not name an existing attribute. Without this check a misspelled
            sweep key would be attached as a brand-new attribute and the run
            would silently use the unmodified parameters.
    """
    patched = deepcopy(obj)
    for key, value in patch.items():
        outer, dot, inner = key.partition('.')
        if not hasattr(patched, outer):
            raise AttributeError(
                f"{type(patched).__name__} has no parameter {outer!r} "
                f"(from patch key {key!r})"
            )
        if dot:
            # Copy the container again before editing so that a container
            # shared by several attributes is only changed under `outer`.
            nested = deepcopy(getattr(patched, outer))
            nested[inner] = value
            setattr(patched, outer, nested)
        else:
            setattr(patched, outer, value)
    return patched

def sweep_params(base: AlgorithmParams, sweep: dict, print_num_combos = False):
    """
    Build the list of (run_id, params) pairs for a parameter sweep.

    Each entry of `sweep` maps a parameter name to the values to try; one
    patched copy of `base` is produced per combination of those values.
    Run ids start at 1. Dotted names such as
    'min_deviation_tolerance.RIFV_max' address entries of nested containers.

    An empty `sweep` yields a single run holding a deep copy of `base`
    (nothing is printed in that case). When `print_num_combos` is true the
    number of combinations is printed, followed by a blank line.
    """
    if not sweep:
        return [(1, deepcopy(base))]

    names = list(sweep.keys())
    patches = [
        dict(zip(names, picked))
        for picked in product(*sweep.values())
    ]

    if print_num_combos:
        print(f"Total combos to run: {len(patches)}")
        print()

    runs = []
    for run_id, patch in enumerate(patches, start=1):
        runs.append((run_id, apply_patch(base, patch)))
    return runs

def flatten_params(run_id, params):
    """Flatten a dataclass instance into a single-level dict for one CSV row.

    The result starts with ``"run_id"`` and then follows the declared field
    order of ``params``. A field holding a dict is expanded into one
    ``"<field>.<subkey>"`` entry per item; any other field is stored under its
    own name.
    """
    flat = {"run_id": run_id}

    for spec in fields(params):
        attr = getattr(params, spec.name)
        if not isinstance(attr, dict):
            flat[spec.name] = attr
            continue
        flat.update({f"{spec.name}.{k}": v for k, v in attr.items()})

    return flat

def calc_results(df, col_name, len_df_full):
    """Summarise the error column of an (already filtered) results frame.

    Args:
        df: DataFrame holding the error column ``col_name`` and a boolean
            ``'aliased'`` column.
        col_name: Name of the column whose mean and standard deviation are
            reported.
        len_df_full: Row count of the unfiltered frame; denominator of the
            yield.

    Returns:
        dict with keys
        ``'yield'``   - ``len(df) / len_df_full``,
        ``'bias'``    - NaN-ignoring mean of ``df[col_name]``,
        ``'stdev'``   - NaN-ignoring standard deviation of ``df[col_name]``,
        ``'aliased'`` - fraction of rows of ``df`` whose ``'aliased'`` is true.
        Ratios with a zero denominator are reported as NaN instead of raising
        ``ZeroDivisionError``, so a run whose filters reject every frame still
        produces a row.
    """
    n_rows = len(df)
    nan = float("nan")

    with warnings.catch_warnings():
        # nanmean/nanstd emit RuntimeWarning on empty or all-NaN input; the
        # NaN they return is the value we want to record.
        warnings.simplefilter("ignore", category=RuntimeWarning)
        return {
            'yield': (n_rows / len_df_full) if len_df_full else nan,
            'bias': np.nanmean(df[col_name]),
            'stdev': np.nanstd(df[col_name], axis=0),
            'aliased': (len(df[df['aliased']]) / n_rows) if n_rows else nan,
        }

In [6]:
# Run sweeps
#
# For every parameter combination: run estimate_rr_dataset, apply three
# cumulative exclusion stages to the resulting frame, and append one summary
# row (parameters + per-stage metrics) to the CSV at csv_path.

print("Sweep parameters:")
print(sweep)
print()
# Build the run list once; this also prints the number of combinations.
sweep_runs = sweep_params(base_params, sweep, print_num_combos=True)

# Result columns: one group of metrics per exclusion stage, in CSV order.
result_prefixes = ("r_panel", "r_algoa", "r_algob")
metric_names = ("yield", "bias", "stdev", "aliased")

# CSV header. Parameter columns come from flatten_params so the header always
# matches the rows; base_params goes first to keep the declared field order,
# then any extra nested keys introduced by individual runs are appended.
param_columns = dict.fromkeys(flatten_params(0, base_params))
for _, run_params in sweep_runs:
    param_columns.update(dict.fromkeys(flatten_params(0, run_params)))
fieldnames = list(param_columns)
fieldnames += [
    f"{prefix}.{metric}"
    for prefix in result_prefixes
    for metric in metric_names
]

# The results directory may not exist yet (e.g. on a fresh checkout).
csv_path.parent.mkdir(parents=True, exist_ok=True)

# Open CSV in write mode once and write header
with csv_path.open("w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()

    # Iterate through sweep and log one row at a time
    for run_id, params in sweep_runs:

        _, _, _, df, _ = estimate_rr_dataset(
            dataset=dataset,
            trials=trials,
            params=params,
            save_df=True,
            show=False,
            fig_large=True,
            save_fig=False,
            save_psd_fig=False,
            save_frame_psd_figs=False,
            file_suffix=f"{single_pass_suffix}_run-{run_id:04d}",
            show_rr_candidates=True,
            stop_on_error=False,
        )

        # Results
        # Yield is always relative to the full frame count, measured before
        # any rows are dropped.
        len_df_full = len(df)

        # Drop frames without an RR estimate. Work on a new frame instead of
        # mutating the object returned by estimate_rr_dataset.
        df = df.dropna(subset=[rr_estimated])

        df[rr_error] = df[rr_estimated] - df[rr_reference]

        # Stage 1: exclusions based on the reference panel.
        panel_exclusions = (
            (df['RR uncertainty panel (mean)'] > 0)            # exclude all uncertain frames
            | (df['RR ref disagreement panel (bpm)'] > 3)
        )
        df = df[~panel_exclusions].copy()
        results_panel_exclusions = calc_results(df, rr_error, len_df_full)

        # Stage 2: algorithm-side exclusions, applied on top of stage 1.
        algorithm_exclusions_a = (
            ( df['HR disagreement of means'] > 3 )
            # Explicit comparison kept on purpose: `~col` is only equivalent
            # when the column has a pure bool dtype (not verified here).
            | ( df['HR est reliable'] == False )
            | (df['quality - fusion candidate quality-std'] > 3)
        )
        df = df[~algorithm_exclusions_a].copy()
        results_algo_exclusions_a = calc_results(df, rr_error, len_df_full)

        # Stage 3: stricter quality-std cutoff, applied on top of stage 2.
        algorithm_exclusions_b = (
            (df['quality - fusion candidate quality-std'] > 1)
        )
        df = df[~algorithm_exclusions_b].copy()
        results_algo_exclusions_b = calc_results(df, rr_error, len_df_full)

        # One CSV row: flattened parameters followed by each stage's metrics.
        row = flatten_params(run_id, params)
        stage_results = (
            results_panel_exclusions,
            results_algo_exclusions_a,
            results_algo_exclusions_b,
        )
        for prefix, stage in zip(result_prefixes, stage_results):
            for metric in metric_names:
                row[f"{prefix}.{metric}"] = stage[metric]
        writer.writerow(row)
        f.flush()  # write immediately in case of a crash

        print(f"Run {run_id:4d}: {params} -> results_panel_exclusions={results_panel_exclusions}, results_algo_exclusions_a={results_algo_exclusions_a}, results_algo_exclusions_b={results_algo_exclusions_b}")

print()
print("Complete.")

Sweep parameters:
{'peak_to_peak_std_cutoff': [18, 13, 15, 21], 'outlier_tolerance': [5, 3, 4, 6]}

Total combos to run: 16

Run    1: AlgorithmParams(dataset='3ps', probe=1, probe_type='Tr', led_num=1, cpo=False, window_size=30, window_increment=5, win_allow_partial_at_end=False, win_omit_first_and_last_frames=False, duration_limit=184, reference_rr_use_post=True, reference_rr_target_is_nested=True, reference_rr_use_duration_markers=True, reference_rr_expand_other_duration_markers=False, uncertain_edge_offset=0.01, rate_at_frame_edge='beyond_frame', ppg_quality_corr_threshold=0.99, psd_rqi_thresh=0.04, RR_min_bpm=15, RR_max_bpm=120, hr_max_bpm=240, lowpass_cutoff_ppg='dynamic', lowpass_dynamic_scalar_ppg=2.1, fs_riv=12, remove_riv_outliers='segment-wise', n_kalman_fusion=3, peak_counting_prominence=0.5, peak_to_peak_std_cutoff=18, outlier_tolerance=5, min_keep_pct=0.5, min_deviation_tolerance={'RIFV_max': 25, 'RIFV_min': 25, 'RIFV_mas': 25, 'AUDP': 0.01, 'RIIV_upper': 0.05, 'RIIV_lowe