# Run estimate_rr_dataset() to get RR estimations

This notebook assumes the datasets are placed in the `data` directory, laid out like so:
```
data
| bench
| CapnoBase
| Kapiolani
| etc...
```

In [1]:
from ppg2rr.rr_est import estimate_rr_dataset
from ppg2rr.config import AlgorithmParams
import pandas as pd

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
# Pandas display settings for the tables shown in this notebook.
# Uncomment the next two lines to lift the row/column display limits.
# pd.set_option('display.max_rows', None)
# pd.set_option('display.max_columns', None)

# only display 2 decimals
pd.set_option('display.precision', 2)

# Run dataset evaluation

In [None]:
import warnings

# Apply the 'default' warning action to all warnings.
# Replacing 'default' with 'error' will cause all warnings to be treated as
# exceptions.
warnings.filterwarnings('default')

# Dataset to evaluate: keep exactly one of these assignments uncommented.
# dataset = 'kapiolani'
# dataset = 'capnobase'
# dataset = 'mimic'
# dataset = 'vortal'
dataset = '3ps'

# Trials to process. None: all trials.
trials = None
# trials = list(range(0, 2))

if dataset == 'capnobase':
    # NOTE: for CapnoBase this replaces whatever `trials` was set to above.
    # trials = [3, 7, 10, 1, 11, 4, 0, 15, 24]   # just the people <= 5 years and spontaneous breathing, sorted by age
    trials = [0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 19, 20, 21, 22, 23, 24, 33, 37, 38, 39, 41]    # spontaneous breathing, all ages

# Windowing settings forwarded to AlgorithmParams below.
window_size = 30
window_increment = 5

params = AlgorithmParams(
    dataset=dataset,
    probe=1,                        # For Kapiolani
    probe_type="Tr",                # For 3ps, "Tr" or "Re"
    led_num=1,                      # For 3ps, 1 or 2
    window_size=window_size,
    window_increment=window_increment,
)

# Passed through to estimate_rr_dataset() as `file_suffix`.
# Plain string: there is nothing to interpolate, so no f-prefix is needed.
file_suffix = 'all-sessions'

# The fourth returned value is not used in this notebook.
df, rr_candidates, quality_indices, _, per_trial_df = estimate_rr_dataset(
    dataset=dataset,
    trials=trials,
    params=params,
    save_df=True,
    show=False,
    fig_large=False,
    save_fig=False,
    save_psd_fig=False,
    save_frame_psd_figs=False,
    file_suffix=file_suffix,
    show_rr_candidates=True,
    stop_on_error=False,
)
print()
print("Complete.")

processing trial: 122

In [None]:
# Show the per-trial results table returned by estimate_rr_dataset().
per_trial_df

In [None]:
print("Sessions not marked by each panelist:")

# Stays True only when every panelist has a column and no session in it is
# unmarked; only then is the closing "None." accurate.
none_found = True
for initial in ['D', 'J', 'O']:
    col = f'RR uncertainty panelist {initial} (mean)'
    if col in per_trial_df:
        # A session counts as unmarked when its 'frames' entry is missing (NaN).
        # A single .loc selects both the rows and the ('id', '-') column.
        df_unmarked = per_trial_df.loc[
            per_trial_df[col, 'frames'].isna(), ('id', '-')
        ]
        if not df_unmarked.empty:
            none_found = False
            suffix = 's' if len(df_unmarked) > 1 else ''
            print()
            print(f'{initial}: {len(df_unmarked)} session{suffix}')
            print(df_unmarked.to_string(index=False))
    else:
        # No column for this panelist: they marked nothing, so the summary
        # must not end with "None."
        none_found = False
        print()
        print(f'{initial}: No sessions marked')
if none_found:
    print("None.")

In [None]:
print("Exclusionary markers, other than 'un*', used by each panelist:")

# Stays True unless at least one panelist has a non-empty marker entry.
none_found = True
for initial in ['D', 'J', 'O']:
    col = f'RR ref video problems panelist {initial}'
    if col not in per_trial_df:
        # No column for this panelist; nothing to report.
        continue

    # Keep the rows whose marker text is not the empty string, and show the
    # session id next to the marker. MultiIndex columns are addressed with
    # explicit (level-0, level-1) tuples, the documented selection form.
    df_exclusionary_markers = per_trial_df.loc[
        per_trial_df[col, '-'] != '',
        [('id', '-'), (col, '-')],
    ]
    if df_exclusionary_markers.empty:
        continue

    none_found = False
    suffix = 's' if len(df_exclusionary_markers) > 1 else ''
    print()
    print(f'{initial}: {len(df_exclusionary_markers)} session{suffix}')
    # Render without column headers or index, left-aligned.
    display(
        df_exclusionary_markers.style
        .hide(axis='columns')
        .hide(axis='index')
        .set_properties(**{'text-align':'left'})
    )
if none_found:
    print("None.")

In [None]:
# Show the first table returned by estimate_rr_dataset().
df

In [None]:
# df.shape[0] is the number of rows in df.
print("total number of data points:", df.shape[0])

In [None]:
# List every algorithm parameter used for this run, one "name: value" per line.
from dataclasses import asdict

params_as_dict = asdict(params)
for field in params_as_dict:
    value = params_as_dict[field]
    print(f'{field}: {value}')