# Overview of signal processing results for ecg derived respiration

In [8]:
import pandas as pd

## BIDMC
Results from RRest toolbox saved at: bidmc_results

Component data: per subject the CC's are store in separated file *_CC.mat; also each file has CC values per algorithm and per window

### Calculating CC's

In [115]:
import scipy.io
import numpy as np
import os
import glob

def load_mat_files(directory):
    """Load all *_cc.mat files from the given directory."""
    files = glob.glob(os.path.join(directory, '*_cc.mat'))
    data = {}
    for file in files:
        subject_id = os.path.basename(file).split('_')[0]
        mat_data = scipy.io.loadmat(file)
        data[subject_id] = mat_data
    return data

def extract_cc_values(data, signal_prefix='ekg'):
    """Extract CC values for a specific signal type from the loaded mat files."""
    cc_values = {}
    for subject, mat_data in data.items():
        for key in mat_data:
            if key.startswith(signal_prefix) and isinstance(mat_data[key], np.ndarray):
                if 'CCp' in mat_data[key].dtype.names:
                    method = key.split('_', 1)[1]  # Extract method name
                    cc_values.setdefault(subject, {}).setdefault(method, []).extend(mat_data[key]['CCp'][0])
    return cc_values

def calculate_median_cc(cc_values):
    """Calculate the median CC for each subject and each method."""
    median_values = {}
    for subject, methods in cc_values.items():
        median_values[subject] = {}
        for method, value in methods.items():
            median_values[subject][method] = np.nanmedian(value)
    return median_values

def calculate_mean_cc(cc_values):
    """Calculate the median CC for each subject and each method."""
    mean_values = {}
    for subject, methods in cc_values.items():
        mean_values[subject] = {}
        for method, value in methods.items():
            mean_values[subject][method] = np.nanmean(value)
    return mean_values

def calculate_overall_median(median_cc):
    """Calculate the overall median CC across all subjects for each method."""
    method_values = {}
    for methods in median_cc.values():
        for method, value in methods.items():
            method_values.setdefault(method, []).append(value)
    for method, values in method_values.items():
        overall_median = {method: np.nanmedian(values) for method, values in method_values.items()}
    return overall_median

def calculate_overall_mean(mean_cc):
    """Calculate the overall median CC across all subjects for each method."""
    method_values = {}
    for methods in mean_cc.values():
        for method, value in methods.items():
            method_values.setdefault(method, []).append(value)
    for method, values in method_values.items():
        overall_mean = {method: np.nanmedian(values) for method, values in method_values.items()}
    return overall_mean

In [93]:
directory = './bidmc_results/Analysis_files/Component_Data/'

In [94]:
data = load_mat_files(directory)
cc_values = extract_cc_values(data)

In [116]:
median_values = calculate_median_cc(cc_values)

In [117]:
mean_values = calculate_mean_cc(cc_values)

In [118]:
calculate_overall_median(median_values)

{'ELF_RSlinB_FMeam_FPt_RDtGC_EHF': 0.6689206558933194,
 'ELF_RSlinB_FMebw_FPt_RDtGC_EHF': 0.6244211874382044,
 'ELF_RSlinB_FMefm_FPt_RDtGC_EHF': 0.32408458191380607,
 'flt_BFi': 0.5806797173223288,
 'flt_Wam': 0.3696333693824217,
 'flt_Wfm': 0.29898291638369046}

In [119]:
calculate_overall_mean(mean_values)

{'ELF_RSlinB_FMeam_FPt_RDtGC_EHF': 0.6372758821727452,
 'ELF_RSlinB_FMebw_FPt_RDtGC_EHF': 0.6002211256607378,
 'ELF_RSlinB_FMefm_FPt_RDtGC_EHF': 0.3268251567073984,
 'flt_BFi': 0.5586074061739241,
 'flt_Wam': 0.38144137099571845,
 'flt_Wfm': 0.31143610106791636}

## TBC
Explanation of the table:
https://github.com/peterhcharlton/RRest/blob/f5022e7029c5b6d6b8159b665dccc2c8f267976e/docs/toolbox/results_files.md

The results tables contain a header line (providing the results variables), and then a row providing the results for each algorithm. The variables reported are as follows:

| Variable | Definition |
|-|-|
alg_no | An arbitrary number allocated to the algorithm |
m_xa | The abbreviation of the filter-based technique used to extract a respiratory signal (if any). |
m_xb | The abbreviation of the feature-based technique used to extract a respiratory signal (if any). |
m_ef | The abbreviation of the frequency-domain technique used to estimate respiratory rate (if any). |
m_et | The abbreviation of the time-domain technique used to estimate respiratory rate (if any). |
m_fm | The abbreviation of the modulation-fusion technique used to estimate respiratory rate (if any). |
m_ft | The abbreviation of the temporal-fusion technique used to estimate respiratory rate (if any). |
alg_name | The algorithm name (using abbreviations) |
signal | The input signal |
two_sd | The precision of the algorithm (_i.e._ 2 times the standard deviation of the errors), accounting for repeated measures. |
bias | The bias of the algorithm (_i.e._ the mean error), accounting for repeated measures. |
cp1 | The proportion of RR estimates which have an error of <1bpm. |
cp2 | The proportion of RR estimates which have an error of <2bpm. |
icp5 | The proportion of RR estimates which have an error of >5bpm. |
cp1_entire | The proportion of windows (which contain high quality input and reference respiratory signals, and a reference RR) for which the algorithm estimated RR and the error was <1bpm. |
cp2_entire | The proportion of windows (which contain high quality input and reference respiratory signals, and a reference RR) for which the algorithm estimated RR and the error was <2bpm. |
icp5_entire | The proportion of windows (which contain high quality input and reference respiratory signals, and a reference RR) for which the algorithm estimated RR and the error was >5bpm. |
cost_func | TBC |
tdi95 | The 95th percentile of absolute errors. |
mape | The mean absolute percentage error |
mae | The mean absolute error |
sdae | The standard deviation of absolute errors |
rmse | The root-mean-square error |
prop_wins_hq_ref_and_input_signal_and_ref_rr | The proportion of windows (in the entire dataset) which had high quality input and reference respiratory signals, and a reference RR. |
prop_wins_est | The proportion of windows (which contain high quality input and reference respiratory signals, and a reference RR) for which the algorithm provided an RR estimate and for which there was a reference RR available. |
total_wins_inc_in_analysis | The total number of windows included in the analysis (_i.e._ which contain high quality input and reference respiratory signals, and estimated and reference RRs). |
total_wins_in_dataset | The total number of windows in the dataset. |
prop_wins_inc_in_analysis | The proportion of windows in the dataset which were included in the analysis. |

Notes:

- Unless otherwise stated, only the following windows are included in the analysis: windows with a high quality reference respiratory signal, a reference RR, a high quality input signal (_i.e._ ECG or PPG), and an estimated RR.

Methods were ranked by sorting first by 2SD and then by absolute bias.

In [11]:
bidmc_results = pd.read_excel('RRest_results/BIDMC_results.xlsx')
bidmc_results.head()

Unnamed: 0,alg_no,m_xa,m_xb,m_ef,m_et,m_fm,m_ft,alg_name,signal,two_sd,bias,cp2,percerr,mae,sdae,rmse,prop_wins_ref_and_good_sqi,prop_wins_est,total_wins_all
0,a1,,99.0,6.0,,1.0,,"Ef6,Fm1",ekg,4.686992,-0.521867,0.8,8.007281,1.437345,2.002698,2.461569,0.632432,0.491453,230
1,a2,,99.0,6.0,,1.0,1.0,"Ef6,Fm1,Ft1",ekg,5.610748,-1.147512,0.725055,10.505333,1.885757,2.332837,2.997689,0.632432,0.963675,451
2,a3,,99.0,6.0,,2.0,,"Ef6,Fm2",ekg,,,,,,,,0.632432,0.0,0
3,a4,,99.0,6.0,,2.0,1.0,"Ef6,Fm2,Ft1",ekg,,,,,,,,0.632432,0.0,0
4,a5,,99.0,3.0,,1.0,,"Ef3,Fm1",ekg,4.114786,-0.267459,0.784753,7.293917,1.309293,1.719509,2.15817,0.632432,0.476496,223


Interested in ecg signal only

In [25]:
bidmc_results_ecg = bidmc_results[bidmc_results['signal'] == 'ekg']
bidmc_results_ecg.tail(20)

Unnamed: 0,alg_no,m_xa,m_xb,m_ef,m_et,m_fm,m_ft,alg_name,signal,two_sd,bias,cp2,percerr,mae,sdae,rmse,prop_wins_ref_and_good_sqi,prop_wins_est,total_wins_all
130,a131,3.0,,6.0,,,,"Xa3,Ef6",ekg,15.57388,-5.687106,0.284188,40.932384,7.34756,6.219257,9.622012,0.632432,1.0,468
131,a132,3.0,,6.0,,,1.0,"Xa3,Ef6,Ft1",ekg,9.427093,-5.984381,0.183761,35.847107,6.434727,4.03217,7.591401,0.632432,1.0,468
132,a133,3.0,,3.0,,,,"Xa3,Ef3",ekg,17.58426,-4.160913,0.25,40.541985,7.277482,6.423703,9.702449,0.632432,1.0,468
133,a134,3.0,,3.0,,,1.0,"Xa3,Ef3,Ft1",ekg,10.631348,-4.261907,0.239316,30.688519,5.508735,3.959085,6.781374,0.632432,1.0,468
134,a135,3.0,,2.0,,,,"Xa3,Ef2",ekg,19.426806,-2.676037,0.252137,39.650204,7.117403,7.094989,10.044337,0.632432,1.0,468
135,a136,3.0,,2.0,,,1.0,"Xa3,Ef2,Ft1",ekg,12.539688,-2.5845,0.260684,28.458124,5.10837,4.38476,6.729077,0.632432,1.0,468
136,a137,3.0,,,5.0,,,"Xa3,Et5",ekg,18.504652,0.201734,0.274892,34.215942,6.141924,6.907982,9.237974,0.632432,0.987179,462
137,a138,3.0,,,5.0,,1.0,"Xa3,Et5,Ft1",ekg,10.738053,-0.269838,0.303419,22.899555,4.110579,3.395139,5.329091,0.632432,1.0,468
138,a139,3.0,,,4.0,,,"Xa3,Et4",ekg,22.253877,5.994771,0.33244,48.183163,8.649106,9.661846,12.957935,0.632432,0.797009,373
139,a140,3.0,,,4.0,,1.0,"Xa3,Et4,Ft1",ekg,19.660175,6.877669,0.34188,45.556053,8.177529,8.669578,11.911044,0.632432,1.0,468


Also interested in respiratory signal metrics only, not interested in estimating respiratory rate

In [19]:
bidmc_results_ecg['m_xa'].notna()

0      False
1      False
2      False
3      False
4      False
       ...  
145     True
146     True
147     True
148     True
149     True
Name: m_xa, Length: 150, dtype: bool

In [16]:
bidmc_results_ecg_resp_signal = bidmc_results_ecg_resp_signal.sort_values(by=['two_sd', 'bias'], key=lambda x: x.abs() if x.name == 'bias' else x)
bidmc_results_ecg_resp_signal

Unnamed: 0,alg_no,m_xa,m_xb,m_ef,m_et,m_fm,m_ft,alg_name,signal,two_sd,bias,cp2,percerr,mae,sdae,rmse,prop_wins_ref_and_good_sqi,prop_wins_est,total_wins_all


Best method for BIDMC: 
- Ef3,Fm1 