# Analysis of pRRx/pRRx% properties in AF detection

## 1. Data preparation

In [1]:
import prepare_data

In [2]:
# Step 1: save QRS locations of continuous AF and SR segments to CSV files
for db in ('ltafdb', 'afdb'):
    rec_dir = f'../data/raw/{db}/1.0.0'
    prepare_data.prepare_qrs(rec_dir, db)

# Step 2: calculate pRRx and pRRx%
x_sec = 60  # length of RR segments [s]
# Only afdb is processed here; the ('ltafdb', 128) entry is currently disabled.
# The second value of each pair is passed as `fs` (presumably the sampling
# frequency in Hz -- confirm against prepare_data.prepare_prrx).
for db, fs in (('afdb', 128),):
    prepare_data.prepare_prrx(
        db,
        fs,
        x_sec,
        qrs_dir='../data/interim',
        prrx_dir='../data/processed',
    )

db = 'ltafdb'
Reading annotations from record 84/84
Associating rhythms with QRS's in record (84 / 84)


## 2. Distribution analysis
For each pRRx/pRRx%, plot:
* median
* range from 25th to 75th percentile
* range from 10th to 90th percentile

In [None]:
import os
import distribution
import helper

In [None]:
# Distribution analysis for a single database: read the pRRx/pRRx% table,
# compute its percentiles and save the distribution plots to `fig_dir`.
prrx_dir = '../data/processed'
fig_dir = '../reports/images/distr'
x_sec = 60
db = 'ltafdb'
df = helper.read_prrx(prrx_dir, db, x_sec)
perc_dict = distribution.calc_percentiles(df)
# exist_ok=True creates the directory (and parents) only when it is missing,
# without the race-prone "check, then create" sequence.
os.makedirs(fig_dir, exist_ok=True)
distribution.plot_distr(perc_dict, db, x_sec, fig_dir)

## 3. ROC analysis

For each pRRx/pRRx%, using the receiver operating characteristic (ROC) curve, calculate:
* area under curve (AUC)
* optimal cutoff using Youden's criterion

In [None]:
import os
from roc_analysis import auc_prrx_to_excel, cutoff_prrx_to_excel, plot_auc, plot_cutoff

In [None]:
# ROC analysis, part 1: compute the AUC of every pRRx/pRRx% parameter for one
# database, store the results as Excel files in `roc_dir`, then plot them.
prrx_dir = '../data/processed'
roc_dir = '../reports/excel/roc'
fig_dir = '../reports/images/roc'
x_sec = 60
db = 'ltafdb'
# exist_ok=True creates each output directory only when it is missing,
# without the race-prone "check, then create" sequence.
os.makedirs(roc_dir, exist_ok=True)
os.makedirs(fig_dir, exist_ok=True)
# 1. Calculate and save AUC
auc_prrx_to_excel(prrx_dir, db, x_sec, roc_dir)
# 2. Plot AUC
plot_auc(roc_dir, db, x_sec, fig_dir)

In [None]:
# ROC analysis, part 2: optimal cutoffs (Youden's criterion).
# Reuses prrx_dir, roc_dir, fig_dir, db and x_sec set earlier in the notebook.
# 3. Calculate and save optimal cutoffs
# Pass the shared x_sec instead of a literal 60 so this call cannot drift
# from the plot_cutoff call below when the segment length is changed.
cutoff_prrx_to_excel(
    prrx_dir, db, method='youden', x_sec=x_sec, cutoff_dir=roc_dir)
# 4. Plot cutoffs
plot_cutoff(roc_dir, db, x_sec, 'youden', fig_dir)

## 4. Classify RR series as AF or SR

* Using optimal cutoffs, classify the dataset (LTAFDB)
    * nonparametric bootstrap - random subsets of the database are chosen N times
    * Calculate classification metrics
    * Calculate 95% CI for each metric
    * Plot the results
    * Compare distributions of classification metrics for
        * pRR31 and pRR3.25%
        * pRR50 and pRR31
* Classify the test set (AFDB) using cutoffs from the training set (LTAFDB)
    * compare results with the training set
    * other analyses as in the training set

In [None]:
import bootstrap as boot

In [None]:
# Settings shared by the bootstrap cells
db = 'ltafdb'
x_sec = 60
N = 5000  # number of bootstrap repetitions
cutoff_method = 'youden'

# Input and output locations
prrx_dir = '../data/processed'
cutoff_dir = '../reports/excel/roc'
boot_dir = '../reports/excel/boot'
fig_dir = '../reports/images/boot'

# 1. Bootstrap - classification (train set)
boot.bootstrap(
    prrx_dir, cutoff_dir, db, x_sec, cutoff_method, N, boot_dir)

# Post-process the bootstrap results separately for each parameter group
for group in ('pRRx', 'pRRx%'):
    boot.calculate_95ci(boot_dir, db, x_sec, N, group, cutoff_method)
    boot.plot_boot(db, group, N, x_sec, boot_dir, fig_dir)
    boot.describe_median_results(db, group, N, x_sec, boot_dir)
    boot.test_diff_significance(
        db, group, N, 'all', x_sec, boot_dir, cutoff_method)

In [None]:
# 2. Compare distributions of metrics from bootstrap
# Each entry is a pair of (group, parameter, label) triples to compare;
# the comparisons run in the order listed.
for param_pair in (
    # pRR3.25% vs pRR31
    (('pRRx', 'pRR31.25', 'pRR31'),
     ('pRRx%', 'pRR3.25%', 'pRR3.25%')),
    # pRR31 vs pRR50
    (('pRRx', 'pRR54.6875', 'pRR50'),
     ('pRRx', 'pRR31.25', 'pRR31')),
):
    boot.plot_compare_scores_distr(
        db,
        group_param_label=param_pair,
        cutoff_method=cutoff_method,
        N=N,
        x_sec=x_sec,
        boot_dir=boot_dir,
        fig_dir=fig_dir,
    )

In [None]:
# 3. Bootstrap - classification (test set)
# Cutoffs come from the training database (ltafdb); afdb is the test set.
boot.boot_test_set(
    db_train='ltafdb',
    db_test='afdb',
    cutoff_method=cutoff_method,
    N=N,
    x_sec=x_sec,
    cutoff_dir=cutoff_dir,
    prrx_dir=prrx_dir,
    boot_dir=boot_dir,
)

# 4. Plot train and test set results
for group in ('pRRx', 'pRRx%'):
    boot.calculate_95ci(
        boot_dir, 'train_ltafdb_test_afdb', x_sec, N, group, cutoff_method)
    print(group)
    boot.plot_boot_train_vs_test(
        'ltafdb', 'afdb', group, N, x_sec, boot_dir, fig_dir)

In [None]:
# 5. Compare distributions of metrics from bootstrap (test set)
# Each entry is a pair of (group, parameter, label) triples to compare;
# the comparisons run in the order listed.
for param_pair in (
    # pRR3.25% vs pRR31
    (('pRRx', 'pRR31.25', 'pRR31'),
     ('pRRx%', 'pRR3.25%', 'pRR3.25%')),
    # pRR31 vs pRR50
    (('pRRx', 'pRR54.6875', 'pRR50'),
     ('pRRx', 'pRR31.25', 'pRR31')),
):
    boot.plot_compare_scores_distr(
        db='train_ltafdb_test_afdb',
        group_param_label=param_pair,
        cutoff_method=cutoff_method,
        N=N,
        x_sec=x_sec,
        boot_dir=boot_dir,
        fig_dir=fig_dir,
        suptitle='Test set, AFDB',
    )