In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
import shutil

import wfdb
from wfdb import processing

%matplotlib inline

  return f(*args, **kwds)


In [2]:
from QRSeeker import QRSeeker

In [3]:
database = "nsrdb"
database_dir = '../nsrDb/'
record_ids = wfdb.get_record_list(database)

record_path = database_dir + record_ids[0]
signal_len = wfdb.rdrecord(record_path).__dict__['sig_len']

print(record_ids)
print(signal_len)

['16265', '16272', '16273', '16420', '16483', '16539', '16773', '16786', '16795', '17052', '17453', '18177', '18184', '19088', '19090', '19093', '19140', '19830']
11730944


# Run one sample

In [4]:
record_id = '19090'
record_path = database_dir + record_id

In [5]:
qrs_seeker = QRSeeker(record_path, channels=[0],
                      findpeaks_limit=0.1, findpeaks_spacing_factor=0.2, 
                      verbose=False)

Number of peaks detected >= 85% of number of reference peaks \m/


In [6]:
qrs_seeker.qrs_validator.print_summary()

81953 reference annotations, 81546 test annotations

True Positives (matched samples): 81546
False Positives (unmatched test samples: 0
False Negatives (unmatched reference samples): 407

Specificity: 0.9950 (81546/81953)
Positive Predictivity: 1.0000 (81546/81546)
False Positive Rate: 0.0000 (0/81546)


True Negatives = Total # samples - Positives - False Negatives

In [7]:
qrs_seeker = QRSeeker(record_path, channels=[0],
                      findpeaks_limit=0.01, findpeaks_spacing_factor=0.2, 
                      verbose=False) 

print(qrs_seeker.qrs_validator.print_summary())

Number of peaks detected >= 85% of number of reference peaks \m/
81953 reference annotations, 81609 test annotations

True Positives (matched samples): 81609
False Positives (unmatched test samples: 0
False Negatives (unmatched reference samples): 344

Specificity: 0.9958 (81609/81953)
Positive Predictivity: 1.0000 (81609/81609)
False Positive Rate: 0.0000 (0/81609)
None


# Run all samples

In [8]:
summary = pd.DataFrame()

for record_id in record_ids:
    record_path = database_dir + record_id
    
    print("Analyzing record {:s} ...".format(record_id))
    
    # Run QRSeeker
    qrs_seeker = QRSeeker(record_path, channels=[0],
                          findpeaks_limit=0.01, findpeaks_spacing_factor=0.2, 
                          verbose=False) 
    
    # Store the results
    results = {
        'record_id': record_id,
        
        'ref_annotation': qrs_seeker.ref_annotation,
        'test_annotation': np.array(qrs_seeker.detected_inds),
        
        'matched_ref_inds': qrs_seeker.matched_ref_inds,
        'matched_test_inds': qrs_seeker.matched_test_inds,
        'unmatched_ref_inds': qrs_seeker.unmatched_ref_inds,
        'unmatched_test_inds': qrs_seeker.unmatched_test_inds,

        'true_positive': qrs_seeker.true_positive,
        'false_positive': qrs_seeker.false_positive,
        'false_negative': qrs_seeker.false_negative,

        'specificity': qrs_seeker.specificity,
        'positive_predictivity': qrs_seeker.positive_predictivity,
        'FPR': qrs_seeker.fpr
    }
    
    summary = pd.concat([summary, pd.DataFrame([results], columns = results.keys())])
    print("\n")

Analyzing record 16265 ...
Number of peaks detected >= 85% of number of reference peaks \m/


Analyzing record 16272 ...
Insufficient number of peaks detected - less than 85% of reference peaks !


Analyzing record 16273 ...
Number of peaks detected >= 85% of number of reference peaks \m/


Analyzing record 16420 ...
Number of peaks detected >= 85% of number of reference peaks \m/


Analyzing record 16483 ...
Number of peaks detected >= 85% of number of reference peaks \m/


Analyzing record 16539 ...
Number of peaks detected >= 85% of number of reference peaks \m/


Analyzing record 16773 ...
Insufficient number of peaks detected - less than 85% of reference peaks !


Analyzing record 16786 ...
Number of peaks detected >= 85% of number of reference peaks \m/


Analyzing record 16795 ...
Number of peaks detected >= 85% of number of reference peaks \m/


Analyzing record 17052 ...
Number of peaks detected >= 85% of number of reference peaks \m/


Analyzing record 17453 ...
Number of pea

In [10]:
summary.head()

Unnamed: 0,record_id,ref_annotation,test_annotation,matched_ref_inds,matched_test_inds,unmatched_ref_inds,unmatched_test_inds,true_positive,false_positive,false_negative,specificity,positive_predictivity,FPR
0,16265,"[1, 52, 129, 207, 284, 364, 442, 522, 598, 675...","[58, 135, 213, 291, 370, 448, 527, 604, 682, 7...","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 479, 488, 509, 519, 840, 842, 844, 846, 84...","[3563, 4751, 9010, 17586, 20111, 20352, 23401,...",100231.0,15.0,724.0,0.992828,0.99985,0.00015
0,16272,"[1, 73, 199, 321, 445, 567, 690, 815, 941, 106...","[79, 205, 328, 451, 573, 696, 821, 947, 1071, ...",,,,,,,,,,
0,16273,"[9, 87, 164, 241, 319, 397, 476, 557, 641, 723...","[15, 93, 171, 248, 325, 403, 482, 564, 647, 73...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[54, 242, 247, 249, 252, 411, 416, 423, 508, 5...",[5239],89784.0,1.0,313.0,0.996526,0.999989,1.1e-05
0,16420,"[68, 148, 228, 309, 391, 474, 557, 643, 727, 8...","[75, 156, 235, 317, 399, 481, 565, 651, 735, 8...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[307, 308, 318, 320, 334, 387, 396, 2786, 2794...",[],101846.0,0.0,590.0,0.99424,1.0,0.0
0,16483,"[18, 99, 178, 257, 337, 417, 497, 577, 657, 73...","[19, 102, 182, 261, 340, 420, 500, 580, 660, 7...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[60, 91, 103, 154, 421, 429, 1276, 1433, 1440,...",[51446],104322.0,1.0,239.0,0.997714,0.99999,1e-05


In [11]:
summary[['specificity', 'positive_predictivity', 'FPR']].describe()

Unnamed: 0,specificity,positive_predictivity,FPR
count,15.0,15.0,15.0
mean,0.987375,0.999913,8.7e-05
std,0.02485,0.000239,0.000239
min,0.899223,0.999062,0.0
25%,0.992004,0.999957,0.0
50%,0.994909,0.999991,9e-06
75%,0.996227,1.0,4.3e-05
max,0.998948,1.0,0.000938


In [14]:
sobad_record_ids = summary.loc[summary['specificity'].isna() == True, 'record_id']

detected_lengths = [len(x) for x in list(summary.loc[summary['specificity'].isna() == True, 'test_annotation'])]
ref_lengths = [len(x) for x in list(summary.loc[summary['specificity'].isna() == True, 'ref_annotation'])]

print(list(sobad_record_ids))
print(np.array(detected_lengths)*100 / np.array(ref_lengths))

['16272', '16773', '19088']
[21.99575896 72.62017591 84.20342721]


# Re-analyse 3 records 

In [15]:
summary_newrun = pd.DataFrame()

for record_id in sobad_record_ids:
    record_path = database_dir + record_id
    
    print("Analyzing record {:s} ...".format(record_id))
    
    # Run QRSeeker
    qrs_seeker = QRSeeker(record_path, channels=[0],
                          findpeaks_limit=None, findpeaks_spacing_factor=0.2, 
                          verbose=False) 
    
    # Store the results
    results = {
        'record_id': record_id,
        
        'ref_annotation': qrs_seeker.ref_annotation,
        'test_annotation': np.array(qrs_seeker.detected_inds),
        
        'matched_ref_inds': qrs_seeker.matched_ref_inds,
        'matched_test_inds': qrs_seeker.matched_test_inds,
        'unmatched_ref_inds': qrs_seeker.unmatched_ref_inds,
        'unmatched_test_inds': qrs_seeker.unmatched_test_inds,

        'true_positive': qrs_seeker.true_positive,
        'false_positive': qrs_seeker.false_positive,
        'false_negative': qrs_seeker.false_negative,

        'specificity': qrs_seeker.specificity,
        'positive_predictivity': qrs_seeker.positive_predictivity,
        'FPR': qrs_seeker.fpr
    }
    
    summary_newrun = pd.concat([summary_newrun, pd.DataFrame([results], columns = results.keys())])
    print("\n")

Analyzing record 16272 ...
Insufficient number of peaks detected - less than 85% of reference peaks !


Analyzing record 16773 ...
Insufficient number of peaks detected - less than 85% of reference peaks !


Analyzing record 19088 ...
Insufficient number of peaks detected - less than 85% of reference peaks !




In [16]:
detected_lengths = [len(x) for x in list(summary_newrun.loc[summary_newrun['specificity'].isna() == True, 'test_annotation'])]
ref_lengths = [len(x) for x in list(summary_newrun.loc[summary_newrun['specificity'].isna() == True, 'ref_annotation'])]

print(np.array(detected_lengths)*100 / np.array(ref_lengths))

[22.02355218 72.62017591 84.20342721]


# Save final summary table, 15 good records

In [17]:
summary_final = summary[summary['specificity'].isna() == False].copy()
summary_final.reset_index(drop=True, inplace=True)

In [19]:
import pickle
pickle.dump(summary_final, open("nsrDb_summary.pickle", "wb"))