## Explore

In [12]:
import numpy as np
import os
import sys
import pandas as pd

import wfdb
from utils import qrs_detect, comp_cosEn, save_dict

In [32]:
def load_data(sample_path):
    """Read one WFDB record and its ``'atr'`` annotation file.

    Parameters
    ----------
    sample_path : str
        Record path handed unchanged to ``wfdb.rdsamp`` and ``wfdb.rdann``.

    Returns
    -------
    tuple
        ``(sig, length, fs, label, ann_note, beat_loc)`` where

        * ``sig``      -- column index 1 of the sample matrix,
        * ``length``   -- number of samples in ``sig``,
        * ``fs``       -- the ``'fs'`` entry of the record header fields,
        * ``label``    -- the ``'comments'`` entry of the record header fields,
        * ``ann_note`` -- annotation ``aux_note`` values as an array
          (rhythm change flags),
        * ``beat_loc`` -- annotation ``sample`` values as an array
          (r-peak locations).
    """
    samples, header = wfdb.rdsamp(sample_path)
    annotation = wfdb.rdann(sample_path, 'atr')

    record_comments = header['comments']
    sampling_rate = header['fs']

    # Only the column at index 1 of the sample matrix is analysed.
    lead = samples[:, 1]
    n_samples = len(lead)

    peak_positions = np.array(annotation.sample)   # r-peak locations
    rhythm_flags = np.array(annotation.aux_note)   # rhythm change flag

    return lead, n_samples, sampling_rate, record_comments, rhythm_flags, peak_positions


In [15]:
def ngrams_rr(data, length, window=12):
    """Cut ``data`` into consecutive, non-overlapping windows.

    Window start offsets are ``0, window, 2 * window, ...`` for every offset
    strictly below ``length - window``. As a consequence the tail of the
    sequence is never returned: when ``length`` is an exact multiple of
    ``window`` the final full window is left out as well, and when
    ``length <= window`` the result is empty. Callers are expected to handle
    that tail themselves.

    Parameters
    ----------
    data : sequence
        Any sliceable sequence (list, numpy array, ...).
    length : int
        Number of leading items of ``data`` to consider.
    window : int, optional
        Items per window. Defaults to 12, the size that used to be hard-coded.

    Returns
    -------
    list
        Slices ``data[i:i + window]`` in order of increasing ``i``.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be a positive integer")
    return [data[start:start + window] for start in range(0, length - window, window)]

In [35]:
def _is_af_window(rr_window, threshold=-1.4):
    """Flag one RR window: 0 when its cosEn is <= ``threshold``, otherwise 1."""
    cos_en, _ = comp_cosEn(rr_window)
    return 0 if cos_en <= threshold else 1


def challenge_entry(sample_path, verbose=True):
    """Predict the flagged (AF) episode end points of one record.

    The record is loaded with ``load_data``, R peaks are detected with
    ``qrs_detect``, and the RR-interval series is cut into windows of 12
    intervals. Each window is flagged 1 when the first value returned by
    ``comp_cosEn`` is above -1.4 and 0 otherwise; the per-window flags are
    expanded back to one flag per RR interval and runs of 1s are converted to
    ``[start, end]`` pairs.

    Parameters
    ----------
    sample_path : str
        Record path forwarded to ``load_data``.
    verbose : bool, optional
        When true (default) the exploratory dump of signals, peaks and
        labels is printed, exactly as before this parameter existed.

    Returns
    -------
    dict
        ``{'predict_endpoints': [[start, end], ...]}``. The list is empty
        when nothing is flagged, ``[[0, len(sig) - 1]]`` when every RR
        interval is flagged, and otherwise holds values taken from the
        detected R-peak array.
    """
    # Must match the window size used by ngrams_rr (12 items per slice).
    window = 12

    sig, sig_len, fs, label, label_arr, beat_loc = load_data(sample_path)

    # Use the record's own sampling rate for detection so it agrees with the
    # RR conversion below (previously a literal 200 was passed here while the
    # division used the header value).
    r_peaks = qrs_detect(sig, fs=fs)

    rr_seq = np.diff(r_peaks) / fs
    len_rr = len(rr_seq)

    rr_seq_slice = ngrams_rr(rr_seq, len_rr)

    if verbose:
        print("\nSignal: ", sig)

        print("\nr peaks: ", r_peaks)
        print("\nRR seq: ", rr_seq)
        print("\nTrue ts labels: ", label_arr)
        print("\nBeat loc: ", beat_loc)

        print("\nSignal length: ", sig_len)
        print("\nr peaks length: ", len(r_peaks))
        print("\nrr seq length: ", len(rr_seq))
        print("\nRR_seq_slice len: ", len(rr_seq_slice))
        print("\nTrue ts labels len: ", len(label_arr))
        print("\nBeat loc len: ", len(beat_loc))
        print("\nTrue Label: ", label)

    end_points = []

    # Fewer than two detected peaks leaves nothing to classify. Without this
    # guard an empty flag array satisfies ``sum == len`` (0 == 0) below and
    # the whole record would be reported as one flagged episode.
    if len_rr == 0:
        return {'predict_endpoints': end_points}

    # One flag per full window, repeated so there is one flag per RR interval.
    window_flags = [_is_af_window(rr_period) for rr_period in rr_seq_slice]
    is_af = np.repeat(np.array(window_flags, dtype=int), window)

    # The intervals not covered by rr_seq_slice all take the flag computed
    # from the last `window` RR intervals of the record.
    is_af_last = _is_af_window(rr_seq[-window:])
    len_rr_remain = len_rr - window * len(rr_seq_slice)
    is_af = np.concatenate((is_af, np.full(len_rr_remain, is_af_last, dtype=int)), axis=0)

    n_flagged = np.sum(is_af)
    if n_flagged == len(is_af):
        # Every interval flagged: report the whole signal.
        end_points.append([0, len(sig) - 1])
    elif n_flagged != 0:
        state_diff = np.diff(is_af)
        start_r = np.where(state_diff == 1)[0] + 1    # 0 -> 1 transitions
        end_r = np.where(state_diff == -1)[0] + 1     # 1 -> 0 transitions

        if is_af[0] == 1:
            # Record starts inside an episode.
            start_r = np.insert(start_r, 0, 0)
        if is_af[-1] == 1:
            # Record ends inside an episode.
            # NOTE(review): index len(is_af) - 1 is the second-to-last R
            # peak (there is one more peak than RR intervals) -- confirm
            # that closing the episode there is intended.
            end_r = np.append(end_r, len(is_af) - 1)

        start_r = np.expand_dims(start_r, -1)
        end_r = np.expand_dims(end_r, -1)
        start_end = np.concatenate((r_peaks[start_r], r_peaks[end_r]), axis=-1).tolist()
        end_points.extend(start_end)

    pred_dict = {'predict_endpoints': end_points}

    return pred_dict

In [36]:
if __name__ == '__main__':
    # Data and result locations. The defaults are the original local paths;
    # set AFIB_DATA_PATH / AFIB_RESULT_PATH to run the cell on another machine.
    DATA_PATH = os.environ.get(
        "AFIB_DATA_PATH",
        "/Users/Hasan/Desktop/Workspace/cpsc2021-AFIB/afib_data",
    )
    RESULT_PATH = os.environ.get("AFIB_RESULT_PATH", os.path.join(DATA_PATH, "output"))

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(RESULT_PATH, exist_ok=True)

    # Only the first few records are explored to keep the cell output short.
    N_RECORDS = 10

    # Read the record list inside a context manager so the handle is closed.
    with open(os.path.join(DATA_PATH, 'RECORDS'), 'r') as records_file:
        test_set = records_file.read().splitlines()[0:N_RECORDS]

    for sample in test_set:
        print("\n\n\n", sample)
        sample_path = os.path.join(DATA_PATH, sample)
        pred_dict = challenge_entry(sample_path)




 Training_set_I/data_30_2

Signal:  [ 0.06019023  0.07659802  0.06860666 ... -0.07251733 -0.06393087
 -0.06444095]

r peaks:  [1.67000e+02 2.69000e+02 4.47000e+02 ... 5.19743e+05 5.19901e+05
 5.20058e+05]

RR seq:  [0.51  0.89  0.685 ... 1.01  0.79  0.785]

True ts labels:  ['' '' '' ... '' '' '']

Beat loc:  [    30    168    269 ... 519745 519902 520059]

Signal length:  520088

r peaks length:  3472

rr seq length:  3471

RR_seq_slice len:  289

True ts labels len:  3513

Beat loc len:  3513

True Label:  ['non atrial fibrillation']



 Training_set_I/data_35_8

Signal:  [-0.11974417 -0.15112954 -0.11074836 ...  0.04171388  0.01559273
 -0.03971482]

r peaks:  [2.41000e+02 5.70000e+02 7.90000e+02 ... 6.10361e+05 6.10689e+05
 6.11019e+05]

RR seq:  [1.645 1.1   1.65  ... 1.165 1.64  1.65 ]

True ts labels:  ['' '' '' ... '' '' '']

Beat loc:  [    30    244    574 ... 610365 610693 611022]

Signal length:  611052

r peaks length:  2023

rr seq length:  2022

RR_seq_slice len:  168



Signal:  [5.00001672 4.97004683 4.95200293 ... 4.92505326 4.932023   4.94201297]

r peaks:  [1.66000e+02 3.05000e+02 4.45000e+02 ... 2.90170e+05 2.90307e+05
 2.90442e+05]

RR seq:  [0.695 0.7   0.705 ... 0.695 0.685 0.675]

True ts labels:  ['' '' '' ... '' '' '']

Beat loc:  [    30    169    308 ... 290173 290311 290446]

Signal length:  290476

r peaks length:  2021

rr seq length:  2020

RR_seq_slice len:  168

True ts labels len:  2040

Beat loc len:  2040

True Label:  ['non atrial fibrillation']



 Training_set_I/data_41_10

Signal:  [-0.00315635 -0.00981976 -0.00736482 ... -0.04857274 -0.04734527
 -0.03945439]

r peaks:  [1.83000e+02 3.37000e+02 4.89000e+02 ... 3.10315e+05 3.10466e+05
 3.10616e+05]

RR seq:  [0.77  0.76  0.77  ... 0.755 0.755 0.75 ]

True ts labels:  ['' '' '' ... '' '' '']

Beat loc:  [    30    186    339 ... 310318 310470 310619]

Signal length:  310648

r peaks length:  2251

rr seq length:  2250

RR_seq_slice len:  187

True ts labels len:  2276

Beat lo


Signal:  [ 0.01298596  0.04284154 -0.00388365 ... -0.2990412  -0.29697801
 -0.26336015]

r peaks:  [1.92000e+02 3.61000e+02 5.30000e+02 ... 5.18001e+05 5.18169e+05
 5.18338e+05]

RR seq:  [0.845 0.845 0.835 ... 0.845 0.84  0.845]

True ts labels:  ['' '' '' ... '' '' '']

Beat loc:  [    30    196    365 ... 518002 518170 518343]

Signal length:  518372

r peaks length:  3334

rr seq length:  3333

RR_seq_slice len:  277

True ts labels len:  3366

Beat loc len:  3366

True Label:  ['non atrial fibrillation']
