In [1]:
import wfdb
import numpy as np
import neurokit2 as nk
import os
import os.path as osp
from wfdb import rdrecord
import cv2
import matplotlib.pyplot as plt

In [2]:
MIT_DIR = 'mit-bih'
# Record names are the stems of the .dat signal files found in MIT_DIR,
# ordered by their numeric value.
sig_names = sorted(
    (fname.split('.')[0] for fname in os.listdir(MIT_DIR) if fname.endswith('.dat')),
    key=int,
)
sig_names


['100',
 '101',
 '102',
 '103',
 '104',
 '105',
 '106',
 '107',
 '108',
 '109',
 '111',
 '112',
 '113',
 '114',
 '115',
 '116',
 '117',
 '118',
 '119',
 '121',
 '122',
 '123',
 '124',
 '200',
 '201',
 '202',
 '203',
 '205',
 '207',
 '208',
 '209',
 '210',
 '212',
 '213',
 '214',
 '215',
 '217',
 '219',
 '220',
 '221',
 '222',
 '223',
 '228',
 '230',
 '231',
 '232',
 '233',
 '234']

In [3]:
# Keep only the records that contain an MLII lead, remembering which channel
# holds it. Channel names are stored in the record header, so only the header
# is parsed here instead of loading every record's full signal.
sig_names_with_mlii = []
for sig_name in sig_names:
    header = wfdb.rdheader(osp.join(MIT_DIR, sig_name))
    if 'MLII' in header.sig_name:
        sig_names_with_mlii.append((sig_name, header.sig_name.index('MLII')))


In [11]:
# Records set aside as the test split (order kept as originally listed).
test_file_names = [
    '100',
    '101',
    '103',
    '106',
    '107',
    '118',
    '109',
    '209',
]

In [13]:
# Display the (record name, MLII channel) pairs ordered by record name.
# NumPy coerces the mixed str/int tuples to one string dtype, so the channel
# index is shown as text.
arr = np.array(sig_names_with_mlii)
row_order = np.argsort(arr[:, 0])
arr[row_order]

array([['100', '0'],
       ['101', '0'],
       ['103', '0'],
       ['105', '0'],
       ['106', '0'],
       ['107', '0'],
       ['108', '0'],
       ['109', '0'],
       ['111', '0'],
       ['112', '0'],
       ['113', '0'],
       ['114', '1'],
       ['115', '0'],
       ['116', '0'],
       ['117', '0'],
       ['118', '0'],
       ['119', '0'],
       ['121', '0'],
       ['122', '0'],
       ['123', '0'],
       ['124', '0'],
       ['200', '0'],
       ['201', '0'],
       ['202', '0'],
       ['203', '0'],
       ['205', '0'],
       ['207', '0'],
       ['208', '0'],
       ['209', '0'],
       ['210', '0'],
       ['212', '0'],
       ['213', '0'],
       ['214', '0'],
       ['215', '0'],
       ['217', '0'],
       ['219', '0'],
       ['220', '0'],
       ['221', '0'],
       ['222', '0'],
       ['223', '0'],
       ['228', '0'],
       ['230', '0'],
       ['231', '0'],
       ['232', '0'],
       ['233', '0'],
       ['234', '0']], dtype='<U21')

In [4]:
#
# class_rec = {'N': ['100', '101', '103', '105'], 'V': ['106', '116', '119', '200'], 'PAB': ['102', '104', '107', '217'], 'R': ['118', '124', '212', '231'], 'L': ['109', '111', '207', '213'], 'A': ['209', '220', '222', '223', '232'], '!': ['207'], 'E': ['207']}


In [5]:
# class_rec_mlii = {}
# for k, v in class_rec.items():
#     class_rec_mlii[k] = []
#     for sig_name, mlii_idx in sig_names_with_mlii:
#         if sig_name in v:
#             class_rec_mlii[k].append((sig_name, mlii_idx))


In [6]:
# class_rec_mlii

In [7]:
# Beat classes to export; each one gets its own sub-directory of OUTPUT_DIR.
classes = ["N", "V", "PAB", "R", "L", "A", "!", "E"]

OUTPUT_DIR = 'custom_cut_data'
# makedirs(exist_ok=True) is idempotent, so the cell can be re-run safely and
# there is no gap between an existence check and the directory creation.
os.makedirs(OUTPUT_DIR, exist_ok=True)
for class_name in classes:
    os.makedirs(osp.join(OUTPUT_DIR, class_name), exist_ok=True)


In [8]:
def plot(signal, figsize, image_size, filename):
    """Render ``signal`` as a borderless line plot and save it as a grayscale image.

    The plot is first written to ``filename`` by matplotlib, then re-read with
    OpenCV as grayscale, resized to ``image_size`` and written back to the
    same path.

    Parameters
    ----------
    signal : sequence of numbers
        Samples to draw; passed straight to matplotlib's ``plot``.
    figsize : tuple
        Figure size in inches, forwarded to ``plt.figure``.
    image_size : tuple
        Target size passed to ``cv2.resize`` (OpenCV expects ``(width, height)``).
    filename : str
        Output path; it is overwritten by the resized grayscale image.

    Raises
    ------
    IOError
        If the rendered image cannot be read back or the final image cannot
        be written.
    """
    fig = plt.figure(figsize=figsize, frameon=False)
    try:
        ax = fig.gca()
        ax.axis("off")
        # Let the axes fill the whole canvas so no padding ends up in the image.
        fig.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
        # ax.margins(0, 0) # use for generation images with no margin
        ax.plot(signal)
        fig.savefig(filename)
    finally:
        # Always release the figure, even when plotting or saving fails;
        # this function is called once per beat, so leaked figures pile up fast.
        plt.close(fig)

    im_gray = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
    if im_gray is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError(f"could not read rendered image: {filename}")
    im_gray = cv2.resize(im_gray, image_size, interpolation=cv2.INTER_LANCZOS4)
    if not cv2.imwrite(filename, im_gray):
        raise IOError(f"could not write resized image: {filename}")


In [9]:
# Side length, in pixels, of the square images to generate.
image_size = 128
# A throw-away figure is created only to read the backend's DPI; it is closed
# right away so it neither leaks nor shows up as an empty figure in the output.
fig = plt.figure(frameon=False)
dpi = fig.dpi
plt.close(fig)
# Figure size in inches that renders to image_size x image_size pixels at that DPI.
figsize = (image_size / dpi, image_size / dpi)


<Figure size 640x480 with 0 Axes>

In [10]:
def create_data(sig_names_with_mlii, half_window=360):
    """Export one image per annotated beat of the selected classes.

    For every ``(record name, channel index)`` pair the record and its ``atr``
    annotations are read from ``MIT_DIR``. Each annotation whose class is
    listed in ``classes`` yields a window of the signal centred on the
    annotated sample, which is rendered with ``plot`` into
    ``OUTPUT_DIR/<class>/<record>_<annotation index>.png``.

    Beat positions come from the reference annotations only; no automatic
    R-peak detection is run.

    Parameters
    ----------
    sig_names_with_mlii : iterable of (str, int)
        Record name and index of the channel to export. The index may also be
        given as a numeric string.
    half_window : int, optional
        Number of samples kept on each side of the annotated sample. The
        default of 360 is one second at the 360 Hz rate of these recordings.
        Windows are clipped at the record boundaries, so beats near either
        end produce shorter segments.
    """
    # wfdb labels paced beats with '/', which cannot serve as a directory
    # name; route them to the 'PAB' class folder.
    symbol_to_class = {'/': 'PAB'}

    for sig_name, sig_idx in sig_names_with_mlii:
        record_path = osp.join(MIT_DIR, sig_name)
        # int() tolerates indices that arrive as strings (e.g. from a NumPy
        # string array built out of the name/index pairs).
        signal = rdrecord(record_path).p_signal[:, int(sig_idx)]
        ann = wfdb.rdann(record_path, 'atr')

        for i, (sample, symbol) in enumerate(zip(ann.sample, ann.symbol)):
            class_name = symbol_to_class.get(symbol, symbol)
            if class_name not in classes:
                continue

            start = max(sample - half_window, 0)
            end = min(sample + half_window, len(signal))
            # Slice into a separate variable: the full record must stay intact,
            # because every later beat is cut from it with absolute indices.
            segment = signal[start:end]

            filename = osp.join(OUTPUT_DIR, class_name, f'{sig_name}_{i}.png')
            plot(segment, figsize, (image_size, image_size), filename)
