In [1]:
# Implicit reloading of modules
# (Python caches imported modules, so edits to local .py files are not picked up
# by a running kernel on their own; autoreload re-imports them automatically)
%load_ext autoreload
%autoreload 2

In [2]:
# Public modules
import os
import wfdb
import importlib
import numpy as np

# Custom modules
import preprocessing_utils

In [3]:
# Explicit reloading of modules.
# Forces a fresh import of the local `preprocessing_utils` module so that the
# latest edits to it are used by the cells below.
# NOTE(review): the autoreload extension is enabled in the first cell, so this
# is presumably only a manual fallback - confirm whether it is still needed.
importlib.reload(preprocessing_utils)

<module 'preprocessing_utils' from '/media/minx/Data/KDE_Home/Documents/Projects/arrhythmia-detection/preprocessing_utils.py'>

In [4]:
# Root directory holding the original (unprocessed) databases
databases_dir = "data_original/"
# Root directory the preprocessed records are written to
preprocessed_dir = "data_processed/"

# Sub-directories (one per database) that take part in the preprocessing.
# Each entry is joined onto the two root directories above.
databases = [
    'vfdb/',
    'mitdb/',
    'nsrdb/',
    'svdb/',
    'ltdb/',
]

In [None]:
# Preprocess every record of every configured database:
# read -> resample -> normalize/center -> denoise -> remove baseline wander,
# then write the result (signal + annotations) under `preprocessed_dir`.

# We want to resample all the records to a 200hz freq.
# The target does not change between records, so it is set once up front.
fs_target = 200

for database in databases:
    database_path = os.path.join(databases_dir, database)

    # Look for the `RECORDS` file - it contains the names of all the records
    # in that database, one per line
    records_file = os.path.join(database_path, "RECORDS")

    with open(records_file, "r") as file:
        # Ignore blank lines so an empty record name is never handed to wfdb
        record_names = [line.strip() for line in file if line.strip()]

    # Same folder structure for processed data as it is for original.
    # Created up front so the write calls below don't fail on a missing directory.
    saving_dir = os.path.join(preprocessed_dir, database)
    os.makedirs(saving_dir, exist_ok=True)

    for record_name in record_names:
        record_path = os.path.join(database_path, record_name)
        processed_name = record_name + "_processed"

        # Read the current record and its annotation
        record = wfdb.rdrecord(record_path)
        annotations = wfdb.rdann(record_path, "atr")

        # Keep the record's original storage formats: `fmt` is obligatory
        # for creating new records (see wfdb.wrsamp())
        fmt = record.fmt

        # One generic name per channel. Built from the record's channel count
        # (instead of a fixed two-element list) so it always has the same
        # length as `fmt` and `units`, which also come from the record.
        # Two-channel records still get ['ECG CH1', 'ECG CH2'].
        # NOTE(review): assumes the preprocessing steps keep every channel - confirm.
        sig_name = ["ECG CH{}".format(channel + 1) for channel in range(record.n_sig)]

        print("Processing file: ", record_path, ", fmt: ", fmt, ", n_signal: ", record.n_sig)

        # Resampling (signal and annotation positions together)
        resampled_signal, resampled_annotations = preprocessing_utils.resample_record_and_annotations(record, annotations, fs_target)
        annotations = resampled_annotations

        # Normalizing the signal between 0 and 1 and then centering it around 0
        signal = preprocessing_utils.normalize_signal_and_center(resampled_signal)

        # Noise reduction, TODO: decide if we're using the fir filter or convolution
        # (alternative: preprocessing_utils.cutoff_freqs_fir_filter(signal))
        signal = preprocessing_utils.remove_noise_convolution(signal)

        # Removing the baseline wander using wavelet transform
        signal = preprocessing_utils.remove_baseline_wander_wavelets(signal)

        # Write the processed signal ...
        wfdb.wrsamp(record_name=processed_name, fs=fs_target, units=record.units,
                    sig_name=sig_name, p_signal=signal, fmt=fmt,
                    write_dir=saving_dir)

        # ... and the resampled annotations next to it
        wfdb.wrann(processed_name, extension='atr', sample=annotations.sample,
                   symbol=annotations.symbol, fs=fs_target,
                   write_dir=saving_dir)
