In [33]:
import numpy as np
import wfdb
from scipy.io import loadmat
import os
import scipy as sp

In [63]:
def load_recording(record_name):
    """Load one WFDB-style recording stored as a ``.hea`` header plus a ``.mat`` signal file.

    The header is parsed for the sampling frequency, the per-channel ADC gain
    and baseline, the channel names, and the diagnosis codes found on the
    ``#Dx:`` comment line. The raw samples are read from the ``val`` variable
    of the MATLAB file that sits next to the header (same path, ``.mat``
    extension) and converted to physical units.

    Args:
        record_name: Path of the record, either without an extension
            (``'dir/JS00001'``) or as the header filename (``'dir/JS00001.hea'``).

    Returns:
        A tuple ``(rescaled_data, sampling_frequency, channels, labels)``:
        ``rescaled_data`` is a float32 array of shape (num_channels, num_samples)
        computed as ``(raw - baseline) / gain``; ``sampling_frequency`` is a
        float; ``channels`` is the list of channel names taken from the signal
        lines; ``labels`` is the text following ``#Dx:`` (an empty string when
        the header has no such line).

    Raises:
        FileNotFoundError: The header file does not exist.
        ValueError: The header is malformed, or its dimensions disagree with
            the array stored in the signal file.
        NotImplementedError: The header references more than one signal file.
    """
    # Allow either the record name or the header filename.
    root, ext = os.path.splitext(record_name)
    header_file = record_name + '.hea' if ext == '' else record_name
    # Build the signal path from the extension-less root so that passing
    # 'rec.hea' resolves to 'rec.mat' rather than 'rec.hea.mat'.
    signal_path = root + '.mat'

    # Load the header file.
    if not os.path.isfile(header_file):
        raise FileNotFoundError('{} recording not found.'.format(record_name))

    with open(header_file, 'r') as f:
        header = [line.strip() for line in f if line.strip()]

    # Comment lines start with '#'; everything else is the record line
    # followed by one specification line per signal.
    comment_lines = [line for line in header if line.startswith('#')]
    spec_lines = [line for line in header if not line.startswith('#')]
    if not spec_lines:
        raise ValueError('The header file {} does not contain a record line.'.format(header_file))

    # Parse the record line: <name> <num_signals> <sampling_frequency> <num_samples>.
    # split() without an argument tolerates tabs and repeated spaces.
    record_fields = spec_lines[0].split()
    if len(record_fields) < 4:
        raise ValueError('The record line of the header file {} is incomplete.'.format(header_file))
    num_signals = int(record_fields[1])
    # The frequency field may carry a '/counter frequency' suffix; keep the leading number.
    sampling_frequency = float(record_fields[2].split('/')[0])
    num_samples = int(record_fields[3])

    # Parse the signal specification lines; their count comes from the record
    # line instead of being fixed at twelve.
    signal_lines = spec_lines[1:num_signals + 1]
    if len(signal_lines) != num_signals:
        raise ValueError('The header file {}'.format(header_file)
            + ' declares {} signals but describes {}.'.format(num_signals, len(signal_lines)))

    signal_files = list()
    gains = list()
    offsets = list()
    channels = list()
    for line in signal_lines:
        fields = line.split()
        if len(fields) < 9:
            raise ValueError('The header file {}'.format(header_file)
                + ' has an incomplete signal line: {}'.format(line))
        # The gain field is 'gain/units' or 'gain(baseline)/units'.
        gain_text, has_baseline, baseline_text = fields[2].split('/')[0].partition('(')
        gains.append(float(gain_text))
        # An explicit baseline is the sample value of 0 physical units;
        # without one, the ADC zero field plays that role.
        if has_baseline:
            offsets.append(int(baseline_text.rstrip(')')))
        else:
            offsets.append(int(fields[4]))
        signal_files.append(fields[0])
        channels.append(fields[8])

    # Locate the diagnosis codes by their '#Dx:' key rather than by line number,
    # so a different number of signals or comment lines cannot misplace them.
    labels = ''
    for line in comment_lines:
        key, _, value = line.lstrip('#').partition(':')
        if key.strip() == 'Dx':
            labels = value.strip()
            break

    # Check that the header file only references one signal file. WFDB format allows for multiple signal files, but we have not
    # implemented that here for simplicity.
    num_signal_files = len(set(signal_files))
    if num_signal_files!=1:
        raise NotImplementedError('The header file {}'.format(header_file) \
            + ' references {} signal files; one signal file expected.'.format(num_signal_files))

    # Load the signal file.
    data = np.asarray(loadmat(signal_path)['val'])

    # Check that the dimensions of the signal data in the signal file is consistent with the dimensions for the signal data given
    # in the header file.
    num_channels = len(channels)
    if np.shape(data)!=(num_channels, num_samples):
        raise ValueError('The header file {}'.format(header_file) \
            + ' is inconsistent with the dimensions of the signal file.')

    # Rescale the signal data using the ADC gains and ADC offsets. Promote to
    # float64 before subtracting so narrow integer samples cannot overflow,
    # then store the result as float32.
    gain_column = np.asarray(gains, dtype=np.float64)[:, np.newaxis]
    offset_column = np.asarray(offsets, dtype=np.float64)[:, np.newaxis]
    rescaled_data = ((data.astype(np.float64) - offset_column) / gain_column).astype(np.float32)

    return rescaled_data, sampling_frequency, channels, labels

In [64]:
# Load one example record; the four results are inspected in the cells below.
(rescaled_data, sampling_frequency, channels, labels) = load_recording(
    '12lead/WFDBRecords/01/010/JS00001'
)

In [65]:
# Show the container type and dimensions on one line, then the values themselves.
print(f"{type(rescaled_data)} {rescaled_data.shape}", rescaled_data, sep='\n')

<class 'numpy.ndarray'> (12, 5000)
[[-0.254 -0.254 -0.254 ... -0.034  0.024  0.005]
 [ 0.264  0.264  0.264 ... -0.068 -0.049 -0.034]
 [ 0.517  0.517  0.517 ... -0.034 -0.073 -0.039]
 ...
 [ 0.81   0.81   0.81  ... -0.205 -0.2   -0.171]
 [ 0.81   0.81   0.81  ... -0.2   -0.195 -0.166]
 [ 0.527  0.527  0.527 ...  0.102  0.093  0.112]]


In [66]:
# Show the type of the sampling frequency, then its value.
print(type(sampling_frequency), sampling_frequency, sep='\n')

<class 'float'>
500.0


In [67]:
# Show the container type of the channel names, then the names.
print(type(channels), channels, sep='\n')

<class 'list'>
['I', 'II', 'III', 'aVR', 'aVL', 'aVF', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6']


In [68]:
# Show the type of the diagnosis labels, then the raw label text.
print(type(labels), labels, sep='\n')

<class 'str'>
164889003,59118001,164934002
