In [24]:
import os
import wfdb
import numpy as np
import pandas as pd
from scipy import interpolate
from scipy import signal
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

In [30]:
# Get all record names and create id_dict with labels
data_path = '/home/teaching/Documents/Medformer/dataset/MIT-BIH/files/mitdb/1.0.0'
records = sorted([f.split('.')[0] for f in os.listdir(data_path) if f.endswith('.hea')])

def final_beat(beats):
    count = {}
    for b in beats:
        count[b] = count.get(b, 0) + 1
    if not count:
        return 'others'
    max_b = max(count, key=count.get)
    if max_b == 'N':
        return 'NORM'
    elif max_b == 'V':
        return 'PVC'
    elif max_b == 'S':
        return 'PAC'
    elif max_b == 'F':
        return 'Fusion'
    elif max_b == 'Q':
        return 'Paced'
    else:
        return 'others'

id_dict = {}
order = 1
for rec in records:
    try:
        ann = wfdb.rdann(os.path.join(data_path, rec), 'atr')
        beats = ann.symbol
        scp = final_beat(beats)
        id_dict['{:05d}'.format(order)] = [[int(rec)], [scp]]
        order += 1
    except:
        print(f"Error loading {rec}")

print(f"Total records: {len(id_dict)}")
print(id_dict)

Total records: 48
{'00001': [[100], ['NORM']], '00002': [[101], ['NORM']], '00003': [[102], ['others']], '00004': [[103], ['NORM']], '00005': [[104], ['others']], '00006': [[105], ['NORM']], '00007': [[106], ['NORM']], '00008': [[107], ['others']], '00009': [[108], ['NORM']], '00010': [[109], ['others']], '00011': [[111], ['others']], '00012': [[112], ['NORM']], '00013': [[113], ['NORM']], '00014': [[114], ['NORM']], '00015': [[115], ['NORM']], '00016': [[116], ['NORM']], '00017': [[117], ['NORM']], '00018': [[118], ['others']], '00019': [[119], ['NORM']], '00020': [[121], ['NORM']], '00021': [[122], ['NORM']], '00022': [[123], ['NORM']], '00023': [[124], ['others']], '00024': [[200], ['NORM']], '00025': [[201], ['NORM']], '00026': [[202], ['NORM']], '00027': [[203], ['NORM']], '00028': [[205], ['NORM']], '00029': [[207], ['others']], '00030': [[208], ['NORM']], '00031': [[209], ['NORM']], '00032': [[210], ['NORM']], '00033': [[212], ['others']], '00034': [[213], ['NORM']], '00035': [[

In [31]:
# Resampling to 250Hz
def resampling(array, freq, kind='linear'):
    t = np.linspace(1, len(array), len(array))
    f = interpolate.interp1d(t, array, kind=kind)
    t_new = np.linspace(1, len(array), int(len(array)/freq * 250))
    new_array = f(t_new)
    return new_array

# Standard normalization
def normalize(data):
    scaler = StandardScaler()
    data_norm = scaler.fit_transform(data)
    return data_norm

In [34]:
# Main processing
feature_path = './Feature'
if not os.path.exists(feature_path):
    os.mkdir(feature_path)

for pid in id_dict.keys():
    sub = []
    rec_id = id_dict[pid][0][0]
    tri_path = os.path.join(data_path, str(rec_id))
    try:
        ecg_data, field = wfdb.rdsamp(tri_path)
        trial = []
        for ch in range(ecg_data.shape[1]):
            data = resampling(ecg_data[:,ch], freq=360, kind='linear')
            trial.append(data)
        trial = np.array(trial).T
        trial_norm = normalize(trial)
        sub.append(trial_norm)
        sub = np.array(sub)
        num_samples = sub.shape[1] // 250 * 250  # truncate to multiple of 250
        sub = sub[:, :num_samples, :]
        sub = sub.reshape(-1, 250, sub.shape[-1])  # split into 1s samples
        np.save(feature_path + '/feature_{}.npy'.format(pid), sub)
        print(f"Saved feature_{pid}.npy with shape {sub.shape}")
    except Exception as e:
        print(f"Error processing {pid}: {e}")

Saved feature_00001.npy with shape (1805, 250, 2)
Saved feature_00002.npy with shape (1805, 250, 2)
Saved feature_00003.npy with shape (1805, 250, 2)
Saved feature_00004.npy with shape (1805, 250, 2)
Saved feature_00005.npy with shape (1805, 250, 2)
Saved feature_00006.npy with shape (1805, 250, 2)
Saved feature_00004.npy with shape (1805, 250, 2)
Saved feature_00005.npy with shape (1805, 250, 2)
Saved feature_00006.npy with shape (1805, 250, 2)
Saved feature_00007.npy with shape (1805, 250, 2)
Saved feature_00008.npy with shape (1805, 250, 2)
Saved feature_00009.npy with shape (1805, 250, 2)
Saved feature_00007.npy with shape (1805, 250, 2)
Saved feature_00008.npy with shape (1805, 250, 2)
Saved feature_00009.npy with shape (1805, 250, 2)
Saved feature_00010.npy with shape (1805, 250, 2)
Saved feature_00011.npy with shape (1805, 250, 2)
Saved feature_00012.npy with shape (1805, 250, 2)
Saved feature_00010.npy with shape (1805, 250, 2)
Saved feature_00011.npy with shape (1805, 250, 2)


In [28]:
# Create label.npy
label_path = './Label'
if not os.path.exists(label_path):
    os.mkdir(label_path)

scp_dict = {'NORM': 0, 'PVC': 1, 'PAC': 2, 'Fusion': 3, 'Paced': 4, 'others': 5}
label = []
for k, v in id_dict.items():
    scp = v[1][0]
    diag = scp_dict.get(scp, 5)
    label.append([diag, int(k)])
label = np.array(label)
print(label)
np.save(label_path + '/label.npy', label)

[[ 0  1]
 [ 0  2]
 [ 5  3]
 [ 0  4]
 [ 5  5]
 [ 0  6]
 [ 0  7]
 [ 5  8]
 [ 0  9]
 [ 5 10]
 [ 5 11]
 [ 0 12]
 [ 0 13]
 [ 0 14]
 [ 0 15]
 [ 0 16]
 [ 0 17]
 [ 5 18]
 [ 0 19]
 [ 0 20]
 [ 0 21]
 [ 0 22]
 [ 5 23]
 [ 0 24]
 [ 0 25]
 [ 0 26]
 [ 0 27]
 [ 0 28]
 [ 5 29]
 [ 0 30]
 [ 0 31]
 [ 0 32]
 [ 5 33]
 [ 0 34]
 [ 5 35]
 [ 0 36]
 [ 5 37]
 [ 0 38]
 [ 0 39]
 [ 0 40]
 [ 0 41]
 [ 0 42]
 [ 0 43]
 [ 0 44]
 [ 5 45]
 [ 5 46]
 [ 0 47]
 [ 0 48]]


In [35]:
# Check shape of a sample feature
import numpy as np
sample = np.load('./Feature/feature_00001.npy')
print(sample.shape)

(1805, 250, 2)
