Using NumPy

In [24]:
import os
import numpy as np

# Input directories scanned for '.csv' files further down: one for the ECG
# signal files and one for the heart-rate (HR) signal files.
ecg_directory = '/content/mesa_preproc_sample/preproc/final/ecg/'
hr_directory = '/content/mesa_preproc_sample/preproc/final/hr/'

def process_data(file_path, block_size, is_ecg=True):
    """Read one signal CSV and cut it into fixed-length blocks.

    The first line of the file is skipped as a header and blank lines are
    ignored. Every other line is split on commas: the signal value is read
    from column 0 when ``is_ecg`` is true and from column 1 otherwise, and
    the last column is parsed as the integer PSG status.

    Args:
        file_path: Path of the CSV file to read.
        block_size: Number of consecutive samples per block (must be >= 1).
        is_ecg: Selects the signal column (0 if true, 1 otherwise).

    Returns:
        ``(data_blocks, psg_status_blocks)``: two equally long lists of 1-D
        arrays, each of length ``block_size``. Trailing samples that do not
        fill a whole block are dropped. A file with fewer than
        ``block_size`` samples (including an empty file) yields two empty
        lists.

    Raises:
        ValueError: If ``block_size`` is smaller than 1.
    """
    if block_size < 1:
        raise ValueError(
            f"block_size must be a positive integer, got {block_size!r}"
        )

    value_column = 0 if is_ecg else 1
    data = []
    psg_status = []
    with open(file_path, 'r') as file:
        # Skip the header line; the default keeps an empty file from raising
        # StopIteration.
        next(file, None)
        for line in file:
            if not line.strip():
                continue
            row_data = line.split(',')
            data.append(float(row_data[value_column]))
            psg_status.append(int(row_data[-1]))

    num_blocks = len(data) // block_size
    usable = num_blocks * block_size

    # Reshape to (num_blocks, block_size) and list the rows. Unlike
    # np.split(..., num_blocks), this also works when num_blocks is 0 and
    # then simply produces empty lists.
    data_blocks = list(
        np.array(data[:usable], dtype=float).reshape(num_blocks, block_size)
    )
    psg_status_blocks = list(
        np.array(psg_status[:usable], dtype=int).reshape(num_blocks, block_size)
    )
    return data_blocks, psg_status_blocks


In [25]:
def process_data(file_path, block_size, is_ecg=True):
    """Read one signal CSV and cut it into fixed-length, labelled blocks.

    The first line of the file is skipped as a header and blank lines are
    ignored. Every other line is split on commas: the signal value is read
    from column 0 when ``is_ecg`` is true and from column 1 otherwise, and
    the last column is parsed as the integer PSG status. The label is the
    second ``'_'``-separated token of the file's base name and is repeated
    for every sample.

    Args:
        file_path: Path of the CSV file to read; its base name must contain
            at least one underscore.
        block_size: Number of consecutive samples per block (must be >= 1).
        is_ecg: Selects the signal column (0 if true, 1 otherwise).

    Returns:
        ``(data_blocks, psg_status_blocks, label_blocks)``: three equally
        long lists of 1-D arrays, each of length ``block_size``. Trailing
        samples that do not fill a whole block are dropped. A file with
        fewer than ``block_size`` samples (including an empty file) yields
        three empty lists.

    Raises:
        ValueError: If ``block_size`` is smaller than 1.
        IndexError: If the file's base name contains no underscore.
    """
    if block_size < 1:
        raise ValueError(
            f"block_size must be a positive integer, got {block_size!r}"
        )

    value_column = 0 if is_ecg else 1
    data = []
    psg_status = []
    with open(file_path, 'r') as file:
        # Skip the header line; the default keeps an empty file from raising
        # StopIteration.
        next(file, None)
        for line in file:
            if not line.strip():
                continue
            row_data = line.split(',')
            data.append(float(row_data[value_column]))
            psg_status.append(int(row_data[-1]))

    # Extract label from the file name
    label = os.path.basename(file_path).split('_')[1]

    num_blocks = len(data) // block_size
    usable = num_blocks * block_size
    shape = (num_blocks, block_size)

    # Reshape to (num_blocks, block_size) and list the rows. Unlike
    # np.split(..., num_blocks), this also works when num_blocks is 0 and
    # then simply produces empty lists.
    data_blocks = list(np.array(data[:usable], dtype=float).reshape(shape))
    psg_status_blocks = list(
        np.array(psg_status[:usable], dtype=int).reshape(shape)
    )
    # Every sample carries the same label, so fill the array directly
    # instead of building a per-sample Python list first.
    label_blocks = list(np.full(shape, label))
    return data_blocks, psg_status_blocks, label_blocks

In [26]:
# Collect fixed-length blocks from every '.csv' file in the ECG and HR
# directories and stack them into 2-D arrays (one row per block).
#
# os.listdir() returns entries in arbitrary order, so both listings are
# sorted: the block order is then reproducible between runs, and identically
# named files in the two directories are visited in the same order.

# Process ECG data
ecg_data_blocks = []
ecg_psg_status_blocks = []
ecg_label_blocks = []
for filename in sorted(os.listdir(ecg_directory)):
    if filename.endswith('.csv'):
        ecg_file_path = os.path.join(ecg_directory, filename)
        # 7680 samples per ECG block (presumably 30 s at 256 Hz; confirm).
        data_blocks, psg_status_blocks, label_blocks = process_data(ecg_file_path, block_size=7680, is_ecg=True)
        ecg_data_blocks.extend(data_blocks)
        ecg_psg_status_blocks.extend(psg_status_blocks)
        ecg_label_blocks.extend(label_blocks)

# Process heart rate data
hr_data_blocks = []
hr_psg_status_blocks = []
hr_label_blocks = []
for filename in sorted(os.listdir(hr_directory)):
    if filename.endswith('.csv'):
        hr_file_path = os.path.join(hr_directory, filename)
        # 30 samples per HR block (presumably 30 s at 1 Hz; confirm).
        data_blocks, psg_status_blocks, label_blocks = process_data(hr_file_path, block_size=30, is_ecg=False)
        hr_data_blocks.extend(data_blocks)
        hr_psg_status_blocks.extend(psg_status_blocks)
        hr_label_blocks.extend(label_blocks)

# Convert data to NumPy arrays
ecg_data_array = np.array(ecg_data_blocks)
ecg_psg_status_array = np.array(ecg_psg_status_blocks)
ecg_label_array = np.array(ecg_label_blocks)

hr_data_array = np.array(hr_data_blocks)
hr_psg_status_array = np.array(hr_psg_status_blocks)
hr_label_array = np.array(hr_label_blocks)

In [27]:
# Sanity check: (number of ECG blocks, samples per ECG block).
print(ecg_data_array.shape)

(4791, 7680)


In [28]:
# Sanity check: (number of HR blocks, samples per HR block).
print(hr_data_array.shape)

(4791, 30)


In [32]:
# Shuffle data
# One permutation is applied to the data, PSG-status and label arrays of a
# modality so that their rows stay matched.
shuffle_indices = np.random.permutation(len(ecg_data_array))
ecg_data_array_shuffled = ecg_data_array[shuffle_indices]
ecg_psg_array_shuffled = ecg_psg_status_array[shuffle_indices]
ecg_label_array_shuffled = ecg_label_array[shuffle_indices]

# Reuse the ECG permutation for HR whenever both modalities have the same
# number of blocks: two independent permutations would scramble any row-wise
# correspondence between ECG block i and HR block i. Only fall back to a
# separate permutation when the block counts differ.
if len(hr_data_array) != len(ecg_data_array):
    shuffle_indices = np.random.permutation(len(hr_data_array))
hr_data_array_shuffled = hr_data_array[shuffle_indices]
hr_psg_array_shuffled = hr_psg_status_array[shuffle_indices]
hr_label_array_shuffled = hr_label_array[shuffle_indices]

# Save shuffled data to .npy files
np.save('/content/ecg_data_shuffled.npy', ecg_data_array_shuffled)
np.save('/content/ecg_psg_shuffled.npy', ecg_psg_array_shuffled)
np.save('/content/ecg_label_shuffled.npy', ecg_label_array_shuffled)

np.save('/content/hr_data_shuffled.npy', hr_data_array_shuffled)
np.save('/content/hr_psg_shuffled.npy', hr_psg_array_shuffled)
np.save('/content/hr_label_shuffled.npy', hr_label_array_shuffled)

In [34]:
# Display the shuffled ECG block array (notebook cell output follows).
ecg_data_array_shuffled

array([[ 0.26741436,  0.24071107,  0.24147402, ...,  0.06050202,
         0.04142824,  0.09285115],
       [ 0.18028534,  0.17799649,  0.17189288, ..., -0.08461128,
        -0.07911803, -0.07286183],
       [-0.11711299, -0.11772335, -0.11772335, ...,  0.00953689,
         0.00755322,  0.00770581],
       ...,
       [ 0.40550851,  0.4181735 ,  0.31303883, ..., -0.06309606,
        -0.02250706,  0.02067597],
       [ 0.00434882,  0.00068666, -0.00144961, ...,  0.0296788 ,
         0.03059434,  0.02876326],
       [ 0.01380941,  0.01045243,  0.00892653, ...,  0.01014725,
         0.01060502,  0.01197833]])