using numpy

In [11]:
import os
import numpy as np

# Input directories; every file ending in '.csv' inside them is loaded.
# ecg_directory is read with is_ecg=True, hr_directory with is_ecg=False.
ecg_directory = '/content/mesa_preproc_sample/preproc/final/ecg/'
hr_directory = '/content/mesa_preproc_sample/preproc/final/hr/'

def process_data(file_path, block_size, is_ecg=True):
    """Load one CSV file and cut its signal and labels into fixed-size blocks.

    The first line of the file is skipped as a header and blank lines are
    ignored. For every remaining row the signal value is read from column 0
    when ``is_ecg`` is true and from column 1 otherwise; the last column is
    read as the integer PSG status. Trailing rows that do not fill a whole
    block are dropped.

    Args:
        file_path: Path of the comma-separated file to read.
        block_size: Number of consecutive rows per block; must be positive.
        is_ecg: Selects column 0 (True) or column 1 (False) as the signal.

    Returns:
        A tuple ``(data_blocks, psg_status_blocks)`` of two equally long
        lists of 1-D arrays, each array holding ``block_size`` entries.
        Both lists are empty when the file has fewer than ``block_size``
        data rows (including a completely empty file).

    Raises:
        ValueError: If ``block_size`` is not positive, or a field cannot be
            parsed as a number.
    """
    if block_size <= 0:
        raise ValueError(f"block_size must be positive, got {block_size}")

    value_column = 0 if is_ecg else 1
    data = []
    psg_status = []
    with open(file_path, 'r') as file:
        # Skip the header line; the default keeps an empty file from raising
        # StopIteration.
        next(file, None)
        for line in file:
            line = line.strip()
            if not line:
                continue
            row_data = line.split(',')
            data.append(float(row_data[value_column]))
            psg_status.append(int(row_data[-1]))
    data = np.array(data)
    psg_status = np.array(psg_status)

    num_blocks = len(data) // block_size
    if num_blocks == 0:
        # np.split rejects a section count of 0, so a file shorter than one
        # block simply contributes no blocks.
        return [], []

    usable = num_blocks * block_size
    data_blocks = np.split(data[:usable], num_blocks)
    psg_status_blocks = np.split(psg_status[:usable], num_blocks)
    return data_blocks, psg_status_blocks

In [12]:
# Process ECG data
# os.listdir returns names in arbitrary order, so the filenames are sorted to
# make the block order reproducible and identical across both directories
# whenever they contain the same filenames.
ecg_data_blocks = []
ecg_psg_status_blocks = []
for filename in sorted(os.listdir(ecg_directory)):
    if filename.endswith('.csv'):
        ecg_file_path = os.path.join(ecg_directory, filename)
        data_blocks, psg_status_blocks = process_data(ecg_file_path, block_size=7680, is_ecg=True)
        ecg_data_blocks.extend(data_blocks)
        ecg_psg_status_blocks.extend(psg_status_blocks)

# Process heart rate data
hr_data_blocks = []
hr_psg_status_blocks = []
for filename in sorted(os.listdir(hr_directory)):
    if filename.endswith('.csv'):
        hr_file_path = os.path.join(hr_directory, filename)
        data_blocks, psg_status_blocks = process_data(hr_file_path, block_size=30, is_ecg=False)
        hr_data_blocks.extend(data_blocks)
        hr_psg_status_blocks.extend(psg_status_blocks)

# Convert data to NumPy arrays (one row per block)
ecg_data_array = np.array(ecg_data_blocks)
ecg_psg_status_array = np.array(ecg_psg_status_blocks)
hr_data_array = np.array(hr_data_blocks)
hr_psg_status_array = np.array(hr_psg_status_blocks)

In [16]:
# Sanity check: one row per ECG block, one column per value in a block.
print(ecg_data_array.shape)

(4791, 7680)


In [17]:
# Sanity check: one row per heart-rate block, one column per value in a block.
print(hr_data_array.shape)

(4791, 30)


In [18]:
# Shuffle data
# Each data array is shuffled together with its own PSG status array so that
# labels stay attached to their blocks.
shuffle_indices = np.random.permutation(len(ecg_data_array))
ecg_data_array_shuffled = ecg_data_array[shuffle_indices]
ecg_psg_status_array_shuffled = ecg_psg_status_array[shuffle_indices]

# NOTE(review): assumes row i of the ECG and HR arrays describes the same
# epoch when both arrays have the same number of rows -- confirm. In that case
# the ECG permutation is reused so the two saved datasets stay row-aligned;
# two independent permutations would silently break that pairing.
if len(hr_data_array) != len(ecg_data_array):
    shuffle_indices = np.random.permutation(len(hr_data_array))
hr_data_array_shuffled = hr_data_array[shuffle_indices]
hr_psg_status_array_shuffled = hr_psg_status_array[shuffle_indices]

np.save('/content/ecg_data_shuffled.npy', ecg_data_array_shuffled)
np.save('/content/ecg_psg_status_shuffled.npy', ecg_psg_status_array_shuffled)
np.save('/content/hr_data_shuffled.npy', hr_data_array_shuffled)
np.save('/content/hr_psg_status_shuffled.npy', hr_psg_status_array_shuffled)
