# In [48]:
import os
import numpy as np

data_directory = '/content/mesa_preproc_sample/preproc/final/'

def process_data(file_path, block_size, is_ecg=True):
    """Read one CSV file and cut its signal into fixed-length blocks.

    The first line of the file is treated as a header and skipped, and blank
    lines are ignored. For every remaining row the sample value is read from
    column 0 when ``is_ecg`` is true and from column 1 otherwise; the status
    flag is read from the last column. The label is the second
    ``_``-separated token of the file's base name and is repeated once per
    sample. Trailing samples that do not fill a complete block are dropped.

    Args:
        file_path: Path of the CSV file to read.
        block_size: Number of consecutive samples per block (must be >= 1).
        is_ecg: Selects the sample column: 0 when true, 1 when false.

    Returns:
        A tuple ``(data_blocks, status_blocks, label_blocks)``. Each element
        is a list of 1-D NumPy arrays of length ``block_size``. All three
        lists are empty when the file holds fewer than ``block_size`` rows
        (including a completely empty file).

    Raises:
        ValueError: If ``block_size`` is smaller than 1, or a field cannot be
            parsed as a number.
        IndexError: If the base name contains no ``_`` or a row has too few
            columns.
    """
    if block_size < 1:
        raise ValueError(f"block_size must be >= 1, got {block_size}")

    column = 0 if is_ecg else 1
    data = []
    status = []

    with open(file_path, 'r') as file:
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        next(file, None)
        for line in file:
            if not line.strip():
                continue
            row_data = line.split(',')
            data.append(float(row_data[column]))
            status.append(int(row_data[-1]))

    label = os.path.basename(file_path).split('_')[1]

    num_blocks = len(data) // block_size
    if num_blocks == 0:
        # np.split cannot cut into zero sections; a file too short for a
        # single block simply contributes nothing.
        return [], [], []

    usable = num_blocks * block_size
    data_blocks = np.split(np.array(data[:usable]), num_blocks)
    status_blocks = np.split(np.array(status[:usable]), num_blocks)
    label_blocks = np.split(np.array([label] * usable), num_blocks)
    return data_blocks, status_blocks, label_blocks

# Accumulators for the blocks gathered from every CSV in data_directory.
ecg_data_blocks, ecg_status_blocks, ecg_label_blocks = [], [], []
hr_data_blocks, hr_status_blocks, hr_label_blocks = [], [], []

for filename in os.listdir(data_directory):
    if not filename.endswith('.csv'):
        continue

    file_path = os.path.join(data_directory, filename)

    # Files with 'ecg' in their name are cut into 7680-sample blocks read from
    # column 0; every other CSV is cut into 30-sample blocks read from column 1.
    is_ecg_file = 'ecg' in filename
    block_len = 7680 if is_ecg_file else 30
    data_blocks, status_blocks, label_blocks = process_data(
        file_path, block_size=block_len, is_ecg=is_ecg_file
    )

    if is_ecg_file:
        targets = (ecg_data_blocks, ecg_status_blocks, ecg_label_blocks)
    else:
        targets = (hr_data_blocks, hr_status_blocks, hr_label_blocks)
    for target, blocks in zip(targets, (data_blocks, status_blocks, label_blocks)):
        target.extend(blocks)

# Convert each list of blocks into a single NumPy array (one row per block).
ecg_data_array, ecg_status_array, ecg_label_array = [
    np.array(block_list)
    for block_list in (ecg_data_blocks, ecg_status_blocks, ecg_label_blocks)
]

hr_data_array, hr_status_array, hr_label_array = [
    np.array(block_list)
    for block_list in (hr_data_blocks, hr_status_blocks, hr_label_blocks)
]

# Shuffle data
# One random ordering is drawn per modality and applied to its data, status
# and label arrays alike, so those three stay row-aligned with each other.
# NOTE(review): ECG and HR use separate permutations, so ECG row i and HR row i
# do not correspond after shuffling — confirm this is intended.
shuffle_indices = np.random.permutation(len(ecg_data_array))
ecg_data_array_shuffled, ecg_status_array_shuffled, ecg_label_array_shuffled = [
    unshuffled[shuffle_indices]
    for unshuffled in (ecg_data_array, ecg_status_array, ecg_label_array)
]

shuffle_indices = np.random.permutation(len(hr_data_array))
hr_data_array_shuffled, hr_status_array_shuffled, hr_label_array_shuffled = [
    unshuffled[shuffle_indices]
    for unshuffled in (hr_data_array, hr_status_array, hr_label_array)
]

# Write every shuffled array to its own .npy file, ECG first, then HR.
outputs = {
    '/content/ecg_data_shuffled.npy': ecg_data_array_shuffled,
    '/content/ecg_status_shuffled.npy': ecg_status_array_shuffled,
    '/content/ecg_label_shuffled.npy': ecg_label_array_shuffled,
    '/content/hr_data_shuffled.npy': hr_data_array_shuffled,
    '/content/hr_status_shuffled.npy': hr_status_array_shuffled,
    '/content/hr_label_shuffled.npy': hr_label_array_shuffled,
}
for out_path, shuffled in outputs.items():
    np.save(out_path, shuffled)
