# Splitting MIT-BIH Dataset for Multi-client protocol

## Import packages

In [1]:
import h5py
import numpy as np

## Reading and Splitting the training and testing datasets into 3 equal parts

The training dataset contains 13245 samples, so we split it at indices 4415 and 8830 to obtain three equal parts of 4415 samples each.

In [2]:
# Load the whole training set into memory. Slicing an h5py dataset with [:]
# already yields a NumPy array, so no extra np.array(...) copy is needed and
# the file can be closed before the data is split.
train_file = 'mitbih_train.hdf5'
with h5py.File(train_file, "r") as f:
    x_train = f['x_train'][:]
    y_train = f['y_train'][:]

# Samples and labels must stay aligned; otherwise the per-client parts would
# pair signals with the wrong labels.
assert len(x_train) == len(y_train), (
    f'x_train/y_train length mismatch: {len(x_train)} != {len(y_train)}'
)

# Split along the first axis into 3 parts, one per client. np.array_split
# derives the boundaries from the actual length (4415 and 8830 for the
# 13245-sample file) instead of hard-coding them, so the parts stay (near-)
# equal even if the dataset size changes.
x_train_1, x_train_2, x_train_3 = np.array_split(x_train, 3)
y_train_1, y_train_2, y_train_3 = np.array_split(y_train, 3)

In [3]:
# Report the shape of every training part (samples and labels) so the split
# can be checked at a glance; the lines are collected first and emitted with
# a single print call.
train_summary = [
    '--- Training dataset ---',
    f'split training dataset 1: {x_train_1.shape}, {y_train_1.shape}',
    f'split training dataset 2: {x_train_2.shape}, {y_train_2.shape}',
    f'split training dataset 3: {x_train_3.shape}, {y_train_3.shape}',
]
print('\n'.join(train_summary))

--- Training dataset ---
split training dataset 1: (4415, 1, 128), (4415,)
split training dataset 2: (4415, 1, 128), (4415,)
split training dataset 3: (4415, 1, 128), (4415,)


The testing dataset also contains 13245 samples, so we split it at the same indices (4415 and 8830) to obtain three equal parts of 4415 samples each.

In [4]:
# Load the whole testing set into memory. Slicing an h5py dataset with [:]
# already yields a NumPy array, so no extra np.array(...) copy is needed and
# the file can be closed before the data is split.
test_file = 'mitbih_test.hdf5'
with h5py.File(test_file, "r") as f:
    x_test = f['x_test'][:]
    y_test = f['y_test'][:]

# Samples and labels must stay aligned; otherwise the per-client parts would
# pair signals with the wrong labels.
assert len(x_test) == len(y_test), (
    f'x_test/y_test length mismatch: {len(x_test)} != {len(y_test)}'
)

# Split along the first axis into 3 parts, one per client. np.array_split
# derives the boundaries from the actual length (4415 and 8830 for the
# 13245-sample file) instead of hard-coding them, so the parts stay (near-)
# equal even if the dataset size changes.
x_test_1, x_test_2, x_test_3 = np.array_split(x_test, 3)
y_test_1, y_test_2, y_test_3 = np.array_split(y_test, 3)

In [5]:
# Report the shape of every testing part (samples and labels) so the split
# can be checked at a glance; the lines are collected first and emitted with
# a single print call.
test_summary = [
    '--- Testing dataset ---',
    f'split test dataset 1: {x_test_1.shape}, {y_test_1.shape}',
    f'split test dataset 2: {x_test_2.shape}, {y_test_2.shape}',
    f'split test dataset 3: {x_test_3.shape}, {y_test_3.shape}',
]
print('\n'.join(test_summary))

--- Testing dataset ---
split test dataset 1: (4415, 1, 128), (4415,)
split test dataset 2: (4415, 1, 128), (4415,)
split test dataset 3: (4415, 1, 128), (4415,)


## Write the data into `hdf5` files

In [6]:
# Map each dataset name in the output file to the training part it stores.
# The insertion order matches the order in which the datasets are written.
train_parts = {
    'x_train_1': x_train_1,
    'x_train_2': x_train_2,
    'x_train_3': x_train_3,
    'y_train_1': y_train_1,
    'y_train_2': y_train_2,
    'y_train_3': y_train_3,
}

# Opening with 'w' creates the file, replacing any existing one of that name.
with h5py.File('multiclient_mitbih_train.hdf5', 'w') as hdf:
    for dataset_name, part in train_parts.items():
        hdf[dataset_name] = part
    print('Multiclient train data saved to multiclient_mitbih_train.hdf5')

Multiclient train data saved to multiclient_mitbih_train.hdf5


In [7]:
# Map each dataset name in the output file to the testing part it stores.
# The insertion order matches the order in which the datasets are written.
test_parts = {
    'x_test_1': x_test_1,
    'x_test_2': x_test_2,
    'x_test_3': x_test_3,
    'y_test_1': y_test_1,
    'y_test_2': y_test_2,
    'y_test_3': y_test_3,
}

# Opening with 'w' creates the file, replacing any existing one of that name.
with h5py.File('multiclient_mitbih_test.hdf5', 'w') as hdf:
    for dataset_name, part in test_parts.items():
        hdf[dataset_name] = part
    print('Multiclient test data saved to multiclient_mitbih_test.hdf5')

Multiclient test data saved to multiclient_mitbih_test.hdf5
