# WESAD dataset processing

In [1]:
import os
import glob 
import wfdb
import pytz
import pickle
import numpy as np
import pandas as pd
from tqdm import tqdm
import neurokit2 as nk
from dateutil import tz
from pathlib import Path
from datetime import datetime as dt
from sklearn.preprocessing import StandardScaler

In [2]:
PHYSCIO_2017_SR = 300
WESAD_SR = 700
SWELL_SR = 2048
DOWNSAMPLE_SR = 128

# 1. Preprocessing

In [3]:
def moving_average(signal, window_size=10):
    """Compute moving average with specified window size."""
    if window_size < 1:
        raise ValueError("window_size must be >= 1")
    return np.convolve(signal, np.ones(window_size)/window_size, mode='same')

def ecg_preprocessing(signal, sample_rate, lowcut=0.5, highcut=100, ma_window=10, downsample_rate=128):
    band_passed_ecg =  nk.signal_filter(signal, sampling_rate=sample_rate, lowcut=lowcut, highcut=highcut, method='butterworth_zi', order = 2)
    emg = moving_average(band_passed_ecg, window_size=ma_window)
    downsampled_ecg = nk.signal_resample(emg, sampling_rate=sample_rate, desired_sampling_rate=downsample_rate)
    return downsampled_ecg

def z_scale(arr):
     scaler = StandardScaler()
     x = scaler.fit_transform(arr)
     return x

# 2. WESAD_Dataset

In [None]:
def load_subject_pickle_data(data_dir, sub_id):
    # load subject data and labels
    sub_path = os.path.join(data_dir, f"{sub_id}", f"{sub_id}.pkl")
    sub_data = pickle.load(open(sub_path, "rb"), encoding="latin1")
    labels = np.array(sub_data["label"])
    
    # Preprocessing the ECG signal
    ecg_raw = np.array(sub_data["signal"]["chest"]["ECG"][:, 0])
    cleaned_ecg = ecg_preprocessing(ecg_raw, sample_rate=WESAD_SR, downsample_rate=DOWNSAMPLE_SR)
    #ecg_singal = z_scale(cleaned_ecg.reshape(-1, 1))

    # assign timestamps
    start_dt = dt(2017, 11, 28, 0, 0, 0, 0, tzinfo=pytz.UTC)

    # freq = 1 / 128 seconds
    label_freq = pd.DateOffset(seconds=1 / WESAD_SR)
    ecg_freq = pd.DateOffset(seconds=1 / DOWNSAMPLE_SR)
    label_times = pd.date_range(start=start_dt, periods=labels.shape[0], freq=label_freq, tz="UTC")
    ecg_times = pd.date_range(start=start_dt, periods=ecg_singal.shape[0], freq=ecg_freq, tz="UTC")

    #Prepare the dataframe
    label_df = pd.DataFrame({"label_sample_timestamp_utc": label_times, "y": labels})
    ECG_df = pd.DataFrame({"ecg_sample_timestamp_utc": ecg_times,"ecg": ecg_singal.flatten()})

    # align labels to ECG via merge_asof
    ECG_df = pd.merge_asof(
        ECG_df.sort_values("ecg_sample_timestamp_utc"),
        label_df.sort_values("label_sample_timestamp_utc"),
        left_on="ecg_sample_timestamp_utc",
        right_on="label_sample_timestamp_utc",
        direction="nearest",
    )

    # drop label timestamp column
    ECG_df.drop(columns="label_sample_timestamp_utc", inplace=True)
    # make timestamp index
    ECG_df.set_index("ecg_sample_timestamp_utc", inplace=True, drop=False)
    return ECG_df


In [26]:
# aggregate labels per segment (drop segments with mixed labels)
def agg_labels(label_list):
    label_set = set(label_list)
    if len(label_set) != 1:
        return np.nan
    l = list(label_set)[0]
    if l in [1,2,3]:
        return l
    return np.nan

def create_segments(ecg_df,segment_length, segment_stride):
    ecg_segs = []
    label_segs = []
    left_buffers = []
    right_buffers = []

    for i in range(1, len(ecg_df) - segment_length, segment_stride):
        ecg_seg = ecg_df["ecg"][i : i + segment_length]
        ecg_segs.append(list(ecg_seg))
        label_segs.append(agg_labels(ecg_df["y"][i : i + segment_length]))

        # left buffer
        if i - segment_length >= 0:
            left_buffers.append(list(ecg_df["ecg"][i - segment_length : i]))
        else:
            left_buffer = np.full_like(ecg_seg, np.nan)
            remaining_left_values = ecg_df["ecg"][:i]
            left_buffer[-remaining_left_values.shape[0] :] = remaining_left_values
            left_buffers.append(left_buffer)

        # right buffer
        if i + 2 * segment_length < len(ecg_df):
            right_buffers.append(list(ecg_df["ecg"][i + segment_length : i + 2 * segment_length]))
        else:
            right_buffer = np.full_like(ecg_seg, np.nan)
            remaining_right_values = ecg_df["ecg"][i + segment_length :]
            right_buffer[: remaining_right_values.shape[0]] = remaining_right_values
            right_buffers.append(right_buffer)
            
    keep_mask = ~np.isnan(label_segs)
    ecg_segs = np.array(ecg_segs)
    left_buffers = np.array(left_buffers)
    right_buffers = np.array(right_buffers)
    label_segs = np.array(label_segs)
    print(np.unique(label_segs, return_counts=True))
    # ---------- CREATE LABELLED PARQUET ----------
    df_labelled = pd.DataFrame({
    "x": ecg_segs[keep_mask].tolist(),  # call once on full slice
    "x_left_buffer": left_buffers[keep_mask].tolist(),
    "x_right_buffer": right_buffers[keep_mask].tolist(),
    "y": label_segs[keep_mask].tolist(),
    })
    # ---------- CREATE UNLABELLED PARQUET ----------
    df_unlabelled = pd.DataFrame({
        "x": ecg_segs.tolist(),
        "x_left_buffer": left_buffers.tolist(),
        "x_right_buffer": right_buffers.tolist(),
        "y": label_segs.tolist(),
    })
    return df_labelled, df_unlabelled


def process_subject_data(
    data_dir,
    sub_id, output_dir,
    segment_length=640, segment_stride=1):
    
    ecg_df = load_subject_pickle_data(data_dir, sub_id)
    # segment data
    df_labelled, df_unlabelled = create_segments(ecg_df, segment_length, segment_stride)
    print(df_labelled.shape)
    # save
    subject_out_dir = os.path.join(output_dir, sub_id)
    os.makedirs(subject_out_dir, exist_ok=True)
    df_labelled.to_parquet(os.path.join(subject_out_dir, "ECG_labelled.parquet"), index=False)
    df_unlabelled.to_parquet(os.path.join(subject_out_dir, "ECG_unlabelled.parquet"), index=False)


def load_all_subjects(data_dir, output_dir, segment_length, segment_stride):
    # run on all subjects
    subject = os.listdir(data_dir)
    print(subject)
    subject.sort()
    for sub_id in tqdm(subject):
        if sub_id.startswith("S"):
            print("working with the subject", sub_id)
            process_subject_data(
                data_dir, sub_id, output_dir, 
                segment_length=segment_length, segment_stride=segment_stride,
            )


In [11]:
SEGMENT_LENGTH = 1280
SEGMENT_STRIDE = 64
WESAD_DATA_DIR = "/home/s223149341/SSL-invariance-Subject_Project_model/data/WESAD/WESAD_LOSO"
WESAD_OUTPUT_DIR = "/home/s223149341/SSL-invariance-Subject_Project_model/data/WESAD/wesad_10_05"

load_all_subjects(WESAD_DATA_DIR, WESAD_OUTPUT_DIR,
                    SEGMENT_LENGTH, SEGMENT_STRIDE)


  0%|          | 0/16 [00:00<?, ?it/s]

working with the subject S10
(4496,)


 12%|█▎        | 2/16 [01:04<07:30, 32.17s/it]

working with the subject S11
(4398,)


 19%|█▉        | 3/16 [02:00<09:08, 42.22s/it]

working with the subject S13
(4393,)


 25%|██▌       | 4/16 [03:03<09:57, 49.80s/it]

working with the subject S14
(4396,)


 31%|███▏      | 5/16 [04:07<10:05, 55.02s/it]

working with the subject S15
(4408,)


 38%|███▊      | 6/16 [05:07<09:26, 56.61s/it]

working with the subject S16
(4384,)


 44%|████▍     | 7/16 [06:11<08:51, 59.03s/it]

working with the subject S17
(4494,)


 50%|█████     | 8/16 [07:18<08:11, 61.47s/it]

working with the subject S2
(4184,)


 56%|█████▋    | 9/16 [08:26<07:24, 63.56s/it]

working with the subject S3
(4251,)


 62%|██████▎   | 10/16 [09:39<06:37, 66.33s/it]

working with the subject S4
(4272,)


 69%|██████▉   | 11/16 [10:51<05:39, 67.95s/it]

working with the subject S5
(4375,)


 75%|███████▌  | 12/16 [12:01<04:34, 68.57s/it]

working with the subject S6
(4345,)


 81%|████████▏ | 13/16 [13:19<03:34, 71.54s/it]

working with the subject S7
(4337,)


 88%|████████▊ | 14/16 [14:18<02:15, 67.85s/it]

working with the subject S8
(4359,)


 94%|█████████▍| 15/16 [15:23<01:06, 66.98s/it]

working with the subject S9
(4336,)


100%|██████████| 16/16 [16:22<00:00, 61.41s/it]


In [None]:
df = pd.read_parquet('/home/s223149341/SSL-invariance-Subject_Project_model/data/WESAD/wesad_10_05/S10/ECG_labelled.parquet')
df

Unnamed: 0,x,x_left_buffer,x_right_buffer,y
0,"[0.4350993896761403, 0.4227587291043353, 0.372...","[0.5241152517057747, 0.4479350142959354, 0.355...","[0.15299997294264012, 0.21753927235356896, 0.2...",1.0
1,"[-0.8802328091755541, -0.7709747455539313, -0....","[0.700878603263332, 0.5796874848693652, 0.4838...","[0.4300915905581949, 0.4538402800476009, 0.446...",1.0
2,"[-0.6537861956846467, 0.13994133701793904, 0.3...","[0.07538467662411984, -0.054958272344688246, -...","[0.21069885628834378, 0.11922739644110023, -0....",1.0
3,"[-0.14836644122885925, -0.37025100694415203, -...","[-1.6164114646223342, -2.293234528448982, -2.8...","[-2.8641654891690442, -2.7045562997394916, -1....",1.0
4,"[0.8196932230474445, 0.7716069217221877, 0.706...","[-0.5611843283889669, -0.482108057415054, -0.3...","[0.7931183148437482, 0.648868244929643, 0.5451...",1.0
...,...,...,...,...
4491,"[-0.014481816147570374, -0.1754002456911428, 0...","[0.30232100275375684, 0.17144603232315844, 0.2...","[-0.520649439974816, -0.012640142448390863, 0....",2.0
4492,"[-0.6449719464513708, -0.28788747579703333, -0...","[-1.2859883388787248, -2.3756108799676117, -3....","[-1.348494786246596, -1.7815250144372519, -2.1...",2.0
4493,"[0.5663259897365313, 0.589498108636255, 0.7934...","[0.3302448835639161, 0.8618969265668054, 1.005...","[0.22557622292544516, 0.5913805892148948, 0.86...",2.0
4494,"[-0.12451888764832496, -0.1533932863772264, -0...","[0.5091239430400456, 0.3871063076627842, 0.277...","[-0.4752697902322011, -0.15912207005498966, -0...",2.0


## SWELL Dataset

In [27]:
def load_subject_pickle_data(data_dir, sub_id):
    # load subject data and labels
    sub_path = os.path.join(data_dir, sub_id)
    sub_data = pickle.load(open(sub_path, "rb"), encoding="latin1")
    labels = np.array(sub_data["label"])
    
    # Preprocessing the ECG signal
    ecg_raw = np.array(sub_data["ECG"])
    cleaned_ecg = ecg_preprocessing(ecg_raw, sample_rate=WESAD_SR, downsample_rate=DOWNSAMPLE_SR)
    ecg_singal =  cleaned_ecg
    #ecg_singal = z_scale(cleaned_ecg.reshape(-1, 1))

    # assign timestamps
    start_dt = dt(2017, 11, 28, 0, 0, 0, 0, tzinfo=pytz.UTC)

    # freq = 1 / 128 seconds
    label_freq = pd.DateOffset(seconds=1 / WESAD_SR)
    ecg_freq = pd.DateOffset(seconds=1 / DOWNSAMPLE_SR)
    label_times = pd.date_range(start=start_dt, periods=labels.shape[0], freq=label_freq, tz="UTC")
    ecg_times = pd.date_range(start=start_dt, periods=ecg_singal.shape[0], freq=ecg_freq, tz="UTC")

    #Prepare the dataframe
    label_df = pd.DataFrame({"label_sample_timestamp_utc": label_times, "y": labels})
    ECG_df = pd.DataFrame({"ecg_sample_timestamp_utc": ecg_times,"ecg": ecg_singal.flatten()})

    # align labels to ECG via merge_asof
    ECG_df = pd.merge_asof(
        ECG_df.sort_values("ecg_sample_timestamp_utc"),
        label_df.sort_values("label_sample_timestamp_utc"),
        left_on="ecg_sample_timestamp_utc",
        right_on="label_sample_timestamp_utc",
        direction="nearest",
    )

    # drop label timestamp column
    ECG_df.drop(columns="label_sample_timestamp_utc", inplace=True)
    # make timestamp index
    ECG_df.set_index("ecg_sample_timestamp_utc", inplace=True, drop=False)
    return ECG_df


In [28]:
# aggregate labels per segment (drop segments with mixed labels)
def agg_labels(label_list):
    label_set = set(label_list)
    if len(label_set) != 1:
        return np.nan
    l = list(label_set)[0]
    if l in [0,1]:
        return l
    return np.nan


In [29]:
def load_all_subjects(data_dir, output_dir, segment_length, segment_stride):
    # run on all subjects
    subject = os.listdir(data_dir)
    print(subject)
    subject.sort()
    for sub_id in tqdm(subject):
        if sub_id.startswith("s"):
            print("working with the subject", sub_id)
            process_subject_data(
                data_dir, sub_id, output_dir, 
                segment_length=segment_length, segment_stride=segment_stride,
            )


In [None]:
SEGMENT_LENGTH = 1280
SEGMENT_STRIDE = 320
SWELL_DATA_DIR = "/home/s223149341/SSL-invariance-Subject_Project_model/data/SWELL"
SWELL_OUTPUT_DIR = "/home/s223149341/SSL-invariance-Subject_Project_model/data/SWELL/SWELL_1280_320"

load_all_subjects(SWELL_DATA_DIR, SWELL_OUTPUT_DIR,
                    SEGMENT_LENGTH, SEGMENT_STRIDE)


['s5_phsyio.pkl', 's25_phsyio.pkl', 's21_phsyio.pkl', 's8_phsyio.pkl', 's13_phsyio.pkl', 's9_phsyio.pkl', 'SWELL_1280_640', 's14_phsyio.pkl', 's23_phsyio.pkl', 's12_phsyio.pkl', 's11_phsyio.pkl', 's24_phsyio.pkl', 's22_phsyio.pkl', 's15_phsyio.pkl', 's16_phsyio.pkl', 's7_phsyio.pkl', 's18_phsyio.pkl', 's3_phsyio.pkl', 's2_phsyio.pkl', 's20_phsyio.pkl', 's19_phsyio.pkl', 's1_phsyio.pkl', 's17_phsyio.pkl', 's6_phsyio.pkl', 's4_phsyio.pkl', 's10_phsyio.pkl']


  0%|          | 0/26 [00:00<?, ?it/s]

working with the subject s10_phsyio.pkl
(array([ 0.,  1., nan]), array([3997, 3915,   12]))
(7912, 4)


  8%|▊         | 2/26 [05:11<1:02:16, 155.70s/it]

working with the subject s11_phsyio.pkl
(array([ 0.,  1., nan]), array([2531,  458,    4]))
(2989, 4)


 12%|█▏        | 3/26 [07:12<54:06, 141.17s/it]  

working with the subject s12_phsyio.pkl
(array([ 0.,  1., nan]), array([3926, 4683,   12]))
(8609, 4)


 15%|█▌        | 4/26 [13:41<1:25:43, 233.81s/it]

working with the subject s13_phsyio.pkl
(array([ 0.,  1., nan]), array([3304, 4370,   12]))
(7674, 4)


 19%|█▉        | 5/26 [19:27<1:35:30, 272.90s/it]

working with the subject s14_phsyio.pkl
(array([ 0.,  1., nan]), array([3926, 4659,   12]))
(8585, 4)


 23%|██▎       | 6/26 [25:22<1:40:00, 300.03s/it]

working with the subject s15_phsyio.pkl
(array([ 0.,  1., nan]), array([3856, 3686,   12]))
(7542, 4)


 27%|██▋       | 7/26 [31:17<1:40:38, 317.80s/it]

working with the subject s16_phsyio.pkl
(array([ 0.,  1., nan]), array([3724, 4695,   12]))
(8419, 4)


 31%|███       | 8/26 [37:06<1:38:17, 327.65s/it]

working with the subject s17_phsyio.pkl
(array([ 0.,  1., nan]), array([3874, 4696,   12]))
(8570, 4)


 35%|███▍      | 9/26 [43:12<1:36:08, 339.34s/it]

working with the subject s18_phsyio.pkl
(array([ 0.,  1., nan]), array([3224, 4695,   12]))
(7919, 4)


 38%|███▊      | 10/26 [49:06<1:31:42, 343.90s/it]

working with the subject s19_phsyio.pkl
(array([ 0.,  1., nan]), array([3529, 4443,   12]))
(7972, 4)


 42%|████▏     | 11/26 [56:29<1:33:34, 374.28s/it]

working with the subject s1_phsyio.pkl
(array([ 0.,  1., nan]), array([3926, 4650,   12]))
(8576, 4)


 46%|████▌     | 12/26 [1:04:41<1:35:36, 409.72s/it]

working with the subject s20_phsyio.pkl
(array([ 0.,  1., nan]), array([2733, 3149,   12]))
(5882, 4)


 50%|█████     | 13/26 [1:10:19<1:24:04, 388.05s/it]

working with the subject s21_phsyio.pkl
(array([ 0.,  1., nan]), array([3926, 3080,   12]))
(7006, 4)


 54%|█████▍    | 14/26 [1:16:13<1:15:36, 378.02s/it]

working with the subject s22_phsyio.pkl
(array([ 0.,  1., nan]), array([3856, 1929,   12]))
(5785, 4)


 58%|█████▊    | 15/26 [1:21:08<1:04:41, 352.89s/it]

working with the subject s23_phsyio.pkl
(array([ 0.,  1., nan]), array([1050, 1821,    4]))
(2871, 4)


 62%|██████▏   | 16/26 [1:23:14<47:25, 284.59s/it]  

working with the subject s24_phsyio.pkl
(array([ 0.,  1., nan]), array([3759, 2997,   12]))
(6756, 4)


 65%|██████▌   | 17/26 [1:29:03<45:36, 304.07s/it]

working with the subject s25_phsyio.pkl
(array([ 0.,  1., nan]), array([3877, 2290,   12]))
(6167, 4)


 69%|██████▉   | 18/26 [1:34:06<40:28, 303.55s/it]

working with the subject s2_phsyio.pkl
(array([ 0.,  1., nan]), array([4066, 3993,   12]))
(8059, 4)


 73%|███████▎  | 19/26 [1:41:07<39:32, 338.97s/it]

working with the subject s3_phsyio.pkl
(array([ 0.,  1., nan]), array([3266, 4513,   12]))
(7779, 4)


 77%|███████▋  | 20/26 [1:47:29<35:11, 351.90s/it]

working with the subject s4_phsyio.pkl
(array([ 0.,  1., nan]), array([3754, 2818,   12]))
(6572, 4)


 81%|████████  | 21/26 [1:52:38<28:14, 338.86s/it]

working with the subject s5_phsyio.pkl
(array([ 0.,  1., nan]), array([3785, 3941,   12]))
(7726, 4)


 85%|████████▍ | 22/26 [1:58:45<23:10, 347.53s/it]

working with the subject s6_phsyio.pkl
(array([ 0.,  1., nan]), array([3927, 4484,   12]))
(8411, 4)


 88%|████████▊ | 23/26 [2:05:12<17:57, 359.25s/it]

working with the subject s7_phsyio.pkl
(array([ 0.,  1., nan]), array([2465, 4766,   12]))
(7231, 4)


 92%|█████████▏| 24/26 [2:11:15<12:00, 360.43s/it]

working with the subject s8_phsyio.pkl
(array([ 0.,  1., nan]), array([1460, 3249,   12]))
(4709, 4)


 96%|█████████▌| 25/26 [2:15:16<05:24, 324.63s/it]

working with the subject s9_phsyio.pkl
(array([ 0.,  1., nan]), array([3236, 4485,   12]))
(7721, 4)


100%|██████████| 26/26 [2:20:56<00:00, 325.25s/it]


## Physico_net_2017


In [4]:


def read_subject_data(hea):
    """
    Load ALL CinC2017 training ECGs as fixed-length windows.
    - root_dir: path to the 1.0.0 folder that contains 'training/'
    - preproc: callable f(x)->x (optional), e.g., baseline-wander removal
    - return_ids: if True, also return parallel list of record_ids for each window
    """
    
    rec_id = os.path.splitext(os.path.basename(hea))[0]   # "A00001"
    rec_path = os.path.join(os.path.dirname(hea), rec_id) # no extension

    sig, _ = wfdb.rdsamp(rec_path)   # works for .mat+.hea
    x = sig.squeeze().astype(np.float32)            # make 1-D if single lead
    x = ecg_preprocessing(x, sample_rate=PHYSCIO_2017_SR, downsample_rate=DOWNSAMPLE_SR)
    #s_scaled = z_scale(x.reshape(-1, 1)).squeeze().astype(np.float32)  # back to 1D
    return x


def create_segments_no_labels(ecg_array,segment_length, segment_stride):
    ecg_segs = []
    left_buffers = []
    right_buffers = []
    
    for i in range(1, len(ecg_array) - segment_length, segment_stride):
        ecg_seg = ecg_array[i : i + segment_length]
        ecg_segs.append(list(ecg_seg))

        # left buffer
        if i - segment_length >= 0:
            left_buffers.append(list(ecg_array[i - segment_length : i]))
        else:
            left_buffer = np.full_like(ecg_seg, np.nan)
            remaining_left_values = ecg_array[:i]
            left_buffer[-remaining_left_values.shape[0] :] = remaining_left_values
            left_buffers.append(left_buffer)

        # right buffer
        if i + 2 * segment_length < len(ecg_array):
            right_buffers.append(list(ecg_array[i + segment_length : i + 2 * segment_length]))
        else:
            right_buffer = np.full_like(ecg_seg, np.nan)
            remaining_right_values = ecg_array[i + segment_length :]
            right_buffer[: remaining_right_values.shape[0]] = remaining_right_values
            right_buffers.append(right_buffer)
            
    # ---------- CREATE UNLABELLED PARQUET ----------
    df_unlabelled = pd.DataFrame({
        "x": ecg_segs,
        "x_left_buffer": left_buffers,
        "x_right_buffer": right_buffers,
    })

    return df_unlabelled


def process_psychio_net(
    data_dir, output_dir,
    segment_length=640, segment_stride=1):
    df_list = []
    #Load all the head path
    hea_paths = sorted(glob.glob(os.path.join(data_dir, "A*.hea")))
    subjects = sorted({fname.split(".")[0] for fname in os.listdir(data_dir)})

    for hea, s in tqdm(zip(hea_paths, subjects), total=len(subjects)):
        ecg_array = read_subject_data(hea)
        # segment data
        df_unlabelled = create_segments_no_labels(ecg_array, segment_length, segment_stride)
        df_unlabelled['subject_id'] = [s] * len(df_unlabelled)
        df_unlabelled.info
        df_list.append(df_unlabelled)

    data_df = pd.concat(df_list)
    # save
    print("Number of segments:", data_df.shape[0])
    os.makedirs(output_dir, exist_ok=True)
    out_path = os.path.join(output_dir, "physionet2017_unlabelled_10_5.parquet")

    print("Saving to:", os.path.abspath(out_path))   # <--- add this
    data_df.to_parquet(out_path, index=False)
    print("File exists after save:", os.path.exists(out_path))

        
    

In [5]:
SEGMENT_LENGTH = 1280
SEGMENT_STRIDE = 320
PSY_DATA_DIR = "/home/s223149341/SSL-invariance-Subject_Project_model/data/PhysioNet2017/raw_data"
OUTPUT_DIR = "/home/s223149341/SSL-invariance-Subject_Project_model/data/PhysioNet2017"

process_psychio_net(PSY_DATA_DIR, OUTPUT_DIR,
                    SEGMENT_LENGTH, SEGMENT_STRIDE)


  0%|          | 0/8531 [00:00<?, ?it/s]

100%|█████████▉| 8528/8531 [02:29<00:00, 57.09it/s]


Number of segments: 77640
Saving to: /home/s223149341/SSL-invariance-Subject_Project_model/data/PhysioNet2017/physionet2017_unlabelled_10_5.parquet
File exists after save: True
