In [1]:
import os
import numpy as np
from sklearn.decomposition import PCA
import random
import matplotlib.pyplot as plt
import sys
sys.path.insert(1, '../src/')
from config import raw_data_path, univariate_data_path, processed_data_path
from scipy.signal import resample
from preprocessing_modules import EHGRecord, trim_target, filter_target, z_normalize_target, check_normalize_target, remove_records, z_normalize_signals



In [2]:
dataset = 'target'
print('Now processing: ', dataset)
data_dir = os.path.join(processed_data_path, dataset + "_preprocessed_old.npy")
data = np.load(data_dir, allow_pickle=True)
print(len(data))
print(data[0])

Now processing:  target
168
{'record_name': np.str_('Hopper-2023_05_25_12_03_38-0000010090-0003'), 'signal': array([[ -3.4528623 , -17.36775705, -11.05391952,  -3.46650686,
         -2.6896713 ,  -1.84537916],
       [ -3.39068175, -17.07373422, -10.8699895 ,  -3.41311593,
         -2.6480211 ,  -1.81727669],
       [ -3.32876095, -16.78078961, -10.68671023,  -3.35987783,
         -2.60649057,  -1.78925403],
       ...,
       [  0.60066929,   0.2995247 ,   0.3930668 ,  -0.31037572,
         -0.51762818,   0.23008789],
       [  0.6006698 ,   0.29952186,   0.39306581,  -0.31037678,
         -0.51762974,   0.23008839],
       [  0.6006702 ,   0.29951964,   0.39306503,  -0.31037761,
         -0.51763096,   0.23008877]]), 'fs': np.int64(128), 'preterm': 0}


In [3]:
# raw_data_dir = os.path.join(raw_data_path, dataset + "_data.npy")
# print(raw_data_dir)
# raw_data = np.load(data_dir, allow_pickle=True)
# print(len(raw_data))

In [4]:
# print(raw_data[0])

In [5]:
def convert_target_to_univariate(data):
    """
    Converts a list of multivariate time series records to univariate ones.
    
    Parameters:
    - data: list of dicts, each with keys: 'record_name', 'signal', 'fs', 'preterm'
    
    Returns:
    - List of univariate records
    """
    univariate_data = []
    
    for record in data:
        signal = record['signal']  # Shape: (T, C)
        fs = record['fs']
        preterm = record['preterm']
        base_name = record['record_name']
        
        num_channels = signal.shape[1]
        
        for i in range(num_channels):
            univariate_signal = signal[:, i]
            univariate_data.append({
                'record_name': f"{base_name}-chan{i}",
                'signal': univariate_signal,
                'fs': fs,
                'preterm': preterm
            })
    
    return univariate_data

def convert_source_to_univariate(data):
    """
    Converts a list of multivariate time series records to univariate ones.
    
    Parameters:
    - data: list of dicts, each with keys: 'record_name', 'signal', 'fs', 'preterm'
    
    Returns:
    - List of univariate records
    """
    univariate_data = []
    
    for record in data:
        signal = record['signal']  # Shape: (T, C)
        fs = record['metadata']['fs']
        base_name = record['record_name']
        metadata = record['metadata']
        
        num_channels = signal.shape[1]
        
        for i in range(num_channels):
            univariate_signal = signal[:, i]
            univariate_data.append({
                'record_name': f"{base_name}-chan{i}",
                'signal': univariate_signal,
                'fs': fs,
                'metadata': metadata,
            })
    
    return univariate_data


In [6]:
# save_dir = os.path.join(univariate_data_path, dataset + "_univariate_raw.npy")
# np.save(save_dir, np.array(univariate_raw, dtype=object))

In [7]:
def downsample_target(univariate_data, target_fs=20):
    """
    Downsamples all signals OF TARGET SET in univariate_data to the target frequency (default: 20Hz).
    
    Parameters:
    - univariate_data (list of dicts): Each entry should have 'metadata' with 'fs' (sampling rate) and 'signal'.
    - target_fs (int): The target sampling frequency (default is 20 Hz).
    
    Returns:
    - downsampled_data (list of dicts): Same structure as input but with downsampled signals.
    """
    downsampled_data = []
    
    for entry in univariate_data:
        original_fs = entry['fs']
        print('óriginaloriginal_fs', original_fs)
        signal = entry['signal']
        
        # Compute the new length after downsampling
        new_length = int(len(signal) * target_fs / original_fs)
        print(target_fs, original_fs)
        print(len(signal), new_length)
        
        # Resample the signal
        downsampled_signal = resample(signal, new_length)
        print('old signal lengt', len(signal))
        print('new length', len(downsampled_signal))
        # Store the downsampled signal with updated metadata
        downsampled_entry = {
            'fs': target_fs,
            'signal': downsampled_signal,
            'record_name': entry['record_name'],
            'preterm': entry['preterm']
        }
        downsampled_data.append(downsampled_entry)
    downsampled_data = z_normalize_target(downsampled_data)
    save_dir = os.path.join(univariate_data_path, dataset + "_univariate_no_PCA_old.npy")
    np.save(save_dir, np.array(downsampled_data, dtype=object))

    print(f"univariate no pca transformation complete. Saved as '{save_dir}'.")
    return(downsampled_data)

def downsample_signal(univariate_data, target_fs=20):
    """
    Downsamples all signals in univariate_data to the target frequency (default: 20Hz).
    
    Parameters:
    - univariate_data (list of dicts): Each entry should have 'metadata' with 'fs' (sampling rate) and 'signal'.
    - target_fs (int): The target sampling frequency (default is 20 Hz).
    
    Returns:
    - downsampled_data (list of dicts): Same structure as input but with downsampled signals.
    """
    downsampled_data = []
    
    for entry in univariate_data:
        original_fs = entry['fs']
        signal = entry['signal']
        
        # Compute the new length after downsampling
        new_length = int(len(signal) * target_fs / original_fs)
        print(target_fs, original_fs)
        print(len(signal), new_length)
        
        # Resample the signal
        downsampled_signal = resample(signal, new_length)
        print('old signal lengt', len(signal))
        print('new length', len(downsampled_signal))
        # Store the downsampled signal with updated metadata
        downsampled_entry = {
            'metadata': {**entry['metadata'], 'fs': target_fs},
            'signal': downsampled_signal,
            'record_name': entry['record_name']
        }
        downsampled_data.append(downsampled_entry)
    
    downsampled_data = z_normalize_signals(downsampled_data)
    save_dir = os.path.join(univariate_data_path, dataset + "_univariate_no_PCA.npy")
    np.save(save_dir, np.array(downsampled_data, dtype=object))

    print(f"univariate no pca transformation complete. Saved as '{save_dir}'.")
    
    return downsampled_data
    
if dataset == 'target':
    univariate_dataset = convert_target_to_univariate(data)
    downsampled_data = downsample_target(univariate_dataset)
else: 
    univariate_dataset = convert_source_to_univariate(data)
    downsampled_data = downsample_signal(univariate_dataset)


óriginaloriginal_fs 128
20 128
421248 65820
old signal lengt 421248
new length 65820
óriginaloriginal_fs 128
20 128
421248 65820
old signal lengt 421248
new length 65820
óriginaloriginal_fs 128
20 128
421248 65820
old signal lengt 421248
new length 65820
óriginaloriginal_fs 128
20 128
421248 65820


old signal lengt 421248
new length 65820
óriginaloriginal_fs 128
20 128
421248 65820
old signal lengt 421248
new length 65820
óriginaloriginal_fs 128
20 128
421248 65820
old signal lengt 421248
new length 65820
óriginaloriginal_fs 128
20 128
605056 94540
old signal lengt 605056
new length 94540
óriginaloriginal_fs 128
20 128
605056 94540
old signal lengt 605056
new length 94540
óriginaloriginal_fs 128
20 128
605056 94540
old signal lengt 605056
new length 94540
óriginaloriginal_fs 128
20 128
605056 94540
old signal lengt 605056
new length 94540
óriginaloriginal_fs 128
20 128
605056 94540
old signal lengt 605056
new length 94540
óriginaloriginal_fs 128
20 128
605056 94540
old signal lengt 605056
new length 94540
óriginaloriginal_fs 128
20 128
459392 71780
old signal lengt 459392
new length 71780
óriginaloriginal_fs 128
20 128
459392 71780
old signal lengt 459392
new length 71780
óriginaloriginal_fs 128
20 128
459392 71780
old signal lengt 459392
new length 71780
óriginaloriginal_fs 128


In [8]:

data_dir = os.path.join(univariate_data_path, dataset + "_univariate_no_PCA.npy")
data = np.load(data_dir, allow_pickle=True)
print(len(data))

1008


In [9]:
print(data[0])
print(len(data[0]['signal']))

{'record_name': 'Hopper-2023_05_25_12_03_38-0000010090-0003-chan0', 'signal': array([[-1.70320952],
       [-3.41875192],
       [-2.48399423],
       ...,
       [ 0.73116872],
       [ 0.4095313 ],
       [ 0.95039407]]), 'fs': 20, 'preterm': 0}
65820
