> This notebook applies the moments feature engineering to the new windowed HDF5 files and saves the results as additional HDF5 files.

In [1]:
import json
import os
import time

import h5py
import numpy as np
import pandas as pd

from segraw_featureengr import *

## Khushaba Moments

In [2]:

def normalize_array_to_unit_std(arr):
    """
    Scale an array so that its sample standard deviation equals 1.

    The values are only divided by the sample standard deviation (ddof=1);
    they are not mean-centred.

    Args:
        arr (array-like): 1D sequence of numbers; converted to a float array.

    Returns:
        np.ndarray: Float array equal to ``arr / np.std(arr, ddof=1)``.

    Raises:
        ValueError: If the input has fewer than two elements, if its standard
            deviation is not finite (e.g. the input contains NaN or inf), or
            if its standard deviation is zero.
    """
    arr = np.asarray(arr, dtype=float)

    # A sample standard deviation (ddof=1) is undefined for fewer than two
    # values; fail with a clear message instead of a numpy RuntimeWarning
    # followed by a misleading "zero" error.
    if arr.size < 2:
        raise ValueError(
            "Need at least 2 values to compute a sample standard deviation; "
            f"got {arr.size}."
        )

    std = np.std(arr, ddof=1)  # Sample standard deviation

    # NaN/inf in the input makes the standard deviation non-finite. Report
    # that explicitly rather than as "zero".
    if not np.isfinite(std):
        n_bad = int(np.count_nonzero(~np.isfinite(arr)))
        raise ValueError(
            f"Standard deviation is not finite ({std}); cannot normalize. "
            f"Input has {n_bad} non-finite value(s) out of {arr.size}."
        )

    if std > 0:
        return arr / std

    # Summarise the offending input in the exception instead of printing the
    # whole array to stdout.
    raise ValueError(
        "Standard deviation is zero; cannot normalize. "
        f"(size={arr.size}, min={arr.min()}, max={arr.max()})"
    )


In [3]:

def extract_spectral_features(segment, fs=2000, normalize_feature_vector=True):
    """
    Build the flat moment-feature vector for a single windowed segment.

    For every column of ``segment`` five values are produced, in this order:
    the log value returned by ``zero_order``, by ``second_order``, by
    ``fourth_order``, then ``sparsity`` and ``irregularity_factor``. The
    per-channel groups are concatenated channel by channel.

    Args:
        segment: 2D array indexed as [window_len, n_channels].
        fs: Forwarded unchanged to the moment helpers (presumably the
            sampling rate in Hz -- confirm against ``segraw_featureengr``).
        normalize_feature_vector: If True, the concatenated vector is passed
            through ``normalize_array_to_unit_std`` before it is returned.

    Returns:
        1D array with ``n_channels * 5`` entries. It is float32 when
        ``normalize_feature_vector`` is False; otherwise it is whatever
        ``normalize_array_to_unit_std`` returns for that float32 vector.
    """
    collected = []
    for ch_idx in range(segment.shape[1]):
        signal = segment[:, ch_idx]
        # The zero-order moment feeds the higher-order helpers, so it must be
        # computed first.
        log_m0, m0 = zero_order(signal, fs)
        log_m2, m2 = second_order(signal, m0, fs)
        log_m4, m4 = fourth_order(signal, m0, fs)
        collected += [
            log_m0,
            log_m2,
            log_m4,
            sparsity(m0, m2, m4),
            irregularity_factor(signal, m0, m2, m4, fs),
        ]

    features = np.array(collected, dtype=np.float32)
    if not normalize_feature_vector:
        return features
    return normalize_array_to_unit_std(features)



In [4]:
def process_features_and_save(in_h5_path, out_h5_path, fs=2000, batch_size=10000):
    """
    Compute moment features for every segment of an HDF5 file and save them.

    Reads the ``features`` dataset of ``in_h5_path`` (indexed as
    [n_samples, window_len, n_channels]) in batches, runs
    ``extract_spectral_features`` on each segment, and writes the results to a
    ``features`` dataset of shape (n_samples, n_channels * 5) in
    ``out_h5_path``. The ``participant``, ``gesture_id`` and ``gesture_num``
    datasets are copied over as-is.

    Args:
        in_h5_path: Path of the input HDF5 file.
        out_h5_path: Path of the output HDF5 file. It is opened in 'w' mode,
            so an existing file at this path is overwritten.
        fs: Forwarded to ``extract_spectral_features``.
        batch_size: Number of segments loaded into memory and processed per
            batch. Must be positive.

    Raises:
        ValueError: If ``batch_size`` is not positive, or if the input
            ``features`` dataset is not 3-dimensional.
        KeyError: If one of the metadata datasets is missing from the input.
    """
    # Validate before touching any file. A negative batch size would skip the
    # loop entirely and leave an all-zero output file, and a zero batch size
    # would fail only after the output had already been created.
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer; got {batch_size!r}")

    n_features_per_channel = 5  # zero, second, fourth, sparsity, irregularity
    metadata_keys = ('participant', 'gesture_id', 'gesture_num')

    with h5py.File(in_h5_path, 'r') as fin:
        segments_ds = fin['features']
        if len(segments_ds.shape) != 3:
            raise ValueError(
                "Expected a 3D 'features' dataset [n_samples, window_len, n_channels]; "
                f"got shape {segments_ds.shape}"
            )
        n_samples, _, n_channels = segments_ds.shape
        total_features = n_channels * n_features_per_channel

        # Check the metadata up front so a bad input does not leave behind a
        # freshly truncated, half-initialised output file.
        missing = [k for k in metadata_keys if k not in fin]
        if missing:
            raise KeyError(f"Input file is missing metadata dataset(s): {missing}")

        with h5py.File(out_h5_path, 'w') as fout:
            features_ds = fout.create_dataset('features', shape=(n_samples, total_features), dtype='float32')
            # Copy metadata
            for k in metadata_keys:
                fout.create_dataset(k, data=fin[k])

            for batch_start in range(0, n_samples, batch_size):
                batch_end = min(batch_start + batch_size, n_samples)
                batch_segments = segments_ds[batch_start:batch_end]  # shape [batch, window_len, n_channels]
                feats_batch = np.zeros((batch_end - batch_start, total_features), dtype=np.float32)
                for i, segment in enumerate(batch_segments):
                    feats_batch[i] = extract_spectral_features(segment, fs=fs)
                features_ds[batch_start:batch_end] = feats_batch
                # Flush so each progress line is emitted as soon as its batch
                # is written, rather than sitting in a stdout buffer.
                print(f"Processed {batch_end} / {n_samples} segments", flush=True)


In [5]:
# Directory that holds the windowed HDF5 files. Set the META_GESTURE_DATA_DIR
# environment variable to run this notebook on another machine; when it is
# unset (or empty) the original Box location below is used.
DEFAULT_BOX_PATH = "C:\\Users\\kdmen\\Box\\Yamagami Lab\\Data\\Meta_Gesture_Project\\windowed_data_augmentation\\"
base_box_path = os.environ.get("META_GESTURE_DATA_DIR") or DEFAULT_BOX_PATH
# File paths are built from this value by plain string concatenation, so it
# must end with a path separator.
if not base_box_path.endswith(("\\", "/")):
    base_box_path += os.sep

# Number of segments passed as batch_size to process_features_and_save.
max_batch_size = 30_000


In [None]:
# Input: NoFE_windowed_window200ms_step100ms (a previous run took about 8 minutes).
process_features_and_save(
    base_box_path + "NoFE_windowed_window200ms_step100ms.h5",
    base_box_path + "moments_windowed_window200ms_step100ms.h5",
    fs=2000,
    batch_size=max_batch_size,  # Tune based on RAM/speed
)


Processed 30000 / 104244 segments
Processed 60000 / 104244 segments
Processed 90000 / 104244 segments
Processed 104244 / 104244 segments


In [None]:
# Input: NoFE_windowed_window300ms_step20ms (a previous run took about 64 minutes).
process_features_and_save(
    base_box_path + "NoFE_windowed_window300ms_step20ms.h5",
    base_box_path + "moments_windowed_window300ms_step20ms.h5",
    fs=2000,
    batch_size=max_batch_size,  # Tune based on RAM/speed
)


Processed 30000 / 498872 segments
Processed 60000 / 498872 segments
Processed 90000 / 498872 segments
Processed 120000 / 498872 segments
Processed 150000 / 498872 segments
Processed 180000 / 498872 segments
Processed 210000 / 498872 segments
Processed 240000 / 498872 segments
Processed 270000 / 498872 segments
Processed 300000 / 498872 segments
Processed 330000 / 498872 segments
Processed 360000 / 498872 segments
Processed 390000 / 498872 segments
Processed 420000 / 498872 segments
Processed 450000 / 498872 segments
Processed 480000 / 498872 segments
Processed 498872 / 498872 segments


In [6]:
# Marker cell: prints a fixed string and has no other effect.
print("Ready")


Ready


In [7]:
# Input: NoFE_windowed_window300ms_step10ms (the big input file).
# NOTE: an earlier attempt ran for 37 minutes without printing anything. It was
# never established whether something had broken or the data was still being
# loaded. The "Started"/"Finished" prints bracket the call.
print("Started")
process_features_and_save(
    base_box_path + "NoFE_windowed_window300ms_step10ms.h5",
    base_box_path + "moments_windowed_window300ms_step10ms.h5",
    fs=2000,
    batch_size=max_batch_size,  # Tune based on RAM/speed
)
print("Finished")


Started
Processed 30000 / 997448 segments
Processed 60000 / 997448 segments
Processed 90000 / 997448 segments
Processed 120000 / 997448 segments
Processed 150000 / 997448 segments
Processed 180000 / 997448 segments
Processed 210000 / 997448 segments
Processed 240000 / 997448 segments
Processed 270000 / 997448 segments
Processed 300000 / 997448 segments
Processed 330000 / 997448 segments
Processed 360000 / 997448 segments
Processed 390000 / 997448 segments
Processed 420000 / 997448 segments
Processed 450000 / 997448 segments
Processed 480000 / 997448 segments
Processed 510000 / 997448 segments
Processed 540000 / 997448 segments
Processed 570000 / 997448 segments
Processed 600000 / 997448 segments
Processed 630000 / 997448 segments
Processed 660000 / 997448 segments
Processed 690000 / 997448 segments
Processed 720000 / 997448 segments
Processed 750000 / 997448 segments
Processed 780000 / 997448 segments
Processed 810000 / 997448 segments
Processed 840000 / 997448 segments
Processed 87000

In [7]:
# Input: NoFE_windowed_window200ms_step20ms.
# The "Started"/"Finished" prints bracket the call.
print("Started")
process_features_and_save(
    base_box_path + "NoFE_windowed_window200ms_step20ms.h5",
    base_box_path + "moments_windowed_window200ms_step20ms.h5",
    fs=2000,
    batch_size=max_batch_size,  # Tune based on RAM/speed
)
print("Finished")


Started
Processed 30000 / 514872 segments
Processed 60000 / 514872 segments
Processed 90000 / 514872 segments
Processed 120000 / 514872 segments
Processed 150000 / 514872 segments
Processed 180000 / 514872 segments
Processed 210000 / 514872 segments
Processed 240000 / 514872 segments
Processed 270000 / 514872 segments
Processed 300000 / 514872 segments
Processed 330000 / 514872 segments
Processed 360000 / 514872 segments
Processed 390000 / 514872 segments
Processed 420000 / 514872 segments
Processed 450000 / 514872 segments
Processed 480000 / 514872 segments
Processed 510000 / 514872 segments
Processed 514872 / 514872 segments
Finished
