In [1]:
import pandas as pd
from scipy.stats import skew, kurtosis, entropy
from scipy.signal import welch
import numpy as np
from scipy.fft import rfft, rfftfreq

In [2]:
# Load the raw sensor DataFrame from a pickle file (the file name suggests it
# was resampled to 100 ms -- TODO confirm against the upstream resampling step).
# NOTE(review): unpickling can execute arbitrary code, so only load pickle
# files that come from a trusted source.
df = pd.read_pickle("../../data/raw/df_resample_100ms.pkl")

In [3]:
# Display the loaded frame; the rendered output is truncated to the first and
# last rows.
df

Unnamed: 0_level_0,acc_x,acc_y,acc_z,gyro_x,gyro_y,gyro_z,label,subject_id,trial,age,height,weight,gender
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1970-01-01 00:38:38.700,0.948777,-9.636166,0.002699,0.003818,0.016875,0.006643,BSC,1.0,1.0,32.0,180.0,85.0,M
1970-01-01 00:38:38.800,0.948993,-9.759188,0.087482,0.020693,0.027565,0.015669,BSC,1.0,1.0,32.0,180.0,85.0,M
1970-01-01 00:38:38.900,1.071514,-9.787465,-0.093610,0.168721,0.075747,0.037797,BSC,1.0,1.0,32.0,180.0,85.0,M
1970-01-01 00:38:39.000,1.135679,-9.754036,0.065878,0.339488,0.078970,0.040134,BSC,1.0,1.0,32.0,180.0,85.0,M
1970-01-01 00:38:39.100,1.126174,-9.443248,0.075006,0.594494,-0.012065,0.103557,BSC,1.0,1.0,32.0,180.0,85.0,M
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1970-01-01 04:49:55.200,2.128992,14.084110,0.907692,-1.022787,0.162796,0.609964,WAL,67.0,1.0,23.0,180.0,67.0,M
1970-01-01 04:49:55.300,-1.563028,11.139874,1.061856,-2.417346,-0.699792,1.861551,WAL,67.0,1.0,23.0,180.0,67.0,M
1970-01-01 04:49:55.400,2.767679,8.869539,0.154884,-0.998184,0.612194,1.557859,WAL,67.0,1.0,23.0,180.0,67.0,M
1970-01-01 04:49:55.500,0.149554,5.565219,2.023148,0.629237,0.462074,-0.136162,WAL,67.0,1.0,23.0,180.0,67.0,M


In [4]:

# Sort the whole DataFrame by subject_id, then trial, then timestamp
# ('timestamp' is the name of the index level, not a regular column), so the
# rows of each subject/trial are contiguous and in chronological order.
df = df.sort_values(by=['subject_id', 'trial', 'timestamp'])

In [5]:
# Persist the sorted DataFrame as a pickle file (presumably consumed by a
# Docker-based step, judging by the file name -- TODO confirm).
df.to_pickle("../../data/external/df_docker.pkl")

In [6]:
def energy_entropy(signal, num_blocks=10):
    """
    Shannon entropy (base 2) of the energy distribution across blocks.

    The signal is cut into ``num_blocks`` consecutive blocks of equal length
    ``len(signal) // num_blocks``; samples left over at the end that do not
    fill a whole block are ignored. The energy of a block is the sum of its
    squared samples. Block energies are normalised to sum to one and the
    Shannon entropy of that distribution is returned.

    Parameters
    ----------
    signal : array-like
        Sequence of numeric samples (list, tuple, ndarray, ...).
    num_blocks : int, default 10
        Number of blocks requested. When the signal has fewer samples than
        ``num_blocks``, one block per sample is used instead, so that no
        block is empty.

    Returns
    -------
    float
        Entropy in bits, between 0 and ``log2`` of the number of blocks used
        (up to the 1e-10 smoothing terms). An empty signal yields 0.0.

    Raises
    ------
    ValueError
        If ``num_blocks`` is smaller than 1.
    """
    if num_blocks < 1:
        raise ValueError("num_blocks must be a positive integer")

    # Coerce to a float array so plain Python sequences are accepted too.
    samples = np.asarray(signal, dtype=float)

    # Never use more blocks than there are samples; otherwise every block
    # would be empty and the result would be meaningless.
    n_blocks = min(int(num_blocks), len(samples))
    if n_blocks == 0:
        return np.float64(0.0)

    block_size = len(samples) // n_blocks
    # One row per block; the trailing remainder is dropped.
    blocks = samples[:n_blocks * block_size].reshape(n_blocks, -1)
    energies = np.sum(blocks ** 2, axis=1)

    # The 1e-10 terms avoid division by zero and log2(0) for silent blocks.
    energies_norm = energies / (np.sum(energies) + 1e-10)
    entropy_val = -np.sum(energies_norm * np.log2(energies_norm + 1e-10))
    return entropy_val

In [7]:
import numpy as np
from scipy.stats import skew, kurtosis

def extract_features_from_windows(df,
                                  window_duration_s=2.5,
                                  overlap=0.5,
                                  sampling_rate_ms=100):
    """
    Slide a fixed-length window over the rows of ``df`` and build one feature
    vector per window.

    Rows are consumed in their existing order and no grouping is applied, so
    ``df`` is expected to hold a single contiguous recording (one trial of one
    subject); with several recordings, windows would straddle their borders.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``acc_x``, ``acc_y``, ``acc_z``, ``gyro_x``,
        ``gyro_y``, ``gyro_z`` and ``label``.
    window_duration_s : float, default 2.5
        Window length in seconds.
    overlap : float, default 0.5
        Fraction of a window shared with the next one; must be below 1.
    sampling_rate_ms : float, default 100
        Time between two consecutive samples, in milliseconds.

    Returns
    -------
    features : list of numpy.ndarray
        One 58-element vector per window, laid out as:
        mean (6), std (6), min (6), max (6), skewness (6), kurtosis (6) and
        max-min range (6) of the six sensor axes; accelerometer slope (1);
        tilt-angle mean, std, skewness, kurtosis (4); accelerometer magnitude
        mean, std, min, max, range, energy entropy (6); gyroscope magnitude
        mean, std, min, max, range (5).
    labels : list
        The ``label`` value of the last sample of each window.

    Raises
    ------
    ValueError
        If the window would contain no sample or ``overlap`` is not below 1.
    """
    # A tiny epsilon protects int() truncation from float round-off: e.g.
    # 10 * (1 - 0.9) evaluates to 0.9999999999999998 and would floor to 0.
    eps = 1e-9
    window_size = int(window_duration_s * 1000 / sampling_rate_ms + eps)
    if window_size < 1:
        raise ValueError("window_duration_s must span at least one sample")
    if overlap >= 1:
        raise ValueError("overlap must be smaller than 1")
    # Always advance by at least one sample so the loop below is well defined.
    step_size = max(1, int(window_size * (1 - overlap) + eps))

    sensor_columns = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']
    signal_data = df[sensor_columns].to_numpy(dtype=float)
    class_labels = df['label'].to_numpy()

    features = []
    labels = []

    for start in range(0, len(signal_data) - window_size + 1, step_size):
        stop = start + window_size
        window = signal_data[start:stop]

        # Per-axis statistics
        mean_feat = window.mean(axis=0)
        std_feat = window.std(axis=0)
        min_feat = window.min(axis=0)
        max_feat = window.max(axis=0)
        skew_feat = skew(window, axis=0)
        kurt_feat = kurtosis(window, axis=0)
        minmax_diff = max_feat - min_feat

        # Slope (SL): Euclidean norm of the three accelerometer ranges
        acc_slope = np.sqrt(np.sum(minmax_diff[:3] ** 2))

        # Accelerometer magnitude
        acc_mag = np.sqrt(np.sum(window[:, :3] ** 2, axis=1))
        acc_mag_mean = np.mean(acc_mag)
        acc_mag_std = np.std(acc_mag)
        acc_mag_min = np.min(acc_mag)
        acc_mag_max = np.max(acc_mag)
        acc_mag_diff = acc_mag_max - acc_mag_min
        acc_mag_energy_entropy = energy_entropy(acc_mag, num_blocks=10)

        # Tilt angle (T_Ai): arcsin of acc_y over the acceleration magnitude.
        # Samples with zero magnitude get a ratio of 0 instead of 0/0 = NaN,
        # and the ratio is clipped to arcsin's domain against round-off.
        sin_tilt = np.divide(window[:, 1], acc_mag,
                             out=np.zeros_like(acc_mag),
                             where=acc_mag != 0)
        tilt_angles = np.arcsin(np.clip(sin_tilt, -1.0, 1.0))
        tilt_mean = np.mean(tilt_angles)
        tilt_std = np.std(tilt_angles)
        tilt_skew = skew(tilt_angles)
        tilt_kurt = kurtosis(tilt_angles)

        # Gyroscope magnitude
        gyro_mag = np.sqrt(np.sum(window[:, 3:] ** 2, axis=1))
        gyro_mag_mean = np.mean(gyro_mag)
        gyro_mag_std = np.std(gyro_mag)
        gyro_mag_min = np.min(gyro_mag)
        gyro_mag_max = np.max(gyro_mag)
        gyro_mag_diff = gyro_mag_max - gyro_mag_min

        # Final feature vector (order documented in the docstring)
        feat = np.concatenate([
            mean_feat,
            std_feat,
            min_feat,
            max_feat,
            skew_feat,
            kurt_feat,
            minmax_diff,
            [acc_slope],
            [tilt_mean, tilt_std, tilt_skew, tilt_kurt],
            [acc_mag_mean, acc_mag_std, acc_mag_min, acc_mag_max, acc_mag_diff, acc_mag_energy_entropy],
            [gyro_mag_mean, gyro_mag_std, gyro_mag_min, gyro_mag_max, gyro_mag_diff]
        ])

        features.append(feat)
        # The window is labelled with the class of its last sample.
        labels.append(class_labels[stop - 1])

    return features, labels


In [8]:
# extract_features_from_windows expects a single recording (one trial of one
# subject). Window every (subject_id, trial) group separately so that no
# window mixes samples from two different recordings.
# NOTE: groupby skips rows whose subject_id or trial is NaN.
features, labels = [], []
for _, recording in df.groupby(['subject_id', 'trial'], sort=False):
    recording_features, recording_labels = extract_features_from_windows(
        recording, window_duration_s=2.5, overlap=0.5, sampling_rate_ms=100
    )
    features.extend(recording_features)
    labels.extend(recording_labels)