# Feature Extraction

This notebook extracts statistical and signal-based features from the preprocessed ECG, EDA, and respiration (Resp) signals of each subject and compiles them into a structured pandas DataFrame for model development.

In [None]:
# Importing Dependencies

import os
import warnings

import numpy as np
import pandas as pd
from scipy.signal import find_peaks

# Disable UserWarnings
warnings.filterwarnings("ignore", category=UserWarning)


In [3]:
# Read the preprocessed windows and their labels for a single subject (S4).
# The recorded output of this cell shows X as (213, 42000, 3) and y as (213,).
X = np.load("/Users/sarvesh/Desktop/stress-detection-wesad/data/processed/S4_segments.npy")   # shape: (n_windows, 42000, 3)
y = np.load("/Users/sarvesh/Desktop/stress-detection-wesad/data/processed/S4_labels.npy")     # shape: (n_windows,)

# Quick sanity check that the number of windows matches the number of labels.
print(f"X shape: {X.shape}")
print(f"y shape: {y.shape}")

X shape: (213, 42000, 3)
y shape: (213,)


## Functions for Extracting Features

In [4]:

def extract_ecg_features(signal, fs=700):
    """Compute simple heart-rate-variability features from one ECG window.

    Peaks are located with ``scipy.signal.find_peaks`` using a minimum spacing
    of half a second, and the gaps between consecutive peaks are treated as
    RR intervals.

    Parameters
    ----------
    signal : 1-D array-like
        ECG samples of a single window.
    fs : int or float, default 700
        Sampling rate in Hz; converts sample offsets into seconds.

    Returns
    -------
    dict
        ``mean_rr``  - mean RR interval in seconds (0.0 if fewer than 2 peaks).
        ``sdnn``     - population standard deviation of the RR intervals
                       (0.0 if fewer than 2 peaks).
        ``rmssd``    - root mean square of successive RR differences
                       (0.0 if fewer than 3 peaks).
        ``ecg_peak_count`` - number of detected peaks.
    """
    # find_peaks rejects distance < 1, which int(fs * 0.5) produces for fs < 2,
    # so clamp the spacing to at least one sample.
    min_distance = max(1, int(fs * 0.5))  # 0.5 sec apart
    peaks, _ = find_peaks(signal, distance=min_distance)
    rr_intervals = np.diff(peaks) / fs  # seconds between beats

    n_intervals = len(rr_intervals)
    has_rr = n_intervals > 0
    # RMSSD needs at least two intervals to form one successive difference.
    has_rr_pairs = n_intervals > 1

    return {
        "mean_rr": np.mean(rr_intervals) if has_rr else 0.0,
        "sdnn": np.std(rr_intervals) if has_rr else 0.0,
        "rmssd": np.sqrt(np.mean(np.square(np.diff(rr_intervals)))) if has_rr_pairs else 0.0,
        "ecg_peak_count": len(peaks)
    }


def extract_eda_features(signal):
    """Summarise one EDA window with level, spread, peak count and drift.

    Parameters
    ----------
    signal : 1-D array-like
        EDA samples of a single window (must be non-empty, since the first
        and last samples are read).

    Returns
    -------
    dict
        ``eda_mean``, ``eda_std`` - mean and population standard deviation.
        ``eda_peak_count`` - number of peaks found at least 100 samples apart.
        ``eda_slope`` - (last sample - first sample) divided by the number of
        samples, i.e. net change per sample.
    """
    # Only the peak indices are needed; the properties dict is discarded.
    peak_idx = find_peaks(signal, distance=100)[0]

    n_samples = len(signal)
    net_change = signal[-1] - signal[0]

    feats = dict(
        eda_mean=np.mean(signal),
        eda_std=np.std(signal),
        eda_peak_count=len(peak_idx),
        eda_slope=net_change / n_samples,
    )
    return feats


def extract_resp_features(signal):
    """Summarise one respiration window with level, spread and zero crossings.

    Parameters
    ----------
    signal : 1-D array-like
        Respiration samples of a single window.

    Returns
    -------
    dict
        ``resp_mean``, ``resp_std`` - mean and population standard deviation.
        ``resp_zero_crossings`` - number of sign changes between consecutive
        non-zero samples.
    """
    signs = np.sign(signal)
    # Samples that are exactly zero have sign 0. Differencing the raw signs
    # would count a pass through zero twice (1, 0, -1) and would count a touch
    # without a sign change (1, 0, 1), so drop them before comparing neighbours.
    nonzero_signs = signs[signs != 0]
    zero_crossings = int(np.count_nonzero(np.diff(nonzero_signs)))

    return {
        "resp_mean": np.mean(signal),
        "resp_std": np.std(signal),
        "resp_zero_crossings": zero_crossings
    }




In [5]:
# Build one feature row per window of the subject loaded above.
features = []

for segment in X:
    # Channels are stored column-wise: 0 = ECG, 1 = EDA, 2 = respiration.
    ecg, eda, resp = segment[:, 0], segment[:, 1], segment[:, 2]

    ecg_feats = extract_ecg_features(ecg)
    eda_feats = extract_eda_features(eda)
    resp_feats = extract_resp_features(resp)

    # Merge the three per-signal dicts in ECG -> EDA -> Resp order; the value
    # order of this dict defines the column order of each row.
    all_feats = dict(ecg_feats)
    all_feats.update(eda_feats)
    all_feats.update(resp_feats)
    features.append([*all_feats.values()])


In [6]:
# Column names are taken from the feature dict left over from the final loop
# iteration; every window produces the same keys in the same order.
feature_names = [*all_feats]
df_features = pd.DataFrame(features, columns=feature_names).assign(label=y)

df_features


Unnamed: 0,mean_rr,sdnn,rmssd,ecg_peak_count,eda_mean,eda_std,eda_peak_count,eda_slope,resp_mean,resp_std,resp_zero_crossings,label
0,0.839416,0.081676,0.064250,72,-0.564494,0.167938,39,1.386197e-05,0.000487,0.979562,31,0
1,0.843038,0.113958,0.094823,72,-0.462671,0.028234,41,1.066180e-06,0.053279,0.957919,35,0
2,0.800270,0.175418,0.194228,75,-0.517287,0.049605,35,-2.384147e-06,0.000799,0.891410,37,0
3,0.795124,0.176663,0.202567,76,-0.572951,0.028220,32,-2.865320e-06,-0.019642,0.826233,44,0
4,0.849469,0.143849,0.157376,71,-0.630440,0.052114,34,-4.090417e-06,-0.018199,0.750609,45,0
...,...,...,...,...,...,...,...,...,...,...,...,...
208,0.812094,0.134521,0.145569,74,0.327153,0.015504,35,1.828373e-07,-0.042851,1.484759,27,0
209,0.787876,0.167903,0.183902,77,0.345581,0.026521,37,1.414749e-06,0.087361,1.123280,29,0
210,0.714167,0.207503,0.188360,85,0.372413,0.007476,43,2.134597e-07,0.039183,0.906761,34,0
211,0.706803,0.192231,0.147501,85,0.372250,0.006622,43,-2.375773e-07,0.020059,1.385700,33,0


In [7]:
# Class distribution of the windows for this subject.
df_features["label"].value_counts()

label
0    108
1     39
4     27
2     21
3     12
5      2
7      2
6      2
Name: count, dtype: int64

## Looping Over All Subjects

In [8]:
# === Batch Processing === #
# Runs the per-window feature extraction for every subject and writes one
# CSV of features (plus a "label" column) per subject.

# Subject IDs S2..S17; S12 is skipped by the filter below.
subjects = [f"S{i}" for i in range(2, 18) if i != 12]
# NOTE(review): these roots differ from the ".../stress-detection-wesad/..."
# path used when loading S4 earlier in this notebook - confirm which is current.
input_dir = "/Users/sarvesh/Desktop/wesad/data/processed"
output_dir = "/Users/sarvesh/Desktop/wesad/data/features"

for subj in subjects:
    # Best effort per subject: a failure is reported and the loop moves on.
    try:
        print(f"\n🔄 Extracting features for {subj}...")
        X = np.load(f"{input_dir}/{subj}_segments.npy")
        y = np.load(f"{input_dir}/{subj}_labels.npy")

        # Fail loudly instead of silently misaligning windows and labels.
        if len(X) != len(y):
            raise ValueError(f"{len(X)} windows but {len(y)} labels")

        # One dict per window, so the column names always come from this
        # subject's own rows rather than from a variable left over from an
        # earlier cell or an earlier subject.
        features = []
        for segment in X:
            ecg = segment[:, 0]
            eda = segment[:, 1]
            resp = segment[:, 2]

            ecg_feats = extract_ecg_features(ecg)
            eda_feats = extract_eda_features(eda)
            resp_feats = extract_resp_features(resp)

            all_feats = {**ecg_feats, **eda_feats, **resp_feats}
            features.append(all_feats)

        df = pd.DataFrame(features)
        feature_names = list(df.columns)
        df["label"] = y

        # Create the output folder on demand; done inside the try so that a
        # failure here is reported like any other per-subject error.
        os.makedirs(output_dir, exist_ok=True)
        df.to_csv(f"{output_dir}/{subj}_features.csv", index=False)
        print(f"✅ Done: {df.shape[0]} samples saved.")
    except Exception as e:
        print(f"❌ Failed to process {subj}: {e}")


🔄 Extracting features for S2...
✅ Done: 201 samples saved.

🔄 Extracting features for S3...
✅ Done: 215 samples saved.

🔄 Extracting features for S4...
✅ Done: 213 samples saved.

🔄 Extracting features for S5...
✅ Done: 207 samples saved.

🔄 Extracting features for S6...
✅ Done: 234 samples saved.

🔄 Extracting features for S7...
✅ Done: 173 samples saved.

🔄 Extracting features for S8...
✅ Done: 181 samples saved.

🔄 Extracting features for S9...
✅ Done: 173 samples saved.

🔄 Extracting features for S10...
✅ Done: 182 samples saved.

🔄 Extracting features for S11...
✅ Done: 173 samples saved.

🔄 Extracting features for S13...
✅ Done: 183 samples saved.

🔄 Extracting features for S14...
✅ Done: 183 samples saved.

🔄 Extracting features for S15...
✅ Done: 174 samples saved.

🔄 Extracting features for S16...
✅ Done: 186 samples saved.

🔄 Extracting features for S17...
✅ Done: 196 samples saved.
