# Feature Extraction from Multi-Signal Wearable Data for Stress Detection
 
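This notebook loads wearable signals for one subject — electrodermal activity (EDA), heart rate (HR), skin temperature (TEMP), and accelerometer magnitude (ACC) — and extracts window-based statistical features (mean and standard deviation over 30-second windows with a 15-second step). Each window is labelled `rest` or `stress`, and the resulting feature table is saved to CSV for later use in stress classification with machine learning models.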

In [13]:
# basic libraries
import os
import numpy as np
import pandas as pd


In [21]:
# Root folder of the wearable dataset. The location is machine-specific, so it
# can be overridden with the WEARABLE_DATASET_PATH environment variable; the
# original hard-coded location is kept as the fallback.
BASE_PATH = os.environ.get(
    "WEARABLE_DATASET_PATH",
    r"C:\Users\KARAN\Downloads\Internship Project\Wearable_Dataset\Wearable_Dataset",
)

# Sub-folder holding the recordings for the STRESS condition.
STRESS_PATH = os.path.join(BASE_PATH, "STRESS")

# NOTE: this only confirms the path variables were set; the folder itself is
# not checked for existence here.
print("Dataset path loaded")

Dataset path loaded


In [22]:
# Subject whose recordings are processed in this notebook.
subject = "S01"

# Folder with this subject's files inside the STRESS condition folder.
SUBJECT_PATH = os.path.join(STRESS_PATH, subject)

print(f"Using subject: {subject}")
print(f"Folder: {SUBJECT_PATH}")


Using subject: S01
Folder: C:\Users\KARAN\Downloads\Internship Project\Wearable_Dataset\Wearable_Dataset\STRESS\S01


In [23]:
# function to read Empatica signal file
def read_empatica_signal(file_path):
    """Read a single-column signal CSV into a time-stamped DataFrame.

    The file is read without a header and interpreted as follows:
    row 0 is the recording start time, row 1 is the sampling rate in Hz,
    and every remaining row is one signal sample.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file to read.

    Returns
    -------
    df : pandas.DataFrame
        Two columns: ``time`` (evenly spaced timestamps beginning at the
        recording start, spaced ``1 / fs`` seconds apart) and ``value``
        (the samples as floats).
    fs : float
        Sampling rate taken from the second row of the file.

    Raises
    ------
    ValueError
        If the file has fewer than two rows, or the sampling rate is not a
        positive number.
    """
    raw = pd.read_csv(file_path, header=None)

    # Both header rows (start time and sampling rate) are required.
    if len(raw) < 2:
        raise ValueError(
            f"{file_path!r} must contain a start-time row and a sampling-rate row"
        )

    start_time = raw.iloc[0, 0]
    fs = float(raw.iloc[1, 0])

    # "not fs > 0" also rejects NaN; a zero rate would otherwise fail later
    # with an unhelpful ZeroDivisionError when computing the sample spacing.
    if not fs > 0:
        raise ValueError(
            f"{file_path!r} has an invalid sampling rate: {fs!r}"
        )

    values = raw.iloc[2:, 0].astype(float).values

    # The start time is either a Unix timestamp in seconds or a datetime
    # string. Only the numeric conversion is guarded, and only for the errors
    # a non-numeric value produces, so unrelated failures are not swallowed.
    try:
        start_seconds = float(start_time)
    except (TypeError, ValueError):
        t0 = pd.to_datetime(start_time)
    else:
        t0 = pd.to_datetime(start_seconds, unit="s")

    time_index = pd.date_range(
        start=t0,
        periods=len(values),
        freq=pd.Timedelta(seconds=1 / fs),
    )

    df = pd.DataFrame({
        "time": time_index,
        "value": values,
    })

    return df, fs


In [24]:
# EDA, HR and TEMP are single-column files, so they all go through the shared
# reader, which returns the time-stamped samples and the sampling rate.
eda_df, eda_fs = read_empatica_signal(os.path.join(SUBJECT_PATH, "EDA.csv"))
hr_df, hr_fs = read_empatica_signal(os.path.join(SUBJECT_PATH, "HR.csv"))
temp_df, temp_fs = read_empatica_signal(os.path.join(SUBJECT_PATH, "TEMP.csv"))

# ACC is parsed here because three value columns are read from it (presumably
# the x/y/z axes - confirm against the dataset description). The layout is
# otherwise the same: row 0 = start time, row 1 = sampling rate, rest = samples.
acc_raw = pd.read_csv(os.path.join(SUBJECT_PATH, "ACC.csv"), header=None)
acc_start = acc_raw.iloc[0, 0]
acc_fs = float(acc_raw.iloc[1, 0])
acc_values = acc_raw.iloc[2:, :3].astype(float).values

# The start time is either a Unix timestamp in seconds or a datetime string.
# Only the numeric conversion is guarded, and only for the errors that a
# non-numeric value produces, so unrelated failures are not silently swallowed.
try:
    acc_start_seconds = float(acc_start)
except (TypeError, ValueError):
    acc_t0 = pd.to_datetime(acc_start)
else:
    acc_t0 = pd.to_datetime(acc_start_seconds, unit="s")

# One timestamp per ACC sample, spaced 1 / acc_fs seconds apart.
acc_time = pd.date_range(
    start=acc_t0,
    periods=len(acc_values),
    freq=pd.Timedelta(seconds=1 / acc_fs),
)

# Collapse the three columns into a single per-sample Euclidean magnitude so
# ACC has the same "time"/"value" shape as the other signals.
acc_magnitude = np.sqrt(
    acc_values[:, 0] ** 2 +
    acc_values[:, 1] ** 2 +
    acc_values[:, 2] ** 2
)

acc_df = pd.DataFrame({
    "time": acc_time,
    "value": acc_magnitude,
})

print("All signals loaded successfully")


All signals loaded successfully


In [25]:
# Load the tag timestamps (first column of tags.csv, no header row).
tags_raw = pd.read_csv(os.path.join(SUBJECT_PATH, "tags.csv"), header=None)

# Tags are either Unix timestamps in seconds or datetime strings. Only the
# numeric conversion is guarded, and only for the errors a non-numeric column
# produces, so unrelated failures are not silently swallowed.
try:
    tag_seconds = tags_raw[0].astype(float)
except (TypeError, ValueError):
    tag_times = pd.to_datetime(tags_raw[0])
else:
    tag_times = pd.to_datetime(tag_seconds, unit="s")

# Every EDA sample starts as "rest"; re-running this cell resets the labels.
eda_df["label"] = "rest"

# Consecutive tags are treated as (start, end) pairs of a stress interval
# (assumption carried over from the original logic - confirm against the
# recording protocol). zip() stops at the shorter slice, so an unpaired
# trailing tag is ignored.
for start, end in zip(tag_times.iloc[0::2], tag_times.iloc[1::2]):
    # between() is inclusive on both ends by default.
    in_interval = eda_df["time"].between(start, end)
    eda_df.loc[in_interval, "label"] = "stress"

print("Labels created")
eda_df["label"].value_counts()


Labels created


label
rest      6596
stress    2302
Name: count, dtype: int64

In [26]:
# sliding window parameters
window_size = 30   # seconds
step_size = 15     # seconds

# Window and step lengths expressed in EDA samples: EDA is the reference
# signal, and the other signals are aligned to each EDA window by timestamp.
window_samples = int(window_size * eda_fs)
step_samples = int(step_size * eda_fs)

# Output column order of the feature table.
feature_columns = [
    "subject",
    "window_start",
    "window_end",
    "eda_mean", "eda_std",
    "hr_mean", "hr_std",
    "temp_mean", "temp_std",
    "acc_mean", "acc_std",
    "label",
]

rows = []

# The "+ 1" keeps a final window that ends exactly on the last EDA sample;
# without it that window would be dropped. If the recording is shorter than
# one window the range is empty and no rows are produced.
for start in range(0, len(eda_df) - window_samples + 1, step_samples):

    end = start + window_samples

    window_eda = eda_df.iloc[start:end]

    # Time span covered by this EDA window (first and last sample, inclusive).
    start_time = window_eda["time"].iloc[0]
    end_time = window_eda["time"].iloc[-1]

    # Samples of the other signals that fall inside the same time span
    # (between() is inclusive on both ends by default).
    hr_window = hr_df[hr_df["time"].between(start_time, end_time)]
    temp_window = temp_df[temp_df["time"].between(start_time, end_time)]
    acc_window = acc_df[acc_df["time"].between(start_time, end_time)]

    # Skip windows where any signal has no samples at all.
    if len(hr_window) == 0 or len(temp_window) == 0 or len(acc_window) == 0:
        continue

    # Build the row as a dict so each value stays tied to its column name.
    row = {
        "subject": subject,
        "window_start": start_time,
        "window_end": end_time,

        "eda_mean": window_eda["value"].mean(),
        "eda_std": window_eda["value"].std(),

        "hr_mean": hr_window["value"].mean(),
        "hr_std": hr_window["value"].std(),

        "temp_mean": temp_window["value"].mean(),
        "temp_std": temp_window["value"].std(),

        "acc_mean": acc_window["value"].mean(),
        "acc_std": acc_window["value"].std(),

        # Majority label of the window; mode() returns sorted values, so an
        # exact tie resolves to the alphabetically first label.
        "label": window_eda["label"].mode()[0],
    }

    rows.append(row)

# Passing columns explicitly fixes the column order and keeps the expected
# columns even when no window was produced.
features_df = pd.DataFrame(rows, columns=feature_columns)

print("Total windows created:", len(features_df))
features_df.head()


Total windows created: 147


Unnamed: 0,subject,window_start,window_end,eda_mean,eda_std,hr_mean,hr_std,temp_mean,temp_std,acc_mean,acc_std,label
0,S01,2013-02-20 17:55:19,2013-02-20 17:55:48.750,0.201066,0.266698,86.7245,9.193781,31.762333,0.828232,65.360075,9.94326,rest
1,S01,2013-02-20 17:55:34,2013-02-20 17:56:03.750,0.271109,0.206442,86.400667,0.944103,32.268333,0.27435,65.579771,9.131499,rest
2,S01,2013-02-20 17:55:49,2013-02-20 17:56:18.750,0.276522,0.088712,87.592333,0.983746,32.564333,0.167165,64.963706,5.817983,rest
3,S01,2013-02-20 17:56:04,2013-02-20 17:56:33.750,0.346803,0.030328,88.883667,0.585141,32.820667,0.139264,64.899532,7.292461,rest
4,S01,2013-02-20 17:56:19,2013-02-20 17:56:48.750,0.383235,0.014667,90.852,1.97265,32.986,0.065573,64.817945,5.443674,rest


In [27]:
# Output file name embeds the subject identifier.
file_name = "features_multisignal_{}.csv".format(subject)

# index=False leaves the positional row index out of the written file.
features_df.to_csv(file_name, index=False)

print(f"Features saved as: {file_name}")

Features saved as: features_multisignal_S01.csv
