# Feature Extraction from Wearable EDA Signals
 
This notebook extracts two simple statistical features, the mean and the standard deviation, from labeled electrodermal activity (EDA) signals using overlapping sliding time windows. The extracted features form the input dataset for machine learning models.

In [1]:
# basic libraries
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


In [2]:
# dataset path
# The dataset root can be overridden without editing the notebook by setting the
# WEARABLE_DATASET_DIR environment variable; otherwise the original local
# default below is used.
BASE_PATH = os.environ.get(
    "WEARABLE_DATASET_DIR",
    r"C:\Users\KARAN\Downloads\Internship Project\Wearable_Dataset\Wearable_Dataset",
)
STRESS_PATH = os.path.join(BASE_PATH, "STRESS")

# use the same subject you used before
subject = "S01"   # change if needed
SUBJECT_PATH = os.path.join(STRESS_PATH, subject)

# Listing the folder doubles as an early check that the path is valid:
# os.listdir raises immediately if SUBJECT_PATH does not exist.
print("Using subject:", subject)
print("Files:", os.listdir(SUBJECT_PATH))


Using subject: S01
Files: ['ACC.csv', 'BVP.csv', 'EDA.csv', 'HR.csv', 'IBI.csv', 'tags.csv', 'TEMP.csv']


In [3]:
# load EDA.csv
# Layout relied on below (single column, no header):
#   row 0  -> recording start time
#   row 1  -> sampling rate in samples per second
#   row 2+ -> EDA samples
eda_path = os.path.join(SUBJECT_PATH, "EDA.csv")
eda_raw = pd.read_csv(eda_path, header=None)

# start time can be unix seconds OR a datetime string
start_raw = str(eda_raw.iloc[0, 0]).strip()
try:
    start_time = pd.to_datetime(float(start_raw), unit="s", utc=True)
except (ValueError, OverflowError):
    # Not a usable numeric timestamp -> fall back to parsing it as a datetime
    # string. Only these errors are caught so that unrelated failures
    # (e.g. KeyboardInterrupt) are no longer silently swallowed.
    start_time = pd.to_datetime(start_raw, utc=True)

# sampling rate
fs = float(eda_raw.iloc[1, 0])
if not fs > 0:
    # The time axis below divides by fs, so zero/negative/NaN rates are rejected up front.
    raise ValueError(f"Invalid sampling rate in {eda_path}: {fs}")

# EDA values
eda_values = eda_raw.iloc[2:, 0].astype(float).values

# make time axis: sample k is stamped start_time + k / fs seconds
time_axis = start_time + pd.to_timedelta(np.arange(len(eda_values)) / fs, unit="s")

# dataframe
eda_df = pd.DataFrame({"time": time_axis, "eda": eda_values})

print("EDA loaded | fs =", fs)
eda_df.head()


EDA loaded | fs = 4.0


Unnamed: 0,time,eda
0,2013-02-20 17:55:19+00:00,0.0
1,2013-02-20 17:55:19.250000+00:00,0.005125
2,2013-02-20 17:55:19.500000+00:00,0.001281
3,2013-02-20 17:55:19.750000+00:00,0.006407
4,2013-02-20 17:55:20+00:00,0.006407


In [4]:
# load tags.csv (single column, no header: one event-marker timestamp per row)
tags_path = os.path.join(SUBJECT_PATH, "tags.csv")
tags_raw = pd.read_csv(tags_path, header=None)

# parse tags safely (unix seconds or datetime string)
parsed_tags = []
for raw_tag in tags_raw[0].astype(str):
    raw_tag = raw_tag.strip()
    try:
        parsed_tags.append(pd.to_datetime(float(raw_tag), unit="s", utc=True))
    except (ValueError, OverflowError):
        # Not a usable numeric timestamp -> parse as a datetime string instead.
        # Only these errors are caught so that unrelated failures
        # (e.g. KeyboardInterrupt) are no longer silently swallowed.
        parsed_tags.append(pd.to_datetime(raw_tag, utc=True))

# collect the individual timestamps into one DatetimeIndex
tag_times = pd.to_datetime(parsed_tags)

print("Total tags:", len(tag_times))
tag_times[:10]


Total tags: 13


DatetimeIndex(['2013-02-20 18:01:30+00:00', '2013-02-20 18:05:37+00:00',
               '2013-02-20 18:08:32+00:00', '2013-02-20 18:10:17+00:00',
               '2013-02-20 18:17:37+00:00', '2013-02-20 18:19:42+00:00',
               '2013-02-20 18:24:49+00:00', '2013-02-20 18:25:17+00:00',
               '2013-02-20 18:25:55+00:00', '2013-02-20 18:26:32+00:00'],
              dtype='datetime64[ns, UTC]', freq=None)

In [5]:
# Turn consecutive tag pairs into labelled segments: segment k spans
# tag k -> tag k+1, so N tags give N-1 segments.
# Labels alternate by position: even-numbered segments are "rest", odd ones "stress".
# NOTE(review): this assumes the recording protocol starts with rest and strictly
# alternates -- confirm against the dataset's protocol description.
LABEL_CYCLE = ("rest", "stress")

segments = [
    [subject, seg_begin, seg_finish, LABEL_CYCLE[idx % 2]]
    for idx, (seg_begin, seg_finish) in enumerate(zip(tag_times[:-1], tag_times[1:]))
]

segments_df = pd.DataFrame(
    segments,
    columns=["subject", "start_time", "end_time", "label"],
)

print("Total segments:", len(segments_df))
segments_df.head()


Total segments: 12


Unnamed: 0,subject,start_time,end_time,label
0,S01,2013-02-20 18:01:30+00:00,2013-02-20 18:05:37+00:00,rest
1,S01,2013-02-20 18:05:37+00:00,2013-02-20 18:08:32+00:00,stress
2,S01,2013-02-20 18:08:32+00:00,2013-02-20 18:10:17+00:00,rest
3,S01,2013-02-20 18:10:17+00:00,2013-02-20 18:17:37+00:00,stress
4,S01,2013-02-20 18:17:37+00:00,2013-02-20 18:19:42+00:00,rest


In [7]:
# window settings (simple)
WINDOW_SEC = 30   # window length = 30 seconds
STEP_SEC = 15     # step = 15 seconds (consecutive windows overlap by 15 seconds)
MIN_SAMPLES = 5   # a window is kept only if it holds MORE than this many samples

# Build the Timedelta objects once instead of on every loop iteration.
window_len = pd.Timedelta(seconds=WINDOW_SEC)
step_len = pd.Timedelta(seconds=STEP_SEC)

rows = []

# go through each segment (rest/stress)
for seg in segments_df.itertuples(index=False):
    # Slide the window from the segment start; only windows that fit entirely
    # inside the segment are used, so no window mixes two labels.
    w_start = seg.start_time
    while w_start + window_len <= seg.end_time:
        w_end = w_start + window_len

        # take EDA values inside this window (start inclusive, end exclusive)
        in_window = (eda_df["time"] >= w_start) & (eda_df["time"] < w_end)
        w_data = eda_df.loc[in_window, "eda"]

        # if window has enough samples then calculate features
        if len(w_data) > MIN_SAMPLES:
            eda_mean = float(w_data.mean())
            eda_std = float(w_data.std())

            # Use the subject recorded on the segment itself rather than the
            # notebook-level `subject`, so each row stays consistent with segments_df.
            rows.append([seg.subject, seg.label, w_start, w_end, eda_mean, eda_std])

        # move window forward
        w_start = w_start + step_len

# final features dataframe: one row per accepted window
features_df = pd.DataFrame(
    rows,
    columns=["subject", "label", "window_start", "window_end", "eda_mean", "eda_std"]
)

print("Total windows created:", len(features_df))
features_df.head()


Total windows created: 89


Unnamed: 0,subject,label,window_start,window_end,eda_mean,eda_std
0,S01,rest,2013-02-20 18:01:30+00:00,2013-02-20 18:02:00+00:00,0.492917,0.009664
1,S01,rest,2013-02-20 18:01:45+00:00,2013-02-20 18:02:15+00:00,0.500637,0.006211
2,S01,rest,2013-02-20 18:02:00+00:00,2013-02-20 18:02:30+00:00,0.511497,0.014188
3,S01,rest,2013-02-20 18:02:15+00:00,2013-02-20 18:02:45+00:00,0.535863,0.016988
4,S01,rest,2013-02-20 18:02:30+00:00,2013-02-20 18:03:00+00:00,0.543188,0.010099


In [8]:
# Write the per-window feature table to a CSV named after the subject.
# The file lands in the notebook's current working directory; the row index
# is left out so the CSV contains only the feature columns.
out_file = f"features_{subject}.csv"
features_df.to_csv(path_or_buf=out_file, index=False)

print("Saved features file:", out_file)


Saved features file: features_S01.csv


In [9]:
# Quick sanity check: average each feature over all windows of a label
# to see whether rest and stress windows differ.
label_means = features_df.groupby("label")[["eda_mean", "eda_std"]].mean()

print("Average features by label:")
print(label_means)


Average features by label:
        eda_mean   eda_std
label                     
rest    0.602091  0.016213
stress  0.652786  0.008080
