## LIBRARIES

In [37]:
import neurokit2 as nk
import numpy as np
import pandas as pd
import os

## PARAMETERS

In [38]:
# Folder of the recording session to process; the BVP/EDA/HR/IBI CSV paths are built from it.
# NOTE(review): absolute, machine-specific path — adjust before running on another machine.
base_foler = "D:/Kuliah/TA/Dataset/Multimodal/dataset/15_1594140175"

In [39]:
# Windowing: window length in seconds and the fraction shared by consecutive windows.
window_sec, overlap = 60, 0.5

# Per-sensor CSV files located inside the recording folder.
bvp_path, eda_path, hr_path, ibi_path = (
    os.path.join(base_foler, csv_name)
    for csv_name in ("BVP.csv", "EDA.csv", "HR.csv", "IBI.csv")
)


## LOAD BVP SIGNAL

In [40]:
# Read the header-less BVP file. In its first column, row 0 is used as the
# recording start time, row 1 as the sampling rate, and the remaining rows as
# the signal samples.
# NOTE(review): start_time is later added to offsets in seconds and compared
# with Unix timestamps, so it is presumably Unix seconds — confirm.
bvp_raw = pd.read_csv(bvp_path, header=None)
start_time, sample_rate = bvp_raw.iloc[0, 0], bvp_raw.iloc[1, 0]
signal = bvp_raw.iloc[2:, 0].to_numpy()
signal

array([ -0.  ,  -0.  ,  -0.  , ..., -48.28, -34.91, -23.27])

## SEGMENTATION

In [41]:
def segment_signal(signal, sampling_rate, window_sec, overlap):
    """Split a 1-D signal into fixed-length, optionally overlapping windows.

    Parameters
    ----------
    signal : sequence
        Samples to segment; any object supporting ``len()`` and slicing.
    sampling_rate : float
        Number of samples per second.
    window_sec : float
        Window length in seconds.
    overlap : float
        Fraction of a window shared with the next one; must be below 1.

    Returns
    -------
    segments : list
        Slices of ``signal``, each ``int(sampling_rate * window_sec)`` samples
        long. A trailing remainder shorter than a full window is dropped.
    starts : list of float
        Start of each window in seconds, measured from the first sample.

    Raises
    ------
    ValueError
        If the window is shorter than one sample or ``overlap`` is not below 1.
    """
    window_size = int(sampling_rate * window_sec)
    if window_size < 1:
        raise ValueError(
            f"window must span at least one sample, got {window_size} "
            f"(sampling_rate={sampling_rate}, window_sec={window_sec})"
        )
    # `not overlap < 1` also rejects NaN, which would otherwise slip through.
    if not overlap < 1:
        raise ValueError(f"overlap must be smaller than 1, got {overlap}")

    # Truncation can yield a step of 0 for short windows or overlaps close to 1,
    # which range() rejects; always advance by at least one sample.
    step_size = max(1, int(window_size * (1 - overlap)))

    segments = []
    starts = []

    for start in range(0, len(signal) - window_size + 1, step_size):
        end = start + window_size
        segments.append(signal[start:end])
        starts.append(start / sampling_rate)  # time in seconds
    return segments, starts

In [42]:
# Cut the BVP signal into overlapping windows and report how many were produced.
segments, start_times = segment_signal(
    signal,
    sampling_rate=sample_rate,
    window_sec=window_sec,
    overlap=overlap,
)
print(f"Segmented into {len(segments)} windows of {window_sec}s each")

Segmented into 259 windows of 60s each


## FEATURE EXTRACTION USING NEUROKIT2

In [43]:
# Process every window with NeuroKit2 and collect its frequency-domain HRV
# features, tagged with the window's position in the recording.
features_list = []
for win_idx, window in enumerate(segments):
    try:
        ppg_signals, ppg_info = nk.ppg_process(window, sampling_rate=sample_rate)
        features = nk.hrv_frequency(ppg_signals, sampling_rate=sample_rate, show=False)

        # Offset of this window from the start of the recording, in seconds.
        window_offset = start_times[win_idx]
        features["window_start_sec"] = window_offset
        features["start_unix"] = start_time + window_offset
        features["end_unix"] = start_time + window_offset + window_sec
        features_list.append(features)
    except Exception as err:
        # Best effort: a window that cannot be processed is reported and skipped.
        print(f"Skipping window {win_idx} due to error: {err}")

In [44]:
# Stack the per-window feature frames into one DataFrame with a fresh 0..n-1 index.
# Note: pd.concat raises ValueError if every window was skipped and the list is empty.
features_df = pd.concat(features_list, ignore_index=True)

## LABEL THE DATA

In [45]:
# Load the survey table (path is relative to the working directory). The
# labeling step below uses its Start_unix, End_unix and "Stress level" columns.
survey_labeled = pd.read_csv("survey_labeled.csv")
survey_labeled

Unnamed: 0,ID,Start,End,Start_unix,End_unix,Stress level
0,15,2020-07-22 16:10:00,2020-07-22 16:33:00,1595434200,1595435580,2
1,15,2020-07-08 09:03:00,2020-07-08 09:14:00,1594198980,1594199640,2
2,15,2020-07-21 08:23:00,2020-07-21 08:30:00,1595319780,1595320200,0
3,15,2020-07-21 11:19:00,2020-07-21 11:45:00,1595330340,1595331900,2
4,15,2020-07-22 15:34:00,2020-07-22 16:01:00,1595432040,1595433660,2
...,...,...,...,...,...,...
240,F5,2020-07-15 09:01:00,2020-07-15 09:07:00,1594803660,1594804020,2
241,F5,2020-07-10 16:14:00,2020-07-10 16:19:00,1594397640,1594397940,2
242,F5,2020-07-13 09:13:00,2020-07-13 09:17:00,1594631580,1594631820,2
243,F5,2020-07-17 11:49:00,2020-07-17 12:13:00,1594986540,1594987980,2


In [46]:
# Count how many survey rows carry each stress level (class balance check).
survey_labeled["Stress level"].value_counts()

Stress level
2    179
0     46
1     20
Name: count, dtype: int64

In [47]:
# Label each feature window with the stress level of the first survey interval
# that fully contains it; windows outside every interval keep label None.
#
# The survey table holds rows for many participants, so only this recording's
# participant is considered — otherwise a window could inherit someone else's label.
# NOTE(review): assumes the recording folder is named "<participant ID>_<start unix>"
# and that the prefix matches the survey "ID" column — confirm.
participant_id = os.path.basename(os.path.normpath(base_foler)).split("_")[0]
participant_surveys = survey_labeled[
    survey_labeled["ID"].astype(str).str.strip() == participant_id
]

# Pull the interval bounds out once so each window is matched with array
# comparisons instead of a nested row-by-row scan.
survey_starts = participant_surveys["Start_unix"].to_numpy()
survey_ends = participant_surveys["End_unix"].to_numpy()
survey_levels = participant_surveys["Stress level"].to_numpy()

features_df["label"] = None
for i, win_start, win_end in zip(
    features_df.index, features_df["start_unix"], features_df["end_unix"]
):
    containing = np.flatnonzero((survey_starts <= win_start) & (survey_ends >= win_end))
    if containing.size:
        # First matching survey row wins, as in a sequential scan with break.
        features_df.at[i, "label"] = survey_levels[containing[0]]
features_df

Unnamed: 0,HRV_ULF,HRV_VLF,HRV_LF,HRV_HF,HRV_VHF,HRV_TP,HRV_LFHF,HRV_LFn,HRV_HFn,HRV_LnHF,window_start_sec,start_unix,end_unix,label
0,,,0.007035,0.032369,0.008759,0.048162,0.217334,0.146065,0.672073,-3.430566,0.0,1.594140e+09,1.594140e+09,
1,,,0.042679,0.105337,0.010001,0.158017,0.405165,0.270091,0.666619,-2.250589,30.0,1.594140e+09,1.594140e+09,
2,,,0.011684,0.001421,0.000054,0.013159,8.222006,0.887905,0.107991,-6.556385,60.0,1.594140e+09,1.594140e+09,
3,,,0.011637,0.097629,0.004126,0.113392,0.119195,0.102625,0.860985,-2.326583,90.0,1.594140e+09,1.594140e+09,
4,,,0.059070,0.075588,0.063349,0.198007,0.781478,0.298325,0.381744,-2.582456,120.0,1.594140e+09,1.594140e+09,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
254,,,0.052811,0.070209,0.004908,0.127927,0.752195,0.412818,0.548818,-2.656283,7620.0,1.594148e+09,1.594148e+09,
255,,,0.022286,0.045790,0.008774,0.076850,0.486700,0.289993,0.595834,-3.083697,7650.0,1.594148e+09,1.594148e+09,
256,,,0.062711,0.036293,0.002663,0.101666,1.727926,0.616832,0.356978,-3.316141,7680.0,1.594148e+09,1.594148e+09,
257,,,0.040708,0.080500,0.004858,0.126066,0.505693,0.322913,0.638555,-2.519499,7710.0,1.594148e+09,1.594148e+09,


In [48]:
# Save the labeled features to a CSV file in the working directory
# (index=False leaves the row index out of the file).
features_df.to_csv("features_labeled.csv", index=False)

In [49]:
# Count windows per assigned label. value_counts() skips missing values by
# default, so an empty result means no window received a label.
features_df["label"].value_counts()

Series([], Name: count, dtype: int64)