In [6]:
import neurokit2 as nk
import numpy as np
import pandas as pd
import os

In [7]:
base_foler = "D:\\Kuliah\\TA\\Dataset\\Multimodal\\dataset"

In [9]:
data_all = [path for path in os.listdir(base_foler)]
data_all.remove("SurveyResults.xlsx")
data_all.sort()
# save data_all to a csv file
data_all = pd.DataFrame(data_all, columns=["filename"])
data_all.to_csv("data_all.csv", index=False)

In [10]:
subjects = [path.split("_")[0] for path in os.listdir(base_foler)]
subjects = list(set(subjects))
subjects.remove("SurveyResults.xlsx")
print(f"Found {len(subjects)} Subjects: {subjects}")

Found 15 Subjects: ['6B', '7A', 'F5', '8B', 'CE', '15', '5C', '94', '7E', '6D', '83', 'E4', 'BG', 'DF', 'EG']


In [11]:
# get the list of all files in the directory
files = os.listdir(base_foler)
# get folders that starts with "5C"
# folders = [f for f in files if f.startswith("5C")]
# folders.sort()
# bvp_paths = [os.path.join(base_foler, f, "BVP.csv") for f in folders]
# eda_paths = [os.path.join(base_foler, f, "EDA.csv") for f in folders]
# hr_paths = [os.path.join(base_foler, f, "HR.csv") for f in folders]
# ibi_paths = [os.path.join(base_foler, f, "IBI.csv") for f in folders]


# subject_folders format [{"ID": {subject_id}, "folders" : [folder1, folder2, ...]}]
subject_folders = []
for subject in subjects:
    # get all folders that starts with the subject id
    folders = [f for f in files if f.startswith(subject)]
    # sort the folders
    folders.sort()
    # add to the list
    subject_folders.append({"ID": subject, "folders": folders})

# len(subject_folders)
bvp_subject_paths = []
eda_subject_paths = []
hr_subject_paths = []
ibi_subject_paths = []
for subject_folder in subject_folders:
    subject_id = subject_folder["ID"]
    # get the list of all folders for this subject
    folders = subject_folder["folders"]
    # get the paths for each folder
    bvp_paths = [os.path.join(base_foler, f, "BVP.csv") for f in folders]
    eda_paths = [os.path.join(base_foler, f, "EDA.csv") for f in folders]
    hr_paths = [os.path.join(base_foler, f, "HR.csv") for f in folders]
    ibi_paths = [os.path.join(base_foler, f, "IBI.csv") for f in folders]

    bvp_subject_paths.append({"ID": subject_id, "paths": bvp_paths})
    eda_subject_paths.append({"ID": subject_id, "paths": eda_paths})
    hr_subject_paths.append({"ID": subject_id, "paths": hr_paths})
    ibi_subject_paths.append({"ID": subject_id, "paths": ibi_paths})


In [12]:
bvp_subject_paths[0]

{'ID': '6B',
 'paths': ['D:\\Kuliah\\TA\\Dataset\\Multimodal\\dataset\\6B_1587569373\\BVP.csv',
  'D:\\Kuliah\\TA\\Dataset\\Multimodal\\dataset\\6B_1587585327\\BVP.csv',
  'D:\\Kuliah\\TA\\Dataset\\Multimodal\\dataset\\6B_1587653438\\BVP.csv',
  'D:\\Kuliah\\TA\\Dataset\\Multimodal\\dataset\\6B_1587661095\\BVP.csv',
  'D:\\Kuliah\\TA\\Dataset\\Multimodal\\dataset\\6B_1587662477\\BVP.csv',
  'D:\\Kuliah\\TA\\Dataset\\Multimodal\\dataset\\6B_1588961203\\BVP.csv',
  'D:\\Kuliah\\TA\\Dataset\\Multimodal\\dataset\\6B_1588970444\\BVP.csv',
  'D:\\Kuliah\\TA\\Dataset\\Multimodal\\dataset\\6B_1589052301\\BVP.csv',
  'D:\\Kuliah\\TA\\Dataset\\Multimodal\\dataset\\6B_1589901451\\BVP.csv',
  'D:\\Kuliah\\TA\\Dataset\\Multimodal\\dataset\\6B_1590172247\\BVP.csv',
  'D:\\Kuliah\\TA\\Dataset\\Multimodal\\dataset\\6B_1590239123\\BVP.csv',
  'D:\\Kuliah\\TA\\Dataset\\Multimodal\\dataset\\6B_1592925729\\BVP.csv',
  'D:\\Kuliah\\TA\\Dataset\\Multimodal\\dataset\\6B_1592996203\\BVP.csv',
  'D:\\Kuliah\\T

In [13]:
# loop through each files
# gather all the data into a single dataframe
def gather_data(subject_paths, data_type, output_dir="processed"):
    os.makedirs(output_dir, exist_ok=True)

    for subject in subject_paths:
        subject_id = subject["ID"]
        paths = subject["paths"]
        output_path = os.path.join(output_dir, f"{subject_id}_{data_type}.csv")
        # check if the file already exists
        if os.path.exists(output_path):
            print(f"File {output_path} already exists, skipping...")
            continue
        # create an empty dataframe
        df = pd.DataFrame()
        # loop through each path
        for path in paths:
            # read the data
            data = pd.read_csv(path)
            # check if the data is empty
            if data.empty:
                print(f"File {path} is empty, skipping...")
                continue
            # check if the data has the same columns
            if df.empty:
                df = data
            else:
                if not df.columns.equals(data.columns):
                    print(f"File {path} has different columns, skipping...")
                    continue
                # concatenate the data
                df = pd.concat([df, data], ignore_index=True)

# gather the data for each type
# gather_data(bvp_subject_paths, "BVP")
# gather_data(eda_subject_paths, "EDA")
# gather_data(hr_subject_paths, "HR")
# gather_data(ibi_subject_paths, "IBI")

In [14]:
def segment_signal(signal, sampling_rate, window_sec, overlap):
    window_size = int(sampling_rate * window_sec)
    step_size = int(window_size * (1 - overlap))
    
    segments = []
    starts = []

    for start in range(0, len(signal) - window_size + 1, step_size):
        end = start + window_size
        segments.append(signal[start:end])
        starts.append(start / sampling_rate)  # time in seconds
    print(f"Segmented signal into {len(segments)} segments of {window_sec} seconds each")
    return segments, starts

In [None]:
def label_bvp(bvp_path, window_sec=10, overlap=0.5, survey_labeled=None):
    # load bvp signal
    # filename = os.path.basename(bvp_path)
    parent_folder = os.path.dirname(bvp_path)
    filename = os.path.basename(parent_folder)
    bvp_raw = pd.read_csv(bvp_path, header=None)
    # check if the data is empty
    if bvp_raw.empty:
        print(f"File {bvp_path} is empty, skipping...")
        return
    start_time = bvp_raw.iloc[0, 0]
    sample_rate = bvp_raw.iloc[1, 0]
    signal = bvp_raw.iloc[2:, 0].values
    print(f"Loaded {len(signal)} samples from {bvp_path}")
    # segmentation
    segments, start_times = segment_signal(signal, sample_rate, window_sec, overlap)
    # print(f"Segmented into {len(segments)} windows of {window_sec}s each")
    # feature extraction
    features_list = []
    for i, seg in enumerate(segments):
        try:
            signals, info = nk.ppg_process(seg, sampling_rate=sample_rate)
            features = nk.hrv_time(signals, sampling_rate=sample_rate, show=False)
            features["window_start_sec"] = start_times[i]
            features["start_unix"] = start_time + start_times[i]
            features["end_unix"] = start_time + start_times[i] + window_sec
            features_list.append(features)
        except Exception as e:
            print(f"Skipping window {i} due to error: {e}")
    print(f"Extracted {len(features_list)} windows from {filename}")
    features_df = pd.concat(features_list, ignore_index=True)
    # add label
    features_df["label"] = None
    for i, row in features_df.iterrows():
        for j, survey_row in survey_labeled.iterrows():
            if row["start_unix"] >= survey_row["Start_unix"] and row["end_unix"] <= survey_row["End_unix"]:
                features_df.at[i, "label"] = survey_row["Stress level"]
                break
    # save to csv
    output_path = os.path.join("processed", f"BVP_{filename}.csv")
    features_df.to_csv(output_path, index=False)
    print(f"Saved {len(features_df)} rows to {output_path}")

In [16]:
len(bvp_subject_paths)
all_bvp_paths = []
for subject in bvp_subject_paths:
    paths = subject["paths"]
    for path in paths:
        all_bvp_paths.append(path)

In [17]:
all_bvp_paths.sort()
all_bvp_paths

['D:\\Kuliah\\TA\\Dataset\\Multimodal\\dataset\\15_1594140175\\BVP.csv',
 'D:\\Kuliah\\TA\\Dataset\\Multimodal\\dataset\\15_1594149654\\BVP.csv',
 'D:\\Kuliah\\TA\\Dataset\\Multimodal\\dataset\\15_1594213322\\BVP.csv',
 'D:\\Kuliah\\TA\\Dataset\\Multimodal\\dataset\\15_1594220239\\BVP.csv',
 'D:\\Kuliah\\TA\\Dataset\\Multimodal\\dataset\\15_1594238057\\BVP.csv',
 'D:\\Kuliah\\TA\\Dataset\\Multimodal\\dataset\\15_1594298975\\BVP.csv',
 'D:\\Kuliah\\TA\\Dataset\\Multimodal\\dataset\\15_1594311202\\BVP.csv',
 'D:\\Kuliah\\TA\\Dataset\\Multimodal\\dataset\\15_1594325201\\BVP.csv',
 'D:\\Kuliah\\TA\\Dataset\\Multimodal\\dataset\\15_1594378283\\BVP.csv',
 'D:\\Kuliah\\TA\\Dataset\\Multimodal\\dataset\\15_1594380033\\BVP.csv',
 'D:\\Kuliah\\TA\\Dataset\\Multimodal\\dataset\\15_1595334888\\BVP.csv',
 'D:\\Kuliah\\TA\\Dataset\\Multimodal\\dataset\\15_1595347920\\BVP.csv',
 'D:\\Kuliah\\TA\\Dataset\\Multimodal\\dataset\\15_1595360012\\BVP.csv',
 'D:\\Kuliah\\TA\\Dataset\\Multimodal\\dataset\\15_

In [18]:
len(all_bvp_paths)
print(all_bvp_paths[0])
bvp_raw = pd.read_csv(all_bvp_paths[2], header=None)

D:\Kuliah\TA\Dataset\Multimodal\dataset\15_1594140175\BVP.csv


In [19]:
survey_labeled = pd.read_csv("survey_labeled.csv")
survey_labeled["Stress level"].value_counts()

Stress level
2    179
0     46
1     20
Name: count, dtype: int64

In [20]:
label_bvp(all_bvp_paths[0], survey_labeled=survey_labeled, window_sec=10, overlap=0.5)

Loaded 499631 samples from D:\Kuliah\TA\Dataset\Multimodal\dataset\15_1594140175\BVP.csv
Segmented signal into 1560 segments of 10 seconds each


  warn(


Skipping window 1227 due to error: cannot convert float NaN to integer
Extracted 1559 windows from 15_1594140175
[   HRV_MeanNN    HRV_SDNN  HRV_SDANN1  HRV_SDNNI1  HRV_SDANN2  HRV_SDNNI2  \
0  774.147727  302.767479         NaN         NaN         NaN         NaN   

   HRV_SDANN5  HRV_SDNNI5   HRV_RMSSD    HRV_SDSD  ...  HRV_Prc80NN  \
0         NaN         NaN  507.944694  535.258569  ...     1078.125   

   HRV_pNN50  HRV_pNN20  HRV_MinNN  HRV_MaxNN   HRV_HTI  HRV_TINN  \
0  81.818182  81.818182    515.625   1328.125  3.666667     31.25   

   window_start_sec    start_unix      end_unix  
0               0.0  1.594140e+09  1.594140e+09  

[1 rows x 28 columns],    HRV_MeanNN    HRV_SDNN  HRV_SDANN1  HRV_SDNNI1  HRV_SDANN2  HRV_SDNNI2  \
0      893.75  460.800651         NaN         NaN         NaN         NaN   

   HRV_SDANN5  HRV_SDNNI5   HRV_RMSSD    HRV_SDSD  ...  HRV_Prc80NN  \
0         NaN         NaN  728.329132  768.811573  ...     1240.625   

   HRV_pNN50  HRV_pNN20  HR

In [75]:
all_bvp_paths[0]
bvp_raw = pd.read_csv(all_bvp_paths[0], header=None)
bvp_raw.iloc[1, 0]

64.0

In [94]:
window_size = int(64 * 10)
start_sample = int(1227 * window_size * (1 - 0.5))
end_sample = start_sample + window_size

flagged_bvp = (bvp_raw[start_sample:end_sample])
flagged_bvp = np.array(flagged_bvp.iloc[:, 0].values)
print(np.std(flagged_bvp))
if np.std(flagged_bvp) < 0.01:
    print(f"Skipping window {i}: signal too flat (std={np.std(flagged_bvp):.5f})")
#save flagged_bvp to csv
# flagged_bvp.to_csv("flagged_bvp.csv", index=False, header=False)
# signals, info = nk.ppg_process(flagged_bvp, sampling_rate=64)
# features = nk.hrv_time(signals, sampling_rate=64, show=False)

91.98466389275251
