In [1]:
import os
import pandas as pd

# Signals to extract, mapped to the sampling rate (samples per second) used
# to derive each row's relative timestamp.
SIGNALS = {
    "BVP": 64,
    "EDA": 4,
    "HR": 1,
    "TEMP": 4,
}

# Root data directory; the walk expects <DATA_DIR>/<student>/<exam>/<SIGNAL>.csv
DATA_DIR = "./data"


def list_subdirs(path):
    """Return the names of the immediate subdirectories of *path*, sorted.

    ``os.listdir`` yields entries in arbitrary order; sorting keeps the row
    order of the combined output reproducible across runs and machines.
    Non-directory entries are ignored.
    """
    return sorted(
        name
        for name in os.listdir(path)
        if os.path.isdir(os.path.join(path, name))
    )


def collect_signals(data_dir=DATA_DIR, signals=None):
    """Load every ``<signal>.csv`` found under ``data_dir/<student>/<exam>/``.

    Args:
        data_dir: Root directory containing one subdirectory per student,
            each containing one subdirectory per exam.
        signals: Mapping of signal name to sampling rate. Defaults to
            ``SIGNALS``.

    Returns:
        A dict mapping each signal name to a list of DataFrames (one per
        file found) with the columns ``student``, ``exam``, ``timestamp``
        and ``value``. A signal with no files maps to an empty list.

    Raises:
        FileNotFoundError: If ``data_dir`` does not exist.
    """
    signals = SIGNALS if signals is None else signals
    frames = {signal: [] for signal in signals}

    for student in list_subdirs(data_dir):
        student_path = os.path.join(data_dir, student)

        for exam in list_subdirs(student_path):
            exam_path = os.path.join(student_path, exam)

            for signal, freq in signals.items():
                signal_file = os.path.join(exam_path, f"{signal}.csv")
                if not os.path.exists(signal_file):
                    continue

                # NOTE(review): every row of the file is treated as a sample.
                # If these are raw device exports whose leading rows carry
                # metadata (e.g. start time / sample rate), those rows end up
                # in "value" and shift the timestamps -- confirm the format.
                df = pd.read_csv(signal_file, header=None, names=["value"])
                # Seconds elapsed since the first row, from the row position.
                df["timestamp"] = df.index / freq
                df["student"] = student
                df["exam"] = exam
                frames[signal].append(
                    df[["student", "exam", "timestamp", "value"]]
                )

    return frames


def save_combined(frames_by_signal, output_dir="."):
    """Concatenate each signal's frames and write ``<signal>_all.csv``.

    Signals with no frames are reported and skipped instead of letting
    ``pd.concat`` raise ``ValueError`` on an empty list, so one missing
    signal does not prevent the remaining files from being written.

    Args:
        frames_by_signal: Mapping as returned by ``collect_signals``.
        output_dir: Directory the CSV files are written to (default: the
            current working directory).

    Returns:
        A dict mapping each written signal name to its combined row count.
    """
    row_counts = {}
    for signal, dfs in frames_by_signal.items():
        if not dfs:
            print(f"{signal}: no input files found, skipping.")
            continue

        combined_df = pd.concat(dfs, ignore_index=True)
        combined_df.to_csv(
            os.path.join(output_dir, f"{signal}_all.csv"), index=False
        )
        print(f"{signal}_all.csv saved with {len(combined_df)} rows.")
        row_counts[signal] = len(combined_df)

    return row_counts


if __name__ == "__main__":
    # Per-signal lists of DataFrames, kept as a module-level name so it is
    # still available after the script / notebook cell has run.
    output_dfs = collect_signals(DATA_DIR, SIGNALS)
    save_combined(output_dfs)


BVP_all.csv saved with 28370578 rows.
EDA_all.csv saved with 1773180 rows.
HR_all.csv saved with 443062 rows.
TEMP_all.csv saved with 1773044 rows.
