In [1]:
import os
import pandas as pd

# Signals to collect, mapped to the rate used to derive per-sample timestamps
# (timestamp = row index / rate; presumably the rate is in Hz -- confirm
# against the recording device).
SIGNALS = {
    "BVP": 64,
    "EDA": 4,
    "HR": 1,
    "TEMP": 4
}

# Root data directory; files are looked up as DATA_DIR/<student>/<exam>/<SIGNAL>.csv
DATA_DIR = "./data"

# Column order shared by every combined output file
OUTPUT_COLUMNS = ["student", "exam", "timestamp", "value"]

# One list of per-recording frames per signal, concatenated once at the end
output_dfs = {signal: [] for signal in SIGNALS}

# Walk student folders. os.listdir returns entries in arbitrary order, so the
# listings are sorted to make the row order of the output files reproducible.
for student in sorted(os.listdir(DATA_DIR)):
    student_path = os.path.join(DATA_DIR, student)
    if not os.path.isdir(student_path):
        continue

    # Walk the exam folders of this student
    for exam in sorted(os.listdir(student_path)):
        exam_path = os.path.join(student_path, exam)
        if not os.path.isdir(exam_path):
            continue

        # Collect every desired signal that was recorded for this exam
        for signal, freq in SIGNALS.items():
            signal_file = os.path.join(exam_path, f"{signal}.csv")
            if not os.path.exists(signal_file):
                continue

            # NOTE(review): every row of the file is treated as a sample. If the
            # files start with metadata rows (e.g. start time / sample rate),
            # those rows end up in "value" and shift the timestamps -- verify.
            df = pd.read_csv(signal_file, header=None, names=["value"])
            df["timestamp"] = df.index / freq  # offset from the first row, in 1/freq steps
            df["student"] = student
            df["exam"] = exam
            output_dfs[signal].append(df[OUTPUT_COLUMNS])

# Save one combined file per signal
for signal, dfs in output_dfs.items():
    if dfs:
        combined_df = pd.concat(dfs, ignore_index=True)
    else:
        # pd.concat raises ValueError on an empty list; write a header-only
        # file instead so a missing signal does not abort the remaining ones.
        combined_df = pd.DataFrame(columns=OUTPUT_COLUMNS)
    combined_df.to_csv(f"{signal}_all.csv", index=False)
    print(f"{signal}_all.csv saved with {len(combined_df)} rows.")


BVP_all.csv saved with 28370578 rows.
EDA_all.csv saved with 1773180 rows.
HR_all.csv saved with 443062 rows.
TEMP_all.csv saved with 1773044 rows.


In [3]:
import pandas as pd

# Signals whose combined "<signal>_all.csv" files are summarised below
signals = ["HR", "EDA", "BVP", "TEMP"]

summary_list = []

for signal in signals:
    df = pd.read_csv(f"{signal}_all.csv")
    # Mean sample value per (student, exam), tagged with the signal it came from
    summary = (
        df.groupby(["student", "exam"])
        .agg(avg_value=("value", "mean"))
        .reset_index()
        .assign(signal=signal)
    )
    summary_list.append(summary)

# Stack the per-signal summaries into one long table and write it out
final_df = pd.concat(summary_list)
final_df.to_csv("signal_summary.csv", index=False)



In [7]:
import pandas as pd

# Combined heart-rate samples: one row per sample with student, exam, timestamp, value
df = pd.read_csv("HR_all.csv")

# Convert seconds to minutes, rounded to one decimal place: samples are pooled
# into 0.1-minute (6 s) bins, not whole minutes.
df["minute"] = (df["timestamp"] / 60).round(1)

# Simulate gender if not available (placeholder assignment, not real metadata).
# Student labels come from directory names whose casing is not guaranteed
# ("s1" here vs. "S1" in the grades table), so compare case-insensitively;
# an exact match would silently label every student "male" on a case mismatch.
is_female = df["student"].astype(str).str.strip().str.lower().isin(["s1", "s2", "s4"])
df["gender"] = is_female.map({True: "female", False: "male"})

# Mean / min / max heart rate per gender and time bin, pooled over all
# students and exams. Group by exam or student as well for more detail.
summary = df.groupby(["gender", "minute"]).agg(
    mean=("value", "mean"),
    min=("value", "min"),
    max=("value", "max")
).reset_index()

# Optional: restrict to first 180 minutes
summary = summary[summary["minute"] <= 180]

summary.to_csv("hr_summary_by_minute.csv", index=False)



In [5]:
# Exam grades keyed by (student, exam). Each row below lists the scores of
# students S1..S10 in order for one exam.
# NOTE(review): the "Final" scores exceed 100, so the final presumably uses a
# different maximum than the midterms -- confirm before comparing across exams.
grades = {
    (f"S{number}", exam): score
    for exam, scores in (
        ("Midterm 1", (78, 82, 77, 75, 67, 71, 64, 92, 80, 89)),
        ("Midterm 2", (82, 85, 90, 77, 77, 64, 33, 88, 39, 64)),
        ("Final", (182, 180, 188, 149, 157, 175, 110, 184, 126, 116)),
    )
    for number, score in enumerate(scores, start=1)
}



In [6]:
import pandas as pd

# Load the combined BVP samples (one row per sample, labelled by student and exam)
df = pd.read_csv("BVP_all.csv")

# Convert grades dict into a DataFrame
grades_df = pd.DataFrame([
    {"student": k[0], "exam": k[1], "grade": v}
    for k, v in grades.items()
])


def _label_key(label):
    """Return a case- and separator-insensitive form of a student/exam label.

    "S1" and "s1" both become "s1"; "Midterm 1" and "midterm_1" both become
    "midterm1".
    """
    return "".join(ch for ch in str(label).lower() if ch.isalnum())


# The student/exam labels in the signal data are directory names, which need
# not be spelled exactly like the keys of `grades` (case, spaces, underscores).
# An exact-match merge would then leave every grade empty without any error,
# so grades are looked up through a normalised key instead.
grade_by_key = {
    (_label_key(student), _label_key(exam)): grade
    for (student, exam), grade in grades.items()
}
if len(grade_by_key) != len(grades):
    raise ValueError("grades contains keys that differ only by case or separators")

# Resolve the grade once per distinct (student, exam) pair present in the data
# rather than once per sample row.
session_grades = df[["student", "exam"]].drop_duplicates().reset_index(drop=True)
session_grades["grade"] = [
    grade_by_key.get((_label_key(student), _label_key(exam)), float("nan"))
    for student, exam in zip(session_grades["student"], session_grades["exam"])
]

# Surface recordings that received no grade instead of failing silently
unmatched = session_grades[session_grades["grade"].isna()]
if not unmatched.empty:
    print(f"Warning: no grade found for {len(unmatched)} (student, exam) pair(s):")
    print(unmatched[["student", "exam"]].to_string(index=False))

# Merge with signal data; the original labels and column order are preserved,
# and validate guarantees the merge cannot duplicate sample rows.
merged = df.merge(
    session_grades,
    on=["student", "exam"],
    how="left",
    validate="many_to_one",
)

# Save to a new file
merged.to_csv("BVP_with_grades.csv", index=False)
