Imports

In [None]:

import os, glob
import pandas as pd, numpy as np
import matplotlib.pyplot as plt, seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.feature_selection import VarianceThreshold
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib

import tensorflow as tf
from tensorflow.keras import layers, models


Project folders


In [None]:
# Create the working directories used by the rest of the notebook.
# exist_ok=True makes the cell safe to re-run.
for project_dir in ("project/data", "project/notebooks", "project/models"):
    os.makedirs(project_dir, exist_ok=True)

# Folder holding one sub-folder per student, and the modality suffixes that
# identify each student's CSV files (read as "<id>/<id>_<modality>.csv").
root = "/content/drive/MyDrive/STData"
modalities = ["EEG", "IVT", "EYE", "GSR", "TIVA"]


Build summary features per student

In [None]:

# Build one row of summary statistics (mean/std/min/max per numeric column,
# per modality) for every student. The result is cached on disk so re-running
# the notebook skips the pass over the raw CSV files.
features_csv = "project/data/merged_features.csv"

if os.path.exists(features_csv):
    all_data = pd.read_csv(features_csv)
    print("Loaded precomputed merged_features.csv:", all_data.shape)
else:
    students_data = []
    for student_id in range(1, 39):  # 1 to 38
        student_folder = os.path.join(root, str(student_id))
        feature_dict = {"Student_ID": student_id}
        for modality in modalities:
            file_path = os.path.join(student_folder, f"{student_id}_{modality}.csv")
            if not os.path.exists(file_path):
                # A missing modality simply contributes no features (NaN after
                # the per-student dicts are assembled into a frame).
                continue
            # low_memory=False infers each column's dtype from the whole file
            # instead of chunk by chunk. NOTE(review): the repeated warning in
            # this cell's saved output looks like pandas' mixed-type
            # DtypeWarning, which this setting avoids -- confirm on a re-run.
            df = pd.read_csv(file_path, low_memory=False)
            for col in df.columns:
                series = df[col]
                # Only numeric columns have meaningful summary statistics.
                # Skipping the rest explicitly replaces a blanket
                # try/except that hid every error and could leave a column
                # with only some of its four statistics.
                if not pd.api.types.is_numeric_dtype(series):
                    continue
                feature_dict[f"{modality}_{col}_mean"] = series.mean()
                feature_dict[f"{modality}_{col}_std"] = series.std()
                feature_dict[f"{modality}_{col}_min"] = series.min()
                feature_dict[f"{modality}_{col}_max"] = series.max()
        students_data.append(feature_dict)
    all_data = pd.DataFrame(students_data)
    # Make sure the cache folder exists even if the setup cell was not run.
    os.makedirs(os.path.dirname(features_csv), exist_ok=True)
    all_data.to_csv(features_csv, index=False)
    print("Saved: project/data/merged_features.csv")

print("Summary shape:", all_data.shape)
all_data.head()


  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.

Saved: project/data/merged_features.csv
Summary shape: (38, 553)


Unnamed: 0,Student_ID,EEG_UnixTime_mean,EEG_UnixTime_std,EEG_UnixTime_min,EEG_UnixTime_max,EEG_Delta_TP9_mean,EEG_Delta_TP9_std,EEG_Delta_TP9_min,EEG_Delta_TP9_max,EEG_Delta_AF7_mean,...,TIVA_Interocular Distance_min,TIVA_Interocular Distance_max,EEG_QuestionKey_mean,EEG_QuestionKey_std,EEG_QuestionKey_min,EEG_QuestionKey_max,EEG_Elements_mean,EEG_Elements_std,EEG_Elements_min,EEG_Elements_max
0,1,1680004000.0,192.182403,1680003000.0,1680004000.0,0.833664,0.378599,-0.352964,1.855107,0.574739,...,146.864014,171.006134,,,,,,,,
1,2,1676985000.0,278.776719,1676984000.0,1676985000.0,0.618608,0.437824,-0.432199,2.014942,0.354701,...,97.135643,153.222336,,,,,,,,
2,3,1678717000.0,349.528413,1678717000.0,1678718000.0,0.871065,0.44928,-0.483961,1.947719,0.469323,...,114.828568,151.62619,,,,,,,,
3,4,1676379000.0,270.939855,1676379000.0,1676380000.0,0.819391,0.350548,-0.237624,1.930022,0.515442,...,100.826195,130.640976,,,,,,,,
4,5,1676463000.0,366.82071,1676462000.0,1676463000.0,0.675257,0.389188,-0.579701,1.852745,0.685228,...,110.571152,187.176758,,,,,,,,


Merge PSY labels

In [None]:
# Derive one scalar PSY label per student and attach it to the feature table.
psy_records = []
psy_label_sources = {}  # Student_ID -> name of the PSY column the label came from
for sid in range(1, 39):
    f = os.path.join(root, str(sid), f"{sid}_PSY.csv")
    if not os.path.exists(f):
        # No PSY file: the left merge below leaves this student's label as NaN.
        continue
    df = pd.read_csv(f)

    # Prefer an explicitly named label column (case-insensitive match).
    candidates = [c for c in df.columns if str(c).lower() in ('correct', 'engagement', 'label')]
    if candidates:
        col = candidates[0]
    else:
        # Fallback: the first numeric column. An identifier column is excluded
        # so the student id can never be mistaken for the label.
        # NOTE(review): whether the first numeric column is a sensible label
        # depends on the PSY file layout -- confirm against the data.
        numcols = [c for c in df.select_dtypes(include=[np.number]).columns
                   if c != 'Student_ID']
        if not numcols:
            continue
        col = numcols[0]

    if pd.api.types.is_numeric_dtype(df[col]):
        val = df[col].mean()
    else:
        modes = df[col].mode()
        if modes.empty:
            # Column holds no non-null values, so there is no label to take.
            continue
        val = modes.iloc[0]

    psy_records.append({'Student_ID': sid, 'PSY_label': val})
    psy_label_sources[sid] = col

# Explicit columns keep the merge key present even when no PSY file was found.
psy_df = pd.DataFrame(psy_records, columns=['Student_ID', 'PSY_label'])
psy_df = psy_df.astype({'Student_ID': 'int64'})

# Drop any PSY_label left by a previous run of this cell; otherwise the merge
# would create PSY_label_x / PSY_label_y and the lookup below would fail.
all_data = pd.merge(
    all_data.drop(columns=['PSY_label'], errors='ignore'),
    psy_df,
    on="Student_ID",
    how="left",
)
print("After merge:", all_data.shape)
print("PSY label source columns:", sorted(set(psy_label_sources.values())))
print(all_data[['Student_ID','PSY_label']].head())


After merge: (38, 554)
   Student_ID  PSY_label
0           1   2.100000
1           2   1.909091
2           3   2.142857
3           4   2.142857
4           5   2.181818
