In [1]:
# Step 0: Imports
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import joblib
import os

# Step 1: Load engineered dataset
df_path = "../data/combined_engineered.csv"
df = pd.read_csv(df_path)
print("✅ Loaded engineered dataset")
print("Shape:", df.shape)
print("Label distribution:\n", df['label'].value_counts())

✅ Loaded engineered dataset
Shape: (1572, 50)
Label distribution:
 label
2    670
1    455
0    447
Name: count, dtype: int64


In [2]:
# Step 2: Define modality columns
modality_features = {
    "EEG": [col for col in df.columns if "eeg" in col],
    "GSR": [col for col in df.columns if "gsr" in col],
    "IVT": [col for col in df.columns if "ivt" in col],
    "TIVA": [col for col in df.columns if "tiva" in col],
    "FUSION": [col for col in df.columns if col not in ['label', 'Key', 'Participant_ID']]
}


In [3]:
# Step 3: Train modality-specific models
os.makedirs("../models", exist_ok=True)
results = {}

for modality, cols in modality_features.items():
    print(f"\nTraining model for {modality} (features: {len(cols)})")
    
    X = df[cols]
    y = df['label']
    
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    
    model = RandomForestClassifier(class_weight="balanced", random_state=42)
    model.fit(X_train, y_train)
    
    y_pred = model.predict(X_test)
    report = classification_report(y_test, y_pred, output_dict=True)
    results[modality] = report


Training model for EEG (features: 10)

Training model for GSR (features: 4)

Training model for IVT (features: 4)

Training model for TIVA (features: 26)

Training model for FUSION (features: 47)


In [4]:
 # Save model
    model_path = f"../models/randomforest_{modality.lower()}.pkl"
    joblib.dump(model, model_path)
    print(f"✅ {modality} model saved at {model_path}")
    print(f"Classification Report for {modality}:\n", classification_report(y_test, y_pred))

IndentationError: unexpected indent (3615960075.py, line 2)

In [5]:
for modality, cols in modality_features.items():
    print(f"\nTraining model for {modality} (features: {len(cols)})")
    
    X = df[cols]
    y = df['label']
    
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    
    model = RandomForestClassifier(class_weight="balanced", random_state=42)
    model.fit(X_train, y_train)
    
    y_pred = model.predict(X_test)
    report = classification_report(y_test, y_pred, output_dict=True)
    results[modality] = report
    
    # Save model
    model_path = f"../models/randomforest_{modality.lower()}.pkl"
    joblib.dump(model, model_path)
    
    print(f"✅ {modality} model saved at {model_path}")
    print(f"Classification Report for {modality}:\n", classification_report(y_test, y_pred))



Training model for EEG (features: 10)
✅ EEG model saved at ../models/randomforest_eeg.pkl
Classification Report for EEG:
               precision    recall  f1-score   support

           0       0.51      0.58      0.54        91
           1       0.52      0.36      0.43        97
           2       0.63      0.72      0.67       127

    accuracy                           0.57       315
   macro avg       0.55      0.55      0.55       315
weighted avg       0.56      0.57      0.56       315


Training model for GSR (features: 4)
✅ GSR model saved at ../models/randomforest_gsr.pkl
Classification Report for GSR:
               precision    recall  f1-score   support

           0       0.56      0.55      0.56        91
           1       0.55      0.53      0.54        97
           2       0.72      0.76      0.74       127

    accuracy                           0.63       315
   macro avg       0.61      0.61      0.61       315
weighted avg       0.62      0.63      0.62     

In [6]:
# Step 4: Compare performance
print("\n✅ Summary of F1-scores per modality:")
for modality, report in results.items():
    f1_macro = report['macro avg']['f1-score']
    print(f"{modality}: F1-macro = {f1_macro:.4f}")


✅ Summary of F1-scores per modality:
EEG: F1-macro = 0.5473
GSR: F1-macro = 0.6103
IVT: F1-macro = 0.4632
TIVA: F1-macro = 0.6496
FUSION: F1-macro = 0.6800
