# Late Fusion Model with EEG Modality

This notebook extends the existing late fusion model to include EEG data.

**Modalities:**
1. **Physiological data** (pupil metrics)
2. **Behavioral data** (reaction time, decision time, etc.)
3. **Gaze data** (gaze position, movements, fixations)
4. **EEG data** (band powers across frequency bands and regions) ← NEW

**Goals:**
1. Train separate models for each modality
2. Compare late fusion with/without EEG
3. Analyze EEG contribution to predictions

In [7]:
import json
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, LeaveOneGroupOut
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import pickle
import warnings
warnings.filterwarnings('ignore')

np.random.seed(42)
sns.set_style('whitegrid')

## 1. Load Pre-Extracted Features

In [8]:
# Load existing features (physio, behavior, gaze)with open('../data/results/features_POST/extracted_features_POST.pkl', 'rb') as f:    feature_data = pickle.load(f)merged_df = feature_data['merged_df']physio_cols = feature_data['physio_cols']behavior_cols = feature_data['behavior_cols']gaze_cols = feature_data['gaze_cols']print(f"✓ Loaded {len(merged_df)} trials from {merged_df['subject_id'].nunique()} subjects")print(f"  Features extracted on: {feature_data['metadata']['extraction_date']}")print(f"  Baseline method: {feature_data['metadata']['baseline_method']}")print(f"\nFeature counts:")print(f"  Physiology (POST): {len(physio_cols)} features")print(f"  Behavior: {len(behavior_cols)} features")print(f"  Gaze: {len(gaze_cols)} features")


✓ Loaded 12511 trials from 97 subjects
  Features extracted on: 2025-12-12 16:04:34
  Baseline method: t3_stable_pre_decision

Feature counts:
  Physiology (POST): 13 features
  Behavior: 7 features
  Gaze: 20 features


In [9]:
# Load EEG featureswith open('../data/results/features_POST/eeg_features_POST.pkl', 'rb') as f:    eeg_data = pickle.load(f)eeg_features_df = eeg_data['eeg_features_df']eeg_cols = eeg_data['feature_columns']print(f"✓ Loaded EEG features: {len(eeg_features_df)} trials")print(f"  EEG features: {len(eeg_cols)} features")print(f"  Frequency bands: {list(eeg_data['metadata']['frequency_bands'].keys())}")print(f"  Brain regions: {eeg_data['metadata']['regions']}")


✓ Loaded EEG features: 10 trials
  EEG features: 20 features
  Frequency bands: ['Delta', 'Theta', 'Alpha', 'Beta', 'Gamma']
  Brain regions: ['Frontal', 'Central', 'Parietal', 'Occipital']


## 2. Merge EEG Features with Existing Data

In [10]:
# Merge EEG features with existing features
# Match on subject_id and trial_id
merged_with_eeg = merged_df.merge(
    eeg_features_df,
    on=['subject_id', 'trial_id'],
    how='inner'
)

print(f"\n✓ Merged data:")
print(f"  Total trials after merge: {len(merged_with_eeg)}")
print(f"  Subjects: {merged_with_eeg['subject_id'].nunique()}")
print(f"  Total features: {len(physio_cols)} + {len(behavior_cols)} + {len(gaze_cols)} + {len(eeg_cols)} = {len(physio_cols) + len(behavior_cols) + len(gaze_cols) + len(eeg_cols)}")

# Check for trials lost in merge
if len(merged_with_eeg) < len(merged_df):
    print(f"\n⚠ Warning: Lost {len(merged_df) - len(merged_with_eeg)} trials in merge")
    print(f"  This is expected if EEG data is only available for a subset of trials")

print(f"\nOutcome distribution:")
print(merged_with_eeg['outcome'].value_counts())


✓ Merged data:
  Total trials after merge: 10
  Subjects: 1
  Total features: 13 + 7 + 20 + 20 = 60

  This is expected if EEG data is only available for a subset of trials

Outcome distribution:
outcome
1    9
0    1
Name: count, dtype: int64


## 3. Prepare Feature Sets

In [11]:
# Prepare feature arrays for model training
X_physio = SimpleImputer(strategy='mean').fit_transform(merged_with_eeg[physio_cols])
X_behavior = SimpleImputer(strategy='mean').fit_transform(merged_with_eeg[behavior_cols])
X_gaze = SimpleImputer(strategy='mean').fit_transform(merged_with_eeg[gaze_cols])
X_eeg = SimpleImputer(strategy='mean').fit_transform(merged_with_eeg[eeg_cols])
y = merged_with_eeg['outcome'].values
subjects = merged_with_eeg['subject_id'].values

print(f"Feature array shapes:")
print(f"  X_physio: {X_physio.shape}")
print(f"  X_behavior: {X_behavior.shape}")
print(f"  X_gaze: {X_gaze.shape}")
print(f"  X_eeg: {X_eeg.shape}")
print(f"  y: {y.shape}")

Feature array shapes:
  X_physio: (10, 13)
  X_behavior: (10, 7)
  X_gaze: (10, 20)
  X_eeg: (10, 20)
  y: (10,)


## 4. Train Individual Modality Models

In [12]:
def train_evaluate_modality(X, y, subjects, modality_name):
    """Train and evaluate model with LOSO cross-validation."""
    print(f"\n{'='*50}\n{modality_name} Model\n{'='*50}")
    
    n_subjects = len(np.unique(subjects))
    print(f"Number of subjects: {n_subjects}")
    
    # Check if we have enough subjects for LOSO
    if n_subjects < 2:
        print(f"⚠ Warning: Only {n_subjects} subject(s) available. Using train/test split instead of LOSO.")
        
        # Use simple train/test split
        from sklearn.model_selection import train_test_split
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.3, random_state=42, stratify=y if len(np.unique(y)) > 1 else None
        )
        
        model = RandomForestClassifier(n_estimators=100, max_depth=5, min_samples_split=10,
                                       min_samples_leaf=5, random_state=42)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        
        acc = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average='weighted')
        
        print(f"Accuracy: {acc:.3f}")
        print(f"F1-Score: {f1:.3f}")
        
        return {
            'model': model,
            'accuracy': acc,
            'accuracy_sem': 0.0,  # No SEM for single split
            'f1_score': f1,
            'f1_sem': 0.0,
            'predictions': y_pred,
            'y_true': y_test
        }
    
    # Standard LOSO cross-validation
    model = RandomForestClassifier(n_estimators=100, max_depth=5, min_samples_split=10,
                                   min_samples_leaf=5, random_state=42)
    logo = LeaveOneGroupOut()
    
    subject_accs = {}
    subject_f1s = {}
    preds_all, y_true_all = [], []
    
    for train_idx, test_idx in logo.split(X, y, subjects):
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]
        
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        
        test_subject = subjects[test_idx][0]
        acc = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average='weighted')
        subject_accs[test_subject] = acc
        subject_f1s[test_subject] = f1
        preds_all.extend(y_pred)
        y_true_all.extend(y_test)
    
    subject_acc_values = np.array(list(subject_accs.values()))
    subject_f1_values = np.array(list(subject_f1s.values()))
    
    print(f"Accuracy: {np.mean(subject_acc_values):.3f} ± {stats.sem(subject_acc_values):.3f} (SEM)")
    print(f"F1-Score: {np.mean(subject_f1_values):.3f} ± {stats.sem(subject_f1_values):.3f} (SEM)")
    
    return {
        'model': model,
        'accuracy': np.mean(subject_acc_values),
        'accuracy_sem': stats.sem(subject_acc_values),
        'f1_score': np.mean(subject_f1_values),
        'f1_sem': stats.sem(subject_f1_values),
        'predictions': preds_all,
        'y_true': y_true_all
    }

# Train all modalities
results_physio = train_evaluate_modality(X_physio, y, subjects, "Physiology")
results_behavior = train_evaluate_modality(X_behavior, y, subjects, "Behavior")
results_gaze = train_evaluate_modality(X_gaze, y, subjects, "Gaze")
results_eeg = train_evaluate_modality(X_eeg, y, subjects, "EEG")


Physiology Model
Number of subjects: 1


ValueError: The least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.

## 5. Late Fusion Implementation

In [None]:
def late_fusion(X_modalities, y, subjects, modality_names, fusion_method='weighted'):
    """
    Late fusion by combining modality predictions.
    
    fusion_method:
    - 'average': Simple average of probabilities
    - 'weighted': Learn weights via logistic regression (meta-learner)
    - 'stacking': Random Forest meta-learner on probability predictions
    """
    print(f"\n{'='*50}\nLate Fusion: {fusion_method.upper()}\n{'='*50}")
    
    n_subjects = len(np.unique(subjects))
    print(f"Number of subjects: {n_subjects}")
    
    # Check if we have enough subjects for LOSO
    if n_subjects < 2:
        print(f"⚠ Warning: Only {n_subjects} subject(s) available. Using train/test split instead of LOSO.")
        
        # Use simple train/test split
        from sklearn.model_selection import train_test_split
        
        # Split indices
        indices = np.arange(len(y))
        train_idx, test_idx = train_test_split(
            indices, test_size=0.3, random_state=42, stratify=y if len(np.unique(y)) > 1 else None
        )
        
        # Train base models and get probabilities
        base_models = [RandomForestClassifier(n_estimators=100, max_depth=5, min_samples_split=10,
                                              min_samples_leaf=5, random_state=42)
                       for _ in X_modalities]
        
        train_probs, test_probs = [], []
        for X, model in zip(X_modalities, base_models):
            X_train, X_test = X[train_idx], X[test_idx]
            y_train = y[train_idx]
            
            model.fit(X_train, y_train)
            train_probs.append(model.predict_proba(X_train)[:, 1])
            test_probs.append(model.predict_proba(X_test)[:, 1])
        
        train_probs = np.column_stack(train_probs)
        test_probs = np.column_stack(test_probs)
        y_train, y_test = y[train_idx], y[test_idx]
        
        # Fusion
        if fusion_method == 'average':
            y_pred = (np.mean(test_probs, axis=1) > 0.5).astype(int)
            weights = np.ones(len(X_modalities)) / len(X_modalities)
        elif fusion_method == 'weighted':
            meta = LogisticRegression(random_state=42, max_iter=1000)
            meta.fit(train_probs, y_train)
            weights = meta.coef_[0]
            y_pred = meta.predict(test_probs)
        elif fusion_method == 'stacking':
            meta = RandomForestClassifier(n_estimators=50, max_depth=3, random_state=42)
            meta.fit(train_probs, y_train)
            weights = meta.feature_importances_
            y_pred = meta.predict(test_probs)
        
        # Normalize weights
        if fusion_method == 'weighted':
            norm_weights = np.exp(weights) / np.sum(np.exp(weights))
        else:
            norm_weights = weights / np.sum(weights)
        
        acc = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average='weighted')
        
        print(f"Accuracy: {acc:.3f}")
        print(f"F1-Score: {f1:.3f}")
        print(f"\nModality Weights:")
        for name, w in zip(modality_names, norm_weights):
            print(f"  {name}: {w:.3f}")
        
        return {
            'accuracy': acc,
            'accuracy_sem': 0.0,
            'f1_score': f1,
            'f1_sem': 0.0,
            'weights': norm_weights,
            'modality_names': modality_names,
            'predictions': y_pred,
            'y_true': y_test
        }
    
    # Standard LOSO cross-validation
    logo = LeaveOneGroupOut()
    base_models = [RandomForestClassifier(n_estimators=100, max_depth=5, min_samples_split=10,
                                          min_samples_leaf=5, random_state=42)
                   for _ in X_modalities]
    
    subject_accs = {}
    subject_f1s = {}
    all_weights = []
    preds_all, y_true_all = [], []
    
    for train_idx, test_idx in logo.split(X_modalities[0], y, subjects):
        # Train base models and get probabilities
        train_probs, test_probs = [], []
        
        for X, model in zip(X_modalities, base_models):
            X_train, X_test = X[train_idx], X[test_idx]
            y_train = y[train_idx]
            
            model.fit(X_train, y_train)
            train_probs.append(model.predict_proba(X_train)[:, 1])
            test_probs.append(model.predict_proba(X_test)[:, 1])
        
        train_probs = np.column_stack(train_probs)
        test_probs = np.column_stack(test_probs)
        y_train, y_test = y[train_idx], y[test_idx]
        
        # Fusion
        if fusion_method == 'average':
            y_pred = (np.mean(test_probs, axis=1) > 0.5).astype(int)
            weights = np.ones(len(X_modalities)) / len(X_modalities)
            
        elif fusion_method == 'weighted':
            meta = LogisticRegression(random_state=42, max_iter=1000)
            meta.fit(train_probs, y_train)
            weights = meta.coef_[0]
            y_pred = meta.predict(test_probs)
            
        elif fusion_method == 'stacking':
            meta = RandomForestClassifier(n_estimators=50, max_depth=3, random_state=42)
            meta.fit(train_probs, y_train)
            weights = meta.feature_importances_
            y_pred = meta.predict(test_probs)
        
        # Store subject-level metrics
        test_subject = subjects[test_idx][0]
        acc = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average='weighted')
        subject_accs[test_subject] = acc
        subject_f1s[test_subject] = f1
        all_weights.append(weights)
        preds_all.extend(y_pred)
        y_true_all.extend(y_test)
    
    # Convert to arrays
    subject_acc_values = np.array(list(subject_accs.values()))
    subject_f1_values = np.array(list(subject_f1s.values()))
    
    avg_weights = np.mean(all_weights, axis=0)
    
    # Normalize weights
    if fusion_method == 'weighted':
        norm_weights = np.exp(avg_weights) / np.sum(np.exp(avg_weights))
    else:
        norm_weights = avg_weights / np.sum(avg_weights)
    
    print(f"Accuracy: {np.mean(subject_acc_values):.3f} ± {stats.sem(subject_acc_values):.3f} (SEM)")
    print(f"F1-Score: {np.mean(subject_f1_values):.3f} ± {stats.sem(subject_f1_values):.3f} (SEM)")
    print(f"\nModality Weights:")
    for name, w in zip(modality_names, norm_weights):
        print(f"  {name}: {w:.3f}")
    
    return {
        'accuracy': np.mean(subject_acc_values),
        'accuracy_sem': stats.sem(subject_acc_values),
        'f1_score': np.mean(subject_f1_values),
        'f1_sem': stats.sem(subject_f1_values),
        'weights': norm_weights,
        'modality_names': modality_names,
        'predictions': preds_all,
        'y_true': y_true_all
    }

## 6. Compare Fusion With and Without EEG

In [None]:
# Fusion WITHOUT EEG (baseline)
X_modalities_no_eeg = [X_physio, X_behavior, X_gaze]
modality_names_no_eeg = ['Physiology', 'Behavior', 'Gaze']

results_fusion_no_eeg = late_fusion(X_modalities_no_eeg, y, subjects, 
                                     modality_names_no_eeg, 'weighted')

In [None]:
# Fusion WITH EEG
X_modalities_with_eeg = [X_physio, X_behavior, X_gaze, X_eeg]
modality_names_with_eeg = ['Physiology', 'Behavior', 'Gaze', 'EEG']

results_fusion_with_eeg = late_fusion(X_modalities_with_eeg, y, subjects,
                                       modality_names_with_eeg, 'weighted')

## 7. Comparison Analysis

In [None]:
# Create comparison dataframe
comparison_df = pd.DataFrame([
    {
        'Method': 'Physiology Only',
        'Accuracy': results_physio['accuracy'],
        'Accuracy_SEM': results_physio['accuracy_sem'],
        'F1-Score': results_physio['f1_score'],
        'F1_SEM': results_physio['f1_sem']
    },
    {
        'Method': 'Behavior Only',
        'Accuracy': results_behavior['accuracy'],
        'Accuracy_SEM': results_behavior['accuracy_sem'],
        'F1-Score': results_behavior['f1_score'],
        'F1_SEM': results_behavior['f1_sem']
    },
    {
        'Method': 'Gaze Only',
        'Accuracy': results_gaze['accuracy'],
        'Accuracy_SEM': results_gaze['accuracy_sem'],
        'F1-Score': results_gaze['f1_score'],
        'F1_SEM': results_gaze['f1_sem']
    },
    {
        'Method': 'EEG Only',
        'Accuracy': results_eeg['accuracy'],
        'Accuracy_SEM': results_eeg['accuracy_sem'],
        'F1-Score': results_eeg['f1_score'],
        'F1_SEM': results_eeg['f1_sem']
    },
    {
        'Method': 'Fusion (No EEG)',
        'Accuracy': results_fusion_no_eeg['accuracy'],
        'Accuracy_SEM': results_fusion_no_eeg['accuracy_sem'],
        'F1-Score': results_fusion_no_eeg['f1_score'],
        'F1_SEM': results_fusion_no_eeg['f1_sem']
    },
    {
        'Method': 'Fusion (With EEG)',
        'Accuracy': results_fusion_with_eeg['accuracy'],
        'Accuracy_SEM': results_fusion_with_eeg['accuracy_sem'],
        'F1-Score': results_fusion_with_eeg['f1_score'],
        'F1_SEM': results_fusion_with_eeg['f1_sem']
    }
])

print("\n" + "="*80)
print("PERFORMANCE COMPARISON")
print("="*80)
print(comparison_df.to_string(index=False))

# Calculate EEG contribution
acc_improvement = results_fusion_with_eeg['accuracy'] - results_fusion_no_eeg['accuracy']
print(f"\n{'='*80}")
print(f"EEG Contribution: {acc_improvement:.3f} accuracy improvement")
print(f"Relative improvement: {(acc_improvement / results_fusion_no_eeg['accuracy']) * 100:.2f}%")
print(f"{'='*80}")

In [None]:
# Visualize comparison
fig, axes = plt.subplots(1, 2, figsize=(15, 6))

# Accuracy
ax = axes[0]
x = range(len(comparison_df))
ax.barh(x, comparison_df['Accuracy'], xerr=comparison_df['Accuracy_SEM'], 
        capsize=5, color='steelblue', alpha=0.7)
ax.set_yticks(x)
ax.set_yticklabels(comparison_df['Method'])
ax.set_xlabel('Accuracy')
ax.set_title('Accuracy Comparison (error bars = SEM)')
ax.set_xlim([0, 1])
ax.axvline(0.5, color='red', linestyle='--', alpha=0.3)
ax.grid(alpha=0.3, axis='x')

# F1-Score
ax = axes[1]
ax.barh(x, comparison_df['F1-Score'], xerr=comparison_df['F1_SEM'],
        capsize=5, color='coral', alpha=0.7)
ax.set_yticks(x)
ax.set_yticklabels(comparison_df['Method'])
ax.set_xlabel('F1-Score')
ax.set_title('F1-Score Comparison (error bars = SEM)')
ax.set_xlim([0, 1])
ax.axvline(0.5, color='red', linestyle='--', alpha=0.3)
ax.grid(alpha=0.3, axis='x')

plt.tight_layout()
plt.show()

## 8. Modality Weights Comparison

In [None]:
# Compare weights
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Without EEG
ax = axes[0]
weights_no_eeg = results_fusion_no_eeg['weights']
names_no_eeg = results_fusion_no_eeg['modality_names']
bars = ax.bar(names_no_eeg, weights_no_eeg, color='steelblue', alpha=0.7)
ax.set_ylabel('Weight')
ax.set_title('Modality Weights (Without EEG)')
ax.set_ylim([0, max(weights_no_eeg) * 1.2])
for i, v in enumerate(weights_no_eeg):
    ax.text(i, v + 0.01, f'{v:.3f}', ha='center', va='bottom')
ax.grid(alpha=0.3, axis='y')

# With EEG
ax = axes[1]
weights_with_eeg = results_fusion_with_eeg['weights']
names_with_eeg = results_fusion_with_eeg['modality_names']
colors = ['steelblue', 'coral', 'mediumseagreen', 'orange']
bars = ax.bar(names_with_eeg, weights_with_eeg, color=colors, alpha=0.7)
ax.set_ylabel('Weight')
ax.set_title('Modality Weights (With EEG)')
ax.set_ylim([0, max(weights_with_eeg) * 1.2])
for i, v in enumerate(weights_with_eeg):
    ax.text(i, v + 0.01, f'{v:.3f}', ha='center', va='bottom')
ax.grid(alpha=0.3, axis='y')

plt.tight_layout()
plt.show()

## 9. Summary

In [None]:
print("\n" + "="*80)
print("SUMMARY")
print("="*80)

print("\n1. INDIVIDUAL MODALITIES:")
print(f"   Physiology:  Acc={results_physio['accuracy']:.3f} ± {results_physio['accuracy_sem']:.3f}")
print(f"   Behavior:    Acc={results_behavior['accuracy']:.3f} ± {results_behavior['accuracy_sem']:.3f}")
print(f"   Gaze:        Acc={results_gaze['accuracy']:.3f} ± {results_gaze['accuracy_sem']:.3f}")
print(f"   EEG:         Acc={results_eeg['accuracy']:.3f} ± {results_eeg['accuracy_sem']:.3f}")

print("\n2. LATE FUSION:")
print(f"   Without EEG: Acc={results_fusion_no_eeg['accuracy']:.3f} ± {results_fusion_no_eeg['accuracy_sem']:.3f}")
print(f"   With EEG:    Acc={results_fusion_with_eeg['accuracy']:.3f} ± {results_fusion_with_eeg['accuracy_sem']:.3f}")
print(f"   Improvement: {acc_improvement:+.3f} ({(acc_improvement / results_fusion_no_eeg['accuracy']) * 100:+.2f}%)")

print("\n3. EEG CONTRIBUTION:")
eeg_weight = results_fusion_with_eeg['weights'][-1]
print(f"   EEG weight in fusion: {eeg_weight:.3f} ({eeg_weight*100:.1f}%)")

print("\n4. BEST METHOD:")
best = comparison_df.loc[comparison_df['Accuracy'].idxmax()]
print(f"   {best['Method']}: Acc={best['Accuracy']:.3f}")

print("\n" + "="*80)