# PhysioNet Architecture Variants Training

Compare the current EEG-ARNN baseline against the **two most promising** architectural improvements (three configurations in total):

1. **Baseline (Current)**: 3x (TFEM + 1-layer CARM) - Reference point
2. **Multi-Layer GCN (2-layer)**: 3x (TFEM + 2-layer CARM) - Can see friends of friends (2 hops in graph)
3. **Multi-Layer GCN (3-layer)**: 3x (TFEM + 3-layer CARM) - Can see 3 degrees of separation (3 hops in graph)

**Why these 3?**
- Multi-layer GCN is the KEY improvement from research (Kipf & Welling 2017, GCN paper)
- 2-layer vs 3-layer tests optimal depth for EEG channel graphs
- Computation time is expected to stay about the same as the baseline (~30 min per subject)

All variants trained with:
- Same 5 subjects
- 20 epochs (up from 10)
- 2-fold CV

## Setup and Imports

In [19]:
import sys
from pathlib import Path
import warnings
import json
from datetime import datetime
import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.auto import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

import mne

from train_utils import (
    load_preprocessed_data, filter_classes, normalize_data,
    EEGDataset
)

# Keep cell output readable: suppress every Python warning and restrict MNE
# logging to errors.
# NOTE(review): the blanket 'ignore' filter also hides deprecation and
# numerical warnings from torch/pandas/scipy - confirm that is intended.
warnings.filterwarnings('ignore')
mne.set_log_level('ERROR')
# Global plot styling used by the figures later in the notebook.
sns.set_context('notebook', font_scale=1.1)
plt.style.use('seaborn-v0_8')

# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
if torch.cuda.is_available():
    # Reports the name of CUDA device index 0.
    print(f"GPU: {torch.cuda.get_device_name(0)}")

Using device: cpu


## Configuration

In [20]:
# Single source of settings for the whole experiment; read by the data loading,
# training, summary and report cells.
CONFIG = {
    'data': {
        # Directory of preprocessed recordings (not read directly in this notebook).
        'preprocessed_dir': Path('data/physionet/derived/preprocessed'),
        # CSV index that load_subject_data filters by status/category/subject.
        'index_file': Path('data/physionet/derived/physionet_preprocessed_index.csv'),
        # Class ids passed to filter_classes.
        'selected_classes': [1, 2],
        # Epoch window and baseline interval forwarded to load_preprocessed_data
        # (presumably seconds relative to the event - confirm in train_utils).
        'tmin': -1.0,
        'tmax': 5.0,
        'baseline': (-0.5, 0)
    },
    'training': {
        'epochs': 20,  # Epochs per fold (raised from 10 in the previous experiment)
        'learning_rate': 0.001,  # Adam learning rate
        'batch_size': 32,
        'n_folds': 2  # Number of StratifiedKFold splits
    },
    # Subjects to process, matched against the 'subject' column of the index file.
    'subjects': ['S001', 'S002', 'S005', 'S006', 'S007'],
    # All CSV/PNG/Markdown outputs are written below this directory.
    'output_dir': Path('results/architecture_variants')
}

CONFIG['output_dir'].mkdir(parents=True, exist_ok=True)
# default=str lets json serialise the Path objects as plain strings.
print(json.dumps(CONFIG, indent=2, default=str))

{
  "data": {
    "preprocessed_dir": "data\\physionet\\derived\\preprocessed",
    "index_file": "data\\physionet\\derived\\physionet_preprocessed_index.csv",
    "selected_classes": [
      1,
      2
    ],
    "tmin": -1.0,
    "tmax": 5.0,
    "baseline": [
      -0.5,
      0
    ]
  },
  "training": {
    "epochs": 20,
    "learning_rate": 0.001,
    "batch_size": 32,
    "n_folds": 2
  },
  "subjects": [
    "S001",
    "S002",
    "S005",
    "S006",
    "S007"
  ],
  "output_dir": "results\\architecture_variants"
}


## Improved Architecture Modules

### Multi-Layer CARM (Can see friends of friends)

In [21]:
class MultiLayerCARM(nn.Module):
    """
    Multi-layer graph convolution for CARM.

    Every layer computes ``A_norm @ H @ Theta`` followed by batch norm and ELU.
    Stacking ``num_layers`` layers lets each EEG channel aggregate information
    from channels up to ``num_layers`` hops away in the learned graph.

    Parameters
    ----------
    num_channels : int
        Number of EEG channels (graph nodes)
    hidden_dim : int
        Feature dimension per node; input and output size of every layer
    num_layers : int
        Number of GCN layers (must be >= 1)
    share_adjacency : bool
        If True, all layers share the same adjacency matrix
        If False, each layer learns its own adjacency

    Raises
    ------
    ValueError
        If ``num_layers`` is smaller than 1.
    """
    def __init__(self, num_channels, hidden_dim=40, num_layers=2, share_adjacency=True):
        super(MultiLayerCARM, self).__init__()

        # With zero layers forward() would have nothing to return; fail early.
        if num_layers < 1:
            raise ValueError(f"num_layers must be >= 1, got {num_layers}")

        self.num_channels = num_channels
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.share_adjacency = share_adjacency

        # Learnable adjacency logits (one matrix, or one per layer)
        if share_adjacency:
            self.W = nn.Parameter(torch.empty(num_channels, num_channels))
            nn.init.xavier_uniform_(self.W)
        else:
            self.W_list = nn.ParameterList([
                nn.Parameter(torch.empty(num_channels, num_channels))
                for _ in range(num_layers)
            ])
            for W in self.W_list:
                nn.init.xavier_uniform_(W)

        # GCN feature transforms (Theta), one per layer
        self.gcn_layers = nn.ModuleList([
            nn.Linear(hidden_dim, hidden_dim, bias=False)
            for _ in range(num_layers)
        ])

        # Batch norm for each layer
        self.bn_layers = nn.ModuleList([
            nn.BatchNorm2d(hidden_dim)
            for _ in range(num_layers)
        ])

        self.activation = nn.ELU()

    def get_normalized_adjacency(self, W_param, device):
        """
        Compute normalized adjacency matrix: D^(-1/2) * A_tilde * D^(-1/2)

        Parameters
        ----------
        W_param : torch.Tensor
            (num_channels, num_channels) adjacency logits
        device : torch.device
            Device on which the identity (self-loop) matrix is created

        Returns
        -------
        A_norm : torch.Tensor
            Symmetrically normalized adjacency including self-loops
        A_sym : torch.Tensor
            Symmetric adjacency in [0, 1] without self-loops
        """
        # Symmetric adjacency with entries in [0, 1]
        A = torch.sigmoid(W_param)
        A_sym = (A + A.t()) / 2

        # Add self-loops
        I = torch.eye(self.num_channels, device=device, dtype=A_sym.dtype)
        A_tilde = A_sym + I

        # Degree normalization. Work on the degree *vector*: every degree is
        # >= 1 (non-negative weights plus the self-loop), so pow(-0.5) is always
        # finite. Raising the dense diagonal matrix to -0.5 instead creates inf
        # off the diagonal and NaN entries in the backward pass.
        d_inv_sqrt = A_tilde.sum(dim=1).pow(-0.5)
        A_norm = d_inv_sqrt.unsqueeze(1) * A_tilde * d_inv_sqrt.unsqueeze(0)

        return A_norm, A_sym

    def forward(self, x):
        """
        x: (batch, hidden_dim, num_channels, num_timepoints)

        Returns a tensor with the same layout as ``x`` and the symmetric
        adjacency (that of the first layer when adjacencies are not shared).
        """
        batch_size = x.size(0)
        hidden_dim = x.size(1)
        num_channels = x.size(2)
        time_steps = x.size(3)

        # Get adjacency matrices
        if self.share_adjacency:
            A_norm, A_sym = self.get_normalized_adjacency(self.W, x.device)
            adj_matrices = [A_norm] * self.num_layers
        else:
            per_layer = [self.get_normalized_adjacency(W, x.device) for W in self.W_list]
            adj_matrices = [A_norm for A_norm, _ in per_layer]
            A_sym = per_layer[0][1]  # Return first adjacency

        # Reshape for graph convolution: (batch * time, channels, hidden_dim)
        h = x.permute(0, 3, 2, 1).reshape(batch_size * time_steps, num_channels, hidden_dim)

        last_layer = self.num_layers - 1
        for layer_idx in range(self.num_layers):
            # Graph convolution: A_norm @ H @ Theta
            h = torch.matmul(adj_matrices[layer_idx], h)
            h = self.gcn_layers[layer_idx](h)

            # BatchNorm2d expects (batch, hidden, channels, time)
            h_reshaped = h.view(batch_size, time_steps, num_channels, hidden_dim)
            h_reshaped = h_reshaped.permute(0, 3, 2, 1)
            h_reshaped = self.bn_layers[layer_idx](h_reshaped)
            h_reshaped = self.activation(h_reshaped)

            # Back to the flat graph layout for the next layer
            if layer_idx < last_layer:
                h = h_reshaped.permute(0, 3, 2, 1).reshape(
                    batch_size * time_steps, num_channels, hidden_dim
                )

        return h_reshaped, A_sym

### Residual Multi-Layer CARM (With skip connections)

In [22]:
class ResidualMultiLayerCARM(nn.Module):
    """
    Multi-layer GCN with residual connections
    Helps gradient flow and prevents degradation
    """
    def __init__(self, num_channels, hidden_dim=40, num_layers=2):
        super(ResidualMultiLayerCARM, self).__init__()
        
        self.num_channels = num_channels
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        
        # Shared adjacency
        self.W = nn.Parameter(torch.FloatTensor(num_channels, num_channels))
        nn.init.xavier_uniform_(self.W)
        
        # GCN layers
        self.gcn_layers = nn.ModuleList([
            nn.Linear(hidden_dim, hidden_dim, bias=False)
            for _ in range(num_layers)
        ])
        
        self.bn_layers = nn.ModuleList([
            nn.BatchNorm2d(hidden_dim)
            for _ in range(num_layers)
        ])
        
        self.activation = nn.ELU()
    
    def get_normalized_adjacency(self, device):
        A = torch.sigmoid(self.W)
        A_sym = (A + A.t()) / 2
        I = torch.eye(self.num_channels, device=device)
        A_tilde = A_sym + I
        D_tilde = torch.diag(A_tilde.sum(dim=1))
        D_inv_sqrt = torch.pow(D_tilde, -0.5)
        D_inv_sqrt[torch.isinf(D_inv_sqrt)] = 0.0
        A_norm = D_inv_sqrt @ A_tilde @ D_inv_sqrt
        return A_norm, A_sym
    
    def forward(self, x):
        batch_size = x.size(0)
        hidden_dim = x.size(1)
        num_channels = x.size(2)
        time_steps = x.size(3)
        
        A_norm, A_sym = self.get_normalized_adjacency(x.device)
        
        # Reshape
        x_reshaped = x.permute(0, 3, 2, 1)
        h = x_reshaped.contiguous().view(batch_size * time_steps, num_channels, hidden_dim)
        
        # Store input for residual
        h_input = h
        
        for layer_idx in range(self.num_layers):
            # Graph convolution
            h = torch.matmul(A_norm, h)
            h = self.gcn_layers[layer_idx](h)
            
            # Add residual connection every 2 layers or at the end
            if layer_idx > 0 and layer_idx % 2 == 1:
                h = h + h_input
            
            # Batch norm and activation
            h_reshaped = h.view(batch_size, time_steps, num_channels, hidden_dim)
            h_reshaped = h_reshaped.permute(0, 3, 2, 1)
            h_reshaped = self.bn_layers[layer_idx](h_reshaped)
            h_reshaped = self.activation(h_reshaped)
            
            if layer_idx < self.num_layers - 1:
                h_reshaped = h_reshaped.permute(0, 3, 2, 1)
                h = h_reshaped.contiguous().view(batch_size * time_steps, num_channels, hidden_dim)
        
        return h_reshaped, A_sym

### TFEM (Same as before)

In [23]:
class TFEM(nn.Module):
    """
    Temporal Feature Extraction Module

    A (1 x 5) convolution that slides along the time axis only, followed by
    batch normalization and ELU. Padding of 2 keeps the number of timepoints.
    """
    def __init__(self, num_channels, hidden_dim=40, in_channels=None):
        super().__init__()

        self.num_channels = num_channels
        self.hidden_dim = hidden_dim
        # An unspecified in_channels means "first block": raw EEG has 1 feature map
        self.in_channels = 1 if in_channels is None else in_channels

        self.conv = nn.Conv2d(
            self.in_channels, hidden_dim, kernel_size=(1, 5), padding=(0, 2)
        )
        self.bn = nn.BatchNorm2d(hidden_dim)
        self.activation = nn.ELU()

    def forward(self, x):
        """
        x: (batch, channels, timepoints) or (batch, hidden_dim, channels, timepoints)

        Returns (batch, hidden_dim, channels, timepoints).
        """
        # 3-D input comes from the first block: insert the feature-map axis
        features = x.unsqueeze(1) if x.dim() == 3 else x
        return self.activation(self.bn(self.conv(features)))

## Architecture Variants

In [24]:
class EEGARNNVariant(nn.Module):
    """
    Configurable EEG-ARNN: a stack of (TFEM -> CARM) blocks followed by
    temporal average pooling and a linear classifier.

    Parameters
    ----------
    num_channels : int
        Number of EEG channels
    num_timepoints : int
        Accepted for interface symmetry; not used when building the model
    num_classes : int
        Size of the classifier output
    variant : str
        - 'baseline': 3x (TFEM + 1-layer CARM)
        - 'multi2': 3x (TFEM + 2-layer CARM)
        - 'multi3': 3x (TFEM + 3-layer CARM)
        - 'wider': 3x (TFEM + 1-layer CARM) with hidden_dim=60
        - 'deeper': 5x (TFEM + 1-layer CARM)
        - 'residual': 3x (TFEM + 2-layer Residual CARM)
    """

    # variant -> (hidden_dim, num_blocks, gcn_layers, use_residual)
    _VARIANT_SPECS = {
        'baseline': (40, 3, 1, False),
        'multi2': (40, 3, 2, False),
        'multi3': (40, 3, 3, False),
        'wider': (60, 3, 1, False),
        'deeper': (40, 5, 1, False),
        'residual': (40, 3, 2, True),
    }

    def __init__(self, num_channels, num_timepoints, num_classes, variant='baseline'):
        super().__init__()

        self.variant = variant
        self.num_channels = num_channels

        # Resolve architecture parameters from the lookup table
        try:
            hidden_dim, num_blocks, gcn_layers, use_residual = self._VARIANT_SPECS[variant]
        except (KeyError, TypeError):
            raise ValueError(f"Unknown variant: {variant}") from None

        self.hidden_dim = hidden_dim
        self.num_blocks = num_blocks

        # TFEM-CARM blocks; modules are created in execution order
        self.tfem_blocks = nn.ModuleList()
        self.carm_blocks = nn.ModuleList()

        for block_idx in range(num_blocks):
            # Only the first TFEM sees raw input (1 feature map)
            tfem_inputs = hidden_dim if block_idx else 1
            self.tfem_blocks.append(
                TFEM(num_channels, hidden_dim, in_channels=tfem_inputs)
            )

            # MultiLayerCARM also covers the single-layer variants
            if use_residual:
                carm = ResidualMultiLayerCARM(num_channels, hidden_dim, num_layers=gcn_layers)
            else:
                carm = MultiLayerCARM(num_channels, hidden_dim, num_layers=gcn_layers)
            self.carm_blocks.append(carm)

        # Average over time, keep one value per (feature, channel), then classify
        self.pool = nn.AdaptiveAvgPool2d((num_channels, 1))
        self.fc = nn.Linear(hidden_dim * num_channels, num_classes)

    def forward(self, x):
        """
        x: (batch, channels, timepoints)

        Returns the class logits and the adjacency of the last CARM block.
        """
        adjacency_matrices = []

        for tfem, carm in zip(self.tfem_blocks, self.carm_blocks):
            x, adj = carm(tfem(x))
            adjacency_matrices.append(adj)

        # Pool and classify
        pooled = self.pool(x)
        logits = self.fc(pooled.view(pooled.size(0), -1))

        # Last adjacency matrix is the one used for channel selection
        return logits, adjacency_matrices[-1]

## Data Loading

In [25]:
def load_subject_data(subject_id, config):
    """
    Collect the epochs of every successfully preprocessed motor run
    (execution and imagery) of one subject.

    Runs whose file is missing or fails to load are skipped. Returns
    ``(None, None)`` when nothing could be loaded; otherwise the concatenated
    data and labels restricted to ``config['data']['selected_classes']``.
    """
    data_cfg = config['data']
    index_df = pd.read_csv(data_cfg['index_file'])

    # Rows that are usable motor runs belonging to this subject
    wanted = (
        (index_df['status'] == 'success')
        & index_df['category'].isin(['motor_execution', 'motor_imagery'])
        & (index_df['subject'] == subject_id)
    )

    run_data = []
    run_labels = []

    for raw_path in index_df.loc[wanted, 'path']:
        fif_path = Path(raw_path)

        if not fif_path.exists():
            continue

        try:
            data, labels = load_preprocessed_data(
                fif_path,
                tmin=data_cfg['tmin'],
                tmax=data_cfg['tmax'],
                baseline=data_cfg['baseline']
            )

            # Ignore runs that produced no epochs
            if data is not None and len(data) > 0:
                run_data.append(data)
                run_labels.append(labels)

        except Exception as e:
            # Best effort: report the failing run and move on to the next one
            print(f"Error loading {fif_path.name}: {e}")
            continue

    if len(run_data) == 0:
        return None, None

    stacked_data = np.concatenate(run_data, axis=0)
    stacked_labels = np.concatenate(run_labels, axis=0)

    # Keep only the selected classes
    return filter_classes(stacked_data, stacked_labels, data_cfg['selected_classes'])

## Training Function

In [26]:
def train_variant(variant_name, data, labels, config, device):
    """
    Train a specific architecture variant with cross-validation

    A fresh model is trained for every stratified fold. After each epoch the
    model is scored on the fold's validation split, and the fold accuracy is
    the best validation accuracy over all epochs. Because the epoch is chosen
    on the same split that is reported, the numbers are optimistic; they are
    suitable for comparing variants, not as held-out estimates.

    Parameters
    ----------
    variant_name : str
        Variant key understood by EEGARNNVariant
    data : array
        Indexed as (trials, channels, timepoints)
    labels : array
        Class index per trial. The number of classes is the number of distinct
        labels, so labels must be 0..K-1 for CrossEntropyLoss.
    config : dict
        Uses config['training'] (epochs, learning_rate, batch_size, n_folds)
    device : torch.device
        Device on which the model is trained

    Returns
    -------
    results : dict
        'mean_accuracy', 'std_accuracy', 'mean_time' (seconds per fold),
        'fold_accuracies' and 'adjacency_matrix'. The adjacency matrix comes
        from the best epoch of the best-scoring fold (None if no epoch ever
        scored above 0).
    """
    from sklearn.model_selection import StratifiedKFold

    train_cfg = config['training']
    num_channels = data.shape[1]
    num_timepoints = data.shape[2]
    num_classes = len(np.unique(labels))

    skf = StratifiedKFold(n_splits=train_cfg['n_folds'], shuffle=True, random_state=42)

    fold_accuracies = []
    fold_times = []
    final_adjacency = None
    # Accuracy of the model final_adjacency was taken from. Tracked over all
    # folds so a later, worse fold cannot overwrite a better fold's matrix.
    best_overall_acc = 0.0

    for fold_idx, (train_idx, val_idx) in enumerate(skf.split(data, labels), 1):
        print(f"    Fold {fold_idx}/{train_cfg['n_folds']}...", end=' ')

        start_time = time.time()

        X_train, X_val = data[train_idx], data[val_idx]
        y_train, y_val = labels[train_idx], labels[val_idx]

        # Create model (new weights for every fold)
        model = EEGARNNVariant(
            num_channels=num_channels,
            num_timepoints=num_timepoints,
            num_classes=num_classes,
            variant=variant_name
        ).to(device)

        # Data loaders
        train_dataset = EEGDataset(X_train, y_train)
        val_dataset = EEGDataset(X_val, y_val)

        train_loader = DataLoader(train_dataset, batch_size=train_cfg['batch_size'], shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=train_cfg['batch_size'], shuffle=False)

        # Training
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=train_cfg['learning_rate'])

        best_val_acc = 0.0
        best_epoch = 0
        fold_adjacency = None

        for epoch in range(train_cfg['epochs']):
            # Train
            model.train()
            for batch_data, batch_labels in train_loader:
                batch_data = batch_data.to(device).float()
                batch_labels = batch_labels.to(device).long()

                optimizer.zero_grad()
                outputs, _ = model(batch_data)
                loss = criterion(outputs, batch_labels)
                loss.backward()
                optimizer.step()

            # Validate
            model.eval()
            val_correct = 0
            val_total = 0

            with torch.no_grad():
                for batch_data, batch_labels in val_loader:
                    batch_data = batch_data.to(device).float()
                    batch_labels = batch_labels.to(device).long()

                    outputs, adj = model(batch_data)
                    _, predicted = torch.max(outputs, 1)

                    val_total += batch_labels.size(0)
                    val_correct += (predicted == batch_labels).sum().item()

            val_acc = val_correct / val_total

            if val_acc > best_val_acc:
                best_val_acc = val_acc
                best_epoch = epoch + 1
                # adj is the adjacency returned for the last validation batch
                fold_adjacency = adj.detach().cpu().numpy()

        fold_time = time.time() - start_time
        fold_accuracies.append(best_val_acc)
        fold_times.append(fold_time)

        # Keep the adjacency of the best model seen over all folds
        if fold_adjacency is not None and best_val_acc > best_overall_acc:
            best_overall_acc = best_val_acc
            final_adjacency = fold_adjacency

        print(f"Acc: {best_val_acc:.4f} (epoch {best_epoch}), Time: {fold_time:.1f}s")

    return {
        'mean_accuracy': np.mean(fold_accuracies),
        'std_accuracy': np.std(fold_accuracies),
        'mean_time': np.mean(fold_times),
        'fold_accuracies': fold_accuracies,
        'adjacency_matrix': final_adjacency
    }

## Run All Variants

In [27]:
# The three configurations compared in this run (the baseline plus two
# multi-layer GCN variants; reduced from an earlier list of 6).
variants = {
    'baseline': '3x (TFEM + 1-layer CARM)',
    'multi2': '3x (TFEM + 2-layer CARM) - 2 hops',
    'multi3': '3x (TFEM + 3-layer CARM) - 3 hops'
}

all_results = []

# Training takes a long time and is often interrupted by hand. Building
# results_df in `finally` keeps every finished (subject, variant) result
# available to the following cells instead of leaving results_df undefined.
# After an interruption the last subject may lack some variants.
try:
    for subject_id in tqdm(CONFIG['subjects'], desc='Processing subjects'):
        print(f"\n{'='*80}")
        print(f"Subject: {subject_id}")
        print(f"{'='*80}")

        # Load data
        data, labels = load_subject_data(subject_id, CONFIG)

        # Fewer than 30 trials is treated as too little for cross-validation
        if data is None or len(data) < 30:
            print(f"Skipping {subject_id}: insufficient data")
            continue

        print(f"Data shape: {data.shape}")
        print(f"Labels: {np.unique(labels, return_counts=True)}")

        # Test each variant on the same data
        for variant_name, variant_desc in variants.items():
            print(f"\n  Variant: {variant_name} - {variant_desc}")

            results = train_variant(variant_name, data, labels, CONFIG, device)

            print(f"  Final: {results['mean_accuracy']:.4f} ± {results['std_accuracy']:.4f}")
            print(f"  Avg time per fold: {results['mean_time']:.1f}s")

            all_results.append({
                'subject': subject_id,
                'variant': variant_name,
                'description': variant_desc,
                'accuracy': results['mean_accuracy'],
                'std': results['std_accuracy'],
                'time': results['mean_time']
            })
finally:
    # One row per (subject, variant); an empty frame when nothing finished
    results_df = pd.DataFrame(all_results)

print(f"\n{'='*80}")
print("All variants tested!")
print(f"{'='*80}")

Processing subjects:   0%|          | 0/5 [00:00<?, ?it/s]


Subject: S001
Data shape: (231, 64, 769)
Labels: (array([0, 1]), array([154,  77], dtype=int64))

  Variant: baseline - 3x (TFEM + 1-layer CARM)
    Fold 1/2... 

Processing subjects:   0%|          | 0/5 [01:06<?, ?it/s]


KeyboardInterrupt: 

## Results Summary

In [None]:
if len(results_df) > 0:
    # Across-subject statistics per variant, best variant first
    summary = (
        results_df
        .groupby(['variant', 'description'])
        .agg({'accuracy': ['mean', 'std'], 'time': 'mean'})
        .round(4)
    )
    summary.columns = ['Mean Accuracy', 'Std Accuracy', 'Mean Time (s)']
    summary = summary.sort_values('Mean Accuracy', ascending=False)

    banner = "=" * 80
    print("\n" + banner)
    print("ARCHITECTURE VARIANT COMPARISON")
    print(banner)
    display(summary)

    # Best variant and its relative gain over the baseline (in percent)
    mean_accuracy = summary['Mean Accuracy']
    best_variant = mean_accuracy.idxmax()[0]  # index entries are (variant, description)
    best_acc = mean_accuracy.max()
    baseline_acc = summary.loc[('baseline', variants['baseline']), 'Mean Accuracy']
    improvement = (best_acc - baseline_acc) / baseline_acc * 100

    print(f"\nBest variant: {best_variant} ({best_acc:.4f})")
    print(f"Baseline: {baseline_acc:.4f}")
    print(f"Improvement: {improvement:+.2f}%")

    # Persist the per-subject rows and the aggregated table
    results_path = CONFIG['output_dir'] / 'variant_results.csv'
    results_df.to_csv(results_path, index=False)
    print(f"\nResults saved to: {results_path}")

    summary_path = CONFIG['output_dir'] / 'variant_summary.csv'
    summary.to_csv(summary_path)
    print(f"Summary saved to: {summary_path}")
else:
    print("No results to summarize.")

## Visualizations

In [None]:
if len(results_df) > 0:
    fig, axes = plt.subplots(2, 2, figsize=(18, 12))
    (ax_mean, ax_subject), (ax_time, ax_tradeoff) = axes

    # 1. Mean accuracy per variant with across-subject std as error bars
    variant_summary = (
        results_df.groupby('variant')['accuracy']
        .agg(['mean', 'std'])
        .reset_index()
        .sort_values('mean', ascending=False)
    )

    # Baseline in green, every other variant in blue
    colors = [
        '#2ecc71' if name == 'baseline' else '#3498db'
        for name in variant_summary['variant']
    ]

    ax_mean.barh(variant_summary['variant'], variant_summary['mean'],
                 xerr=variant_summary['std'], color=colors, alpha=0.8, capsize=5)
    ax_mean.set_xlabel('Accuracy', fontsize=12)
    ax_mean.set_title('Variant Comparison (Mean ± Std)', fontsize=14, fontweight='bold')
    ax_mean.grid(True, alpha=0.3, axis='x')

    # 2. Grouped bars: one group per subject, one bar per variant
    pivot = results_df.pivot(index='subject', columns='variant', values='accuracy')
    pivot.plot(kind='bar', ax=ax_subject, rot=0, alpha=0.8, width=0.8)
    ax_subject.set_ylabel('Accuracy', fontsize=12)
    ax_subject.set_title('Per-Subject Comparison', fontsize=14, fontweight='bold')
    ax_subject.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=9)
    ax_subject.grid(True, alpha=0.3, axis='y')

    # 3. Mean training time per fold, slowest variant first
    time_summary = results_df.groupby('variant')['time'].mean().sort_values(ascending=False)
    ax_time.barh(time_summary.index, time_summary.values, color='coral', alpha=0.8)
    ax_time.set_xlabel('Training Time (s) per fold', fontsize=12)
    ax_time.set_title('Computational Cost', fontsize=14, fontweight='bold')
    ax_time.grid(True, alpha=0.3, axis='x')

    # 4. Accuracy against cost, one labelled point per variant
    tradeoff = results_df.groupby('variant')[['accuracy', 'time']].mean().reset_index()

    ax_tradeoff.scatter(tradeoff['time'], tradeoff['accuracy'], s=200, alpha=0.6)
    for label, cost, acc in zip(tradeoff['variant'], tradeoff['time'], tradeoff['accuracy']):
        ax_tradeoff.annotate(label, (cost, acc), fontsize=9, ha='right')
    ax_tradeoff.set_xlabel('Training Time (s) per fold', fontsize=12)
    ax_tradeoff.set_ylabel('Mean Accuracy', fontsize=12)
    ax_tradeoff.set_title('Accuracy vs Computational Cost', fontsize=14, fontweight='bold')
    ax_tradeoff.grid(True, alpha=0.3)

    plt.tight_layout()
    fig_path = CONFIG['output_dir'] / 'variant_comparison.png'
    plt.savefig(fig_path, dpi=300, bbox_inches='tight')
    plt.show()

    print(f"\nVisualization saved to: {fig_path}")
else:
    print("No results to visualize.")

## Statistical Significance

In [None]:
from scipy import stats

if len(results_df) > 0:
    print("\n" + "="*80)
    print("STATISTICAL SIGNIFICANCE (Paired t-test vs Baseline)")
    print("="*80)

    # One row per subject, one column per variant. Pairing through this table
    # guarantees that each variant score is compared with the baseline score
    # of the *same* subject; matching by sorted position and equal length does
    # not. pivot raises ValueError on duplicate (subject, variant) rows.
    acc_by_subject = results_df.pivot(index='subject', columns='variant', values='accuracy')

    stat_results = []

    if 'baseline' in acc_by_subject.columns:
        for variant in results_df['variant'].unique():
            if variant == 'baseline':
                continue

            # Subjects that have a result for both the baseline and this variant
            paired = acc_by_subject[['baseline', variant]].dropna()

            # A paired t-test needs at least two pairs
            if len(paired) < 2:
                continue

            baseline_accs = paired['baseline'].values
            variant_accs = paired[variant].values

            t_stat, p_value = stats.ttest_rel(variant_accs, baseline_accs)

            mean_diff = np.mean(variant_accs - baseline_accs)
            pct_improvement = (mean_diff / np.mean(baseline_accs)) * 100

            # A NaN p-value (e.g. identical differences) falls through to 'ns'
            significance = '***' if p_value < 0.001 else '**' if p_value < 0.01 else '*' if p_value < 0.05 else 'ns'

            stat_results.append({
                'Variant': variant,
                'Mean Diff': mean_diff,
                'Improvement (%)': pct_improvement,
                't-statistic': t_stat,
                'p-value': p_value,
                'Significance': significance
            })

    if stat_results:
        stat_df = pd.DataFrame(stat_results)
        stat_df = stat_df.sort_values('Improvement (%)', ascending=False)

        # Format for display (the CSV below keeps the unformatted numbers)
        stat_df_display = stat_df.copy()
        stat_df_display['Mean Diff'] = stat_df_display['Mean Diff'].apply(lambda x: f"{x:.4f}")
        stat_df_display['Improvement (%)'] = stat_df_display['Improvement (%)'].apply(lambda x: f"{x:+.2f}%")
        stat_df_display['t-statistic'] = stat_df_display['t-statistic'].apply(lambda x: f"{x:.3f}")
        stat_df_display['p-value'] = stat_df_display['p-value'].apply(lambda x: f"{x:.4f}")

        display(stat_df_display)

        print("\nSignificance codes: *** p<0.001, ** p<0.01, * p<0.05, ns: not significant")

        stat_path = CONFIG['output_dir'] / 'statistical_significance.csv'
        stat_df.to_csv(stat_path, index=False)
        print(f"\nStatistical results saved to: {stat_path}")
    else:
        # Without this guard, sorting an empty frame raised KeyError
        print("No variant has at least two subjects paired with the baseline; skipping t-tests.")
else:
    print("No results for statistical testing.")

## Generate Report

In [None]:
if len(results_df) > 0:
    # Rebuild the summary table so this cell does not depend on the summary cell
    summary = (
        results_df
        .groupby(['variant', 'description'])
        .agg({'accuracy': ['mean', 'std'], 'time': 'mean'})
        .round(4)
    )
    summary.columns = ['Mean Accuracy', 'Std Accuracy', 'Mean Time (s)']
    summary = summary.sort_values('Mean Accuracy', ascending=False)

    # Best variant and its relative gain over the baseline (in percent)
    mean_accuracy = summary['Mean Accuracy']
    best_variant = mean_accuracy.idxmax()[0]
    best_acc = mean_accuracy.max()
    baseline_acc = summary.loc[('baseline', variants['baseline']), 'Mean Accuracy']
    improvement = (best_acc - baseline_acc) / baseline_acc * 100

    # The Markdown report is assembled as a list of fragments and joined once
    report = [
        "# Architecture Variant Analysis Report\n",
        f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n",
        "## Objective\n",
        "Evaluate architectural improvements to EEG-ARNN to find optimal configuration.\n\n",
        "## Variants Tested\n\n",
    ]

    for variant_name, variant_desc in variants.items():
        report.append(f"- **{variant_name}**: {variant_desc}\n")
    report.append("\n")

    report.append("## Results\n\n")
    for idx, row in summary.iterrows():
        variant_name = idx[0]  # index entries are (variant, description)
        report += [
            f"**{variant_name}**\n",
            f"- Accuracy: {row['Mean Accuracy']:.4f} +/- {row['Std Accuracy']:.4f}\n",
            f"- Time: {row['Mean Time (s)']:.1f}s per fold\n\n",
        ]

    report += [
        "## Key Findings\n\n",
        f"1. **Best variant**: {best_variant} ({best_acc:.4f})\n",
        f"2. **Baseline**: {baseline_acc:.4f}\n",
        f"3. **Improvement**: {improvement:+.2f}%\n\n",
        "## Recommendations\n\n",
    ]

    # The recommendation depends only on the relative gain (threshold: 2 %)
    if improvement > 2:
        report += [
            f"The **{best_variant}** architecture shows significant improvement over baseline. ",
            "Recommend using this configuration for final experiments.\n",
        ]
    elif improvement > 0:
        report += [
            f"The **{best_variant}** architecture shows modest improvement. ",
            "Consider trade-off with computational cost.\n",
        ]
    else:
        report += [
            "No variant significantly outperforms baseline. ",
            "Baseline architecture is optimal for this dataset.\n",
        ]

    report_text = ''.join(report)
    print(report_text)

    report_path = CONFIG['output_dir'] / 'VARIANT_ANALYSIS_REPORT.md'
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write(report_text)

    print(f"\nReport saved to: {report_path}")
else:
    print("No results to generate report.")