In [3]:
# Modified Result_1-_Data_with_Phase1_RLS.py

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, Subset
import numpy as np
import scipy.io as sio
import os, glob, random, time, json
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from datetime import datetime

In [4]:
# Phase 1 RLS attention model import
from models_rls_phase1 import Phase1RLSModel

# Set random seeds for reproducibility
def set_seed(seed: int = 42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy's global generator and PyTorch (CPU and
    all CUDA devices), then switches cuDNN to deterministic mode with
    autotuning disabled.

    Args:
        seed: Integer seed shared by all generators.
    """
    seeders = (
        torch.manual_seed,
        torch.cuda.manual_seed_all,
        np.random.seed,
        random.seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Trade cuDNN speed for repeatable kernels.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


set_seed(42)

In [5]:
# Dataset class unchanged
class SEEDVII_Dataset(Dataset):
    """In-memory SEED-VII feature dataset (EEG and eye-movement features).

    Reads one ``.mat`` file per subject from ``<data_dir>/EEG_features`` and
    ``<data_dir>/EYE_features``, aligns both modalities window by window for
    each of the video keys 1..80, and keeps everything as NumPy arrays.

    Args:
        data_dir: Directory containing the ``EEG_features`` and
            ``EYE_features`` folders.
        modality: ``'eeg'``, ``'eye'`` or ``'multimodal'``; selects which
            feature tensors ``__getitem__`` returns.
        subset_ratio: Fraction of samples to keep. Values below 1.0 trigger a
            subsample (stratified by emotion label where possible).

    Raises:
        ValueError: If no usable (EEG, eye) feature pair could be loaded.
    """

    def __init__(self, data_dir: str = ".", modality: str = 'multimodal',
                 subset_ratio: float = 0.01):
        self.data_dir = data_dir
        self.modality = modality
        self.subset_ratio = subset_ratio

        # Dataset specifications
        self.num_classes = 7
        self.num_subjects = 20
        self.eeg_feature_dim = 310
        self.eye_feature_dim = 33

        # Load and process data
        self._load_data()
        if self.subset_ratio < 1.0:
            self._create_subset()

    def _load_data(self):
        """Load EEG and eye-movement features for every subject into arrays.

        Populates ``eeg_features``, ``eye_features``, ``emotion_labels`` and
        ``subject_labels``. ``subject_labels`` holds the position of the
        subject's EEG file in the lexicographically sorted file list, not the
        number in the file name.

        Raises:
            ValueError: If nothing usable was found under ``data_dir``.
        """
        print(f"Loading SEED-VII dataset from {self.data_dir}")

        eeg_data, eye_data = [], []
        emotion_labels, subject_labels = [], []

        # Get feature files
        eeg_dir = os.path.join(self.data_dir, 'EEG_features')
        eye_dir = os.path.join(self.data_dir, 'EYE_features')

        eeg_files = sorted(glob.glob(os.path.join(eeg_dir, '*.mat')))
        eye_files = sorted(glob.glob(os.path.join(eye_dir, '*.mat')))

        print(f"Found {len(eeg_files)} EEG files and {len(eye_files)} eye files")

        emotion_map = self._get_emotion_mapping()

        # Process subjects
        for subject_idx, eeg_file in enumerate(eeg_files[:self.num_subjects]):
            subject_name = os.path.splitext(os.path.basename(eeg_file))[0]
            print(f"Loading subject {subject_idx + 1}: {subject_name}")

            eye_file = self._find_matching_eye_file(subject_name, eye_files)
            if not eye_file:
                # Previously skipped silently; make the data loss visible.
                print(f"No eye file found for {subject_name}; skipping subject")
                continue

            try:
                eeg_mat = sio.loadmat(eeg_file)
                eye_mat = sio.loadmat(eye_file)
            except Exception as e:
                print(f"Error loading files for {subject_name}: {e}")
                continue

            # Process videos
            for video_id in range(1, 81):
                eeg_features, eye_features = self._extract_features(
                    eeg_mat, eye_mat, video_id)
                if eeg_features is None or eye_features is None:
                    continue

                # Truncate both modalities to the shorter one so rows stay aligned.
                min_windows = min(len(eeg_features), len(eye_features))
                if min_windows <= 0:
                    continue

                eeg_data.append(eeg_features[:min_windows])
                eye_data.append(eye_features[:min_windows])

                emotion_label = emotion_map.get(video_id, 6)
                emotion_labels.extend([emotion_label] * min_windows)
                subject_labels.extend([subject_idx] * min_windows)

        if not eeg_data:
            # np.vstack([]) would raise the same exception type with an opaque message.
            raise ValueError(
                f"No usable EEG/eye feature pairs found under {self.data_dir!r}; "
                "expected 'EEG_features' and 'EYE_features' folders with matching .mat files")

        # Convert to arrays
        self.eeg_features = np.vstack(eeg_data)
        self.eye_features = np.vstack(eye_data)
        self.emotion_labels = np.array(emotion_labels)
        self.subject_labels = np.array(subject_labels)

        print(f"Dataset loaded: {len(self.emotion_labels)} samples")
        print(f"EEG shape: {self.eeg_features.shape}, Eye shape: {self.eye_features.shape}")

    def _get_emotion_mapping(self):
        """Build the simplified video-id -> emotion-label table (ids 1..80).

        NOTE(review): each session writes 7 emotions x 4 videos = 28 ids, but
        sessions start only 20 ids apart, so every later session overwrites
        the last 8 ids of the previous one and ids above 80 are dropped. As a
        consequence label 4 is never assigned to any video. The behaviour is
        kept unchanged here; confirm the intended mapping against the dataset
        documentation before relying on the labels.
        """
        even_order = [0, 6, 3, 1, 5, 2, 4]
        odd_order = [5, 1, 2, 6, 0, 4, 3]

        emotion_map = {}
        for session in range(4):
            emotions = even_order if session % 2 == 0 else odd_order
            for i, emotion in enumerate(emotions):
                for video in range(4):
                    video_id = session * 20 + i * 4 + video + 1
                    if video_id <= 80:
                        emotion_map[video_id] = emotion
        return emotion_map

    def _find_matching_eye_file(self, subject_name, eye_files):
        """Return the eye-feature file belonging to ``subject_name``, or None.

        An exact match on the file stem is preferred. Plain substring matching
        pairs subject "2" with "12.mat" whenever that file sorts first, which
        silently mixes data from different subjects. The substring search is
        kept only as a fallback for differently named files (e.g.
        "<subject>_eye.mat").
        """
        for eye_file in eye_files:
            stem = os.path.splitext(os.path.basename(eye_file))[0]
            if stem == subject_name:
                return eye_file

        for eye_file in eye_files:
            if subject_name in os.path.basename(eye_file):
                return eye_file
        return None

    def _extract_features(self, eeg_mat, eye_mat, video_id):
        """Fetch the EEG and eye feature matrices of one video.

        Args:
            eeg_mat: Mapping loaded from the subject's EEG ``.mat`` file.
            eye_mat: Mapping loaded from the subject's eye ``.mat`` file.
            video_id: Integer video id used to build the lookup keys.

        Returns:
            ``(eeg_features, eye_features)``. An entry is ``None`` when its key
            is missing or the array is not ``(windows, feature_dim)`` after
            flattening 3-D EEG input.
        """
        video_key = str(video_id)

        # Try different key formats for EEG
        eeg_features = None
        for key in (f'de_LDS_{video_id}', f'de_{video_id}', video_key):
            if key in eeg_mat:
                eeg_features = eeg_mat[key]
                break

        eye_features = eye_mat[video_key] if video_key in eye_mat else None

        # Process EEG features
        if eeg_features is not None:
            if eeg_features.ndim == 3:
                eeg_features = eeg_features.reshape(eeg_features.shape[0], -1)
            # Reject anything that is not a 2-D matrix before indexing shape[1].
            if eeg_features.ndim != 2 or eeg_features.shape[1] != self.eeg_feature_dim:
                eeg_features = None

        # Process eye features
        if eye_features is not None and (
                eye_features.ndim != 2
                or eye_features.shape[1] != self.eye_feature_dim):
            eye_features = None

        return eeg_features, eye_features

    def _create_subset(self):
        """Keep ``subset_ratio`` of the samples, stratified by emotion if possible."""
        n_samples = len(self.emotion_labels)
        subset_size = max(1, int(n_samples * self.subset_ratio))

        try:
            indices = np.arange(n_samples)
            subset_indices, _ = train_test_split(
                indices, train_size=subset_size, stratify=self.emotion_labels, random_state=42)
        except Exception as exc:
            # Stratification can fail (e.g. too few samples per class); fall back
            # to a plain random draw without masking KeyboardInterrupt/SystemExit.
            print(f"Stratified subset failed ({exc}); using random subset instead")
            subset_indices = np.random.choice(n_samples, subset_size, replace=False)

        self.eeg_features = self.eeg_features[subset_indices]
        self.eye_features = self.eye_features[subset_indices]
        self.emotion_labels = self.emotion_labels[subset_indices]
        self.subject_labels = self.subject_labels[subset_indices]

        print(f"Created {self.subset_ratio*100:.1f}% subset: {len(self.emotion_labels)} samples")

    def __len__(self):
        """Number of samples (windows) currently held."""
        return len(self.emotion_labels)

    def __getitem__(self, idx):
        """Return one sample as a dict of tensors.

        Keys: ``'eeg'`` and/or ``'eye'`` depending on ``modality``, plus
        ``'label'`` and ``'subject'`` as scalar long tensors.
        """
        sample = {}

        if self.modality in ['eeg', 'multimodal']:
            sample['eeg'] = torch.FloatTensor(self.eeg_features[idx])
        if self.modality in ['eye', 'multimodal']:
            sample['eye'] = torch.FloatTensor(self.eye_features[idx])

        sample['label'] = torch.LongTensor([self.emotion_labels[idx]])[0]
        sample['subject'] = torch.LongTensor([self.subject_labels[idx]])[0]

        return sample

In [6]:
# Safe forward stripped down for single-model
def safe_forward(model, eeg=None):
    """Invoke ``model`` on ``eeg`` and hand back whatever it returns."""
    output = model(eeg)
    return output

In [7]:
def train_epoch_rls(model, dataloader, optimizer, criterion, device):
    """Run one optimisation pass over ``dataloader``.

    Each batch is a dict with ``'eeg'`` and ``'label'`` entries. The model is
    called on the EEG tensor and must return a pair whose first element is the
    logits; dimension 1 is squeezed before the loss is computed (the original
    author notes the raw output as ``(B, 1, 7)``).

    Returns:
        ``(mean_batch_loss, accuracy_percent)`` for the epoch.
    """
    model.train()
    running_loss = 0.
    n_correct = 0
    n_seen = 0

    for batch in dataloader:
        inputs = batch['eeg'].to(device)
        targets = batch['label'].to(device)

        optimizer.zero_grad()
        raw_scores, _ = model(inputs)
        scores = raw_scores.squeeze(1)

        batch_loss = criterion(scores, targets)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()
        hits = scores.argmax(dim=1) == targets
        n_correct += hits.sum().item()
        n_seen += targets.size(0)

    mean_loss = running_loss / len(dataloader)
    accuracy = 100. * n_correct / n_seen
    return mean_loss, accuracy



In [8]:
def evaluate_rls(model, dataloader, criterion, device):
    """Score ``model`` on ``dataloader`` without updating any weights.

    Batches are dicts with ``'eeg'`` and ``'label'`` entries; the model must
    return a pair whose first element is the logits (dimension 1 is squeezed).

    Returns:
        ``(mean_batch_loss, accuracy_percent, weighted_f1_percent)``.
    """
    model.eval()
    running_loss = 0.
    n_correct = 0
    n_seen = 0
    predicted, expected = [], []

    with torch.no_grad():
        for batch in dataloader:
            inputs = batch['eeg'].to(device)
            targets = batch['label'].to(device)

            raw_scores, _ = model(inputs)
            scores = raw_scores.squeeze(1)
            running_loss += criterion(scores, targets).item()

            guesses = scores.argmax(dim=1)
            predicted.extend(guesses.cpu().numpy())
            expected.extend(targets.cpu().numpy())
            n_correct += (guesses == targets).sum().item()
            n_seen += targets.size(0)

    accuracy = 100. * n_correct / n_seen
    weighted_f1 = f1_score(expected, predicted, average='weighted') * 100
    return running_loss / len(dataloader), accuracy, weighted_f1

In [9]:
# Subject-dependent experiment using Phase1RLSModel
def subject_dependent_experiment_rls(data_dir=".", subset_ratio=0.01):
    """Train and validate one Phase1RLSModel per subject (first 5 subjects).

    For each subject with at least 8 samples, the subject's samples are split
    70/30 into train/validation, a fresh model is trained for 20 epochs, and
    the best validation accuracy seen across epochs is recorded.

    NOTE(review): reporting the maximum validation accuracy over epochs is an
    optimistic estimate because the same split is used for selection.

    Args:
        data_dir: Directory passed to ``SEEDVII_Dataset``.
        subset_ratio: Fraction of the dataset to keep.

    Returns:
        ``(mean, std)`` of the per-subject best validation accuracy in
        percent, or ``(0., 0.)`` when no subject had enough samples (matching
        ``cross_subject_experiment_rls``).
    """
    print("\n=== SUBJECT-DEPENDENT (RLS) ===")
    dataset = SEEDVII_Dataset(data_dir, modality='eeg', subset_ratio=subset_ratio)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    subjects = np.unique(dataset.subject_labels)
    results = []
    for subj in subjects[:5]:
        idxs = np.where(dataset.subject_labels == subj)[0]
        if len(idxs) < 8:
            # Too few samples to form a meaningful train/validation split.
            continue
        tr, vl = train_test_split(idxs, test_size=0.3, random_state=42)
        tr_loader = DataLoader(Subset(dataset, tr), batch_size=16, shuffle=True)
        vl_loader = DataLoader(Subset(dataset, vl), batch_size=16, shuffle=False)
        # Instantiate Phase1RLSModel
        model = Phase1RLSModel(
            n_channels=dataset.eeg_feature_dim,
            d_model=256, n_classes=dataset.num_classes
        ).to(device)
        opt = torch.optim.Adam(model.parameters(), lr=1e-3)
        crit = nn.CrossEntropyLoss()
        best_acc = 0.
        for epoch in range(20):
            _, train_acc = train_epoch_rls(model, tr_loader, opt, crit, device)
            _, val_acc, _ = evaluate_rls(model, vl_loader, crit, device)
            best_acc = max(best_acc, val_acc)
            if epoch % 5 == 0:
                print(f" Subj {subj} Epoch {epoch}: Train {train_acc:.1f}%, Val {val_acc:.1f}%")
        print(f" Subj {subj} best Val Acc: {best_acc:.2f}%")
        results.append(best_acc)

    if not results:
        # np.mean([]) would return NaN and emit a RuntimeWarning.
        print(" Subject-dependent RLS results: no subject had enough samples")
        return 0., 0.

    avg, std = np.mean(results), np.std(results)
    print(f" Subject-dependent RLS results: {avg:.2f}% ± {std:.2f}%")
    return avg, std

# Cross-subject experiment using Phase1RLSModel
def cross_subject_experiment_rls(data_dir=".", subset_ratio=0.01):
    """Leave-one-subject-out evaluation of Phase1RLSModel (first 3 subjects).

    For each held-out subject with at least 5 samples, a fresh model is
    trained for 30 epochs on all other subjects and scored on the held-out
    one. Progress on the held-out subject is printed every 10 epochs.

    Returns:
        ``(mean, std)`` of the final held-out accuracy in percent, or
        ``(0., 0.)`` when every candidate subject was skipped.
    """
    print("\n=== CROSS-SUBJECT (RLS) ===")
    dataset = SEEDVII_Dataset(data_dir, modality='eeg', subset_ratio=subset_ratio)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    fold_accuracies = []
    for test_subj in np.unique(dataset.subject_labels)[:3]:
        train_idx = np.where(dataset.subject_labels != test_subj)[0]
        test_idx = np.where(dataset.subject_labels == test_subj)[0]
        if len(test_idx) < 5:
            continue

        train_loader = DataLoader(Subset(dataset, train_idx), batch_size=16, shuffle=True)
        test_loader = DataLoader(Subset(dataset, test_idx), batch_size=16, shuffle=False)

        model = Phase1RLSModel(
            n_channels=dataset.eeg_feature_dim,
            d_model=256, n_classes=dataset.num_classes
        ).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
        criterion = nn.CrossEntropyLoss()

        for epoch in range(30):
            train_epoch_rls(model, train_loader, optimizer, criterion, device)
            if epoch % 10 == 0:
                # Monitoring only; the held-out score is not used for selection.
                _, acc, _ = evaluate_rls(model, test_loader, criterion, device)
                print(f" TestSubj {test_subj} Epoch {epoch}: Acc {acc:.1f}%")

        _, test_acc, _ = evaluate_rls(model, test_loader, criterion, device)
        print(f" TestSubj {test_subj} final Acc: {test_acc:.2f}%")
        fold_accuracies.append(test_acc)

    if not fold_accuracies:
        return 0., 0.

    avg, std = np.mean(fold_accuracies), np.std(fold_accuracies)
    print(f" Cross-subject RLS results: {avg:.2f}% ± {std:.2f}%")
    return avg, std

def main():
    """Run both RLS experiments on a 1% subset and print a summary.

    The dataset is constructed once up front purely as a loading check; each
    experiment then builds its own dataset instance.
    """
    print("=== PHASE1 RLS ON SEED-VII ===")
    data_dir, subset_ratio = ".", 0.01

    # Dataset test
    SEEDVII_Dataset(data_dir, 'eeg', subset_ratio)

    # Run experiments
    summary = {
        'subj_dep': subject_dependent_experiment_rls(data_dir, subset_ratio),
        'cross_subj': cross_subject_experiment_rls(data_dir, subset_ratio),
    }

    print("\nFINAL RESULTS:")
    for name, (mean_acc, std_acc) in summary.items():
        print(f" {name}: {mean_acc:.2f}% ± {std_acc:.2f}%")

In [10]:
# Entry point: run the full pipeline when this cell/script executes as the main module.
if __name__ == "__main__":
    main()

=== PHASE1 RLS ON SEED-VII ===
Loading SEED-VII dataset from .
Found 20 EEG files and 20 eye files
Loading subject 1: 1
Loading subject 2: 10
Loading subject 3: 11
Loading subject 4: 12
Loading subject 5: 13
Loading subject 6: 14
Loading subject 7: 15
Loading subject 8: 16
Loading subject 9: 17
Loading subject 10: 18
Loading subject 11: 19
Loading subject 12: 2
Loading subject 13: 20
Loading subject 14: 3
Loading subject 15: 4
Loading subject 16: 5
Loading subject 17: 6
Loading subject 18: 7
Loading subject 19: 8
Loading subject 20: 9
Dataset loaded: 69742 samples
EEG shape: (69742, 310), Eye shape: (69742, 33)
Created 1.0% subset: 697 samples

=== SUBJECT-DEPENDENT (RLS) ===
Loading SEED-VII dataset from .
Found 20 EEG files and 20 eye files
Loading subject 1: 1
Loading subject 2: 10
Loading subject 3: 11
Loading subject 4: 12
Loading subject 5: 13
Loading subject 6: 14
Loading subject 7: 15
Loading subject 8: 16
Loading subject 9: 17
Loading subject 10: 18
Loading subject 11: 19
Load