In [41]:
import random
import h5py
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, Subset
from torch.nn.utils.rnn import pad_sequence
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

In [47]:
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
set_seed(42)

In [42]:
labels_df = pd.read_csv('/kaggle/input/bci-rnn-raw-data/TrainLabels.csv')
labels = labels_df['Prediction'].to_numpy()
print(labels.shape, len(labels))
label_stats = np.unique(labels, return_counts=True)
print(label_stats)
print(label_stats[1][1])

(5440,) 5440
(array([0, 1]), array([1590, 3850]))
3850


In [43]:
class EEGAugment:
    def __call__(self, x):
        # x shape: (channels, time)
        if random.random() > 0.5:
            x += torch.randn_like(x) * 0.01  # Gaussian noise
        if random.random() > 0.5:
            x = torch.roll(x, shifts=random.randint(-10,10), dims=1)  # Time shift
        return x

In [48]:
TRAIN_DATA_PATH = '/kaggle/input/bci-rnn-raw-data/rnn_data/rnn_data/eeg_train_data.h5'

class EEGDataset(Dataset):
    def __init__(self, path, labels, segment_names=['green', 'blinking', 'feedback']):
        self.path = path
        self.labels = labels
        self.segment_names = segment_names
        
        # Verify data consistency on initialization
        with h5py.File(path, 'r') as f:
            self.num_trials = len(f[segment_names[0]])  # All groups should have same length
            for name in segment_names:
                assert len(f[name]) == self.num_trials, f"Mismatched trial counts in {name} group"
    
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        with h5py.File(self.path, 'r') as f:
            segments = []
            for name in self.segment_names:
                seg_data = f[name][str(idx)][:]  
                seg_data = np.squeeze(seg_data) # remove if there's a dim of 1

                # Ensure shape is (n_channels, n_samples)
                if seg_data.ndim == 1:
                    seg_data = seg_data[np.newaxis, :]  # If single channel
                
                seg_data = (seg_data - np.mean(seg_data, axis=1, keepdims=True)) / (np.std(seg_data, axis=1, keepdims=True) + 1e-6)  # Avoid division by zero
                
                segments.append(torch.FloatTensor(seg_data))
            
            label = torch.tensor(self.labels[idx], dtype=torch.float32)
            # Assuming you'll add labels later
            return (*segments, label)  # (green_tensor, blinking_tensor, feedback_tensor, label_tensor)

def collate_fn(batch):
    """Handles variable-length segments and labels"""
    
    # Verify and correct shapes before processing
    processed_batch = []
    for item in batch:
        # Ensure each segment has shape (n_channels, n_samples)
        corrected_item = []
        for seg in item[:3]:  # For each of the 3 segments
            if seg.dim() == 3:
                seg = seg.squeeze(0)  # Remove batch dimension if present
            corrected_item.append(seg)
        corrected_item.append(item[3])  # Keep label unchanged
        processed_batch.append(tuple(corrected_item))
    
    # Now process with corrected shapes
    green_segs = [item[0].permute(1, 0) for item in processed_batch]
    blink_segs = [item[1].permute(1, 0) for item in processed_batch]
    feedback_segs = [item[2].permute(1, 0) for item in processed_batch]
    labels = torch.stack([item[3] for item in processed_batch])
    
    # Pad each segment type
    green_padded = pad_sequence(green_segs, batch_first=True).permute(0, 2, 1)
    blink_padded = pad_sequence(blink_segs, batch_first=True).permute(0, 2, 1)
    feedback_padded = pad_sequence(feedback_segs, batch_first=True).permute(0, 2, 1)
    
    return green_padded, blink_padded, feedback_padded, labels

dataset = EEGDataset(TRAIN_DATA_PATH, labels)
dataloader = DataLoader(
    dataset,
    batch_size=32,
    collate_fn=collate_fn,
    shuffle=True,
    num_workers=2  # If using Kaggle GPU
)

In [45]:
len(dataset)

5440

In [46]:
dataset[8][2].shape

TypeError: randn_like(): argument 'input' (position 1) must be Tensor, not list

In [16]:
# Split indices (stratified by labels)
train_idx, val_idx = train_test_split(
    range(len(dataset)),
    test_size=0.2,
    stratify=labels,  # Important for imbalanced classes
    random_state=42
)

train_loader = DataLoader(
    Subset(dataset, train_idx),
    batch_size=32,
    shuffle=True,
    collate_fn=collate_fn,
    num_workers=2
)

val_loader = DataLoader(
    Subset(dataset, val_idx),
    batch_size=32,
    shuffle=False,
    collate_fn=collate_fn,
    num_workers=2
)

# 4. Verify
print(f"Train batches: {len(train_loader)}")
print(f"Val batches: {len(val_loader)}")

Train batches: 136
Val batches: 34


In [17]:
class EEGRNN(nn.Module):
    # input_dim = number of channels
    def __init__(self, input_dim=16, hidden_dim=64, num_layers=2, dropout=0.3):
        super().__init__()
        self.rnn = nn.GRU(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            bidirectional=True,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0
        )
        self.dropout = nn.Dropout(dropout)
        self.classifier = nn.Sequential(
            nn.Linear(hidden_dim*2, 32),  # *2 for bidirectional
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Sigmoid()  # Output probabilities
        )

    def forward(self, x, lengths=None):
        # x shape: (batch, channels, seq_len)
        x = x.permute(0, 2, 1)  # (batch, seq_len, channels)
        
        if lengths is not None:
            # Pack padded sequences if lengths are provided
            x = nn.utils.rnn.pack_padded_sequence(
                x, lengths.cpu(), batch_first=True, enforce_sorted=False
            )
        
        _, hidden = self.rnn(x)  # hidden: (num_layers*2, batch, hidden_dim)
        
        # Concatenate last hidden states from both directions
        hidden = hidden.view(self.rnn.num_layers, 2, -1, self.rnn.hidden_size)
        last_hidden = torch.cat([hidden[-1, 0], hidden[-1, 1]], dim=1)
        
        return self.classifier(self.dropout(last_hidden)).squeeze(-1)

In [25]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

model = EEGRNN(input_dim=16, hidden_dim=32, dropout=0.1).to(device)

optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-3)

#criterion = nn.BCELoss()  # Binary cross-entropy for probabilities
# Calculate class weights
class_weights = torch.tensor([label_stats[1][1]/label_stats[1][0], 1.0], dtype=torch.float32).to(device)  # [2.42, 1.0]

# Modify your loss function
criterion = nn.BCEWithLogitsLoss(pos_weight=class_weights[0])  # Automatic weighting

# Learning rate scheduler
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', factor=0.5, patience=3, verbose=True
)

cuda


In [26]:
def train_epoch(model, loader):
    model.train()
    total_loss = 0
    for green, blink, feedback, labels in loader:
        # Use whichever segment type performs best (start with feedback)
        inputs = feedback.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(loader)

def validate(model, loader):
    model.eval()
    all_probs = []
    all_labels = []
    with torch.no_grad():
        for green, blink, feedback, labels in loader:
            inputs = feedback.to(device)
            outputs = model(inputs).cpu().numpy()
            all_probs.extend(outputs)
            all_labels.extend(labels.numpy())
    return roc_auc_score(all_labels, all_probs)

Try|Best Score|Loss
:-:|:-:|:-:
1|0.5686|0.6064
2|0.5448|0.9794
3|0.5109|1.0721

In [28]:
NUM_EPOCHS = 15
MODEL_PATH = '/kaggle/working/best_model_try2.pth'

best_auc = 0
for epoch in range(NUM_EPOCHS):
    train_loss = train_epoch(model, train_loader)
    val_auc = validate(model, val_loader)
    scheduler.step(val_auc)
    
    print(f"Epoch {epoch+1}: Loss={train_loss:.4f}, Val AUC={val_auc:.4f}")
    
    if val_auc > best_auc:
        best_auc = val_auc
        torch.save(model.state_dict(), MODEL_PATH)
        print("↳ New best model saved!")

Epoch 1: Loss=0.9794, Val AUC=0.5448
↳ New best model saved!
Epoch 2: Loss=0.9286, Val AUC=0.5448
Epoch 3: Loss=0.9229, Val AUC=0.5428
Epoch 4: Loss=0.9217, Val AUC=0.5413
Epoch 5: Loss=0.9213, Val AUC=0.5396
Epoch 6: Loss=0.9211, Val AUC=0.5391
Epoch 7: Loss=0.9211, Val AUC=0.5389
Epoch 8: Loss=0.9210, Val AUC=0.5383
Epoch 9: Loss=0.9209, Val AUC=0.5380
Epoch 10: Loss=0.9209, Val AUC=0.5379
Epoch 11: Loss=0.9209, Val AUC=0.5376
Epoch 12: Loss=0.9209, Val AUC=0.5376
Epoch 13: Loss=0.9209, Val AUC=0.5373
Epoch 14: Loss=0.9208, Val AUC=0.5371
Epoch 15: Loss=0.9208, Val AUC=0.5369


In [29]:
batch = next(iter(train_loader))
green, blink, feedback, labels_t = batch
print("Feedback segment stats:")
print(f"Shape: {feedback.shape}")
print(f"Mean: {feedback.mean():.4f}, Std: {feedback.std():.4f}")
print(f"Min: {feedback.min():.4f}, Max: {feedback.max():.4f}")
print(f"Label ratio: {labels_t.sum()/len(labels_t):.2%} positive")

Feedback segment stats:
Shape: torch.Size([32, 16, 261])
Mean: 0.0000, Std: 0.9996
Min: -3.4627, Max: 4.3991
Label ratio: 75.00% positive


In [32]:
class EEGSimpleModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv1d(16, 32, kernel_size=5, padding=2),  # Temporal convolution
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.MaxPool1d(2)
        )
        self.rnn = nn.GRU(32, 16, batch_first=True)  # Reduced complexity
        self.head = nn.Linear(16, 1)
        
    def forward(self, x):
        x = self.conv(x)  # (batch, 32, seq_len//2)
        x = x.permute(0, 2, 1)  # (batch, seq_len//2, 32)
        _, h = self.rnn(x)
        return torch.sigmoid(self.head(h.squeeze(0)))

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

model = EEGSimpleModel().to(device)
# Replace BCELoss with more stable version
criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([3850/1590]).to(device))  # 2.42:1 weighting

# Add gradient clipping (critical for RNNs)
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-5, weight_decay=1e-4)  # Lower LR

NUM_EPOCHS = 15
MODEL_PATH = '/kaggle/working/best_model_try3.pth'
# Add warmup and better scheduling
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=1e-4,
    steps_per_epoch=len(train_loader),
    epochs=NUM_EPOCHS,
    pct_start=0.3
)

# Early stopping with memory
best_auc = 0
for epoch in range(NUM_EPOCHS):
    model.train()
    for batch in train_loader:
        inputs = batch[2].to(device)  # Feedback segment
        labels = batch[3].to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), labels)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()
    
    val_auc = validate(model, val_loader)
    print(f"Epoch {epoch+1}: Loss={loss.item():.4f}, Val AUC={val_auc:.4f}")
    
    if val_auc > best_auc + 0.005:
        best_auc = val_auc
        torch.save(model.state_dict(), MODEL_PATH)
    elif epoch > 4 and val_auc < 0.55:
        print("Stopping - no improvement")
        break

cuda
Epoch 1: Loss=1.0721, Val AUC=0.5109
Epoch 2: Loss=1.0364, Val AUC=0.4953
Epoch 3: Loss=1.0217, Val AUC=0.4935
Epoch 4: Loss=0.9727, Val AUC=0.4965
Epoch 5: Loss=0.9246, Val AUC=0.4930
Epoch 6: Loss=0.9176, Val AUC=0.4974
Stopping - no improvement


In [35]:
class P300Detector(nn.Module):
    def __init__(self, n_channels=16):
        super().__init__()
        # Spatial attention (channel importance)
        self.spatial_att = nn.Sequential(
            nn.Conv1d(n_channels, 32, kernel_size=1),
            nn.ReLU(),
            nn.Conv1d(32, n_channels, kernel_size=1),
            nn.Sigmoid()
        )
        
        # Temporal convolution (P300 typically 250-500ms)
        self.temp_conv = nn.Sequential(
            nn.Conv1d(n_channels, 64, kernel_size=int(0.3*200)),  # 300ms window @200Hz
            nn.BatchNorm1d(64),
            nn.ELU(),
            nn.MaxPool1d(4)
        )
        
        # LSTM for temporal context
        self.lstm = nn.LSTM(
            input_size=64,
            hidden_size=32,
            bidirectional=True,
            batch_first=True
        )
        
        # Decision head
        self.head = nn.Sequential(
            nn.Linear(64, 16),
            nn.ELU(),
            nn.Dropout(0.3),
            nn.Linear(16, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        # x shape: (batch, channels, time)
        
        # Spatial attention
        att_weights = self.spatial_att(x)  # (batch, channels, 1)
        x = x * att_weights
        
        # Temporal processing
        x = self.temp_conv(x)  # (batch, 64, time//4)
        x = x.permute(0, 2, 1)  # (batch, time//4, 64)
        
        # Sequence modeling
        lstm_out, _ = self.lstm(x)
        x = lstm_out.mean(dim=1)  # (batch, 64)
        
        return self.head(x).squeeze(-1)

# Custom loss function for ERPs
class P300Loss(nn.Module):
    def __init__(self, pos_weight=2.42):  # 3850/1590 ratio
        super().__init__()
        self.pos_weight = pos_weight
        
    def forward(self, preds, targets):
        loss = F.binary_cross_entropy(preds, targets, reduction='none')
        # Emphasize 250-500ms window errors
        if preds.dim() > 1:  # If using sequence outputs
            time_weights = torch.linspace(1, 3, preds.shape[1]).to(preds.device)
            loss = (loss * time_weights).mean()
        return loss.mean()

In [54]:
# Initialize
model = P300Detector(n_channels=16).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-4, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=15)
criterion = P300Loss(pos_weight=3850/1590)

In [55]:
def validate(model, val_loader):
    """Compute validation AUC-ROC score"""
    model.eval()  # Set to evaluation mode
    all_probs = []
    all_labels = []
    
    with torch.no_grad():  # Disable gradient calculation
        for batch in val_loader:
            # Assuming batch contains (green, blink, feedback, labels)
            inputs = batch[2].to(device)  # Using feedback segment
            labels = batch[3].cpu().numpy()  # Keep on CPU
            
            # Get model predictions
            outputs = model(inputs)
            probs = torch.sigmoid(outputs).cpu().numpy()  # Convert to probabilities
            
            all_probs.extend(probs)
            all_labels.extend(labels)
    
    # Handle edge case where validation set has only one class
    if len(np.unique(all_labels)) == 1:
        print("Warning: Validation set contains only one class!")
        return 0.5  # Neutral AUC score
    
    return roc_auc_score(all_labels, all_probs)

In [56]:
# Freeze all except temporal layers
for name, param in model.named_parameters():
    if 'temp_conv' not in name:
        param.requires_grad = False

optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4)
best_auc = 0.5

for epoch in range(5):
    model.train()
    for _, _, inputs, labels in train_loader:  # Assuming you've modified loader to return (feedback, labels)
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
    
    # Validation
    model.eval()
    val_probs, val_labels = [], []
    with torch.no_grad():
        for _, _, inputs, labels in val_loader:
            outputs = model(inputs.to(device)).sigmoid().cpu()
            val_probs.extend(outputs.numpy())
            val_labels.extend(labels.numpy())
    
    val_auc = roc_auc_score(val_labels, val_probs)
    print(f"Phase1 Epoch {epoch+1}: Val AUC={val_auc:.4f}")
    
    # Save best model
    if val_auc > best_auc:
        best_auc = val_auc
        torch.save(model.state_dict(), '/kaggle/working/best_phase1.pth')
        print(f"↳ New best model saved (AUC={val_auc:.4f})")

Phase1 Epoch 1: Val AUC=0.5410
↳ New best model saved (AUC=0.5410)
Phase1 Epoch 2: Val AUC=0.5655
↳ New best model saved (AUC=0.5655)
Phase1 Epoch 3: Val AUC=0.5680
↳ New best model saved (AUC=0.5680)
Phase1 Epoch 4: Val AUC=0.5765
↳ New best model saved (AUC=0.5765)
Phase1 Epoch 5: Val AUC=0.5791
↳ New best model saved (AUC=0.5791)


In [57]:
# Unfreeze all layers
for param in model.parameters():
    param.requires_grad = True

# Load best phase1 weights
model.load_state_dict(torch.load('/kaggle/working/best_phase1.pth'))

optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=2)
best_auc = 0

for epoch in range(10):
    model.train()
    epoch_loss = 0
    for _, _, inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        epoch_loss += loss.item()
    
    # Validation
    val_auc = validate(model, val_loader)  # Your existing validate function
    scheduler.step(val_auc)
    
    print(f"Phase2 Epoch {epoch+1}: Loss={epoch_loss/len(train_loader):.4f}, Val AUC={val_auc:.4f}")
    
    # Save best model
    if val_auc > best_auc + 0.001:  # Minimum improvement threshold
        best_auc = val_auc
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'val_auc': val_auc,
        }, '/kaggle/working/best_phase2.pth')
        print(f"↳ New best model saved (AUC={val_auc:.4f})")

  model.load_state_dict(torch.load('/kaggle/working/best_phase1.pth'))


Phase2 Epoch 1: Loss=0.6147, Val AUC=0.5865
↳ New best model saved (AUC=0.5865)
Phase2 Epoch 2: Loss=0.6029, Val AUC=0.5967
↳ New best model saved (AUC=0.5967)
Phase2 Epoch 3: Loss=0.5976, Val AUC=0.5953
Phase2 Epoch 4: Loss=0.5987, Val AUC=0.6005
↳ New best model saved (AUC=0.6005)
Phase2 Epoch 5: Loss=0.5939, Val AUC=0.6037
↳ New best model saved (AUC=0.6037)
Phase2 Epoch 6: Loss=0.5944, Val AUC=0.6127
↳ New best model saved (AUC=0.6127)
Phase2 Epoch 7: Loss=0.5926, Val AUC=0.6082
Phase2 Epoch 8: Loss=0.5882, Val AUC=0.6164
↳ New best model saved (AUC=0.6164)
Phase2 Epoch 9: Loss=0.5888, Val AUC=0.6178
↳ New best model saved (AUC=0.6178)
Phase2 Epoch 10: Loss=0.5846, Val AUC=0.6190
↳ New best model saved (AUC=0.6190)


In [59]:
# Load best phase2 checkpoint
checkpoint = torch.load('/kaggle/working/best_phase2.pth')
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

# Smaller LR for fine-tuning
for g in optimizer.param_groups:
    g['lr'] = 1e-5

best_auc = checkpoint['val_auc']

for epoch in range(5):
    model.train()
    for _, _, inputs, labels in train_loader:
        # Focus on P300 window (250-600ms @200Hz = 50:120 samples)
        inputs = inputs[:, :, 50:120].to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    
    val_auc = validate(model, val_loader)
    print(f"Phase3 Epoch {epoch+1}: Val AUC={val_auc:.4f}")
    
    if val_auc > best_auc:
        best_auc = val_auc
        torch.save(model.state_dict(), '/kaggle/working/best_model_try4.pth')
        print(f"↳ New best model saved (AUC={val_auc:.4f})")

  checkpoint = torch.load('/kaggle/working/best_phase2.pth')


Phase3 Epoch 1: Val AUC=0.6199
↳ New best model saved (AUC=0.6199)
Phase3 Epoch 2: Val AUC=0.6155
Phase3 Epoch 3: Val AUC=0.6128
Phase3 Epoch 4: Val AUC=0.6116
Phase3 Epoch 5: Val AUC=0.6097
