In [1]:
import torch
import pandas as pd
import numpy as np
from pathlib import Path

def load_eeg_dataset(folder_path):
    """
    Load EEG data and events from a directory containing CSV files.
    Returns tensors for features and labels.

    Args:
        folder_path (str): Path to the folder containing the data and events CSV files.

    Returns:
        features_tensor (torch.Tensor): Tensor containing all EEG features.
        labels_tensor (torch.Tensor): Tensor containing all corresponding labels.
    """
    folder = Path(folder_path)
    all_features = []
    all_labels = []
    
    # Get all data files in the directory
    data_files = list(folder.glob('*_data.csv'))
    print(f"Found {len(data_files)} data files in the folder: {folder_path}")
    
    for data_file in data_files:
        # Print the current data file being processed
        print(f"\nProcessing data file: {data_file.name}")
        
        # Extract subject and series from filename
        filename_parts = data_file.stem.split('_')
        subject = filename_parts[0][4:]  # Extract from 'subjX'
        series = filename_parts[1][6:]   # Extract from 'seriesY'
        
        # Print extracted subject and series
        print(f"Extracted subject: {subject}, series: {series}")
        
        # Find corresponding events file
        events_file = folder / f"subj{subject}_series{series}_events.csv"
        print(f"Looking for events file: {events_file.name}")
        
        if not events_file.exists():
            print(f"Events file not found for {data_file.name}, skipping...")
            continue
            
        # Load data CSV (excluding timestep column)
        print(f"Loading data from {data_file.name}")
        data_df = pd.read_csv(data_file)
        features = data_df.iloc[:, 1:].values.astype(np.float32)
        print(f"Loaded data shape: {features.shape}")
        
        # Load events CSV and select relevant columns
        print(f"Loading events from {events_file.name}")
        events_df = pd.read_csv(events_file)
        labels = events_df[['HandStart', 'BothStartLoadPhase', 
                           'LiftOff', 'Replace', 'BothReleased']].values.astype(np.float32)
        print(f"Loaded labels shape: {labels.shape}")
        
        # Verify alignment
        if len(features) != len(labels):
            print(f"Mismatch in {data_file.name}: features shape {features.shape}, labels shape {labels.shape}. Skipping...")
            continue
            
        # Append features and labels to the lists
        all_features.append(features)
        all_labels.append(labels)
        print(f"Added data to batch. Current total features shape: {np.concatenate(all_features, axis=0).shape}, labels shape: {np.concatenate(all_labels, axis=0).shape}")
        print("-" * 50)  # Separator for readability

    # Check if any valid data was found
    if len(all_features) == 0 or len(all_labels) == 0:
        raise ValueError("No valid data files found in the specified folder.")

    # Combine all data
    print("\nCombining all data into tensors...")
    features_tensor = torch.tensor(np.concatenate(all_features, axis=0))
    labels_tensor = torch.tensor(np.concatenate(all_labels, axis=0))
    print(f"Final features tensor shape: {features_tensor.shape}")
    print(f"Final labels tensor shape: {labels_tensor.shape}")
    
    print("\nData loading completed successfully!")
    return features_tensor, labels_tensor
train_features, train_labels = load_eeg_dataset('grasp-and-lift-eeg-detection/train')

Found 96 data files in the folder: grasp-and-lift-eeg-detection/train

Processing data file: subj8_series4_data.csv
Extracted subject: 8, series: 4
Looking for events file: subj8_series4_events.csv
Loading data from subj8_series4_data.csv
Loaded data shape: (242562, 32)
Loading events from subj8_series4_events.csv
Loaded labels shape: (242562, 5)
Added data to batch. Current total features shape: (242562, 32), labels shape: (242562, 5)
--------------------------------------------------

Processing data file: subj8_series5_data.csv
Extracted subject: 8, series: 5
Looking for events file: subj8_series5_events.csv
Loading data from subj8_series5_data.csv
Loaded data shape: (118716, 32)
Loading events from subj8_series5_events.csv
Loaded labels shape: (118716, 5)
Added data to batch. Current total features shape: (361278, 32), labels shape: (361278, 5)
--------------------------------------------------

Processing data file: subj6_series3_data.csv
Extracted subject: 6, series: 3
Looking fo

ValueError: No valid data files found in the specified folder.

In [4]:
# test_features, test_labels = load_eeg_dataset('grasp-and-lift-eeg-detection/test')

In [6]:
train_features.shape

torch.Size([17985754, 32])

In [8]:
train_labels.shape

torch.Size([17985754, 5])

In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Data Preprocessing
def preprocess_data(features, labels):
    """
    Normalize features and convert data to float tensors.
    """
    # Normalize features
    mean = features.mean(dim=0, keepdim=True)
    std = features.std(dim=0, keepdim=True) + 1e-6  # Avoid division by zero
    features = (features - mean) / std
    
    # Convert to float tensors
    return features.float(), labels.float()

train_features, train_labels = preprocess_data(train_features, train_labels)

# Define Dataset and DataLoader
batch_size = 128  # Adjust as needed for memory constraints
dataset = TensorDataset(train_features, train_labels)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Define Neural Network Model
class EEGNet(nn.Module):
    def __init__(self, input_dim=32, output_dim=5, hidden_dim=64):
        super(EEGNet, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
            nn.Sigmoid()  # Since it's multi-label classification
        )
    
    def forward(self, x):
        return self.model(x)

# Initialize Model, Loss, and Optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = EEGNet().to(device)
criterion = nn.BCELoss()  # Since outputs are between 0 and 1 (multi-label classification)
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [15]:
# Training Loop
epochs = 10  # Adjust as needed
for epoch in range(epochs):
    running_loss = 0.0
    for batch_idx, (inputs, targets) in enumerate(dataloader):
        inputs, targets = inputs.to(device), targets.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        print(loss.item())
        
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(dataloader):.4f}")

print("Training Complete!")

0.11059276014566422
0.10184314101934433
0.11308792978525162
0.0921778455376625
0.11653312295675278
0.11786184459924698
0.09854574501514435
0.11223316192626953
0.08460733294487
0.1577948033809662
0.08788762986660004
0.09244047105312347
0.0956498235464096
0.13062074780464172
0.10265807062387466
0.11953897774219513
0.11847994476556778
0.10766235738992691
0.08647976815700531
0.09948794543743134
0.11704540252685547
0.10211054980754852
0.05230318382382393
0.10675568878650665
0.1262768805027008
0.12043772637844086
0.09298964589834213
0.07643263041973114
0.07801525294780731
0.11086548864841461
0.08042601495981216
0.13734792172908783
0.10284237563610077
0.1301157921552658
0.09565004706382751
0.12539389729499817
0.07656972110271454
0.12603497505187988
0.09122361987829208
0.10847683995962143
0.106514573097229
0.08976040780544281
0.13476775586605072
0.14245091378688812
0.09955428540706635
0.1319926679134369
0.12495676428079605
0.10196264088153839
0.09603272378444672
0.11947277933359146
0.108753465


KeyboardInterrupt

