In [64]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, TensorDataset
import os
from pathlib import Path
import numpy as np
import pandas as pd
from scipy.signal import welch 
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import numpy as np
import matplotlib.pyplot as plt


from tqdm import tqdm, trange


In [52]:
from scipy.signal import welch
import numpy as np

def compute_bandpower(data, fs, band):
    """Integrate the Welch power spectral density of ``data`` over a frequency band.

    Parameters
    ----------
    data : array_like
        Signal samples; the PSD is estimated along the last axis.
    fs : int or float
        Sampling frequency, passed through to ``scipy.signal.welch``.
    band : sequence of two numbers
        ``(low, high)`` band edges; both edges are inclusive.

    Returns
    -------
    float
        Trapezoidal integral of the PSD over the frequency bins that fall
        inside ``band`` (0.0 when no bin falls inside it).
    """
    samples = np.asarray(data)

    # Segment length is fs // 2 samples, but never longer than the signal itself:
    # welch would otherwise shrink it anyway and emit a UserWarning on every call.
    nperseg = max(1, min(int(fs // 2), samples.shape[-1]))
    freqs, psd = welch(samples, fs=fs, nperseg=nperseg)

    low, high = band[0], band[1]
    in_band = (freqs >= low) & (freqs <= high)

    # np.trapz is deprecated since NumPy 2.0 in favour of np.trapezoid;
    # fall back to the old name on NumPy versions that predate the rename.
    integrate = getattr(np, "trapezoid", None) or np.trapz
    return integrate(psd[in_band], freqs[in_band])

def compute_rms(x):
    """Return the root-mean-square of ``x``, i.e. ``sqrt(mean(x ** 2))``."""
    mean_square = np.mean(np.square(x))
    return np.sqrt(mean_square)

def compute_hjorth_params(x):
    """Compute the Hjorth mobility and complexity of a 1-D signal.

    Definitions used (derivatives are approximated with ``np.diff``):

    * mobility(x)   = sqrt(var(x') / var(x))
    * complexity(x) = mobility(x') / mobility(x)

    Parameters
    ----------
    x : array_like
        Signal samples.

    Returns
    -------
    (mobility, complexity)
        Each value is 0 when its denominator is 0 (e.g. a constant signal).
    """
    first_deriv = np.diff(x)
    second_deriv = np.diff(first_deriv)

    var_zero = np.var(x)
    var_d1 = np.var(first_deriv)
    var_d2 = np.var(second_deriv)

    mobility = np.sqrt(var_d1 / var_zero) if var_zero != 0 else 0

    # Mobility of the first derivative; this is only the numerator of the
    # complexity, which must still be normalised by the signal's own mobility.
    deriv_mobility = np.sqrt(var_d2 / var_d1) if var_d1 != 0 else 0
    complexity = deriv_mobility / mobility if mobility != 0 else 0

    return mobility, complexity

In [53]:
class EEGWindowDataset(Dataset):
    """Dataset yielding hand-crafted features for a window around a labelled sample.

    Two CSV files are loaded eagerly in the constructor:

    * ``combined_data_path`` - the signal table, cast to float32; the first
      ``n_channels`` columns are used as channels.
    * ``training_data_path`` - the label table, cast to int64; each row is
      unpacked as ``(class_label, center_idx)``, where ``center_idx`` is a row
      index into the signal table.

    Each item is a ``(features, class_label)`` pair. ``features`` is a float32
    tensor with 5 values per channel, in the order
    ``[alpha band power (8-13), beta band power (13-30), rms, mobility, complexity]``.

    Parameters
    ----------
    combined_data_path, training_data_path : str or path-like
        CSV files as described above.
    sampling_rate : int
        Sampling frequency used for the window length and the band powers.
    window_sec : int or float
        Window length in seconds; ``window_size = int(window_sec * sampling_rate)``.
    n_channels : int
        Number of leading signal columns to extract features from (default 4).
    """

    def __init__(self, combined_data_path, training_data_path, sampling_rate=250, window_sec=2,
                 n_channels=4):
        self.combined_data = pd.read_csv(combined_data_path).values.astype(np.float32)
        self.training_data = pd.read_csv(training_data_path).values.astype(np.int64)

        self.sampling_rate = sampling_rate
        self.window_size = int(window_sec * sampling_rate)
        self.half_window = self.window_size // 2
        self.n_channels = n_channels

        self.total_rows = self.combined_data.shape[0]

    def __len__(self):
        """Number of labelled samples (rows of the label table)."""
        return len(self.training_data)

    def _extract_window(self, center_idx):
        """Return a ``(window_size, n_channels)`` slice around ``center_idx``, zero-padded at the edges."""
        start_idx = center_idx - self.half_window
        # Derive the end from window_size (not center + half_window) so that an
        # odd window_size yields the same number of rows as the padded branch.
        end_idx = start_idx + self.window_size

        if start_idx >= 0 and end_idx <= self.total_rows:
            return self.combined_data[start_idx:end_idx, :self.n_channels]

        # Window runs past the start and/or end of the recording: copy the
        # overlapping rows into a zero-filled buffer at the matching offset.
        window = np.zeros((self.window_size, self.n_channels), dtype=np.float32)
        src_start = max(0, start_idx)
        src_end = min(self.total_rows, end_idx)
        if src_end > src_start:
            dst_start = src_start - start_idx
            window[dst_start:dst_start + (src_end - src_start)] = \
                self.combined_data[src_start:src_end, :self.n_channels]
        return window

    def __getitem__(self, idx):
        """Return ``(feature_tensor, class_label)`` for the ``idx``-th labelled sample."""
        class_label, center_idx = self.training_data[idx]
        window = self._extract_window(center_idx)

        features = []
        for ch in range(self.n_channels):
            signal = window[:, ch]
            alpha = compute_bandpower(signal, fs=self.sampling_rate, band=(8, 13))
            beta = compute_bandpower(signal, fs=self.sampling_rate, band=(13, 30))
            rms = compute_rms(signal)
            mobility, complexity = compute_hjorth_params(signal)
            features.extend([alpha, beta, rms, mobility, complexity])

        feature_tensor = torch.tensor(features, dtype=torch.float32)
        return feature_tensor, class_label

In [54]:
# Both splits read the same signal file; only the label/index CSV differs.
training_dataset = EEGWindowDataset(
    combined_data_path="TrainingData/combined_data.csv",
    training_data_path="TrainingData/training_data.csv",
    sampling_rate=250,
    window_sec=2,
)
testing_dataset = EEGWindowDataset(
    combined_data_path="TrainingData/combined_data.csv",
    training_data_path="TrainingData/testing_data.csv",
    sampling_rate=250,
    window_sec=2,
)


# Model

In [55]:
# Define the MLP model
class EEGClassifier(nn.Module):
    """Single-hidden-layer MLP that maps a feature vector to class logits.

    Parameters
    ----------
    input_dim : int
        Number of input features (default 20).
    hidden_dim : int
        Width of the hidden layer (default 32).
    num_classes : int
        Number of output classes (default 11).

    Notes
    -----
    ``forward`` returns raw, unnormalised logits. ``nn.CrossEntropyLoss``
    applies ``log_softmax`` internally, so applying softmax here as well would
    squash the logits twice and flatten the gradients. ``argmax`` over the
    logits gives the same predicted class as ``argmax`` over probabilities;
    call ``F.softmax(logits, dim=1)`` explicitly when probabilities are needed.
    """

    def __init__(self, input_dim=20, hidden_dim=32, num_classes=11):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)    # features -> hidden
        self.fc2 = nn.Linear(hidden_dim, num_classes)  # hidden -> class logits

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)`` for input ``(batch, input_dim)``."""
        x = F.relu(self.fc1(x))
        return self.fc2(x)  # no softmax: the loss expects logits

# Build the network, then its training objective and optimiser
model = EEGClassifier()

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)



In [68]:
def _collect_predictions(model, val_loader, device):
    """Run ``model`` over ``val_loader`` in eval mode and return ``(y_true, y_pred)`` lists."""
    model.eval()
    y_true, y_pred = [], []

    with torch.no_grad():
        for val_X, val_y in val_loader:
            val_X, val_y = val_X.to(device), val_y.to(device)
            preds = torch.argmax(model(val_X), dim=1)

            y_true.extend(val_y.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())

    return y_true, y_pred


def train_model(model, train_loader, val_loader, optimizer, criterion, num_epochs=30, device='cpu'):
    """Train ``model`` for ``num_epochs`` epochs, then evaluate it once on ``val_loader``.

    Parameters
    ----------
    model : nn.Module
        Network to train; moved to ``device``.
    train_loader, val_loader : iterable
        Yield ``(inputs, labels)`` batches for training and evaluation.
    optimizer, criterion
        Optimiser stepped once per training batch and the loss it minimises.
    num_epochs : int
        Number of passes over ``train_loader``. With 0 (or fewer) no training
        happens and the model is evaluated as-is.
    device : str or torch.device
        Device the model and every batch are moved to.

    Returns
    -------
    (val_accuracy, avg_loss, cm)
        Validation accuracy, mean *training* loss per batch over the last
        epoch (NaN if no batch was processed), and the validation confusion
        matrix. The confusion matrix and a classification report are also
        printed.
    """
    model.to(device)

    # Defined up front so the return value exists even when the loop body never runs.
    avg_loss = float('nan')

    for _epoch in trange(num_epochs, desc="Training Epochs", unit="epoch"):
        # -------- Training Phase --------
        model.train()
        total_loss = 0.0
        num_batches = 0

        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)

            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            num_batches += 1

        # Guard against an empty loader instead of dividing by zero.
        avg_loss = total_loss / num_batches if num_batches else float('nan')

    # -------- Validation Phase (once, after the final epoch) --------
    y_true, y_pred = _collect_predictions(model, val_loader, device)

    cm = confusion_matrix(y_true, y_pred)
    print("Confusion Matrix:")
    print(cm)

    print("\nClassification Report:")
    print(classification_report(y_true, y_pred, digits=3))

    val_accuracy = accuracy_score(y_true, y_pred)

    return val_accuracy, avg_loss, cm

## Batch Size Optimization

In [69]:
# Train a fresh model per batch size and compare validation accuracy and final training loss.
batch_sizes = [8, 16]
accuracies = []
losses = []

for batch_size in batch_sizes:
    print("Training for Batch Size:", batch_size)

    # Re-create model and optimizer so no weights or optimizer state leak between runs.
    model = EEGClassifier()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    train_loader = DataLoader(training_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(testing_dataset, batch_size=batch_size, shuffle=False)

    # train_model returns (val_accuracy, avg_loss, cm); the confusion matrix is
    # already printed inside train_model, so it is not kept here.
    acc, loss, conf_mat = train_model(model, train_loader, val_loader, optimizer, criterion, num_epochs=30)
    accuracies.append(acc)
    losses.append(loss)

plt.figure(figsize=(10, 5))

# Accuracy bar plot
plt.subplot(1, 2, 1)
plt.bar([str(bs) for bs in batch_sizes], accuracies, color='skyblue')
plt.title("Validation Accuracy vs Batch Size")
plt.xlabel("Batch Size")
plt.ylabel("Accuracy")
plt.ylim(0, 1)
plt.grid(True, axis='y')

# Loss bar plot: the value returned by train_model is the mean training loss
# of the last epoch, not a validation loss, so label it accordingly.
plt.subplot(1, 2, 2)
plt.bar([str(bs) for bs in batch_sizes], losses, color='salmon')
plt.title("Final-Epoch Training Loss vs Batch Size")
plt.xlabel("Batch Size")
plt.ylabel("Training Loss")
plt.grid(True, axis='y')

plt.tight_layout()
plt.show()

Training for Batch Size: 8


Training Epochs:   7%|████▏                                                          | 2/30 [00:05<01:18,  2.81s/epoch]


KeyboardInterrupt: 