# Introduction

We are building a machine learning pipeline for classification of EEG signals.

The preprocessing scripts are run separately from this notebook, and we import the variables they define.

This notebook will focus on creating the pipeline for assessing the best model to detect seizures in EEG signals. We will use three main strategies:

* Res2Net Transformer
* 1D-CNN + LSTM 
* Gated 2 Tower Transformer 

# Importing Packages

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
from torch import nn
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader
import lightning.pytorch as pl
from pytorch_lightning.loggers import WandbLogger

# Environment sanity check: CUDA visibility first, then the installed torch build,
# each on its own line.
print(torch.cuda.is_available(), torch.__version__, sep="\n")

# Use matplotlib's bundled "ggplot" style sheet for the figures in this notebook.
plt.style.use("ggplot")

True
2.0.0+cu118


# Importing and Preprocessing Data

In [2]:
%run ./preprocessing.py

In [3]:
# Convert every split produced by preprocessing.py into float32 tensors.
# NOTE: this cell rebinds its own inputs, so running it twice adds a second
# size-1 axis to the label tensors -- re-run the preprocessing cell first.
X_train, X_val, X_test = (
    torch.FloatTensor(split) for split in (X_train, X_val, X_test)
)

# Labels get an extra axis at position 1 (assumes 1-D label arrays, giving
# shape (n_samples, 1) -- TODO confirm against preprocessing.py).
y_train, y_val, y_test = (
    torch.FloatTensor(labels)[:, None] for labels in (y_train, y_val, y_test)
)

In [4]:
class EEGDataset(Dataset):
    """Map-style dataset that pairs each feature sample with its target.

    Args:
        features: Sized, indexable collection of samples (e.g. a tensor whose
            first axis enumerates samples).
        target: Sized, indexable collection holding one entry per sample.

    Raises:
        ValueError: If ``features`` and ``target`` do not have the same length.
    """

    def __init__(self, features, target) -> None:
        super().__init__()
        # Fail fast: a length mismatch would otherwise show up as an IndexError
        # in the middle of an epoch, or silently ignore trailing targets.
        if len(features) != len(target):
            raise ValueError(
                f"features and target must have the same length, "
                f"got {len(features)} and {len(target)}"
            )
        self.features = features
        self.target = target

    def __getitem__(self, index):
        """Return the sample at ``index`` as ``{"X": features, "y": target}``."""
        return {"X": self.features[index], "y": self.target[index]}

    def __len__(self):
        """Return the number of samples."""
        return len(self.features)


BATCH_SIZE = 64

# Prefer the first CUDA device when one is visible; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda:0"
else:
    DEVICE = "cpu"

# Only the training split is shuffled; validation and test keep their stored order.
train_dataloader = DataLoader(
    dataset=EEGDataset(X_train, y_train),
    batch_size=BATCH_SIZE,
    shuffle=True,
)
val_dataloader = DataLoader(
    dataset=EEGDataset(X_val, y_val),
    batch_size=BATCH_SIZE,
    shuffle=False,
)
test_dataloader = DataLoader(
    dataset=EEGDataset(X_test, y_test),
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [5]:
# Display the device string chosen above ("cuda:0" or "cpu").
DEVICE

'cuda:0'

# Creating Models

## Training code

In [6]:


def training(
        model, train_dataloader=None, val_dataloader=None,
        epochs=5, lr=0.001, device='cpu'):
    """Fit ``model`` with Adam and ``BCEWithLogitsLoss``, validating every epoch.

    Each batch yielded by the dataloaders must be a dict with the keys ``"X"``
    (model input) and ``"y"`` (float targets with the same shape as the model
    output). The losses printed per epoch are the mean of the per-batch losses.

    Args:
        model: ``nn.Module`` returning raw logits; it is moved to ``device``.
        train_dataloader: Iterable of training batches. Required.
        val_dataloader: Iterable of validation batches. When ``None`` the
            validation pass is skipped and its loss is reported as ``nan``.
        epochs: Number of passes over ``train_dataloader``.
        lr: Learning rate handed to Adam.
        device: Device (string or ``torch.device``) used for model and batches.

    Returns:
        The trained model. Whenever at least one epoch ran, the model is left
        in evaluation mode, ready for inference.

    Raises:
        ValueError: If ``train_dataloader`` is ``None``.
    """
    if train_dataloader is None:
        raise ValueError("training() requires a train_dataloader")

    model = model.to(device)
    optimizer = Adam(model.parameters(), lr=lr)
    criterion = nn.BCEWithLogitsLoss()

    for epoch in range(epochs):
        # Training mode enables dropout (and batch-norm statistic updates).
        model.train()
        train_loss_sum, train_batches = 0.0, 0
        for batch in train_dataloader:
            # Each batch is a dict: {"X": inputs, "y": targets}.
            X, y = batch["X"].to(device), batch["y"].to(device)

            optimizer.zero_grad()
            train_loss = criterion(model(X), y)
            train_loss.backward()
            optimizer.step()

            train_loss_sum += train_loss.item()
            train_batches += 1

        # Evaluation mode disables dropout, so the validation loss is
        # deterministic and reflects inference-time behaviour.
        model.eval()
        val_loss_sum, val_batches = 0.0, 0
        if val_dataloader is not None:
            # No gradients are needed for validation; skip autograd bookkeeping.
            with torch.no_grad():
                for batch in val_dataloader:
                    X, y = batch["X"].to(device), batch["y"].to(device)
                    val_loss_sum += criterion(model(X), y).item()
                    val_batches += 1

        # An empty (or missing) loader reports nan instead of raising.
        mean_train = train_loss_sum / train_batches if train_batches else float("nan")
        mean_val = val_loss_sum / val_batches if val_batches else float("nan")
        print(f"Epoch: {epoch}\tTraining loss: {mean_train:.5f}\t\t Validation Loss: {mean_val:.5f}")
    return model

## 1D CNN-LSTM Model

In [7]:
class CNN_LSTM_Classifier(pl.LightningModule):
    """1D-CNN feature extractor followed by a stacked LSTM and an MLP head.

    ``forward`` returns one raw logit per sample; no sigmoid is applied.
    """

    def __init__(self,):
        super().__init__()
        # Stem: one input channel -> 64 feature maps, then halve the time axis.
        self.conv_1 = nn.Conv1d(in_channels=1, out_channels=64, kernel_size=3)
        self.relu = nn.ReLU()
        self.max_pool = nn.MaxPool1d(kernel_size=2, stride=2)

        # Three unpadded convolutions; each one shortens the time axis by 2.
        conv_stack = [
            nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3),
            nn.ReLU(),
            nn.Conv1d(in_channels=128, out_channels=512, kernel_size=3),
            nn.ReLU(),
            nn.Conv1d(in_channels=512, out_channels=1024, kernel_size=3),
            nn.ReLU(),
        ]
        self.conv_layers = nn.Sequential(*conv_stack)

        # Despite its name this layer does not flatten: it is a Linear applied
        # to the last (time) axis, mapping 82 positions to 256.
        self.flatten_layer = nn.Linear(in_features=82, out_features=256)

        drop_p = 0.2
        # Registered on the module but never called in forward().
        self.dropout = nn.Dropout(p=drop_p)

        self.lstm = nn.LSTM(
            input_size=1024,
            hidden_size=64,
            num_layers=2,
            batch_first=True,
            dropout=drop_p,
        )

        head = [
            nn.Linear(in_features=64, out_features=256),
            nn.ReLU(),
            nn.Linear(in_features=256, out_features=128),
            nn.ReLU(),
            nn.Linear(in_features=128, out_features=64),
            nn.ReLU(),
            nn.Linear(in_features=64, out_features=1),
        ]
        self.fc_out = nn.Sequential(*head)

    def forward(self, X: torch.FloatTensor, y=None):
        """Compute one logit per sample.

        Args:
            X: Batch laid out as (batch, time, 1). The time length must reduce
                to 82 after the convolution/pooling stack, i.e. 178 or 179
                steps -- TODO confirm which one preprocessing.py produces.
            y: Accepted but ignored.

        Returns:
            Tensor of shape (batch, 1) holding raw logits.
        """
        # (batch, time, 1) -> (batch, 1, time): Conv1d expects channels first.
        channels_first = X.transpose(1, 2)
        stem = self.max_pool(self.relu(self.conv_1(channels_first)))
        # (batch, 1024, 82) -> (batch, 1024, 256) via the Linear on the time axis.
        feature_maps = self.flatten_layer(self.conv_layers(stem))
        # (batch, 256, 1024): 256 steps of 1024 features for the batch-first LSTM.
        sequence = feature_maps.transpose(1, 2)
        lstm_out, _ = self.lstm(sequence)
        # Classify from the top LSTM layer's output at the final step.
        return self.fc_out(lstm_out[:, -1, :])
    
# Build a fresh CNN-LSTM classifier and fit it for 15 epochs on the selected device.
model = CNN_LSTM_Classifier()
model = training(
    model,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    epochs=15,
    device=DEVICE,
)

Epoch: 0	Training loss: 0.51512		 Validation Loss: 0.50043
Epoch: 1	Training loss: 0.50239		 Validation Loss: 0.50351
Epoch: 2	Training loss: 0.50137		 Validation Loss: 0.50125
Epoch: 3	Training loss: 0.50211		 Validation Loss: 0.49863
Epoch: 4	Training loss: 0.50000		 Validation Loss: 0.49773
Epoch: 5	Training loss: 0.49654		 Validation Loss: 0.49064
Epoch: 6	Training loss: 0.49327		 Validation Loss: 0.48968
Epoch: 7	Training loss: 0.49259		 Validation Loss: 0.50155
Epoch: 8	Training loss: 0.49665		 Validation Loss: 0.50193
Epoch: 9	Training loss: 0.49879		 Validation Loss: 0.49901
Epoch: 10	Training loss: 0.49472		 Validation Loss: 0.49771
Epoch: 11	Training loss: 0.48740		 Validation Loss: 0.47974
Epoch: 12	Training loss: 0.48977		 Validation Loss: 0.49954
Epoch: 13	Training loss: 0.49677		 Validation Loss: 0.48971
Epoch: 14	Training loss: 0.49107		 Validation Loss: 0.49076
