# Introduction

We are building a machine learning pipeline for classification of EEG signals.

Preprocessing files will be run separetely from this notebook, and we will import their variables.

This notebook will focus on creating the pipeline for assessing the best model to detect seizures in EEG signals. We will use three main strategies:

* Res2Net Transformer
* 1D-CNN + LSTM 
* Gated 2 Tower Transformer 

# Importing Packages

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math

import torch
from torch import nn
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader
import lightning.pytorch as pl
import pickle
import copy
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score

print(torch.cuda.is_available())
print(torch.__version__)


plt.style.use("ggplot")

True
2.0.0+cu118


# Importing and Preprocessing Data

In [2]:
%run ./preprocessing.py

In [3]:
X_train = torch.FloatTensor(X_train)
X_test = torch.FloatTensor(X_test)
X_val = torch.FloatTensor(X_val)
y_train = torch.FloatTensor(y_train).unsqueeze(1)
y_test = torch.FloatTensor(y_test).unsqueeze(1)
y_val = torch.FloatTensor(y_val).unsqueeze(1)

In [4]:
y_train.unique(return_counts=True)

(tensor([0., 1.]), tensor([6440, 1610]))

In [5]:
class EEGDataset(Dataset):
    def __init__(self, features, target) -> None:
        super().__init__()
        self.features = features
        self.target = target

    def __getitem__(self, index):
        data = {}
        features = self.features[index]
        target = self.target[index]
        data["X"] = features
        data["y"] = target
        return data

    def __len__(self):
        return len(self.features)


BATCH_SIZE = 32
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
NUM_EPOCHS = 75
LEARNING_RATE = 1e-4
train_dataloader = DataLoader(EEGDataset(X_train, y_train), batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(EEGDataset(X_val, y_val), batch_size=BATCH_SIZE, shuffle=False)
final_train_dataloader = DataLoader(EEGDataset(torch.cat((X_train, X_val), 0), torch.cat((y_train, y_val), 0)), batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(EEGDataset(X_test, y_test), batch_size=BATCH_SIZE, shuffle=False)

# Creating Models

## Training code

In [6]:


def training(
        model, train_dataloader=None, val_dataloader=None,
        epochs=5, lr=0.001, device='cpu', earlystopping_tolerance=5):
    model = model.to(device)
    optimizer = Adam(model.parameters(), lr=lr)
    # optimizer = torch.optim.Adadelta(model.parameters(), lr=lr)
    criterion = nn.BCEWithLogitsLoss(pos_weight=torch.FloatTensor([9200/2300]).to(device))
    # criterion = nn.BCELoss()
    model_state = {
        "model": None,
        "train_loss": [],
        "val_loss": [],
        "val_acc": [],
        "val_precision": [],
        "val_recall": [],
        "val_f1": [],
    }
    best_validation = np.inf
    best_model = None
    count_tolerance = 0


    for epoch in range(epochs):
        training_loss = 0

        val_predictions = []
        val_groundtruth = []
        for i, data in enumerate(train_dataloader, 1):
            # get the inputs; data is a list of [inputs, labels]
            X, y = data["X"].to(device), data["y"].to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(X)
            train_loss = criterion(outputs, y)
            train_loss.backward()
            optimizer.step()

            # print statistics
            training_loss += train_loss.item()
        
        training_loss /= i
        
        if isinstance(val_dataloader, DataLoader):
            validation_loss = 0
            for j, data in enumerate(val_dataloader, 1):
                # get the inputs; data is a list of [inputs, labels]
                X, y = data["X"].to(device), data["y"].to(device)
                val_groundtruth.append(y)

                # forward + backward + optimize
                with torch.no_grad():
                    outputs = model(X)
                    val_predictions.append(torch.sigmoid(outputs))
                    val_loss = criterion(outputs, y)
                    # print statistics
                    validation_loss += val_loss.item()
            val_groundtruth = torch.cat(val_groundtruth, axis=0).cpu().squeeze(-1).detach().numpy()
            val_predictions = torch.cat(val_predictions, axis=0).cpu().squeeze(-1).detach().numpy()
            val_predictions = (val_predictions > 0.5).astype(np.int16)
            val_acc = accuracy_score(val_groundtruth, val_predictions)
            val_precision = precision_score(val_groundtruth, val_predictions)
            val_recall = recall_score(val_groundtruth, val_predictions)
            val_f1 = f1_score(val_groundtruth, val_predictions)

            validation_loss /= j
            
            if (validation_loss) < best_validation:
                count_tolerance = 0
                best_validation = validation_loss
                best_model = copy.deepcopy(model)
            
            count_tolerance += 1
            print(f"Epoch: {epoch}\tTraining loss: {training_loss:.5f}\t\t Validation Loss: {validation_loss:.5f}\tValidation Accuracy: {val_acc:.5f}")
            model_state["train_loss"].append(training_loss)
            model_state["val_loss"].append(validation_loss)
            model_state["val_acc"].append(val_acc)
            model_state["val_precision"].append(val_precision)
            model_state["val_recall"].append(val_recall)
            model_state["val_f1"].append(val_f1)

            if count_tolerance >= earlystopping_tolerance:
                break
            
        else:
            print(f"Epoch: {epoch}\tTraining loss: {training_loss:.5f}")
            model_state["train_loss"].append(training_loss)
            best_model = copy.deepcopy(model)
        
    
    model_state["model"] = best_model
    save_model(model_state)
    torch.cuda.empty_cache()
    return model_state

def save_model(model_state):
    with open(f"models/{ model_state['model'].to_string() }.pkl", "wb") as fp:
        model_state["model"] = model_state["model"].to("cpu").state_dict()
        pickle.dump(model_state, fp)
        print("Saved model successfully!")

# def test_accuracy_score(model, test_dataloader):
#     y_pred = 

## 1D CNN-LSTM Model

In [7]:
class CNN_LSTM_Classifier(pl.LightningModule):
    def __init__(self,):
        super().__init__()
        self.device_ = "cuda:0" if torch.cuda.is_available() else "cpu"
        self.conv_1 = nn.Conv1d(1, 64, 3)
        self.relu = nn.ReLU()
        self.max_pool = nn.MaxPool1d(2, 2)
        self.conv_layers = nn.Sequential(
            nn.Conv1d(64, 128, 3),
            nn.ReLU(),
            nn.Conv1d(128, 512, 3),
            nn.ReLU(),
            nn.Conv1d(512, 1024, 3),
            nn.ReLU()
        )
        self.flatten_layer = nn.Linear(82, 256)
        dropout = 0.2
        self.dropout = nn.Dropout(dropout)

        self.lstm = nn.LSTM(1024, 64, 2, batch_first=True, dropout=dropout)

        self.fc_out = nn.Sequential(
            nn.Linear(64, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1),
        )


    def forward(self, X: torch.FloatTensor, y=None):
        X = X.transpose(1, 2)
        out = self.relu(self.conv_1(X))
        out = self.max_pool(out)
        out = self.conv_layers(out)
        out = self.flatten_layer(out)
        out = out.transpose(1, 2)
        out, (_, _) = self.lstm(out)
        out = out[:, -1, :]
        out = self.fc_out(out)
        return out
    
    def predict_batch(self, X: torch.FloatTensor):
        pred = (torch.sigmoid(self(X)) > 0.5).int()
        return pred

    def predict(self, dataloader: DataLoader):
        predictions = list()
        for i, data in enumerate(dataloader, 1):
            # get the inputs; data is a list of [inputs, labels]
            with torch.no_grad():
                X = data["X"]
                y_pred = self.predict_batch(X)
                predictions.append(y_pred)
        predictions = torch.cat(predictions, 0)
        return predictions

    def to_string(self):
        return "CNN_LSTM_Classifier"


    
model_cnn_lstm = CNN_LSTM_Classifier()
state_cnn_lstm = training(model_cnn_lstm, train_dataloader, val_dataloader, device=DEVICE, epochs=NUM_EPOCHS, lr=LEARNING_RATE, earlystopping_tolerance=NUM_EPOCHS)

Epoch: 0	Training loss: 1.10964		 Validation Loss: 1.10850	Validation Accuracy: 0.20000
Epoch: 1	Training loss: 1.07053		 Validation Loss: 0.96781	Validation Accuracy: 0.81507
Epoch: 2	Training loss: 0.76647		 Validation Loss: 0.58139	Validation Accuracy: 0.84812
Epoch: 3	Training loss: 0.52342		 Validation Loss: 0.40535	Validation Accuracy: 0.90899
Epoch: 4	Training loss: 0.40667		 Validation Loss: 0.29368	Validation Accuracy: 0.92580
Epoch: 5	Training loss: 0.33562		 Validation Loss: 0.27014	Validation Accuracy: 0.92116
Epoch: 6	Training loss: 0.27964		 Validation Loss: 0.36268	Validation Accuracy: 0.95188
Epoch: 7	Training loss: 0.22383		 Validation Loss: 0.23542	Validation Accuracy: 0.95884
Epoch: 8	Training loss: 0.19761		 Validation Loss: 0.22125	Validation Accuracy: 0.94725
Epoch: 9	Training loss: 0.17515		 Validation Loss: 0.25535	Validation Accuracy: 0.94493
Epoch: 10	Training loss: 0.16270		 Validation Loss: 0.22548	Validation Accuracy: 0.93159
Epoch: 11	Training loss: 0.1537

In [8]:
# plt.figure(figsize=(8, 6))
# plt.plot(state["train_loss"], label="Training Loss")
# plt.plot(state["val_loss"], label="Validation Loss")
# plt.legend(loc="best")
# plt.title("Training x Validation Losses")
# plt.xlabel("Epochs")
# plt.ylabel("Binary Cross Entropy Loss")
# plt.show()

## Positional Encoding

In [9]:
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

In [10]:
class LearnablePositionalEncoding(nn.Module):

    def __init__(self, d_model, dropout=0.1, max_len=1024):
        super(LearnablePositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)
        # Each position gets its own embedding
        # Since indices are always 0 ... max_len, we don't have to do a look-up
        self.pe = nn.Parameter(torch.empty(max_len, 1, d_model))  # requires_grad automatically set to True
        nn.init.uniform_(self.pe, -0.02, 0.02)

    def forward(self, x):
        r"""Inputs of forward function
        Args:
            x: the sequence fed to the positional encoder model (required).
        Shape:
            x: [sequence length, batch size, embed dim]
            output: [sequence length, batch size, embed dim]
        """

        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

## Gated Transformer Network

In [11]:
class GatedTransformerNet(nn.Module):
    def __init__(self, device="cpu"):
        super().__init__()
        self.device = device 
        self.dropout = nn.Dropout(0.2)

        self.d_model = 512

        self.step_embedding = nn.Linear(1, self.d_model)
        self.channel_embedding = nn.Linear(1, self.d_model)
        self.positional_embedding = PositionalEncoding(d_model=self.d_model, dropout=0.2)
        # self.positional_embedding = LearnablePositionalEncoding(d_model=self.d_model, dropout=0.2, max_len=512)
        self.tanh = nn.Tanh()

        self.step_encoder = nn.TransformerEncoder(nn.TransformerEncoderLayer(self.d_model, nhead=8, batch_first=True), num_layers=2)
        self.channel_encoder = nn.TransformerEncoder(nn.TransformerEncoderLayer(self.d_model, nhead=8, batch_first=True), num_layers=2)

        self.gating = nn.Linear(self.d_model*178*2, 2)

        self.fc_out = nn.Sequential(
            nn.Linear(self.d_model*178*2, 256),
            nn.LayerNorm(256),
            nn.ReLU(),
            nn.Linear(256, 1),

        )
    
    def generate_square_subsequent_mask(self, sz: int) -> torch.Tensor:
        """Generates an upper-triangular matrix of -inf, with zeros on diag."""
        return torch.triu(torch.ones(sz, sz) * float('-inf'), diagonal=1).to(self.device)


    def forward(self, X: torch.FloatTensor, y=None):
        batch_size = X.shape[0]
        seq_len = X.shape[1]
        mask = self.generate_square_subsequent_mask(seq_len)

        channel = self.tanh(self.channel_embedding(X))
        channel = self.dropout(self.channel_encoder(channel))

        step = self.tanh(self.step_embedding(X))
        step = step.transpose(0, 1)
        step = self.positional_embedding(step)
        step = step.transpose(0, 1)


        step = self.dropout(self.step_encoder(step, mask))

        channel = channel.reshape(batch_size, -1)
        step = step.reshape(batch_size, -1)

        concat = torch.cat([channel, step], -1)
        h = self.gating(concat)
        gate = torch.softmax(h, dim=-1)


        encoding = torch.cat([channel * gate[:, 0:1], step * gate[:, 1:2]], dim=-1)
        encoding = self.dropout(encoding)
        out = self.fc_out(encoding)

        return out
    
    def predict_batch(self, X: torch.FloatTensor):
        pred = (torch.sigmoid(self(X)) > 0.5).int()
        return pred

    def predict(self, dataloader: DataLoader):
        predictions = list()
        for i, data in enumerate(dataloader, 1):
            # get the inputs; data is a list of [inputs, labels]
            with torch.no_grad():
                X = data["X"]
                y_pred = self.predict_batch(X)
                predictions.append(y_pred)
        predictions = torch.cat(predictions, 0)
        return predictions

    def to_string(self):
        return "GatedTransformerNet"

model_gated_transformer = GatedTransformerNet(device=DEVICE)
state_gated_transformer = training(model_gated_transformer, train_dataloader, val_dataloader, device=DEVICE, epochs=NUM_EPOCHS, lr=LEARNING_RATE, earlystopping_tolerance=NUM_EPOCHS)
# model(X_train[:32].to(DEVICE))

Epoch: 0	Training loss: 1.11994		 Validation Loss: 1.10000	Validation Accuracy: 0.61739
Epoch: 1	Training loss: 1.00011		 Validation Loss: 0.85511	Validation Accuracy: 0.77043
Epoch: 2	Training loss: 0.76880		 Validation Loss: 0.72057	Validation Accuracy: 0.76928
Epoch: 3	Training loss: 0.66724		 Validation Loss: 0.71045	Validation Accuracy: 0.77971
Epoch: 4	Training loss: 0.60867		 Validation Loss: 0.66965	Validation Accuracy: 0.81391
Epoch: 5	Training loss: 0.56102		 Validation Loss: 0.64398	Validation Accuracy: 0.82493
Epoch: 6	Training loss: 0.49510		 Validation Loss: 0.62926	Validation Accuracy: 0.86087
Epoch: 7	Training loss: 0.43190		 Validation Loss: 0.64546	Validation Accuracy: 0.86551
Epoch: 8	Training loss: 0.38162		 Validation Loss: 0.64019	Validation Accuracy: 0.84348
Epoch: 9	Training loss: 0.32003		 Validation Loss: 0.58394	Validation Accuracy: 0.89159
Epoch: 10	Training loss: 0.26818		 Validation Loss: 0.65003	Validation Accuracy: 0.88696
Epoch: 11	Training loss: 0.2412

In [12]:
DEVICE

'cuda:0'

## Multilayer Perceptron Network

In [13]:
class MLPClassifier(nn.Module):
    def __init__(self, device="cpu"):
        super().__init__()
        self.device_ = device

        self.dropout_1 = nn.Dropout(0.1)

        self.fc_net = nn.Sequential(
            nn.Linear(178, 500),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(500, 500),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(500, 500),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(500, 1)
        )

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        X = X.squeeze(-1).to(self.device_)
        X = self.dropout_1(X)
        return self.fc_net(X)
    
    def predict_batch(self, X: torch.FloatTensor):
        pred = (torch.sigmoid(self(X)) > 0.5).int()
        return pred

    def predict(self, dataloader: DataLoader):
        predictions = list()
        for i, data in enumerate(dataloader, 1):
            # get the inputs; data is a list of [inputs, labels]
            with torch.no_grad():
                X = data["X"]
                y_pred = self.predict_batch(X)
                predictions.append(y_pred)
        predictions = torch.cat(predictions, 0)
        return predictions

    def to_string(self):
        return "MLPClassifier"
    

model_mlp = MLPClassifier(DEVICE)
state_mlp = training(model_mlp, train_dataloader, val_dataloader, device=DEVICE, epochs=NUM_EPOCHS, lr=LEARNING_RATE, earlystopping_tolerance=NUM_EPOCHS)
# model.predict_batch(X_train[:32])

Epoch: 0	Training loss: 1.05881		 Validation Loss: 0.93999	Validation Accuracy: 0.73681
Epoch: 1	Training loss: 0.87763		 Validation Loss: 0.84565	Validation Accuracy: 0.75072
Epoch: 2	Training loss: 0.80970		 Validation Loss: 0.83131	Validation Accuracy: 0.81391
Epoch: 3	Training loss: 0.76696		 Validation Loss: 0.79101	Validation Accuracy: 0.79768
Epoch: 4	Training loss: 0.73102		 Validation Loss: 0.76352	Validation Accuracy: 0.79478
Epoch: 5	Training loss: 0.70028		 Validation Loss: 0.77266	Validation Accuracy: 0.80580
Epoch: 6	Training loss: 0.67705		 Validation Loss: 0.75263	Validation Accuracy: 0.82203
Epoch: 7	Training loss: 0.66391		 Validation Loss: 0.79174	Validation Accuracy: 0.83594
Epoch: 8	Training loss: 0.63080		 Validation Loss: 0.76847	Validation Accuracy: 0.82841
Epoch: 9	Training loss: 0.61500		 Validation Loss: 0.76019	Validation Accuracy: 0.83478
Epoch: 10	Training loss: 0.58734		 Validation Loss: 0.75094	Validation Accuracy: 0.83594
Epoch: 11	Training loss: 0.5753

## FCN (Fully Convoluted)

In [14]:
class FCN(nn.Module):
    def __init__(self, device="cpu"):
        super().__init__()
        self.device_ = device

        self.conv_fc = nn.Sequential(
            nn.Conv1d(1, 128, 3),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Conv1d(128, 256, 3),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Conv1d(256, 128, 3),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.AdaptiveAvgPool1d(1),
            nn.Flatten(),
            nn.Linear(128, 1)
        )

    def forward(self, X):
        X = X.transpose(-1, -2).to(self.device_)
        conv = self.conv_fc(X)
        return conv
    
    def predict_batch(self, X: torch.FloatTensor):
        pred = (torch.sigmoid(self(X)) > 0.5).int()
        return pred

    def predict(self, dataloader: DataLoader):
        predictions = list()
        for i, data in enumerate(dataloader, 1):
            # get the inputs; data is a list of [inputs, labels]
            with torch.no_grad():
                X = data["X"]
                y_pred = self.predict_batch(X)
                predictions.append(y_pred)
        predictions = torch.cat(predictions, 0)
        return predictions

    def to_string(self):
        return "FCN"
    

model_fcn = FCN(DEVICE)
state_fcn = training(model_fcn, train_dataloader, val_dataloader, device=DEVICE, epochs=NUM_EPOCHS, lr=LEARNING_RATE, earlystopping_tolerance=NUM_EPOCHS)


Epoch: 0	Training loss: 0.74550		 Validation Loss: 0.56391	Validation Accuracy: 0.84058
Epoch: 1	Training loss: 0.50823		 Validation Loss: 0.45471	Validation Accuracy: 0.87652
Epoch: 2	Training loss: 0.42527		 Validation Loss: 0.40139	Validation Accuracy: 0.88812
Epoch: 3	Training loss: 0.38402		 Validation Loss: 0.36678	Validation Accuracy: 0.90377
Epoch: 4	Training loss: 0.36392		 Validation Loss: 0.35097	Validation Accuracy: 0.91246
Epoch: 5	Training loss: 0.32596		 Validation Loss: 0.31449	Validation Accuracy: 0.92116
Epoch: 6	Training loss: 0.32539		 Validation Loss: 0.33061	Validation Accuracy: 0.92000
Epoch: 7	Training loss: 0.30213		 Validation Loss: 0.30161	Validation Accuracy: 0.92174
Epoch: 8	Training loss: 0.29620		 Validation Loss: 0.31689	Validation Accuracy: 0.92986
Epoch: 9	Training loss: 0.28595		 Validation Loss: 0.27912	Validation Accuracy: 0.92464
Epoch: 10	Training loss: 0.26517		 Validation Loss: 0.33102	Validation Accuracy: 0.94667
Epoch: 11	Training loss: 0.2779

## Residual Network

In [15]:
class ResNet(nn.Module):
    def __init__(self, device="cpu"):
        super().__init__()
        self.device_ = device

        self.conv_block_1 = nn.Sequential(
            nn.Conv1d(1, 64, 3, padding=1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Conv1d(64, 64, 3, padding=1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Conv1d(64, 64, 3, padding=1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
        )

    def forward(self, X):
        X = X.transpose(-1, -2).to(self.device_)
        conv = self.conv_block_1(X)
        return conv
    

# model = ResNet()
# state = training(model, train_dataloader, val_dataloader, device=DEVICE, epochs=10, lr=1e-4, earlystopping_tolerance=10)

# model(X_train[:32]).shape

## Transformer Unsupervised

In [16]:


class TransformerUnsupervised(nn.Module):
    def __init__(self, device: str = "cpu", supervised: bool = False, dropout: float = 0.2):
        super().__init__()
        self.device_ = device
        self.supervised = supervised
        self.unsupervised_training = False
        self.d_model = 512
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)
        self.dropout3 = nn.Dropout(dropout)

        self.projection = nn.Linear(1, self.d_model)

        self.pos_embedding = PositionalEncoding(d_model=self.d_model, dropout=dropout, max_len=1024)

        self.enc_layer = nn.TransformerEncoderLayer(d_model=self.d_model, nhead=8, dim_feedforward=2048, dropout=dropout, batch_first=True)

        self.transformer_encoder = nn.TransformerEncoder(
            self.enc_layer,
            num_layers=2,    
        )

        self.linearization = nn.Linear(self.d_model*178, 178)

        self.fc_out = nn.Sequential(
            nn.Linear(self.d_model*178, 512),
            nn.LayerNorm(512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.LayerNorm(256),
            nn.ReLU(),
            nn.Linear(256, 1),
        )

    def forward(self, X: torch.Tensor):
        # X = X.transpose(0, 1)
        # print(X.shape)
        out = self.dropout1(torch.tanh(self.projection(X)))
        out = self.pos_embedding(out)

        out = self.dropout2(out)
        out = self.transformer_encoder(out)
        
        out = out.view(-1, self.d_model*X.shape[1])
        out = self.dropout3(out)

        if self.supervised:
            out = self.fc_out(out)
        else:
            out = self.linearization(out).squeeze(-1)

        return out

    def predict_batch(self, X: torch.FloatTensor):
        pred = (torch.sigmoid(self(X)) > 0.5).int()
        return pred
    
    def predict(self, dataloader: DataLoader):
        predictions = list()
        for i, data in enumerate(dataloader, 1):
            # get the inputs; data is a list of [inputs, labels]
            with torch.no_grad():
                X = data["X"]
                y_pred = self.predict_batch(X)
                predictions.append(y_pred)
        predictions = torch.cat(predictions, 0)
        return predictions

    def to_string(self):
        return f"TransformerUnsupervised_{self.unsupervised_training}"
    


model_transformer_supervided = TransformerUnsupervised(supervised=True, dropout=0.25)
state_transformer_supervided = training(model_transformer_supervided, train_dataloader, val_dataloader, device=DEVICE, epochs=NUM_EPOCHS, lr=LEARNING_RATE, earlystopping_tolerance=NUM_EPOCHS)
# print(X_train[:32].shape)
# model(X_train[:32]).shape

Epoch: 0	Training loss: 1.11780		 Validation Loss: 1.11590	Validation Accuracy: 0.20000


  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 1	Training loss: 1.11184		 Validation Loss: 1.11629	Validation Accuracy: 0.80000
Epoch: 2	Training loss: 1.10912		 Validation Loss: 1.08464	Validation Accuracy: 0.43884
Epoch: 3	Training loss: 0.88967		 Validation Loss: 0.74350	Validation Accuracy: 0.83478
Epoch: 4	Training loss: 0.70538		 Validation Loss: 0.69571	Validation Accuracy: 0.82493
Epoch: 5	Training loss: 0.62042		 Validation Loss: 0.68383	Validation Accuracy: 0.82783
Epoch: 6	Training loss: 0.55493		 Validation Loss: 0.68053	Validation Accuracy: 0.87304
Epoch: 7	Training loss: 0.48433		 Validation Loss: 0.66636	Validation Accuracy: 0.85043
Epoch: 8	Training loss: 0.43918		 Validation Loss: 0.70504	Validation Accuracy: 0.88116
Epoch: 9	Training loss: 0.37736		 Validation Loss: 0.66210	Validation Accuracy: 0.85101
Epoch: 10	Training loss: 0.31059		 Validation Loss: 0.81869	Validation Accuracy: 0.87478
Epoch: 11	Training loss: 0.26969		 Validation Loss: 0.71408	Validation Accuracy: 0.85391
Epoch: 12	Training loss: 0.217

### Unsupervised Training

In [17]:
def unsupervised_training(model, train_dataloader, device=DEVICE, max_epochs=100, lr=1e-5):
    model = model.to(device)
    model.supervised = False
    optimizer = Adam(model.parameters(), lr=lr)
    # optimizer = torch.optim.Adadelta(model.parameters(), lr=lr)
    criterion = nn.MSELoss()


    for epoch in range(max_epochs):
        training_loss = 0
        for i, data in enumerate(train_dataloader, 1):
            # get the inputs; data is a list of [inputs, labels]
            X = data["X"].to(device)
            X_true = X.squeeze(-1)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            X_pred = model(X)
            train_loss = criterion(X_pred, X_true)
            train_loss.backward()
            optimizer.step()

            # print statistics
            training_loss += train_loss.item()
        
        training_loss /= i
        print(f"Epoch: {epoch}\tTraining loss: {training_loss:.5f}")
    print("Unsupervised training has ended!")    
    torch.cuda.empty_cache()
    return model

model_transformer_unsupervided = TransformerUnsupervised(device=DEVICE, supervised=False, dropout=0.2)
model_transformer_unsupervided = unsupervised_training(model_transformer_unsupervided, train_dataloader, DEVICE, max_epochs=30, lr=1e-3)
model_transformer_unsupervided.supervised = True
model_transformer_unsupervided.unsupervised_training = True
state_model = training(model_transformer_unsupervided, train_dataloader, val_dataloader, device=DEVICE, epochs=NUM_EPOCHS, lr=LEARNING_RATE, earlystopping_tolerance=NUM_EPOCHS)


Epoch: 0	Training loss: 21.34767
Epoch: 1	Training loss: 0.69984
Epoch: 2	Training loss: 0.49293
Epoch: 3	Training loss: 0.62964
Epoch: 4	Training loss: 0.57151
Epoch: 5	Training loss: 0.60441
Epoch: 6	Training loss: 0.45536
Epoch: 7	Training loss: 0.35912
Epoch: 8	Training loss: 0.26775
Epoch: 9	Training loss: 0.20668
Epoch: 10	Training loss: 0.21424
Epoch: 11	Training loss: 0.13202
Epoch: 12	Training loss: 0.10894
Epoch: 13	Training loss: 0.08259
Epoch: 14	Training loss: 0.06922
Epoch: 15	Training loss: 0.05788
Epoch: 16	Training loss: 0.05321
Epoch: 17	Training loss: 0.04866
Epoch: 18	Training loss: 0.04574
Epoch: 19	Training loss: 0.04044
Epoch: 20	Training loss: 0.03582
Epoch: 21	Training loss: 0.02896
Epoch: 22	Training loss: 0.01972
Epoch: 23	Training loss: 0.01463
Epoch: 24	Training loss: 0.01281
Epoch: 25	Training loss: 0.01194
Epoch: 26	Training loss: 0.01064
Epoch: 27	Training loss: 0.01033
Epoch: 28	Training loss: 0.00985
Epoch: 29	Training loss: 0.00921
Unsupervised traini

## LSTM Only Classifier

In [18]:
class LSTMClassifier(nn.Module):
    def __init__(self, device: str="cpu", dropout: float=0.2):
        super().__init__()
        self.device_ = device

        self.hidden_size = 256
        self.dropout1 = nn.Dropout(dropout)
        self.tanh = nn.Tanh()

        self.projection = nn.Linear(1, self.hidden_size)

        self.lstm = nn.LSTM(input_size=self.hidden_size, hidden_size=self.hidden_size, num_layers=2, dropout=dropout, batch_first=True)


        self.fc_out = nn.Sequential(
            nn.Linear(self.hidden_size, 128),
            nn.LayerNorm(128),
            nn.ReLU(),
            nn.Linear(128, 1)
        )


    def forward(self, X: torch.FloatTensor):
        out = self.dropout1(self.tanh(self.projection(X)))
        out, (_, _) = self.lstm(out)
        out = out[:, -1, :]
        out = self.fc_out(out)
        return out
    
    def predict_batch(self, X: torch.FloatTensor):
        pred = (torch.sigmoid(self(X)) > 0.5).int()
        return pred
    
    def predict(self, dataloader: DataLoader):
        predictions = list()
        for i, data in enumerate(dataloader, 1):
            # get the inputs; data is a list of [inputs, labels]
            with torch.no_grad():
                X = data["X"]
                y_pred = self.predict_batch(X)
                predictions.append(y_pred)
        predictions = torch.cat(predictions, 0)
        return predictions

    def to_string(self):
        return "LSTMClassifier"


model_lstm = LSTMClassifier(device=DEVICE, dropout=0.25)
state_lstm = training(model_lstm, train_dataloader, val_dataloader, device=DEVICE, epochs=NUM_EPOCHS, lr=LEARNING_RATE, earlystopping_tolerance=NUM_EPOCHS)
# print(X_train[:32].shape)
# model(X_train[:32]).shape

Epoch: 0	Training loss: 1.11371		 Validation Loss: 1.10571	Validation Accuracy: 0.40174
Epoch: 1	Training loss: 1.10515		 Validation Loss: 1.10001	Validation Accuracy: 0.32406
Epoch: 2	Training loss: 1.09326		 Validation Loss: 1.05466	Validation Accuracy: 0.57275
Epoch: 3	Training loss: 1.02230		 Validation Loss: 0.97819	Validation Accuracy: 0.64174
Epoch: 4	Training loss: 0.87621		 Validation Loss: 0.82520	Validation Accuracy: 0.84116
Epoch: 5	Training loss: 0.79403		 Validation Loss: 0.76317	Validation Accuracy: 0.84406
Epoch: 6	Training loss: 0.70736		 Validation Loss: 0.70509	Validation Accuracy: 0.86783
Epoch: 7	Training loss: 0.66376		 Validation Loss: 0.65255	Validation Accuracy: 0.80464
Epoch: 8	Training loss: 0.62284		 Validation Loss: 0.62668	Validation Accuracy: 0.87420
Epoch: 9	Training loss: 0.55287		 Validation Loss: 0.56117	Validation Accuracy: 0.90435
Epoch: 10	Training loss: 0.52948		 Validation Loss: 0.55254	Validation Accuracy: 0.87478
Epoch: 11	Training loss: 0.4885