In [2]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import torch.nn.functional as F
from tqdm.notebook import tqdm
import torch
import torch.nn as nn
from torch.nn import functional as F

# HYPERPARAMETERS
batch_size = 1  # DON'T ALTER IT!!!
eval_batch = 1  # DON'T ALTER IT!!!

train_folder = "/home/dll-1/Desktop/eeg/datasets/dataset_pickle/2019/train"
eval_folder = "/home/dll-1/Desktop/eeg/datasets/dataset_pickle/2019/eval"

# parameters for data loaders
number_of_eeg_channels = 21  # WARNING: If you are altering this, change the "n_channels" hyperparam as well below
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# Details of model
n_embd = 64  # Size of embedding dimension (should be divisible by n_heads evenly)
n_head = 2  # Number of heads  
n_layer = 1  # No. of transformer blocks                                            (MA'AM HIRA)
dropout = 0.01  #                                                                   (MA'AM HIRA)
emb_table_size = 10000
n_channels = 21 # No. of channels will give to the model                            (MA'AM HIRA)
window_size = 9000 # context length for predictions?  --> The bugbearer :)          (MA'AM HIRA)

# Training details
epochs = 10  #                                                                      (MA'AM HIRA)
learning_rate = 1e-3  #                                                             (MA'AM HIRA)

In [3]:
# Custom dataset class
class EEGDataset(Dataset):
    def __init__(self, folder_path):
        self.data = self.load_data(folder_path)

    def load_data(self, folder_path):
        data = []
        labels = []
        for file_name in os.listdir(folder_path):
            if file_name.endswith(".npz"):
                file_path = os.path.join(folder_path, file_name)
                data.append(file_path)
        return data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        file_path = self.data[idx]
        npz_data = np.load(file_path)
        eeg_data = npz_data['array1'][0:number_of_eeg_channels]
        label = npz_data['array2']
        return torch.tensor(eeg_data, dtype=torch.float32), torch.tensor(label, dtype=torch.float32)

# Load data from eval and train folders
train_dataset = EEGDataset(train_folder)
val_dataset = EEGDataset(eval_folder)
eval_dataset = EEGDataset(eval_folder)

# Split the data into training and validation sets
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)
eval_loader = DataLoader(eval_dataset, batch_size=eval_batch, shuffle=False)

In [4]:
class Head(nn.Module):
    """ one head of self-attention """

    def __init__(self, head_size):
        super().__init__()
        self.key = nn.Linear(n_embd, head_size, bias=False)
        self.query = nn.Linear(n_embd, head_size, bias=False)
        self.value = nn.Linear(n_embd, head_size, bias=False)
        self.register_buffer('tril', torch.tril(torch.ones(window_size, window_size)))

        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """
          Input: B, T, C
          Output: B, T, C
        """
        B,T,C = x.shape
        k = self.key(x)   # (B,T,C)
        q = self.query(x) # (B,T,C)
        # compute attention scores ("affinities")
        wei = q @ k.transpose(-2,-1) * C**-0.5 # (B, T, C) @ (B, C, T) -> (B, T, T)
        wei = wei.masked_fill(self.tril[:T, :T] == 0, float('-inf')) # (B, T, T)
        wei = F.softmax(wei, dim=-1) # (B, T, T)
        wei = self.dropout(wei)
        # perform the weighted aggregation of the values
        v = self.value(x) # (B,T,C)
        out = wei @ v # (B, T, T) @ (B, T, C) -> (B, T, C)
        # print("i am in head", out.device)
        return out

class MultiHeadAttention(nn.Module):
    """ multiple heads of self-attention in parallel """

    def __init__(self, num_heads, head_size):
        super().__init__()
        self.heads = nn.ModuleList([Head(head_size) for _ in range(num_heads)])
        self.proj = nn.Linear(n_embd, n_embd)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        out = torch.cat([h(x) for h in self.heads], dim=-1)
        out = self.dropout(self.proj(out))
        # print("i am in multihead", out.device)
        return out

class FeedFoward(nn.Module):
    """ a simple linear layer followed by a non-linearity """

    def __init__(self, n_embd):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(n_embd, 4 * n_embd),
            nn.ReLU(),
            nn.Linear(4 * n_embd, n_embd),
            nn.Dropout(dropout),
        )

    def forward(self, x):
        # print("i am in FeedFoward", self.net(x).device)
        return self.net(x)

class Block(nn.Module):
    """ Transformer block: communication followed by computation """

    def __init__(self, n_embd, n_head):
        # n_embd: embedding dimension, n_head: the number of heads we'd like
        super().__init__()
        head_size = n_embd // n_head
        self.sa = MultiHeadAttention(n_head, head_size)
        self.ffwd = FeedFoward(n_embd)
        self.ln1 = nn.LayerNorm(n_embd)
        self.ln2 = nn.LayerNorm(n_embd)

    def forward(self, x):
        x = x + self.sa(self.ln1(x))
        x = x + self.ffwd(self.ln2(x))
        # print("I am in block", x.device)
        return x

class GPT(nn.Module):

  def __init__(self):
    super().__init__()
    # Instance Variables
    # nn.embedding(no_of_encoding_categories, output_dim_desired_for_each_category)

    # Vector Embedding (input: (1xT), Output: (1, T, C))
    self.embedder = nn.Linear(window_size, n_embd)
    # Positional Embedding (input: (T), Output: (T, C))
    self.pos_encoding_table = nn.Embedding(120000, n_embd)

    # Encoder Layers
    self.blocks = nn.Sequential(*[Block(n_embd, n_head=n_head) for _ in range(n_layer)])
    self.ln_f = nn.LayerNorm(n_embd) # final layer norm
    self.lm_head = nn.Linear(n_embd, 1)
    self.lm_out = nn.Linear(n_channels*window_size, 1)

  def forward(self, x):
    # x is of shape (B, T)
    B, T = x.shape

    embedder_inp = x[:, :, None]
    embedder_inp = embedder_inp.repeat(1, 1, window_size)
    tok_emb = self.embedder(embedder_inp) # (B, T, C)
    pos_emb = self.pos_encoding_table(torch.arange(T, device=device)) # (T, C)
    x = tok_emb + pos_emb

    x = self.blocks(x) # (B, T, C)
    x = self.ln_f(x) # (B, T, C)
    logits = self.lm_head(x) # (B, T, 1)
    logits = logits.squeeze() # (B, T)
    logits = logits.view(1, -1) # (1, B*T)
    prediction = self.lm_out(logits) # (1, 1)
    prediction = prediction.view(1,) # (1,)
    # print("i am in gpt", prediction.device)
    return prediction

model = GPT()
criterion = nn.CrossEntropyLoss()
loss_fn = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=0.0003)

model = model.to(device)


In [5]:
num_training_steps = epochs * len(train_loader)

print(num_training_steps)

42340


In [9]:
# Function to evaluate F1 score
def evaluateF1Score(model, loader, device):
    model.eval()
    true_positives = 0
    false_positives = 0
    false_negatives = 0
    true_negatives = 0
    
    with torch.no_grad():
        for data in loader:
            inputs, labels = data
            inputs = inputs.to(device)
            inputs = inputs.squeeze()
            inputs = inputs[:, :window_size]
            labels = labels.to(device)
            outputs = model(inputs)
            predicted = torch.round(torch.sigmoid(outputs))
            
            true_positives += ((predicted == 1) & (labels == 1)).sum().item()
            false_positives += ((predicted == 1) & (labels == 0)).sum().item()
            false_negatives += ((predicted == 0) & (labels == 1)).sum().item()
            true_negatives += ((predicted == 0) & (labels == 0)).sum().item()

    precision = true_positives / (true_positives + false_positives + 1e-15)
    recall = true_positives / (true_positives + false_negatives + 1e-15)
    
    f1 = 2 * (precision * recall) / (precision + recall + 1e-15)

    return f1

In [8]:
def eval(model, data_loader, loss_fn, eval_batch):
    model.eval()

    eval_loss = 0.0
    correct_eval = 0.0
    
    with torch.no_grad():
        for x, y in data_loader:
            file = x.to(device)
            file = file.squeeze()
            file = file[:, :window_size]
            label = y.to(device)

            output = model(file)

            if eval_batch == 1:
                loss = loss_fn(output, label)
            else:
                loss = loss_fn(output, label.view(-1, 1))
                
            eval_loss += loss.item()

            preds = torch.where(output > 0.5, 1, 0).T
            correct_eval += (preds == label).sum().item()

        eval_loss = eval_loss / len(data_loader)
        eval_acc = (correct_eval / (len(data_loader) * batch_size))
        eval_f1 = evaluateF1Score(model, data_loader, device)

    return eval_loss, eval_acc, eval_f1

prev_eval_loss, prev_eval_acc, prev_eval_f1 = eval(model, val_loader, loss_fn, 1)
print("Loss:", round(prev_eval_loss, 3), "Accuracy:", round(prev_eval_acc, 3), "F1 score:", round(prev_eval_f1, 3))


In [142]:
def train(model, data_loader, optimizer, loss_fn):
  model.train()
  train_loss = 0.0
  correct_train = 0.0
  optimizer.zero_grad()
  for x,y in data_loader:
        file = x.to(device)
        file = file.squeeze()
        file = file[:, :window_size]
        label = y.to(device)
        output = model(file)
        with torch.no_grad():
          preds = torch.where(output > 0.5, 1, 0).T
          correct_train += ((preds == label)).sum().item()
        # print(output.shape)
        loss = loss_fn(output.view(-1, 1), label.view(-1, 1))
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

  train_loss = train_loss/len(data_loader)
  train_acc = (correct_train/(len(data_loader)*batch_size))
  train_f1 = evaluateF1Score(model, data_loader, device)
  return train_loss, train_acc, train_f1

for epoch in range(epochs):  # loop over the dataset multiple times
    train_loss, train_acc, train_f1 = train(model, train_loader,optimizer, loss_fn)
    print(f"Epoch: {epoch} \t Train loss: {train_loss:.2f} \t Train acc: {train_acc:.2f} \t Train F1: {train_f1:.2f}")

    eval_loss, eval_acc, val_f1 = eval(model, val_loader, loss_fn, eval_batch)
    print(f"Epoch: {epoch} \t Val loss: {eval_loss:.2f} \t Val acc: {eval_acc:.2f} \t Val F1: {val_f1:.2f}")

    print("=========================================")
    if prev_eval_acc < eval_acc:
      prev_eval_acc = eval_acc
      torch.save(model.state_dict(), "./trans.pt")
      print("WEIGHTS SAVED")

Total files to train on: 34
Epoch: 0 	 Train loss: 3.22 	 Train acc: 0.41 	 Train F1: 0.00
Epoch: 0 	 Val loss: 10.73 	 Val acc: 0.50 	 Val F1: 0.00
Total files to train on: 34
Epoch: 1 	 Train loss: 16.82 	 Train acc: 0.56 	 Train F1: 0.94
Epoch: 1 	 Val loss: 38.90 	 Val acc: 0.50 	 Val F1: 0.67
Total files to train on: 34
Epoch: 2 	 Train loss: 22.01 	 Train acc: 0.65 	 Train F1: 0.00
Epoch: 2 	 Val loss: 13.51 	 Val acc: 0.50 	 Val F1: 0.00
Total files to train on: 34
Epoch: 3 	 Train loss: 5.46 	 Train acc: 0.85 	 Train F1: 0.94
Epoch: 3 	 Val loss: 21.75 	 Val acc: 0.50 	 Val F1: 0.67
Total files to train on: 34
Epoch: 4 	 Train loss: 2.70 	 Train acc: 0.88 	 Train F1: 0.94
Epoch: 4 	 Val loss: 3.56 	 Val acc: 0.50 	 Val F1: 0.67
Total files to train on: 34
Epoch: 5 	 Train loss: 1.42 	 Train acc: 0.88 	 Train F1: 0.94
Epoch: 5 	 Val loss: 2.55 	 Val acc: 0.50 	 Val F1: 0.67
Total files to train on: 34
Epoch: 6 	 Train loss: 0.18 	 Train acc: 0.97 	 Train F1: 1.00
Epoch: 6 	 Val 