In [1]:
!pip install pandas
!pip install torch


[notice] A new release of pip available: 22.1.2 -> 23.3.2
[notice] To update, run: python.exe -m pip install --upgrade pip

[notice] A new release of pip available: 22.1.2 -> 23.3.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import os
from os.path import dirname, join as pjoin
import scipy.io as sio
import pandas as pd
import numpy as np

import torch
from torch.utils.data import Dataset, DataLoader

import matplotlib
from matplotlib.pyplot import plot,figure
from typing import List

In [3]:
# Load the raw and filtered EEG recordings for S002, plus the marker table.

# All input files live in one sibling directory of the working directory.
data_dir = pjoin(os.getcwd(), '..', 'dataset_phase1_ntx23')
S002_raw_mat_fname = pjoin(data_dir, 'train_S002_night1_hackathon_raw.mat')
S002_filt_mat_fname = pjoin(data_dir, 'train_S002_night1_hackathon_filt.mat')

# Read both .mat files (raw first, then filtered).
S002_raw_mat_contents, S002_filt_mat_contents = (
    sio.loadmat(mat_path) for mat_path in (S002_raw_mat_fname, S002_filt_mat_fname)
)

# Each file keeps its recording in a struct stored under the 'EEG' key.
S002_raw_EEG = S002_raw_mat_contents['EEG']
S002_filt_EEG = S002_filt_mat_contents['EEG']

# Sampling rate: unwrap the nested arrays returned by loadmat into a plain scalar.
fs = S002_raw_EEG['srate'].item()[0].item()

# First row of the raw struct's 'data' and 'times' fields.
raw_data = S002_raw_EEG['data'].item()[0]
time = S002_raw_EEG['times'].item()[0]

# The filtered file holds two rows of 'data'; the variable names indicate the
# pass-bands (1-35 Hz and 4-18 Hz) -- TODO confirm against the dataset description.
filt_data_4_to_18Hz = S002_filt_EEG['data'].item()[1]
filt_data_1_to_35Hz = S002_filt_EEG['data'].item()[0]

# Marker table for S002, loaded into a DataFrame.
marker_fname = pjoin(data_dir, "train_S002_labeled.csv")
markers_df = pd.read_csv(marker_fname)

In [4]:
window_len = 7500  # number of samples per epoch
print(len(filt_data_1_to_35Hz))
num_epochs = int(np.ceil(len(filt_data_1_to_35Hz)/window_len))
print(num_epochs)

# All complete windows; the final (possibly shorter) window is handled separately below.
filt_data_1_to_35Hz_epochs = [filt_data_1_to_35Hz[i*window_len:(i+1)*window_len] for i in range(num_epochs-1)]

print(len(filt_data_1_to_35Hz_epochs[5]))

# Zero-pad the final window up to window_len samples.
# The padding is created with the signal's own dtype: np.zeros defaults to float64,
# and np.append would otherwise promote the last epoch to a different dtype than
# every other epoch.
last_epoch = filt_data_1_to_35Hz[(num_epochs-1)*window_len:]
zeros = np.zeros((1, window_len - len(last_epoch)), dtype=last_epoch.dtype)
last_epoch = np.append(last_epoch, zeros)  # np.append flattens both inputs to 1-D
print(len(last_epoch))


4965399
663
7500
7500


In [5]:
# Marker columns of interest; the position in this list (plus one) becomes the class id.
labels_to_use: List[str] = ["SS1", "REM1", "REM0", "K1"]

# One class id per signal sample; 0 means none of the columns above is active.
timestamp_labels = np.zeros(len(filt_data_1_to_35Hz))
print(timestamp_labels.shape)
for class_id, curr_label in enumerate(labels_to_use, start=1):
    is_active = markers_df[curr_label] > 0
    # The 'Timestamp' values of the active rows are used directly as sample indices.
    idx = markers_df.loc[is_active, ['Timestamp']].values
    timestamp_labels[idx] = class_id


(4965399,)


In [157]:
class DatasetEEG(Dataset):
    """Fixed-length EEG epochs paired with per-sample integer class labels.

    The signal is cut into consecutive, non-overlapping windows of
    ``window_len`` samples. When the signal length is not a multiple of
    ``window_len`` the final window is zero-padded (signal and labels alike).

    Every sample gets a class id: 0 when none of ``labels_to_use`` is active,
    otherwise ``k + 1`` where ``k`` is the position of the active column in
    ``labels_to_use``. If several columns are active for the same sample, the
    one listed last wins.

    Args:
        signal: 1-D array of signal samples.
        labels_df: Frame with a ``Timestamp`` column, whose values are used
            directly as sample indices into ``signal``, and one column per
            entry of ``labels_to_use`` (a value > 0 marks the label as active).
        transform: Optional callable applied to each epoch tensor in
            ``__getitem__``; pass ``None`` to disable.
        window_len: Number of samples per epoch.
        labels_to_use: Names of the marker columns to encode.

    Raises:
        ValueError: If ``window_len`` is not a positive number.
    """

    def __init__(self,signal:np.ndarray,labels_df:pd.DataFrame, transform, window_len:int=7500, labels_to_use:List[str]=["SS1","REM1","REM0","K1"]):
        super().__init__()
        if window_len <= 0:
            raise ValueError(f"window_len must be positive, got {window_len}")

        self.signal = np.asarray(signal)
        self.labels_df = labels_df
        self.signal_len = len(self.signal)
        self.window_len = window_len
        self.num_epochs = int(np.ceil(self.signal_len / self.window_len))
        # Copy so that the shared default list (or the caller's list) is never aliased.
        self.labels_to_use = list(labels_to_use)
        self.transform = transform

        # Split the signal into epochs of length window_len.
        self.epochs = self._split_into_epochs(self.signal)

        # Per-sample class ids, split exactly like the signal.
        self.timestamp_labels = np.zeros(self.signal_len, dtype=np.int64)
        self.GetTimestampLabels()
        self.epoch_labels = self._split_into_epochs(self.timestamp_labels)

    def _split_into_epochs(self, values: np.ndarray) -> List[np.ndarray]:
        """Cut ``values`` into windows of ``window_len`` samples, zero-padding the last one."""
        windows = [values[start:start + self.window_len]
                   for start in range(0, len(values), self.window_len)]
        if windows and len(windows[-1]) < self.window_len:
            # Pad with the array's own dtype: float64 zeros would silently promote
            # the final window to a different dtype than all the other windows.
            padding = np.zeros(self.window_len - len(windows[-1]), dtype=values.dtype)
            windows[-1] = np.concatenate([windows[-1], padding])
        return windows

    def GetTimestampLabels(self):
        """Get labels for every timestamp/sample"""
        for class_id, curr_label in enumerate(self.labels_to_use, start=1):
            is_active = self.labels_df[curr_label] > 0
            idx = self.labels_df.loc[is_active, 'Timestamp'].to_numpy()
            self.timestamp_labels[idx] = class_id

    def __len__(self):
        return self.num_epochs

    def __getitem__(self, index):
        """Return ``(epoch, labels)``: a float32 tensor and a long tensor of ``window_len`` entries each."""
        # Always float32 so every epoch (including the padded one) matches the
        # default parameter dtype of torch.nn layers.
        epoch = torch.tensor(self.epochs[index], dtype=torch.float32)
        if self.transform is not None:
            epoch = self.transform(epoch)
        labels = torch.tensor(self.epoch_labels[index], dtype=torch.long)
        return epoch, labels

In [158]:
data = DatasetEEG(
    signal=filt_data_1_to_35Hz,  #[:len(filt_data_1_to_35Hz)-399],
    labels_df=markers_df,
    transform=None,
    window_len=7500,
    labels_to_use=["SS1", "REM1", "REM0", "K1"],
)

# Chronological 80/20 split: the first 80% of the epochs are used for training and
# the remaining epochs are held out. split_ind used to be computed but never
# applied, so the loader iterated over the whole recording.
split_ind = int(len(data)*0.8)
train_data = torch.utils.data.Subset(data, range(split_ind))
val_data = torch.utils.data.Subset(data, range(split_ind, len(data)))

# shuffle=False keeps the epochs in recording order.
train_dataloader = DataLoader(train_data, batch_size=1, shuffle=False)
val_dataloader = DataLoader(val_data, batch_size=1, shuffle=False)

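To Do:
- Figure out the issue with using the full signal length (current workaround: uncomment "[:len(filt_data_1_to_35Hz)-399]," in the DatasetEEG call to drop the last 399 samples, which makes the signal length an exact multiple of the 7500-sample window so no zero-padded final epoch is created)
- Figure out whether a final MLP layer is needed after the LSTM
- Create the training loop
    - Add optimizer
    - Add loss function
    - Add backprop
- Create metrics for performance assessment (WandB?)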

In [76]:
import torch.nn as nn
from torch.autograd import Variable 

In [159]:
class LSTM1(nn.Module):
    """LSTM followed by a two-layer dense head, producing one output vector per time step.

    Args:
        num_classes: Size of the output vector produced for every time step.
        input_size: Number of features per time step of the input.
        hidden_size: Number of features in the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        seq_length: Sequence length; stored on the instance but not used by any layer.

    The LSTM is built with ``batch_first=True``, so ``forward`` expects input of
    shape ``(batch, seq, input_size)`` and returns ``(batch, seq, num_classes)``.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers, seq_length):
        super().__init__()
        self.num_classes = num_classes  # size of the per-step output
        self.num_layers = num_layers  # stacked LSTM layers
        self.input_size = input_size  # features per time step
        self.hidden_size = hidden_size  # features in the hidden state
        self.seq_length = seq_length  # kept for reference only

        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)
        self.fc_1 = nn.Linear(hidden_size, 128)  # first dense layer
        self.fc = nn.Linear(128, num_classes)  # output layer

        self.relu = nn.ReLU()

    def forward(self, x):
        """Run ``x`` through the LSTM and the dense head; returns raw (un-normalised) scores."""
        # Zero initial states created from x itself so they always share its device
        # and dtype (plain torch.zeros would be pinned to CPU / float32).
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h_0 = x.new_zeros(state_shape)  # initial hidden state
        c_0 = x.new_zeros(state_shape)  # initial cell state

        # Only the per-step outputs are needed; the final states are discarded.
        output, _ = self.lstm(x, (h_0, c_0))

        out = self.relu(output)
        out = self.fc_1(out)
        out = self.relu(out)
        out = self.fc(out)
        return out

In [171]:
# NOTE: num_epochs here is the number of training passes; it overwrites the
# num_epochs (number of signal windows) computed in the exploratory epoching cell.
num_epochs = 100  # passes over the training data
learning_rate = 0.001

input_size = 1  # one signal value per time step
hidden_size = 2  # number of features in the LSTM hidden state
num_layers = 1  # number of stacked LSTM layers

# Per-sample labels are 0 (no marker active) plus 1..len(labels_to_use), so the
# model needs one extra output for the background class. With only
# len(labels_to_use) classes the highest label id is out of range.
num_classes = len(labels_to_use) + 1

lstm1 = LSTM1(num_classes, input_size, hidden_size, num_layers, window_len)

# Each sample belongs to exactly one class, so use cross-entropy on the integer
# class ids instead of per-class binary cross-entropy on one-hot targets.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(lstm1.parameters(), lr=learning_rate)

lstm1.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    num_batches = 0
    for step, (x, y) in enumerate(train_dataloader):
        # Add a trailing feature axis for the batch_first LSTM and force float32
        # so the input matches the model parameters.
        x = x.unsqueeze(dim=2).float()

        optimizer.zero_grad()  # clear gradients left over from the previous step
        outputs = lstm1(x)  # forward pass: one score vector per time step

        # CrossEntropyLoss expects (N, C) scores and (N,) class ids, so the batch
        # and time axes are flattened together.
        loss = criterion(outputs.reshape(-1, num_classes), y.reshape(-1))

        loss.backward()  # backpropagate: compute gradients of the loss
        optimizer.step()  # update the parameters from those gradients

        running_loss += loss.item()
        num_batches += 1

    if epoch % 10 == 0:
        # Report the mean loss over the epoch rather than only the last batch.
        print("Epoch: %d, loss: %1.5f" % (epoch, running_loss / max(num_batches, 1)))