In [None]:
!pip install pandas
!pip install torch



In [None]:
import os
from os.path import dirname, join as pjoin
import scipy.io as sio
import pandas as pd
import numpy as np

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn

import matplotlib
from matplotlib.pyplot import plot,figure
from typing import List

from sklearn.model_selection import train_test_split

In [None]:
def getSignal(signal_fname:str, labels_fname:str, labels_to_use:List[str]):
  raw_mat_fname = f'{signal_fname}_raw.mat'
  filt_mat_fname = f'{signal_fname}_filt.mat'

  raw_mat_contents = sio.loadmat(raw_mat_fname)
  filt_mat_contents = sio.loadmat(filt_mat_fname)

  raw_EEG = raw_mat_contents['EEG']
  filt_EEG = filt_mat_contents['EEG']

  fs = raw_EEG['srate'].item()[0].item()

  raw_data = raw_EEG['data'].item()[0]
  filt_data_1_to_35Hz = filt_EEG['data'].item()[0]
  filt_data_4_to_18Hz = filt_EEG['data'].item()[1]

  time = raw_EEG['times'].item()[0]

  #Load marker data for S002 into dataframe
  labels_fname = f'{labels_fname}_labeled.csv'
  markers_df = pd.read_csv(labels_fname)



  timestamp_labels = np.zeros((len(raw_data)))
  label_weights = np.zeros(len(labels_to_use)+1)
  len_signal = len(raw_data)
  num_classes = len(labels)+1
  for i,curr_label in enumerate(labels_to_use):
    idx = markers_df.loc[markers_df[curr_label]>0,['Timestamp']].values
    timestamp_labels[idx] = i+1
    label_weights[i+1] = len_signal/(len(idx)*num_classes)
  label_weights[0] =  len_signal/(len_signal-label_weights[1:].sum()*num_classes)

  print(label_weights)


  return raw_data, filt_data_1_to_35Hz, filt_data_4_to_18Hz, time, timestamp_labels, label_weights

In [None]:
class DatasetEEG(Dataset):
    def __init__(self,signal:np.ndarray,labels: np.ndarray, transform, dev, window_len:int=7500):
        super().__init__()
        self.signal = signal
        self.dev = dev
        self.labels = labels
        self.signal_len = signal.shape[0]
        self.window_len = window_len
        self.num_epochs = int(np.ceil(self.signal_len/self.window_len))
        self.transform = transform

        #split signal into epochs of length window_len
        self.epochs = [self.signal[i*self.window_len:(i*self.window_len+self.window_len), :] for i in range(0,self.num_epochs-1)]
        last_epoch = self.signal[(self.num_epochs-1)*self.window_len:, :]

        #Add zero padding to final epoch and append to epochs list
        if len(last_epoch) < window_len:
          zeros = np.zeros((window_len - len(last_epoch),3))
          last_epoch = np.append(last_epoch,zeros, axis = 0)
        self.epochs.append(last_epoch)

        self.epoch_labels = [self.labels[i*self.window_len:(i*self.window_len+self.window_len)] for i in range(0,self.num_epochs-1)]
        last_epoch_labels = self.labels[(self.num_epochs-1)*self.window_len:]
        #Add zero padding to final epoch and append to epochs list

        if len(last_epoch_labels) < window_len:
          zeros = np.zeros((window_len - len(last_epoch_labels)))
          last_epoch_labels = np.append(last_epoch_labels,zeros, axis = 0)
        self.epoch_labels.append(last_epoch_labels)


    def __len__(self):
        return self.num_epochs

    def __getitem__(self, index):
        #if self.transform:
        #    return self.transform(torch.tensor(self.epochs[index])), torch.tensor(self.epoch_labels[index])
        return torch.tensor(self.epochs[index], device=self.dev, dtype=torch.float32), torch.tensor(self.epoch_labels[index], dtype=torch.long, device=self.dev)

In [None]:
if torch.cuda.is_available():
  device = torch.device('cuda')
  print("Using GPU")
else:
  device = torch.device('cpu')
  print("Using CPU")

window_len = 7500
labels_to_use = ["SS1","REM1","REM0","K1"]


#Get S002 and S003 data
(S002_raw_data,
 S002_filt_data_1_to_35Hz,
 S002_filt_data_4_to_18Hz,
 S002_time,
 S002_markers,
 S002_label_weights) = getSignal('train_S002_night1_hackathon','train_S002', labels_to_use)

(S003_raw_data,
 S003_filt_data_1_to_35Hz,
 S003_filt_data_4_to_18Hz,
 S003_time,
 S003_markers,
 S003_label_weights) = getSignal('train_S003_night5_hackathon','train_S003', labels_to_use)



S002_signal_len = len(S002_filt_data_1_to_35Hz)
S002_final_ind = S002_signal_len
S002_signal = np.vstack((S002_raw_data,S002_filt_data_4_to_18Hz,S002_filt_data_1_to_35Hz))
S002_signal = np.transpose(S002_signal,(1,0))

S003_signal_len = len(S003_filt_data_1_to_35Hz)
S003_final_ind = S003_signal_len
S003_signal = np.vstack((S003_raw_data,S003_filt_data_4_to_18Hz,S003_filt_data_1_to_35Hz))
S003_signal = np.transpose(S003_signal,(1,0))


#train_test_split(S002_signal)

S002_data = DatasetEEG(
                 signal=S002_signal,
                 labels=S002_markers,
                 transform = None,
                 dev = device,
                 window_len=window_len
                 )
S003_data = DatasetEEG(
                 signal=S003_signal,
                 labels=S003_markers,
                 transform = None,
                 dev = device,
                 window_len=window_len
                 )

train_dataloader = DataLoader(S002_data, batch_size=68, shuffle=True)
test_dataloader = DataLoader(S003_data, batch_size=68, shuffle=True)

Using GPU
[1.05957944e+00 2.39181069e+03 8.03462621e+03 7.19623043e+04
 1.06782774e+04]
[1.03727843e+00 5.64294233e+03 8.94996899e+03 4.58153175e+04
 8.74656061e+03]


In [None]:
X_train, X_test, y_train, y_test = train_test_split(S002_signal, S002_markers, test_size=0.25)
print(len(X_test) % 7500)

3850


To Do:
-Figure out issue with using full signal length (right now uncommenting "[:len(filt_data_1_to_35Hz)-399]," and removing last 399 samples)
-Figure out need for final MLP layer after LSTM
-Create training loop
    -Add optimizer
    -Add loss function
    -Add backprop
-Create metrics for performance assessment (WandB?)
-

In [None]:
class LSTM1(nn.Module):
    def __init__(self, num_classes, input_size, hidden_size, num_layers, seq_length, dev):
        super(LSTM1, self).__init__()
        self.num_classes = num_classes #number of classes
        self.num_layers = num_layers #number of layers
        self.input_size = input_size #input size
        self.hidden_size = hidden_size #hidden state
        self.seq_length = seq_length #sequence length
        self.dev = dev

        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                          num_layers=num_layers, batch_first=True,device=self.dev) #lstm
        self.fc_1 =  nn.Linear(hidden_size, 128, device=self.dev) #fully connected 1
        self.fc = nn.Linear(128, num_classes,device=self.dev) #fully connected last layer

        self.relu = nn.ReLU()

    def forward(self,x):
        h_0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size,device=self.dev) #hidden state
        c_0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size, device=self.dev) #internal state
        # Propagate input through LSTM
        output, (hn, cn) = self.lstm(x, (h_0, c_0)) #lstm with input, hidden, and internal state
        out = self.relu(output)
        out = self.fc_1(out) #first Dense
        out = self.relu(out) #relu
        out = self.fc(out) #Final Output
        return out

In [1]:
num_epochs = 10001 #1000 epochs
learning_rate = 0.001 #0.001 lr

input_size = 3 #number of features
hidden_size = 2 #number of features in hidden state
num_layers = 1 #number of stacked lstm layers
num_classes = len(labels_to_use) + 1 #number of output classes


lstm1 = LSTM1(num_classes, input_size, hidden_size, num_layers, window_len, device)

criterion = nn.BCEWithLogitsLoss(reduction='none')   # BCELoss for regression
class_weights = torch.tensor(S002_label_weights,device=device)
optimizer = torch.optim.Adam(lstm1.parameters(), lr=learning_rate)


for epoch in range(num_epochs):
  #print("Epoch: ",epoch,"\n")
  for i,batch in enumerate(train_dataloader):
    #print("Batch: ",step)
    #x = x.unsqueeze(dim=2)
    (x,y) = batch
    outputs = lstm1.forward(x) #forward pass
    #print(outputs.shape)
    optimizer.zero_grad() #caluclate the gradient, manually setting to 0
    y = torch.nn.functional.one_hot(y,num_classes).float()

    # obtain the loss function
    loss = criterion(outputs, y)
    loss = (loss * class_weights).mean()


    loss.backward() #calculates the loss of the loss function

    optimizer.step() #improve from loss, i.e backprop
  if epoch % 1000 == 0 and epoch != 0:
    print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()),"\n")
    with torch.no_grad():
      for i,batch in enumerate(test_dataloader):
        (x_test,y_test) = batch
        outputs = lstm1.forward(x_test)
        y_pred = torch.argmax(outputs, dim=2)
        label_accs = []
        for label in range(len(labels_to_use) + 1):
          label_acc = torch.where(y_test==label,label,-1)
          label_acc = torch.where(label_acc==y_pred,1,0).sum()/(torch.where(y_test==label,1,0).sum())
          label_accs.append(label_acc)

        total_acc = torch.where(y_test==y_pred,1,0).sum()/(y_test.shape[0]*y_test.shape[1])
        print(f"Batch {i} accuracy: {total_acc}\n")
        for j,acc in enumerate(label_accs):
          print(f"label {j} accuracy: {acc}")
        print("\n")


NameError: ignored