In [3]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import random_split

import torcheval.metrics.functional as metrics

import pandas as pd 
import numpy as np
from tqdm import tqdm

torch.manual_seed(1000) # set the random seed

<torch._C.Generator at 0x104e99190>

In [6]:

class SleepDataset(Dataset):
    def __init__(self, hr_file, ecg_file, window_size=30):
        self.df1 = pd.read_csv(hr_file)
        self.df2 = pd.read_csv(ecg_file)
        self.window_size = window_size

        self.freq1 = 1
        self.freq2 = 256

        
    def __len__(self):
        len1 = len(self.df1)/(self.freq1*self.window_size)
        len2 = len(self.df2)/(self.freq2*self.window_size)
        #should be the same but just in case 
        return round(min(len1,len2))

    def __getitem__(self, idx):
        start_window1 = self.window_size*idx*self.freq1
        start_window2 = self.window_size*idx*self.freq2
        # Extract heart rate data points and label for the current window
        hr = self.df1['heart_rate'].iloc[start_window1:start_window1+self.window_size*self.freq1].values
        act = self.df1['activity_count'].iloc[start_window1:start_window1+self.window_size*self.freq1].values
        labels = self.df1['psg_status'].iloc[start_window1]
        #optional: combine labels
        #labels = np.where(labels != 0, 1, labels)
        if labels in [2,3]:
            labels = 2
        elif labels in [4,5]:
            labels = 3

            

        ecg = self.df2['ECG'].iloc[start_window2:start_window2+self.window_size*self.freq2].values

        # Convert to PyTorch tensors
        hr = torch.tensor(hr, dtype=torch.float).unsqueeze(0)  # Add extra dimension at index 0
        labels = torch.tensor(labels, dtype=torch.long)
        ecg = torch.tensor(ecg, dtype=torch.float).unsqueeze(0)  # Add extra dimension at index 0
        act = torch.tensor(act, dtype=torch.float).unsqueeze(0)  # Add extra dimension at index 0



        return hr, ecg, act, labels


    

In [24]:
# based on: https://github.com/akaraspt/tinysleepnet

#takes in ECG and either activity or heart rate data 

class SleepNet(nn.Module):
    def __init__(self):
        super(SleepNet,self).__init__()
        self.pool1 = nn.MaxPool1d(8, 8) #kernel_size, stride
        self.pool2 = nn.MaxPool1d(4, 4) #kernel_size, stride

        self.conv1 = nn.Conv1d(1, 128, 8) #in_channels, out_chanels, kernel_size
        self.conv2 = nn.Conv1d(128, 128, 8) #in_channels, out_chanels, kernel_size

        self.dropout = nn.Dropout(0.5)
        self.lstm = nn.LSTM(128,128)
        

        self.fc1 = nn.Linear(29952, 16)

        self.fc2 = nn.Linear(30, 16)
        self.fc3 = nn.Linear(32, 4)

        

    def forward(self, ecg, hr):

        ecg = self.pool1(ecg)
        ecg = self.dropout(ecg)
        ecg = F.relu(self.conv1(ecg))
        ecg = F.relu(self.conv2(ecg))
        ecg = self.pool2(F.relu(self.conv2(ecg)))
        ecg = self.dropout(ecg)


        # Transpose dimensions for LSTM input
        ecg = ecg.permute(2, 0, 1)  # Shape: [seq_len, batch_size, input_size]
        
        ecg, _ = self.lstm(ecg)
        ecg = self.dropout(ecg)

        #Get size of final layer
        x_dim = ecg.size(0) * ecg.size(2)

        ecg = ecg.view(-1, x_dim) #[batch size, output size]
        ecg = F.relu(self.fc1(ecg)) #[batch size, 16]

        #fully connected layer for HR data 
        hr = hr.squeeze(1)
        hr = F.relu(self.fc2(hr))  #[batch size, 16]


        cat = torch.cat((ecg, hr), dim=1)
        
        cat = self.fc3(cat)
        cat = cat.squeeze(1) # Flatten to [batch_size]
        
        return cat



In [7]:


dataset = SleepDataset(r'mesa_preproc/final/subject_0002.csv',r'mesa_preproc/final/subject_0002_ecg.csv')


# Define the sizes of train, validation, and test sets
train_size = int(0.7 * len(dataset))  # 70% of the data for training
val_size = int(0.15 * len(dataset))   # 15% of the data for validation
test_size = len(dataset) - train_size - val_size  # Remaining data for testing

# Use random_split to split the dataset
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])






In [8]:

dataset = SleepDataset(r'mesa_preproc/final/subject_0381.csv',r'mesa_preproc/final/subject_0381_ecg.csv')


# Define the sizes of train, validation, and test sets
train_size = int(0.7 * len(dataset))  # 70% of the data for training
val_size = int(0.15 * len(dataset))   # 15% of the data for validation
test_size = len(dataset) - train_size - val_size  # Remaining data for testing

# Use random_split to split the dataset
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])



In [13]:
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle = True)
for hr, ecg, act, labels in train_loader:
    print(act.shape)

torch.Size([64, 1, 30])
torch.Size([64, 1, 30])
torch.Size([64, 1, 30])
torch.Size([64, 1, 30])
torch.Size([64, 1, 30])
torch.Size([64, 1, 30])
torch.Size([64, 1, 30])
torch.Size([64, 1, 30])
torch.Size([64, 1, 30])
torch.Size([64, 1, 30])
torch.Size([31, 1, 30])


In [16]:
model = SleepNet()
model(ecg,hr)

torch.Size([31, 16])
torch.Size([31, 30])
torch.Size([31, 16])
torch.Size([31, 32])


tensor([[-0.0378,  0.0768,  0.0072, -0.0045],
        [-0.0396,  0.0826, -0.0007, -0.0237],
        [-0.0403,  0.0705, -0.0049, -0.0172],
        [-0.0288,  0.0878, -0.0144, -0.0516],
        [-0.0517,  0.0664,  0.0066, -0.0184],
        [-0.0379,  0.0714, -0.0006, -0.0168],
        [-0.0400,  0.0661, -0.0066, -0.0306],
        [ 0.0215,  0.0936, -0.0050, -0.1137],
        [-0.0354,  0.0764, -0.0023, -0.0243],
        [-0.0232,  0.0855,  0.0036, -0.0470],
        [-0.0302,  0.0830, -0.0009, -0.0326],
        [-0.0340,  0.0690,  0.0038, -0.0294],
        [-0.0343,  0.0851, -0.0044, -0.0618],
        [-0.0384,  0.0687,  0.0113, -0.0132],
        [-0.0377,  0.0768, -0.0032, -0.0425],
        [-0.0329,  0.0778,  0.0091, -0.0267],
        [-0.0394,  0.0741, -0.0098, -0.0164],
        [-0.0249,  0.0814,  0.0018, -0.0352],
        [-0.0133,  0.0903,  0.0002, -0.0739],
        [-0.0326,  0.0836, -0.0019, -0.0420],
        [-0.0265,  0.0842,  0.0019, -0.0414],
        [-0.0318,  0.0711, -0.0043

In [27]:


def train(model, train_data, val_data, batch_size=64,learning_rate = 0.01, num_epochs=1,use_act = False):
    #uses either activity or heart rate data 
    
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle = True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=batch_size, shuffle = True)


    #calculate class weights (bc unblanced data)
    labels = []
    
    for batch in train_loader:
        _, _,_, label_batch = batch
        labels.append(label_batch)
    labels = torch.cat(labels, dim=0)

    class_counts = torch.bincount(labels)
    total_samples = len(labels)
    class_weights = total_samples / (len(class_counts) * class_counts.float())
    class_weights /= class_weights.sum()


    #############################################
    #To Enable GPU Usage
    if use_cuda and torch.cuda.is_available():
        class_weights = class_weights.cuda()
    #############################################



    criterion = nn.CrossEntropyLoss(weight = class_weights)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    iters, val_loss,train_loss, train_acc, val_acc, train_f1, val_f1 = [],[], [], [], [], [], []

    # training
    n = 0 # the number of iterations
    for epoch in range(num_epochs):
        model.train()  # Set the model to training mode
        running_loss = 0.0
        correct_predictions = 0
        total_samples = 0
        m = 0
        for hr,ecg, act, labels in tqdm(iter(train_loader)):



            #############################################
            #To Enable GPU Usage
            if use_cuda and torch.cuda.is_available():
              hr = hr.cuda()
              ecg = ecg.cuda()
              act = act.cuda()
              labels = labels.cuda()
            #############################################

            if use_act:
                out = model(ecg,act)             # forward pass
            else:
                out = model(ecg,hr)             # forward pass
                


            
            loss = criterion(out, labels) # compute the total loss
            loss.backward()               # backward pass (compute parameter updates)
            optimizer.step()              # make the updates for each parameter
            optimizer.zero_grad()         # a clean up step for PyTorch


             # Compute statistics
            running_loss += loss.item() * hr.size(0)
            _, predicted = torch.max(out, 1)
            correct_predictions += (predicted == labels).sum().item()
            total_samples += hr.size(0)

            m += 1

        # Compute training epoch statistics
        train_loss = running_loss / total_samples
        train_accuracy = correct_predictions / total_samples
        iters.append(m)

        # Validate the model
        model.eval()  # Set the model to evaluation mode
        val_running_loss = 0.0
        val_correct_predictions = 0
        val_total_samples = 0
        val_f1 = 0.0
        n = 0
        with torch.no_grad():

            for val_hr_batch, val_ecg_batch, val_act_batch, val_labels_batch in val_loader:
                #############################################
                #To Enable GPU Usage
                if use_cuda and torch.cuda.is_available():
                    val_hr_batch = val_hr_batch.cuda()
                    val_ecg_batch = val_ecg_batch.cuda()
                    val_act_batch = val_act_batch.cuda()
                    val_labels_batch = val_labels_batch.cuda()
                #############################################
                    
                if use_act:
                    val_outputs = model(val_ecg_batch,val_act_batch)           # forward pass
                else:
                    val_outputs = model(val_ecg_batch,val_hr_batch)             # forward pass
                
                val_loss = criterion(val_outputs, val_labels_batch)

                val_running_loss += val_loss.item() * val_ecg_batch.size(0)
                _, val_predicted = torch.max(val_outputs, 1)
                val_correct_predictions += (val_predicted == val_labels_batch).sum().item()
                val_total_samples += val_ecg_batch.size(0)
                val_f1 += metrics.multiclass_f1_score(val_predicted,val_labels_batch)
                n+=1
                #print(val_predicted)
                #print(val_labels_batch)

        # Compute validation statistics
        val_loss = val_running_loss / val_total_samples
        val_accuracy = val_correct_predictions / val_total_samples
        val_f1=val_f1/n
        
        # Save the current model (checkpoint) to a file
        #model_path = get_model_name(model.name, batch_size, learning_rate, epoch)
        #model_path2 = get_model_name2(model.name, batch_size, learning_rate)

        #datadir = "/content/gdrive/MyDrive/Lab2_models/"
        #datadir = "data\models"

        #torch.save(model.state_dict(), (os.path.join(datadir,model_path)))
        # Print epoch statistics
        print(f'Epoch [{epoch+1}/{num_epochs}], '
            f'Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, '
            f'Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}, Val F1: {val_f1:.4f}')


        #np.savetxt("{}_train_acc.csv".format(os.path.join(datadir,model_path2)), train_acc)
        #np.savetxt("{}_val_acc.csv".format(os.path.join(datadir,model_path2)), val_acc)
        #np.savetxt("{}_losses.csv".format(os.path.join(datadir,model_path2)), losses)



    # plotting


    '''
    plt.title("Training Curve")
    plt.plot(iters, train_loss, label="Train")
    plt.plot(iters, val_loss, label="Validation")
    plt.xlabel("Iterations")
    plt.ylabel("Loss")
    plt.show()

    plt.title("Training Curve")
    plt.plot(iters, train_acc, label="Train")
    plt.plot(iters, val_acc, label="Validation")
    plt.xlabel("Iterations")
    plt.ylabel("Training Accuracy")
    plt.legend(loc='best')
    plt.show()
    '''






In [28]:
#model using heart rate data 
model = SleepNet()

use_cuda = True

if use_cuda and torch.cuda.is_available():
  model.cuda()
  print("Using CUDA")


train(model, train_dataset, val_dataset, batch_size=64,learning_rate = 0.001, num_epochs=20)


100%|██████████| 11/11 [00:22<00:00,  2.06s/it]


Epoch [1/20], Train Loss: 1.4035, Train Accuracy: 0.1580, Val Loss: 1.3866, Val Accuracy: 0.1259, Val F1: 0.1958


100%|██████████| 11/11 [00:21<00:00,  1.99s/it]


Epoch [2/20], Train Loss: 1.3835, Train Accuracy: 0.2086, Val Loss: 1.3904, Val Accuracy: 0.4476, Val F1: 0.4865


100%|██████████| 11/11 [00:19<00:00,  1.79s/it]


Epoch [3/20], Train Loss: 1.3818, Train Accuracy: 0.6006, Val Loss: 1.4228, Val Accuracy: 0.6154, Val F1: 0.5944


100%|██████████| 11/11 [00:19<00:00,  1.73s/it]


Epoch [4/20], Train Loss: 1.3804, Train Accuracy: 0.4471, Val Loss: 1.3705, Val Accuracy: 0.1469, Val F1: 0.1264


100%|██████████| 11/11 [00:18<00:00,  1.68s/it]


Epoch [5/20], Train Loss: 1.3733, Train Accuracy: 0.1267, Val Loss: 1.3804, Val Accuracy: 0.2168, Val F1: 0.2295


100%|██████████| 11/11 [00:19<00:00,  1.73s/it]


Epoch [6/20], Train Loss: 1.3743, Train Accuracy: 0.2608, Val Loss: 1.4281, Val Accuracy: 0.2238, Val F1: 0.1667


100%|██████████| 11/11 [00:19<00:00,  1.74s/it]


Epoch [7/20], Train Loss: 1.3658, Train Accuracy: 0.2787, Val Loss: 1.3799, Val Accuracy: 0.3566, Val F1: 0.3677


100%|██████████| 11/11 [00:18<00:00,  1.72s/it]


Epoch [8/20], Train Loss: 1.3594, Train Accuracy: 0.4262, Val Loss: 1.3601, Val Accuracy: 0.4685, Val F1: 0.4510


100%|██████████| 11/11 [00:19<00:00,  1.79s/it]


Epoch [9/20], Train Loss: 1.3589, Train Accuracy: 0.3800, Val Loss: 1.3773, Val Accuracy: 0.3986, Val F1: 0.3649


100%|██████████| 11/11 [00:20<00:00,  1.83s/it]


Epoch [10/20], Train Loss: 1.3481, Train Accuracy: 0.5037, Val Loss: 1.3652, Val Accuracy: 0.5804, Val F1: 0.6024


100%|██████████| 11/11 [00:23<00:00,  2.12s/it]


Epoch [11/20], Train Loss: 1.3489, Train Accuracy: 0.6438, Val Loss: 1.3809, Val Accuracy: 0.5594, Val F1: 0.5358


100%|██████████| 11/11 [00:24<00:00,  2.21s/it]


Epoch [12/20], Train Loss: 1.3378, Train Accuracy: 0.4739, Val Loss: 1.3417, Val Accuracy: 0.2937, Val F1: 0.2868


100%|██████████| 11/11 [00:23<00:00,  2.14s/it]


Epoch [13/20], Train Loss: 1.3311, Train Accuracy: 0.4128, Val Loss: 1.3548, Val Accuracy: 0.4965, Val F1: 0.5059


100%|██████████| 11/11 [00:26<00:00,  2.41s/it]


Epoch [14/20], Train Loss: 1.3309, Train Accuracy: 0.5410, Val Loss: 1.3437, Val Accuracy: 0.5874, Val F1: 0.5736


100%|██████████| 11/11 [00:24<00:00,  2.26s/it]


Epoch [15/20], Train Loss: 1.3157, Train Accuracy: 0.5678, Val Loss: 1.3710, Val Accuracy: 0.5245, Val F1: 0.5437


100%|██████████| 11/11 [00:21<00:00,  1.92s/it]


Epoch [16/20], Train Loss: 1.3197, Train Accuracy: 0.4590, Val Loss: 1.3567, Val Accuracy: 0.5315, Val F1: 0.4979


100%|██████████| 11/11 [00:20<00:00,  1.85s/it]


Epoch [17/20], Train Loss: 1.3101, Train Accuracy: 0.6438, Val Loss: 1.3351, Val Accuracy: 0.6294, Val F1: 0.5878


100%|██████████| 11/11 [00:20<00:00,  1.88s/it]


Epoch [18/20], Train Loss: 1.3016, Train Accuracy: 0.4873, Val Loss: 1.3367, Val Accuracy: 0.4685, Val F1: 0.5021


100%|██████████| 11/11 [00:19<00:00,  1.78s/it]


Epoch [19/20], Train Loss: 1.2979, Train Accuracy: 0.4709, Val Loss: 1.3373, Val Accuracy: 0.5944, Val F1: 0.5958


100%|██████████| 11/11 [00:22<00:00,  2.02s/it]


Epoch [20/20], Train Loss: 1.2935, Train Accuracy: 0.6170, Val Loss: 1.3231, Val Accuracy: 0.5804, Val F1: 0.5684


In [29]:
#model using activity data
model = SleepNet()

use_cuda = True

if use_cuda and torch.cuda.is_available():
  model.cuda()
  print("Using CUDA")


train(model, train_dataset, val_dataset, batch_size=64,learning_rate = 0.001, num_epochs=20,use_act=True)

100%|██████████| 11/11 [00:20<00:00,  1.87s/it]


Epoch [1/20], Train Loss: 2.0407, Train Accuracy: 0.3145, Val Loss: 1.3898, Val Accuracy: 0.2378, Val F1: 0.2451


100%|██████████| 11/11 [00:23<00:00,  2.11s/it]


Epoch [2/20], Train Loss: 1.6324, Train Accuracy: 0.2250, Val Loss: 1.3923, Val Accuracy: 0.5524, Val F1: 0.5646


100%|██████████| 11/11 [00:22<00:00,  2.07s/it]


Epoch [3/20], Train Loss: 1.4323, Train Accuracy: 0.4531, Val Loss: 1.4096, Val Accuracy: 0.2308, Val F1: 0.2910


100%|██████████| 11/11 [00:23<00:00,  2.11s/it]


Epoch [4/20], Train Loss: 1.3267, Train Accuracy: 0.2578, Val Loss: 1.3931, Val Accuracy: 0.2378, Val F1: 0.2281


100%|██████████| 11/11 [00:29<00:00,  2.66s/it]


Epoch [5/20], Train Loss: 1.3180, Train Accuracy: 0.2891, Val Loss: 1.3659, Val Accuracy: 0.2727, Val F1: 0.2712


100%|██████████| 11/11 [00:22<00:00,  2.03s/it]


Epoch [6/20], Train Loss: 1.3093, Train Accuracy: 0.2623, Val Loss: 1.4164, Val Accuracy: 0.2657, Val F1: 0.2490


  0%|          | 0/11 [00:00<?, ?it/s]


KeyboardInterrupt: 