In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import glob
import torch
import torch.nn as nn

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Directory that holds the per-subject CSV recordings.
PATHROOT = "dataset/Samples"
# NOTE(review): presumably the sampling rate of the signals in Hz; confirm.
# Nothing in the visible cells reads this value.
FPS = 125

In [3]:
# Pick the compute device in order of preference: CUDA, then Apple-Silicon MPS,
# then CPU. The MPS backend module is looked up defensively because it is absent
# from older PyTorch builds, where a direct attribute access raises
# AttributeError before the CPU fallback can be reached.
_mps_backend = getattr(torch.backends, "mps", None)

if torch.cuda.is_available():
    device = torch.device("cuda")
    print("Tersedia GPU")
elif _mps_backend is not None and _mps_backend.is_available():
    device = torch.device("mps")
    print("Tersedia MPS Apple Silicon")
else:
    device = torch.device("cpu")
    print("Tersedia CPU")

Tersedia GPU


In [4]:
# Collect every CSV under PATHROOT, ordered numerically by the digits found in
# the file *name* only. Digits in the directory part of the path are ignored so
# they cannot leak into the key, and a name without any digit sorts first
# (key -1) instead of crashing on int('').
files = sorted(
    glob.glob(PATHROOT + "/*.csv"),
    key=lambda f: int(''.join(filter(str.isdecimal, os.path.basename(f))) or -1),
)

In [5]:
# # check len of every file
# for file in files:
#     # load csv into numpy
#     data = np.genfromtxt(file, delimiter=',')
#     data = data.T    
#     # print(file, len(data))

In [6]:
# create collate_fn using RNN pad sequence
def collate_fn(data):
    """Zero-pad a batch of variable-length sequences into one dense tensor.

    Args:
        data: list of tensors. Dimension 0 of each tensor is its sequence
            length; all remaining dimensions must match across the list.

    Returns:
        A tensor of shape ``(batch, max_len, *trailing_dims)`` in which the
        sequences appear longest first and shorter ones are padded with zeros
        at the end.
    """
    # Order by sequence length (dimension 0). The previous key, len(x[0]),
    # measured the width of the first time step, which is identical for every
    # item, so the sort never did anything. sorted() also leaves the caller's
    # list untouched instead of reordering it in place.
    ordered = sorted(data, key=len, reverse=True)
    # Stack the sequences along a new leading batch dimension, padding the tail.
    return torch.nn.utils.rnn.pad_sequence(ordered, batch_first=True)

In [7]:
"""
We have 500 subjects; each subject has 3 columns of signal data.
Each subject has a different number of samples.


We want to create a dataset class that can be used by PyTorch Dataloader.
"""

class Dataset(torch.utils.data.Dataset):
    """Map-style dataset that serves one CSV file per index.

    Args:
        root_dir: directory searched (non-recursively) for ``*.csv`` files.
        transform: optional callable applied to each loaded tensor before it
            is returned.

    Attributes:
        files: the discovered CSV paths, ordered numerically by the digits in
            each file name, with the file name itself as tie-breaker.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.files = sorted(glob.glob(root_dir + "/*.csv"), key=self._numeric_key)

    @staticmethod
    def _numeric_key(path):
        """Return ``(number, file_name)`` built from the digits of the file name."""
        name = os.path.basename(path)
        # Only the file name contributes digits, so digits in directory names
        # cannot distort the order.
        digits = ''.join(ch for ch in name if ch.isdecimal())
        # A name without digits sorts first instead of raising on int('').
        # The name is the tie-breaker so equal numbers do not fall back to
        # the arbitrary order glob returns.
        return (int(digits) if digits else -1, name)

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        """Load file ``idx`` and return its transposed contents as float32."""
        raw = np.genfromtxt(self.files[idx], delimiter=',')
        # NOTE(review): the transpose presumably turns a (channels, time) file
        # layout into (time, channels); confirm against the CSV files.
        sample = torch.from_numpy(raw.T).float()
        # Compare against None so a callable that happens to be falsy (for
        # example an empty nn.Sequential) is still applied.
        if self.transform is not None:
            sample = self.transform(sample)
        return sample

In [8]:
# Index the sample directory and report how many recordings were found.
physio_dataset = Dataset(root_dir=PATHROOT)
print(len(physio_dataset))

500


In [9]:
# Training loader: two recordings per batch, reshuffled on every pass, merged
# into one tensor by collate_fn and fetched by eight worker processes.
traindl = torch.utils.data.DataLoader(
    dataset=physio_dataset,
    batch_size=2,
    shuffle=True,
    num_workers=8,
    collate_fn=collate_fn,
)

In [10]:
# # Try to iterate through dataloader, only 1 batch
# for i_batch, sample_batched in enumerate(traindl):
#     print(i_batch, sample_batched.size())
#     if i_batch == 0:
#         break

In [11]:
# sample = next(iter(traindl))

In [12]:
# print(sample.shape)
# print(sample[1][:,1])

In [13]:
"""
Remember: each subject has 3 columns of signal data.
The first is PPG, the second is ABP, and the third is ECG.
We want to build a seq2seq model that predicts ABP from PPG and ECG.
"""

# create ABP sequence encoder
class ABPEncoder(nn.Module):
    """LSTM encoder that reduces an input sequence to its final recurrent state.

    Args:
        input_size: number of features per time step.
        hidden_size: width of the LSTM hidden and cell states.
        num_layers: number of stacked LSTM layers.
        dropout: value forwarded to ``nn.LSTM``'s ``dropout`` argument.
    """

    def __init__(self, input_size, hidden_size, num_layers, dropout):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.dropout = dropout
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, dropout=dropout, batch_first=True)

    def forward(self, x):
        """Encode ``x`` (batch-first) and return ``(h_n, c_n)``.

        Both returned tensors have shape ``(num_layers, batch, hidden_size)``.
        """
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        # Build the zero initial states on the input's own device and dtype.
        # Relying on a notebook-level `device` variable breaks as soon as the
        # module runs on any other device, or is imported outside the notebook.
        h0 = x.new_zeros(state_shape)
        c0 = x.new_zeros(state_shape)
        # Only the final states are needed; the per-step outputs are discarded.
        _, (h_n, c_n) = self.lstm(x, (h0, c0))
        return h_n, c_n

# create ABP sequence decoder
class ABPDecoder(nn.Module):
    """LSTM decoder whose per-step outputs are projected by a linear layer.

    Args:
        input_size: number of features per decoder input step.
        hidden_size: width of the LSTM hidden and cell states.
        output_size: number of features produced per step by the projection.
        num_layers: number of stacked LSTM layers.
        dropout: value forwarded to ``nn.LSTM``'s ``dropout`` argument.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers, dropout):
        super().__init__()
        self.hidden_size, self.num_layers, self.dropout = hidden_size, num_layers, dropout
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x, h_n, c_n):
        """Advance the LSTM from state ``(h_n, c_n)`` over ``x``.

        Returns the projected outputs for every step of ``x`` together with
        the updated ``(h_n, c_n)`` state tuple.
        """
        hidden_seq, (next_h, next_c) = self.lstm(x, (h_n, c_n))
        # The linear layer is applied to every time step, not only the last.
        return self.fc(hidden_seq), (next_h, next_c)

In [14]:
"""
Now, combine Encoder and Decoder into Seq2Seq model.
"""

# create seq2seq model
# Remember, we use batch_first=True

class Seq2Seq(nn.Module):
    """Encoder-decoder wrapper that generates one output value per input step.

    The encoder's final ``(h_n, c_n)`` state seeds the decoder. The decoder is
    then unrolled for as many steps as the input has, starting from an
    all-zero input and feeding each prediction back in as the next input.

    Args:
        encoder: module mapping a batch-first input to ``(h_n, c_n)``.
        decoder: module called as ``decoder(step_input, h_n, c_n)`` and
            returning ``(step_output, (h_n, c_n))``.
    """

    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, x):
        """Run the model on a batch-first input ``x`` of shape (batch, seq, features).

        Returns:
            Tensor of shape ``(seq, batch, 1)``. Note this is time-major even
            though the input is batch-first, so callers comparing it with
            batch-first targets must transpose the first two dimensions.
        """
        batch_size, seq_len = x.size(0), x.size(1)
        h_n, c_n = self.encoder(x)
        # Allocate on the input's device and dtype rather than on a notebook
        # global, so the module works wherever its input lives.
        decoder_input = x.new_zeros((batch_size, 1, 1))
        outputs = x.new_zeros((seq_len, batch_size, 1))
        for t in range(seq_len):
            decoder_output, (h_n, c_n) = self.decoder(decoder_input, h_n, c_n)
            # (batch, 1, 1) -> (batch, 1), stored as the slice for step t.
            outputs[t] = decoder_output.squeeze(1)
            # Autoregressive feedback: the prediction becomes the next input.
            decoder_input = decoder_output
        return outputs



# class Seq2Seq(nn.Module):
#     def __init__(self, encoder, decoder):
#         super(Seq2Seq, self).__init__()
#         self.encoder = encoder
#         self.decoder = decoder
        
#     def forward(self, x):
#         # set initial states
#         h_n, c_n = self.encoder(x)
#         print(f"Encoder output: {h_n.shape}, {c_n.shape}")
#         # create initial input (start with zeros)
#         decoder_input = torch.zeros(1, x.size(0), 1).to(device)
#         # init output tensor
#         outputs = torch.zeros(x.size(1), x.size(0), 1).to(device)
#         # decode hidden state of last time step
#         for t in range(x.size(1)):
#             decoder_output, (h_n, c_n) = self.decoder(decoder_input, h_n, c_n)
#             # store output
#             outputs[t] = decoder_output
#             # make current output next decoder input
#             decoder_input = decoder_output
        
#         return outputs

In [15]:
# Model hyper-parameters
INPUT_DIM = 2      # encoder features per time step
OUTPUT_DIM = 1     # decoder features per time step
ENC_EMB_DIM = 256  # not referenced by the model built below
DEC_EMB_DIM = 256  # not referenced by the model built below
HID_DIM = 512
N_LAYERS = 2
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5
# The original misspelled name is kept as an alias so existing references still work.
DEC_DROPPOUT = DEC_DROPOUT

# Build the encoder and the decoder. The decoder is fed its own previous
# prediction, so its input width is OUTPUT_DIM rather than a separate constant.
encoder = ABPEncoder(INPUT_DIM, HID_DIM, N_LAYERS, ENC_DROPOUT)
decoder = ABPDecoder(OUTPUT_DIM, HID_DIM, OUTPUT_DIM, N_LAYERS, DEC_DROPOUT)

# Assemble the seq2seq model and move it to the selected device.
model = Seq2Seq(encoder, decoder).to(device)

In [16]:
# Initialize weights

def init_weights(m):
    """Overwrite every parameter reachable from ``m`` with draws from U(-0.08, 0.08).

    Parameters of ``m`` and of all its descendant modules are re-initialised
    in place; nothing is returned.
    """
    low, high = -0.08, 0.08
    for _name, weight in m.named_parameters():
        nn.init.uniform_(weight.data, low, high)

# init_weights already walks every parameter below the module it is given, so a
# single direct call covers the whole model. Going through Module.apply would
# invoke it again on each submodule and redraw the same tensors several times.
init_weights(model)
# Bare expression so the cell still displays the module summary.
model

Seq2Seq(
  (encoder): ABPEncoder(
    (lstm): LSTM(2, 512, num_layers=2, batch_first=True, dropout=0.5)
  )
  (decoder): ABPDecoder(
    (lstm): LSTM(1, 512, num_layers=2, batch_first=True, dropout=0.5)
    (fc): Linear(in_features=512, out_features=1, bias=True)
  )
)

In [17]:
# Define Optimizer, Loss Function, and Evaluation Metric

# Mean-squared-error training loss, optimised by Adam with its default settings.
criterion = nn.MSELoss()
opt = torch.optim.Adam(params=model.parameters())

import torchmetrics

# Evaluation metric: squared=False makes MeanSquaredError report RMSE
# (squared=True would report plain MSE).
metric = torchmetrics.MeanSquaredError(squared=False)

In [18]:
"""
Now we want to train the model. Before that, we create the seq2seq training loop.
"""

# seq2seq training loop for the encoder-decoder model
def train(model, iterator, optimizer, criterion, clip):
    """Run one optimisation pass over ``iterator`` and return the mean batch loss.

    Args:
        model: network called with a (batch, seq, 2) input and returning a
            time-major (seq, batch, output_dim) prediction.
        iterator: iterable of batches shaped (batch, seq, 3). Channels 0 and 2
            are the model inputs and channel 1 is the regression target.
        optimizer: optimiser over ``model``'s parameters.
        criterion: loss called as ``criterion(prediction, target)``.
        clip: maximum gradient norm passed to ``clip_grad_norm_``.

    Returns:
        Sum of the per-batch losses divided by the number of batches seen, or
        0.0 when the iterator yields nothing.

    Raises:
        ValueError: if the prediction, once made batch-first, does not line up
            with the target's (batch, seq) shape.
    """
    model.train()
    # Follow the model's own placement instead of a notebook-level global.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    epoch_loss = 0.0
    batches_seen = 0
    for i, batch in enumerate(iterator):
        inputs = batch[:, :, [0, 2]].to(run_device)
        target = batch[:, :, 1].to(run_device)

        # The model's prediction is time-major (seq, batch, dim) while the target
        # is batch-major. Bring the prediction to batch-first before flattening;
        # otherwise element k of one tensor is compared with a different
        # (sample, step) of the other whenever the batch holds more than one item.
        output = model(inputs).transpose(0, 1)
        if output.shape[:2] != target.shape:
            raise ValueError(
                f"prediction layout {tuple(output.shape)} does not match "
                f"target layout {tuple(target.shape)}"
            )
        # output = [batch size, seq len, output dim]
        # target = [batch size, seq len]
        output_dim = output.shape[-1]
        output = output.reshape(-1, output_dim)
        target = target.reshape(-1, output_dim)

        # NOTE(review): positions that were zero-padded when the batch was
        # collated are still part of this loss; masking them needs the true
        # sequence lengths, which the batch does not carry.
        loss = criterion(output, target)
        optimizer.zero_grad()
        loss.backward()
        # Clip gradient norms before the update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()

        batch_loss = loss.item()
        epoch_loss += batch_loss
        batches_seen += 1
        print(f"Batch {i+1} loss: {batch_loss}")

    # Counting batches avoids both a ZeroDivisionError on an empty iterator and
    # the need for the iterator to implement __len__.
    return epoch_loss / max(batches_seen, 1)


In [19]:
"""
Train Process
"""

# Train The Model

# Schedule: number of passes over the data and the gradient-norm ceiling.
epochs, clip = 10, 1
# Initialised for validation tracking; nothing in this cell updates it.
best_valid_loss = float('inf')

for epoch in range(epochs):
    train_loss = train(
        model=model,
        iterator=traindl,
        optimizer=opt,
        criterion=criterion,
        clip=clip,
    )
    print(f'Epoch: {epoch+1:02} | Train Loss: {train_loss:.3f}')

Batch 1 loss: 4976.427734375
Batch 2 loss: 2585.648681640625
Batch 3 loss: 4282.90771484375
Batch 4 loss: 8729.7021484375
Batch 5 loss: 2841.92919921875
Batch 6 loss: 6836.955078125
Batch 7 loss: 6748.6259765625
Batch 8 loss: 4020.52490234375
Batch 9 loss: 1933.9794921875
Batch 10 loss: 3138.328857421875
Batch 11 loss: 7673.94775390625
Batch 12 loss: 6654.3193359375
Batch 13 loss: 5221.34130859375
Batch 14 loss: 5035.107421875
Batch 15 loss: 4035.20458984375
Batch 16 loss: 3272.1083984375
Batch 17 loss: 3030.54638671875
Batch 18 loss: 3835.49365234375
Batch 19 loss: 4351.892578125
Batch 20 loss: 5391.20654296875
Batch 21 loss: 5505.16015625
Batch 22 loss: 2305.82373046875
Batch 23 loss: 2081.389892578125
Batch 24 loss: 4715.22265625
Batch 25 loss: 2035.69873046875
Batch 26 loss: 6326.08984375
Batch 27 loss: 3933.836181640625
Batch 28 loss: 3327.7783203125
Batch 29 loss: 2696.89892578125
Batch 30 loss: 2808.564453125
Batch 31 loss: 3766.423583984375
Batch 32 loss: 2209.57568359375
Batch

In [None]:
# Random probe tensor: one sequence of ten steps with two features per step.
x = torch.randn((1, 10, 2))
print(x.shape)