In [1]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold, train_test_split
import matplotlib.pyplot as plt

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
data = pd.read_csv('/content/drive/MyDrive/hackathon/ground_truth/Subject39_1526417507/1526417507.csv', header = None)
data = data.T
data2 = pd.read_csv('/content/drive/MyDrive/hackathon/ground_truth/Subject39_1526591202/1526591202.csv', header = None)
data2 = data2.T
data3 = pd.read_csv('/content/drive/MyDrive/hackathon/ground_truth/Subject42_1527280030/1527280030.csv', header = None)
data3 = data3.T
data4 = pd.read_csv('/content/drive/MyDrive/hackathon/ground_truth/Subject43_1527806941/1527806941.csv', header = None)
data4 = data4.T
data5 = pd.read_csv('/content/drive/MyDrive/hackathon/ground_truth/Subject54_1539288817/1539288817.csv', header = None)
data5 = data5.T
data6 = pd.read_csv('/content/drive/MyDrive/hackathon/ground_truth/Subject55_1539459892/1539459892.csv', header = None)
data6 = data6.T
traindata = pd.concat([data,data2,data3,data4,data5,data6],ignore_index = True)

In [4]:
data = pd.read_csv('/content/drive/MyDrive/hackathon/ground_truth/Subject39_1526417507/heartrates.csv', header = None)
data = data.T
data = data.drop(0)
data2 = pd.read_csv('/content/drive/MyDrive/hackathon/ground_truth/Subject39_1526591202/heartrates.csv', header = None)
data2 = data2.T
data2 = data2.drop(0)
data3 = pd.read_csv('/content/drive/MyDrive/hackathon/ground_truth/Subject42_1527280030/heartrates.csv', header = None)
data3 = data3.T
data3 = data3.drop(0)
data4 = pd.read_csv('/content/drive/MyDrive/hackathon/ground_truth/Subject43_1527806941/heartrates.csv', header = None)
data4 = data4.T
data4 = data4.drop(0)
data5 = pd.read_csv('/content/drive/MyDrive/hackathon/ground_truth/Subject54_1539288817/heartrates.csv', header = None)
data5 = data5.T
data5 = data5.drop(0)
data6 = pd.read_csv('/content/drive/MyDrive/hackathon/ground_truth/Subject55_1539459892/heartrates.csv', header = None)
data6 = data6.T
data6 = data6.drop(0)
target_values = pd.concat([data,data2,data3,data4,data5,data6],ignore_index = True)

In [5]:
class LSTM(nn.Module):
  def __init__(self):
    super(LSTM, self).__init__()
    self.lstm = nn.LSTM(input_size = 6780, hidden_size = 512, batch_first = True)
    self.linear1 = nn.Linear(512, 64)   ## first linear layer
    self.dropout = nn.Dropout(0.5)      ## dropout layer
    self.linear2 = nn.Linear(64, 1)     ## second linear layer
    self.relu = nn.ReLU()               ## using relu activation function

  def forward(self, x):
    h_t, c_t = self.lstm(x)
    h_t = h_t.squeeze()
    res = self.linear1(h_t)
    res = self.relu(res)
    res = self.dropout(res)
    res = self.linear2(res)
    res = res.T
    return res

   ## output hidden state and output cell state are hT and cT respectively

In [6]:
class HeartRateDataset(torch.utils.data.Dataset):
  def __init__(self, train_data, targets = None):
    self.train_data = train_data
    self.targets = targets

  def __len__(self):
    return len(self.train_data)

## the below function tries to retreive both the train and the target data associated with the given index, checks if the target data (self.targets) is available by default.
## if it is not available by default, it falls back on the assumpton that only the input data is available.

  def __getitem__(self, idx):
    if True:
      try:
        train, targ = self.train_data.loc[idx], self.targets.loc[idx]
      except:
        train, targ = self.train_data.iloc[idx], self.targets.iloc[idx]
      train = train.values.reshape((1,len(train)))
      targ = targ.values.reshape((1,len(targ)))
      train = train.astype(np.float32)
      targ = targ.astype(np.float32)
      train = torch.tensor(train)     ## converting to tensors
      targ = torch.tensor(targ)       ## converting to tensors
      train = train.view((1, 8, 6780))
      return train, targ

In [7]:
train_data, validation_data, train_target, validation_target = train_test_split(traindata, target_values, test_size=1/6, shuffle=False)

In [8]:
train_dataset = HeartRateDataset(train_data, train_target)
validation_dataset = HeartRateDataset(validation_data, validation_target)

In [9]:
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size = 1)
validation_dataloader = torch.utils.data.DataLoader(validation_dataset, batch_size = 1)

In [10]:
## training the model

def train(model, lossfn, optimizer, scheduler, device):
    model.train()
    training_error = 0
    for t, (xb, yb) in enumerate(train_dataloader):       ## xb is input data and yb is target data
        xb = xb.to(device, dtype = torch.float32)
        yb = yb.to(device, dtype = torch.float32)
        xb = xb.squeeze(0)
        yb = yb.squeeze()
        predictions = model(xb)
        predictions = predictions.T
        predictions = predictions.to(device, dtype = torch.float32)
        loss = lossfn(predictions, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        training_error += torch.sqrt(torch.sum(torch.square(predictions.view(8)-yb))/8)     ## computing rmse error
    print(f"Training RMSE: {training_error/5}")
    del loss,predictions
    return training_error

def validate(model, lossfn, optimizer, scheduler, device):
    validation_error = 0
    with torch.no_grad():
        model.eval()
        for v, (xv, yv) in enumerate(validation_dataloader):
            xv = xv.to(device, dtype = torch.float32)
            yv = yv.to(device, dtype = torch.int64)
            xv = xv.squeeze(0)
            yv = yv.squeeze()
            predictions = model(xv)
            predictions = predictions.T
            predictions = predictions.to(device, dtype = torch.float32)
            loss = lossfn(predictions, yv)
            validation_error += torch.sqrt(torch.sum(torch.square(predictions.view(8) - yv))/8)
    print(f"Validation RMSE: {validation_error/1}")
    return validation_error


In [11]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

## if the device is in cuda, it gives the power to use the nvidia gpu else it gives the computation responsibilities to the cpu

'cpu'

In [12]:
model = LSTM()
lossfn = torch.nn.functional.mse_loss
optimizer = torch.optim.Adam(model.parameters(), lr = 0.01)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1, patience=5, threshold=0.01, verbose = True)



In [13]:
epochs = 20
model.to(device)
for i in range(epochs):
    print(f"Epoch: {i+1}")
    training_error = train(model, lossfn, optimizer, scheduler, device)
    validation_error = validate(model, lossfn, optimizer, scheduler, device)

Epoch: 1


  loss = lossfn(predictions, yb)


Training RMSE: 58.290504455566406
Validation RMSE: 50.91286087036133
Epoch: 2


  loss = lossfn(predictions, yv)


Training RMSE: 41.92742919921875
Validation RMSE: 27.639904022216797
Epoch: 3
Training RMSE: 31.454578399658203
Validation RMSE: 22.71575927734375
Epoch: 4
Training RMSE: 27.499053955078125
Validation RMSE: 21.19328498840332
Epoch: 5
Training RMSE: 20.57087516784668
Validation RMSE: 21.926753997802734
Epoch: 6
Training RMSE: 24.427677154541016
Validation RMSE: 24.62149429321289
Epoch: 7
Training RMSE: 23.932842254638672
Validation RMSE: 22.854591369628906
Epoch: 8
Training RMSE: 29.052265167236328
Validation RMSE: 20.51565933227539
Epoch: 9
Training RMSE: 24.244722366333008
Validation RMSE: 20.15991973876953
Epoch: 10
Training RMSE: 24.619747161865234
Validation RMSE: 20.369543075561523
Epoch: 11
Training RMSE: 25.170154571533203
Validation RMSE: 20.1131591796875
Epoch: 12
Training RMSE: 22.941268920898438
Validation RMSE: 20.025484085083008
Epoch: 13
Training RMSE: 20.995431900024414
Validation RMSE: 20.39466094970703
Epoch: 14
Training RMSE: 25.472543716430664
Validation RMSE: 20.110

In [14]:
epochs = 20
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)
for i in range(epochs):
    print(f"Epoch: {i+1}")
    training_error = train(model, lossfn, optimizer, scheduler, device)
    validation_error = validate(model, lossfn, optimizer, scheduler, device)

Epoch: 1


  loss = lossfn(predictions, yb)


Training RMSE: 21.297557830810547
Validation RMSE: 19.61996841430664
Epoch: 2


  loss = lossfn(predictions, yv)


Training RMSE: 22.6173152923584
Validation RMSE: 19.654878616333008
Epoch: 3
Training RMSE: 23.051071166992188
Validation RMSE: 19.740571975708008
Epoch: 4
Training RMSE: 21.699968338012695
Validation RMSE: 19.75630760192871
Epoch: 5
Training RMSE: 23.380813598632812
Validation RMSE: 19.778913497924805
Epoch: 6
Training RMSE: 23.140825271606445
Validation RMSE: 19.812458038330078
Epoch: 7
Training RMSE: 22.41436195373535
Validation RMSE: 19.819164276123047
Epoch: 8
Training RMSE: 22.24386978149414
Validation RMSE: 19.782609939575195
Epoch: 9
Training RMSE: 21.389114379882812
Validation RMSE: 19.824867248535156
Epoch: 10
Training RMSE: 21.024089813232422
Validation RMSE: 19.929292678833008
Epoch: 11
Training RMSE: 21.820480346679688
Validation RMSE: 19.956369400024414
Epoch: 12
Training RMSE: 25.741596221923828
Validation RMSE: 19.859766006469727
Epoch: 13
Training RMSE: 21.940710067749023
Validation RMSE: 19.8614501953125
Epoch: 14
Training RMSE: 21.29745101928711
Validation RMSE: 19.8

In [15]:
with torch.no_grad():
    model.eval()
    for v, (xv, yv) in enumerate(train_dataloader):
        xv = xv.to(device, dtype = torch.float32)
        yv = yv.to(device, dtype = torch.int64)
        xv = xv.squeeze(0)
        yv = yv.squeeze()
        predictions = model(xv)
        predictions = predictions.T
        predictions = predictions.to(device, dtype = torch.float32)
        print(predictions)
        print(yv)

tensor([[48.7541],
        [56.2737],
        [59.4939],
        [60.1271],
        [60.0917],
        [60.7732],
        [60.4927],
        [60.3689]])
tensor([55, 57, 61, 65, 67, 64, 68, 67])
tensor([[51.8975],
        [59.2612],
        [60.3898],
        [60.5453],
        [60.5664],
        [60.5693],
        [60.5696],
        [60.5697]])
tensor([69, 69, 70, 69, 68, 69, 67, 69])
tensor([[48.9429],
        [57.9954],
        [59.9558],
        [60.3750],
        [60.5286],
        [60.5378],
        [60.6931],
        [60.5433]])
tensor([61, 61, 65, 63, 62, 62, 61, 62])
tensor([[41.8168],
        [54.8196],
        [57.9276],
        [56.7088],
        [44.9952],
        [56.7589],
        [56.9420],
        [56.9687]])
tensor([-1, 60, 68, 62, -1, 65, 61, 63])
tensor([[40.4454],
        [46.9109],
        [39.1607],
        [38.6881],
        [35.3345],
        [50.6196],
        [50.4791],
        [51.6556]])
tensor([89, 94, -1, 84, -1, 62, -1, -1])


In [16]:
with torch.no_grad():
    model.eval()
    for v, (xv, yv) in enumerate(validation_dataloader):
        xv = xv.to(device, dtype = torch.float32)
        yv = yv.to(device, dtype = torch.int64)
        xv = xv.squeeze(0)
        yv = yv.squeeze()
        predictions = model(xv)
        predictions = predictions.T
        predictions = predictions.to(device, dtype = torch.float32)
        print(predictions)
        print(yv)

tensor([[50.7835],
        [57.8505],
        [58.8181],
        [59.0913],
        [59.0700],
        [59.8385],
        [46.6926],
        [59.4315]])
tensor([66, 66, 67, 67, 69, 70, -1, 74])


In [17]:
predictions

tensor([[50.7835],
        [57.8505],
        [58.8181],
        [59.0913],
        [59.0700],
        [59.8385],
        [46.6926],
        [59.4315]])

In [18]:
target_values

Unnamed: 0,0,1,2,3,4,5,6,7
0,55.8,57.22,61.64,65.51,67.97,64.88,68.14,67.97
1,69.9,69.54,70.26,69.36,68.83,69.18,67.8,69.36
2,61.22,61.78,65.07,63.81,62.2,62.78,61.22,62.63
3,-1.0,60.54,68.31,62.49,-1.0,65.23,61.92,63.36
4,89.86,94.83,-1.0,84.44,-1.0,62.35,-1.0,-1.0
5,66.8,66.31,67.13,67.3,69.72,70.99,-1.0,74.3


In [19]:
class TestDataset(torch.utils.data.Dataset):
  def __init__(self, train_data, targets = None):
    self.train_data = train_data

  def __len__(self):
    return len(self.train_data)

  def __getitem__(self, idx):
    try:
      train = self.train_data.loc[idx]
    except:
      train = self.train_data.iloc[idx]
    train = train.values.reshape((1,len(train)))
    train = train.astype(np.float32)
    train = torch.tensor(train)
    train = train.view((1, 8, 6780))
    return train

In [20]:
import os

df = pd.DataFrame()
addresses = []
for root, dir, files in os.walk('/content/drive/MyDrive/hackathon/dataset'):
  if files == []:
    pass
  else:
    path = os.path.join(root, files[0])
    data = pd.read_csv(path, header = None)
    addresses.append(root)
    data = data.T
    df = pd.concat([df,data], ignore_index = True)

In [21]:
test_dataset = TestDataset(df)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size = 1, shuffle = True)

In [22]:
with torch.no_grad():
        model.eval()
        for v, xv in enumerate(test_dataloader):
          xv = xv.to(device, dtype = torch.float32)
          xv = xv.squeeze(0)
          predictions = model(xv).view(8)
          predictions = predictions.detach().cpu().numpy()
          predictions = np.round(predictions, 2)
          path = addresses[v]
          time = int(path[-10:])
          timestamps = np.arange(time, time+240, 30)
          predictionsdf = pd.DataFrame([timestamps,predictions])
          predictionsdf = predictionsdf.transpose()

In [23]:
predictionsdf

Unnamed: 0,0,1
0,1547585000.0,50.869999
1,1547585000.0,59.02
2,1547585000.0,59.029999
3,1547585000.0,59.459999
4,1547585000.0,58.52
5,1547585000.0,59.57
6,1547585000.0,59.98
7,1547585000.0,59.759998


In [24]:
## The above dataframe matches the values of the tensors.