In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Libraries


In [2]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import numpy as np
from sklearn.preprocessing import MinMaxScaler, Normalizer
import plotly.express as px
import math
from torch.nn.utils.rnn import pad_sequence
from sklearn.model_selection import train_test_split

# Load Data

In [3]:
Kaggle_Data = pd.read_csv("https://raw.githubusercontent.com/amaye15/Data/main/Team%20Project%20(SIT764)/Kaggle_Data.csv")
Cardio_Data = pd.read_csv("https://raw.githubusercontent.com/amaye15/Data/main/Team%20Project%20(SIT764)/Cardio_Data.csv")
Malaga_Data = pd.read_csv("https://raw.githubusercontent.com/amaye15/Data/main/Team%20Project%20(SIT764)/Malaga_Data.csv")

In [4]:
temp = [i.split(":") for i in Cardio_Data.Time.values]
Cardio_Data["Time"] = [float(i[0] + i[1])/100 if len(i) == 2 else float(i[0]) for i in temp]
Malaga_Data.dropna(inplace=True)

# Data Preprocessing

In [5]:
# Min-Max transformation for Kaggle Data - time, Oxygen, HR, RF
  # Filter by Participant and Method
X = []
Y = []

for participant in Kaggle_Data.Participant.unique():
  for method in Kaggle_Data.Method.unique():
    time = MinMaxScaler().fit_transform(Kaggle_Data.loc[(Kaggle_Data["Participant"] == participant) & (Kaggle_Data["Method"] == method), "time"].values.reshape(-1,1)).reshape(-1)
    oxygen = MinMaxScaler().fit_transform(Kaggle_Data.loc[(Kaggle_Data["Participant"] == participant) & (Kaggle_Data["Method"] == method), "Oxygen"].values.reshape(-1,1)).reshape(-1)
    heart_rate = MinMaxScaler().fit_transform(Kaggle_Data.loc[(Kaggle_Data["Participant"] == participant) & (Kaggle_Data["Method"] == method), "HR"].values.reshape(-1,1)).reshape(-1)
    breathing_rate = MinMaxScaler().fit_transform(Kaggle_Data.loc[(Kaggle_Data["Participant"] == participant) & (Kaggle_Data["Method"] == method), "RF"].values.reshape(-1,1)).reshape(-1)
    X.append(np.array(list(zip(time, heart_rate, breathing_rate))))
    Y.append(np.array(oxygen))

# Min-Max transformation for Cardio Data - Time, RF, HR, Oxygen
  # Filter by Participant, Speed, and Exercise

for participant in Cardio_Data.Participant.unique():
  for speed in Cardio_Data.Speed.unique():
    for exercise in Cardio_Data.Exercise.unique():
      try:
        time = MinMaxScaler().fit_transform(Cardio_Data.loc[(Cardio_Data["Participant"] == participant) & (Cardio_Data["Speed"] == speed) & (Cardio_Data["Exercise"] == exercise), "Time"].values.reshape(-1,1)).reshape(-1)
        oxygen = MinMaxScaler().fit_transform(Cardio_Data.loc[(Cardio_Data["Participant"] == participant) & (Cardio_Data["Speed"] == speed) & (Cardio_Data["Exercise"] == exercise), "Oxygen"].values.reshape(-1,1)).reshape(-1)
        heart_rate = MinMaxScaler().fit_transform(Cardio_Data.loc[(Cardio_Data["Participant"] == participant) & (Cardio_Data["Speed"] == speed) & (Cardio_Data["Exercise"] == exercise), "HR"].values.reshape(-1,1)).reshape(-1)
        breathing_rate = MinMaxScaler().fit_transform(Cardio_Data.loc[(Cardio_Data["Participant"] == participant) & (Cardio_Data["Speed"] == speed) & (Cardio_Data["Exercise"] == exercise), "RF"].values.reshape(-1,1)).reshape(-1)
        X.append(np.array(list(zip(time, heart_rate, breathing_rate))))
        Y.append(np.array(oxygen))
      except:
        pass

# Min-Max transformation for Malaga Data - time, HR, VO2, RR	
  # Filter by Participant and Method
for ID in Malaga_Data.ID_test.unique():
  time = MinMaxScaler().fit_transform(Malaga_Data.loc[(Malaga_Data["ID_test"] == ID), "time"].values.reshape(-1,1)).reshape(-1)
  oxygen = MinMaxScaler().fit_transform(Malaga_Data.loc[(Malaga_Data["ID_test"] == ID), "VO2"].values.reshape(-1,1)).reshape(-1)
  heart_rate = MinMaxScaler().fit_transform(Malaga_Data.loc[(Malaga_Data["ID_test"] == ID), "HR"].values.reshape(-1,1)).reshape(-1)
  breathing_rate = MinMaxScaler().fit_transform(Malaga_Data.loc[(Malaga_Data["ID_test"] == ID), "RR"].values.reshape(-1,1)).reshape(-1)
  X.append(np.array(list(zip(time, heart_rate, breathing_rate))))
  Y.append(np.array(oxygen))


In [6]:
class CustomDataset(Dataset):
  def __init__(self, x, y):
    self.x = x
    self.y = y
    self.data_length = len(x)
  
  def __len__(self):
      return self.data_length

  def __getitem__(self, index):
    return index, self.x[index], self.y[index]


In [7]:
class Model(nn.Module):
  def __init__(self, input_size, output_size, num_layers):
        super(Model, self).__init__()
        self.lstm1 = nn.LSTM(input_size=input_size, hidden_size=output_size, num_layers=num_layers, batch_first=True, bidirectional=True, dropout=0.2)
        self.linear = nn.Linear(output_size*2, 1)

  def forward(self, x):
    output, (hn, cn) = self.lstm1(x)
    output = self.linear(output)
    return output

In [8]:
def pad_collate(batch):
  (indexes, x, y) = zip(*batch)
  x = [torch.tensor(i,  dtype=torch.float64) for i in x]
  y = [torch.tensor(i,  dtype=torch.float64) for i in y]
  return indexes, pad_sequence(x, batch_first=True, padding_value=0), pad_sequence(y, batch_first=True, padding_value=0)

In [9]:
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
        
    def forward(self,yhat,y):
        return torch.sqrt(self.mse(yhat,y))

In [10]:
x_train, x_test, y_train, y_test = train_test_split(X,Y, shuffle = True, test_size=0.3)

In [11]:
load_train_data = DataLoader(CustomDataset(x_train, y_train), batch_size=32, shuffle=True, collate_fn=pad_collate)
load_test_data = DataLoader(CustomDataset(x_test, y_test), batch_size=1, shuffle=True, collate_fn=pad_collate)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
loaded_model = Model(input_size = 3, output_size= 100, num_layers = 1).to(device).to(torch.double)
criterion = RMSELoss().to(device)
optimizer = torch.optim.Adam(loaded_model.parameters(), lr=0.00001)

  "num_layers={}".format(dropout, num_layers))


In [None]:
epochs = 1



for epoch in range(epochs):
  # Training Phase
  loaded_model.train()
  train_loss = []
  for index, x, y in load_train_data:
    x = x.to(device)
    y = y.to(device)
    predicted = loaded_model(x)
    loss = criterion(predicted.squeeze(), y)
    train_loss.append(loss.item())
    loss.backward()
    optimizer.step()
  
  # Testing Phase
  loaded_model.eval()
  test_loss = []
  for index, x, y in load_test_data:
    x = x.to(device)
    y = y.to(device)
    predicted = loaded_model(x)
    loss = criterion(predicted.flatten(), y.flatten())
    test_loss.append(loss.item())
  
  # Print loss 
  print(f"Epoch {epoch}")
  print(f"Train Loss - {np.mean(train_loss)}")
  print(f"Test Loss - {np.mean(test_loss)}")
  if epoch % 10 == 0:
    graph = px.line(y =[predicted.cpu().flatten().detach().numpy(),y.cpu().flatten().detach().numpy()])
    graph.show()

  


Epoch 0
Train Loss - 0.08025830862072958
Test Loss - 0.11568737025963183


In [12]:
model = torch.load('/content/drive/MyDrive/Deakin/O2_Model')

In [None]:
torch.save(loaded_model, '/content/drive/MyDrive/Deakin/O2_Model')

In [27]:
model.eval()
test_loss = []
for index, x, y in load_test_data:
  x = x.to(device)
  y = y.to(device)
  predicted = model(x)
  loss = criterion(predicted.flatten(), y.flatten())
  test_loss.append(loss.item())

# Print loss 
#print(f"Epoch {epoch}")
#print(f"Train Loss - {np.mean(train_loss)}")
print(f"Test Loss - {np.mean(test_loss)}")
graph = px.line(y =[predicted.cpu().flatten().detach().numpy(),y.cpu().flatten().detach().numpy()])
graph.show()

Test Loss - 0.11386344444207647


In [28]:
from math import sqrt
from sklearn.metrics import r2_score, explained_variance_score, mean_squared_error, mean_absolute_error
print(r2_score(predicted.cpu().flatten().detach().numpy().reshape(-1,1),y.cpu().flatten().detach().numpy().reshape(-1,1)))
print(explained_variance_score(predicted.cpu().flatten().detach().numpy().reshape(-1,1),y.cpu().flatten().detach().numpy().reshape(-1,1)))
print(mean_squared_error(predicted.cpu().flatten().detach().numpy().reshape(-1,1),y.cpu().flatten().detach().numpy().reshape(-1,1)))
print(sqrt(mean_squared_error(predicted.cpu().flatten().detach().numpy().reshape(-1,1),y.cpu().flatten().detach().numpy().reshape(-1,1))))
print(mean_absolute_error(predicted.cpu().flatten().detach().numpy().reshape(-1,1),y.cpu().flatten().detach().numpy().reshape(-1,1)))

0.7312727184130492
0.805669870392964
0.013200170864541377
0.11489199652082549
0.09947481559581779


In [None]:
px.line(y =[predicted.cpu().flatten().detach().numpy(),y.cpu().flatten().detach().numpy()])