In [1]:
import os
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
import sklearn.metrics
import torch
import torch.nn as nn

sys.path.append('../src')
from datasets.kaggle import KaggleDataset
import load_model
import dl_utils
import ipynb_utils

In [2]:
# Load the Kaggle dataset: build the train and dev splits and wrap each in a DataLoader
path = Path('..', 'data', 'kaggle_ppg_bp')
batch_size = 64

# Both loaders use the same settings. shuffle=False keeps the iteration order
# fixed, so targets and predictions gathered in separate passes over the same
# loader can be compared element-wise.
loader_kwargs = dict(
    batch_size=batch_size,
    shuffle=False,
    num_workers=1,
    pin_memory=False,
    persistent_workers=True,
)

train = KaggleDataset(path, train=True)
trainloader = torch.utils.data.DataLoader(train, **loader_kwargs)

dev = KaggleDataset(path, train=False)
devloader = torch.utils.data.DataLoader(dev, **loader_kwargs)

In [3]:
# Get the dataset statistics: gather the targets of every batch into one tensor
# per split (the inputs yielded by the loaders are ignored here).
y_true_train = torch.cat([targets for _, targets in trainloader])
y_true_dev = torch.cat([targets for _, targets in devloader])

In [4]:
# Statistics of the targets, reduced over dim 0 of the concatenated label tensors.
# The variances are computed once and reused for the averaged figure at the end.
train_var = y_true_train.var(dim=0)
dev_var = y_true_dev.var(dim=0)

print(f"Train var: {train_var}")
print(f"Dev var: {dev_var}")
print()

train_mean = y_true_train.mean(dim=0)
dev_mean = y_true_dev.mean(dim=0)
print(f"Train mean: {train_mean}")
print(f"Dev mean: {dev_mean}")
print()

train_std = y_true_train.std(dim=0)
dev_std = y_true_dev.std(dim=0)
print(f"Train std: {train_std}")
print(f"Dev std: {dev_std}")
print()

# Variance averaged over the target columns: the MSE obtained by always
# predicting each split's own per-column mean (up to the n-1 normalisation).
print(f"Train mean var: {train_var.mean()}")
print(f"Dev mean var: {dev_var.mean()}")


Train var: tensor([485.7870, 124.9180])
Dev var: tensor([381.4030, 102.0371])

Train mean: tensor([128.6287,  63.8986])
Dev mean: tensor([126.2440,  65.4517])

Train std: tensor([22.0406, 11.1767])
Dev std: tensor([19.5295, 10.1013])

Train mean var: 305.3525390625
Dev mean var: 241.72003173828125


In [5]:
# Loading an example model
criterion = nn.MSELoss()

# Build the checkpoint path from its components instead of a backslash-separated
# literal: in a normal (non-raw) string `\e`, `\l`, `\o` and `\m` are invalid
# escape sequences, and backslash separators only resolve on Windows.
example_model_path = (
    Path('..') / 'experiments' / 'lstm_test' / 'outputs' / 'models'
    / 'model_20240426-164030.pt'
)
# The loader was previously given a plain string, so keep passing one.
model = ipynb_utils.load_model_from_config(str(example_model_path))

In [6]:
# Get the predictions of the loaded model on both splits.
# get_predictions returns a pair; only its first element is used below.
train_outputs = dl_utils.get_predictions(model, trainloader)
y_pred_train, _ = train_outputs

dev_outputs = dl_utils.get_predictions(model, devloader)
y_pred_dev, _ = dev_outputs

In [7]:
# MSE of the example model on each split, converted to plain Python floats
train_loss = criterion(y_pred_train, y_true_train)
mse_train = train_loss.item()

dev_loss = criterion(y_pred_dev, y_true_dev)
mse_dev = dev_loss.item()

# Train value first, dev value second, one per line
print(mse_train, mse_dev, sep="\n")

258.0099792480469
248.01132202148438


## 1d CNN network
![1D CNN MSE validation loss](img/1d_CNN_val.png)

In [8]:
# Load all models stored in the experiment's output directory and score each
# one on the train and dev splits.
# The directory is assembled from path components rather than a backslash-separated
# literal: `\e`, `\k`, `\o` and `\m` are invalid escape sequences in a non-raw
# string, and backslash separators only resolve on Windows.
model_dir = Path('..') / 'experiments' / 'kaggle_new_subsampled' / 'outputs' / 'models'
models = ipynb_utils.load_models_from_dir(model_dir)
res_mse = []  # one (train MSE, dev MSE) pair per model
r2s = []      # one (train R^2, dev R^2) pair per model

# NOTE: the loop rebinds `model`, `y_pred_*` and `mse_*`; after this cell they
# refer to the last model in `models`, not to the example model loaded earlier.
for model in models:
    # Get the predictions
    y_pred_train, _ = dl_utils.get_predictions(model, trainloader)
    y_pred_dev, _ = dl_utils.get_predictions(model, devloader)

    mse_train = criterion(y_pred_train, y_true_train).item()
    mse_dev = criterion(y_pred_dev, y_true_dev).item()
    # append() extends the list in place instead of copying it on every iteration
    res_mse.append((mse_train, mse_dev))

    r2s_train = sklearn.metrics.r2_score(y_true_train, y_pred_train)
    r2s_dev = sklearn.metrics.r2_score(y_true_dev, y_pred_dev)
    r2s.append((r2s_train, r2s_dev))

# Rows are models; columns are (train, dev)
res_mse = np.array(res_mse)

In [17]:

# Rank the (train MSE, dev MSE) rows by their dev MSE (second entry), lowest
# first, and show the five best models.
rows_by_dev_mse = sorted(res_mse, key=lambda pair: pair[1])
top_five = np.array(rows_by_dev_mse[:5])

print("[ Train MSE, Dev MSE] sorted by Dev MSE:")
print(top_five)

[ Train MSE, Dev MSE] sorted by Dev MSE:
[[205.0783844  232.99128723]
 [252.37484741 240.7232666 ]
 [290.38952637 240.87341309]
 [285.22506714 241.84136963]
 [217.04077148 243.54374695]]


In [18]:
# R2 regression score (0 when predicting the mean, 1 for a perfect prediction)
# Stack the per-model (train, dev) pairs once, then reduce over the models.
r2_table = np.array(r2s)
r2_means = r2_table.mean(axis=0)
r2_stds = r2_table.std(axis=0)
print(f"The r^2s are: {r2_means} +- {r2_stds}")

The r^2s are: [ 0.12373147 -0.08952583] +- [0.12277078 0.11538522]


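As we can see, the models predict roughly the mean value and not much more: averaged over all models, the R² on the dev set is close to zero (slightly negative), i.e. no better than always predicting the mean.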

## LSTM
![LSTM MSE validation loss](img/lstm_val.png)
The LSTM network with one fully connected layer for regression does not seem to learn anything beyond predicting the mean value. The number of LSTM layers and the number of cells were optimized.

## xResNet1d
![xResNet1d MSE validation loss](img/ppg_xresnet_val.png)