In [44]:
# import necessary libraries
import pandas as pd 
import numpy as np
import pickle
import torch
import math
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import LeaveOneGroupOut
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow import keras
import keras_tuner as kt
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.tsa.stattools import ccf

In [45]:
# load data
path = "datasets/CNAP_blood_pressure.csv"

df = pd.read_csv(path)
df.head()

Unnamed: 0,ecg_0,ecg_1,ecg_2,ecg_3,ecg_4,ecg_5,ecg_6,ecg_7,ecg_8,ecg_9,...,ppg_94,ppg_95,ppg_96,ppg_97,ppg_98,ppg_99,SBP,DBP,subject,trial
0,-784.0,-754.0,-732.0,-715.0,-689.0,-671.0,-658.0,-646.0,-638.0,-637.0,...,56603.0,56608.0,56612.0,56581.0,56551.0,56557.0,131.607056,58.044434,0,1.0
1,686.0,680.0,298.0,-294.0,-688.0,-596.0,-101.0,442.0,717.0,715.0,...,56247.0,56227.0,56203.0,56213.0,56224.0,56207.0,131.607056,58.044434,0,1.0
2,590.0,560.0,107.0,-534.0,-871.0,-632.0,-32.0,511.0,748.0,740.0,...,56374.0,56364.0,56351.0,56373.0,56399.0,56397.0,127.789307,57.366943,0,1.0
3,633.0,582.0,164.0,-412.0,-729.0,-529.0,17.0,523.0,724.0,691.0,...,56939.0,56922.0,56904.0,56922.0,56942.0,56930.0,128.90625,55.453491,0,1.0
4,694.0,424.0,-138.0,-671.0,-805.0,-463.0,119.0,573.0,695.0,653.0,...,56378.0,56367.0,56388.0,56408.0,56394.0,56378.0,125.912476,55.142212,0,1.0


In [46]:
df.describe()

Unnamed: 0,ecg_0,ecg_1,ecg_2,ecg_3,ecg_4,ecg_5,ecg_6,ecg_7,ecg_8,ecg_9,...,ppg_94,ppg_95,ppg_96,ppg_97,ppg_98,ppg_99,SBP,DBP,subject,trial
count,16047.0,16047.0,16047.0,16047.0,16047.0,16047.0,16047.0,16047.0,16047.0,16047.0,...,16047.0,16047.0,16047.0,16047.0,16047.0,16047.0,16047.0,16047.0,16047.0,16047.0
mean,1066.188945,431.663613,-1184.575871,-2584.160404,-2717.941609,-1756.763881,-644.251387,52.182402,383.46289,527.773914,...,43437.17536,43437.930953,43438.393968,43438.538605,43438.402817,43438.024553,123.233116,65.425236,3.436219,2.504393
std,596.022072,535.34714,1113.145288,1764.257444,1902.970274,1624.898566,1101.84638,532.029296,249.209632,288.587199,...,18823.48963,18824.907741,18826.414014,18827.809178,18828.936537,18829.911345,17.885018,12.599646,2.226186,1.117698
min,-5336.0,-3104.0,-5561.0,-8135.0,-8159.0,-7152.0,-4674.0,-2434.0,-1592.0,-1497.0,...,19505.0,19481.0,19474.0,19463.0,19443.0,19433.0,72.106934,27.767944,0.0,1.0
25%,641.0,143.0,-1963.5,-3912.5,-4168.5,-2798.5,-1271.5,-216.0,226.0,323.0,...,27992.5,27994.5,27999.5,27995.0,27992.5,27983.0,111.749268,56.67572,2.0,2.0
50%,780.0,418.0,-931.0,-2302.0,-2233.0,-1286.0,-169.0,157.0,344.0,491.0,...,35665.0,35665.0,35669.0,35671.0,35664.0,35662.0,123.074341,64.022827,3.0,3.0
75%,1637.5,656.0,-244.0,-1088.5,-1337.5,-460.5,129.0,391.0,499.0,716.0,...,59455.5,59460.0,59467.0,59476.0,59479.0,59481.0,133.680725,73.104858,5.0,4.0
max,4394.0,3014.0,1375.0,946.0,838.0,1112.0,2779.0,2688.0,2597.0,2754.0,...,83872.0,83911.0,83950.0,83938.0,83925.0,83961.0,191.830444,123.944092,7.0,4.0


In [47]:
# x y split
feature_cols = df.columns[:300]
target_cols = ['SBP', 'DBP']
groups = df['subject'].values

X = df[feature_cols].values
y = df[target_cols].values

In [55]:
groups.shape

(16047,)

In [48]:
# apply MinMaxScaler
scaler_x = MinMaxScaler()
X_scaled = scaler_x.fit_transform(X)
X = X_scaled

In [49]:
# reshape to (samples, timesteps, features)
X = X.reshape((-1, 3, 100))
X = np.transpose(X, (0, 2, 1))
X.shape, y.shape

((16047, 100, 3), (16047, 2))

In [50]:
logo = LeaveOneGroupOut()

In [None]:
for train_idx, test_idx in logo.split(X, y, groups=groups):
    X_train, X_val = X[train_idx], X[test_idx]
    y_train, y_val = y[train_idx], y[test_idx]
    print(f'Train shape: {X_train.shape}, Test shape: {X_val.shape}')

    # define the model
    model = Sequential([
        Input(shape=(100, 3)),
        LSTM(64, activation='tanh', return_sequences=True),
        LSTM(32, activation='tanh'),
        Dense(16, activation='relu'),
        Dense(2)
    ])

    model.compile(optimizer='adam', loss='mse', metrics=['mae', tf.keras.metrics.MeanAbsolutePercentageError()])
    print(model.summary())
    
    # early stopping callback
    early_stop = EarlyStopping(
        monitor = 'val_loss',
        patience = 5,
        restore_best_weights = True
    )
    
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=1,
        batch_size=32,
        callbacks=[early_stop]
    )

    loss, mae, mape = model.evaluate(X_val, y_val)
    print(f'Test Loss: {loss:.4f} - Test MAE: {mae:.4f} - Test MAPE: {mape:.4f}')


Train shape: (14243, 100, 3), Test shape: (1804, 100, 3)


None
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 32ms/step - loss: 3924.2151 - mae: 52.5149 - mean_absolute_percentage_error: 56.3571 - val_loss: 591.2192 - val_mae: 19.2535 - val_mean_absolute_percentage_error: 16.7741
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 591.2192 - mae: 19.2535 - mean_absolute_percentage_error: 16.7741
Test Loss: 591.2192 - Test MAE: 19.2535 - Test MAPE: 16.7741
Train shape: (13847, 100, 3), Test shape: (2200, 100, 3)


None
[1m433/433[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 32ms/step - loss: 5305.6797 - mae: 62.9955 - mean_absolute_percentage_error: 63.7630 - val_loss: 806.7325 - val_mae: 22.3620 - val_mean_absolute_percentage_error: 22.1558
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 806.7325 - mae: 22.3620 - mean_absolute_percentage_error: 22.1558
Test Loss: 806.7325 - Test MAE: 22.3620 - Test MAPE: 22.1558
Train shape: (14100, 100, 3), Test shape: (1947, 100, 3)


None
[1m441/441[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 31ms/step - loss: 4602.5298 - mae: 56.1188 - mean_absolute_percentage_error: 56.3249 - val_loss: 626.3755 - val_mae: 20.6293 - val_mean_absolute_percentage_error: 20.1095
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 626.3755 - mae: 20.6293 - mean_absolute_percentage_error: 20.1095
Test Loss: 626.3755 - Test MAE: 20.6293 - Test MAPE: 20.1095
Train shape: (13681, 100, 3), Test shape: (2366, 100, 3)


None
[1m428/428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 32ms/step - loss: 5492.1958 - mae: 66.6718 - mean_absolute_percentage_error: 70.1498 - val_loss: 1778.1743 - val_mae: 34.3956 - val_mean_absolute_percentage_error: 30.3408
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 1778.1743 - mae: 34.3956 - mean_absolute_percentage_error: 30.3408
Test Loss: 1778.1743 - Test MAE: 34.3956 - Test MAPE: 30.3408
Train shape: (13894, 100, 3), Test shape: (2153, 100, 3)


None
[1m435/435[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 34ms/step - loss: 4457.8237 - mae: 57.4342 - mean_absolute_percentage_error: 61.2984 - val_loss: 443.3628 - val_mae: 18.2576 - val_mean_absolute_percentage_error: 18.6247
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 443.3628 - mae: 18.2576 - mean_absolute_percentage_error: 18.6247
Test Loss: 443.3628 - Test MAE: 18.2576 - Test MAPE: 18.6247
Train shape: (13986, 100, 3), Test shape: (2061, 100, 3)


None
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 33ms/step - loss: 5779.0581 - mae: 69.4822 - mean_absolute_percentage_error: 73.0816 - val_loss: 411.2684 - val_mae: 17.4138 - val_mean_absolute_percentage_error: 22.5871
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 411.2684 - mae: 17.4138 - mean_absolute_percentage_error: 22.5871
Test Loss: 411.2684 - Test MAE: 17.4138 - Test MAPE: 22.5871
Train shape: (14467, 100, 3), Test shape: (1580, 100, 3)


None
[1m453/453[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 34ms/step - loss: 4445.4380 - mae: 55.3371 - mean_absolute_percentage_error: 55.5954 - val_loss: 698.7024 - val_mae: 21.3560 - val_mean_absolute_percentage_error: 21.1232
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 698.7024 - mae: 21.3560 - mean_absolute_percentage_error: 21.1232
Test Loss: 698.7024 - Test MAE: 21.3560 - Test MAPE: 21.1232
Train shape: (14111, 100, 3), Test shape: (1936, 100, 3)


None
[1m441/441[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 34ms/step - loss: 5913.9277 - mae: 66.6359 - mean_absolute_percentage_error: 66.2090 - val_loss: 1563.4960 - val_mae: 31.5606 - val_mean_absolute_percentage_error: 32.2448
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 1563.4960 - mae: 31.5606 - mean_absolute_percentage_error: 32.2448
Test Loss: 1563.4960 - Test MAE: 31.5606 - Test MAPE: 32.2448
