In [5]:
import os
import math
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm

In [10]:
data_path = '../../data/combined_dataset-1.xlsx'
df = pd.read_excel(data_path)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 657 entries, 0 to 656
Columns: 2114 entries, Num. to 2100
dtypes: float64(1), int64(2108), object(5)
memory usage: 10.6+ MB


In [11]:
df.head(10) 

Unnamed: 0,Num.,subject_ID,Sex(M/F),Age(year),Height(cm),Weight(kg),Systolic Blood Pressure(mmHg),Diastolic Blood Pressure(mmHg),Heart Rate(b/m),BMI(kg/m^2),...,2091,2092,2093,2094,2095,2096,2097,2098,2099,2100
0,1,2,Female,45,152,63,161,89,97,27.268006,...,1766,1766,1766,1833,1833,1827,1827,1827,1754,1754
1,1,2,Female,45,152,63,161,89,97,27.268006,...,1985,1985,2026,2026,2026,1977,1977,1997,1997,1997
2,1,2,Female,45,152,63,161,89,97,27.268006,...,1942,1900,1900,1938,1938,1938,1924,1924,1929,1929
3,2,3,Female,50,157,50,160,93,76,20.284799,...,2073,2072,2072,2072,2051,2051,2036,2036,2036,2045
4,2,3,Female,50,157,50,160,93,76,20.284799,...,2021,2010,2010,2010,2001,2001,2003,2003,2003,1989
5,2,3,Female,50,157,50,160,93,76,20.284799,...,2020,2020,2032,2032,2032,2011,2011,2005,2005,2005
6,3,6,Female,47,150,47,101,71,79,20.888889,...,2047,2047,2017,2017,2017,2053,2053,2038,2038,2038
7,3,6,Female,47,150,47,101,71,79,20.888889,...,2076,2076,2051,2051,2051,2060,2060,2067,2067,2067
8,3,6,Female,47,150,47,101,71,79,20.888889,...,2163,2159,2159,2159,2175,2175,2168,2168,2168,2175
9,4,8,Male,45,172,65,136,93,87,21.971336,...,1985,1985,1985,1984,1984,1995,1995,1995,1972,1972


In [12]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Num.,657.0,110.000000,63.267362,1.0,55.0,110.0,165.0,219.0
subject_ID,657.0,156.598174,101.449344,2.0,85.0,152.0,215.0,419.0
Age(year),657.0,57.168950,15.850110,21.0,48.0,58.0,68.0,86.0
Height(cm),657.0,161.228311,8.190357,145.0,155.0,160.0,167.0,196.0
Weight(kg),657.0,60.191781,11.868168,36.0,52.0,60.0,67.0,103.0
...,...,...,...,...,...,...,...,...
2096,657.0,2085.436834,305.845135,1519.0,1904.0,2014.0,2180.0,3811.0
2097,657.0,2083.791476,304.297297,1515.0,1904.0,2012.0,2176.0,3787.0
2098,657.0,2084.803653,306.657540,1515.0,1904.0,2011.0,2175.0,3774.0
2099,657.0,2085.196347,306.275406,1515.0,1906.0,2012.0,2177.0,3775.0


In [13]:
df.shape

(657, 2114)

In [15]:
meta_end_idx = df.columns.get_loc('cerebrovascular disease') + 1  
meta_cols = df.columns[:meta_end_idx]
signal_cols = df.columns[meta_end_idx:]

print(f"metadata col # : {list(meta_cols)}")
print(f"signal data col # : {len(signal_cols)}")

metadata col # : ['Num.', 'subject_ID', 'Sex(M/F)', 'Age(year)', 'Height(cm)', 'Weight(kg)', 'Systolic Blood Pressure(mmHg)', 'Diastolic Blood Pressure(mmHg)', 'Heart Rate(b/m)', 'BMI(kg/m^2)', 'Hypertension', 'Diabetes', 'cerebral infarction', 'cerebrovascular disease']
signal data col # : 2100


In [16]:
target_cols = ['Systolic Blood Pressure(mmHg)', 'Diastolic Blood Pressure(mmHg)']

X_signals = df[signal_cols].values
y_bp = df[target_cols].values

print(f"signal data : {X_signals.shape}")
print(f"pressure data : {y_bp.shape}")

signal data : (657, 2100)
pressure data : (657, 2)


In [19]:
def create_sliding_windows(signal, window_size=500, stride=100):
    windows = []
    for i in range(0, len(signal) - window_size + 1, stride):
        windows.append(signal[i:i+window_size])
    return windows

window_size = 500  # 0.5 (1000Hz)
stride = 100       # 0.1

all_windows = []
all_bp_values = []

for i in tqdm(range(len(X_signals)), desc="sliding window"):
    signal = X_signals[i]
    bp = y_bp[i]
    
    windows = create_sliding_windows(signal, window_size, stride)
    
    bp_values = np.tile(bp, (len(windows), 1))
    
    all_windows.extend(windows)
    all_bp_values.extend(bp_values)

all_windows = np.array(all_windows)
all_bp_values = np.array(all_bp_values)

print (all_windows.shape)
print (all_bp_values.shape)

sliding window: 100%|█| 657/657 [00:00<00:0

(11169, 500)
(11169, 2)





In [20]:
X_train, X_temp, y_train, y_temp = train_test_split(all_windows, all_bp_values, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

print(f"train={X_train.shape}, val={X_val.shape}, test={X_test.shape}")

train=(7818, 500), val=(1675, 500), test=(1676, 500)


In [21]:
scaler_X = StandardScaler()
X_train = scaler_X.fit_transform(X_train.reshape(X_train.shape[0], -1)).reshape(X_train.shape)
X_val = scaler_X.transform(X_val.reshape(X_val.shape[0], -1)).reshape(X_val.shape)
X_test = scaler_X.transform(X_test.reshape(X_test.shape[0], -1)).reshape(X_test.shape)

scaler_y = StandardScaler()
y_train = scaler_y.fit_transform(y_train)
y_val = scaler_y.transform(y_val)
y_test = scaler_y.transform(y_test)

In [23]:
class BPDataset(Dataset):
    def __init__(self, signals, bp_values):
        self.signals = signals
        self.bp_values = bp_values
    
    def __len__(self):
        return len(self.signals)
    
    def __getitem__(self, idx):
        signal = self.signals[idx]
        bp = self.bp_values[idx]
        
        x = torch.FloatTensor(signal).unsqueeze(-1)  # [window_size, 1]
        y = torch.FloatTensor(bp) 
        
        return x, y

train_dataset = BPDataset(X_train, y_train)
val_dataset = BPDataset(X_val, y_val)
test_dataset = BPDataset(X_test, y_test)

batch_size = 128  
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

print(f"train={len(train_dataset)}, val={len(val_dataset)}, test={len(test_dataset)}") # <--- data point
print(f"train={len(train_loader)}, val={len(val_loader)}, test={len(test_loader)}") # <-- batch

train=7818, val=1675, test=1676
train=62, val=14, test=14


In [24]:
class DataEmbedding(nn.Module):
    def __init__(self, c_in, d_model, dropout=0.1):
        super(DataEmbedding, self).__init__()
        self.value_embedding = nn.Linear(c_in, d_model)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x):
        x = self.value_embedding(x)
        return self.dropout(x)

class Mamba(nn.Module):
    def __init__(self, d_model, d_state, d_conv, expand):
        super(Mamba, self).__init__()
        self.d_model = d_model
        self.d_state = d_state
        self.d_conv = d_conv
        self.expand = expand
        self.d_inner = d_model * expand
        
        self.in_proj = nn.Linear(d_model, self.d_inner)
        self.conv1d = nn.Conv1d(
            in_channels=self.d_inner,
            out_channels=self.d_inner,
            kernel_size=d_conv,
            padding=d_conv-1,
            groups=self.d_inner,
        )
        
        self.x_proj = nn.Linear(self.d_inner, self.d_inner)
        self.dt_proj = nn.Linear(self.d_inner, self.d_inner)
        self.gate_proj = nn.Linear(self.d_inner, self.d_inner)
        self.out_proj = nn.Linear(self.d_inner, d_model)
        
        self.A_log = nn.Parameter(torch.randn(1, d_state))
        self.B = nn.Parameter(torch.randn(1, self.d_inner, d_state))
        self.C = nn.Parameter(torch.randn(1, self.d_inner, d_state))
        
    def forward(self, x):
        B, L, D = x.shape
        
        x_in = self.in_proj(x)
        x_conv = self.conv1d(x_in.transpose(1, 2))[:, :, :L].transpose(1, 2)
        
        x_proj = self.x_proj(x_conv)
        dt = self.dt_proj(x_conv).sigmoid()
        
        A = torch.diag_embed(torch.exp(self.A_log)).expand(1, self.d_state, self.d_state)
        h = torch.zeros(B, self.d_inner, self.d_state, device=x.device)
        
        outputs = []
        for t in range(L):
            u = x_proj[:, t, :]
            h = torch.bmm(h, A.expand(B, -1, -1)) + self.B.expand(B, -1, -1) * u.unsqueeze(-1)
            y = torch.sum(h * self.C.expand(B, -1, -1), dim=-1)
            outputs.append(y)
        
        y = torch.stack(outputs, dim=1)
        gate = self.gate_proj(x_conv).sigmoid()
        y = y * gate
        
        return self.out_proj(y)

class Model(nn.Module):
    def __init__(self, seq_len, d_model=64, d_state=32, d_conv=3, expand=2, dropout=0.1):
        super(Model, self).__init__()
        
        self.embedding = DataEmbedding(1, d_model, dropout)
        
        self.mamba = Mamba(
            d_model=d_model,
            d_state=d_state,
            d_conv=d_conv,
            expand=expand
        )
        
        self.projection = nn.Linear(d_model, 2)
        
    def forward(self, x):
        enc_out = self.embedding(x)
        enc_out = self.mamba(enc_out)
        
        avg_pooled = torch.mean(enc_out, dim=1)
        max_pooled, _ = torch.max(enc_out, dim=1)
        pooled_features = avg_pooled + max_pooled
        
        bp_pred = self.projection(pooled_features)
        
        return bp_pred

In [25]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"device : {device}")

model = Model(seq_len=window_size).to(device)
print (f"# of parameters: {sum(p.numel() for p in model.parameters())}")

device : cpu
# of parameters: 75106


In [27]:
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)

In [28]:
train_losses = []
val_losses = []

num_epochs = 30
best_val_loss = float('inf')
best_epoch = 0

for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    train_pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs} [Train]')
    
    for x, y in train_pbar:
        x = x.to(device)
        y = y.to(device)
        
        optimizer.zero_grad()
        y_pred = model(x)
        loss = criterion(y_pred, y)
        
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item() * x.size(0)
        train_pbar.set_postfix({'loss': f'{loss.item():.4f}'})
    
    train_loss /= len(train_dataset)
    train_losses.append(train_loss)
    
    model.eval()
    val_loss = 0.0
    val_pbar = tqdm(val_loader, desc=f'Epoch {epoch+1}/{num_epochs} [Valid]')
    
    with torch.no_grad():
        for x, y in val_pbar:
            x = x.to(device)
            y = y.to(device)
            
            y_pred = model(x)
            loss = criterion(y_pred, y)
            val_loss += loss.item() * x.size(0)
            val_pbar.set_postfix({'loss': f'{loss.item():.4f}'})
        
    val_loss /= len(val_dataset)
    val_losses.append(val_loss) 
    
    scheduler.step(val_loss)
    
    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')
    
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_epoch = epoch
        torch.save(model.state_dict(), 'best_model.pth')
        print(f'Epoch {epoch+1}: model saving (val_loss: {val_loss:.4f})')

Epoch 1/30 [Train]:   0%| | 0/62 [00:13<?, 


KeyboardInterrupt: 

In [None]:
plt.figure(figsize=(10, 6))
plt.plot(range(1, num_epochs+1), train_losses, 'b-', label='Training Loss')
plt.plot(range(1, num_epochs+1), val_losses, 'r-', label='Validation Loss')
plt.axvline(x=best_epoch+1, color='g', linestyle='--', label=f'Best Epoch ({best_epoch+1})')
plt.grid(True)
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Loss Curves')
plt.legend()
plt.tight_layout()
plt.show()