In [1]:
# Data preprocessing. Tedious, but PyPOTS can help.
import numpy as np
from sklearn.preprocessing import StandardScaler
from pygrinder import mcar
from pypots.data import load_specific_dataset

In [33]:
# Model training. This is PyPOTS showtime.
from pypots.imputation import SAITS, CSDI, BRITS, iTransformer, Autoformer
from pypots.optim import Adam
from pypots.utils.logging import logger
from pypots.utils.metrics import calc_mae, calc_mse, calc_mre, calc_rmse

# Dataset identifiers collected for testing.
# Of these, only "physionet_2012" is loaded by the cells shown in this notebook excerpt.
DATASETS_TO_TEST = [
    "physionet_2012",
    "physionet_2019",
    "beijing_multisite_air_quality",
    "electricity_load_diagrams",
    "electricity_transformer_temperature",
    "vessel_ais",
    "ucr_uea_Wine",
]

In [3]:
# Load the PhysioNet 2012 dataset; the result is a dict (its keys are printed in the next cell).
data = load_specific_dataset('physionet_2012')

2024-05-24 17:02:57 [INFO]: Loading the dataset physionet_2012 with TSDB (https://github.com/WenjieDu/Time_Series_Data_Beans)...
2024-05-24 17:02:57 [INFO]: Starting preprocessing physionet_2012...
2024-05-24 17:02:57 [INFO]: You're using dataset physionet_2012, please cite it properly in your work. You can find its reference information at the below link: 
https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/physionet_2012
2024-05-24 17:02:57 [INFO]: Dataset physionet_2012 has already been downloaded. Processing directly...
2024-05-24 17:02:57 [INFO]: Dataset physionet_2012 has already been cached. Loading from cache directly...
2024-05-24 17:02:57 [INFO]: Loaded successfully!


In [4]:
# Inspect which entries the loaded dataset dict provides.
print(data.keys())

dict_keys(['X', 'y', 'ICUType'])


In [5]:
# Turn the loaded table into a 3-D array and hide part of it for evaluation.
X = data['X']
num_samples = len(X['RecordID'].unique())  # one sample per distinct RecordID

# Drop the identifier/time columns and standardise the remaining columns.
X = X.drop(['RecordID', 'Time'], axis = 1)
X = StandardScaler().fit_transform(X.to_numpy())
# Regroup the rows as (num_samples, 48, n_columns); reshape raises if the data
# cannot be split that way.
X = X.reshape(num_samples, 48, -1)
X_ori = X  # array before artificial masking; used later as the reference for scoring

# Seed the RNG before drawing the mask so the hidden positions, and therefore the
# reported metrics, are repeatable after a kernel restart.
# NOTE(review): assumes pygrinder's mcar draws from NumPy's global RNG for ndarray
# input — confirm against the installed pygrinder version.
np.random.seed(0)
X = mcar(X, 0.1)  # mask values completely at random with rate 0.1
dataset = {"X": X}

In [6]:
# Experiment settings referenced by the model cells.
n_steps, n_features = 48, 37    # time steps per sample, features per time step
epochs, batch_size = 30, 128    # training length and mini-batch size
# Adam optimizer wrapper: learning rate 1e-3, weight decay 1e-5.
optimizer = Adam(weight_decay=1e-5, lr=0.001)

In [38]:
# Display the number of distinct RecordID values counted during preprocessing.
num_samples

11988

# SAITS

In [7]:
# Build the SAITS imputation model from the experiment settings.
saits = SAITS(
    n_steps=n_steps,
    n_features=n_features,
    n_layers=2,
    d_model=32,
    n_heads=2,
    d_k=16,
    d_v=16,
    d_ffn=32,
    dropout=0.1,
    batch_size=batch_size,
    epochs=epochs,
    # Use a dedicated optimizer wrapper (same hyperparameters as the module-level
    # `optimizer`, and the same pattern the BRITS cell uses). PyPOTS binds the
    # wrapper to a model's parameters when the model is built, so a wrapper shared
    # with a model created later would no longer update this model.
    optimizer=Adam(lr=0.001, weight_decay=1e-5),
)

# Train on the artificially masked data.
saits.fit(dataset)

2024-05-24 17:03:19 [INFO]: No given device, using default device: cuda
2024-05-24 17:03:20 [INFO]: SAITS initialized with the given hyperparameters, the number of trainable parameters: 37,174
2024-05-24 17:03:27 [INFO]: Epoch 001 - training loss: 1.1264
2024-05-24 17:03:31 [INFO]: Epoch 002 - training loss: 0.8107
2024-05-24 17:03:35 [INFO]: Epoch 003 - training loss: 0.7112
2024-05-24 17:03:38 [INFO]: Epoch 004 - training loss: 0.6629
2024-05-24 17:03:42 [INFO]: Epoch 005 - training loss: 0.6340
2024-05-24 17:03:46 [INFO]: Epoch 006 - training loss: 0.6144
2024-05-24 17:03:49 [INFO]: Epoch 007 - training loss: 0.6006
2024-05-24 17:03:53 [INFO]: Epoch 008 - training loss: 0.5890
2024-05-24 17:03:57 [INFO]: Epoch 009 - training loss: 0.5810
2024-05-24 17:04:00 [INFO]: Epoch 010 - training loss: 0.5735
2024-05-24 17:04:04 [INFO]: Epoch 011 - training loss: 0.5662
2024-05-24 17:04:08 [INFO]: Epoch 012 - training loss: 0.5610
2024-05-24 17:04:11 [INFO]: Epoch 013 - training loss: 0.5561
2

In [8]:
# Impute the masked dataset with the trained SAITS model.
saits_imputation = saits.impute(dataset)
# True exactly where X is NaN but X_ori is not, i.e. the artificially hidden entries.
saits_indicating_mask = np.logical_xor(np.isnan(X), np.isnan(X_ori))

In [23]:
# Score the SAITS imputation with each error metric in turn. NaNs in the reference
# are replaced by 0 so the arithmetic stays finite; the mask is False at those entries.
saits_mae, saits_mse, saits_mre, saits_rmse = (
    metric(saits_imputation, np.nan_to_num(X_ori), saits_indicating_mask)
    for metric in (calc_mae, calc_mse, calc_mre, calc_rmse)
)

In [25]:
# Report the four SAITS metrics; every label names the metric printed next to it
# (the MSE and MRE values were previously both labelled "saits_mae").
print(f'saits_mae : {saits_mae}, saits_mse : {saits_mse}, saits_mre : {saits_mre}, saits_rmse : {saits_rmse}')

saits_mae : 0.29004251595569447, saits_mae : 0.3596137593654933, saits_mae : 0.4167647586318705, saits_rmse : 0.599678046426158


# CSDI

In [11]:
 csdi = CSDI(
    n_steps=n_steps,
    n_features=n_features,
    n_layers=2,
    n_channels=8,
    d_time_embedding=32,
    d_feature_embedding=3,
    d_diffusion_embedding=32,
    n_diffusion_steps=5,
    n_heads=4,
    batch_size=batch_size,
    epochs=epochs,
    optimizer=optimizer
)
csdi.fit(dataset)  # train the model on the dataset

2024-05-24 17:05:16 [INFO]: No given device, using default device: cuda
2024-05-24 17:05:16 [INFO]: CSDI initialized with the given hyperparameters, the number of trainable parameters: 10,280
2024-05-24 17:05:30 [INFO]: Epoch 001 - training loss: 0.8175
2024-05-24 17:05:40 [INFO]: Epoch 002 - training loss: 0.6443
2024-05-24 17:05:50 [INFO]: Epoch 003 - training loss: 0.6190
2024-05-24 17:06:00 [INFO]: Epoch 004 - training loss: 0.6097
2024-05-24 17:06:10 [INFO]: Epoch 005 - training loss: 0.6025
2024-05-24 17:06:20 [INFO]: Epoch 006 - training loss: 0.5965
2024-05-24 17:06:30 [INFO]: Epoch 007 - training loss: 0.5956
2024-05-24 17:06:40 [INFO]: Epoch 008 - training loss: 0.5856
2024-05-24 17:06:50 [INFO]: Epoch 009 - training loss: 0.5829
2024-05-24 17:07:02 [INFO]: Epoch 010 - training loss: 0.5818
2024-05-24 17:07:13 [INFO]: Epoch 011 - training loss: 0.5713
2024-05-24 17:07:23 [INFO]: Epoch 012 - training loss: 0.5717
2024-05-24 17:07:33 [INFO]: Epoch 013 - training loss: 0.5693
20

In [12]:
# Impute with CSDI and bring the result to (num_samples, 48, n_features).
# NOTE(review): the reshape presumably collapses an extra sampling axis in CSDI's
# output — confirm against the PyPOTS version in use.
csdi_imputation = csdi.impute(dataset).reshape(num_samples, 48, -1)
# True exactly where X is NaN but X_ori is not, i.e. the artificially hidden entries.
csdi_indicating_mask = np.logical_xor(np.isnan(X), np.isnan(X_ori))

In [26]:
# Score the CSDI imputation with each error metric in turn. NaNs in the reference
# are replaced by 0 so the arithmetic stays finite; the mask is False at those entries.
csdi_mae, csdi_mse, csdi_mre, csdi_rmse = (
    metric(csdi_imputation, np.nan_to_num(X_ori), csdi_indicating_mask)
    for metric in (calc_mae, calc_mse, calc_mre, calc_rmse)
)

In [27]:
# Report the four CSDI metrics on one line.
print(
    f'csdi_mae : {csdi_mae}, '
    f'csdi_mse : {csdi_mse}, '
    f'csdi_mre : {csdi_mre}, '
    f'csdi_rmse: {csdi_rmse}'
)

csdi_mae : 0.4642064665419674, csdi_mse : 0.6363236273985146, csdi_mre : 0.6670225409755765, csdi_rmse: 0.7976989578772901


# Persist the trained SAITS model to disk and read it back from the same path.
# NOTE(review): this cell carries no execution count, so it appears not to have been
# run in the recorded session; the target directory is relative to the working directory.
saits.save("save_it_here/saits_physionet2012.pypots")  # save the model for future use
saits.load("save_it_here/saits_physionet2012.pypots")  # reload the serialized model file for following imputation or training

# BRITS

In [15]:
# Build the BRITS imputation model; it receives its own Adam optimizer wrapper.
brits = BRITS(
    n_steps=n_steps, n_features=n_features,
    rnn_hidden_size=32,
    epochs=epochs, batch_size=batch_size,
    optimizer=Adam(weight_decay=1e-5, lr=0.001),
)
# Train on the artificially masked data.
brits.fit(dataset)

2024-05-24 17:10:36 [INFO]: No given device, using default device: cuda
2024-05-24 17:10:36 [INFO]: BRITS initialized with the given hyperparameters, the number of trainable parameters: 43,696
2024-05-24 17:11:41 [INFO]: Epoch 001 - training loss: 1.2099
2024-05-24 17:12:11 [INFO]: Epoch 002 - training loss: 0.9196
2024-05-24 17:12:42 [INFO]: Epoch 003 - training loss: 0.8266
2024-05-24 17:13:12 [INFO]: Epoch 004 - training loss: 0.7812
2024-05-24 17:13:43 [INFO]: Epoch 005 - training loss: 0.7518
2024-05-24 17:14:13 [INFO]: Epoch 006 - training loss: 0.7315
2024-05-24 17:14:44 [INFO]: Epoch 007 - training loss: 0.7169
2024-05-24 17:15:15 [INFO]: Epoch 008 - training loss: 0.7054
2024-05-24 17:15:46 [INFO]: Epoch 009 - training loss: 0.6961
2024-05-24 17:16:18 [INFO]: Epoch 010 - training loss: 0.6880
2024-05-24 17:16:49 [INFO]: Epoch 011 - training loss: 0.6813
2024-05-24 17:17:20 [INFO]: Epoch 012 - training loss: 0.6760
2024-05-24 17:17:51 [INFO]: Epoch 013 - training loss: 0.6714
2

In [16]:
# Impute the masked dataset with the trained BRITS model.
brits_imputation = brits.impute(dataset)
# True exactly where X is NaN but X_ori is not, i.e. the artificially hidden entries.
brits_indicating_mask = np.logical_xor(np.isnan(X), np.isnan(X_ori))

In [28]:
# Score the BRITS imputation with each error metric in turn. NaNs in the reference
# are replaced by 0 so the arithmetic stays finite; the mask is False at those entries.
brits_mae, brits_mse, brits_mre, brits_rmse = (
    metric(brits_imputation, np.nan_to_num(X_ori), brits_indicating_mask)
    for metric in (calc_mae, calc_mse, calc_mre, calc_rmse)
)

In [29]:
# Report the four BRITS metrics on one line.
print(
    f'brits_mae : {brits_mae}, '
    f'brits_mse : {brits_mse}, '
    f'brits_mre : {brits_mre}, '
    f'brits_rmse : {brits_rmse}'
)

brits_mae : 0.2576348501173535, brits_mse : 0.3563537772900153, brits_mre : 0.37019788554281735, brits_rmse : 0.5969537480324714


# iTransformer

In [34]:
# Build the iTransformer imputation model from the experiment settings.
itransformer = iTransformer(
    n_steps=n_steps,
    n_features=n_features,
    n_layers=2,
    d_model=32,
    n_heads=2,
    d_k=16,
    d_v=16,
    d_ffn=32,
    dropout=0.1,
    batch_size=batch_size,
    epochs=epochs,
    # Use a dedicated optimizer wrapper (same hyperparameters as the module-level
    # `optimizer`, and the same pattern the BRITS cell uses). PyPOTS binds the
    # wrapper to a model's parameters when the model is built, so building this
    # model with the shared wrapper would take it away from models built earlier.
    optimizer=Adam(lr=0.001, weight_decay=1e-5),
)

# Train on the artificially masked data.
itransformer.fit(dataset)

2024-05-24 19:43:01 [INFO]: No given device, using default device: cuda
2024-05-24 19:43:01 [INFO]: iTransformer initialized with the given hyperparameters, the number of trainable parameters: 15,824
2024-05-24 19:43:03 [INFO]: Epoch 001 - training loss: 1.0713
2024-05-24 19:43:06 [INFO]: Epoch 002 - training loss: 0.8513
2024-05-24 19:43:09 [INFO]: Epoch 003 - training loss: 0.7985
2024-05-24 19:43:12 [INFO]: Epoch 004 - training loss: 0.7712
2024-05-24 19:43:14 [INFO]: Epoch 005 - training loss: 0.7552
2024-05-24 19:43:17 [INFO]: Epoch 006 - training loss: 0.7431
2024-05-24 19:43:20 [INFO]: Epoch 007 - training loss: 0.7371
2024-05-24 19:43:22 [INFO]: Epoch 008 - training loss: 0.7312
2024-05-24 19:43:25 [INFO]: Epoch 009 - training loss: 0.7274
2024-05-24 19:43:28 [INFO]: Epoch 010 - training loss: 0.7260
2024-05-24 19:43:31 [INFO]: Epoch 011 - training loss: 0.7212
2024-05-24 19:43:33 [INFO]: Epoch 012 - training loss: 0.7192
2024-05-24 19:43:36 [INFO]: Epoch 013 - training loss: 0

In [35]:
# Impute the masked dataset with the trained iTransformer model.
itransformer_imputation = itransformer.impute(dataset)
# True exactly where X is NaN but X_ori is not, i.e. the artificially hidden entries.
itransformer_indicating_mask = np.logical_xor(np.isnan(X), np.isnan(X_ori))



In [36]:
# Score the iTransformer imputation with each error metric in turn. NaNs in the
# reference are replaced by 0 so the arithmetic stays finite; the mask is False there.
itransformer_mae, itransformer_mse, itransformer_mre, itransformer_rmse = (
    metric(itransformer_imputation, np.nan_to_num(X_ori), itransformer_indicating_mask)
    for metric in (calc_mae, calc_mse, calc_mre, calc_rmse)
)

In [37]:
# Report the four iTransformer metrics on one line.
print(
    f'itransformer_mae : {itransformer_mae}, '
    f'itransformer_mse : {itransformer_mse}, '
    f'itransformer_mre : {itransformer_mre}, '
    f'itransformer_rmse : {itransformer_rmse}'
)

itransformer_mae : 0.40016157358052423, itransformer_mse : 0.474383479198375, itransformer_mre : 0.5749958452729467, itransformer_rmse : 0.6887550211783395
