In [1]:
# Data preprocessing. Tedious, but PyPOTS can help.
import numpy as np
from sklearn.preprocessing import StandardScaler
from pygrinder import mcar
from pypots.data import load_specific_dataset

In [29]:
# Model training. This is PyPOTS showtime.
from pypots.imputation import SAITS, CSDI, BRITS, iTransformer, Autoformer
from pypots.optim import Adam
from pypots.utils.logging import logger
from pypots.utils.metrics import calc_mae, calc_mse, calc_mre, calc_rmse
from pypots.data.generating import gene_physionet2012

# Names of the datasets earmarked for testing. Only "physionet_2012" is loaded
# by the cells visible in this notebook; the list itself is not referenced by them.
DATASETS_TO_TEST = [
    # PhysioNet challenge datasets
    "physionet_2012", "physionet_2019",
    # air quality
    "beijing_multisite_air_quality",
    # electricity
    "electricity_load_diagrams", "electricity_transformer_temperature",
    # remaining datasets
    "vessel_ais", "ucr_uea_Wine",
]

In [3]:
# Generate the PhysioNet-2012 data with an artificial missing rate of 0.1,
# then print the keys of the returned dictionary to see what it provides.
physionet2012_dataset = gene_physionet2012(
    artificially_missing_rate=0.1,
)
print(physionet2012_dataset.keys())

2024-05-24 18:45:14 [INFO]: Loading the dataset physionet_2012 with TSDB (https://github.com/WenjieDu/Time_Series_Data_Beans)...
2024-05-24 18:45:14 [INFO]: Starting preprocessing physionet_2012...
2024-05-24 18:45:14 [INFO]: You're using dataset physionet_2012, please cite it properly in your work. You can find its reference information at the below link: 
https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/physionet_2012
2024-05-24 18:45:14 [INFO]: Dataset physionet_2012 has already been downloaded. Processing directly...
2024-05-24 18:45:14 [INFO]: Dataset physionet_2012 has already been cached. Loading from cache directly...
2024-05-24 18:45:14 [INFO]: Loaded successfully!


dict_keys(['n_classes', 'n_steps', 'n_features', 'train_X', 'train_y', 'train_ICUType', 'val_X', 'val_y', 'val_ICUType', 'test_X', 'test_y', 'test_ICUType', 'scaler', 'val_X_ori', 'test_X_ori', 'test_X_indicating_mask'])


In [4]:
# Wrap each split in the dictionary layout that is passed to fit()/predict() below.

# Training: only the input series under "X".
dataset_for_training = dict(X=physionet2012_dataset["train_X"])

# Validation: the input series plus "X_ori"
# (presumably the values before artificial masking -- TODO confirm).
dataset_for_validating = dict(
    X=physionet2012_dataset["val_X"],
    X_ori=physionet2012_dataset["val_X_ori"],
)

# Testing: only the input series; the metric cells read 'test_X_ori' and
# 'test_X_indicating_mask' straight from physionet2012_dataset.
dataset_for_testing = dict(X=physionet2012_dataset["test_X"])

In [5]:
# Series dimensions are read from the generated dataset.
n_steps, n_features = (
    physionet2012_dataset["n_steps"],
    physionet2012_dataset["n_features"],
)

# Training settings reused by every model cell below.
epochs, batch_size = 30, 128
optimizer = Adam(lr=0.001, weight_decay=1e-5)

In [39]:
# Length of the training input (displayed as the cell output).
len(dataset_for_training["X"])

7672

In [40]:
# Length of the validation input (displayed as the cell output).
len(dataset_for_validating["X"])

1918

In [41]:
# Length of the validation "X_ori" array; it matches the length of "X" in the recorded output.
len(dataset_for_validating["X_ori"])

1918

In [42]:
# Length of the test input (displayed as the cell output).
len(dataset_for_testing["X"])

2398

# SAITS

In [6]:
# SAITS imputation model in a small configuration (2 layers, d_model=32).
# It gets its own Adam instance (same settings as the module-level `optimizer`),
# as the BRITS cell already does, so that no optimizer object is shared between
# models when cells are re-run in a different order.
saits = SAITS(
    n_steps=n_steps,
    n_features=n_features,
    n_layers=2,
    d_model=32,
    n_heads=2,
    d_k=16,
    d_v=16,
    d_ffn=32,
    dropout=0.1,
    batch_size=batch_size,
    epochs=epochs,
    optimizer=Adam(lr=0.001, weight_decay=1e-5),
)

# Fit on the training split; the validation split is supplied as val_set.
saits.fit(train_set=dataset_for_training, val_set=dataset_for_validating)

2024-05-24 18:45:37 [INFO]: No given device, using default device: cuda
2024-05-24 18:45:37 [INFO]: SAITS initialized with the given hyperparameters, the number of trainable parameters: 37,174
2024-05-24 18:45:41 [INFO]: Epoch 001 - training loss: 1.2528, validation loss: 0.6678
2024-05-24 18:45:44 [INFO]: Epoch 002 - training loss: 0.9326, validation loss: 0.5209
2024-05-24 18:45:46 [INFO]: Epoch 003 - training loss: 0.7892, validation loss: 0.4570
2024-05-24 18:45:49 [INFO]: Epoch 004 - training loss: 0.7174, validation loss: 0.4233
2024-05-24 18:45:51 [INFO]: Epoch 005 - training loss: 0.6740, validation loss: 0.4085
2024-05-24 18:45:53 [INFO]: Epoch 006 - training loss: 0.6443, validation loss: 0.3961
2024-05-24 18:45:56 [INFO]: Epoch 007 - training loss: 0.6261, validation loss: 0.3881
2024-05-24 18:45:58 [INFO]: Epoch 008 - training loss: 0.6111, validation loss: 0.3834
2024-05-24 18:46:01 [INFO]: Epoch 009 - training loss: 0.5990, validation loss: 0.3782
2024-05-24 18:46:03 [INF

In [7]:
# Run the fitted SAITS model on the test input and keep the "imputation" entry
# of the returned results.
saits_results = saits.predict(dataset_for_testing)
saits_imputation = saits_results['imputation']

In [8]:
# Score the SAITS imputation with MAE, MSE, MRE and RMSE (in that order).
# Every metric receives the same three arguments: the imputation, 'test_X_ori'
# and 'test_X_indicating_mask' from the dataset dictionary.
saits_mae, saits_mse, saits_mre, saits_rmse = (
    metric(
        saits_imputation,
        physionet2012_dataset['test_X_ori'],
        physionet2012_dataset['test_X_indicating_mask'],
    )
    for metric in (calc_mae, calc_mse, calc_mre, calc_rmse)
)

In [43]:
# Report the four SAITS scores on a single line.
print(
    f'saits_mae : {saits_mae}, saits_mse : {saits_mse}, '
    f'saits_mre : {saits_mre}, saits_rmse : {saits_rmse}'
)

saits_mae : 0.304007301218049, saits_mse : 0.3783244941753913, saits_mre : 0.43161903693952236, saits_rmse : 0.6150808842545762


# CSDI

In [10]:
 csdi = CSDI(
    n_steps=n_steps,
    n_features=n_features,
    n_layers=2,
    n_channels=8,
    d_time_embedding=32,
    d_feature_embedding=3,
    d_diffusion_embedding=32,
    n_diffusion_steps=5,
    n_heads=4,
    batch_size=batch_size,
    epochs=epochs,
    optimizer=optimizer
)
csdi.fit(train_set=dataset_for_training, val_set=dataset_for_validating)

2024-05-24 18:46:53 [INFO]: No given device, using default device: cuda
2024-05-24 18:46:53 [INFO]: CSDI initialized with the given hyperparameters, the number of trainable parameters: 10,280
2024-05-24 18:47:02 [INFO]: Epoch 001 - training loss: 0.9083, validation loss: 0.7689
2024-05-24 18:47:10 [INFO]: Epoch 002 - training loss: 0.7034, validation loss: 0.6536
2024-05-24 18:47:19 [INFO]: Epoch 003 - training loss: 0.6535, validation loss: 0.6301
2024-05-24 18:47:27 [INFO]: Epoch 004 - training loss: 0.6260, validation loss: 0.6200
2024-05-24 18:47:35 [INFO]: Epoch 005 - training loss: 0.6253, validation loss: 0.6112
2024-05-24 18:47:43 [INFO]: Epoch 006 - training loss: 0.6131, validation loss: 0.6015
2024-05-24 18:47:51 [INFO]: Epoch 007 - training loss: 0.6068, validation loss: 0.5988
2024-05-24 18:48:00 [INFO]: Epoch 008 - training loss: 0.5963, validation loss: 0.5886
2024-05-24 18:48:08 [INFO]: Epoch 009 - training loss: 0.5949, validation loss: 0.5816
2024-05-24 18:48:16 [INFO

In [26]:
# Run the fitted CSDI model on the test input, requesting 2 sampling passes,
# and keep the "imputation" entry of the returned results.
csdi_results = csdi.predict(dataset_for_testing, n_sampling_times=2)
csdi_imputation = csdi_results['imputation']

# Collapse axis 1 by averaging -- presumably the sampling axis, so the result
# lines up with 'test_X_ori' in the metric cell (TODO confirm against PyPOTS docs).
mean_csdi_imputation = csdi_imputation.mean(axis=1)

In [27]:
# Score the averaged CSDI imputation with MAE, MSE, MRE and RMSE (in that order).
# Every metric receives the same three arguments: the imputation, 'test_X_ori'
# and 'test_X_indicating_mask' from the dataset dictionary.
csdi_mae, csdi_mse, csdi_mre, csdi_rmse = (
    metric(
        mean_csdi_imputation,
        physionet2012_dataset['test_X_ori'],
        physionet2012_dataset['test_X_indicating_mask'],
    )
    for metric in (calc_mae, calc_mse, calc_mre, calc_rmse)
)

In [28]:
# Report the four CSDI scores on a single line.
print(
    f'csdi_mae : {csdi_mae}, csdi_mse : {csdi_mse}, '
    f'csdi_mre : {csdi_mre}, csdi_rmse: {csdi_rmse}'
)

csdi_mae : 0.5169652467970249, csdi_mse : 0.6806989965726921, csdi_mre : 0.7339693522481987, csdi_rmse: 0.8250448451888491


# BRITS

In [18]:
# BRITS imputation model with a 32-unit RNN hidden state, trained with its own
# Adam instance.
brits = BRITS(
    n_steps=n_steps,
    n_features=n_features,
    rnn_hidden_size=32,
    epochs=epochs,
    batch_size=batch_size,
    optimizer=Adam(weight_decay=1e-5, lr=0.001),
)

# Fit on the training split; the validation split is supplied as val_set.
brits.fit(
    val_set=dataset_for_validating,
    train_set=dataset_for_training,
)

2024-05-24 18:52:31 [INFO]: No given device, using default device: cuda
2024-05-24 18:52:31 [INFO]: BRITS initialized with the given hyperparameters, the number of trainable parameters: 43,696
2024-05-24 18:53:21 [INFO]: Epoch 001 - training loss: 1.3179, validation loss: 0.6653
2024-05-24 18:53:44 [INFO]: Epoch 002 - training loss: 1.0186, validation loss: 0.5082
2024-05-24 18:54:06 [INFO]: Epoch 003 - training loss: 0.8860, validation loss: 0.4472
2024-05-24 18:54:29 [INFO]: Epoch 004 - training loss: 0.8234, validation loss: 0.4146
2024-05-24 18:54:52 [INFO]: Epoch 005 - training loss: 0.7840, validation loss: 0.3929
2024-05-24 18:55:15 [INFO]: Epoch 006 - training loss: 0.7561, validation loss: 0.3767
2024-05-24 18:55:38 [INFO]: Epoch 007 - training loss: 0.7350, validation loss: 0.3643
2024-05-24 18:56:01 [INFO]: Epoch 008 - training loss: 0.7184, validation loss: 0.3540
2024-05-24 18:56:24 [INFO]: Epoch 009 - training loss: 0.7054, validation loss: 0.3479
2024-05-24 18:56:48 [INF

In [19]:
# Run the fitted BRITS model on the test input and keep the "imputation" entry
# of the returned results.
brits_results = brits.predict(dataset_for_testing)
brits_imputation = brits_results['imputation']

In [20]:
# Score the BRITS imputation with MAE, MSE, MRE and RMSE (in that order).
# Every metric receives the same three arguments: the imputation, 'test_X_ori'
# and 'test_X_indicating_mask' from the dataset dictionary.
brits_mae, brits_mse, brits_mre, brits_rmse = (
    metric(
        brits_imputation,
        physionet2012_dataset['test_X_ori'],
        physionet2012_dataset['test_X_indicating_mask'],
    )
    for metric in (calc_mae, calc_mse, calc_mre, calc_rmse)
)

In [21]:
# Report the four BRITS scores on a single line.
print(
    f'brits_mae : {brits_mae}, brits_mse : {brits_mse}, '
    f'brits_mre : {brits_mre}, brits_rmse : {brits_rmse}'
)

brits_mae : 0.2691224267809445, brits_mse : 0.37157659199046367, brits_mre : 0.38209070045559174, brits_rmse : 0.6095708260657359


# iTransformer

In [30]:
# iTransformer imputation model with the same layer/attention sizes as the
# SAITS cell (2 layers, d_model=32).
# It gets its own Adam instance (same settings as the module-level `optimizer`),
# as the BRITS cell already does, so that no optimizer object is shared between
# models when cells are re-run in a different order.
itransformer = iTransformer(
    n_steps=n_steps,
    n_features=n_features,
    n_layers=2,
    d_model=32,
    n_heads=2,
    d_k=16,
    d_v=16,
    d_ffn=32,
    dropout=0.1,
    batch_size=batch_size,
    epochs=epochs,
    optimizer=Adam(lr=0.001, weight_decay=1e-5),
)

# Fit on the training split; the validation split is supplied as val_set.
itransformer.fit(train_set=dataset_for_training, val_set=dataset_for_validating)

2024-05-24 19:45:21 [INFO]: No given device, using default device: cuda
2024-05-24 19:45:21 [INFO]: iTransformer initialized with the given hyperparameters, the number of trainable parameters: 15,824
2024-05-24 19:45:23 [INFO]: Epoch 001 - training loss: 1.1568, validation loss: 0.5719
2024-05-24 19:45:25 [INFO]: Epoch 002 - training loss: 0.9074, validation loss: 0.5203
2024-05-24 19:45:27 [INFO]: Epoch 003 - training loss: 0.8435, validation loss: 0.4988
2024-05-24 19:45:29 [INFO]: Epoch 004 - training loss: 0.8058, validation loss: 0.4896
2024-05-24 19:45:31 [INFO]: Epoch 005 - training loss: 0.7822, validation loss: 0.4832
2024-05-24 19:45:32 [INFO]: Epoch 006 - training loss: 0.7678, validation loss: 0.4816
2024-05-24 19:45:34 [INFO]: Epoch 007 - training loss: 0.7543, validation loss: 0.4786
2024-05-24 19:45:36 [INFO]: Epoch 008 - training loss: 0.7479, validation loss: 0.4767
2024-05-24 19:45:38 [INFO]: Epoch 009 - training loss: 0.7411, validation loss: 0.4752
2024-05-24 19:45:

In [31]:
# Run the fitted iTransformer model on the test input and keep the "imputation"
# entry of the returned results.
itransformer_results = itransformer.predict(dataset_for_testing)
itransformer_imputation = itransformer_results['imputation']

In [32]:
# Score the iTransformer imputation with MAE, MSE, MRE and RMSE (in that order).
# Every metric receives the same three arguments: the imputation, 'test_X_ori'
# and 'test_X_indicating_mask' from the dataset dictionary.
itransformer_mae, itransformer_mse, itransformer_mre, itransformer_rmse = (
    metric(
        itransformer_imputation,
        physionet2012_dataset['test_X_ori'],
        physionet2012_dataset['test_X_indicating_mask'],
    )
    for metric in (calc_mae, calc_mse, calc_mre, calc_rmse)
)

In [33]:
# Report the four iTransformer scores on a single line.
print(
    f'itransformer_mae : {itransformer_mae}, itransformer_mse : {itransformer_mse}, '
    f'itransformer_mre : {itransformer_mre}, itransformer_rmse : {itransformer_rmse}'
)

itransformer_mae : 0.4064529111809905, itransformer_mse : 0.47739525404227046, itransformer_mre : 0.5770677657487421, itransformer_rmse : 0.690937952382318
