In [98]:
import numpy as np
from pypots.classification import GRUD
from pypots.optim import Adam
import logging

In [127]:
def introduce_missing_values(data, missing_rate):
    """在数据集中引入指定比例的缺失值并生成掩码矩阵"""
    mask = np.random.rand(*data.shape) < missing_rate
    data_with_missing = data.copy()
    data_with_missing[mask] = np.nan
    return data_with_missing, mask

def gene_HypotensionData_custom(train_X, val_X, test_X, artificially_missing_rate: float = 0.1):
    train_X_missing, train_mask = introduce_missing_values(train_X, artificially_missing_rate)
    val_X_missing, val_mask = introduce_missing_values(val_X, artificially_missing_rate)
    test_X_missing, test_mask = introduce_missing_values(test_X, artificially_missing_rate)
    
    dataset = {
        'train_X': train_X_missing,
        'val_X': val_X_missing,
        'val_X_ori': val_X,
        'test_X': test_X_missing,
        'test_X_ori': test_X,
        'train_mask': train_mask,
        'val_mask': val_mask,
        'test_mask': test_mask
    }
    
    return dataset

# 加载数据
X_train = np.load('X_train_nan.npy', allow_pickle=True)
Y_train = np.load('Y_train_nan.npy', allow_pickle=True)
X_val = np.load('X_val_nan.npy', allow_pickle=True)
Y_val = np.load('Y_val_nan.npy', allow_pickle=True)
X_test = np.load('X_test_nan.npy', allow_pickle=True)
Y_test = np.load('Y_test_nan.npy', allow_pickle=True)

# 交换第二、三维
X_train = np.transpose(X_train, (0, 2, 1))
X_val = np.transpose(X_val, (0, 2, 1))
X_test = np.transpose(X_test, (0, 2, 1))

# 使用自定义函数处理数据集
HypotensionData_dataset = gene_HypotensionData_custom(X_train, X_val, X_test, artificially_missing_rate=0.1)

# 构建数据集格式
dataset_for_training = {
    "X": HypotensionData_dataset['train_X'],
    "y": Y_train,
    "mask": HypotensionData_dataset['train_mask']
}

dataset_for_validating = {
    "X": HypotensionData_dataset['val_X'],
    "y": Y_val,
    "mask": HypotensionData_dataset['val_mask'],
    "X_ori": HypotensionData_dataset['val_X_ori']
}

dataset_for_testing = {
    "X": HypotensionData_dataset['test_X'],
    "y": Y_test,
    "mask": HypotensionData_dataset['test_mask'],
    "X_ori": HypotensionData_dataset['test_X_ori']
}

# 打印检查
print("Training dataset:", dataset_for_training)
print("Validation dataset:", dataset_for_validating)
print("Testing dataset:", dataset_for_testing)

Training dataset: {'X': array([[[0.25961536, 0.28846157, 0.2990654 , ..., 0.45512822,
         0.6052632 , 0.57723576],
        [0.27884614, 0.30769232, 0.32710278, ..., 0.45512822,
         0.56578946, 0.56097555],
        [       nan,        nan,        nan, ...,        nan,
                nan,        nan],
        ...,
        [       nan,        nan,        nan, ...,        nan,
                nan,        nan],
        [       nan,        nan,        nan, ...,        nan,
                nan,        nan],
        [       nan,        nan,        nan, ...,        nan,
                nan,        nan]],

       [[0.4134615 , 0.53846157, 0.3084112 , ..., 0.64102566,
         0.65789473, 0.699187  ],
        [0.4134615 , 0.53846157, 0.2990654 , ...,        nan,
                nan, 0.7479674 ],
        [       nan, 0.53846157, 0.33644858, ..., 0.71153843,
         0.6973684 , 0.7479674 ],
        ...,
        [0.38461536, 0.5       , 0.27102804, ..., 0.65384614,
         0.68421054, 0

In [129]:
from pypots.optim import Adam
from pypots.imputation import TimesNet

# initialize the model
timesnet = TimesNet(
    n_steps=30,
    n_features=11,
    n_layers=1,
    top_k=1,
    d_model=128,
    d_ffn=512,
    n_kernels=5,
    dropout=0.5,
    apply_nonstationary_norm=False,
    batch_size=32,
    # here we set epochs=10 for a quick demo, you can set it to 100 or more for better performance
    epochs=10,
    # here we set patience=3 to early stop the training if the evaluting loss doesn't decrease for 3 epoches.
    # You can leave it to defualt as None to disable early stopping.
    patience=3,
    # give the optimizer. Different from torch.optim.Optimizer, you don't have to specify model's parameters when
    # initializing pypots.optim.Optimizer. You can also leave it to default. It will initilize an Adam optimizer with lr=0.001.
    optimizer=Adam(lr=1e-3),
    # this num_workers argument is for torch.utils.data.Dataloader. It's the number of subprocesses to use for data loading.
    # Leaving it to default as 0 means data loading will be in the main process, i.e. there won't be subprocesses.
    # You can increase it to >1 if you think your dataloading is a bottleneck to your model training speed
    num_workers=0,
    # just leave it to default as None, PyPOTS will automatically assign the best device for you.
    # Set it as 'cpu' if you don't have CUDA devices. You can also set it to 'cuda:0' or 'cuda:1' if you have multiple CUDA devices, even parallelly on ['cuda:0', 'cuda:1']
    device=None,
    # set the path for saving tensorboard and trained model files 
    saving_path="tutorial_results/imputation/timesnet",
    # only save the best model after training finished.
    # You can also set it as "better" to save models performing better ever during training.
    model_saving_strategy="best",
)

2024-07-28 15:35:57 [INFO]: No given device, using default device: cpu
2024-07-28 15:35:57 [INFO]: Model files will be saved to tutorial_results/imputation/timesnet\20240728_T153557
2024-07-28 15:35:57 [INFO]: Tensorboard file will be saved to tutorial_results/imputation/timesnet\20240728_T153557\tensorboard
2024-07-28 15:35:57 [INFO]: TimesNet initialized with the given hyperparameters, the number of trainable parameters: 21,636,235


In [131]:
timesnet.fit(train_set=dataset_for_training, val_set=dataset_for_validating)

2024-07-28 15:36:08 [INFO]: Epoch 001 - training loss: 0.2258, validation loss: 0.0309
2024-07-28 15:36:08 [INFO]: Saved the model to tutorial_results/imputation/timesnet\20240728_T153557\TimesNet_epoch1_loss0.03094053516785304.pypots
2024-07-28 15:36:18 [INFO]: Epoch 002 - training loss: 0.0552, validation loss: 0.0176
2024-07-28 15:36:18 [INFO]: Saved the model to tutorial_results/imputation/timesnet\20240728_T153557\TimesNet_epoch2_loss0.01757576937476794.pypots
2024-07-28 15:36:28 [INFO]: Epoch 003 - training loss: 0.0233, validation loss: 0.0156
2024-07-28 15:36:28 [INFO]: Saved the model to tutorial_results/imputation/timesnet\20240728_T153557\TimesNet_epoch3_loss0.015597327922781309.pypots
2024-07-28 15:36:38 [INFO]: Epoch 004 - training loss: 0.0185, validation loss: 0.0136
2024-07-28 15:36:38 [INFO]: Saved the model to tutorial_results/imputation/timesnet\20240728_T153557\TimesNet_epoch4_loss0.01360697237153848.pypots
2024-07-28 15:36:50 [INFO]: Epoch 005 - training loss: 0.01

In [139]:
timesnet_results = timesnet.predict(dataset_for_testing)
timesnet_imputation = timesnet_results["imputation"]

In [168]:
indicating_mask = np.isnan(HypotensionData_dataset['test_X']) ^ np.isnan(HypotensionData_dataset['test_X_ori'])

In [172]:
from pypots.utils.metrics import calc_mae

# calculate mean absolute error on the ground truth (artificially-missing values)
testing_mae = calc_mae(
    timesnet_imputation,
    np.nan_to_num(HypotensionData_dataset['test_X_ori']),
    indicating_mask,
)
print(f"Testing mean absolute error: {testing_mae:.4f}")

Testing mean absolute error: 0.0760


In [176]:
imputed_train_X = timesnet.impute({"X": dataset_for_training["X"]})
imputed_val_X = timesnet.impute({"X": dataset_for_validating["X"]})
imputed_test_X = timesnet.impute({"X": dataset_for_testing["X"]})

In [178]:
np.save('imputed_X_train.npy', imputed_train_X)
np.save('imputed_X_val.npy', imputed_val_X)
np.save('imputed_X_test.npy', imputed_test_X)