# Load Packages

In [1]:
%load_ext autoreload
%autoreload 2

import sys
from os.path import join
from tqdm.auto import tqdm
import joblib
import torch
sys.path.append("../../")

from src.file_manager.load_data import load_split_dict
from src.models.der.tuning import tune_der_model
from src.models.der.training import train_der_w_param
from src.models.der.predicting import der_model_prediction
from src.misc import create_folder
from seed_file import seed

# Run configuration.
# `seed` is imported from seed_file above and names the per-run model/prediction
# folders below; assign it here only for an ad-hoc override (e.g. seed = 2023).
# `tuning_seed` names the folder that holds the tuning outputs; the tuning cell
# further down only runs when `seed == tuning_seed`.
tuning_seed = 2023
data_label = "physionet"
batch_size = 64

# File paths (all relative to the notebook's working directory)
fp_notebooks_folder = "../"
fp_project_folder = join(fp_notebooks_folder, "../")
fp_processed_data_folder = join(fp_project_folder, "processed_data")
# Derived from data_label (previously a hard-coded "physionet") so that the data
# folder and the checkpoint folders below always refer to the same dataset.
fp_output_data_folder = join(fp_processed_data_folder, data_label)
fp_checkpoint_folder = join(fp_project_folder, "checkpoints")
fp_project_checkpoints = join(fp_checkpoint_folder, data_label)
fp_tuning = join(fp_project_checkpoints, "tuning")
fp_models = join(fp_project_checkpoints, "models")
fp_predictions = join(fp_project_checkpoints, "predictions")

# Seed filepaths: tuning outputs are keyed by tuning_seed, models and predictions
# by the current run's seed. create_folder comes from src.misc (presumably it
# creates the directory if missing -- confirm against its implementation).
fp_cur_tune_folder = join(fp_tuning, str(tuning_seed))
create_folder(fp_cur_tune_folder)
fp_cur_model_folder = join(fp_models, str(seed))
create_folder(fp_cur_model_folder)
fp_cur_predictions_folder = join(fp_predictions, str(seed))
create_folder(fp_cur_predictions_folder)

2025-06-05 14:28:55.104974: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-06-05 14:28:55.116979: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-06-05 14:28:55.120626: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-06-05 14:28:55.130728: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Load Data

In [2]:
# Load the split dictionary from fp_output_data_folder. The tuning cell below reads
# its "train_df", "valid_df", "feat_cols" and "target_cols" entries, and iterates
# "target_cols" with .items() (one entry per time label).
split_dict = load_split_dict(fp_output_data_folder)

# Tune DER

In [6]:
# Hyperparameter tuning is executed only for the run whose seed matches
# tuning_seed; every artefact is written into fp_cur_tune_folder.
if seed == tuning_seed:
    all_der_best_hp = {}
    target_col_groups = split_dict["target_cols"]
    for time_label, target_cols in tqdm(target_col_groups.items()):
        # One tuning call per time label; the grid dict is rebuilt on each
        # iteration so every call receives its own fresh object.
        der_tuning_df, der_best_hp = tune_der_model(
            param_grid={
                "n_hidden_layers": [2, 3, 4],
                "hidden_width": [128, 256, 512],
            },
            train_df=split_dict["train_df"],
            valid_df=split_dict["valid_df"],
            feat_cols=split_dict["feat_cols"],
            target_cols=target_cols,
            epochs=500,
            patience=5,
            seed=seed,
            batch_size=batch_size,
        )

        # Persist the per-label tuning table, then record the best setting.
        tuning_csv_path = join(fp_cur_tune_folder, f"tuning_der_{time_label}.csv")
        der_tuning_df.to_csv(tuning_csv_path)
        all_der_best_hp[time_label] = der_best_hp
        display(der_tuning_df)

    # Save the best hyperparameters for all time labels in a single file.
    joblib.dump(all_der_best_hp, join(fp_cur_tune_folder, "all_der_best_hp.joblib"))
    display(all_der_best_hp)

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

Seed set to 2023
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
-------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved. New best score: 0.003
[rank: 1] Metric val/MSE improved. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.001 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.001 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.002. Signaling Trainer to stop.
[rank: 1] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.002. Signaling Trainer to stop.
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_flo

Testing: |          | 0/? [00:00<?, ?it/s]

/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MAE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/RMSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/res

Seed set to 2023
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
-------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved. New best score: 0.004
[rank: 1] Metric val/MSE improved. New best score: 0.004


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.001 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.001 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.002. Signaling Trainer to stop.
[rank: 1] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.002. Signaling Trainer to stop.
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_flo

Testing: |          | 0/? [00:00<?, ?it/s]

/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MAE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/RMSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/res

Seed set to 2023
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
-------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved. New best score: 0.005
[rank: 1] Metric val/MSE improved. New best score: 0.005


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.001 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.001 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.002. Signaling Trainer to stop.
[rank: 1] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.002. Signaling Trainer to stop.
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_flo

Testing: |          | 0/? [00:00<?, ?it/s]

/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MAE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/RMSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/res

Seed set to 2023
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
-------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved. New best score: 0.003
[rank: 1] Metric val/MSE improved. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.002. Signaling Trainer to stop.
[rank: 1] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.002. Signaling Trainer to stop.
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_flo

Testing: |          | 0/? [00:00<?, ?it/s]

/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MAE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/RMSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/res

Seed set to 2023
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
-------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved. New best score: 0.003
[rank: 1] Metric val/MSE improved. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.002. Signaling Trainer to stop.
[rank: 1] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.002. Signaling Trainer to stop.
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_flo

Testing: |          | 0/? [00:00<?, ?it/s]

/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MAE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/RMSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/res

Seed set to 2023
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
-------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved. New best score: 0.004
[rank: 1] Metric val/MSE improved. New best score: 0.004


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.001 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.001 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.002. Signaling Trainer to stop.
[rank: 1] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.002. Signaling Trainer to stop.
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_flo

Testing: |          | 0/? [00:00<?, ?it/s]

/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MAE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/RMSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/res

Seed set to 2023
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
-------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved. New best score: 0.003
[rank: 1] Metric val/MSE improved. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.001 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.001 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.002. Signaling Trainer to stop.
[rank: 1] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.002. Signaling Trainer to stop.
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_flo

Testing: |          | 0/? [00:00<?, ?it/s]

/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MAE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/RMSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/res

Seed set to 2023
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
-------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved. New best score: 0.007
[rank: 1] Metric val/MSE improved. New best score: 0.007


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.005 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.005 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.002. Signaling Trainer to stop.
[rank: 1] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.002. Signaling Trainer to stop.
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_flo

Testing: |          | 0/? [00:00<?, ?it/s]

/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MAE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/RMSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/res

Seed set to 2023
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
-------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved. New best score: 0.005
[rank: 1] Metric val/MSE improved. New best score: 0.005


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.002 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.002 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.002. Signaling Trainer to stop.
[rank: 1] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.002. Signaling Trainer to stop.
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_flo

Testing: |          | 0/? [00:00<?, ?it/s]

/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MAE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/RMSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/res

Unnamed: 0,hidden_width,n_hidden_layers,loss,time/s,best_hyperparameter
0,128,2,0.002302,140.60485,False
1,128,3,0.002281,92.123084,False
2,128,4,0.002348,106.777381,False
3,256,2,0.002288,88.318835,False
4,256,3,0.002329,144.773249,False
5,256,4,0.00224,89.095292,True
6,512,2,0.002279,86.452479,False
7,512,3,0.002364,58.792545,False
8,512,4,0.002317,108.810602,False


  0%|          | 0/9 [00:00<?, ?it/s]

Seed set to 2023
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
-------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved. New best score: 0.004
[rank: 1] Metric val/MSE improved. New best score: 0.004


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
[rank: 1] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_flo

Testing: |          | 0/? [00:00<?, ?it/s]

/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MAE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/RMSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/res

Seed set to 2023
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
-------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved. New best score: 0.004
[rank: 1] Metric val/MSE improved. New best score: 0.004


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.004
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.004


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
[rank: 1] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL

Testing: |          | 0/? [00:00<?, ?it/s]

/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MAE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/RMSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/res

Seed set to 2023
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
-------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved. New best score: 0.005
[rank: 1] Metric val/MSE improved. New best score: 0.005


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.001 >= min_delta = 0.0. New best score: 0.004
[rank: 1] Metric val/MSE improved by 0.001 >= min_delta = 0.0. New best score: 0.004


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.004
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.004


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.004
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.004


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
[rank: 1] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_flo

Testing: |          | 0/? [00:00<?, ?it/s]

/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MAE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/RMSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/res

Seed set to 2023
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
-------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved. New best score: 0.003
[rank: 1] Metric val/MSE improved. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
[rank: 1] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_flo

Testing: |          | 0/? [00:00<?, ?it/s]

/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MAE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/RMSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/res

Seed set to 2023
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
-------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved. New best score: 0.003
[rank: 1] Metric val/MSE improved. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
[rank: 1] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_flo

Testing: |          | 0/? [00:00<?, ?it/s]

/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MAE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/RMSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/res

Seed set to 2023
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
-------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved. New best score: 0.004
[rank: 1] Metric val/MSE improved. New best score: 0.004


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.004
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.004


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.001 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.001 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
[rank: 1] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_flo

Testing: |          | 0/? [00:00<?, ?it/s]

/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MAE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/RMSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/res

Seed set to 2023
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
-------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved. New best score: 0.003
[rank: 1] Metric val/MSE improved. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
[rank: 1] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_flo

Testing: |          | 0/? [00:00<?, ?it/s]

/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MAE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/RMSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/res

Seed set to 2023
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
-------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved. New best score: 0.003
[rank: 1] Metric val/MSE improved. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
[rank: 1] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_flo

Testing: |          | 0/? [00:00<?, ?it/s]

/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MAE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/RMSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/res

Seed set to 2023
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
-------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved. New best score: 0.005
[rank: 1] Metric val/MSE improved. New best score: 0.005


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.001 >= min_delta = 0.0. New best score: 0.004
[rank: 1] Metric val/MSE improved by 0.001 >= min_delta = 0.0. New best score: 0.004


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.001 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.001 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
[rank: 1] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_flo

Testing: |          | 0/? [00:00<?, ?it/s]

/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MAE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/RMSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/res

Unnamed: 0,hidden_width,n_hidden_layers,loss,time/s,best_hyperparameter
0,128,2,0.003162,59.535783,False
1,128,3,0.002906,96.838533,False
2,128,4,0.002939,94.245398,False
3,256,2,0.00314,71.661374,False
4,256,3,0.002911,87.160202,False
5,256,4,0.003012,76.162021,False
6,512,2,0.002851,88.657223,True
7,512,3,0.003188,102.666808,False
8,512,4,0.00305,78.025454,False


  0%|          | 0/9 [00:00<?, ?it/s]

Seed set to 2023
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
-------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved. New best score: 0.004
[rank: 1] Metric val/MSE improved. New best score: 0.004


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
[rank: 1] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_flo

Testing: |          | 0/? [00:00<?, ?it/s]

/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MAE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/RMSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/res

Seed set to 2023
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
-------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved. New best score: 0.004
[rank: 1] Metric val/MSE improved. New best score: 0.004


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.004
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.004


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.004
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.004


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
[rank: 1] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_flo

Testing: |          | 0/? [00:00<?, ?it/s]

/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MAE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/RMSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/res

Seed set to 2023
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
-------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved. New best score: 0.005
[rank: 1] Metric val/MSE improved. New best score: 0.005


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.001 >= min_delta = 0.0. New best score: 0.004
[rank: 1] Metric val/MSE improved by 0.001 >= min_delta = 0.0. New best score: 0.004


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
[rank: 1] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_flo

Testing: |          | 0/? [00:00<?, ?it/s]

/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MAE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/RMSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/res

Seed set to 2023
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
-------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved. New best score: 0.003
[rank: 1] Metric val/MSE improved. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
[rank: 1] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_flo

Testing: |          | 0/? [00:00<?, ?it/s]

/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MAE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/RMSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/res

Seed set to 2023
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
-------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved. New best score: 0.003
[rank: 1] Metric val/MSE improved. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
[rank: 1] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_flo

Testing: |          | 0/? [00:00<?, ?it/s]

/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MAE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/RMSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/res

Seed set to 2023
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
-------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved. New best score: 0.004
[rank: 1] Metric val/MSE improved. New best score: 0.004


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.004
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.004


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
[rank: 1] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_flo

Testing: |          | 0/? [00:00<?, ?it/s]

/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MAE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/RMSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/res

Seed set to 2023
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
-------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved. New best score: 0.003
[rank: 1] Metric val/MSE improved. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
[rank: 1] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_flo

Testing: |          | 0/? [00:00<?, ?it/s]

/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MAE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/RMSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/res

Seed set to 2023
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
-------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved. New best score: 0.004
[rank: 1] Metric val/MSE improved. New best score: 0.004


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.001 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.001 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
[rank: 1] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_flo

Testing: |          | 0/? [00:00<?, ?it/s]

/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MAE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/RMSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/res

Seed set to 2023
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
-------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved. New best score: 0.004
[rank: 1] Metric val/MSE improved. New best score: 0.004


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.001 >= min_delta = 0.0. New best score: 0.004
[rank: 1] Metric val/MSE improved by 0.001 >= min_delta = 0.0. New best score: 0.004


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
[rank: 1] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_flo

Testing: |          | 0/? [00:00<?, ?it/s]

/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MAE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/RMSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/res

Unnamed: 0,hidden_width,n_hidden_layers,loss,time/s,best_hyperparameter
0,128,2,0.00353,66.603729,False
1,128,3,0.003299,58.622878,False
2,128,4,0.003257,107.386134,True
3,256,2,0.003383,89.702158,False
4,256,3,0.003339,65.485688,False
5,256,4,0.003358,96.345895,False
6,512,2,0.003318,63.874551,False
7,512,3,0.003405,91.436044,False
8,512,4,0.003566,98.245733,False


{'t+1': {'hidden_width': 256, 'n_hidden_layers': 4},
 't+2': {'hidden_width': 512, 'n_hidden_layers': 2},
 't+3': {'hidden_width': 128, 'n_hidden_layers': 4}}

# Training DER

In [11]:
# Fit one DER regressor per entry of split_dict["target_cols"], using the
# hyperparameters stored for that label during tuning, and save each fitted model.
all_der_best_hp = joblib.load(join(fp_cur_tune_folder, "all_der_best_hp.joblib"))
for time_label, target_cols in tqdm(split_dict["target_cols"].items()):
    # train_der_w_param returns a pair; only the first element (the model) is kept.
    der_model, _ = train_der_w_param(
        train_df=split_dict["train_df"],
        valid_df=split_dict["valid_df"],
        inputs=split_dict["feat_cols"],
        outputs=target_cols,
        seed=seed,
        max_epochs=500,
        patience=5,
        batch_size=batch_size,
        # Tuned hyperparameters for this label are forwarded as keyword arguments.
        **all_der_best_hp[time_label],
    )
    # The whole model object is serialised (not just a state_dict).
    fp_model = join(fp_cur_model_folder, f"der_{time_label}.pt")
    torch.save(der_model, fp_model)

  0%|          | 0/3 [00:00<?, ?it/s]

Seed set to 2023
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
-------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved. New best score: 0.004
[rank: 1] Metric val/MSE improved. New best score: 0.004


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.001 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.001 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.002. Signaling Trainer to stop.
[rank: 1] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.002. Signaling Trainer to stop.
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_flo

Testing: |          | 0/? [00:00<?, ?it/s]

/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MAE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/RMSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/res

Seed set to 2023
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
-------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved. New best score: 0.003
[rank: 1] Metric val/MSE improved. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
[rank: 1] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_flo

Testing: |          | 0/? [00:00<?, ?it/s]

/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MAE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/RMSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/res

Seed set to 2023
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
-------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved. New best score: 0.005
[rank: 1] Metric val/MSE improved. New best score: 0.005


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.001 >= min_delta = 0.0. New best score: 0.004
[rank: 1] Metric val/MSE improved by 0.001 >= min_delta = 0.0. New best score: 0.004


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003
[rank: 1] Metric val/MSE improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[rank: 0] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
[rank: 1] Monitored metric val/MSE did not improve in the last 5 records. Best score: 0.003. Signaling Trainer to stop.
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_flo

Testing: |          | 0/? [00:00<?, ?it/s]

/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MAE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/MSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test/RMSE', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/lirong/miniconda3/envs/main/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/res

# Prediction

In [13]:
# For every entry of split_dict["target_cols"], reload the saved DER model and
# write its predictions on the validation and test splits to CSV.
for time_label, target_cols in tqdm(split_dict["target_cols"].items()):
    fp_model = join(fp_cur_model_folder, f"der_{time_label}.pt")
    # The checkpoint was written with torch.save(der_model, ...), i.e. it is a full
    # pickled model object rather than a state_dict. PyTorch >= 2.6 defaults
    # torch.load to weights_only=True, which refuses such files, so opt out explicitly.
    # NOTE(review): full unpickling can execute arbitrary code; only load
    # checkpoints that were produced by this project.
    der_model = torch.load(fp_model, weights_only=False)
    der_valid_df = der_model_prediction(
        der_model, test_df=split_dict["valid_df"], 
        feat_cols=split_dict["feat_cols"], target_cols=target_cols, 
        seed=seed, silent=False, regressor_label=time_label)
    der_test_df = der_model_prediction(
        der_model, test_df=split_dict["test_df"], 
        feat_cols=split_dict["feat_cols"], target_cols=target_cols, 
        seed=seed, silent=False, regressor_label=time_label)
    # The file suffix is only the last character of the label (e.g. "t+1" -> "1").
    # NOTE(review): labels whose final character coincides would overwrite each
    # other's files -- confirm all labels end in a distinct character.
    der_valid_df.to_csv(join(fp_cur_predictions_folder, f"der_valid_{time_label[-1]}.csv"))
    der_test_df.to_csv(join(fp_cur_predictions_folder, f"der_test_{time_label[-1]}.csv"))

  0%|          | 0/3 [00:00<?, ?it/s]