# Forecasting Experiment on the Lorenz Dataset

In [1]:
import os
import torch
import numpy as np

import warnings
warnings.filterwarnings("ignore")

import sys
sys.path.append(os.path.join(os.path.dirname('__file__'), '../'))

from engine.solver import Trainer
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import Dataset, DataLoader
from engine.logger import Logger
from Utils.io_utils import load_yaml_config, instantiate_from_config
from Models.interpretable_diffusion.model_utils import normalize_to_neg_one_to_one, unnormalize_to_zero_to_one

from sklearn import metrics

In [2]:
# print(f"Data shape: {data.shape}")
# np.save('/data/0shared/liubo/diffusion-gc/Diffusion-TS-main/Experiments/data/solar_nips.npy', data)

In [3]:
class get_dataset(Dataset):
    """Sliding-window dataset over a 2-D multivariate time series.

    Each item is a float32 CPU tensor of shape
    ``(seq_length + pred_length, features)`` holding consecutive rows of
    ``data``; consecutive items start one row apart.  ``pred_length`` is fixed
    to 0 in ``__init__``, so every window is exactly ``seq_length`` rows long.

    Args:
        data: 2-D array-like indexed as ``data[row, feature]`` (NumPy array or
            tensor).  The last axis is taken as the feature axis.
        seq_length: number of rows per window.
        mode: ``'train'`` keeps the first ``train_split`` fraction of the
            windows; any other value keeps the remaining windows.
        train_split: fraction of the windows assigned to the ``'train'`` part.

    Raises:
        ValueError: if ``data`` has fewer than ``seq_length + pred_length - 1``
            rows, i.e. the number of windows would be negative.
    """

    def __init__(self, data, seq_length, mode, train_split=0.8):
        super().__init__()

        # Kept for callers that want the untouched input back.
        self.samples = data

        self.seq_length = seq_length
        self.pred_length = 0
        self.features = data.shape[-1]

        # Build every window first, then split the *windows* (not the raw rows).
        self.data = self.get_data(data)
        train_num = int(train_split * len(self.data))
        if mode == 'train':
            self.data = self.data[:train_num, :, :]
        else:
            self.data = self.data[train_num:, :, :]

    def __getitem__(self, index):
        """Return the window at ``index`` as a ``(window, features)`` tensor."""
        return self.data[index, :, :]

    def __len__(self):
        """Number of windows kept for the selected ``mode``."""
        return len(self.data)

    def get_data(self, data):
        """Cut ``data`` into all overlapping windows with stride 1.

        Returns:
            float32 CPU tensor of shape
            ``(len(data) - window + 1, window, features)`` where
            ``window = seq_length + pred_length``.  The result never shares
            memory with ``data``.
        """
        window = self.seq_length + self.pred_length
        num_sample = len(data) - window + 1
        if num_sample < 0:
            # torch.zeros() used to fail here with an opaque
            # "negative dimension" error; report the real cause instead.
            raise ValueError(
                f"data has {len(data)} rows, too few for a window of "
                f"{window} rows"
            )

        # as_tensor accepts both NumPy arrays and tensors without the warning
        # that torch.tensor(tensor) emits; the result is always float32 on CPU.
        series = torch.as_tensor(data, dtype=torch.float32, device='cpu')

        # Row indices of every window: index[i, j] = i + j.  Advanced indexing
        # gathers all windows in one call and always returns a fresh copy.
        starts = torch.arange(num_sample).unsqueeze(1)
        offsets = torch.arange(window).unsqueeze(0)
        return series[starts + offsets]


# Load the full series; rows are split chronologically below and the last axis
# is used as the feature axis by get_dataset.
# NOTE(review): this file is tagged "F20" while Args_Example names the run
# "lorenz_p15_t1000_f40" — confirm which forcing value is intended.
data = np.load('/data/0shared/liubo/diffusion-gc/DiffuGC/my_exp/lorenz/lorenz_p15_t1000_F20.npy')
print(data.shape)

train_split = 0.8
batch_size = 64

# First `train_split` fraction of the rows goes to train, the remainder to test.
split_row = int(train_split * data.shape[0])
train, test = data[:split_row, :], data[split_row:, :]
# The held-out part is stored as a single sequence: (1, rows, features).
test = test.reshape(1, -1, data.shape[-1])

# Optional min-max scaling to [-1, 1]; currently disabled.
# scaler = MinMaxScaler()
# train_scaled = normalize_to_neg_one_to_one(scaler.fit_transform(train))
# test_scaled = scaler.transform(test.reshape(-1, test.shape[-1])).reshape(test.shape)
# test_scaled = normalize_to_neg_one_to_one(test_scaled)

# print(f"Train shape: {train_scaled.shape}")
# print(f"Test shape: {test_scaled.shape}")

class Args_Example:
    """Run settings passed as ``args`` to ``Logger`` and ``Trainer`` in this notebook.

    All settings are keyword parameters whose defaults reproduce the values
    that used to be hard-coded, so ``Args_Example()`` behaves as before while
    other runs can override them without editing the class.

    Args:
        name: identifier of the run.
        config_path: path of the YAML file read with ``load_yaml_config``.
        save_dir: output directory; created on construction if missing.
        gpu: CUDA device index used to build the ``cuda:<gpu>`` device string.

    NOTE(review): the default name/save_dir say "f40" but the notebook loads
    ``lorenz_p15_t1000_F20.npy`` — confirm which one is intended.
    """

    def __init__(
        self,
        name='lorenz_p15_t1000_f40',
        config_path='./Config/lorenz.yaml',
        save_dir='/data/0shared/liubo/diffusion-gc/DiffuGC/my_exp/lorenz_p15_t1000_f40',
        gpu=0,
    ) -> None:
        self.name = name
        self.config_path = config_path
        self.save_dir = save_dir
        self.gpu = gpu
        # Side effect: make sure the output directory exists before training.
        os.makedirs(self.save_dir, exist_ok=True)

# Run settings, model/solver configuration and compute device.
args = Args_Example()
configs = load_yaml_config(args.config_path)
device_name = f'cuda:{args.gpu}' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

model_params = configs['model']['params']

# NOTE(review): `train` is already the first `train_split` fraction of the rows,
# and get_dataset(mode='train') again keeps only the first `train_split`
# fraction of the windows built from it — confirm the double split is intended.
train_dataset = get_dataset(
    train,
    seq_length=model_params['seq_length'],
    mode='train',
    train_split=train_split,
)
trainloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
)

# Log name encodes the main hyper-parameters as "<label><value>" joined by "_".
log_fields = [
    ('seq_length', model_params['seq_length']),
    ('batch_size', batch_size),
    ('lr', configs['solver']['base_lr']),
    ('d_model', model_params['d_model']),
    ('num_layers', model_params['n_layer_enc']),
    ('num_heads', model_params['n_heads']),
    ('dropout', model_params['attn_pd']),
]
log_name = '_'.join(label + str(value) for label, value in log_fields)





(1000, 15)


In [None]:
# List every parameter tensor of the configured model with its shape.
# This cell used to read `model`, which is only assigned by the training loop
# in a later cell, so it raised NameError on Restart & Run All. It now builds
# its own throwaway instance from the same config; parameter names and shapes
# are read straight from that instance.
inspection_model = instantiate_from_config(configs['model'])
for name, param in inspection_model.named_parameters():
    print(f"Parameter Name: {name}")
    print(f"Shape: {param.shape}")
    print("-" * 50)
# Drop the throwaway instance so it does not linger in kernel memory.
del inspection_model

In [4]:
# Train one model per target series and collect what each run returns.
feature_size = data.shape[1]
inferred_gc = []
for ts_target in range(feature_size):
    print(f"ts_target: {ts_target}")

    # A new logger, model and trainer are created for every target.
    gc_logger = Logger(args=args, name=log_name, ts_target=ts_target)
    model = instantiate_from_config(configs['model']).to(device)
    trainer = Trainer(
        config=configs,
        args=args,
        model=model,
        dataloader=trainloader,
        weight_decay=configs['solver']['weight_decay'],
        logger=gc_logger,
    )

    # presumably one norm per input series for this target — confirm against
    # Trainer.train.
    norm = trainer.train(ts_target)
    inferred_gc.append(norm)

ts_target: 0
{'overall': {'trainable': '17.21M', 'non_trainable': '0', 'total': '17.21M'}, 'denoise_Transformer': {'trainable': '17.21M', 'non_trainable': '0', 'total': '17.21M'}}
lorenz_p15_t1000_f40: start training...


Epoch:50|train loss: 36.3614|l_recon: 16.2293|ridge_loss: 20.1322|l2_loss:11.6587|lr: 0.000500|prox_lam:5.00:   2%|▎         | 50/2000 [01:11<18:46,  1.73it/s]        

lorenz_p15_t1000_f40: train: Epoch 50/2000 Reconstruction Loss: 16.2293 Reglarization: 20.1322 | Total Loss: 36.361430 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.76170695 0.62853914 0.55629945 0.60277957 0.5132062  0.51085454
  0.4822889  0.4876593  0.5616597  0.5596806  0.5003783  0.47243553
  0.6105883  0.52702683 0.56070244]]
Norm: tensor([[0.7617, 0.6285, 0.5563, 0.6028, 0.5132, 0.5109, 0.4823, 0.4877, 0.5617,
         0.5597, 0.5004, 0.4724, 0.6106, 0.5270, 0.5607]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:100|train loss: 23.0949|l_recon: 13.4610|ridge_loss: 9.6339|l2_loss:9.8948|lr: 0.000500|prox_lam:5.00:   5%|▌         | 100/2000 [01:42<18:52,  1.68it/s]

lorenz_p15_t1000_f40: train: Epoch 100/2000 Reconstruction Loss: 13.4610 Reglarization: 9.6339 | Total Loss: 23.094926 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[1.4932455e-01 1.2351561e-02 1.8965475e-04 0.0000000e+00 0.0000000e+00
  2.3804093e-04 0.0000000e+00 9.0045924e-04 3.7945923e-04 4.5263651e-04
  8.3191786e-05 1.3359522e-03 1.4326266e-03 1.3474318e-02 1.4512556e-02]]
Norm: tensor([[1.4932e-01, 1.2352e-02, 1.8965e-04, 0.0000e+00, 0.0000e+00, 2.3804e-04,
         0.0000e+00, 9.0046e-04, 3.7946e-04, 4.5264e-04, 8.3192e-05, 1.3360e-03,
         1.4326e-03, 1.3474e-02, 1.4513e-02]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:150|train loss: 13.9557|l_recon: 9.7762|ridge_loss: 4.1795|l2_loss:8.9990|lr: 0.000500|prox_lam:5.00:   8%|▊         | 150/2000 [02:08<12:38,  2.44it/s]  

lorenz_p15_t1000_f40: train: Epoch 150/2000 Reconstruction Loss: 9.7762 Reglarization: 4.1795 | Total Loss: 13.955747 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[4.2333979e-02 4.5909230e-03 9.7727636e-04 5.1922095e-04 0.0000000e+00
  0.0000000e+00 0.0000000e+00 2.1001371e-04 5.9898477e-05 0.0000000e+00
  0.0000000e+00 5.1408471e-04 0.0000000e+00 5.0953315e-03 7.0973500e-03]]
Norm: tensor([[4.2334e-02, 4.5909e-03, 9.7728e-04, 5.1922e-04, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 2.1001e-04, 5.9898e-05, 0.0000e+00, 0.0000e+00, 5.1408e-04,
         0.0000e+00, 5.0953e-03, 7.0973e-03]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:200|train loss: 11.3229|l_recon: 9.6597|ridge_loss: 1.6632|l2_loss:8.6568|lr: 0.000500|prox_lam:6.00:  10%|█         | 200/2000 [02:31<19:18,  1.55it/s]  

lorenz_p15_t1000_f40: train: Epoch 200/2000 Reconstruction Loss: 9.6597 Reglarization: 1.6632 | Total Loss: 11.322907 || Learning Rate: 0.000500Prox lambda: 6.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.01053143 0.00193695 0.00062783 0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.0018461  0.00316468]]
Norm: tensor([[0.0105, 0.0019, 0.0006, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0018, 0.0032]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:250|train loss: 9.1636|l_recon: 8.3884|ridge_loss: 0.7752|l2_loss:9.2129|lr: 0.000250|prox_lam:7.00:  12%|█▎        | 250/2000 [02:54<13:37,  2.14it/s]  

lorenz_p15_t1000_f40: train: Epoch 250/2000 Reconstruction Loss: 8.3884 Reglarization: 0.7752 | Total Loss: 9.163598 || Learning Rate: 0.000250Prox lambda: 7.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.00586047 0.00115816 0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.00120009 0.00161975]]
Norm: tensor([[0.0059, 0.0012, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0012, 0.0016]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:300|train loss: 8.1605|l_recon: 7.6095|ridge_loss: 0.5510|l2_loss:8.0925|lr: 0.000063|prox_lam:8.00:  15%|█▌        | 300/2000 [03:17<13:10,  2.15it/s]

lorenz_p15_t1000_f40: train: Epoch 300/2000 Reconstruction Loss: 7.6095 Reglarization: 0.5510 | Total Loss: 8.160494 || Learning Rate: 0.000063Prox lambda: 8.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[3.4832165e-03 5.6874013e-04 1.4281890e-05 2.3257278e-05 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 6.3325628e-05 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 7.8194303e-04 1.3425534e-03]]
Norm: tensor([[3.4832e-03, 5.6874e-04, 1.4282e-05, 2.3257e-05, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 6.3326e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 7.8194e-04, 1.3426e-03]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:350|train loss: 8.1462|l_recon: 7.6643|ridge_loss: 0.4819|l2_loss:8.5693|lr: 0.000031|prox_lam:9.00:  18%|█▊        | 350/2000 [03:39<12:05,  2.28it/s]

lorenz_p15_t1000_f40: train: Epoch 350/2000 Reconstruction Loss: 7.6643 Reglarization: 0.4819 | Total Loss: 8.146159 || Learning Rate: 0.000031Prox lambda: 9.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[2.7804310e-03 5.2237755e-04 6.0808321e-05 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 5.9753592e-04 1.0939882e-03]]
Norm: tensor([[2.7804e-03, 5.2238e-04, 6.0808e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 5.9754e-04, 1.0940e-03]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:395|train loss: 7.7269|l_recon: 7.2686|ridge_loss: 0.4583|l2_loss:8.2845|lr: 0.000008|prox_lam:10.00:  20%|█▉        | 395/2000 [04:00<16:15,  1.64it/s]


Early stopping at epoch 395
training complete
最佳训练损失: 6.781628 (Epoch 295)
lam: 9.999999999999982,lr: 1e-05, norm: [[3.8226121e-03 7.2834303e-04 8.3093997e-05 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  1.6128912e-04 0.0000000e+00 0.0000000e+00 7.0141099e-04 1.2708284e-03]]
Training done, time: 240.13
ts_target: 1
{'overall': {'trainable': '17.21M', 'non_trainable': '0', 'total': '17.21M'}, 'denoise_Transformer': {'trainable': '17.21M', 'non_trainable': '0', 'total': '17.21M'}}
lorenz_p15_t1000_f40: start training...


Epoch:50|train loss: 36.4052|l_recon: 16.0827|ridge_loss: 20.3225|l2_loss:11.2378|lr: 0.000500|prox_lam:5.00:   2%|▎         | 50/2000 [01:02<23:26,  1.39it/s]      

lorenz_p15_t1000_f40: train: Epoch 50/2000 Reconstruction Loss: 16.0827 Reglarization: 20.3225 | Total Loss: 36.405187 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.5193586  0.72444725 0.5214309  0.5369968  0.5154219  0.5385265
  0.49966392 0.45901233 0.48300806 0.54773575 0.51868445 0.5555571
  0.4570166  0.48911288 0.5561892 ]]
Norm: tensor([[0.5194, 0.7244, 0.5214, 0.5370, 0.5154, 0.5385, 0.4997, 0.4590, 0.4830,
         0.5477, 0.5187, 0.5556, 0.4570, 0.4891, 0.5562]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:100|train loss: 21.3112|l_recon: 11.5697|ridge_loss: 9.7415|l2_loss:9.1577|lr: 0.000500|prox_lam:5.00:   5%|▌         | 100/2000 [01:29<21:12,  1.49it/s]

lorenz_p15_t1000_f40: train: Epoch 100/2000 Reconstruction Loss: 11.5697 Reglarization: 9.7415 | Total Loss: 21.311243 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[1.8467071e-02 1.3797547e-01 1.6578143e-02 1.4218958e-03 3.8706817e-04
  2.1850234e-03 4.8723305e-05 1.5207725e-03 1.5364499e-03 1.5730970e-03
  1.3817409e-03 1.6634283e-03 8.9497986e-04 1.0313583e-03 1.2035537e-02]]
Norm: tensor([[1.8467e-02, 1.3798e-01, 1.6578e-02, 1.4219e-03, 3.8707e-04, 2.1850e-03,
         4.8723e-05, 1.5208e-03, 1.5364e-03, 1.5731e-03, 1.3817e-03, 1.6634e-03,
         8.9498e-04, 1.0314e-03, 1.2036e-02]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:150|train loss: 13.6250|l_recon: 9.9527|ridge_loss: 3.6722|l2_loss:7.9451|lr: 0.000500|prox_lam:5.00:   8%|▊         | 150/2000 [01:58<30:05,  1.02it/s]  

lorenz_p15_t1000_f40: train: Epoch 150/2000 Reconstruction Loss: 9.9527 Reglarization: 3.6722 | Total Loss: 13.624967 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.00420841 0.01916902 0.00439218 0.         0.         0.
  0.00025648 0.         0.         0.00024148 0.0004194  0.
  0.         0.00031847 0.00405389]]
Norm: tensor([[0.0042, 0.0192, 0.0044, 0.0000, 0.0000, 0.0000, 0.0003, 0.0000, 0.0000,
         0.0002, 0.0004, 0.0000, 0.0000, 0.0003, 0.0041]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:200|train loss: 10.0630|l_recon: 8.6638|ridge_loss: 1.3992|l2_loss:7.8617|lr: 0.000500|prox_lam:6.00:  10%|█         | 200/2000 [02:21<13:44,  2.18it/s]  

lorenz_p15_t1000_f40: train: Epoch 200/2000 Reconstruction Loss: 8.6638 Reglarization: 1.3992 | Total Loss: 10.062976 || Learning Rate: 0.000500Prox lambda: 6.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.00278532 0.00990386 0.00205145 0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.00215946]]
Norm: tensor([[0.0028, 0.0099, 0.0021, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0022]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:250|train loss: 8.5228|l_recon: 7.8799|ridge_loss: 0.6430|l2_loss:8.1647|lr: 0.000250|prox_lam:7.00:  12%|█▎        | 250/2000 [02:45<14:36,  2.00it/s]  

lorenz_p15_t1000_f40: train: Epoch 250/2000 Reconstruction Loss: 7.8799 Reglarization: 0.6430 | Total Loss: 8.522832 || Learning Rate: 0.000250Prox lambda: 7.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[1.4801016e-03 6.6388929e-03 1.1650426e-03 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  7.7686505e-05 0.0000000e+00 0.0000000e+00 0.0000000e+00 7.1280240e-04]]
Norm: tensor([[1.4801e-03, 6.6389e-03, 1.1650e-03, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 7.7687e-05, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 7.1280e-04]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:300|train loss: 8.0180|l_recon: 7.5698|ridge_loss: 0.4482|l2_loss:8.4597|lr: 0.000125|prox_lam:8.00:  15%|█▌        | 300/2000 [03:07<12:22,  2.29it/s]

lorenz_p15_t1000_f40: train: Epoch 300/2000 Reconstruction Loss: 7.5698 Reglarization: 0.4482 | Total Loss: 8.018013 || Learning Rate: 0.000125Prox lambda: 8.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[1.0018187e-03 3.4430819e-03 7.5880648e-04 3.9071660e-05 0.0000000e+00
  0.0000000e+00 8.6212771e-05 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 8.2668274e-05 0.0000000e+00 0.0000000e+00 7.0464966e-04]]
Norm: tensor([[1.0018e-03, 3.4431e-03, 7.5881e-04, 3.9072e-05, 0.0000e+00, 0.0000e+00,
         8.6213e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 8.2668e-05,
         0.0000e+00, 0.0000e+00, 7.0465e-04]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:343|train loss: 8.1316|l_recon: 7.7765|ridge_loss: 0.3551|l2_loss:7.7377|lr: 0.000063|prox_lam:9.00:  17%|█▋        | 343/2000 [03:26<16:39,  1.66it/s]


Early stopping at epoch 343
training complete
最佳训练损失: 6.645987 (Epoch 243)
lam: 8.999999999999986,lr: 1e-05, norm: [[1.5160455e-03 7.1851718e-03 1.5577527e-03 1.3197481e-04 0.0000000e+00
  1.3005454e-05 0.0000000e+00 1.1265953e-04 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 1.4090216e-03]]
Training done, time: 206.81
ts_target: 2
{'overall': {'trainable': '17.21M', 'non_trainable': '0', 'total': '17.21M'}, 'denoise_Transformer': {'trainable': '17.21M', 'non_trainable': '0', 'total': '17.21M'}}
lorenz_p15_t1000_f40: start training...


Epoch:50|train loss: 36.4745|l_recon: 16.2312|ridge_loss: 20.2433|l2_loss:11.9088|lr: 0.000500|prox_lam:5.00:   2%|▎         | 50/2000 [01:08<41:36,  1.28s/it]      

lorenz_p15_t1000_f40: train: Epoch 50/2000 Reconstruction Loss: 16.2312 Reglarization: 20.2433 | Total Loss: 36.474496 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.6007979  0.5549461  0.6938402  0.5616434  0.53105146 0.46506628
  0.51442635 0.54002726 0.52916706 0.54665977 0.46355826 0.48913744
  0.54254264 0.5460207  0.5315655 ]]
Norm: tensor([[0.6008, 0.5549, 0.6938, 0.5616, 0.5311, 0.4651, 0.5144, 0.5400, 0.5292,
         0.5467, 0.4636, 0.4891, 0.5425, 0.5460, 0.5316]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:100|train loss: 23.2045|l_recon: 13.1903|ridge_loss: 10.0142|l2_loss:10.9625|lr: 0.000500|prox_lam:5.00:   5%|▌         | 100/2000 [01:33<14:01,  2.26it/s]

lorenz_p15_t1000_f40: train: Epoch 100/2000 Reconstruction Loss: 13.1903 Reglarization: 10.0142 | Total Loss: 23.204540 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[1.5296832e-02 2.2789946e-02 1.4620507e-01 1.3830587e-02 4.3398989e-03
  0.0000000e+00 1.7012679e-03 1.1393884e-03 1.2992416e-04 3.0829457e-03
  9.7804924e-04 0.0000000e+00 1.3002460e-03 2.3886375e-04 1.7366344e-03]]
Norm: tensor([[1.5297e-02, 2.2790e-02, 1.4621e-01, 1.3831e-02, 4.3399e-03, 0.0000e+00,
         1.7013e-03, 1.1394e-03, 1.2992e-04, 3.0829e-03, 9.7805e-04, 0.0000e+00,
         1.3002e-03, 2.3886e-04, 1.7366e-03]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:150|train loss: 14.5263|l_recon: 10.2216|ridge_loss: 4.3047|l2_loss:8.5846|lr: 0.000500|prox_lam:5.00:   8%|▊         | 150/2000 [01:58<13:39,  2.26it/s]  

lorenz_p15_t1000_f40: train: Epoch 150/2000 Reconstruction Loss: 10.2216 Reglarization: 4.3047 | Total Loss: 14.526324 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0031002  0.00549287 0.02949589 0.00423506 0.         0.
  0.         0.         0.00032982 0.0002037  0.00029563 0.
  0.0003602  0.         0.00045626]]
Norm: tensor([[0.0031, 0.0055, 0.0295, 0.0042, 0.0000, 0.0000, 0.0000, 0.0000, 0.0003,
         0.0002, 0.0003, 0.0000, 0.0004, 0.0000, 0.0005]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:200|train loss: 10.7884|l_recon: 9.0756|ridge_loss: 1.7128|l2_loss:8.0610|lr: 0.000500|prox_lam:6.00:  10%|█         | 200/2000 [02:22<20:30,  1.46it/s]  

lorenz_p15_t1000_f40: train: Epoch 200/2000 Reconstruction Loss: 9.0756 Reglarization: 1.7128 | Total Loss: 10.788403 || Learning Rate: 0.000500Prox lambda: 6.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.00228604 0.00309201 0.01138347 0.00224236 0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.00018575]]
Norm: tensor([[0.0023, 0.0031, 0.0114, 0.0022, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0002]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:250|train loss: 8.6804|l_recon: 7.7816|ridge_loss: 0.8988|l2_loss:7.9881|lr: 0.000250|prox_lam:7.00:  12%|█▎        | 250/2000 [02:49<13:31,  2.16it/s]  

lorenz_p15_t1000_f40: train: Epoch 250/2000 Reconstruction Loss: 7.7816 Reglarization: 0.8988 | Total Loss: 8.680428 || Learning Rate: 0.000250Prox lambda: 7.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.00086031 0.0016567  0.00578588 0.00085803 0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.        ]]
Norm: tensor([[0.0009, 0.0017, 0.0058, 0.0009, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:300|train loss: 8.5526|l_recon: 7.9360|ridge_loss: 0.6166|l2_loss:8.2709|lr: 0.000125|prox_lam:8.00:  15%|█▌        | 300/2000 [03:12<12:35,  2.25it/s]

lorenz_p15_t1000_f40: train: Epoch 300/2000 Reconstruction Loss: 7.9360 Reglarization: 0.6166 | Total Loss: 8.552648 || Learning Rate: 0.000125Prox lambda: 8.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0004503  0.00095427 0.00345192 0.00064715 0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.        ]]
Norm: tensor([[0.0005, 0.0010, 0.0035, 0.0006, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:325|train loss: 8.2374|l_recon: 7.7141|ridge_loss: 0.5232|l2_loss:7.3246|lr: 0.000063|prox_lam:9.00:  16%|█▋        | 325/2000 [03:22<17:23,  1.61it/s]


Early stopping at epoch 325
training complete
最佳训练损失: 6.964899 (Epoch 225)
lam: 8.999999999999986,lr: 1e-05, norm: [[0.00116359 0.00189706 0.00759684 0.00112302 0.00021388 0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.00018514]]
Training done, time: 202.47
ts_target: 3
{'overall': {'trainable': '17.21M', 'non_trainable': '0', 'total': '17.21M'}, 'denoise_Transformer': {'trainable': '17.21M', 'non_trainable': '0', 'total': '17.21M'}}
lorenz_p15_t1000_f40: start training...


Epoch:50|train loss: 36.5431|l_recon: 16.2747|ridge_loss: 20.2684|l2_loss:11.6602|lr: 0.000500|prox_lam:5.00:   2%|▎         | 50/2000 [01:00<19:35,  1.66it/s]      

lorenz_p15_t1000_f40: train: Epoch 50/2000 Reconstruction Loss: 16.2747 Reglarization: 20.2684 | Total Loss: 36.543149 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.57809764 0.58562833 0.57484686 0.72605574 0.5977625  0.53212196
  0.5396946  0.5326176  0.54081994 0.5154148  0.5385392  0.44126722
  0.5689891  0.59326595 0.47379223]]
Norm: tensor([[0.5781, 0.5856, 0.5748, 0.7261, 0.5978, 0.5321, 0.5397, 0.5326, 0.5408,
         0.5154, 0.5385, 0.4413, 0.5690, 0.5933, 0.4738]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:100|train loss: 22.6297|l_recon: 12.9594|ridge_loss: 9.6703|l2_loss:10.4838|lr: 0.000500|prox_lam:5.00:   5%|▌         | 100/2000 [01:30<14:46,  2.14it/s]

lorenz_p15_t1000_f40: train: Epoch 100/2000 Reconstruction Loss: 12.9594 Reglarization: 9.6703 | Total Loss: 22.629738 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.00300057 0.01363142 0.02690443 0.15335584 0.01770066 0.0004775
  0.00166017 0.00204589 0.00020513 0.00072818 0.00169624 0.00265943
  0.         0.         0.00235845]]
Norm: tensor([[0.0030, 0.0136, 0.0269, 0.1534, 0.0177, 0.0005, 0.0017, 0.0020, 0.0002,
         0.0007, 0.0017, 0.0027, 0.0000, 0.0000, 0.0024]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:150|train loss: 14.6944|l_recon: 10.5293|ridge_loss: 4.1650|l2_loss:8.5923|lr: 0.000500|prox_lam:5.00:   8%|▊         | 150/2000 [01:56<13:56,  2.21it/s] 

lorenz_p15_t1000_f40: train: Epoch 150/2000 Reconstruction Loss: 10.5293 Reglarization: 4.1650 | Total Loss: 14.694380 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.00029417 0.00409817 0.00631665 0.0387384  0.00515576 0.
  0.         0.         0.         0.         0.         0.
  0.00030782 0.00011261 0.        ]]
Norm: tensor([[0.0003, 0.0041, 0.0063, 0.0387, 0.0052, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0003, 0.0001, 0.0000]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:200|train loss: 10.9110|l_recon: 9.2650|ridge_loss: 1.6461|l2_loss:8.3224|lr: 0.000500|prox_lam:6.00:  10%|█         | 200/2000 [02:20<16:09,  1.86it/s]  

lorenz_p15_t1000_f40: train: Epoch 200/2000 Reconstruction Loss: 9.2650 Reglarization: 1.6461 | Total Loss: 10.911020 || Learning Rate: 0.000500Prox lambda: 6.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 1.8326411e-03 3.6955578e-03 1.0397103e-02 2.4606900e-03
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 3.7369784e-05 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 1.8326e-03, 3.6956e-03, 1.0397e-02, 2.4607e-03, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         3.7370e-05, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:250|train loss: 10.6537|l_recon: 9.9948|ridge_loss: 0.6589|l2_loss:8.6081|lr: 0.000500|prox_lam:7.00:  12%|█▎        | 250/2000 [02:43<12:23,  2.36it/s]  

lorenz_p15_t1000_f40: train: Epoch 250/2000 Reconstruction Loss: 9.9948 Reglarization: 0.6589 | Total Loss: 10.653739 || Learning Rate: 0.000500Prox lambda: 7.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.         0.         0.         0.00637967 0.         0.
  0.         0.         0.         0.         0.00034316 0.
  0.         0.         0.        ]]
Norm: tensor([[0.0000, 0.0000, 0.0000, 0.0064, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0003, 0.0000, 0.0000, 0.0000, 0.0000]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:300|train loss: 8.5609|l_recon: 8.1291|ridge_loss: 0.4318|l2_loss:8.3472|lr: 0.000125|prox_lam:8.00:  15%|█▌        | 300/2000 [03:07<16:30,  1.72it/s]   

lorenz_p15_t1000_f40: train: Epoch 300/2000 Reconstruction Loss: 8.1291 Reglarization: 0.4318 | Total Loss: 8.560929 || Learning Rate: 0.000125Prox lambda: 8.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 3.7528481e-03 0.0000000e+00
  9.6851953e-05 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 3.7528e-03, 0.0000e+00, 9.6852e-05,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:350|train loss: 8.1104|l_recon: 7.7522|ridge_loss: 0.3582|l2_loss:7.9602|lr: 0.000031|prox_lam:9.00:  18%|█▊        | 350/2000 [03:29<11:20,  2.43it/s]

lorenz_p15_t1000_f40: train: Epoch 350/2000 Reconstruction Loss: 7.7522 Reglarization: 0.3582 | Total Loss: 8.110357 || Learning Rate: 0.000031Prox lambda: 9.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 2.6964524e-05 0.0000000e+00 2.4518543e-03 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 2.6965e-05, 0.0000e+00, 2.4519e-03, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:397|train loss: 7.9737|l_recon: 7.6408|ridge_loss: 0.3329|l2_loss:8.8755|lr: 0.000016|prox_lam:10.00:  20%|█▉        | 397/2000 [03:51<15:34,  1.72it/s]


Early stopping at epoch 397
training complete
最佳训练损失: 6.725905 (Epoch 297)
lam: 9.999999999999982,lr: 1e-05, norm: [[0.        0.        0.        0.0031656 0.        0.        0.
  0.        0.        0.        0.        0.        0.        0.
  0.       ]]
Training done, time: 231.36
ts_target: 4
{'overall': {'trainable': '17.21M', 'non_trainable': '0', 'total': '17.21M'}, 'denoise_Transformer': {'trainable': '17.21M', 'non_trainable': '0', 'total': '17.21M'}}
lorenz_p15_t1000_f40: start training...


Epoch:50|train loss: 37.2402|l_recon: 16.8623|ridge_loss: 20.3778|l2_loss:13.2056|lr: 0.000500|prox_lam:5.00:   2%|▎         | 50/2000 [01:09<23:21,  1.39it/s]      

lorenz_p15_t1000_f40: train: Epoch 50/2000 Reconstruction Loss: 16.8623 Reglarization: 20.3778 | Total Loss: 37.240151 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.52025414 0.47328442 0.5907743  0.56137246 0.75338805 0.52452505
  0.5569646  0.43977654 0.5571621  0.49305886 0.48563492 0.5318298
  0.51989    0.5110762  0.4530751 ]]
Norm: tensor([[0.5203, 0.4733, 0.5908, 0.5614, 0.7534, 0.5245, 0.5570, 0.4398, 0.5572,
         0.4931, 0.4856, 0.5318, 0.5199, 0.5111, 0.4531]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:100|train loss: 23.9953|l_recon: 13.2419|ridge_loss: 10.7534|l2_loss:10.6063|lr: 0.000500|prox_lam:5.00:   5%|▌         | 100/2000 [01:37<15:11,  2.08it/s]

lorenz_p15_t1000_f40: train: Epoch 100/2000 Reconstruction Loss: 13.2419 Reglarization: 10.7534 | Total Loss: 23.995278 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.         0.0008342  0.01353875 0.02959397 0.14579606 0.01682785
  0.         0.         0.         0.00226558 0.         0.
  0.         0.00052528 0.00212968]]
Norm: tensor([[0.0000, 0.0008, 0.0135, 0.0296, 0.1458, 0.0168, 0.0000, 0.0000, 0.0000,
         0.0023, 0.0000, 0.0000, 0.0000, 0.0005, 0.0021]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:150|train loss: 15.0493|l_recon: 10.3652|ridge_loss: 4.6841|l2_loss:8.8957|lr: 0.000500|prox_lam:5.00:   8%|▊         | 150/2000 [02:03<13:41,  2.25it/s]  

lorenz_p15_t1000_f40: train: Epoch 150/2000 Reconstruction Loss: 10.3652 Reglarization: 4.6841 | Total Loss: 15.049274 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[1.5955977e-04 0.0000000e+00 3.1185704e-03 6.3187163e-03 3.4820039e-02
  4.6339091e-03 0.0000000e+00 0.0000000e+00 5.5376836e-04 5.5738728e-05
  4.9950648e-04 0.0000000e+00 0.0000000e+00 0.0000000e+00 9.9102571e-04]]
Norm: tensor([[1.5956e-04, 0.0000e+00, 3.1186e-03, 6.3187e-03, 3.4820e-02, 4.6339e-03,
         0.0000e+00, 0.0000e+00, 5.5377e-04, 5.5739e-05, 4.9951e-04, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 9.9103e-04]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:200|train loss: 10.9482|l_recon: 9.1283|ridge_loss: 1.8199|l2_loss:8.8036|lr: 0.000500|prox_lam:6.00:  10%|█         | 200/2000 [02:28<13:38,  2.20it/s]  

lorenz_p15_t1000_f40: train: Epoch 200/2000 Reconstruction Loss: 9.1283 Reglarization: 1.8199 | Total Loss: 10.948238 || Learning Rate: 0.000500Prox lambda: 6.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.         0.         0.00172731 0.00239904 0.01098375 0.0020043
  0.00066147 0.         0.         0.         0.         0.
  0.         0.         0.00074911]]
Norm: tensor([[0.0000, 0.0000, 0.0017, 0.0024, 0.0110, 0.0020, 0.0007, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0007]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:250|train loss: 9.1011|l_recon: 8.2187|ridge_loss: 0.8824|l2_loss:9.3221|lr: 0.000250|prox_lam:7.00:  12%|█▎        | 250/2000 [02:51<12:31,  2.33it/s]   

lorenz_p15_t1000_f40: train: Epoch 250/2000 Reconstruction Loss: 8.2187 Reglarization: 0.8824 | Total Loss: 9.101123 || Learning Rate: 0.000250Prox lambda: 7.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.         0.         0.0010297  0.00179621 0.00555948 0.00124915
  0.         0.         0.         0.         0.         0.
  0.         0.         0.        ]]
Norm: tensor([[0.0000, 0.0000, 0.0010, 0.0018, 0.0056, 0.0012, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:300|train loss: 8.1183|l_recon: 7.4609|ridge_loss: 0.6574|l2_loss:7.7068|lr: 0.000063|prox_lam:8.00:  15%|█▌        | 300/2000 [03:15<11:28,  2.47it/s]

lorenz_p15_t1000_f40: train: Epoch 300/2000 Reconstruction Loss: 7.4609 Reglarization: 0.6574 | Total Loss: 8.118268 || Learning Rate: 0.000063Prox lambda: 8.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 8.9264842e-04 1.2352054e-03 3.1664404e-03
  1.0349813e-03 3.2473356e-05 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 6.5843458e-05]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 8.9265e-04, 1.2352e-03, 3.1664e-03, 1.0350e-03,
         3.2473e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 6.5843e-05]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:350|train loss: 7.8432|l_recon: 7.2619|ridge_loss: 0.5813|l2_loss:8.1697|lr: 0.000031|prox_lam:9.00:  18%|█▊        | 350/2000 [03:37<11:32,  2.38it/s]

lorenz_p15_t1000_f40: train: Epoch 350/2000 Reconstruction Loss: 7.2619 Reglarization: 0.5813 | Total Loss: 7.843236 || Learning Rate: 0.000031Prox lambda: 9.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 7.5514341e-04 1.0520435e-03 2.4456326e-03
  7.8943907e-04 3.7746508e-05 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 1.8796971e-05]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 7.5514e-04, 1.0520e-03, 2.4456e-03, 7.8944e-04,
         3.7747e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 1.8797e-05]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:400|train loss: 7.9527|l_recon: 7.4005|ridge_loss: 0.5523|l2_loss:7.7735|lr: 0.000008|prox_lam:10.00:  20%|██        | 400/2000 [04:01<12:28,  2.14it/s] 

lorenz_p15_t1000_f40: train: Epoch 400/2000 Reconstruction Loss: 7.4005 Reglarization: 0.5523 | Total Loss: 7.952734 || Learning Rate: 0.000008Prox lambda: 10.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 6.5519748e-04 9.4270939e-04 2.0022634e-03
  6.8257563e-04 1.5217591e-06 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 1.7471459e-05]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 6.5520e-04, 9.4271e-04, 2.0023e-03, 6.8258e-04,
         1.5218e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 1.7471e-05]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:450|train loss: 7.7552|l_recon: 7.2130|ridge_loss: 0.5422|l2_loss:7.4274|lr: 0.000002|prox_lam:11.00:  22%|██▎       | 450/2000 [04:22<11:00,  2.35it/s]

lorenz_p15_t1000_f40: train: Epoch 450/2000 Reconstruction Loss: 7.2130 Reglarization: 0.5422 | Total Loss: 7.755209 || Learning Rate: 0.000002Prox lambda: 11.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 5.7711970e-04 7.9702353e-04 1.8007038e-03
  6.6176354e-04 5.0203835e-06 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 1.6801980e-05]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 5.7712e-04, 7.9702e-04, 1.8007e-03, 6.6176e-04,
         5.0204e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 1.6802e-05]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:472|train loss: 7.8152|l_recon: 7.2746|ridge_loss: 0.5406|l2_loss:7.6853|lr: 0.000002|prox_lam:12.00:  24%|██▎       | 472/2000 [04:32<14:41,  1.73it/s]


Early stopping at epoch 472
training complete
最佳训练损失: 6.650079 (Epoch 372)
lam: 11.999999999999975,lr: 1e-05, norm: [[0.0000000e+00 0.0000000e+00 6.7043363e-04 9.7810361e-04 2.0937833e-03
  7.6439313e-04 2.3344532e-05 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 4.1256237e-05]]
Training done, time: 272.25
ts_target: 5
{'overall': {'trainable': '17.21M', 'non_trainable': '0', 'total': '17.21M'}, 'denoise_Transformer': {'trainable': '17.21M', 'non_trainable': '0', 'total': '17.21M'}}
lorenz_p15_t1000_f40: start training...


Epoch:50|train loss: 36.5358|l_recon: 16.2662|ridge_loss: 20.2696|l2_loss:10.6049|lr: 0.000500|prox_lam:5.00:   2%|▎         | 50/2000 [01:00<21:53,  1.48it/s]      

lorenz_p15_t1000_f40: train: Epoch 50/2000 Reconstruction Loss: 16.2662 Reglarization: 20.2696 | Total Loss: 36.535840 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.53398466 0.52709967 0.5491906  0.5550613  0.5545559  0.77175456
  0.55673105 0.5830464  0.49544105 0.5088718  0.5960564  0.53584546
  0.4821418  0.5315451  0.49011436]]
Norm: tensor([[0.5340, 0.5271, 0.5492, 0.5551, 0.5546, 0.7718, 0.5567, 0.5830, 0.4954,
         0.5089, 0.5961, 0.5358, 0.4821, 0.5315, 0.4901]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:100|train loss: 21.8407|l_recon: 11.9414|ridge_loss: 9.8993|l2_loss:10.2284|lr: 0.000500|prox_lam:5.00:   5%|▌         | 100/2000 [01:23<14:01,  2.26it/s]

lorenz_p15_t1000_f40: train: Epoch 100/2000 Reconstruction Loss: 11.9414 Reglarization: 9.8993 | Total Loss: 21.840688 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[2.6919511e-03 0.0000000e+00 9.6116081e-04 1.1492629e-02 2.0997690e-02
  1.6852397e-01 1.4519125e-02 0.0000000e+00 0.0000000e+00 6.0926680e-04
  1.6258382e-03 0.0000000e+00 0.0000000e+00 7.5207798e-05 0.0000000e+00]]
Norm: tensor([[2.6920e-03, 0.0000e+00, 9.6116e-04, 1.1493e-02, 2.0998e-02, 1.6852e-01,
         1.4519e-02, 0.0000e+00, 0.0000e+00, 6.0927e-04, 1.6258e-03, 0.0000e+00,
         0.0000e+00, 7.5208e-05, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:150|train loss: 16.0162|l_recon: 11.7937|ridge_loss: 4.2226|l2_loss:9.7908|lr: 0.000500|prox_lam:5.00:   8%|▊         | 150/2000 [01:50<18:14,  1.69it/s] 

lorenz_p15_t1000_f40: train: Epoch 150/2000 Reconstruction Loss: 11.7937 Reglarization: 4.2226 | Total Loss: 16.016246 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[4.3850858e-04 0.0000000e+00 3.1015766e-04 3.0059060e-03 6.0702739e-03
  3.3157337e-02 3.4691896e-03 3.6467006e-04 5.1631592e-05 4.7732238e-04
  1.9778287e-04 0.0000000e+00 0.0000000e+00 6.1653677e-04 9.4794668e-06]]
Norm: tensor([[4.3851e-04, 0.0000e+00, 3.1016e-04, 3.0059e-03, 6.0703e-03, 3.3157e-02,
         3.4692e-03, 3.6467e-04, 5.1632e-05, 4.7732e-04, 1.9778e-04, 0.0000e+00,
         0.0000e+00, 6.1654e-04, 9.4795e-06]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:200|train loss: 11.2524|l_recon: 9.5549|ridge_loss: 1.6974|l2_loss:8.6747|lr: 0.000500|prox_lam:6.00:  10%|█         | 200/2000 [02:14<13:51,  2.16it/s]  

lorenz_p15_t1000_f40: train: Epoch 200/2000 Reconstruction Loss: 9.5549 Reglarization: 1.6974 | Total Loss: 11.252351 || Learning Rate: 0.000500Prox lambda: 6.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 5.9876129e-05 0.0000000e+00 1.8270056e-03 2.7214251e-03
  1.0168903e-02 1.0649408e-03 6.7705987e-05 1.4808960e-05 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 5.9876e-05, 0.0000e+00, 1.8270e-03, 2.7214e-03, 1.0169e-02,
         1.0649e-03, 6.7706e-05, 1.4809e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:250|train loss: 8.8555|l_recon: 8.0862|ridge_loss: 0.7693|l2_loss:7.9324|lr: 0.000250|prox_lam:7.00:  12%|█▎        | 250/2000 [02:39<13:36,  2.14it/s]   

lorenz_p15_t1000_f40: train: Epoch 250/2000 Reconstruction Loss: 8.0862 Reglarization: 0.7693 | Total Loss: 8.855517 || Learning Rate: 0.000250Prox lambda: 7.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 6.6130143e-04 1.7641142e-03
  5.5416282e-03 1.0406475e-03 0.0000000e+00 1.3465632e-04 0.0000000e+00
  5.1809708e-05 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 6.6130e-04, 1.7641e-03, 5.5416e-03,
         1.0406e-03, 0.0000e+00, 1.3466e-04, 0.0000e+00, 5.1810e-05, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:300|train loss: 7.9682|l_recon: 7.4592|ridge_loss: 0.5090|l2_loss:7.6117|lr: 0.000125|prox_lam:8.00:  15%|█▌        | 300/2000 [03:03<12:59,  2.18it/s]   

lorenz_p15_t1000_f40: train: Epoch 300/2000 Reconstruction Loss: 7.4592 Reglarization: 0.5090 | Total Loss: 7.968183 || Learning Rate: 0.000125Prox lambda: 8.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.         0.         0.         0.00080476 0.00150938 0.00341857
  0.00108833 0.         0.         0.         0.         0.
  0.         0.         0.        ]]
Norm: tensor([[0.0000, 0.0000, 0.0000, 0.0008, 0.0015, 0.0034, 0.0011, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:350|train loss: 7.6903|l_recon: 7.2659|ridge_loss: 0.4244|l2_loss:6.8408|lr: 0.000031|prox_lam:9.00:  18%|█▊        | 350/2000 [03:28<12:47,  2.15it/s]

lorenz_p15_t1000_f40: train: Epoch 350/2000 Reconstruction Loss: 7.2659 Reglarization: 0.4244 | Total Loss: 7.690292 || Learning Rate: 0.000031Prox lambda: 9.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.         0.         0.         0.00076759 0.00127256 0.0023479
  0.00086253 0.         0.         0.         0.         0.
  0.         0.         0.        ]]
Norm: tensor([[0.0000, 0.0000, 0.0000, 0.0008, 0.0013, 0.0023, 0.0009, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:400|train loss: 7.4804|l_recon: 7.0832|ridge_loss: 0.3972|l2_loss:7.2727|lr: 0.000016|prox_lam:10.00:  20%|██        | 400/2000 [03:55<12:32,  2.13it/s]

lorenz_p15_t1000_f40: train: Epoch 400/2000 Reconstruction Loss: 7.0832 Reglarization: 0.3972 | Total Loss: 7.480442 || Learning Rate: 0.000016Prox lambda: 10.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 7.2091568e-04 1.2633357e-03
  1.9199224e-03 7.3130877e-04 0.0000000e+00 2.2600230e-05 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 7.2092e-04, 1.2633e-03, 1.9199e-03,
         7.3131e-04, 0.0000e+00, 2.2600e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:450|train loss: 7.3754|l_recon: 6.9878|ridge_loss: 0.3876|l2_loss:8.1502|lr: 0.000004|prox_lam:11.00:  22%|██▎       | 450/2000 [04:19<12:44,  2.03it/s]

lorenz_p15_t1000_f40: train: Epoch 450/2000 Reconstruction Loss: 6.9878 Reglarization: 0.3876 | Total Loss: 7.375416 || Learning Rate: 0.000004Prox lambda: 11.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 5.9804867e-04 1.1245445e-03
  1.6762996e-03 7.1129459e-04 4.4280569e-06 6.3761945e-06 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 5.9805e-04, 1.1245e-03, 1.6763e-03,
         7.1129e-04, 4.4281e-06, 6.3762e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:500|train loss: 7.3359|l_recon: 6.9522|ridge_loss: 0.3837|l2_loss:7.8200|lr: 0.000002|prox_lam:12.00:  25%|██▌       | 500/2000 [04:42<11:07,  2.25it/s]

lorenz_p15_t1000_f40: train: Epoch 500/2000 Reconstruction Loss: 6.9522 Reglarization: 0.3837 | Total Loss: 7.335913 || Learning Rate: 0.000002Prox lambda: 12.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 5.3839682e-04 1.0286424e-03
  1.5664244e-03 6.0323993e-04 0.0000000e+00 1.1975917e-06 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 5.3840e-04, 1.0286e-03, 1.5664e-03,
         6.0324e-04, 0.0000e+00, 1.1976e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:550|train loss: 7.4688|l_recon: 7.0864|ridge_loss: 0.3824|l2_loss:8.0360|lr: 0.000000|prox_lam:13.00:  28%|██▊       | 550/2000 [05:04<10:53,  2.22it/s]

lorenz_p15_t1000_f40: train: Epoch 550/2000 Reconstruction Loss: 7.0864 Reglarization: 0.3824 | Total Loss: 7.468804 || Learning Rate: 0.000000Prox lambda: 13.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 5.2376755e-04 9.7215961e-04
  1.5051030e-03 5.6594936e-04 0.0000000e+00 2.2404811e-06 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 5.2377e-04, 9.7216e-04, 1.5051e-03,
         5.6595e-04, 0.0000e+00, 2.2405e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:566|train loss: 7.3832|l_recon: 7.0010|ridge_loss: 0.3822|l2_loss:7.6389|lr: 0.000000|prox_lam:14.00:  28%|██▊       | 566/2000 [05:12<13:10,  1.81it/s]


Early stopping at epoch 566
training complete
最佳训练损失: 6.374257 (Epoch 466)
lam: 13.999999999999968,lr: 1e-05, norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 5.4199621e-04 1.0421189e-03
  1.6054393e-03 6.0686556e-04 0.0000000e+00 4.0735067e-06 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Training done, time: 312.10
ts_target: 6
{'overall': {'trainable': '17.21M', 'non_trainable': '0', 'total': '17.21M'}, 'denoise_Transformer': {'trainable': '17.21M', 'non_trainable': '0', 'total': '17.21M'}}
lorenz_p15_t1000_f40: start training...


Epoch:50|train loss: 37.1919|l_recon: 16.8210|ridge_loss: 20.3709|l2_loss:11.6578|lr: 0.000500|prox_lam:5.00:   2%|▎         | 50/2000 [01:14<28:59,  1.12it/s]       

lorenz_p15_t1000_f40: train: Epoch 50/2000 Reconstruction Loss: 16.8210 Reglarization: 20.3709 | Total Loss: 37.191861 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.500217   0.52327585 0.41458833 0.47530547 0.52851856 0.4946937
  0.70329    0.46752533 0.54720956 0.4566455  0.5521192  0.5089583
  0.43726322 0.5075726  0.46408275]]
Norm: tensor([[0.5002, 0.5233, 0.4146, 0.4753, 0.5285, 0.4947, 0.7033, 0.4675, 0.5472,
         0.4566, 0.5521, 0.5090, 0.4373, 0.5076, 0.4641]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:100|train loss: 23.6530|l_recon: 13.4435|ridge_loss: 10.2095|l2_loss:9.4284|lr: 0.000500|prox_lam:5.00:   5%|▌         | 100/2000 [01:39<13:43,  2.31it/s]

lorenz_p15_t1000_f40: train: Epoch 100/2000 Reconstruction Loss: 13.4435 Reglarization: 10.2095 | Total Loss: 23.653028 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.00195979 0.00039511 0.         0.00135357 0.01211234 0.02563084
  0.14831385 0.01402599 0.00045053 0.00123187 0.00179829 0.00030523
  0.00016942 0.         0.        ]]
Norm: tensor([[0.0020, 0.0004, 0.0000, 0.0014, 0.0121, 0.0256, 0.1483, 0.0140, 0.0005,
         0.0012, 0.0018, 0.0003, 0.0002, 0.0000, 0.0000]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:150|train loss: 14.1728|l_recon: 10.0531|ridge_loss: 4.1197|l2_loss:9.4505|lr: 0.000500|prox_lam:5.00:   8%|▊         | 150/2000 [02:21<22:42,  1.36it/s]  

lorenz_p15_t1000_f40: train: Epoch 150/2000 Reconstruction Loss: 10.0531 Reglarization: 4.1197 | Total Loss: 14.172808 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 2.3067696e-05 0.0000000e+00 6.7552994e-04 3.4274044e-03
  6.7319772e-03 3.6045700e-02 4.3338188e-03 3.8515660e-04 9.0350397e-05
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 2.5357120e-05]]
Norm: tensor([[0.0000e+00, 2.3068e-05, 0.0000e+00, 6.7553e-04, 3.4274e-03, 6.7320e-03,
         3.6046e-02, 4.3338e-03, 3.8516e-04, 9.0350e-05, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 2.5357e-05]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:200|train loss: 12.2780|l_recon: 10.7618|ridge_loss: 1.5162|l2_loss:9.5187|lr: 0.000500|prox_lam:6.00:  10%|█         | 200/2000 [02:46<12:17,  2.44it/s] 

lorenz_p15_t1000_f40: train: Epoch 200/2000 Reconstruction Loss: 10.7618 Reglarization: 1.5162 | Total Loss: 12.277979 || Learning Rate: 0.000500Prox lambda: 6.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.         0.         0.         0.         0.00213988 0.00181821
  0.00965919 0.00136356 0.         0.         0.         0.
  0.         0.         0.        ]]
Norm: tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0021, 0.0018, 0.0097, 0.0014, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:250|train loss: 9.7188|l_recon: 8.9419|ridge_loss: 0.7768|l2_loss:8.9858|lr: 0.000250|prox_lam:7.00:  12%|█▎        | 250/2000 [03:13<13:38,  2.14it/s]   

lorenz_p15_t1000_f40: train: Epoch 250/2000 Reconstruction Loss: 8.9419 Reglarization: 0.7768 | Total Loss: 9.718791 || Learning Rate: 0.000250Prox lambda: 7.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 6.1236047e-03 0.0000000e+00 0.0000000e+00 8.5214037e-05
  7.2987517e-05 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         6.1236e-03, 0.0000e+00, 0.0000e+00, 8.5214e-05, 7.2988e-05, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:300|train loss: 8.7073|l_recon: 8.1139|ridge_loss: 0.5933|l2_loss:8.1851|lr: 0.000063|prox_lam:8.00:  15%|█▌        | 300/2000 [03:37<11:44,  2.41it/s]   

lorenz_p15_t1000_f40: train: Epoch 300/2000 Reconstruction Loss: 8.1139 Reglarization: 0.5933 | Total Loss: 8.707271 || Learning Rate: 0.000063Prox lambda: 8.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 8.0454745e-05
  0.0000000e+00 4.8938678e-03 4.5112916e-05 1.5347154e-04 1.0962482e-05
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 8.0455e-05, 0.0000e+00,
         4.8939e-03, 4.5113e-05, 1.5347e-04, 1.0962e-05, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:350|train loss: 8.2839|l_recon: 7.7480|ridge_loss: 0.5359|l2_loss:7.6641|lr: 0.000031|prox_lam:9.00:  18%|█▊        | 350/2000 [04:04<15:19,  1.79it/s] 

lorenz_p15_t1000_f40: train: Epoch 350/2000 Reconstruction Loss: 7.7480 Reglarization: 0.5359 | Total Loss: 8.283888 || Learning Rate: 0.000031Prox lambda: 9.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 4.8327605e-05
  0.0000000e+00 3.9649890e-03 0.0000000e+00 1.7119056e-04 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 3.1847737e-06 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 4.8328e-05, 0.0000e+00,
         3.9650e-03, 0.0000e+00, 1.7119e-04, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 3.1848e-06, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:400|train loss: 8.3149|l_recon: 7.7974|ridge_loss: 0.5175|l2_loss:8.1949|lr: 0.000008|prox_lam:10.00:  20%|██        | 400/2000 [04:32<11:07,  2.40it/s]  

lorenz_p15_t1000_f40: train: Epoch 400/2000 Reconstruction Loss: 7.7974 Reglarization: 0.5175 | Total Loss: 8.314878 || Learning Rate: 0.000008Prox lambda: 10.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[2.5508933e-05 0.0000000e+00 0.0000000e+00 0.0000000e+00 3.6566285e-05
  0.0000000e+00 3.6547217e-03 0.0000000e+00 9.2695598e-05 4.9146183e-06
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[2.5509e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 3.6566e-05, 0.0000e+00,
         3.6547e-03, 0.0000e+00, 9.2696e-05, 4.9146e-06, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:450|train loss: 8.2390|l_recon: 7.7282|ridge_loss: 0.5107|l2_loss:8.2754|lr: 0.000002|prox_lam:11.00:  22%|██▎       | 450/2000 [04:58<12:31,  2.06it/s]

lorenz_p15_t1000_f40: train: Epoch 450/2000 Reconstruction Loss: 7.7282 Reglarization: 0.5107 | Total Loss: 8.238965 || Learning Rate: 0.000002Prox lambda: 11.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 2.0666694e-05
  0.0000000e+00 3.3219249e-03 4.3630280e-06 6.0648013e-05 1.0385160e-05
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 2.0667e-05, 0.0000e+00,
         3.3219e-03, 4.3630e-06, 6.0648e-05, 1.0385e-05, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:500|train loss: 8.4963|l_recon: 7.9877|ridge_loss: 0.5087|l2_loss:7.8738|lr: 0.000001|prox_lam:12.00:  25%|██▌       | 500/2000 [05:22<11:55,  2.10it/s]

lorenz_p15_t1000_f40: train: Epoch 500/2000 Reconstruction Loss: 7.9877 Reglarization: 0.5087 | Total Loss: 8.496341 || Learning Rate: 0.000001Prox lambda: 12.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 2.9504081e-05
  0.0000000e+00 3.1713299e-03 0.0000000e+00 6.1459548e-05 8.8009001e-06
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 2.9504e-05, 0.0000e+00,
         3.1713e-03, 0.0000e+00, 6.1460e-05, 8.8009e-06, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:550|train loss: 8.1713|l_recon: 7.6633|ridge_loss: 0.5080|l2_loss:8.2168|lr: 0.000000|prox_lam:13.00:  28%|██▊       | 550/2000 [05:46<11:06,  2.18it/s]

lorenz_p15_t1000_f40: train: Epoch 550/2000 Reconstruction Loss: 7.6633 Reglarization: 0.5080 | Total Loss: 8.171312 || Learning Rate: 0.000000Prox lambda: 13.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 5.1608591e-08 0.0000000e+00 1.4385295e-05
  0.0000000e+00 3.0736851e-03 0.0000000e+00 3.5615987e-05 2.9231931e-06
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 5.1609e-08, 0.0000e+00, 1.4385e-05, 0.0000e+00,
         3.0737e-03, 0.0000e+00, 3.5616e-05, 2.9232e-06, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:600|train loss: 8.3209|l_recon: 7.8132|ridge_loss: 0.5077|l2_loss:9.3871|lr: 0.000000|prox_lam:14.00:  30%|███       | 600/2000 [06:10<10:16,  2.27it/s]

lorenz_p15_t1000_f40: train: Epoch 600/2000 Reconstruction Loss: 7.8132 Reglarization: 0.5077 | Total Loss: 8.320949 || Learning Rate: 0.000000Prox lambda: 14.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 1.6500673e-05
  0.0000000e+00 3.0164267e-03 0.0000000e+00 3.4360000e-05 2.7327977e-07
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.6501e-05, 0.0000e+00,
         3.0164e-03, 0.0000e+00, 3.4360e-05, 2.7328e-07, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:650|train loss: 8.4292|l_recon: 7.9216|ridge_loss: 0.5076|l2_loss:8.4261|lr: 0.000000|prox_lam:15.00:  32%|███▎      | 650/2000 [06:34<10:44,  2.10it/s]

lorenz_p15_t1000_f40: train: Epoch 650/2000 Reconstruction Loss: 7.9216 Reglarization: 0.5076 | Total Loss: 8.429205 || Learning Rate: 0.000000Prox lambda: 15.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 6.7546216e-06
  0.0000000e+00 2.9689290e-03 0.0000000e+00 2.3867251e-05 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 6.7546e-06, 0.0000e+00,
         2.9689e-03, 0.0000e+00, 2.3867e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:700|train loss: 8.3683|l_recon: 7.8609|ridge_loss: 0.5074|l2_loss:6.8689|lr: 0.000000|prox_lam:16.00:  35%|███▌      | 700/2000 [06:57<10:01,  2.16it/s]

lorenz_p15_t1000_f40: train: Epoch 700/2000 Reconstruction Loss: 7.8609 Reglarization: 0.5074 | Total Loss: 8.368341 || Learning Rate: 0.000000Prox lambda: 16.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 6.2388658e-06
  0.0000000e+00 2.9037828e-03 0.0000000e+00 2.4197981e-05 9.0330087e-07
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 6.2389e-06, 0.0000e+00,
         2.9038e-03, 0.0000e+00, 2.4198e-05, 9.0330e-07, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:706|train loss: 8.2155|l_recon: 7.7081|ridge_loss: 0.5074|l2_loss:7.5498|lr: 0.000000|prox_lam:17.00:  35%|███▌      | 706/2000 [07:00<12:50,  1.68it/s]


Early stopping at epoch 706
training complete
最佳训练损失: 6.553015 (Epoch 606)
lam: 16.999999999999957,lr: 1e-05, norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 1.3774350e-05
  0.0000000e+00 3.0112090e-03 0.0000000e+00 2.6866952e-05 1.7913601e-06
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Training done, time: 420.42
ts_target: 7
{'overall': {'trainable': '17.21M', 'non_trainable': '0', 'total': '17.21M'}, 'denoise_Transformer': {'trainable': '17.21M', 'non_trainable': '0', 'total': '17.21M'}}
lorenz_p15_t1000_f40: start training...


Epoch:50|train loss: 38.9171|l_recon: 18.4783|ridge_loss: 20.4387|l2_loss:12.8950|lr: 0.000500|prox_lam:5.00:   2%|▎         | 50/2000 [01:12<1:36:07,  2.96s/it]    

lorenz_p15_t1000_f40: train: Epoch 50/2000 Reconstruction Loss: 18.4783 Reglarization: 20.4387 | Total Loss: 38.917054 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.5481281  0.44945318 0.4940338  0.56706387 0.48999348 0.5884903
  0.5091579  0.7612255  0.51679134 0.5773747  0.4944187  0.5345737
  0.48793465 0.5407806  0.5657939 ]]
Norm: tensor([[0.5481, 0.4495, 0.4940, 0.5671, 0.4900, 0.5885, 0.5092, 0.7612, 0.5168,
         0.5774, 0.4944, 0.5346, 0.4879, 0.5408, 0.5658]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:100|train loss: 25.2445|l_recon: 14.4459|ridge_loss: 10.7986|l2_loss:11.4395|lr: 0.000500|prox_lam:5.00:   5%|▌         | 100/2000 [02:17<23:40,  1.34it/s]

lorenz_p15_t1000_f40: train: Epoch 100/2000 Reconstruction Loss: 14.4459 Reglarization: 10.7986 | Total Loss: 25.244450 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[1.3201337e-03 0.0000000e+00 0.0000000e+00 3.6186283e-04 0.0000000e+00
  1.4964953e-02 2.8168358e-02 1.4271814e-01 1.3241966e-02 1.0491210e-04
  0.0000000e+00 1.2920168e-03 4.6401987e-05 0.0000000e+00 1.7083042e-03]]
Norm: tensor([[1.3201e-03, 0.0000e+00, 0.0000e+00, 3.6186e-04, 0.0000e+00, 1.4965e-02,
         2.8168e-02, 1.4272e-01, 1.3242e-02, 1.0491e-04, 0.0000e+00, 1.2920e-03,
         4.6402e-05, 0.0000e+00, 1.7083e-03]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:150|train loss: 17.0716|l_recon: 12.4269|ridge_loss: 4.6447|l2_loss:10.8850|lr: 0.000500|prox_lam:5.00:   8%|▊         | 150/2000 [03:08<50:16,  1.63s/it]  

lorenz_p15_t1000_f40: train: Epoch 150/2000 Reconstruction Loss: 12.4269 Reglarization: 4.6447 | Total Loss: 17.071551 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.         0.         0.         0.         0.         0.003076
  0.00687771 0.03466189 0.0043005  0.         0.         0.
  0.         0.         0.        ]]
Norm: tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0031, 0.0069, 0.0347, 0.0043,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:200|train loss: 13.6495|l_recon: 11.8743|ridge_loss: 1.7752|l2_loss:10.1195|lr: 0.000500|prox_lam:6.00:  10%|█         | 200/2000 [03:38<13:47,  2.17it/s]  

lorenz_p15_t1000_f40: train: Epoch 200/2000 Reconstruction Loss: 11.8743 Reglarization: 1.7752 | Total Loss: 13.649480 || Learning Rate: 0.000500Prox lambda: 6.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.         0.         0.         0.         0.         0.00139596
  0.00281115 0.00900292 0.00162008 0.00024477 0.         0.
  0.         0.         0.        ]]
Norm: tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0014, 0.0028, 0.0090, 0.0016,
         0.0002, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:250|train loss: 11.9978|l_recon: 11.2811|ridge_loss: 0.7167|l2_loss:9.4850|lr: 0.000250|prox_lam:7.00:  12%|█▎        | 250/2000 [04:07<13:11,  2.21it/s]   

lorenz_p15_t1000_f40: train: Epoch 250/2000 Reconstruction Loss: 11.2811 Reglarization: 0.7167 | Total Loss: 11.997792 || Learning Rate: 0.000250Prox lambda: 7.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.         0.         0.         0.         0.         0.00010326
  0.         0.00548396 0.         0.00017565 0.         0.
  0.         0.         0.        ]]
Norm: tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0001, 0.0000, 0.0055, 0.0000,
         0.0002, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:300|train loss: 9.5991|l_recon: 9.1182|ridge_loss: 0.4808|l2_loss:10.7301|lr: 0.000125|prox_lam:8.00:  15%|█▌        | 300/2000 [04:34<13:50,  2.05it/s]  

lorenz_p15_t1000_f40: train: Epoch 300/2000 Reconstruction Loss: 9.1182 Reglarization: 0.4808 | Total Loss: 9.599093 || Learning Rate: 0.000125Prox lambda: 8.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.         0.         0.         0.         0.         0.
  0.         0.00327219 0.         0.         0.         0.
  0.         0.         0.        ]]
Norm: tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0033, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:350|train loss: 9.2178|l_recon: 8.8331|ridge_loss: 0.3847|l2_loss:8.8014|lr: 0.000063|prox_lam:9.00:  18%|█▊        | 350/2000 [05:09<29:52,  1.09s/it]   

lorenz_p15_t1000_f40: train: Epoch 350/2000 Reconstruction Loss: 8.8331 Reglarization: 0.3847 | Total Loss: 9.217845 || Learning Rate: 0.000063Prox lambda: 9.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  3.8878056e-06 0.0000000e+00 2.1734356e-03 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 3.8878e-06,
         0.0000e+00, 2.1734e-03, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:400|train loss: 9.0773|l_recon: 8.7275|ridge_loss: 0.3498|l2_loss:8.1729|lr: 0.000016|prox_lam:10.00:  20%|██        | 400/2000 [06:33<41:17,  1.55s/it] 

lorenz_p15_t1000_f40: train: Epoch 400/2000 Reconstruction Loss: 8.7275 Reglarization: 0.3498 | Total Loss: 9.077296 || Learning Rate: 0.000016Prox lambda: 10.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  1.9848419e-05 0.0000000e+00 1.7041564e-03 0.0000000e+00 3.1433883e-06
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.9848e-05,
         0.0000e+00, 1.7042e-03, 0.0000e+00, 3.1434e-06, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:450|train loss: 8.8437|l_recon: 8.5060|ridge_loss: 0.3377|l2_loss:8.8280|lr: 0.000004|prox_lam:11.00:  22%|██▎       | 450/2000 [07:57<44:45,  1.73s/it] 

lorenz_p15_t1000_f40: train: Epoch 450/2000 Reconstruction Loss: 8.5060 Reglarization: 0.3377 | Total Loss: 8.843716 || Learning Rate: 0.000004Prox lambda: 11.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  1.8034647e-05 0.0000000e+00 1.4864174e-03 0.0000000e+00 6.2862459e-06
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.8035e-05,
         0.0000e+00, 1.4864e-03, 0.0000e+00, 6.2862e-06, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:500|train loss: 9.2186|l_recon: 8.8847|ridge_loss: 0.3339|l2_loss:7.6419|lr: 0.000002|prox_lam:12.00:  25%|██▌       | 500/2000 [08:42<10:12,  2.45it/s] 

lorenz_p15_t1000_f40: train: Epoch 500/2000 Reconstruction Loss: 8.8847 Reglarization: 0.3339 | Total Loss: 9.218640 || Learning Rate: 0.000002Prox lambda: 12.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  7.4965028e-06 0.0000000e+00 1.4300320e-03 0.0000000e+00 2.6619382e-06
  0.0000000e+00 2.5466597e-06 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 7.4965e-06,
         0.0000e+00, 1.4300e-03, 0.0000e+00, 2.6619e-06, 0.0000e+00, 2.5467e-06,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:550|train loss: 9.1469|l_recon: 8.8148|ridge_loss: 0.3320|l2_loss:10.1941|lr: 0.000001|prox_lam:13.00:  28%|██▊       | 550/2000 [09:05<11:26,  2.11it/s]

lorenz_p15_t1000_f40: train: Epoch 550/2000 Reconstruction Loss: 8.8148 Reglarization: 0.3320 | Total Loss: 9.146860 || Learning Rate: 0.000001Prox lambda: 13.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
  1.347465e-05 0.000000e+00 1.295167e-03 0.000000e+00 0.000000e+00
  0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.3475e-05,
         0.0000e+00, 1.2952e-03, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:557|train loss: 8.9404|l_recon: 8.6085|ridge_loss: 0.3318|l2_loss:8.7725|lr: 0.000001|prox_lam:14.00:  28%|██▊       | 557/2000 [09:08<23:40,  1.02it/s] 


Early stopping at epoch 557
training complete
最佳训练损失: 7.240036 (Epoch 457)
lam: 13.999999999999968,lr: 1e-05, norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 1.4617408e-03 0.0000000e+00 7.2256086e-07
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Training done, time: 548.39
ts_target: 8
{'overall': {'trainable': '17.21M', 'non_trainable': '0', 'total': '17.21M'}, 'denoise_Transformer': {'trainable': '17.21M', 'non_trainable': '0', 'total': '17.21M'}}
lorenz_p15_t1000_f40: start training...


Epoch:50|train loss: 38.4008|l_recon: 18.0589|ridge_loss: 20.3418|l2_loss:13.5801|lr: 0.000500|prox_lam:5.00:   2%|▎         | 50/2000 [01:21<29:03,  1.12it/s]       

lorenz_p15_t1000_f40: train: Epoch 50/2000 Reconstruction Loss: 18.0589 Reglarization: 20.3418 | Total Loss: 38.400767 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.54039145 0.5980431  0.5647334  0.52997184 0.45728272 0.5178149
  0.5087595  0.5534317  0.7333614  0.4960815  0.5117787  0.4351617
  0.501912   0.504121   0.5615719 ]]
Norm: tensor([[0.5404, 0.5980, 0.5647, 0.5300, 0.4573, 0.5178, 0.5088, 0.5534, 0.7334,
         0.4961, 0.5118, 0.4352, 0.5019, 0.5041, 0.5616]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:100|train loss: 25.8119|l_recon: 16.0143|ridge_loss: 9.7976|l2_loss:13.1537|lr: 0.000500|prox_lam:5.00:   5%|▌         | 100/2000 [01:49<24:23,  1.30it/s]

lorenz_p15_t1000_f40: train: Epoch 100/2000 Reconstruction Loss: 16.0143 Reglarization: 9.7976 | Total Loss: 25.811930 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.         0.00069833 0.00119153 0.         0.0036746  0.
  0.00990319 0.01744969 0.14619783 0.01109177 0.00285899 0.
  0.00067869 0.         0.00031882]]
Norm: tensor([[0.0000, 0.0007, 0.0012, 0.0000, 0.0037, 0.0000, 0.0099, 0.0174, 0.1462,
         0.0111, 0.0029, 0.0000, 0.0007, 0.0000, 0.0003]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:150|train loss: 17.1116|l_recon: 13.0551|ridge_loss: 4.0565|l2_loss:11.0546|lr: 0.000500|prox_lam:5.00:   8%|▊         | 150/2000 [02:20<19:33,  1.58it/s]

lorenz_p15_t1000_f40: train: Epoch 150/2000 Reconstruction Loss: 13.0551 Reglarization: 4.0565 | Total Loss: 17.111585 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.         0.0001623  0.         0.         0.00063021 0.
  0.00342935 0.00516949 0.02696254 0.00353653 0.00016667 0.00054491
  0.         0.         0.00010307]]
Norm: tensor([[0.0000, 0.0002, 0.0000, 0.0000, 0.0006, 0.0000, 0.0034, 0.0052, 0.0270,
         0.0035, 0.0002, 0.0005, 0.0000, 0.0000, 0.0001]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:200|train loss: 14.6829|l_recon: 13.1335|ridge_loss: 1.5494|l2_loss:9.9801|lr: 0.000500|prox_lam:6.00:  10%|█         | 200/2000 [02:44<14:07,  2.12it/s] 

lorenz_p15_t1000_f40: train: Epoch 200/2000 Reconstruction Loss: 13.1335 Reglarization: 1.5494 | Total Loss: 14.682925 || Learning Rate: 0.000500Prox lambda: 6.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[9.6629374e-06 0.0000000e+00 0.0000000e+00 7.9619465e-05 0.0000000e+00
  0.0000000e+00 0.0000000e+00 2.4889482e-04 9.7219646e-03 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[9.6629e-06, 0.0000e+00, 0.0000e+00, 7.9619e-05, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 2.4889e-04, 9.7220e-03, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:250|train loss: 10.4256|l_recon: 9.5232|ridge_loss: 0.9024|l2_loss:9.1103|lr: 0.000125|prox_lam:7.00:  12%|█▎        | 250/2000 [03:09<13:30,  2.16it/s]  

lorenz_p15_t1000_f40: train: Epoch 250/2000 Reconstruction Loss: 9.5232 Reglarization: 0.9024 | Total Loss: 10.425605 || Learning Rate: 0.000125Prox lambda: 7.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[1.6485211e-04 0.0000000e+00 0.0000000e+00 2.9145836e-04 0.0000000e+00
  0.0000000e+00 2.6699787e-04 0.0000000e+00 6.7806225e-03 0.0000000e+00
  0.0000000e+00 4.5372697e-05 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[1.6485e-04, 0.0000e+00, 0.0000e+00, 2.9146e-04, 0.0000e+00, 0.0000e+00,
         2.6700e-04, 0.0000e+00, 6.7806e-03, 0.0000e+00, 0.0000e+00, 4.5373e-05,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:300|train loss: 10.0845|l_recon: 9.3466|ridge_loss: 0.7379|l2_loss:8.9605|lr: 0.000063|prox_lam:8.00:  15%|█▌        | 300/2000 [03:38<21:49,  1.30it/s] 

lorenz_p15_t1000_f40: train: Epoch 300/2000 Reconstruction Loss: 9.3466 Reglarization: 0.7379 | Total Loss: 10.084511 || Learning Rate: 0.000063Prox lambda: 8.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 1.3572745e-05 0.0000000e+00
  2.4301640e-05 0.0000000e+00 0.0000000e+00 5.0931592e-03 0.0000000e+00
  7.8623532e-05 7.5183692e-05 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 1.3573e-05, 0.0000e+00, 2.4302e-05,
         0.0000e+00, 0.0000e+00, 5.0932e-03, 0.0000e+00, 7.8624e-05, 7.5184e-05,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:350|train loss: 9.6974|l_recon: 9.0487|ridge_loss: 0.6487|l2_loss:9.3552|lr: 0.000031|prox_lam:9.00:  18%|█▊        | 350/2000 [04:01<13:04,  2.10it/s]  

lorenz_p15_t1000_f40: train: Epoch 350/2000 Reconstruction Loss: 9.0487 Reglarization: 0.6487 | Total Loss: 9.697416 || Learning Rate: 0.000031Prox lambda: 9.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 3.7879564e-03 0.0000000e+00
  1.6369246e-05 0.0000000e+00 4.5671011e-05 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 3.7880e-03, 0.0000e+00, 1.6369e-05, 0.0000e+00,
         4.5671e-05, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:400|train loss: 9.4995|l_recon: 8.8864|ridge_loss: 0.6131|l2_loss:8.0716|lr: 0.000016|prox_lam:10.00:  20%|██        | 400/2000 [04:26<13:21,  2.00it/s] 

lorenz_p15_t1000_f40: train: Epoch 400/2000 Reconstruction Loss: 8.8864 Reglarization: 0.6131 | Total Loss: 9.499488 || Learning Rate: 0.000016Prox lambda: 10.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 1.7840051e-05 0.0000000e+00
  0.0000000e+00 4.3842487e-05 0.0000000e+00 3.2977578e-03 0.0000000e+00
  1.3272191e-05 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 1.7840e-05, 0.0000e+00, 0.0000e+00,
         4.3842e-05, 0.0000e+00, 3.2978e-03, 0.0000e+00, 1.3272e-05, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:450|train loss: 9.2758|l_recon: 8.6740|ridge_loss: 0.6019|l2_loss:9.0522|lr: 0.000004|prox_lam:11.00:  22%|██▎       | 450/2000 [04:50<12:20,  2.09it/s] 

lorenz_p15_t1000_f40: train: Epoch 450/2000 Reconstruction Loss: 8.6740 Reglarization: 0.6019 | Total Loss: 9.275828 || Learning Rate: 0.000004Prox lambda: 11.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[1.0362830e-05 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  8.6964756e-06 0.0000000e+00 0.0000000e+00 3.1402402e-03 0.0000000e+00
  1.3869139e-05 9.5001124e-06 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[1.0363e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 8.6965e-06,
         0.0000e+00, 0.0000e+00, 3.1402e-03, 0.0000e+00, 1.3869e-05, 9.5001e-06,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:500|train loss: 9.1372|l_recon: 8.5395|ridge_loss: 0.5978|l2_loss:9.3021|lr: 0.000001|prox_lam:12.00:  25%|██▌       | 500/2000 [05:16<12:34,  1.99it/s] 

lorenz_p15_t1000_f40: train: Epoch 500/2000 Reconstruction Loss: 8.5395 Reglarization: 0.5978 | Total Loss: 9.137227 || Learning Rate: 0.000001Prox lambda: 12.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[1.2298298e-06 0.0000000e+00 0.0000000e+00 2.5608579e-06 0.0000000e+00
  7.0954884e-06 9.8811479e-06 0.0000000e+00 2.9659679e-03 0.0000000e+00
  1.6516593e-05 1.2526561e-07 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[1.2298e-06, 0.0000e+00, 0.0000e+00, 2.5609e-06, 0.0000e+00, 7.0955e-06,
         9.8811e-06, 0.0000e+00, 2.9660e-03, 0.0000e+00, 1.6517e-05, 1.2527e-07,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:550|train loss: 9.3379|l_recon: 8.7414|ridge_loss: 0.5965|l2_loss:8.5715|lr: 0.000000|prox_lam:13.00:  28%|██▊       | 550/2000 [05:41<11:42,  2.07it/s] 

lorenz_p15_t1000_f40: train: Epoch 550/2000 Reconstruction Loss: 8.7414 Reglarization: 0.5965 | Total Loss: 9.337882 || Learning Rate: 0.000000Prox lambda: 13.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[3.5622468e-06 0.0000000e+00 0.0000000e+00 3.4515976e-06 0.0000000e+00
  3.2457147e-06 8.2302859e-06 0.0000000e+00 2.8654099e-03 0.0000000e+00
  2.4559638e-05 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[3.5622e-06, 0.0000e+00, 0.0000e+00, 3.4516e-06, 0.0000e+00, 3.2457e-06,
         8.2303e-06, 0.0000e+00, 2.8654e-03, 0.0000e+00, 2.4560e-05, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:576|train loss: 9.6314|l_recon: 9.0352|ridge_loss: 0.5962|l2_loss:8.3277|lr: 0.000000|prox_lam:14.00:  29%|██▉       | 576/2000 [05:53<14:34,  1.63it/s] 


Early stopping at epoch 576
training complete
最佳训练损失: 7.776785 (Epoch 476)
lam: 13.999999999999968,lr: 1e-05, norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 1.8931940e-05 0.0000000e+00
  5.9317390e-06 4.0824489e-06 0.0000000e+00 3.0006603e-03 0.0000000e+00
  2.3695611e-05 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Training done, time: 353.90
ts_target: 9
{'overall': {'trainable': '17.21M', 'non_trainable': '0', 'total': '17.21M'}, 'denoise_Transformer': {'trainable': '17.21M', 'non_trainable': '0', 'total': '17.21M'}}
lorenz_p15_t1000_f40: start training...


Epoch:50|train loss: 40.0407|l_recon: 19.4533|ridge_loss: 20.5873|l2_loss:14.6600|lr: 0.000500|prox_lam:5.00:   2%|▎         | 50/2000 [02:10<47:06,  1.45s/it]        

lorenz_p15_t1000_f40: train: Epoch 50/2000 Reconstruction Loss: 19.4533 Reglarization: 20.5873 | Total Loss: 40.040658 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.54612243 0.52246064 0.5145173  0.5365299  0.5421243  0.5181668
  0.57233095 0.60434335 0.54367256 0.6551102  0.59169084 0.57944906
  0.51674527 0.5354267  0.5592646 ]]
Norm: tensor([[0.5461, 0.5225, 0.5145, 0.5365, 0.5421, 0.5182, 0.5723, 0.6043, 0.5437,
         0.6551, 0.5917, 0.5794, 0.5167, 0.5354, 0.5593]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:100|train loss: 26.7964|l_recon: 16.1571|ridge_loss: 10.6392|l2_loss:12.3514|lr: 0.000500|prox_lam:5.00:   5%|▌         | 100/2000 [02:48<25:55,  1.22it/s]

lorenz_p15_t1000_f40: train: Epoch 100/2000 Reconstruction Loss: 16.1571 Reglarization: 10.6392 | Total Loss: 26.796359 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[8.5265609e-05 2.4835768e-03 2.0511587e-04 0.0000000e+00 0.0000000e+00
  9.4374525e-04 1.9170889e-03 1.0622345e-02 1.4542393e-02 1.3206252e-01
  1.2090022e-02 3.3628461e-03 0.0000000e+00 3.7446688e-04 2.2634260e-04]]
Norm: tensor([[8.5266e-05, 2.4836e-03, 2.0512e-04, 0.0000e+00, 0.0000e+00, 9.4375e-04,
         1.9171e-03, 1.0622e-02, 1.4542e-02, 1.3206e-01, 1.2090e-02, 3.3628e-03,
         0.0000e+00, 3.7447e-04, 2.2634e-04]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:150|train loss: 17.8730|l_recon: 13.2917|ridge_loss: 4.5813|l2_loss:10.8982|lr: 0.000500|prox_lam:5.00:   8%|▊         | 150/2000 [03:24<43:08,  1.40s/it] 

lorenz_p15_t1000_f40: train: Epoch 150/2000 Reconstruction Loss: 13.2917 Reglarization: 4.5813 | Total Loss: 17.873043 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.         0.         0.         0.         0.         0.
  0.         0.00366381 0.00523784 0.03727516 0.00470489 0.00010905
  0.00054939 0.         0.        ]]
Norm: tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0037, 0.0052,
         0.0373, 0.0047, 0.0001, 0.0005, 0.0000, 0.0000]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:200|train loss: 12.3409|l_recon: 10.6127|ridge_loss: 1.7282|l2_loss:9.6251|lr: 0.000500|prox_lam:6.00:  10%|█         | 200/2000 [03:53<14:48,  2.03it/s] 

lorenz_p15_t1000_f40: train: Epoch 200/2000 Reconstruction Loss: 10.6127 Reglarization: 1.7282 | Total Loss: 12.340920 || Learning Rate: 0.000500Prox lambda: 6.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.         0.         0.         0.         0.         0.
  0.         0.00142922 0.00268347 0.01087131 0.00175162 0.00013918
  0.         0.         0.        ]]
Norm: tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0014, 0.0027,
         0.0109, 0.0018, 0.0001, 0.0000, 0.0000, 0.0000]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:250|train loss: 11.0450|l_recon: 10.1522|ridge_loss: 0.8928|l2_loss:9.3647|lr: 0.000250|prox_lam:7.00:  12%|█▎        | 250/2000 [04:19<14:05,  2.07it/s] 

lorenz_p15_t1000_f40: train: Epoch 250/2000 Reconstruction Loss: 10.1522 Reglarization: 0.8928 | Total Loss: 11.045012 || Learning Rate: 0.000250Prox lambda: 7.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.         0.         0.         0.         0.         0.
  0.         0.00126355 0.00128002 0.00559894 0.0010602  0.
  0.         0.         0.        ]]
Norm: tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0013, 0.0013,
         0.0056, 0.0011, 0.0000, 0.0000, 0.0000, 0.0000]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:300|train loss: 9.7080|l_recon: 9.0892|ridge_loss: 0.6188|l2_loss:9.3794|lr: 0.000063|prox_lam:8.00:  15%|█▌        | 300/2000 [04:48<18:05,  1.57it/s]   

lorenz_p15_t1000_f40: train: Epoch 300/2000 Reconstruction Loss: 9.0892 Reglarization: 0.6188 | Total Loss: 9.708024 || Learning Rate: 0.000063Prox lambda: 8.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 5.9010723e-04 8.5214037e-04 3.3291595e-03
  6.6910905e-04 7.4174895e-05 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 5.9011e-04, 8.5214e-04, 3.3292e-03, 6.6911e-04, 7.4175e-05,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:350|train loss: 9.2180|l_recon: 8.6797|ridge_loss: 0.5383|l2_loss:9.0285|lr: 0.000031|prox_lam:9.00:  18%|█▊        | 350/2000 [05:21<18:02,  1.52it/s] 

lorenz_p15_t1000_f40: train: Epoch 350/2000 Reconstruction Loss: 8.6797 Reglarization: 0.5383 | Total Loss: 9.218025 || Learning Rate: 0.000031Prox lambda: 9.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 2.9430332e-04 5.9829082e-04 2.2766259e-03
  3.7952844e-04 3.8188384e-05 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 2.9430e-04, 5.9829e-04, 2.2766e-03, 3.7953e-04, 3.8188e-05,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:375|train loss: 9.2159|l_recon: 8.6955|ridge_loss: 0.5204|l2_loss:8.4521|lr: 0.000016|prox_lam:10.00:  19%|█▉        | 375/2000 [05:37<24:22,  1.11it/s]


Early stopping at epoch 375
training complete
最佳训练损失: 7.587337 (Epoch 275)
lam: 9.999999999999982,lr: 1e-05, norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 6.4952928e-04 7.8548264e-04 4.0867277e-03
  6.4442027e-04 9.4825285e-05 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Training done, time: 337.47
ts_target: 10
{'overall': {'trainable': '17.21M', 'non_trainable': '0', 'total': '17.21M'}, 'denoise_Transformer': {'trainable': '17.21M', 'non_trainable': '0', 'total': '17.21M'}}
lorenz_p15_t1000_f40: start training...


Epoch:50|train loss: 39.3345|l_recon: 18.8807|ridge_loss: 20.4538|l2_loss:12.8639|lr: 0.000500|prox_lam:5.00:   2%|▎         | 50/2000 [01:24<29:07,  1.12it/s]        

lorenz_p15_t1000_f40: train: Epoch 50/2000 Reconstruction Loss: 18.8807 Reglarization: 20.4538 | Total Loss: 39.334539 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.5280565  0.5583514  0.5656145  0.57610476 0.56656915 0.5217889
  0.4464578  0.55237514 0.49861452 0.575038   0.71556485 0.54330903
  0.5665448  0.54831827 0.52967536]]
Norm: tensor([[0.5281, 0.5584, 0.5656, 0.5761, 0.5666, 0.5218, 0.4465, 0.5524, 0.4986,
         0.5750, 0.7156, 0.5433, 0.5665, 0.5483, 0.5297]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:100|train loss: 26.4388|l_recon: 15.8844|ridge_loss: 10.5544|l2_loss:12.6891|lr: 0.000500|prox_lam:5.00:   5%|▌         | 100/2000 [01:54<18:47,  1.69it/s]

lorenz_p15_t1000_f40: train: Epoch 100/2000 Reconstruction Loss: 15.8844 Reglarization: 10.5544 | Total Loss: 26.438781 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0008045  0.00125707 0.00019447 0.00048806 0.00099262 0.00050381
  0.00148527 0.00125622 0.00824699 0.01490513 0.15762763 0.01102091
  0.00309081 0.         0.        ]]
Norm: tensor([[0.0008, 0.0013, 0.0002, 0.0005, 0.0010, 0.0005, 0.0015, 0.0013, 0.0082,
         0.0149, 0.1576, 0.0110, 0.0031, 0.0000, 0.0000]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:150|train loss: 16.4753|l_recon: 11.8501|ridge_loss: 4.6252|l2_loss:10.1997|lr: 0.000500|prox_lam:5.00:   8%|▊         | 150/2000 [02:28<21:03,  1.46it/s] 

lorenz_p15_t1000_f40: train: Epoch 150/2000 Reconstruction Loss: 11.8501 Reglarization: 4.6252 | Total Loss: 16.475339 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.         0.         0.         0.         0.         0.
  0.00031035 0.0003204  0.00370793 0.00603757 0.04875604 0.00401317
  0.         0.00063601 0.        ]]
Norm: tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0003, 0.0003, 0.0037,
         0.0060, 0.0488, 0.0040, 0.0000, 0.0006, 0.0000]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:200|train loss: 12.6914|l_recon: 10.8687|ridge_loss: 1.8227|l2_loss:9.3551|lr: 0.000500|prox_lam:6.00:  10%|█         | 200/2000 [02:53<15:09,  1.98it/s] 

lorenz_p15_t1000_f40: train: Epoch 200/2000 Reconstruction Loss: 10.8687 Reglarization: 1.8227 | Total Loss: 12.691369 || Learning Rate: 0.000500Prox lambda: 6.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.         0.         0.         0.         0.         0.
  0.         0.         0.00053932 0.00200333 0.0091368  0.00145266
  0.         0.         0.        ]]
Norm: tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0005,
         0.0020, 0.0091, 0.0015, 0.0000, 0.0000, 0.0000]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:250|train loss: 10.5829|l_recon: 9.6500|ridge_loss: 0.9328|l2_loss:9.1849|lr: 0.000250|prox_lam:7.00:  12%|█▎        | 250/2000 [03:20<14:15,  2.04it/s]  

lorenz_p15_t1000_f40: train: Epoch 250/2000 Reconstruction Loss: 9.6500 Reglarization: 0.9328 | Total Loss: 10.582869 || Learning Rate: 0.000250Prox lambda: 7.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.00591665 0.
  0.00032049 0.         0.        ]]
Norm: tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0059, 0.0000, 0.0003, 0.0000, 0.0000]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:300|train loss: 9.4813|l_recon: 8.8329|ridge_loss: 0.6483|l2_loss:8.6138|lr: 0.000125|prox_lam:8.00:  15%|█▌        | 300/2000 [03:44<15:29,  1.83it/s]   

lorenz_p15_t1000_f40: train: Epoch 300/2000 Reconstruction Loss: 8.8329 Reglarization: 0.6483 | Total Loss: 9.481262 || Learning Rate: 0.000125Prox lambda: 8.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.00421498 0.
  0.         0.         0.        ]]
Norm: tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0042, 0.0000, 0.0000, 0.0000, 0.0000]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:350|train loss: 9.4282|l_recon: 8.9074|ridge_loss: 0.5208|l2_loss:8.9445|lr: 0.000031|prox_lam:9.00:  18%|█▊        | 350/2000 [04:08<12:39,  2.17it/s] 

lorenz_p15_t1000_f40: train: Epoch 350/2000 Reconstruction Loss: 8.9074 Reglarization: 0.5208 | Total Loss: 9.428214 || Learning Rate: 0.000031Prox lambda: 9.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.00252448 0.
  0.         0.         0.        ]]
Norm: tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0025, 0.0000, 0.0000, 0.0000, 0.0000]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:400|train loss: 9.1004|l_recon: 8.6158|ridge_loss: 0.4845|l2_loss:8.8645|lr: 0.000016|prox_lam:10.00:  20%|██        | 400/2000 [04:32<13:29,  1.98it/s] 

lorenz_p15_t1000_f40: train: Epoch 400/2000 Reconstruction Loss: 8.6158 Reglarization: 0.4845 | Total Loss: 9.100383 || Learning Rate: 0.000016Prox lambda: 10.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.00212411 0.
  0.         0.         0.        ]]
Norm: tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0021, 0.0000, 0.0000, 0.0000, 0.0000]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:450|train loss: 8.8247|l_recon: 8.3612|ridge_loss: 0.4635|l2_loss:9.2242|lr: 0.000008|prox_lam:11.00:  22%|██▎       | 450/2000 [04:56<12:28,  2.07it/s] 

lorenz_p15_t1000_f40: train: Epoch 450/2000 Reconstruction Loss: 8.3612 Reglarization: 0.4635 | Total Loss: 8.824742 || Learning Rate: 0.000008Prox lambda: 11.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  1.8483882e-03 0.0000000e+00 6.7294968e-06 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.8484e-03, 0.0000e+00,
         6.7295e-06, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:491|train loss: 8.8165|l_recon: 8.3604|ridge_loss: 0.4561|l2_loss:8.1628|lr: 0.000004|prox_lam:12.00:  25%|██▍       | 491/2000 [05:16<16:12,  1.55it/s] 


Early stopping at epoch 491
training complete
最佳训练损失: 7.641660 (Epoch 391)
lam: 11.999999999999975,lr: 1e-05, norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  2.2622722e-03 0.0000000e+00 1.0911025e-05 0.0000000e+00 0.0000000e+00]]
Training done, time: 316.55
ts_target: 11
{'overall': {'trainable': '17.21M', 'non_trainable': '0', 'total': '17.21M'}, 'denoise_Transformer': {'trainable': '17.21M', 'non_trainable': '0', 'total': '17.21M'}}
lorenz_p15_t1000_f40: start training...


Epoch:50|train loss: 40.7882|l_recon: 19.6550|ridge_loss: 21.1332|l2_loss:13.0510|lr: 0.000500|prox_lam:5.00:   2%|▎         | 50/2000 [01:17<20:59,  1.55it/s]       

lorenz_p15_t1000_f40: train: Epoch 50/2000 Reconstruction Loss: 19.6550 Reglarization: 21.1332 | Total Loss: 40.788203 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.5000939  0.57032675 0.56986594 0.568555   0.50164366 0.556289
  0.50014555 0.54156816 0.5140355  0.41022518 0.5154716  0.70119745
  0.5905753  0.53529775 0.5395547 ]]
Norm: tensor([[0.5001, 0.5703, 0.5699, 0.5686, 0.5016, 0.5563, 0.5001, 0.5416, 0.5140,
         0.4102, 0.5155, 0.7012, 0.5906, 0.5353, 0.5396]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:100|train loss: 28.8685|l_recon: 17.0447|ridge_loss: 11.8238|l2_loss:12.2202|lr: 0.000500|prox_lam:5.00:   5%|▌         | 100/2000 [01:43<14:57,  2.12it/s]

lorenz_p15_t1000_f40: train: Epoch 100/2000 Reconstruction Loss: 17.0447 Reglarization: 11.8238 | Total Loss: 28.868494 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[5.4761022e-04 0.0000000e+00 1.0128739e-04 1.1574897e-04 1.1757589e-03
  0.0000000e+00 0.0000000e+00 1.4499978e-03 0.0000000e+00 9.9140238e-03
  1.0476438e-02 1.3458982e-01 7.7404515e-03 1.9774470e-04 1.7630163e-03]]
Norm: tensor([[5.4761e-04, 0.0000e+00, 1.0129e-04, 1.1575e-04, 1.1758e-03, 0.0000e+00,
         0.0000e+00, 1.4500e-03, 0.0000e+00, 9.9140e-03, 1.0476e-02, 1.3459e-01,
         7.7405e-03, 1.9774e-04, 1.7630e-03]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:150|train loss: 17.1795|l_recon: 12.3380|ridge_loss: 4.8414|l2_loss:10.4366|lr: 0.000500|prox_lam:5.00:   8%|▊         | 150/2000 [02:14<15:13,  2.03it/s] 

lorenz_p15_t1000_f40: train: Epoch 150/2000 Reconstruction Loss: 12.3380 Reglarization: 4.8414 | Total Loss: 17.179464 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  3.6386168e-04 0.0000000e+00 0.0000000e+00 0.0000000e+00 2.5896353e-03
  3.5927077e-03 2.2916000e-02 2.4771155e-03 3.0368380e-04 9.3896640e-05]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 3.6386e-04,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 2.5896e-03, 3.5927e-03, 2.2916e-02,
         2.4771e-03, 3.0368e-04, 9.3897e-05]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:200|train loss: 15.7207|l_recon: 13.8898|ridge_loss: 1.8309|l2_loss:11.3487|lr: 0.000500|prox_lam:6.00:  10%|█         | 200/2000 [02:38<14:57,  2.01it/s]

lorenz_p15_t1000_f40: train: Epoch 200/2000 Reconstruction Loss: 13.8898 Reglarization: 1.8309 | Total Loss: 15.720652 || Learning Rate: 0.000500Prox lambda: 6.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.         0.         0.         0.         0.         0.
  0.         0.00014463 0.         0.         0.         0.00882727
  0.         0.         0.        ]]
Norm: tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0001, 0.0000,
         0.0000, 0.0000, 0.0088, 0.0000, 0.0000, 0.0000]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:250|train loss: 11.6304|l_recon: 10.7101|ridge_loss: 0.9203|l2_loss:9.8416|lr: 0.000250|prox_lam:7.00:  12%|█▎        | 250/2000 [03:08<15:01,  1.94it/s] 

lorenz_p15_t1000_f40: train: Epoch 250/2000 Reconstruction Loss: 10.7101 Reglarization: 0.9203 | Total Loss: 11.630416 || Learning Rate: 0.000250Prox lambda: 7.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  2.1424145e-05 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 5.1404852e-03 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 2.1424e-05,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 5.1405e-03,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:300|train loss: 9.5522|l_recon: 8.8828|ridge_loss: 0.6694|l2_loss:8.9301|lr: 0.000063|prox_lam:8.00:  15%|█▌        | 300/2000 [03:33<14:32,  1.95it/s]   

lorenz_p15_t1000_f40: train: Epoch 300/2000 Reconstruction Loss: 8.8828 Reglarization: 0.6694 | Total Loss: 9.552171 || Learning Rate: 0.000063Prox lambda: 8.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 3.0418511e-05 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 3.7486376e-03 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 3.0419e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 3.7486e-03,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:350|train loss: 9.4139|l_recon: 8.8221|ridge_loss: 0.5918|l2_loss:8.7113|lr: 0.000031|prox_lam:9.00:  18%|█▊        | 350/2000 [04:00<13:37,  2.02it/s] 

lorenz_p15_t1000_f40: train: Epoch 350/2000 Reconstruction Loss: 8.8221 Reglarization: 0.5918 | Total Loss: 9.413889 || Learning Rate: 0.000031Prox lambda: 9.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  3.8187398e-05 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 2.5155994e-03 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 3.8187e-05,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 2.5156e-03,
         0.0000e+00, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:400|train loss: 9.3621|l_recon: 8.8044|ridge_loss: 0.5577|l2_loss:8.8745|lr: 0.000008|prox_lam:10.00:  20%|██        | 400/2000 [04:25<12:53,  2.07it/s] 

lorenz_p15_t1000_f40: train: Epoch 400/2000 Reconstruction Loss: 8.8044 Reglarization: 0.5577 | Total Loss: 9.362079 || Learning Rate: 0.000008Prox lambda: 10.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 1.0056523e-05
  0.0000000e+00 2.3022145e-03 0.0000000e+00 9.1219699e-06 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0057e-05, 0.0000e+00, 2.3022e-03,
         0.0000e+00, 9.1220e-06, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:411|train loss: 9.3041|l_recon: 8.7493|ridge_loss: 0.5548|l2_loss:9.1984|lr: 0.000008|prox_lam:11.00:  21%|██        | 411/2000 [04:30<17:27,  1.52it/s] 


Early stopping at epoch 411
training complete
最佳训练损失: 8.120457 (Epoch 311)
lam: 10.999999999999979,lr: 1e-05, norm: [[0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.00327537
  0.         0.         0.        ]]
Training done, time: 270.91
ts_target: 12
{'overall': {'trainable': '17.21M', 'non_trainable': '0', 'total': '17.21M'}, 'denoise_Transformer': {'trainable': '17.21M', 'non_trainable': '0', 'total': '17.21M'}}
lorenz_p15_t1000_f40: start training...


Epoch:50|train loss: 41.4365|l_recon: 21.3579|ridge_loss: 20.0786|l2_loss:13.7183|lr: 0.000500|prox_lam:5.00:   2%|▎         | 50/2000 [01:04<27:20,  1.19it/s]      

lorenz_p15_t1000_f40: train: Epoch 50/2000 Reconstruction Loss: 21.3579 Reglarization: 20.0786 | Total Loss: 41.436487 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.5497496  0.47670332 0.4907269  0.49277025 0.55008537 0.55149513
  0.44080776 0.47964492 0.52715343 0.5428973  0.47102267 0.52831733
  0.7056699  0.51276994 0.5603551 ]]
Norm: tensor([[0.5497, 0.4767, 0.4907, 0.4928, 0.5501, 0.5515, 0.4408, 0.4796, 0.5272,
         0.5429, 0.4710, 0.5283, 0.7057, 0.5128, 0.5604]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:100|train loss: 25.9836|l_recon: 16.4008|ridge_loss: 9.5827|l2_loss:11.5816|lr: 0.000500|prox_lam:5.00:   5%|▌         | 100/2000 [01:32<16:25,  1.93it/s]

lorenz_p15_t1000_f40: train: Epoch 100/2000 Reconstruction Loss: 16.4008 Reglarization: 9.5827 | Total Loss: 25.983595 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.         0.         0.         0.         0.         0.
  0.00111779 0.         0.         0.         0.01188943 0.01692723
  0.15258747 0.0110334  0.        ]]
Norm: tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0011, 0.0000, 0.0000,
         0.0000, 0.0119, 0.0169, 0.1526, 0.0110, 0.0000]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:150|train loss: 16.6094|l_recon: 12.5392|ridge_loss: 4.0702|l2_loss:12.1935|lr: 0.000500|prox_lam:5.00:   8%|▊         | 150/2000 [02:00<16:15,  1.90it/s]

lorenz_p15_t1000_f40: train: Epoch 150/2000 Reconstruction Loss: 12.5392 Reglarization: 4.0702 | Total Loss: 16.609354 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 4.0851533e-05 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  3.9128917e-03 4.1906969e-03 2.5223780e-02 3.4111198e-03 1.9144570e-04]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 4.0852e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 3.9129e-03, 4.1907e-03,
         2.5224e-02, 3.4111e-03, 1.9145e-04]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:200|train loss: 13.0629|l_recon: 11.4460|ridge_loss: 1.6169|l2_loss:10.7833|lr: 0.000500|prox_lam:6.00:  10%|█         | 200/2000 [02:24<13:36,  2.21it/s]

lorenz_p15_t1000_f40: train: Epoch 200/2000 Reconstruction Loss: 11.4460 Reglarization: 1.6169 | Total Loss: 13.062867 || Learning Rate: 0.000500Prox lambda: 6.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 3.0398369e-05 0.0000000e+00
  1.2509944e-04 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  1.4678906e-03 1.5324767e-03 8.5327029e-03 5.8216928e-04 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 3.0398e-05, 0.0000e+00, 1.2510e-04,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.4679e-03, 1.5325e-03,
         8.5327e-03, 5.8217e-04, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:250|train loss: 11.2444|l_recon: 10.4467|ridge_loss: 0.7977|l2_loss:9.9002|lr: 0.000250|prox_lam:7.00:  12%|█▎        | 250/2000 [02:50<13:02,  2.24it/s] 

lorenz_p15_t1000_f40: train: Epoch 250/2000 Reconstruction Loss: 10.4467 Reglarization: 0.7977 | Total Loss: 11.244444 || Learning Rate: 0.000250Prox lambda: 7.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.00010208 0.00094987 0.00095666
  0.00491795 0.00062489 0.00011888]]
Norm: tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0001, 0.0009, 0.0010, 0.0049, 0.0006, 0.0001]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:300|train loss: 9.9296|l_recon: 9.3576|ridge_loss: 0.5720|l2_loss:10.9238|lr: 0.000063|prox_lam:8.00:  15%|█▌        | 300/2000 [03:13<12:27,  2.27it/s]  

lorenz_p15_t1000_f40: train: Epoch 300/2000 Reconstruction Loss: 9.3576 Reglarization: 0.5720 | Total Loss: 9.929642 || Learning Rate: 0.000063Prox lambda: 8.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 6.6525536e-06 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 3.4903367e-03 3.8238595e-05 1.3616198e-04]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 6.6526e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         3.4903e-03, 3.8239e-05, 1.3616e-04]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:350|train loss: 9.5651|l_recon: 9.0632|ridge_loss: 0.5019|l2_loss:10.4180|lr: 0.000031|prox_lam:9.00:  18%|█▊        | 350/2000 [03:37<12:42,  2.16it/s] 

lorenz_p15_t1000_f40: train: Epoch 350/2000 Reconstruction Loss: 9.0632 Reglarization: 0.5019 | Total Loss: 9.565080 || Learning Rate: 0.000031Prox lambda: 9.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 1.2929872e-05 2.7424707e-03 0.0000000e+00 0.0000000e+00]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.2930e-05,
         2.7425e-03, 0.0000e+00, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:400|train loss: 9.4562|l_recon: 8.9809|ridge_loss: 0.4753|l2_loss:8.9187|lr: 0.000008|prox_lam:10.00:  20%|██        | 400/2000 [04:01<11:50,  2.25it/s] 

lorenz_p15_t1000_f40: train: Epoch 400/2000 Reconstruction Loss: 8.9809 Reglarization: 0.4753 | Total Loss: 9.456203 || Learning Rate: 0.000008Prox lambda: 10.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 3.4500481e-06 2.2583324e-03 0.0000000e+00 1.9509433e-05]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 3.4500e-06,
         2.2583e-03, 0.0000e+00, 1.9509e-05]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:450|train loss: 9.4485|l_recon: 8.9819|ridge_loss: 0.4667|l2_loss:8.6415|lr: 0.000004|prox_lam:11.00:  22%|██▎       | 450/2000 [04:25<11:58,  2.16it/s] 

lorenz_p15_t1000_f40: train: Epoch 450/2000 Reconstruction Loss: 8.9819 Reglarization: 0.4667 | Total Loss: 9.448505 || Learning Rate: 0.000004Prox lambda: 11.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 2.0931910e-03 0.0000000e+00 4.4702138e-06]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         2.0932e-03, 0.0000e+00, 4.4702e-06]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:500|train loss: 9.3061|l_recon: 8.8423|ridge_loss: 0.4637|l2_loss:9.4421|lr: 0.000001|prox_lam:12.00:  25%|██▌       | 500/2000 [04:48<11:43,  2.13it/s] 

lorenz_p15_t1000_f40: train: Epoch 500/2000 Reconstruction Loss: 8.8423 Reglarization: 0.4637 | Total Loss: 9.306060 || Learning Rate: 0.000001Prox lambda: 12.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 1.5382557e-07 2.0014427e-03 0.0000000e+00 1.3341762e-05]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.5383e-07,
         2.0014e-03, 0.0000e+00, 1.3342e-05]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:550|train loss: 9.3217|l_recon: 8.8591|ridge_loss: 0.4627|l2_loss:9.4113|lr: 0.000000|prox_lam:13.00:  28%|██▊       | 550/2000 [05:13<11:07,  2.17it/s] 

lorenz_p15_t1000_f40: train: Epoch 550/2000 Reconstruction Loss: 8.8591 Reglarization: 0.4627 | Total Loss: 9.321740 || Learning Rate: 0.000000Prox lambda: 13.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 1.9249214e-03 0.0000000e+00 1.2208853e-05]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         1.9249e-03, 0.0000e+00, 1.2209e-05]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:600|train loss: 9.4824|l_recon: 9.0200|ridge_loss: 0.4624|l2_loss:8.0710|lr: 0.000000|prox_lam:14.00:  30%|███       | 600/2000 [05:36<10:26,  2.23it/s] 

lorenz_p15_t1000_f40: train: Epoch 600/2000 Reconstruction Loss: 9.0200 Reglarization: 0.4624 | Total Loss: 9.482416 || Learning Rate: 0.000000Prox lambda: 14.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  2.5459029e-08 0.0000000e+00 1.8677447e-03 0.0000000e+00 8.9496980e-06]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 2.5459e-08, 0.0000e+00,
         1.8677e-03, 0.0000e+00, 8.9497e-06]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:626|train loss: 9.3854|l_recon: 8.9231|ridge_loss: 0.4623|l2_loss:8.4980|lr: 0.000000|prox_lam:15.00:  31%|███▏      | 626/2000 [05:48<12:44,  1.80it/s] 


Early stopping at epoch 626
training complete
最佳训练损失: 7.772578 (Epoch 526)
lam: 14.999999999999964,lr: 1e-05, norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 1.9305482e-03 0.0000000e+00 2.2010288e-05]]
Training done, time: 348.47
ts_target: 13
{'overall': {'trainable': '17.21M', 'non_trainable': '0', 'total': '17.21M'}, 'denoise_Transformer': {'trainable': '17.21M', 'non_trainable': '0', 'total': '17.21M'}}
lorenz_p15_t1000_f40: start training...


Epoch:50|train loss: 39.2937|l_recon: 18.8096|ridge_loss: 20.4841|l2_loss:13.4195|lr: 0.000500|prox_lam:5.00:   2%|▎         | 50/2000 [00:59<28:34,  1.14it/s]      

lorenz_p15_t1000_f40: train: Epoch 50/2000 Reconstruction Loss: 18.8096 Reglarization: 20.4841 | Total Loss: 39.293736 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.49119177 0.5376109  0.52476007 0.46516475 0.50727457 0.49776012
  0.4842351  0.53442764 0.4805988  0.52129644 0.5780798  0.4280911
  0.46843207 0.7367826  0.5144717 ]]
Norm: tensor([[0.4912, 0.5376, 0.5248, 0.4652, 0.5073, 0.4978, 0.4842, 0.5344, 0.4806,
         0.5213, 0.5781, 0.4281, 0.4684, 0.7368, 0.5145]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:100|train loss: 25.1786|l_recon: 15.0632|ridge_loss: 10.1153|l2_loss:10.8966|lr: 0.000500|prox_lam:5.00:   5%|▌         | 100/2000 [01:33<16:42,  1.90it/s]

lorenz_p15_t1000_f40: train: Epoch 100/2000 Reconstruction Loss: 15.0632 Reglarization: 10.1153 | Total Loss: 25.178565 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.00313918 0.00023575 0.         0.00081534 0.         0.00031594
  0.00205679 0.         0.00075774 0.00222627 0.00096249 0.01671212
  0.02255157 0.12860525 0.01786724]]
Norm: tensor([[0.0031, 0.0002, 0.0000, 0.0008, 0.0000, 0.0003, 0.0021, 0.0000, 0.0008,
         0.0022, 0.0010, 0.0167, 0.0226, 0.1286, 0.0179]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:150|train loss: 16.6388|l_recon: 12.7339|ridge_loss: 3.9049|l2_loss:11.6539|lr: 0.000500|prox_lam:5.00:   8%|▊         | 150/2000 [02:00<18:10,  1.70it/s] 

lorenz_p15_t1000_f40: train: Epoch 150/2000 Reconstruction Loss: 12.7339 Reglarization: 3.9049 | Total Loss: 16.638823 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[2.0819459e-04 4.1477848e-05 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 1.9094581e-04 0.0000000e+00 3.0363863e-04
  0.0000000e+00 3.3483964e-03 5.1242835e-03 2.0245349e-02 4.1330624e-03]]
Norm: tensor([[2.0819e-04, 4.1478e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 1.9095e-04, 0.0000e+00, 3.0364e-04, 0.0000e+00, 3.3484e-03,
         5.1243e-03, 2.0245e-02, 4.1331e-03]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:200|train loss: 12.2245|l_recon: 10.7502|ridge_loss: 1.4743|l2_loss:10.6089|lr: 0.000500|prox_lam:6.00:  10%|█         | 200/2000 [02:27<15:17,  1.96it/s]

lorenz_p15_t1000_f40: train: Epoch 200/2000 Reconstruction Loss: 10.7502 Reglarization: 1.4743 | Total Loss: 12.224527 || Learning Rate: 0.000500Prox lambda: 6.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[1.8031987e-04 1.8579885e-07 9.8054305e-05 8.5610896e-05 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 7.1533001e-04 0.0000000e+00
  0.0000000e+00 9.1121346e-04 1.8673610e-03 8.3787609e-03 1.8721548e-03]]
Norm: tensor([[1.8032e-04, 1.8580e-07, 9.8054e-05, 8.5611e-05, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 7.1533e-04, 0.0000e+00, 0.0000e+00, 9.1121e-04,
         1.8674e-03, 8.3788e-03, 1.8722e-03]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:250|train loss: 11.3218|l_recon: 10.6201|ridge_loss: 0.7017|l2_loss:10.3328|lr: 0.000250|prox_lam:7.00:  12%|█▎        | 250/2000 [02:51<14:01,  2.08it/s]

lorenz_p15_t1000_f40: train: Epoch 250/2000 Reconstruction Loss: 10.6201 Reglarization: 0.7017 | Total Loss: 11.321779 || Learning Rate: 0.000250Prox lambda: 7.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.        0.        0.        0.        0.        0.        0.
  0.        0.        0.        0.        0.        0.        0.0044334
  0.       ]]
Norm: tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0044, 0.0000]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:300|train loss: 10.0458|l_recon: 9.5478|ridge_loss: 0.4979|l2_loss:9.8251|lr: 0.000125|prox_lam:8.00:  15%|█▌        | 300/2000 [03:16<13:39,  2.07it/s]  

lorenz_p15_t1000_f40: train: Epoch 300/2000 Reconstruction Loss: 9.5478 Reglarization: 0.4979 | Total Loss: 10.045764 || Learning Rate: 0.000125Prox lambda: 8.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.00032433 0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.00012636
  0.         0.00340064 0.        ]]
Norm: tensor([[0.0003, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0001, 0.0000, 0.0034, 0.0000]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:350|train loss: 9.3305|l_recon: 8.8924|ridge_loss: 0.4382|l2_loss:10.2470|lr: 0.000031|prox_lam:9.00:  18%|█▊        | 350/2000 [03:41<13:05,  2.10it/s] 

lorenz_p15_t1000_f40: train: Epoch 350/2000 Reconstruction Loss: 8.8924 Reglarization: 0.4382 | Total Loss: 9.330538 || Learning Rate: 0.000031Prox lambda: 9.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[5.7987781e-05 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 2.2066776e-03 0.0000000e+00]]
Norm: tensor([[5.7988e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 2.2067e-03, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:400|train loss: 9.6655|l_recon: 9.2502|ridge_loss: 0.4153|l2_loss:9.1863|lr: 0.000008|prox_lam:10.00:  20%|██        | 400/2000 [04:06<13:18,  2.00it/s] 

lorenz_p15_t1000_f40: train: Epoch 400/2000 Reconstruction Loss: 9.2502 Reglarization: 0.4153 | Total Loss: 9.665539 || Learning Rate: 0.000008Prox lambda: 10.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.        0.        0.        0.        0.        0.        0.
  0.        0.        0.        0.        0.        0.        0.0018646
  0.       ]]
Norm: tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0019, 0.0000]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:450|train loss: 9.3507|l_recon: 8.9430|ridge_loss: 0.4078|l2_loss:8.8139|lr: 0.000004|prox_lam:11.00:  22%|██▎       | 450/2000 [04:30<11:53,  2.17it/s] 

lorenz_p15_t1000_f40: train: Epoch 450/2000 Reconstruction Loss: 8.9430 Reglarization: 0.4078 | Total Loss: 9.350718 || Learning Rate: 0.000004Prox lambda: 11.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[8.1404251e-06 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  2.4500514e-06 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 1.7231654e-03 0.0000000e+00]]
Norm: tensor([[8.1404e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 2.4501e-06,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 1.7232e-03, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:500|train loss: 9.5209|l_recon: 9.1157|ridge_loss: 0.4052|l2_loss:9.6717|lr: 0.000001|prox_lam:12.00:  25%|██▌       | 500/2000 [04:54<12:04,  2.07it/s] 

lorenz_p15_t1000_f40: train: Epoch 500/2000 Reconstruction Loss: 9.1157 Reglarization: 0.4052 | Total Loss: 9.520917 || Learning Rate: 0.000001Prox lambda: 12.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[1.3360497e-05 0.0000000e+00 0.0000000e+00 5.7175298e-06 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 1.6168201e-03 0.0000000e+00]]
Norm: tensor([[1.3360e-05, 0.0000e+00, 0.0000e+00, 5.7175e-06, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 1.6168e-03, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:550|train loss: 9.2442|l_recon: 8.8399|ridge_loss: 0.4043|l2_loss:8.9788|lr: 0.000000|prox_lam:13.00:  28%|██▊       | 550/2000 [05:18<11:13,  2.15it/s] 

lorenz_p15_t1000_f40: train: Epoch 550/2000 Reconstruction Loss: 8.8399 Reglarization: 0.4043 | Total Loss: 9.244166 || Learning Rate: 0.000000Prox lambda: 13.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[1.4235341e-05 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 1.5287446e-03 0.0000000e+00]]
Norm: tensor([[1.4235e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 1.5287e-03, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:600|train loss: 8.9896|l_recon: 8.5855|ridge_loss: 0.4040|l2_loss:10.2473|lr: 0.000000|prox_lam:14.00:  30%|███       | 600/2000 [05:41<10:56,  2.13it/s]

lorenz_p15_t1000_f40: train: Epoch 600/2000 Reconstruction Loss: 8.5855 Reglarization: 0.4040 | Total Loss: 8.989551 || Learning Rate: 0.000000Prox lambda: 14.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[7.3655638e-06 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 1.4865311e-03 0.0000000e+00]]
Norm: tensor([[7.3656e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 1.4865e-03, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:650|train loss: 9.0615|l_recon: 8.6577|ridge_loss: 0.4039|l2_loss:9.2407|lr: 0.000000|prox_lam:15.00:  32%|███▎      | 650/2000 [06:05<10:53,  2.07it/s] 

lorenz_p15_t1000_f40: train: Epoch 650/2000 Reconstruction Loss: 8.6577 Reglarization: 0.4039 | Total Loss: 9.061539 || Learning Rate: 0.000000Prox lambda: 15.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[6.440263e-06 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
  0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
  0.000000e+00 6.217965e-07 0.000000e+00 1.453999e-03 0.000000e+00]]
Norm: tensor([[6.4403e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 6.2180e-07,
         0.0000e+00, 1.4540e-03, 0.0000e+00]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:660|train loss: 9.6363|l_recon: 9.2325|ridge_loss: 0.4039|l2_loss:9.0071|lr: 0.000000|prox_lam:16.00:  33%|███▎      | 660/2000 [06:10<12:31,  1.78it/s]


Early stopping at epoch 660
training complete
最佳训练损失: 7.791057 (Epoch 560)
lam: 15.999999999999961,lr: 1e-05, norm: [[6.3231482e-06 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 1.5166276e-03 0.0000000e+00]]
Training done, time: 370.26
ts_target: 14
{'overall': {'trainable': '17.21M', 'non_trainable': '0', 'total': '17.21M'}, 'denoise_Transformer': {'trainable': '17.21M', 'non_trainable': '0', 'total': '17.21M'}}
lorenz_p15_t1000_f40: start training...


Epoch:50|train loss: 38.6090|l_recon: 18.1299|ridge_loss: 20.4790|l2_loss:12.0186|lr: 0.000500|prox_lam:5.00:   2%|▎         | 50/2000 [01:02<27:14,  1.19it/s]      

lorenz_p15_t1000_f40: train: Epoch 50/2000 Reconstruction Loss: 18.1299 Reglarization: 20.4790 | Total Loss: 38.608966 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.64404064 0.47391894 0.42387155 0.55778176 0.51967824 0.50908947
  0.4923729  0.52411145 0.5046379  0.51351017 0.5400754  0.55234706
  0.52644056 0.48533607 0.6880715 ]]
Norm: tensor([[0.6440, 0.4739, 0.4239, 0.5578, 0.5197, 0.5091, 0.4924, 0.5241, 0.5046,
         0.5135, 0.5401, 0.5523, 0.5264, 0.4853, 0.6881]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:100|train loss: 25.8192|l_recon: 15.4055|ridge_loss: 10.4136|l2_loss:11.3309|lr: 0.000500|prox_lam:5.00:   5%|▌         | 100/2000 [01:43<18:05,  1.75it/s]

lorenz_p15_t1000_f40: train: Epoch 100/2000 Reconstruction Loss: 15.4055 Reglarization: 10.4136 | Total Loss: 25.819171 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[1.1236593e-02 0.0000000e+00 8.8762026e-05 1.3708230e-04 1.6975352e-03
  0.0000000e+00 0.0000000e+00 2.6439892e-03 0.0000000e+00 1.4352743e-03
  0.0000000e+00 2.6022550e-03 1.0369960e-02 1.8039554e-02 1.4995168e-01]]
Norm: tensor([[1.1237e-02, 0.0000e+00, 8.8762e-05, 1.3708e-04, 1.6975e-03, 0.0000e+00,
         0.0000e+00, 2.6440e-03, 0.0000e+00, 1.4353e-03, 0.0000e+00, 2.6023e-03,
         1.0370e-02, 1.8040e-02, 1.4995e-01]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:150|train loss: 15.8786|l_recon: 11.1190|ridge_loss: 4.7596|l2_loss:10.1374|lr: 0.000500|prox_lam:5.00:   8%|▊         | 150/2000 [02:35<53:40,  1.74s/it]  

lorenz_p15_t1000_f40: train: Epoch 150/2000 Reconstruction Loss: 11.1190 Reglarization: 4.7596 | Total Loss: 15.878580 || Learning Rate: 0.000500Prox lambda: 5.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.00363798 0.         0.         0.00012667 0.00067337 0.
  0.         0.00015057 0.         0.         0.00015949 0.
  0.00507258 0.0069572  0.05782388]]
Norm: tensor([[0.0036, 0.0000, 0.0000, 0.0001, 0.0007, 0.0000, 0.0000, 0.0002, 0.0000,
         0.0000, 0.0002, 0.0000, 0.0051, 0.0070, 0.0578]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:200|train loss: 13.1349|l_recon: 11.2654|ridge_loss: 1.8696|l2_loss:9.5673|lr: 0.000500|prox_lam:6.00:  10%|█         | 200/2000 [03:07<18:32,  1.62it/s] 

lorenz_p15_t1000_f40: train: Epoch 200/2000 Reconstruction Loss: 11.2654 Reglarization: 1.8696 | Total Loss: 13.134936 || Learning Rate: 0.000500Prox lambda: 6.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.00116319 0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.00089907 0.00251986 0.0097035 ]]
Norm: tensor([[0.0012, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0009, 0.0025, 0.0097]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:250|train loss: 10.4216|l_recon: 9.5128|ridge_loss: 0.9088|l2_loss:9.9003|lr: 0.000250|prox_lam:7.00:  12%|█▎        | 250/2000 [03:43<18:49,  1.55it/s]  

lorenz_p15_t1000_f40: train: Epoch 250/2000 Reconstruction Loss: 9.5128 Reglarization: 0.9088 | Total Loss: 10.421562 || Learning Rate: 0.000250Prox lambda: 7.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 6.0997321e-05 0.0000000e+00 1.6736123e-04
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 7.3654992e-03]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 6.0997e-05, 0.0000e+00, 1.6736e-04, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 7.3655e-03]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:300|train loss: 9.7831|l_recon: 9.1252|ridge_loss: 0.6579|l2_loss:9.6409|lr: 0.000125|prox_lam:8.00:  15%|█▌        | 300/2000 [04:22<18:18,  1.55it/s]   

lorenz_p15_t1000_f40: train: Epoch 300/2000 Reconstruction Loss: 9.1252 Reglarization: 0.6579 | Total Loss: 9.783065 || Learning Rate: 0.000125Prox lambda: 8.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[2.8501963e-05 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 1.2849366e-04 0.0000000e+00 3.9393869e-03]]
Norm: tensor([[2.8502e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         1.2849e-04, 0.0000e+00, 3.9394e-03]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:350|train loss: 9.1784|l_recon: 8.6044|ridge_loss: 0.5740|l2_loss:9.4318|lr: 0.000031|prox_lam:9.00:  18%|█▊        | 350/2000 [04:58<18:06,  1.52it/s] 

lorenz_p15_t1000_f40: train: Epoch 350/2000 Reconstruction Loss: 8.6044 Reglarization: 0.5740 | Total Loss: 9.178387 || Learning Rate: 0.000031Prox lambda: 9.0000start lr: 0.0000 Weight decay: 0.0000d_model: 512.0000 d_ff: 2048.0000 n_heads: 4.0000 n_layer_enc: 2.0000 n_layer_dec: 2.0000 seq_length: 32.0000 Batch size: 64.0000 Dropout: 0.1000
 Norm: [[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 6.2245534e-05 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 3.0747312e-03]]
Norm: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 6.2246e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 3.0747e-03]], device='cuda:0',
       grad_fn=<LinalgVectorNormBackward0>)


Epoch:370|train loss: 8.9492|l_recon: 8.3925|ridge_loss: 0.5567|l2_loss:8.4039|lr: 0.000016|prox_lam:10.00:  18%|█▊        | 370/2000 [05:13<22:59,  1.18it/s]

Early stopping at epoch 370
training complete
最佳训练损失: 7.894440 (Epoch 270)
lam: 9.999999999999982,lr: 1e-05, norm: [[0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.000127   0.
  0.         0.         0.00545257]]
Training done, time: 313.12





In [33]:
# Score the inferred per-target vectors in `inferred_gc` against the
# ground-truth matrix, one target (row) at a time.
# The spelling `inferrence` is kept so any other cell that refers to this
# global keeps working.
inferrence = np.array([i.cpu().detach().numpy() for i in inferred_gc])

# The ground-truth file does not change between targets, so read it once
# up front instead of re-loading it on every loop iteration.
# NOTE(review): absolute, machine-specific path; consider a configurable data
# directory so the notebook can run elsewhere.
ground = np.load('/data/0shared/liubo/diffusion-gc/DiffuGC/my_exp/lorenz_p15_t1000_f40_GC.npy')

for target in range(feature_size):
    print(f"target: {target}")
    # Row `target` of the ground truth serves as labels, row `target` of the
    # inferred array as scores.
    auprc = metrics.average_precision_score(ground[target, :], inferrence[target, :])
    auroc = metrics.roc_auc_score(ground[target, :], inferrence[target, :])
    print(f"auprc: {auprc}, auroc: {auroc}")

target: 0
auprc: 1.0, auroc: 1.0
target: 1
auprc: 1.0, auroc: 1.0
target: 2
auprc: 1.0, auroc: 1.0
target: 3
auprc: 0.45, auroc: 0.625
target: 4
auprc: 1.0, auroc: 1.0
target: 5
auprc: 1.0, auroc: 1.0
target: 6
auprc: 0.55, auroc: 0.6818181818181817
target: 7
auprc: 0.45, auroc: 0.5909090909090908
target: 8
auprc: 0.48333333333333334, auroc: 0.6136363636363635
target: 9
auprc: 1.0, auroc: 1.0
target: 10
auprc: 0.45, auroc: 0.5909090909090908
target: 11
auprc: 0.45, auroc: 0.625
target: 12
auprc: 0.45, auroc: 0.5909090909090908
target: 13
auprc: 0.45, auroc: 0.5909090909090908
target: 14
auprc: 0.45, auroc: 0.5909090909090908


In [12]:
# Sanity check of the two ranking metrics on a single hand-entered score
# vector (`norm`) against a hand-entered 0/1 label vector (`ground`).
norm = [
    1.6527136e-03, 3.0629034e-04, 3.0610681e-07, 0.0000000e+00, 0.0000000e+00,
    0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
    0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 3.1029538e-04, 7.8968407e-04,
]
ground = [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]

# average_precision_score yields the AUPRC value; it was previously printed
# under the "AUROC:" label, which made the two output lines indistinguishable.
auprc = metrics.average_precision_score(ground, norm)
print("AUPRC:", auprc)
auroc = metrics.roc_auc_score(ground, norm)
print("AUROC:", auroc)

AUROC: 1.0
AUROC: 1.0


In [None]:
# Forecast evaluation: run `trainer.restore` over the whole `test` array in a
# single batch and report the MSE on the entries where the dataset mask is False.
#
# NOTE(review): the `get_dataset.__init__` visible at the top of this notebook
# takes (data, seq_length, mode, train_split) and does not visibly accept
# `regular` / `pred_length` or set a `.mask` attribute -- confirm this cell
# matches the dataset class actually in scope before running it.
# NOTE(review): `test`, `real` and `trainer` are expected to exist already;
# they are not defined in this cell.
seq_length, feat_num = test.shape[1:]
pred_length = 24

test_dataset = get_dataset(test, regular=False, pred_length=pred_length)

# batch_size equals the number of test windows, so the loader yields one batch.
test_dataloader = DataLoader(
    test_dataset,
    batch_size=test.shape[0],
    shuffle=False,
    num_workers=0,
    pin_memory=True,
    sampler=None,
)

# Only the first value returned by `restore` is used; the rest are discarded.
sample, *_ = trainer.restore(
    test_dataloader,
    shape=[seq_length, feat_num],
    coef=1e-1,
    stepsize=5e-2,
    sampling_steps=200,
)

mask = test_dataset.mask
# `~mask` keeps only the positions where the mask is False.
mse = mean_squared_error(sample[~mask], real[~mask])
print(mse)

In [None]:
import matplotlib.pyplot as plt

plt.rcParams["font.size"] = 12

# Draw five figures for the first sample (index 0) of `real` / `sample`:
# the history segment, then ground truth and prediction over the last
# `pred_length + 1` steps.
history_len = seq_length - pred_length
history_steps = range(0, history_len)
# Starts at history_len - 1, so the forecast curves share one point with the
# end of the history curve.
forecast_steps = range(history_len - 1, seq_length)

for idx in range(5):
    # NOTE(review): the column index is `-idx`, so idx=0 selects column 0 while
    # idx=1..4 select columns -1..-4 (counted from the end). Confirm whether
    # `-(idx + 1)` or plain `idx` was intended.
    fig, ax = plt.subplots(figsize=(15, 3))
    ax.plot(history_steps, real[0, :history_len, -idx],
            color='c', linestyle='solid', label='History')
    ax.plot(forecast_steps, real[0, -pred_length-1:, -idx],
            color='g', linestyle='solid', label='Ground Truth')
    ax.plot(forecast_steps, sample[0, -pred_length-1:, -idx],
            color='r', linestyle='solid', label='Prediction')
    ax.tick_params('both', labelsize=15)
    fig.subplots_adjust(bottom=0.1, left=0.05, right=0.99, top=0.95)
    ax.legend()
    plt.show()