In [1]:
import random
import numpy as np
import torch
from exp.exp_long_term_forecasting import Exp_Long_Term_Forecast
from exp.exp_imputation import Exp_Imputation
from exp.exp_short_term_forecasting import Exp_Short_Term_Forecast
from exp.exp_anomaly_detection import Exp_Anomaly_Detection
from exp.exp_classification import Exp_Classification
from data_provider.data_creat import *
import akshare as ak
from torch.utils.tensorboard import SummaryWriter

In [2]:
class Args:
    """Static configuration namespace for a single experiment run.

    Every option is a plain class attribute; the instance created below is
    handed to the experiment class, and later cells overwrite some of the
    values on that instance (e.g. ``enc_in``/``dec_in``/``c_out``).
    """

    # ------------------------------------------------------------------
    # Basic configuration
    # ------------------------------------------------------------------
    # Options: [long_term_forecast, short_term_forecast, imputation,
    #           classification, anomaly_detection]
    task_name = 'long_term_forecast'
    is_training = 1
    model_id = 'Stock_96_96'
    # Model name, options: [Autoformer, Transformer, TimesNet]
    model = 'TimesNet'

    # ------------------------------------------------------------------
    # Data loading
    # ------------------------------------------------------------------
    # Dataset type, options:
    # [ETTh1, ETTh2, ETTm1, ETTm2, custom, m4, PSM, MSL, SMAP, SMD, SWAT, UEA]
    data = 'custom'
    root_path = './dataset/Stock/'
    data_path = 'all_data_scaled.csv'
    # Forecasting mode:
    #   M  - multivariate input predicts multivariate output
    #   S  - univariate input predicts univariate output
    #   MS - multivariate input predicts univariate output
    features = 'MS'
    # Name of the target column (the target feature in S or MS mode)
    target = 'OT'
    # Sampling granularity of the timestamps, options:
    # [s: second, t: minute, h: hour, d: day, b: business day, w: week, m: month]
    freq = 'd'
    # Location of the model checkpoints
    checkpoints = './checkpoints/'

    # ------------------------------------------------------------------
    # Forecasting task
    # ------------------------------------------------------------------
    # Input sequence length used for model training
    seq_len = 60
    # Start-token length: the length of the labelled part of the model's
    # output target, similar to a sliding-window length
    label_len = 20
    # Prediction sequence length
    pred_len = 1
    # Seasonal pattern (for the M4 dataset)
    seasonal_patterns = 'Monthly'
    # Whether to invert the output data
    inverse = False

    # ------------------------------------------------------------------
    # Imputation task
    # ------------------------------------------------------------------
    # Fraction of data that is missing in the imputation task
    mask_rate = 0.25

    # ------------------------------------------------------------------
    # Anomaly detection task
    # ------------------------------------------------------------------
    # Proportion of anomalous points in anomaly detection
    anomaly_ratio = 0.25

    # ------------------------------------------------------------------
    # Model definition
    # ------------------------------------------------------------------
    # Fourier transform in TimesBlock: keep the top-k periods by frequency rank
    top_k = 5
    # Number of convolution kernels in the Inception block
    num_kernels = 6
    # Number of encoder input features
    # (enc_in / dec_in / c_out are overwritten on the instance in a later cell)
    enc_in = 38
    # Number of decoder input features
    dec_in = 38
    # Number of output channels
    c_out = 38
    # Number of hidden units of the linear layers
    d_model = 32
    # Number of hidden units of the FFN layers
    d_ff = 32
    # Multi-head attention heads
    n_heads = 8
    # Number of encoder layers
    e_layers = 2
    # Number of decoder layers
    d_layers = 1
    # Sliding-window length
    moving_avg = 20
    # Sampling factor applied to Q
    factor = 3
    # Whether to apply the down-sampling (pooling) step
    distil = True
    # Dropout rate
    dropout = 0.1
    # Time-feature embedding method, options: [timeF, fixed, learned]
    embed = 'timeF'
    # Activation function type
    activation = 'gelu'
    # Whether to output the attention
    output_attention = False

    # ------------------------------------------------------------------
    # Optimisation
    # ------------------------------------------------------------------
    # Number of parallel workers
    num_workers = 10
    # Number of experiment repetitions
    itr = 1
    # Number of training epochs
    train_epochs = 500
    # Batch size
    batch_size = 60
    # Early-stopping patience
    patience = 5
    # Learning rate
    learning_rate = 0.0001
    # Experiment description
    des = 'stock'
    # Loss function
    loss = 'MSE'
    # Learning-rate decay strategy
    lradj = 'type1'
    # Use mixed-precision training
    use_amp = False

    # ------------------------------------------------------------------
    # GPU
    # ------------------------------------------------------------------
    # Use the GPU
    use_gpu = True
    gpu = 0
    # Use multiple GPUs
    use_multi_gpu = False
    # Device ids used in multi-GPU mode
    devices = '0,1,2,3'

    # ------------------------------------------------------------------
    # De-stationary projector parameters
    # ------------------------------------------------------------------
    # Hidden-layer dimensions of the projector (list)
    p_hidden_dims = [128, 128]
    # Number of hidden layers in the projector
    p_hidden_layers = 2


# Instantiate the configuration object used by the rest of the notebook
args = Args()

In [3]:
# Seed the Python, PyTorch and NumPy random number generators with one shared
# value so that results are reproducible.
fix_seed = 2021
random.seed(fix_seed)
torch.manual_seed(fix_seed)
np.random.seed(fix_seed)


# Column count (hard-coded here rather than read from the data file).
num_columns = 35
# args.des = NUM
# Encoder input, decoder input and output channel counts all use the column
# count minus one.
args.enc_in = args.dec_in = args.c_out = num_columns - 1

In [4]:
# Check for and configure the GPU: it is only used when it was requested in
# the config and CUDA is actually available.
args.use_gpu = torch.cuda.is_available() and args.use_gpu
if not args.use_gpu:
    device = torch.device("cpu")
    print("使用 CPU.")
else:
    print("使用 GPU.")
    # Total number of GPUs visible to this process
    total_cuda_devices = torch.cuda.device_count()
    print(f"系统中总共有 {total_cuda_devices} 个 CUDA 设备可用。")
    if not args.use_multi_gpu:
        # Single-GPU mode always runs on device 0.
        args.gpu = 0
        device = torch.device("cuda")
        print("使用单个 GPU.")
    else:
        # Parse the comma-separated id string (e.g. '0,1,2,3') into ints;
        # the first id becomes the primary device.
        args.devices = args.devices.replace(' ', '')
        device_ids = args.devices.split(',')
        args.device_ids = list(map(int, device_ids))
        args.gpu = args.device_ids[0]

        # Report the multi-GPU setup
        print(f"使用多个GPU: {args.device_ids}")
        device = torch.device(f"cuda:{args.gpu}")
        print(f"Primary GPU (cuda:{args.gpu}) is in use.")

# Pick the experiment class that matches the configured task. Unknown task
# names fall back to long-term forecasting.
_exp_by_task = {
    'long_term_forecast': Exp_Long_Term_Forecast,
    'short_term_forecast': Exp_Short_Term_Forecast,
    'imputation': Exp_Imputation,
    'anomaly_detection': Exp_Anomaly_Detection,
    'classification': Exp_Classification,
}
Exp = _exp_by_task.get(args.task_name, Exp_Long_Term_Forecast)

# Run training and/or testing.

# Template of the experiment identifier. The same string is passed to
# exp.train() and exp.test(), so the training and test-only paths must build
# it identically -- hence one shared template and one helper.
_SETTING_TEMPLATE = '{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_dt{}_{}_{}'


def build_setting(args, ii):
    """Build the experiment identifier string for one run.

    Args:
        args: configuration object exposing the attributes read below
            (task_name, model_id, model, data, features, seq_len, label_len,
            pred_len, d_model, n_heads, e_layers, d_layers, d_ff, factor,
            embed, distil, des).
        ii: index of the experiment repetition.

    Returns:
        The formatted setting string.
    """
    return _SETTING_TEMPLATE.format(
        args.task_name,
        args.model_id,
        args.model,
        args.data,
        args.features,
        args.seq_len,
        args.label_len,
        args.pred_len,
        args.d_model,
        args.n_heads,
        args.e_layers,
        args.d_layers,
        args.d_ff,
        args.factor,
        args.embed,
        args.distil,
        args.des, ii)


if args.is_training:
    for ii in range(args.itr):
        # Identifier for this repetition
        setting = build_setting(args, ii)

        exp = Exp(args)  # set up the experiment
        print('>>>>>>>开始训练 : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting))
        exp.train(setting)

        print('>>>>>>>测试 : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
        exp.test(setting)
        if args.use_gpu:
            # Release cached GPU memory between repetitions
            torch.cuda.empty_cache()
else:
    # Test-only mode: rebuild the identifier of repetition 0 and call
    # test() with test=1.
    ii = 0
    setting = build_setting(args, ii)

    exp = Exp(args)  # set up the experiment
    print('>>>>>>>测试 : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
    exp.test(setting, test=1)
    if args.use_gpu:
        torch.cuda.empty_cache()

使用 GPU.
系统中总共有 1 个 CUDA 设备可用。
使用单个 GPU.
Use GPU: cuda:0
>>>>>>>开始训练 : long_term_forecast_Stock_96_96_TimesNet_custom_ftMS_sl60_ll20_pl1_dm32_nh8_el2_dl1_df32_fc3_ebtimeF_dtTrue_stock_0>>>>>>>>>>>>>>>>>>>>>>>>>>
train 546486
val 78079
test 156156


  1%|          | 100/9108 [01:43<1:15:02,  2.00it/s]

	iters: 100, epoch: 1 | loss: 0.8073885
	speed: 1.0359s/iter; left time: 4717611.3536s


  2%|▏         | 200/9108 [02:29<1:06:56,  2.22it/s]

	iters: 200, epoch: 1 | loss: 1.1869667
	speed: 0.4559s/iter; left time: 2076302.2233s


  3%|▎         | 300/9108 [03:12<1:06:14,  2.22it/s]

	iters: 300, epoch: 1 | loss: 2.1429231
	speed: 0.4359s/iter; left time: 1985003.1334s


  4%|▍         | 400/9108 [03:58<1:09:06,  2.10it/s]

	iters: 400, epoch: 1 | loss: 1.0122426
	speed: 0.4571s/iter; left time: 2081348.1088s


  5%|▌         | 500/9108 [04:46<1:10:47,  2.03it/s]

	iters: 500, epoch: 1 | loss: 0.9040704
	speed: 0.4821s/iter; left time: 2195434.0149s


  7%|▋         | 600/9108 [05:36<1:15:55,  1.87it/s]

	iters: 600, epoch: 1 | loss: 0.3331074
	speed: 0.5001s/iter; left time: 2277238.0967s


  8%|▊         | 700/9108 [06:29<1:13:30,  1.91it/s]

	iters: 700, epoch: 1 | loss: 0.6949440
	speed: 0.5279s/iter; left time: 2403643.3909s


  9%|▉         | 800/9108 [07:21<1:07:53,  2.04it/s]

	iters: 800, epoch: 1 | loss: 1.2244446
	speed: 0.5219s/iter; left time: 2376286.9130s


 10%|▉         | 900/9108 [08:13<1:10:13,  1.95it/s]

	iters: 900, epoch: 1 | loss: 0.7717887
	speed: 0.5197s/iter; left time: 2366446.7870s


 11%|█         | 1000/9108 [09:05<1:10:28,  1.92it/s]

	iters: 1000, epoch: 1 | loss: 0.7775337
	speed: 0.5150s/iter; left time: 2344735.7354s


 12%|█▏        | 1100/9108 [09:55<1:06:44,  2.00it/s]

	iters: 1100, epoch: 1 | loss: 0.1859683
	speed: 0.5019s/iter; left time: 2284883.0686s


 13%|█▎        | 1200/9108 [10:44<1:04:34,  2.04it/s]

	iters: 1200, epoch: 1 | loss: 0.6847882
	speed: 0.4924s/iter; left time: 2241777.4600s


 14%|█▍        | 1300/9108 [11:32<1:01:23,  2.12it/s]

	iters: 1300, epoch: 1 | loss: 1.2902029
	speed: 0.4795s/iter; left time: 2183019.2090s


 15%|█▌        | 1400/9108 [12:19<56:32,  2.27it/s]  

	iters: 1400, epoch: 1 | loss: 0.4440005
	speed: 0.4681s/iter; left time: 2131291.1951s


 16%|█▋        | 1500/9108 [13:04<55:41,  2.28it/s]  

	iters: 1500, epoch: 1 | loss: 1.0684814
	speed: 0.4518s/iter; left time: 2056712.7389s


 18%|█▊        | 1600/9108 [13:48<55:19,  2.26it/s]

	iters: 1600, epoch: 1 | loss: 0.7017245
	speed: 0.4425s/iter; left time: 2014566.1139s


 19%|█▊        | 1700/9108 [14:32<53:20,  2.31it/s]

	iters: 1700, epoch: 1 | loss: 0.5946164
	speed: 0.4337s/iter; left time: 1974520.3464s


 20%|█▉        | 1800/9108 [15:14<51:21,  2.37it/s]

	iters: 1800, epoch: 1 | loss: 0.7823079
	speed: 0.4258s/iter; left time: 1938546.8421s


 21%|██        | 1900/9108 [15:57<49:45,  2.41it/s]

	iters: 1900, epoch: 1 | loss: 0.1502848
	speed: 0.4267s/iter; left time: 1942517.1525s


 22%|██▏       | 2000/9108 [16:40<48:23,  2.45it/s]

	iters: 2000, epoch: 1 | loss: 0.3025312
	speed: 0.4270s/iter; left time: 1943762.8933s


 23%|██▎       | 2100/9108 [17:23<50:15,  2.32it/s]

	iters: 2100, epoch: 1 | loss: 0.2785082
	speed: 0.4303s/iter; left time: 1958458.8716s


 24%|██▍       | 2200/9108 [18:06<51:59,  2.21it/s]

	iters: 2200, epoch: 1 | loss: 0.6031002
	speed: 0.4328s/iter; left time: 1969972.9150s


 25%|██▌       | 2300/9108 [18:51<52:05,  2.18it/s]

	iters: 2300, epoch: 1 | loss: 0.6937140
	speed: 0.4543s/iter; left time: 2067643.0047s


 26%|██▋       | 2400/9108 [19:35<47:46,  2.34it/s]

	iters: 2400, epoch: 1 | loss: 0.2148872
	speed: 0.4318s/iter; left time: 1965466.7733s


 27%|██▋       | 2500/9108 [20:17<47:34,  2.31it/s]

	iters: 2500, epoch: 1 | loss: 0.5373178
	speed: 0.4289s/iter; left time: 1952154.8658s


 29%|██▊       | 2600/9108 [21:00<45:45,  2.37it/s]

	iters: 2600, epoch: 1 | loss: 0.9784874
	speed: 0.4217s/iter; left time: 1919486.9482s


 30%|██▉       | 2700/9108 [21:43<46:28,  2.30it/s]

	iters: 2700, epoch: 1 | loss: 0.9947442
	speed: 0.4338s/iter; left time: 1974542.0605s


 31%|███       | 2800/9108 [22:26<43:55,  2.39it/s]

	iters: 2800, epoch: 1 | loss: 0.4123195
	speed: 0.4307s/iter; left time: 1960200.3796s


 32%|███▏      | 2900/9108 [23:10<46:38,  2.22it/s]

	iters: 2900, epoch: 1 | loss: 0.5478000
	speed: 0.4387s/iter; left time: 1996359.1281s


 33%|███▎      | 3000/9108 [23:54<45:18,  2.25it/s]

	iters: 3000, epoch: 1 | loss: 0.4732886
	speed: 0.4432s/iter; left time: 2016908.4927s


 34%|███▍      | 3100/9108 [24:40<46:54,  2.13it/s]

	iters: 3100, epoch: 1 | loss: 0.6322207
	speed: 0.4577s/iter; left time: 2082972.9716s


 35%|███▌      | 3200/9108 [25:49<45:07,  2.18it/s]  

	iters: 3200, epoch: 1 | loss: 0.3015834
	speed: 0.6892s/iter; left time: 3136565.5904s


 36%|███▌      | 3300/9108 [26:34<42:11,  2.29it/s]

	iters: 3300, epoch: 1 | loss: 1.0083817
	speed: 0.4454s/iter; left time: 2026809.2485s


 37%|███▋      | 3400/9108 [27:19<43:50,  2.17it/s]

	iters: 3400, epoch: 1 | loss: 0.6332812
	speed: 0.4514s/iter; left time: 2054156.6482s


 38%|███▊      | 3500/9108 [28:03<40:08,  2.33it/s]

	iters: 3500, epoch: 1 | loss: 0.2936819
	speed: 0.4419s/iter; left time: 2010898.7795s


 40%|███▉      | 3600/9108 [28:47<38:29,  2.39it/s]

	iters: 3600, epoch: 1 | loss: 0.2359741
	speed: 0.4411s/iter; left time: 2007015.1597s


 41%|████      | 3700/9108 [29:31<38:14,  2.36it/s]

	iters: 3700, epoch: 1 | loss: 0.8681247
	speed: 0.4376s/iter; left time: 1991314.1837s


 42%|████▏     | 3800/9108 [30:14<36:47,  2.41it/s]

	iters: 3800, epoch: 1 | loss: 1.0642947
	speed: 0.4294s/iter; left time: 1954065.5258s


 43%|████▎     | 3900/9108 [30:58<37:33,  2.31it/s]

	iters: 3900, epoch: 1 | loss: 0.6173339
	speed: 0.4391s/iter; left time: 1997830.9006s


 44%|████▍     | 4000/9108 [31:41<36:27,  2.34it/s]

	iters: 4000, epoch: 1 | loss: 0.7034430
	speed: 0.4341s/iter; left time: 1974944.5411s


 45%|████▌     | 4100/9108 [32:25<37:13,  2.24it/s]

	iters: 4100, epoch: 1 | loss: 0.6669952
	speed: 0.4411s/iter; left time: 2006979.9303s


 46%|████▌     | 4200/9108 [33:09<35:51,  2.28it/s]

	iters: 4200, epoch: 1 | loss: 0.7096673
	speed: 0.4413s/iter; left time: 2007794.5462s


 47%|████▋     | 4300/9108 [33:53<35:32,  2.26it/s]

	iters: 4300, epoch: 1 | loss: 0.1302095
	speed: 0.4371s/iter; left time: 1988665.7720s


 48%|████▊     | 4400/9108 [34:37<33:46,  2.32it/s]

	iters: 4400, epoch: 1 | loss: 0.5438665
	speed: 0.4417s/iter; left time: 2009537.3422s


 49%|████▉     | 4500/9108 [35:20<33:13,  2.31it/s]

	iters: 4500, epoch: 1 | loss: 0.5000464
	speed: 0.4326s/iter; left time: 1968109.5873s


 51%|█████     | 4600/9108 [36:04<32:52,  2.29it/s]

	iters: 4600, epoch: 1 | loss: 0.7583678
	speed: 0.4388s/iter; left time: 1996204.5893s


 52%|█████▏    | 4700/9108 [36:48<32:06,  2.29it/s]

	iters: 4700, epoch: 1 | loss: 1.1393290
	speed: 0.4387s/iter; left time: 1995893.3151s


 53%|█████▎    | 4800/9108 [37:32<31:49,  2.26it/s]

	iters: 4800, epoch: 1 | loss: 0.3887737
	speed: 0.4343s/iter; left time: 1975894.0193s


 54%|█████▍    | 4900/9108 [38:14<29:39,  2.37it/s]

	iters: 4900, epoch: 1 | loss: 0.4054871
	speed: 0.4292s/iter; left time: 1952467.2438s


 55%|█████▍    | 5000/9108 [38:57<29:32,  2.32it/s]

	iters: 5000, epoch: 1 | loss: 0.3553662
	speed: 0.4290s/iter; left time: 1951385.0274s


 56%|█████▌    | 5100/9108 [39:41<28:15,  2.36it/s]

	iters: 5100, epoch: 1 | loss: 0.3872615
	speed: 0.4325s/iter; left time: 1967443.2269s


 57%|█████▋    | 5200/9108 [40:23<25:54,  2.51it/s]

	iters: 5200, epoch: 1 | loss: 0.8089609
	speed: 0.4288s/iter; left time: 1950655.4957s


 58%|█████▊    | 5300/9108 [41:08<29:01,  2.19it/s]

	iters: 5300, epoch: 1 | loss: 0.5135522
	speed: 0.4454s/iter; left time: 2026173.8899s


 59%|█████▉    | 5400/9108 [41:53<28:07,  2.20it/s]

	iters: 5400, epoch: 1 | loss: 0.1065774
	speed: 0.4490s/iter; left time: 2042127.4294s


 60%|██████    | 5500/9108 [42:38<27:30,  2.19it/s]

	iters: 5500, epoch: 1 | loss: 0.2435830
	speed: 0.4534s/iter; left time: 2062489.1612s


 61%|██████▏   | 5600/9108 [43:23<26:34,  2.20it/s]

	iters: 5600, epoch: 1 | loss: 0.6774763
	speed: 0.4517s/iter; left time: 2054358.9314s


 63%|██████▎   | 5700/9108 [44:08<25:53,  2.19it/s]

	iters: 5700, epoch: 1 | loss: 0.3930973
	speed: 0.4492s/iter; left time: 2043105.8999s


 64%|██████▎   | 5800/9108 [44:53<24:36,  2.24it/s]

	iters: 5800, epoch: 1 | loss: 0.3336146
	speed: 0.4484s/iter; left time: 2039409.0846s


 65%|██████▍   | 5900/9108 [45:37<23:39,  2.26it/s]

	iters: 5900, epoch: 1 | loss: 1.0342410
	speed: 0.4349s/iter; left time: 1977996.9715s


 66%|██████▌   | 6000/9108 [46:19<21:39,  2.39it/s]

	iters: 6000, epoch: 1 | loss: 1.0455463
	speed: 0.4280s/iter; left time: 1946469.9976s


 67%|██████▋   | 6100/9108 [47:02<22:09,  2.26it/s]

	iters: 6100, epoch: 1 | loss: 0.3864816
	speed: 0.4286s/iter; left time: 1949320.6213s


 68%|██████▊   | 6200/9108 [47:46<21:40,  2.24it/s]

	iters: 6200, epoch: 1 | loss: 0.6871858
	speed: 0.4343s/iter; left time: 1975095.8058s


 69%|██████▉   | 6300/9108 [48:30<19:47,  2.37it/s]

	iters: 6300, epoch: 1 | loss: 0.7385907
	speed: 0.4399s/iter; left time: 2000367.2044s


 70%|███████   | 6400/9108 [49:13<19:29,  2.32it/s]

	iters: 6400, epoch: 1 | loss: 0.5586243
	speed: 0.4357s/iter; left time: 1981322.8338s


 71%|███████▏  | 6500/9108 [49:58<19:28,  2.23it/s]

	iters: 6500, epoch: 1 | loss: 0.7796095
	speed: 0.4434s/iter; left time: 2016182.2861s


 72%|███████▏  | 6600/9108 [50:42<19:13,  2.17it/s]

	iters: 6600, epoch: 1 | loss: 1.0344616
	speed: 0.4408s/iter; left time: 2004585.6999s


 74%|███████▎  | 6700/9108 [51:25<17:48,  2.25it/s]

	iters: 6700, epoch: 1 | loss: 0.8219375
	speed: 0.4360s/iter; left time: 1982448.2698s


 75%|███████▍  | 6800/9108 [52:09<16:32,  2.32it/s]

	iters: 6800, epoch: 1 | loss: 0.8313220
	speed: 0.4335s/iter; left time: 1971048.5966s


 76%|███████▌  | 6900/9108 [52:52<16:02,  2.29it/s]

	iters: 6900, epoch: 1 | loss: 0.6260713
	speed: 0.4305s/iter; left time: 1957332.2282s


 77%|███████▋  | 7000/9108 [53:34<15:37,  2.25it/s]

	iters: 7000, epoch: 1 | loss: 0.8105592
	speed: 0.4232s/iter; left time: 1924429.3668s


 78%|███████▊  | 7100/9108 [54:16<13:44,  2.44it/s]

	iters: 7100, epoch: 1 | loss: 0.7645522
	speed: 0.4221s/iter; left time: 1919237.7860s


 79%|███████▉  | 7200/9108 [54:57<12:05,  2.63it/s]

	iters: 7200, epoch: 1 | loss: 0.8906936
	speed: 0.4045s/iter; left time: 1839122.6527s


 80%|████████  | 7300/9108 [55:37<12:31,  2.41it/s]

	iters: 7300, epoch: 1 | loss: 0.6446863
	speed: 0.3985s/iter; left time: 1811945.2228s


 81%|████████  | 7400/9108 [56:18<11:55,  2.39it/s]

	iters: 7400, epoch: 1 | loss: 0.4970760
	speed: 0.4141s/iter; left time: 1882742.6325s


 82%|████████▏ | 7500/9108 [57:01<11:36,  2.31it/s]

	iters: 7500, epoch: 1 | loss: 0.9896692
	speed: 0.4293s/iter; left time: 1951723.3681s


 83%|████████▎ | 7600/9108 [57:43<10:10,  2.47it/s]

	iters: 7600, epoch: 1 | loss: 1.2571949
	speed: 0.4255s/iter; left time: 1934642.3191s


 85%|████████▍ | 7700/9108 [58:26<10:05,  2.33it/s]

	iters: 7700, epoch: 1 | loss: 0.4798294
	speed: 0.4273s/iter; left time: 1942684.2027s


 86%|████████▌ | 7800/9108 [59:09<09:50,  2.22it/s]

	iters: 7800, epoch: 1 | loss: 0.6616950
	speed: 0.4290s/iter; left time: 1950128.9469s


 87%|████████▋ | 7900/9108 [59:53<09:04,  2.22it/s]

	iters: 7900, epoch: 1 | loss: 1.1101751
	speed: 0.4345s/iter; left time: 1975228.2862s


 88%|████████▊ | 8000/9108 [1:00:35<08:02,  2.30it/s]

	iters: 8000, epoch: 1 | loss: 0.4687842
	speed: 0.4283s/iter; left time: 1947137.6648s


 89%|████████▉ | 8100/9108 [1:01:19<07:17,  2.30it/s]

	iters: 8100, epoch: 1 | loss: 0.9705433
	speed: 0.4408s/iter; left time: 2003958.4152s


 90%|█████████ | 8200/9108 [1:02:03<06:35,  2.30it/s]

	iters: 8200, epoch: 1 | loss: 0.4075621
	speed: 0.4368s/iter; left time: 1985573.2682s


 91%|█████████ | 8300/9108 [1:02:47<06:10,  2.18it/s]

	iters: 8300, epoch: 1 | loss: 0.3262943
	speed: 0.4366s/iter; left time: 1984686.1375s


 92%|█████████▏| 8400/9108 [1:03:30<05:05,  2.32it/s]

	iters: 8400, epoch: 1 | loss: 0.2124672
	speed: 0.4322s/iter; left time: 1964466.4997s


 93%|█████████▎| 8500/9108 [1:04:12<04:17,  2.36it/s]

	iters: 8500, epoch: 1 | loss: 0.7266654
	speed: 0.4227s/iter; left time: 1921322.5070s


 94%|█████████▍| 8600/9108 [1:04:56<03:43,  2.27it/s]

	iters: 8600, epoch: 1 | loss: 0.7201807
	speed: 0.4328s/iter; left time: 1967341.5583s


 96%|█████████▌| 8700/9108 [1:05:39<02:58,  2.28it/s]

	iters: 8700, epoch: 1 | loss: 1.4474119
	speed: 0.4371s/iter; left time: 1986586.3272s


 97%|█████████▋| 8800/9108 [1:06:23<02:22,  2.16it/s]

	iters: 8800, epoch: 1 | loss: 0.6030265
	speed: 0.4415s/iter; left time: 2006858.9661s


 98%|█████████▊| 8900/9108 [1:07:08<01:26,  2.40it/s]

	iters: 8900, epoch: 1 | loss: 1.0027847
	speed: 0.4445s/iter; left time: 2020408.9137s


 99%|█████████▉| 9000/9108 [1:07:52<00:46,  2.31it/s]

	iters: 9000, epoch: 1 | loss: 1.2804111
	speed: 0.4447s/iter; left time: 2021377.8016s


100%|█████████▉| 9100/9108 [1:08:37<00:03,  2.26it/s]

	iters: 9100, epoch: 1 | loss: 0.6983083
	speed: 0.4433s/iter; left time: 2014969.5939s


100%|██████████| 9108/9108 [1:08:42<00:00,  2.21it/s]


Epoch: 1 cost time: 4122.149995088577
Epoch: 1, Steps: 9108 | Train Loss: 0.7170265 Vali Loss: 0.6500646 Test Loss: 0.7147091
Validation loss decreased (inf --> 0.650065).  Saving model ...
Updating learning rate to 0.0001


  1%|          | 100/9108 [01:28<1:04:36,  2.32it/s]

	iters: 100, epoch: 2 | loss: 0.9869779
	speed: 49.2091s/iter; left time: 223645227.9726s


  2%|▏         | 200/9108 [02:11<1:04:39,  2.30it/s]

	iters: 200, epoch: 2 | loss: 1.2308935
	speed: 0.4341s/iter; left time: 1973069.1674s


  3%|▎         | 300/9108 [02:54<1:05:12,  2.25it/s]

	iters: 300, epoch: 2 | loss: 1.3123395
	speed: 0.4330s/iter; left time: 1967929.9147s


  4%|▍         | 400/9108 [03:38<1:02:30,  2.32it/s]

	iters: 400, epoch: 2 | loss: 0.7008165
	speed: 0.4321s/iter; left time: 1963683.4015s


  5%|▌         | 500/9108 [04:21<1:01:39,  2.33it/s]

	iters: 500, epoch: 2 | loss: 1.0682368
	speed: 0.4370s/iter; left time: 1985835.5694s


  7%|▋         | 600/9108 [05:04<1:03:43,  2.23it/s]

	iters: 600, epoch: 2 | loss: 0.4560034
	speed: 0.4268s/iter; left time: 1939501.2435s


  8%|▊         | 700/9108 [05:48<1:03:54,  2.19it/s]

	iters: 700, epoch: 2 | loss: 1.0057974
	speed: 0.4415s/iter; left time: 2006093.5871s


  9%|▉         | 800/9108 [06:32<1:00:02,  2.31it/s]

	iters: 800, epoch: 2 | loss: 0.3474943
	speed: 0.4433s/iter; left time: 2014283.7177s


 10%|▉         | 900/9108 [07:17<1:00:55,  2.25it/s]

	iters: 900, epoch: 2 | loss: 0.8353213
	speed: 0.4418s/iter; left time: 2007406.1058s


 11%|█         | 1000/9108 [08:01<1:01:22,  2.20it/s]

	iters: 1000, epoch: 2 | loss: 0.1680645
	speed: 0.4429s/iter; left time: 2012373.2063s


 12%|█▏        | 1100/9108 [08:45<56:45,  2.35it/s]  

	iters: 1100, epoch: 2 | loss: 0.7870152
	speed: 0.4427s/iter; left time: 2011334.5027s


 13%|█▎        | 1200/9108 [09:29<58:12,  2.26it/s]

	iters: 1200, epoch: 2 | loss: 1.3046731
	speed: 0.4358s/iter; left time: 1980044.9922s


 14%|█▍        | 1300/9108 [10:13<56:46,  2.29it/s]  

	iters: 1300, epoch: 2 | loss: 0.7198861
	speed: 0.4403s/iter; left time: 2000519.0054s


 15%|█▌        | 1400/9108 [10:56<54:50,  2.34it/s]

	iters: 1400, epoch: 2 | loss: 0.9255454
	speed: 0.4281s/iter; left time: 1945068.5274s


 16%|█▋        | 1500/9108 [11:38<54:22,  2.33it/s]

	iters: 1500, epoch: 2 | loss: 0.5069379
	speed: 0.4256s/iter; left time: 1933811.7702s


 18%|█▊        | 1600/9108 [12:22<55:32,  2.25it/s]

	iters: 1600, epoch: 2 | loss: 0.6015073
	speed: 0.4348s/iter; left time: 1975545.2029s


 19%|█▊        | 1700/9108 [13:06<55:26,  2.23it/s]

	iters: 1700, epoch: 2 | loss: 1.2061354
	speed: 0.4414s/iter; left time: 2005434.9244s


 20%|█▉        | 1800/9108 [13:50<54:59,  2.21it/s]

	iters: 1800, epoch: 2 | loss: 0.9730512
	speed: 0.4453s/iter; left time: 2022837.1402s


 21%|██        | 1900/9108 [14:34<52:19,  2.30it/s]

	iters: 1900, epoch: 2 | loss: 0.8591208
	speed: 0.4351s/iter; left time: 1976711.4504s


 22%|██▏       | 2000/9108 [15:17<51:01,  2.32it/s]

	iters: 2000, epoch: 2 | loss: 0.5452974
	speed: 0.4324s/iter; left time: 1964195.2007s


 23%|██▎       | 2100/9108 [16:00<48:04,  2.43it/s]

	iters: 2100, epoch: 2 | loss: 1.7450293
	speed: 0.4253s/iter; left time: 1932165.3294s


 24%|██▍       | 2200/9108 [16:44<51:19,  2.24it/s]

	iters: 2200, epoch: 2 | loss: 0.5238258
	speed: 0.4404s/iter; left time: 2000547.7015s


 25%|██▌       | 2300/9108 [17:27<49:46,  2.28it/s]

	iters: 2300, epoch: 2 | loss: 0.3509825
	speed: 0.4369s/iter; left time: 1984615.2054s


 26%|██▋       | 2400/9108 [18:10<47:27,  2.36it/s]

	iters: 2400, epoch: 2 | loss: 0.8011184
	speed: 0.4301s/iter; left time: 1953728.8164s


 27%|██▋       | 2500/9108 [18:53<43:47,  2.51it/s]

	iters: 2500, epoch: 2 | loss: 0.6343950
	speed: 0.4262s/iter; left time: 1935955.0038s


 29%|██▊       | 2600/9108 [19:36<45:30,  2.38it/s]

	iters: 2600, epoch: 2 | loss: 0.2489275
	speed: 0.4262s/iter; left time: 1935745.5530s


 30%|██▉       | 2700/9108 [20:18<44:56,  2.38it/s]

	iters: 2700, epoch: 2 | loss: 0.5507483
	speed: 0.4245s/iter; left time: 1927939.5771s


 31%|███       | 2800/9108 [21:00<43:48,  2.40it/s]

	iters: 2800, epoch: 2 | loss: 1.1428587
	speed: 0.4224s/iter; left time: 1918471.2936s


 32%|███▏      | 2900/9108 [21:42<41:56,  2.47it/s]

	iters: 2900, epoch: 2 | loss: 0.5886487
	speed: 0.4122s/iter; left time: 1872103.0023s


 33%|███▎      | 3000/9108 [22:23<42:47,  2.38it/s]

	iters: 3000, epoch: 2 | loss: 0.2254100
	speed: 0.4128s/iter; left time: 1874843.9960s


 34%|███▍      | 3100/9108 [23:04<41:34,  2.41it/s]

	iters: 3100, epoch: 2 | loss: 0.8042838
	speed: 0.4101s/iter; left time: 1862768.3794s


 35%|███▌      | 3200/9108 [23:44<37:39,  2.62it/s]

	iters: 3200, epoch: 2 | loss: 0.7080485
	speed: 0.4021s/iter; left time: 1826007.3746s


 36%|███▌      | 3300/9108 [24:25<39:24,  2.46it/s]

	iters: 3300, epoch: 2 | loss: 1.4254271
	speed: 0.4052s/iter; left time: 1840153.4103s


 37%|███▋      | 3400/9108 [25:03<36:38,  2.60it/s]

	iters: 3400, epoch: 2 | loss: 0.7714157
	speed: 0.3885s/iter; left time: 1764501.0529s


 38%|███▊      | 3500/9108 [25:42<35:43,  2.62it/s]

	iters: 3500, epoch: 2 | loss: 0.9335486
	speed: 0.3911s/iter; left time: 1775925.0189s


 40%|███▉      | 3600/9108 [26:22<35:39,  2.57it/s]

	iters: 3600, epoch: 2 | loss: 1.1458006
	speed: 0.3941s/iter; left time: 1789935.2162s


 41%|████      | 3700/9108 [27:01<34:31,  2.61it/s]

	iters: 3700, epoch: 2 | loss: 0.4021869
	speed: 0.3862s/iter; left time: 1753655.0523s


 42%|████▏     | 3800/9108 [27:39<34:20,  2.58it/s]

	iters: 3800, epoch: 2 | loss: 0.7481030
	speed: 0.3861s/iter; left time: 1753536.3065s


 43%|████▎     | 3900/9108 [28:18<34:25,  2.52it/s]

	iters: 3900, epoch: 2 | loss: 0.3137011
	speed: 0.3858s/iter; left time: 1751752.5783s


 44%|████▍     | 4000/9108 [28:56<30:57,  2.75it/s]

	iters: 4000, epoch: 2 | loss: 0.6486335
	speed: 0.3855s/iter; left time: 1750657.8173s


 45%|████▌     | 4100/9108 [29:35<31:07,  2.68it/s]

	iters: 4100, epoch: 2 | loss: 0.4037570
	speed: 0.3843s/iter; left time: 1745076.3148s


 46%|████▌     | 4200/9108 [30:13<30:49,  2.65it/s]

	iters: 4200, epoch: 2 | loss: 0.8911333
	speed: 0.3795s/iter; left time: 1723363.0699s


 47%|████▋     | 4300/9108 [30:51<30:26,  2.63it/s]

	iters: 4300, epoch: 2 | loss: 0.8834850
	speed: 0.3797s/iter; left time: 1723903.2041s


 48%|████▊     | 4400/9108 [31:29<29:21,  2.67it/s]

	iters: 4400, epoch: 2 | loss: 0.8083732
	speed: 0.3796s/iter; left time: 1723466.7554s


 49%|████▉     | 4500/9108 [32:06<29:38,  2.59it/s]

	iters: 4500, epoch: 2 | loss: 0.3719441
	speed: 0.3779s/iter; left time: 1715611.8772s


 51%|█████     | 4600/9108 [32:45<29:31,  2.55it/s]

	iters: 4600, epoch: 2 | loss: 0.3277498
	speed: 0.3862s/iter; left time: 1753637.3592s


 52%|█████▏    | 4700/9108 [33:24<28:32,  2.57it/s]

	iters: 4700, epoch: 2 | loss: 0.2966518
	speed: 0.3873s/iter; left time: 1758371.2862s


 53%|█████▎    | 4800/9108 [34:03<28:19,  2.53it/s]

	iters: 4800, epoch: 2 | loss: 0.1175890
	speed: 0.3938s/iter; left time: 1788109.0788s


 54%|█████▍    | 4900/9108 [34:44<28:42,  2.44it/s]

	iters: 4900, epoch: 2 | loss: 1.4807957
	speed: 0.4068s/iter; left time: 1846878.2477s


 55%|█████▍    | 5000/9108 [35:24<28:27,  2.41it/s]

	iters: 5000, epoch: 2 | loss: 1.2743410
	speed: 0.4044s/iter; left time: 1836061.8723s


 56%|█████▌    | 5100/9108 [36:05<26:31,  2.52it/s]

	iters: 5100, epoch: 2 | loss: 0.2944052
	speed: 0.4041s/iter; left time: 1834540.1507s


 57%|█████▋    | 5200/9108 [36:45<26:41,  2.44it/s]

	iters: 5200, epoch: 2 | loss: 0.7568615
	speed: 0.4035s/iter; left time: 1831908.6338s


 58%|█████▊    | 5300/9108 [37:25<25:14,  2.51it/s]

	iters: 5300, epoch: 2 | loss: 0.1911297
	speed: 0.4011s/iter; left time: 1820874.5816s


 59%|█████▉    | 5400/9108 [38:05<24:07,  2.56it/s]

	iters: 5400, epoch: 2 | loss: 0.5439163
	speed: 0.3972s/iter; left time: 1803228.9433s


 60%|██████    | 5500/9108 [38:45<24:27,  2.46it/s]

	iters: 5500, epoch: 2 | loss: 0.3832566
	speed: 0.4069s/iter; left time: 1847108.5063s


 61%|██████▏   | 5600/9108 [39:27<24:06,  2.43it/s]

	iters: 5600, epoch: 2 | loss: 0.7979958
	speed: 0.4103s/iter; left time: 1862456.6785s


 63%|██████▎   | 5700/9108 [40:07<21:55,  2.59it/s]

	iters: 5700, epoch: 2 | loss: 0.6839120
	speed: 0.4016s/iter; left time: 1823067.2260s


 64%|██████▎   | 5800/9108 [40:48<23:11,  2.38it/s]

	iters: 5800, epoch: 2 | loss: 1.1502287
	speed: 0.4153s/iter; left time: 1885027.6293s


 65%|██████▍   | 5900/9108 [41:30<22:24,  2.39it/s]

	iters: 5900, epoch: 2 | loss: 0.7157573
	speed: 0.4170s/iter; left time: 1892812.0711s


 66%|██████▌   | 6000/9108 [42:11<21:30,  2.41it/s]

	iters: 6000, epoch: 2 | loss: 0.3803502
	speed: 0.4149s/iter; left time: 1883273.3219s


 67%|██████▋   | 6100/9108 [42:53<20:51,  2.40it/s]

	iters: 6100, epoch: 2 | loss: 0.7159748
	speed: 0.4143s/iter; left time: 1880352.5458s


 68%|██████▊   | 6200/9108 [43:34<19:47,  2.45it/s]

	iters: 6200, epoch: 2 | loss: 0.7475230
	speed: 0.4121s/iter; left time: 1870576.3697s


 69%|██████▉   | 6300/9108 [44:16<19:45,  2.37it/s]

	iters: 6300, epoch: 2 | loss: 0.6348569
	speed: 0.4165s/iter; left time: 1890364.6860s


 70%|███████   | 6400/9108 [44:58<19:28,  2.32it/s]

	iters: 6400, epoch: 2 | loss: 0.6371484
	speed: 0.4246s/iter; left time: 1927042.6038s


 71%|███████▏  | 6500/9108 [45:41<18:34,  2.34it/s]

	iters: 6500, epoch: 2 | loss: 1.0182978
	speed: 0.4244s/iter; left time: 1926209.9862s


 72%|███████▏  | 6600/9108 [46:22<17:31,  2.39it/s]

	iters: 6600, epoch: 2 | loss: 0.3168276
	speed: 0.4177s/iter; left time: 1895547.8637s


 74%|███████▎  | 6700/9108 [47:04<16:29,  2.43it/s]

	iters: 6700, epoch: 2 | loss: 0.3088010
	speed: 0.4159s/iter; left time: 1887308.7593s


 75%|███████▍  | 6800/9108 [47:46<16:00,  2.40it/s]

	iters: 6800, epoch: 2 | loss: 0.5275205
	speed: 0.4240s/iter; left time: 1924087.2974s


 76%|███████▌  | 6900/9108 [48:28<15:19,  2.40it/s]

	iters: 6900, epoch: 2 | loss: 0.8214452
	speed: 0.4194s/iter; left time: 1903056.0169s


 77%|███████▋  | 7000/9108 [49:10<14:22,  2.45it/s]

	iters: 7000, epoch: 2 | loss: 0.4011241
	speed: 0.4171s/iter; left time: 1892873.4798s


 78%|███████▊  | 7100/9108 [49:52<14:09,  2.36it/s]

	iters: 7100, epoch: 2 | loss: 0.8207394
	speed: 0.4193s/iter; left time: 1902756.8478s


 79%|███████▉  | 7200/9108 [50:34<13:03,  2.44it/s]

	iters: 7200, epoch: 2 | loss: 0.5100999
	speed: 0.4190s/iter; left time: 1901439.1750s


 80%|████████  | 7300/9108 [51:16<12:45,  2.36it/s]

	iters: 7300, epoch: 2 | loss: 1.3259668
	speed: 0.4198s/iter; left time: 1905063.0436s


 81%|████████  | 7400/9108 [51:57<11:32,  2.47it/s]

	iters: 7400, epoch: 2 | loss: 0.3790745
	speed: 0.4142s/iter; left time: 1879614.3685s


 82%|████████▏ | 7500/9108 [52:39<11:16,  2.38it/s]

	iters: 7500, epoch: 2 | loss: 0.4205304
	speed: 0.4172s/iter; left time: 1892933.6788s


 83%|████████▎ | 7600/9108 [53:21<10:16,  2.45it/s]

	iters: 7600, epoch: 2 | loss: 0.1440997
	speed: 0.4196s/iter; left time: 1903672.8754s


 85%|████████▍ | 7700/9108 [54:03<09:46,  2.40it/s]

	iters: 7700, epoch: 2 | loss: 1.1851895
	speed: 0.4213s/iter; left time: 1911745.7184s


 86%|████████▌ | 7800/9108 [54:45<09:08,  2.38it/s]

	iters: 7800, epoch: 2 | loss: 0.6204303
	speed: 0.4188s/iter; left time: 1900335.7657s


 87%|████████▋ | 7900/9108 [55:27<08:24,  2.39it/s]

	iters: 7900, epoch: 2 | loss: 1.2498780
	speed: 0.4158s/iter; left time: 1886602.9449s


 88%|████████▊ | 8000/9108 [56:08<07:40,  2.40it/s]

	iters: 8000, epoch: 2 | loss: 0.3046704
	speed: 0.4167s/iter; left time: 1890621.3059s


 89%|████████▉ | 8100/9108 [56:50<06:45,  2.49it/s]

	iters: 8100, epoch: 2 | loss: 0.1743187
	speed: 0.4131s/iter; left time: 1874036.7584s


 90%|█████████ | 8200/9108 [57:31<06:20,  2.39it/s]

	iters: 8200, epoch: 2 | loss: 0.2505195
	speed: 0.4130s/iter; left time: 1873824.6288s


 91%|█████████ | 8300/9108 [58:12<05:38,  2.39it/s]

	iters: 8300, epoch: 2 | loss: 0.9403933
	speed: 0.4160s/iter; left time: 1887347.3715s


 92%|█████████▏| 8400/9108 [58:54<04:57,  2.38it/s]

	iters: 8400, epoch: 2 | loss: 0.8132295
	speed: 0.4182s/iter; left time: 1897362.4253s


 93%|█████████▎| 8500/9108 [59:36<04:19,  2.34it/s]

	iters: 8500, epoch: 2 | loss: 1.0651249
	speed: 0.4191s/iter; left time: 1901109.5015s


 94%|█████████▍| 8600/9108 [1:00:18<03:39,  2.32it/s]

	iters: 8600, epoch: 2 | loss: 1.0519512
	speed: 0.4208s/iter; left time: 1908719.4645s


 96%|█████████▌| 8700/9108 [1:01:00<02:48,  2.42it/s]

	iters: 8700, epoch: 2 | loss: 0.5436082
	speed: 0.4178s/iter; left time: 1895361.4251s


 97%|█████████▋| 8800/9108 [1:01:42<02:07,  2.42it/s]

	iters: 8800, epoch: 2 | loss: 0.6848136
	speed: 0.4204s/iter; left time: 1907198.6614s


 98%|█████████▊| 8900/9108 [1:02:24<01:27,  2.39it/s]

	iters: 8900, epoch: 2 | loss: 0.4687181
	speed: 0.4195s/iter; left time: 1902904.9017s


 99%|█████████▉| 9000/9108 [1:03:06<00:46,  2.30it/s]

	iters: 9000, epoch: 2 | loss: 0.1898419
	speed: 0.4223s/iter; left time: 1915390.6461s


100%|█████████▉| 9100/9108 [1:03:48<00:03,  2.28it/s]

	iters: 9100, epoch: 2 | loss: 0.4321501
	speed: 0.4218s/iter; left time: 1913376.6813s


100%|██████████| 9108/9108 [1:03:53<00:00,  2.38it/s]


Epoch: 2 cost time: 3833.7288694381714
Epoch: 2, Steps: 9108 | Train Loss: 0.6791200 Vali Loss: 0.6416138 Test Loss: 0.7056551
Validation loss decreased (0.650065 --> 0.641614).  Saving model ...
Updating learning rate to 5e-05


  1%|          | 100/9108 [01:27<1:03:17,  2.37it/s]

	iters: 100, epoch: 3 | loss: 0.2043625
	speed: 49.0418s/iter; left time: 222438253.5287s


  2%|▏         | 200/9108 [02:10<1:04:45,  2.29it/s]

	iters: 200, epoch: 3 | loss: 1.1988410
	speed: 0.4251s/iter; left time: 1928084.1363s


  3%|▎         | 300/9108 [02:52<1:01:25,  2.39it/s]

	iters: 300, epoch: 3 | loss: 0.2544245
	speed: 0.4179s/iter; left time: 1895185.6084s


  4%|▍         | 400/9108 [03:34<1:03:11,  2.30it/s]

	iters: 400, epoch: 3 | loss: 0.7623367
	speed: 0.4248s/iter; left time: 1926533.1812s


  5%|▌         | 500/9108 [04:16<1:02:22,  2.30it/s]

	iters: 500, epoch: 3 | loss: 0.2836936
	speed: 0.4222s/iter; left time: 1914926.3702s


  7%|▋         | 600/9108 [04:59<59:14,  2.39it/s]  

	iters: 600, epoch: 3 | loss: 0.7124045
	speed: 0.4236s/iter; left time: 1921137.7942s


  8%|▊         | 700/9108 [05:41<58:33,  2.39it/s]  

	iters: 700, epoch: 3 | loss: 0.3288310
	speed: 0.4241s/iter; left time: 1923355.5456s


  9%|▉         | 800/9108 [06:23<58:35,  2.36it/s]  

	iters: 800, epoch: 3 | loss: 0.8347608
	speed: 0.4212s/iter; left time: 1910299.9040s


 10%|▉         | 900/9108 [07:05<57:56,  2.36it/s]  

	iters: 900, epoch: 3 | loss: 0.3064698
	speed: 0.4203s/iter; left time: 1905963.3372s


 11%|█         | 1000/9108 [07:47<58:37,  2.30it/s] 

	iters: 1000, epoch: 3 | loss: 0.4728279
	speed: 0.4213s/iter; left time: 1910448.9207s


 12%|█▏        | 1100/9108 [08:29<56:42,  2.35it/s]

	iters: 1100, epoch: 3 | loss: 1.0783888
	speed: 0.4184s/iter; left time: 1897315.0976s


 13%|█▎        | 1200/9108 [09:11<54:59,  2.40it/s]

	iters: 1200, epoch: 3 | loss: 0.8498608
	speed: 0.4186s/iter; left time: 1898235.3653s


 14%|█▍        | 1300/9108 [09:53<53:48,  2.42it/s]

	iters: 1300, epoch: 3 | loss: 0.8280835
	speed: 0.4202s/iter; left time: 1905224.4951s


 15%|█▌        | 1400/9108 [10:35<52:32,  2.45it/s]

	iters: 1400, epoch: 3 | loss: 0.4269770
	speed: 0.4157s/iter; left time: 1885058.9226s


 16%|█▋        | 1500/9108 [11:16<53:56,  2.35it/s]

	iters: 1500, epoch: 3 | loss: 1.1761086
	speed: 0.4133s/iter; left time: 1873885.6656s


 18%|█▊        | 1600/9108 [11:58<52:32,  2.38it/s]

	iters: 1600, epoch: 3 | loss: 0.8397982
	speed: 0.4172s/iter; left time: 1891486.5963s


 19%|█▊        | 1700/9108 [12:40<53:07,  2.32it/s]

	iters: 1700, epoch: 3 | loss: 1.2058434
	speed: 0.4215s/iter; left time: 1911070.6943s


 20%|█▉        | 1800/9108 [13:22<52:13,  2.33it/s]

	iters: 1800, epoch: 3 | loss: 0.5745770
	speed: 0.4232s/iter; left time: 1918703.7856s


 21%|██        | 1900/9108 [14:04<51:06,  2.35it/s]

	iters: 1900, epoch: 3 | loss: 0.6040436
	speed: 0.4228s/iter; left time: 1916765.6538s


 22%|██▏       | 2000/9108 [14:46<49:35,  2.39it/s]

	iters: 2000, epoch: 3 | loss: 0.7302086
	speed: 0.4199s/iter; left time: 1903680.0591s


 23%|██▎       | 2100/9108 [15:29<51:33,  2.27it/s]

	iters: 2100, epoch: 3 | loss: 1.0105579
	speed: 0.4235s/iter; left time: 1919884.2188s


 24%|██▍       | 2200/9108 [16:11<48:17,  2.38it/s]

	iters: 2200, epoch: 3 | loss: 0.1850135
	speed: 0.4228s/iter; left time: 1916821.7653s


 25%|██▌       | 2300/9108 [16:53<47:14,  2.40it/s]

	iters: 2300, epoch: 3 | loss: 0.8467303
	speed: 0.4234s/iter; left time: 1919418.8324s


 26%|██▋       | 2400/9108 [17:35<47:01,  2.38it/s]

	iters: 2400, epoch: 3 | loss: 0.5674127
	speed: 0.4198s/iter; left time: 1902913.6736s


 27%|██▋       | 2500/9108 [18:17<46:57,  2.34it/s]

	iters: 2500, epoch: 3 | loss: 0.4549834
	speed: 0.4195s/iter; left time: 1901636.7430s


 29%|██▊       | 2600/9108 [18:59<45:31,  2.38it/s]

	iters: 2600, epoch: 3 | loss: 0.1388870
	speed: 0.4210s/iter; left time: 1908615.2750s


 30%|██▉       | 2700/9108 [19:41<45:21,  2.35it/s]

	iters: 2700, epoch: 3 | loss: 0.7504541
	speed: 0.4206s/iter; left time: 1906464.9861s


 31%|███       | 2800/9108 [20:24<45:19,  2.32it/s]

	iters: 2800, epoch: 3 | loss: 0.8428544
	speed: 0.4285s/iter; left time: 1942358.9084s


 32%|███▏      | 2900/9108 [21:07<43:58,  2.35it/s]

	iters: 2900, epoch: 3 | loss: 0.3542819
	speed: 0.4237s/iter; left time: 1920793.3324s


 33%|███▎      | 3000/9108 [21:49<43:03,  2.36it/s]

	iters: 3000, epoch: 3 | loss: 0.6753338
	speed: 0.4251s/iter; left time: 1927039.7789s


 34%|███▍      | 3100/9108 [22:31<42:38,  2.35it/s]

	iters: 3100, epoch: 3 | loss: 1.0125788
	speed: 0.4210s/iter; left time: 1908340.9991s


 35%|███▌      | 3200/9108 [23:13<39:52,  2.47it/s]

	iters: 3200, epoch: 3 | loss: 0.6377903
	speed: 0.4202s/iter; left time: 1904696.7801s


 36%|███▌      | 3300/9108 [23:55<42:10,  2.30it/s]

	iters: 3300, epoch: 3 | loss: 0.5929797
	speed: 0.4217s/iter; left time: 1911444.0449s


 37%|███▋      | 3400/9108 [24:38<38:02,  2.50it/s]

	iters: 3400, epoch: 3 | loss: 0.6355708
	speed: 0.4224s/iter; left time: 1914540.3381s


 38%|███▊      | 3500/9108 [25:19<37:53,  2.47it/s]

	iters: 3500, epoch: 3 | loss: 0.3662878
	speed: 0.4161s/iter; left time: 1886001.8480s


 40%|███▉      | 3600/9108 [26:01<38:31,  2.38it/s]

	iters: 3600, epoch: 3 | loss: 0.4335439
	speed: 0.4186s/iter; left time: 1897078.3684s


 41%|████      | 3700/9108 [26:44<39:24,  2.29it/s]

	iters: 3700, epoch: 3 | loss: 0.6259097
	speed: 0.4291s/iter; left time: 1944623.6278s


 42%|████▏     | 3800/9108 [27:26<37:48,  2.34it/s]

	iters: 3800, epoch: 3 | loss: 0.9646839
	speed: 0.4243s/iter; left time: 1923022.5147s


 43%|████▎     | 3900/9108 [28:10<38:06,  2.28it/s]

	iters: 3900, epoch: 3 | loss: 0.6646296
	speed: 0.4324s/iter; left time: 1959690.2500s


 44%|████▍     | 4000/9108 [28:53<36:13,  2.35it/s]

	iters: 4000, epoch: 3 | loss: 0.8138909
	speed: 0.4322s/iter; left time: 1958426.8747s


 45%|████▌     | 4100/9108 [29:36<36:22,  2.29it/s]

	iters: 4100, epoch: 3 | loss: 0.8175786
	speed: 0.4302s/iter; left time: 1949450.5340s


 46%|████▌     | 4200/9108 [30:19<36:17,  2.25it/s]

	iters: 4200, epoch: 3 | loss: 0.6205485
	speed: 0.4345s/iter; left time: 1968881.3957s


 47%|████▋     | 4300/9108 [31:03<35:31,  2.26it/s]

	iters: 4300, epoch: 3 | loss: 0.2903766
	speed: 0.4367s/iter; left time: 1979113.2690s


 48%|████▊     | 4400/9108 [31:47<32:57,  2.38it/s]

	iters: 4400, epoch: 3 | loss: 0.9510339
	speed: 0.4364s/iter; left time: 1977428.3881s


 49%|████▉     | 4500/9108 [32:30<33:57,  2.26it/s]

	iters: 4500, epoch: 3 | loss: 0.8820573
	speed: 0.4344s/iter; left time: 1968227.8311s


 51%|█████     | 4600/9108 [33:14<32:36,  2.30it/s]

	iters: 4600, epoch: 3 | loss: 0.6683468
	speed: 0.4385s/iter; left time: 1987140.0337s


 52%|█████▏    | 4700/9108 [33:57<30:49,  2.38it/s]

	iters: 4700, epoch: 3 | loss: 0.3299832
	speed: 0.4291s/iter; left time: 1944133.9008s


 53%|█████▎    | 4800/9108 [34:40<30:25,  2.36it/s]

	iters: 4800, epoch: 3 | loss: 0.5014877
	speed: 0.4323s/iter; left time: 1958884.8195s


 54%|█████▍    | 4900/9108 [35:23<31:22,  2.23it/s]

	iters: 4900, epoch: 3 | loss: 1.3196341
	speed: 0.4309s/iter; left time: 1952376.4151s


 55%|█████▍    | 5000/9108 [36:07<29:02,  2.36it/s]

	iters: 5000, epoch: 3 | loss: 0.5949284
	speed: 0.4333s/iter; left time: 1962979.6493s


 56%|█████▌    | 5100/9108 [36:50<27:39,  2.42it/s]

	iters: 5100, epoch: 3 | loss: 0.5789400
	speed: 0.4317s/iter; left time: 1956029.9174s


 57%|█████▋    | 5200/9108 [37:33<28:22,  2.29it/s]

	iters: 5200, epoch: 3 | loss: 0.9281510
	speed: 0.4321s/iter; left time: 1957588.6881s


 58%|█████▊    | 5300/9108 [38:16<27:45,  2.29it/s]

	iters: 5300, epoch: 3 | loss: 0.0752561
	speed: 0.4342s/iter; left time: 1967227.2584s


 59%|█████▉    | 5400/9108 [38:59<25:55,  2.38it/s]

	iters: 5400, epoch: 3 | loss: 1.0013746
	speed: 0.4277s/iter; left time: 1937552.8482s


 60%|██████    | 5500/9108 [39:42<25:52,  2.32it/s]

	iters: 5500, epoch: 3 | loss: 1.0118093
	speed: 0.4244s/iter; left time: 1922519.8338s


 61%|██████▏   | 5600/9108 [40:25<25:32,  2.29it/s]

	iters: 5600, epoch: 3 | loss: 1.2713628
	speed: 0.4309s/iter; left time: 1952100.7583s


 63%|██████▎   | 5700/9108 [41:08<24:25,  2.32it/s]

	iters: 5700, epoch: 3 | loss: 0.6298030
	speed: 0.4320s/iter; left time: 1956992.5207s


 64%|██████▎   | 5800/9108 [41:50<23:54,  2.31it/s]

	iters: 5800, epoch: 3 | loss: 0.8092212
	speed: 0.4264s/iter; left time: 1931396.6355s


 65%|██████▍   | 5900/9108 [42:33<23:06,  2.31it/s]

	iters: 5900, epoch: 3 | loss: 0.6926095
	speed: 0.4286s/iter; left time: 1941552.2198s


 66%|██████▌   | 6000/9108 [43:16<22:44,  2.28it/s]

	iters: 6000, epoch: 3 | loss: 1.0396917
	speed: 0.4308s/iter; left time: 1951547.1381s


 67%|██████▋   | 6100/9108 [43:59<21:43,  2.31it/s]

	iters: 6100, epoch: 3 | loss: 0.7581794
	speed: 0.4259s/iter; left time: 1929209.5878s


 68%|██████▊   | 6200/9108 [44:42<21:21,  2.27it/s]

	iters: 6200, epoch: 3 | loss: 0.7244179
	speed: 0.4281s/iter; left time: 1939068.1750s


 69%|██████▉   | 6300/9108 [45:25<20:26,  2.29it/s]

	iters: 6300, epoch: 3 | loss: 0.2264484
	speed: 0.4356s/iter; left time: 1973011.7952s


 70%|███████   | 6400/9108 [46:08<19:36,  2.30it/s]

	iters: 6400, epoch: 3 | loss: 0.2918990
	speed: 0.4305s/iter; left time: 1950004.0717s


 71%|███████▏  | 6500/9108 [46:51<18:20,  2.37it/s]

	iters: 6500, epoch: 3 | loss: 0.8065357
	speed: 0.4267s/iter; left time: 1932655.3294s


 72%|███████▏  | 6600/9108 [47:34<17:51,  2.34it/s]

	iters: 6600, epoch: 3 | loss: 0.6089737
	speed: 0.4334s/iter; left time: 1962958.2345s


 74%|███████▎  | 6700/9108 [48:18<17:00,  2.36it/s]

	iters: 6700, epoch: 3 | loss: 0.3049510
	speed: 0.4315s/iter; left time: 1954287.7099s


 75%|███████▍  | 6800/9108 [49:01<16:55,  2.27it/s]

	iters: 6800, epoch: 3 | loss: 0.6524826
	speed: 0.4316s/iter; left time: 1954635.3701s


 76%|███████▌  | 6900/9108 [49:44<15:39,  2.35it/s]

	iters: 6900, epoch: 3 | loss: 0.4533233
	speed: 0.4343s/iter; left time: 1966766.4386s


 77%|███████▋  | 7000/9108 [50:27<14:51,  2.36it/s]

	iters: 7000, epoch: 3 | loss: 1.0189939
	speed: 0.4297s/iter; left time: 1946079.1656s


 78%|███████▊  | 7100/9108 [51:11<14:05,  2.37it/s]

	iters: 7100, epoch: 3 | loss: 0.6691467
	speed: 0.4338s/iter; left time: 1964340.8511s


 79%|███████▉  | 7200/9108 [51:54<13:29,  2.36it/s]

	iters: 7200, epoch: 3 | loss: 0.9626626
	speed: 0.4314s/iter; left time: 1953804.5739s


 80%|████████  | 7300/9108 [52:37<13:12,  2.28it/s]

	iters: 7300, epoch: 3 | loss: 0.0961118
	speed: 0.4351s/iter; left time: 1970486.9124s


 81%|████████  | 7400/9108 [53:21<12:41,  2.24it/s]

	iters: 7400, epoch: 3 | loss: 0.7074081
	speed: 0.4363s/iter; left time: 1975669.8283s


 82%|████████▏ | 7500/9108 [54:05<11:51,  2.26it/s]

	iters: 7500, epoch: 3 | loss: 0.2027109
	speed: 0.4372s/iter; left time: 1979575.8434s


 83%|████████▎ | 7600/9108 [54:48<10:36,  2.37it/s]

	iters: 7600, epoch: 3 | loss: 0.8550347
	speed: 0.4368s/iter; left time: 1977761.2541s


 85%|████████▍ | 7700/9108 [55:31<09:56,  2.36it/s]

	iters: 7700, epoch: 3 | loss: 0.5267246
	speed: 0.4294s/iter; left time: 1944134.6740s


 86%|████████▌ | 7800/9108 [56:14<09:22,  2.33it/s]

	iters: 7800, epoch: 3 | loss: 0.5462833
	speed: 0.4318s/iter; left time: 1954989.7895s


 87%|████████▋ | 7900/9108 [56:58<08:44,  2.30it/s]

	iters: 7900, epoch: 3 | loss: 0.7877429
	speed: 0.4359s/iter; left time: 1973745.4097s


 88%|████████▊ | 8000/9108 [57:41<07:48,  2.36it/s]

	iters: 8000, epoch: 3 | loss: 0.3642386
	speed: 0.4299s/iter; left time: 1946386.2706s


 89%|████████▉ | 8100/9108 [58:24<07:07,  2.36it/s]

	iters: 8100, epoch: 3 | loss: 0.5196918
	speed: 0.4335s/iter; left time: 1962968.7002s


 90%|█████████ | 8200/9108 [59:08<06:45,  2.24it/s]

	iters: 8200, epoch: 3 | loss: 0.9295036
	speed: 0.4358s/iter; left time: 1972974.1608s


 91%|█████████ | 8300/9108 [59:52<06:04,  2.22it/s]

	iters: 8300, epoch: 3 | loss: 0.8544856
	speed: 0.4387s/iter; left time: 1986154.0379s


 92%|█████████▏| 8400/9108 [1:00:36<05:15,  2.24it/s]

	iters: 8400, epoch: 3 | loss: 0.2810748
	speed: 0.4441s/iter; left time: 2010496.0881s


 93%|█████████▎| 8500/9108 [1:01:20<04:20,  2.33it/s]

	iters: 8500, epoch: 3 | loss: 1.0727066
	speed: 0.4435s/iter; left time: 2007863.2090s


 94%|█████████▍| 8600/9108 [1:02:05<03:48,  2.22it/s]

	iters: 8600, epoch: 3 | loss: 0.8733196
	speed: 0.4451s/iter; left time: 2015009.6748s


 96%|█████████▌| 8700/9108 [1:02:49<03:03,  2.23it/s]

	iters: 8700, epoch: 3 | loss: 0.4856668
	speed: 0.4446s/iter; left time: 2012630.2162s


 97%|█████████▋| 8800/9108 [1:03:34<02:14,  2.29it/s]

	iters: 8800, epoch: 3 | loss: 0.5897201
	speed: 0.4438s/iter; left time: 2008973.9527s


 98%|█████████▊| 8900/9108 [1:04:18<01:29,  2.32it/s]

	iters: 8900, epoch: 3 | loss: 0.6183766
	speed: 0.4412s/iter; left time: 1997267.0641s


 99%|█████████▉| 9000/9108 [1:05:02<00:47,  2.27it/s]

	iters: 9000, epoch: 3 | loss: 0.9368425
	speed: 0.4382s/iter; left time: 1983735.2572s


100%|█████████▉| 9100/9108 [1:05:46<00:03,  2.29it/s]

	iters: 9100, epoch: 3 | loss: 0.9312149
	speed: 0.4384s/iter; left time: 1984535.3189s


100%|██████████| 9108/9108 [1:05:50<00:00,  2.31it/s]


Epoch: 3 cost time: 3950.8465275764465
Epoch: 3, Steps: 9108 | Train Loss: 0.6685652 Vali Loss: 0.6379517 Test Loss: 0.7024702
Validation loss decreased (0.641614 --> 0.637952).  Saving model ...
Updating learning rate to 2.5e-05


  1%|          | 100/9108 [01:29<1:05:11,  2.30it/s]

	iters: 100, epoch: 4 | loss: 0.1309956
	speed: 49.1991s/iter; left time: 222703567.7225s


  2%|▏         | 200/9108 [02:13<1:02:50,  2.36it/s]

	iters: 200, epoch: 4 | loss: 0.7772679
	speed: 0.4401s/iter; left time: 1992284.6524s


  3%|▎         | 300/9108 [02:57<1:04:17,  2.28it/s]

	iters: 300, epoch: 4 | loss: 0.7989144
	speed: 0.4386s/iter; left time: 1985405.7325s


  4%|▍         | 400/9108 [03:41<1:05:02,  2.23it/s]

	iters: 400, epoch: 4 | loss: 0.5406136
	speed: 0.4408s/iter; left time: 1995208.8625s


  5%|▌         | 500/9108 [04:25<1:03:31,  2.26it/s]

	iters: 500, epoch: 4 | loss: 1.2512748
	speed: 0.4392s/iter; left time: 1987871.4529s


  7%|▋         | 600/9108 [05:08<1:00:16,  2.35it/s]

	iters: 600, epoch: 4 | loss: 0.8723682
	speed: 0.4369s/iter; left time: 1977262.1650s


  8%|▊         | 700/9108 [05:52<1:01:20,  2.28it/s]

	iters: 700, epoch: 4 | loss: 0.7998312
	speed: 0.4371s/iter; left time: 1978250.9399s


  9%|▉         | 800/9108 [06:36<1:01:10,  2.26it/s]

	iters: 800, epoch: 4 | loss: 1.4035742
	speed: 0.4421s/iter; left time: 2000730.9120s


 10%|▉         | 900/9108 [07:20<1:00:12,  2.27it/s]

	iters: 900, epoch: 4 | loss: 0.0760333
	speed: 0.4368s/iter; left time: 1976935.0474s


 11%|█         | 1000/9108 [08:03<59:06,  2.29it/s] 

	iters: 1000, epoch: 4 | loss: 0.3687934
	speed: 0.4328s/iter; left time: 1958575.8735s


 12%|█▏        | 1100/9108 [08:46<57:01,  2.34it/s]  

	iters: 1100, epoch: 4 | loss: 0.6878579
	speed: 0.4314s/iter; left time: 1952189.3066s


 13%|█▎        | 1200/9108 [09:30<55:48,  2.36it/s]

	iters: 1200, epoch: 4 | loss: 1.0394316
	speed: 0.4318s/iter; left time: 1954031.3896s


 14%|█▍        | 1300/9108 [10:13<56:20,  2.31it/s]

	iters: 1300, epoch: 4 | loss: 0.5950050
	speed: 0.4319s/iter; left time: 1954511.6668s


 15%|█▌        | 1400/9108 [10:56<55:59,  2.29it/s]

	iters: 1400, epoch: 4 | loss: 0.8941981
	speed: 0.4319s/iter; left time: 1954490.4541s


 16%|█▋        | 1500/9108 [11:39<54:02,  2.35it/s]

	iters: 1500, epoch: 4 | loss: 0.8371502
	speed: 0.4311s/iter; left time: 1950970.5310s


 18%|█▊        | 1600/9108 [12:22<55:01,  2.27it/s]

	iters: 1600, epoch: 4 | loss: 0.4013610
	speed: 0.4317s/iter; left time: 1953286.1882s


 19%|█▊        | 1700/9108 [13:05<53:49,  2.29it/s]

	iters: 1700, epoch: 4 | loss: 0.2130580
	speed: 0.4320s/iter; left time: 1954678.9674s


 20%|█▉        | 1800/9108 [13:49<52:58,  2.30it/s]

	iters: 1800, epoch: 4 | loss: 0.9311681
	speed: 0.4320s/iter; left time: 1954816.7841s


 21%|██        | 1900/9108 [14:32<50:43,  2.37it/s]

	iters: 1900, epoch: 4 | loss: 0.7457365
	speed: 0.4306s/iter; left time: 1948480.4343s


 22%|██▏       | 2000/9108 [15:15<52:37,  2.25it/s]

	iters: 2000, epoch: 4 | loss: 0.4140635
	speed: 0.4318s/iter; left time: 1953972.7414s


 23%|██▎       | 2100/9108 [15:58<50:52,  2.30it/s]

	iters: 2100, epoch: 4 | loss: 1.1183496
	speed: 0.4295s/iter; left time: 1943428.2502s


 24%|██▍       | 2200/9108 [16:41<49:08,  2.34it/s]

	iters: 2200, epoch: 4 | loss: 0.9534991
	speed: 0.4285s/iter; left time: 1938707.6251s


 25%|██▌       | 2300/9108 [17:24<47:22,  2.40it/s]

	iters: 2300, epoch: 4 | loss: 1.1773263
	speed: 0.4310s/iter; left time: 1950212.1791s


 26%|██▋       | 2400/9108 [18:07<48:23,  2.31it/s]

	iters: 2400, epoch: 4 | loss: 0.3168639
	speed: 0.4277s/iter; left time: 1934856.1865s


 27%|██▋       | 2500/9108 [18:49<47:24,  2.32it/s]

	iters: 2500, epoch: 4 | loss: 0.5623509
	speed: 0.4246s/iter; left time: 1920903.6200s


 29%|██▊       | 2600/9108 [19:32<46:37,  2.33it/s]

	iters: 2600, epoch: 4 | loss: 1.0997369
	speed: 0.4271s/iter; left time: 1932178.6453s


 30%|██▉       | 2700/9108 [20:15<46:15,  2.31it/s]

	iters: 2700, epoch: 4 | loss: 0.3899532
	speed: 0.4299s/iter; left time: 1944967.7299s


 31%|███       | 2800/9108 [20:57<45:29,  2.31it/s]

	iters: 2800, epoch: 4 | loss: 1.1359087
	speed: 0.4263s/iter; left time: 1928350.2659s


 32%|███▏      | 2900/9108 [21:41<44:14,  2.34it/s]

	iters: 2900, epoch: 4 | loss: 1.0491886
	speed: 0.4327s/iter; left time: 1957548.3302s


 33%|███▎      | 3000/9108 [22:23<43:10,  2.36it/s]

	iters: 3000, epoch: 4 | loss: 0.5750162
	speed: 0.4274s/iter; left time: 1933490.5821s


 34%|███▍      | 3100/9108 [23:06<43:44,  2.29it/s]

	iters: 3100, epoch: 4 | loss: 1.0347327
	speed: 0.4298s/iter; left time: 1944296.6174s


 35%|███▌      | 3200/9108 [23:49<42:57,  2.29it/s]

	iters: 3200, epoch: 4 | loss: 0.5153126
	speed: 0.4284s/iter; left time: 1937851.3956s


 36%|███▌      | 3300/9108 [24:32<41:42,  2.32it/s]

	iters: 3300, epoch: 4 | loss: 0.1145946
	speed: 0.4320s/iter; left time: 1954310.2114s


 37%|███▋      | 3400/9108 [25:16<43:20,  2.19it/s]

	iters: 3400, epoch: 4 | loss: 0.6788580
	speed: 0.4342s/iter; left time: 1964102.9653s


 38%|███▊      | 3500/9108 [26:00<41:14,  2.27it/s]

	iters: 3500, epoch: 4 | loss: 0.3881383
	speed: 0.4386s/iter; left time: 1983960.0579s


 40%|███▉      | 3600/9108 [26:43<39:04,  2.35it/s]

	iters: 3600, epoch: 4 | loss: 0.4103225
	speed: 0.4343s/iter; left time: 1964210.6726s


 41%|████      | 3700/9108 [27:27<40:08,  2.25it/s]

	iters: 3700, epoch: 4 | loss: 0.8523880
	speed: 0.4357s/iter; left time: 1970697.7617s


 42%|████▏     | 3800/9108 [28:10<38:01,  2.33it/s]

	iters: 3800, epoch: 4 | loss: 0.4326992
	speed: 0.4307s/iter; left time: 1948137.4387s


 43%|████▎     | 3900/9108 [28:53<36:47,  2.36it/s]

	iters: 3900, epoch: 4 | loss: 0.7526003
	speed: 0.4308s/iter; left time: 1948492.9429s


 44%|████▍     | 4000/9108 [29:36<36:41,  2.32it/s]

	iters: 4000, epoch: 4 | loss: 0.8794069
	speed: 0.4308s/iter; left time: 1948189.0659s


 45%|████▌     | 4100/9108 [30:19<35:26,  2.36it/s]

	iters: 4100, epoch: 4 | loss: 0.4776483
	speed: 0.4306s/iter; left time: 1947230.3045s


 46%|████▌     | 4200/9108 [31:02<34:41,  2.36it/s]

	iters: 4200, epoch: 4 | loss: 1.3445727
	speed: 0.4289s/iter; left time: 1939538.1099s


 47%|████▋     | 4300/9108 [31:44<35:12,  2.28it/s]

	iters: 4300, epoch: 4 | loss: 1.0127242
	speed: 0.4261s/iter; left time: 1926837.1190s


 48%|████▊     | 4400/9108 [32:28<33:17,  2.36it/s]

	iters: 4400, epoch: 4 | loss: 0.4909326
	speed: 0.4314s/iter; left time: 1950742.3306s


 49%|████▉     | 4500/9108 [33:11<32:20,  2.37it/s]

	iters: 4500, epoch: 4 | loss: 0.4297133
	speed: 0.4302s/iter; left time: 1945389.1913s


 51%|█████     | 4600/9108 [33:54<31:46,  2.36it/s]

	iters: 4600, epoch: 4 | loss: 0.5600476
	speed: 0.4334s/iter; left time: 1959925.5900s


 52%|█████▏    | 4700/9108 [34:37<30:53,  2.38it/s]

	iters: 4700, epoch: 4 | loss: 0.3832643
	speed: 0.4280s/iter; left time: 1935564.8536s


 53%|█████▎    | 4800/9108 [35:20<30:10,  2.38it/s]

	iters: 4800, epoch: 4 | loss: 0.5032353
	speed: 0.4287s/iter; left time: 1938435.0857s


 54%|█████▍    | 4900/9108 [36:03<30:01,  2.34it/s]

	iters: 4900, epoch: 4 | loss: 0.6368461
	speed: 0.4303s/iter; left time: 1945613.7075s


 55%|█████▍    | 5000/9108 [36:45<29:41,  2.31it/s]

	iters: 5000, epoch: 4 | loss: 0.4679977
	speed: 0.4267s/iter; left time: 1929377.1343s


 56%|█████▌    | 5100/9108 [37:28<29:17,  2.28it/s]

	iters: 5100, epoch: 4 | loss: 1.2275987
	speed: 0.4276s/iter; left time: 1933435.0568s


 57%|█████▋    | 5200/9108 [38:10<27:08,  2.40it/s]

	iters: 5200, epoch: 4 | loss: 0.7508196
	speed: 0.4208s/iter; left time: 1902570.2938s


 58%|█████▊    | 5300/9108 [38:53<26:57,  2.35it/s]

	iters: 5300, epoch: 4 | loss: 0.2962282
	speed: 0.4248s/iter; left time: 1920660.9431s


 59%|█████▉    | 5400/9108 [39:35<25:14,  2.45it/s]

	iters: 5400, epoch: 4 | loss: 0.6561896
	speed: 0.4218s/iter; left time: 1906977.1405s


 60%|██████    | 5500/9108 [40:17<25:31,  2.36it/s]

	iters: 5500, epoch: 4 | loss: 0.5447088
	speed: 0.4254s/iter; left time: 1923234.6293s


 61%|██████▏   | 5600/9108 [41:00<23:43,  2.46it/s]

	iters: 5600, epoch: 4 | loss: 0.5699379
	speed: 0.4224s/iter; left time: 1909755.9152s


 63%|██████▎   | 5700/9108 [41:42<24:31,  2.32it/s]

	iters: 5700, epoch: 4 | loss: 0.5509443
	speed: 0.4190s/iter; left time: 1894174.8528s


 64%|██████▎   | 5800/9108 [42:24<23:35,  2.34it/s]

	iters: 5800, epoch: 4 | loss: 0.8264140
	speed: 0.4237s/iter; left time: 1915564.2826s


 65%|██████▍   | 5900/9108 [43:06<23:03,  2.32it/s]

	iters: 5900, epoch: 4 | loss: 0.5523118
	speed: 0.4233s/iter; left time: 1913761.2963s


 66%|██████▌   | 6000/9108 [43:49<22:13,  2.33it/s]

	iters: 6000, epoch: 4 | loss: 0.2640493
	speed: 0.4257s/iter; left time: 1924493.9612s


 67%|██████▋   | 6100/9108 [44:31<22:05,  2.27it/s]

	iters: 6100, epoch: 4 | loss: 1.4163041
	speed: 0.4224s/iter; left time: 1909601.9184s


 68%|██████▊   | 6200/9108 [45:13<20:24,  2.37it/s]

	iters: 6200, epoch: 4 | loss: 0.3607996
	speed: 0.4233s/iter; left time: 1913547.5600s


 69%|██████▉   | 6300/9108 [45:56<19:52,  2.36it/s]

	iters: 6300, epoch: 4 | loss: 0.9883257
	speed: 0.4232s/iter; left time: 1913206.0159s


 70%|███████   | 6400/9108 [46:38<18:07,  2.49it/s]

	iters: 6400, epoch: 4 | loss: 0.9886393
	speed: 0.4184s/iter; left time: 1891333.1339s


 71%|███████▏  | 6500/9108 [47:19<18:24,  2.36it/s]

	iters: 6500, epoch: 4 | loss: 1.0289274
	speed: 0.4183s/iter; left time: 1890620.6868s


 72%|███████▏  | 6600/9108 [48:01<17:41,  2.36it/s]

	iters: 6600, epoch: 4 | loss: 0.4151750
	speed: 0.4180s/iter; left time: 1889399.7814s


 74%|███████▎  | 6700/9108 [48:43<16:39,  2.41it/s]

	iters: 6700, epoch: 4 | loss: 1.4634057
	speed: 0.4158s/iter; left time: 1879501.2876s


 75%|███████▍  | 6800/9108 [49:24<15:54,  2.42it/s]

	iters: 6800, epoch: 4 | loss: 0.5410612
	speed: 0.4144s/iter; left time: 1872971.9645s


 76%|███████▌  | 6900/9108 [50:06<14:57,  2.46it/s]

	iters: 6900, epoch: 4 | loss: 0.6504445
	speed: 0.4139s/iter; left time: 1870602.2875s


 77%|███████▋  | 7000/9108 [50:47<14:15,  2.46it/s]

	iters: 7000, epoch: 4 | loss: 0.4710566
	speed: 0.4176s/iter; left time: 1887462.7932s


 78%|███████▊  | 7100/9108 [51:29<14:04,  2.38it/s]

	iters: 7100, epoch: 4 | loss: 0.3104787
	speed: 0.4144s/iter; left time: 1872707.1146s


 79%|███████▉  | 7200/9108 [52:10<13:17,  2.39it/s]

	iters: 7200, epoch: 4 | loss: 0.3844308
	speed: 0.4167s/iter; left time: 1883339.2011s


 80%|████████  | 7300/9108 [52:52<12:23,  2.43it/s]

	iters: 7300, epoch: 4 | loss: 0.5404924
	speed: 0.4113s/iter; left time: 1858780.6950s


 81%|████████  | 7400/9108 [53:33<12:05,  2.35it/s]

	iters: 7400, epoch: 4 | loss: 1.0259960
	speed: 0.4119s/iter; left time: 1861503.9906s


 82%|████████▏ | 7500/9108 [54:14<10:55,  2.45it/s]

	iters: 7500, epoch: 4 | loss: 0.3751688
	speed: 0.4124s/iter; left time: 1863497.5755s


 83%|████████▎ | 7600/9108 [54:55<10:44,  2.34it/s]

	iters: 7600, epoch: 4 | loss: 0.8202592
	speed: 0.4113s/iter; left time: 1858666.4981s


 85%|████████▍ | 7700/9108 [55:36<09:31,  2.46it/s]

	iters: 7700, epoch: 4 | loss: 0.9203786
	speed: 0.4058s/iter; left time: 1833734.7749s


 86%|████████▌ | 7800/9108 [56:17<09:09,  2.38it/s]

	iters: 7800, epoch: 4 | loss: 0.7314923
	speed: 0.4094s/iter; left time: 1850097.5976s


 87%|████████▋ | 7900/9108 [56:58<08:28,  2.38it/s]

	iters: 7900, epoch: 4 | loss: 0.8101640
	speed: 0.4115s/iter; left time: 1859641.1558s


 88%|████████▊ | 8000/9108 [57:39<07:31,  2.45it/s]

	iters: 8000, epoch: 4 | loss: 0.1226556
	speed: 0.4128s/iter; left time: 1865440.4052s


 89%|████████▉ | 8100/9108 [58:20<06:49,  2.46it/s]

	iters: 8100, epoch: 4 | loss: 0.7732557
	speed: 0.4067s/iter; left time: 1837639.8441s


 90%|█████████ | 8200/9108 [59:00<06:07,  2.47it/s]

	iters: 8200, epoch: 4 | loss: 0.7483575
	speed: 0.4039s/iter; left time: 1825212.9930s


 91%|█████████ | 8300/9108 [59:41<05:25,  2.48it/s]

	iters: 8300, epoch: 4 | loss: 0.2362102
	speed: 0.4066s/iter; left time: 1837166.5334s


 92%|█████████▏| 8400/9108 [1:00:21<04:42,  2.50it/s]

	iters: 8400, epoch: 4 | loss: 1.5722545
	speed: 0.4055s/iter; left time: 1832345.5404s


 93%|█████████▎| 8500/9108 [1:01:02<03:56,  2.57it/s]

	iters: 8500, epoch: 4 | loss: 0.9324952
	speed: 0.4053s/iter; left time: 1831040.8071s


 94%|█████████▍| 8600/9108 [1:01:43<03:30,  2.42it/s]

	iters: 8600, epoch: 4 | loss: 0.2710575
	speed: 0.4111s/iter; left time: 1857170.3524s


 96%|█████████▌| 8700/9108 [1:02:24<02:47,  2.44it/s]

	iters: 8700, epoch: 4 | loss: 1.0580034
	speed: 0.4121s/iter; left time: 1861672.2962s


 97%|█████████▋| 8800/9108 [1:03:05<02:05,  2.45it/s]

	iters: 8800, epoch: 4 | loss: 1.3566012
	speed: 0.4085s/iter; left time: 1845348.9542s


 98%|█████████▊| 8900/9108 [1:03:46<01:26,  2.40it/s]

	iters: 8900, epoch: 4 | loss: 0.9176188
	speed: 0.4076s/iter; left time: 1841624.9655s


 99%|█████████▉| 9000/9108 [1:04:27<00:44,  2.43it/s]

	iters: 9000, epoch: 4 | loss: 1.2225461
	speed: 0.4088s/iter; left time: 1846641.7482s


100%|█████████▉| 9100/9108 [1:05:08<00:03,  2.50it/s]

	iters: 9100, epoch: 4 | loss: 1.2699012
	speed: 0.4104s/iter; left time: 1854011.8650s


100%|██████████| 9108/9108 [1:05:12<00:00,  2.33it/s]


Epoch: 4 cost time: 3912.9047214984894


In [None]:
# Load the TensorBoard notebook extension. %reload_ext (rather than %load_ext)
# re-initialises the extension if it is already loaded, so this cell is safe
# to re-run in the same kernel.
%reload_ext tensorboard
# Launch TensorBoard inline in the notebook, reading event files from ./runs/
# (a path relative to the notebook's working directory).
# NOTE(review): presumably the SummaryWriter used during training writes to
# ./runs/ (its default log directory) -- confirm against the experiment code.
%tensorboard --logdir=./runs/